Example 1
def try_space_inference_from_list(list_op):
    """
    Attempts to infer shape space from a list op. A list op may be the result of fetching state from a Python
    memory.

    Args:
        list_op (list): List with arbitrary sub-structure.

    Returns:
        Space: Inferred Space object represented by list.
    """
    if get_backend() == "pytorch":
        batch_shape = len(list_op)
        if batch_shape > 0:
            # Try to infer more things by looking inside list.
            elem = list_op[0]
            if isinstance(elem, torch.Tensor):
                list_type = elem.dtype
                inner_shape = elem.shape
                return BoxSpace.from_spec(spec=convert_dtype(list_type, "np"),
                                          shape=(batch_shape, ) + inner_shape,
                                          add_batch_rank=True)
            elif isinstance(elem, list):
                inner_shape = len(elem)
                return BoxSpace.from_spec(spec=convert_dtype(float, "np"),
                                          shape=(batch_shape, inner_shape),
                                          add_batch_rank=True)
        else:
            # Most general guess is a Float box.
            return FloatBox(shape=(batch_shape, ))
    else:
        raise ValueError(
            "List inference should only be attempted on the Python backend.")
Example 2
 def _graph_fn_call(self, inputs):
     if self.backend == "python" or get_backend() == "python":
         if isinstance(inputs, list):
             inputs = np.asarray(inputs)
         return inputs.astype(
             dtype=util.convert_dtype(self.to_dtype, to="np"))
     elif get_backend() == "pytorch":
         torch_dtype = util.convert_dtype(self.to_dtype, to="pytorch")
         if torch_dtype in (torch.float, torch.float32):
             return inputs.float()
         elif torch_dtype in (torch.int, torch.int32):
             return inputs.int()
         elif torch_dtype == torch.uint8:
             return inputs.byte()
     elif get_backend() == "tf":
         in_space = get_space_from_op(inputs)
         to_dtype = util.convert_dtype(self.to_dtype, to="tf")
         if inputs.dtype != to_dtype:
             ret = tf.cast(x=inputs, dtype=to_dtype)
             if in_space.has_batch_rank is True:
                 ret._batch_rank = 0 if in_space.time_major is False else 1
             if in_space.has_time_rank is True:
                 ret._time_rank = 0 if in_space.time_major is True else 1
             return ret
         else:
             return inputs
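
Note the bug fixed in the PyTorch branch above: `a == b or c` parses as `(a == b) or c`, so a bare truthy second operand makes the condition always true. A minimal illustration (plain strings stand in for the torch dtypes):

torch_dtype = "int32"
# Buggy pattern: parses as (torch_dtype == "float32") or "float64" -> always truthy.
print(torch_dtype == "float32" or "float64")   # 'float64'
# Correct pattern: membership test.
print(torch_dtype in ("float32", "float64"))   # False
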
Example 3
def try_space_inference_from_list(list_op, dtype=None, **low_high):
    """
    Attempts to infer shape space from a list op. A list op may be the result of fetching state from a Python
    memory.

    Args:
        list_op (list): List with arbitrary sub-structure.
        dtype (Optional[str]): An optional hint for the dtype of the Space to infer.
        **low_high: Optional `low`/`high` bounds to pass through to the constructed Space.

    Returns:
        Space: Inferred Space object represented by list.
    """
    shape = len(list_op)
    if shape > 0:
        # Try to infer more things by looking inside list.
        elem = list_op[0]
        if (get_backend() == "pytorch" and isinstance(elem, torch.Tensor)) or \
                get_backend() == "tf" and isinstance(elem, tf.Tensor):
            list_type = dtype or elem.dtype
            inner_shape = elem.shape
            return BoxSpace.from_spec(spec=convert_dtype(list_type, "np"),
                                      shape=(shape, ) + inner_shape,
                                      add_batch_rank=True,
                                      **low_high)
        elif isinstance(elem, list):
            inner_shape = len(elem)
            return BoxSpace.from_spec(spec=convert_dtype(dtype or float, "np"),
                                      shape=(shape, inner_shape),
                                      add_batch_rank=True,
                                      **low_high)
        # IntBox -> elem must be int and dtype hint must match (or None).
        elif isinstance(elem, int) and (dtype is None or dtype == "int"):
            # Values may lack decimal points even in a float list, so check all other items for floats.
            # If there is at least one float -> FloatBox, otherwise -> IntBox.
            has_floats = any(isinstance(el, float) for el in list_op)
            if has_floats is False:
                return IntBox.from_spec(shape=(shape, ),
                                        add_batch_rank=True,
                                        **low_high)
            else:
                return FloatBox.from_spec(shape=(shape, ),
                                          add_batch_rank=True,
                                          **low_high)
        # FloatBox -> elem must be float (or int) and dtype hint must match (or None).
        elif isinstance(elem,
                        (float, int)) and (dtype is None or dtype == "float"):
            return FloatBox.from_spec(shape=(shape, ),
                                      add_batch_rank=True,
                                      **low_high)

    # Most general guess is a Float box.
    return FloatBox(shape=(shape, ), **low_high)
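
The int/float disambiguation above reduces to one scan of the list; a standalone sketch:

# All-int list -> IntBox; any float present -> FloatBox.
for list_op in ([1, 2, 3], [1, 2.5, 3]):
    has_floats = any(isinstance(el, float) for el in list_op)
    print(list_op, "->", "FloatBox" if has_floats else "IntBox")
# [1, 2, 3] -> IntBox
# [1, 2.5, 3] -> FloatBox
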
Example 4
    def update(self, batch=None):
        # In apex, syncing is based on num steps trained, not steps sampled.
        sync_call = None
        # Apex uses train time steps for syncing.
        self.steps_since_target_net_sync += len(batch["terminals"])
        if self.steps_since_target_net_sync >= self.update_spec["sync_interval"]:
            sync_call = "sync_target_qnet"
            self.steps_since_target_net_sync = 0
        return_ops = [0, 1]
        self.num_updates += 1
        if batch is None:
            # Add some additional return-ops to pull (left out normally for performance reasons).
            ret = self.graph_executor.execute(("update_from_memory", None, return_ops), sync_call)

            # Remove unnecessary return dicts (e.g. sync-op).
            if isinstance(ret, dict):
                ret = ret["update_from_memory"]

            if self.store_last_q_table is True:
                q_table = dict(
                    states=ret[3]["states"],
                    q_values=ret[4]
                )
                self.last_q_table = q_table

            return ret[1]
        else:
            # Add some additional return-ops to pull (left out normally for performance reasons).
            pps_dtype = self.preprocessed_state_space.dtype
            batch_input = [np.asarray(batch["states"], dtype=util.convert_dtype(dtype=pps_dtype, to='np')),
                           batch["actions"],
                           batch["rewards"], batch["terminals"],
                           np.asarray(batch["next_states"], dtype=util.convert_dtype(dtype=pps_dtype, to='np')),
                           batch["importance_weights"],
                           True]
            ret = self.graph_executor.execute(("update_from_external_batch", batch_input), sync_call)
            # Remove unnecessary return dicts (e.g. sync-op).
            if isinstance(ret, dict):
                ret = ret["update_from_external_batch"]

            if self.store_last_q_table is True:
                q_table = dict(
                    states=batch["states"],
                    q_values=ret[3]
                )
                self.last_q_table = q_table

            # Return [1]=total loss, [2]=loss-per-item (skip [0]=update noop).
            return ret[1], ret[2]
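
The target-net sync bookkeeping at the top of `update` is a plain threshold counter; a standalone sketch of the same pattern (interval value made up for the demo):

steps_since_sync = 0
sync_interval = 4   # stand-in for self.update_spec["sync_interval"]

for num_terminals in (2, 2, 2):     # three updates, 2 batch items each
    steps_since_sync += num_terminals
    sync_call = None
    if steps_since_sync >= sync_interval:
        sync_call = "sync_target_qnet"
        steps_since_sync = 0
    print(sync_call)   # None, sync_target_qnet, None
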
Example 5
    def _batch_process_sample(self, states, actions, rewards, next_states,
                              terminals):
        """
        Batch Post-processes sample, e.g. by computing priority weights, and compressing.

        Args:
            states (list): List of states.
            actions (list, dict): List of actions or dict of lists for container actions.
            rewards (list): List of rewards.
            next_states (list): List of next_states.
            terminals (list): List of terminals.

        Returns:
            dict: Sample batch dict.
        """
        weights = np.ones_like(rewards)

        # Compute loss-per-item.
        if self.worker_executes_postprocessing:
            # Next states were just collected, we batch process them here.
            _, loss_per_item = self.agent.post_process(
                dict(states=states,
                     actions=actions,
                     rewards=rewards,
                     terminals=terminals,
                     next_states=next_states,
                     importance_weights=weights))
            weights = np.abs(loss_per_item) + SMALL_NUMBER
        env_dtype = self.vector_env.state_space.dtype
        compressed_states = [
            ray_compress(
                np.asarray(state,
                           dtype=util.convert_dtype(dtype=env_dtype, to='np')))
            for state in states
        ]

        compressed_next_states = compressed_states[self.n_step_adjustment:] + [
            ray_compress(np.asarray(next_s, dtype=util.convert_dtype(dtype=env_dtype, to='np')))
            for next_s in next_states[-self.n_step_adjustment:]
        ]
        if self.container_actions:
            for name in self.action_space.keys():
                actions[name] = np.array(actions[name])
        else:
            actions = np.array(actions)
        return dict(states=compressed_states,
                    actions=actions,
                    rewards=np.array(rewards),
                    terminals=np.array(terminals),
                    next_states=compressed_next_states,
                    importance_weights=np.array(weights)), len(rewards)
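
The next-state handling above exploits the n-step overlap between `states` and `next_states`: every next state except the last `n_step_adjustment` ones is also a later state, so only the tail needs fresh compression. A toy sketch of the slicing (strings stand in for compressed arrays):

n_step_adjustment = 1
states = ["s0", "s1", "s2", "s3"]
next_states = ["s1", "s2", "s3", "s4"]   # shifted by n_step_adjustment

compressed_states = ["c(%s)" % s for s in states]
compressed_next_states = compressed_states[n_step_adjustment:] + \
    ["c(%s)" % s for s in next_states[-n_step_adjustment:]]
print(compressed_next_states)   # ['c(s1)', 'c(s2)', 'c(s3)', 'c(s4)']
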
Example 6
    def _graph_fn_get_records(self, num_records=1):
        if get_backend() == "tf":
            size = self.read_variable(self.size)

            # Sample and retrieve a random range, including terminals.
            index = self.read_variable(self.index)
            indices = tf.random_uniform(shape=(num_records,), maxval=size, dtype=tf.int32)
            indices = (index - 1 - indices) % self.capacity

            # Return default importance weight one.
            return self._read_records(indices=indices), indices, tf.ones_like(tensor=indices, dtype=tf.float32)
        elif get_backend() == "pytorch":
            indices = []
            if self.size > 0:
                indices = np.random.choice(np.arange(0, self.size), size=int(num_records))
                indices = (self.index - 1 - indices) % self.capacity
            records = OrderedDict()
            for name, variable in self.memory.items():
                records[name] = self.read_variable(
                    variable, indices,
                    dtype=util.convert_dtype(self.flat_record_space[name].dtype, to="pytorch"),
                    shape=self.flat_record_space[name].shape)
            records = define_by_run_unflatten(records)
            weights = torch.ones(indices.shape, dtype=torch.float32) if len(indices) > 0 \
                else torch.ones(1, dtype=torch.float32)
            return records, indices, weights
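
The index arithmetic above maps uniformly sampled offsets onto the most recently written ring-buffer slots, wrapping via the modulo. A small NumPy sketch:

import numpy as np

capacity, index, size = 5, 2, 5    # write pointer at 2; buffer full
offsets = np.arange(size)          # stand-in for the uniformly sampled values
indices = (index - 1 - offsets) % capacity
print(indices)                     # [1 0 4 3 2] -> newest record first
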
Example 7
 def _graph_fn_sample_deterministic(self, distribution):
     if get_backend() == "tf":
         return tf.argmax(input=distribution.probs,
                          axis=-1,
                          output_type=util.convert_dtype("int"))
     elif get_backend() == "pytorch":
         return torch.argmax(distribution.probs, dim=-1).int()
Example 8
 def _graph_fn_get_noise(self):
     if get_backend() == "tf":
         return tf.random_normal(shape=(1, ) + self.action_space.shape,
                                 mean=self.mean,
                                 stddev=self.stddev,
                                 dtype=convert_dtype(
                                     self.action_space.dtype))
Example 9
    def _graph_fn_get_records(self, num_records=1):
        available_records = min(num_records, self.size)
        indices = []
        prob_sum = self.merged_segment_tree.sum_segment_tree.get_sum(0, self.size - 1)
        samples = np.random.random(size=(available_records,)) * prob_sum
        for sample in samples:
            indices.append(self.merged_segment_tree.sum_segment_tree.index_of_prefixsum(prefix_sum=sample))

        sum_prob = self.merged_segment_tree.sum_segment_tree.get_sum() + SMALL_NUMBER
        min_prob = self.merged_segment_tree.min_segment_tree.get_min_value() / sum_prob
        max_weight = (min_prob * self.size) ** (-self.beta)
        weights = []
        for index in indices:
            sample_prob = self.merged_segment_tree.sum_segment_tree.get(index) / sum_prob
            weight = (sample_prob * self.size) ** (-self.beta)
            weights.append(weight / max_weight)

        if get_backend() == "pytorch":
            indices = torch.tensor(indices)
            weights = torch.tensor(weights)
        else:
            indices = np.asarray(indices)
            weights = np.asarray(weights)

        records = OrderedDict()
        for name, variable in self.record_registry.items():
            records[name] = self.read_variable(
                variable, indices,
                dtype=util.convert_dtype(self.flat_record_space[name].dtype, to="pytorch"))
        records = define_by_run_unflatten(records)
        return records, indices, weights
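
The weights above are the standard prioritized-replay importance-sampling correction, w_i = (p_i * N)^(-beta), normalized by the largest possible weight. A NumPy sketch with made-up priorities:

import numpy as np

beta, size = 0.4, 4
priorities = np.array([0.1, 0.4, 0.2, 0.3])   # made-up per-record priorities
sum_prob = priorities.sum()

min_prob = priorities.min() / sum_prob
max_weight = (min_prob * size) ** (-beta)

sample_probs = priorities / sum_prob
weights = (sample_probs * size) ** (-beta) / max_weight
print(weights)   # all <= 1.0; the highest-priority record gets the smallest weight
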
Example 10
    def _process_policy_trajectories(self, states, actions, rewards, terminals,
                                     sequence_indices):
        """
        Post-processes policy trajectories.
        """
        if self.worker_executes_postprocessing:
            rewards = self.agent.post_process(
                dict(states=states,
                     rewards=rewards,
                     terminals=terminals,
                     sequence_indices=sequence_indices))

        if self.compress:
            env_dtype = self.vector_env.state_space.dtype
            states = [
                ray_compress(
                    np.asarray(state,
                               dtype=util.convert_dtype(dtype=env_dtype,
                                                        to='np')))
                for state in states
            ]
        return dict(states=states,
                    actions=actions,
                    rewards=rewards,
                    terminals=terminals), len(rewards)
Example 11
 def _graph_fn_get_noise(self):
     drift = self.theta * (self.mu - self.ou_state)
     if get_backend() == "tf":
         diffusion = self.sigma * tf.random_normal(
             shape=self.action_space.shape, dtype=convert_dtype(self.action_space.dtype)
         )
         delta = drift + diffusion
         return tf.assign_add(ref=self.ou_state, value=delta)
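
The noise above is one Euler step of an Ornstein-Uhlenbeck process, x_{t+1} = x_t + theta * (mu - x_t) + sigma * N(0, 1). A standalone NumPy sketch of the recursion (parameter values made up):

import numpy as np

theta, mu, sigma = 0.15, 0.0, 0.3
ou_state = np.zeros(2)               # one entry per action dimension

for _ in range(3):
    drift = theta * (mu - ou_state)
    diffusion = sigma * np.random.normal(size=ou_state.shape)
    ou_state += drift + diffusion    # same role as the tf.assign_add above
print(ou_state)                      # temporally correlated noise sample
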
Example 12
    def update(self,
               batch=None,
               time_percentage=None,
               sequence_indices=None,
               apply_postprocessing=True):
        """
        Args:
            sequence_indices (Optional[np.ndarray, list]): Sequence indices are used in multi-env batches where
                partial episode fragments may be concatenated within the trajectory. For a single env, these are equal
                to terminals. If None is given, terminals will be used as sequence indices. A sequence index is True
                where an episode fragment ends and False otherwise. The reason separate indices are necessary is so that
                e.g. in GAE discounting, correct boot-strapping is applied depending on whether a true terminal state
                was reached, or a partial episode fragment of an environment ended.

                Example: If env_1 has terminals [0 0 0] for an episode fragment and env_2 terminals = [0 0 1],
                    we may pass them in as one combined array [0 0 0 0 0 1] with sequence indices showing where each
                    episode ends: [0 0 1 0 0 1].
            apply_postprocessing (Optional[bool]): If True, apply post-processing such as generalised
                advantage estimation to the collected batch in-graph. If False, the update assumes
                post-processing has already been applied. The purpose of internal versus external
                post-processing is to be able to off-load post-processing in large-scale distributed scenarios.
        """
        # TODO: Move update_spec to Worker. Agent should not hold these execution details.
        if time_percentage is None:
            time_percentage = self.timesteps / self.update_spec.get(
                "max_timesteps", 1e6)

        # [0] = the loss; [1] = loss-per-item; [2] = vf-loss; [3] = vf-loss-per-item.
        return_ops = [0, 1, 2, 3]
        if batch is None:
            ret = self.graph_executor.execute(
                ("update_from_memory", [True, time_percentage], return_ops))

            # Remove unnecessary return dicts (e.g. sync-op).
            if isinstance(ret, dict):
                ret = ret["update_from_memory"]
        else:
            # No sequence indices means terminals are used in place.
            if sequence_indices is None:
                sequence_indices = batch["terminals"]

            pps_dtype = self.preprocessed_state_space.dtype
            batch["states"] = np.asarray(batch["states"],
                                         dtype=util.convert_dtype(
                                             dtype=pps_dtype, to='np'))

            ret = self.graph_executor.execute(("update_from_external_batch", [
                batch["states"], batch["actions"], batch["rewards"],
                batch["terminals"], sequence_indices, apply_postprocessing,
                time_percentage
            ], return_ops))
            # Remove unnecessary return dicts (e.g. sync-op).
            if isinstance(ret, dict):
                ret = ret["update_from_external_batch"]

        # [0] loss, [1] loss per item
        return ret[0], ret[1]
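
A standalone sketch of the terminals-versus-sequence-indices example from the docstring (two env fragments concatenated into one batch):

# env_1's fragment did not terminate; env_2's ended in a true terminal.
terminals_env1, terminals_env2 = [0, 0, 0], [0, 0, 1]

terminals = terminals_env1 + terminals_env2
# A sequence ends wherever an env's fragment ends, terminal or not.
sequence_indices = terminals_env1[:-1] + [1] + terminals_env2[:-1] + [1]

print(terminals)          # [0, 0, 0, 0, 0, 1]
print(sequence_indices)   # [0, 0, 1, 0, 0, 1]
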
Example 13
 def _graph_fn_get_distribution(self, parameters):
     """
     Args:
         parameters (DataOp): The p value (probability that distribution returns True).
     """
     if get_backend() == "tf":
         return tf.distributions.Bernoulli(probs=parameters, dtype=util.convert_dtype("bool"))
     elif get_backend() == "pytorch":
         return torch.distributions.Bernoulli(probs=parameters)
Example 14
    def create_variables(self, input_spaces, action_space=None):
        # Create weights matrix and (maybe) biases vector.
        shape = (self.vocab_size, self.embed_dim)
        self.initializer = Initializer.from_spec(shape=shape, specification=self.initializer_spec)
        # TODO: For IMPALA partitioner is not needed. Do this later.
        self.embedding_matrix = self.get_variable(
            name="embedding-matrix", shape=shape, dtype=convert_dtype("float"), initializer=self.initializer.initializer,
            #partitioner=self.partitioners, regularizer=self.regularizers,
            trainable=self.trainable
        )

        self.ids_space = input_spaces["ids"]
Example 15
    def _graph_fn_get_episodes(self, num_episodes=1):
        if get_backend() == "tf":
            stored_episodes = self.read_variable(self.num_episodes)
            available_episodes = tf.minimum(x=num_episodes, y=stored_episodes)

            # Say we have two episodes with this layout:
            # terminals = [0 0 1 0 1]
            # episode_indices = [2, 4]
            # If we want to fetch the most recent episode, the start lookup is:
            # episode_indices[stored_episodes - available_episodes - 1] = episode_indices[2 - 1 - 1 = 0],
            # which points to buffer index 2. The episode starts one element after this, hence + 1.
            # However, this points to index -1 if stored_episodes = available_episodes,
            # in this case we want start = 0 to get everything.
            start = tf.cond(pred=tf.equal(x=stored_episodes,
                                          y=available_episodes),
                            true_fn=lambda: 0,
                            false_fn=lambda: self.episode_indices[
                                stored_episodes - available_episodes - 1] + 1)
            # End index is just the pointer to the most recent episode.
            limit = self.episode_indices[stored_episodes - 1]

            limit += tf.where(condition=(start < limit),
                              x=0,
                              y=self.capacity - 1)
            # limit = tf.Print(limit, [stored_episodes, start, limit], summarize=100, message="start | limit")
            indices = tf.range(start=start, limit=limit + 1) % self.capacity
            return self._read_records(indices=indices)
        elif get_backend() == "pytorch":
            stored_episodes = self.num_episodes
            available_episodes = min(num_episodes, self.num_episodes)

            if stored_episodes == available_episodes:
                start = 0
            else:
                start = self.episode_indices[stored_episodes -
                                             available_episodes - 1] + 1

            # End index is just the pointer to the most recent episode.
            limit = self.episode_indices[stored_episodes - 1]
            if start >= limit:
                limit += self.capacity - 1
            indices = torch.arange(start, limit + 1) % self.capacity

            records = DataOpDict()
            for name, variable in self.memory.items():
                records[name] = self.read_variable(
                    variable,
                    indices,
                    dtype=util.convert_dtype(
                        self.flat_record_space[name].dtype, to="pytorch"),
                    shape=self.flat_record_space[name].shape)
            records = define_by_run_unflatten(records)
            return records
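
A standalone sketch of the episode-boundary arithmetic above, using the layout from the comment (terminals = [0 0 1 0 1], capacity 5):

capacity = 5
episode_indices = [2, 4]    # buffer indices where each stored episode ends
stored_episodes = 2
num_episodes = 1            # fetch only the most recent episode

available_episodes = min(num_episodes, stored_episodes)
if stored_episodes == available_episodes:
    start = 0
else:
    start = episode_indices[stored_episodes - available_episodes - 1] + 1   # = 2 + 1 = 3
limit = episode_indices[stored_episodes - 1]                                # = 4
if start >= limit:
    limit += capacity - 1
indices = [i % capacity for i in range(start, limit + 1)]
print(indices)   # [3, 4] -> the records of the most recent episode
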
Example 16
    def _graph_fn_pick(self, key, use_exploration, epsilon_decisions, sample):
        """
        Exploration for discrete action spaces.
        Either pick a random action (if `use_exploration` and `epsilon_decision` are True),
            or return non-exploratory action.

        Args:
            use_exploration (DataOp): The master switch determining, whether to use exploration or not.
            epsilon_decisions (DataOp): The bool coming from the epsilon-exploration component specifying
                whether to use exploration or not (per batch item).
            sample (DataOp): The output from a distribution's "sample_deterministic" OR "sample_stochastic".

        Returns:
            DataOp: The DataOp representing the action. This will match the shape of self.action_space.
        """
        if get_backend() == "tf":
            if use_exploration is False:
                return sample
            else:
                random_actions = tf.random_uniform(
                    shape=tf.shape(sample),
                    maxval=self.flat_action_space[key].num_categories,
                    dtype=convert_dtype("int")
                )

                return tf.where(
                    # `use_exploration` given as actual bool or as tensor?
                    condition=epsilon_decisions if use_exploration is True else tf.logical_and(
                        use_exploration, epsilon_decisions
                    ),
                    x=random_actions,
                    y=sample
                )
        elif get_backend() == "pytorch":
            # N.b. different order versus TF because we don't want to execute the sampling below.
            if use_exploration is False:
                return sample

            if self.sample_obj is None:
                # Don't create new sample objects every time.
                self.sample_obj = torch.distributions.Uniform(0, self.flat_action_space[key].num_categories)

            random_actions = self.sample_obj.sample(sample.shape).int()
            if use_exploration is True:
                return torch.where(epsilon_decisions, random_actions, sample)
            else:
                if not isinstance(use_exploration, torch.ByteTensor):
                    use_exploration = use_exploration.byte()
                if not isinstance(epsilon_decisions, torch.ByteTensor):
                    epsilon_decisions = epsilon_decisions.byte()
                return torch.where(use_exploration & epsilon_decisions, random_actions, sample)
Example 17
 def _graph_fn_get_records(self, num_records=1):
     if get_backend() == "tf":
         index = self.read_variable(self.index)
         indices = tf.range(start=index - num_records,
                            limit=index) % self.capacity
         return self._read_records(indices=indices)
     elif get_backend() == "pytorch":
         indices = np.arange(self.index - num_records,
                             self.index) % self.capacity
         records = OrderedDict()
         for name, variable in self.record_registry.items():
             records[name] = self.read_variable(
                 variable,
                 indices,
                 dtype=util.convert_dtype(self.record_space[name].dtype,
                                          to="pytorch"))
         return records
Example 18
    def _graph_fn_get_records(self, num_records=1):
        if get_backend() == "tf":
            stored_records = self.read_variable(self.size)
            available_records = tf.minimum(x=num_records, y=stored_records)
            index = self.read_variable(self.index)
            indices = tf.range(start=index - available_records, limit=index) % self.capacity
            return self._read_records(indices=indices)
        elif get_backend() == "pytorch":
            available_records = min(num_records, self.size)
            indices = np.arange(self.index - available_records, self.index) % self.capacity
            records = DataOpDict()

            for name, variable in self.memory.items():
                records[name] = self.read_variable(
                    variable, indices,
                    dtype=util.convert_dtype(self.flat_record_space[name].dtype, to="pytorch"),
                    shape=self.flat_record_space[name].shape)

            records = define_by_run_unflatten(records)
            return records
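
The retrieval above reads the last `available_records` slots before the write pointer, wrapping around the ring buffer; a NumPy sketch:

import numpy as np

capacity, index = 6, 2      # write pointer at 2
available_records = 3
indices = np.arange(index - available_records, index) % capacity
print(indices)              # [5 0 1] -> the three most recently written slots
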
Example 19
    def test_specifiable_server(self):
        action_space = IntBox(2)
        state_space = FloatBox()
        env_spec = dict(type="random_env",
                        state_space=state_space,
                        action_space=action_space,
                        deterministic=True)
        # Create the server, but don't start it yet. This will be done fully automatically by the tf-Session.
        specifiable_server = SpecifiableServer(
            Environment, env_spec, dict(step_flow=[state_space, float, bool]),
            "terminate")

        # ret are ops now in the graph.
        ret1 = specifiable_server.step_flow(action_space.sample())
        ret2 = specifiable_server.step_flow(action_space.sample())

        # Check all 3 outputs of the Env step (next state, reward, terminal).
        self.assertEqual(ret1[0].shape, ())
        self.assertEqual(ret1[0].dtype, convert_dtype("float32"))
        self.assertEqual(ret1[1].shape, ())
        self.assertEqual(ret1[1].dtype, convert_dtype("float32"))
        self.assertEqual(ret1[2].shape, ())
        self.assertEqual(ret1[2].dtype, convert_dtype("bool"))
        self.assertEqual(ret2[0].shape, ())
        self.assertEqual(ret2[0].dtype, convert_dtype("float32"))
        self.assertEqual(ret2[1].shape, ())
        self.assertEqual(ret2[1].dtype, convert_dtype("float32"))
        self.assertEqual(ret2[2].shape, ())
        self.assertEqual(ret2[2].dtype, convert_dtype("bool"))

        # Start the session and run the op, then check its actual values.
        with tf.train.SingularMonitoredSession(
                hooks=[SpecifiableServerHook()]) as sess:
            out1 = sess.run(ret1)
            out2 = sess.run(ret2)

        # next state
        self.assertAlmostEqual(out1[0], 0.7713, places=4)
        self.assertAlmostEqual(out2[0], 0.7488, places=4)
        # reward
        self.assertAlmostEqual(out1[1], 0.0208, places=4)
        self.assertAlmostEqual(out2[1], 0.4985, places=4)
        # terminal
        self.assertTrue(out1[2] is np.bool_(False))
        self.assertTrue(out2[2] is np.bool_(False))
Example 20
    def get_variable(self, name, is_input_feed=False, add_batch_rank=None, add_time_rank=None,
                     time_major=None, is_python=False, local=False, **kwargs):
        add_batch_rank = self.has_batch_rank if add_batch_rank is None else add_batch_rank
        batch_rank = () if add_batch_rank is False else (None,) if add_batch_rank is True else (add_batch_rank,)

        add_time_rank = self.has_time_rank if add_time_rank is None else add_time_rank
        time_rank = () if add_time_rank is False else (None,) if add_time_rank is True else (add_time_rank,)

        time_major = self.time_major if time_major is None else time_major

        if time_major is False:
            shape = batch_rank + time_rank + self.shape
        else:
            shape = time_rank + batch_rank + self.shape

        if is_python is True or get_backend() == "python":
            if isinstance(add_batch_rank, int):
                if isinstance(add_time_rank, int):
                    if time_major:
                        var = [[0 for _ in range_(add_batch_rank)] for _ in range_(add_time_rank)]
                    else:
                        var = [[0 for _ in range_(add_time_rank)] for _ in range_(add_batch_rank)]
                else:
                    var = [0 for _ in range_(add_batch_rank)]
            elif isinstance(add_time_rank, int):
                var = [0 for _ in range_(add_time_rank)]
            else:
                var = []

            # Un-indent and just directly construct pytorch?
            if get_backend() == "pytorch" and is_input_feed:
                # Convert to PyTorch tensors as a faux placeholder.
                return torch.zeros(shape, dtype=convert_dtype(dtype=self.dtype, to="pytorch"))
            else:
                # TODO also convert?
                return var

        elif get_backend() == "tf":
            # TODO: re-evaluate the cutting of a leading '/_?' (tf doesn't like it)
            name = re.sub(r'^/_?', "", name)
            if is_input_feed:
                variable = tf.placeholder(dtype=convert_dtype(self.dtype), shape=shape, name=name)
            else:
                init_spec = kwargs.pop("initializer", None)
                # Bools should be initializable via 0 or not 0.
                if self.dtype == np.bool_ and isinstance(init_spec, (int, float)):
                    init_spec = (init_spec != 0)

                if self.dtype == np.str_ and init_spec == 0:
                    initializer = None
                else:
                    initializer = Initializer.from_spec(shape=shape, specification=init_spec).initializer

                variable = tf.get_variable(
                    name, shape=shape, dtype=convert_dtype(self.dtype), initializer=initializer,
                    collections=[tf.GraphKeys.GLOBAL_VARIABLES if local is False else tf.GraphKeys.LOCAL_VARIABLES],
                    **kwargs
                )
            # Add batch/time rank flags to the op.
            if self.has_batch_rank:
                variable._batch_rank = 0 if self.time_major is False else 1
            if self.has_time_rank:
                variable._time_rank = 1 if self.time_major is False else 0
            return variable
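
A standalone sketch of the rank assembly at the top of `get_variable` (None marks a flexible rank in the final variable shape; values made up):

add_batch_rank, add_time_rank = True, 2   # flexible batch rank; fixed time rank of 2
value_shape = (4,)
time_major = False

batch_rank = () if add_batch_rank is False else (None,) if add_batch_rank is True else (add_batch_rank,)
time_rank = () if add_time_rank is False else (None,) if add_time_rank is True else (add_time_rank,)

shape = time_rank + batch_rank + value_shape if time_major else batch_rank + time_rank + value_shape
print(shape)   # (None, 2, 4)
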
Example 21
    def _graph_fn_decayed_value(self, time_step):
        """
        Args:
            time_step (DataOp): The int-type DataOp that holds the current global time_step.

        Returns:
            DataOp: The decayed value depending on the current time step.
        """
        if get_backend() == "tf":
            smaller_than_start = time_step <= self.start_timestep

            shape = tf.shape(time_step)
            # time_step comes in as a time-sequence of time-steps.
            if shape.shape[0] > 0:
                return tf.where(
                    condition=smaller_than_start,
                    # We are still in pre-decay time.
                    x=tf.tile(tf.constant([self.from_]), multiples=shape),
                    # We are past pre-decay time.
                    y=tf.where(
                        condition=(time_step >=
                                   self.start_timestep + self.num_timesteps),
                        # We are in post-decay time.
                        x=tf.tile(tf.constant([self.to_]), multiples=shape),
                        # We are inside the decay time window.
                        y=self._graph_fn_decay(
                            tf.cast(x=time_step - self.start_timestep,
                                    dtype=util.convert_dtype("float"))),
                        name="cond-past-end-time"),
                    name="cond-before-start-time")
            # Single 0D time step.
            else:
                return tf.cond(
                    pred=smaller_than_start,
                    # We are still in pre-decay time.
                    true_fn=lambda: self.from_,
                    # We are past pre-decay time.
                    false_fn=lambda: tf.cond(
                        pred=(time_step >= self.start_timestep + self.
                              num_timesteps),
                        # We are in post-decay time.
                        true_fn=lambda: self.to_,
                        # We are inside the decay time window.
                        false_fn=lambda: self._graph_fn_decay(
                            tf.cast(x=time_step - self.start_timestep,
                                    dtype=util.convert_dtype("float"))),
                    ),
                )
        elif get_backend() == "pytorch":
            if time_step is None:
                time_step = torch.tensor([0])
            smaller_than_start = time_step <= self.start_timestep
            if time_step.dim() == 0:
                time_step = time_step.unsqueeze(-1)
            shape = time_step.shape
            # time_step comes in as a time-sequence of time-steps.
            # TODO tile shape is confusing -> num tiles should be shape[0] not shape?
            if shape[0] > 0:
                past_decay = torch.where(
                    (time_step >= self.start_timestep + self.num_timesteps),
                    # We are in post-decay time.
                    pytorch_tile(torch.tensor([self.to_]), shape),
                    # We are inside the decay time window.
                    torch.tensor(
                        self._graph_fn_decay(
                            torch.FloatTensor(
                                [time_step - self.start_timestep]))))
                return torch.where(
                    smaller_than_start,
                    # We are still in pre-decay time.
                    pytorch_tile(torch.tensor([self.from_]), shape),
                    # We are past pre-decay time.
                    past_decay)
            # Single 0D time step.
            else:
                if smaller_than_start:
                    return self.from_
                else:
                    if time_step >= self.start_timestep + self.num_timesteps:
                        return self.to_
                    else:
                        return self._graph_fn_decay(
                            torch.FloatTensor(
                                [time_step - self.start_timestep]))
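
`_graph_fn_decay` itself is not shown in these examples; purely as a hedged illustration, assume a linear schedule. The three-regime logic above then reduces to:

def decayed_value(t, start_timestep, num_timesteps, from_, to_):
    # Hypothetical linear decay standing in for self._graph_fn_decay().
    if t <= start_timestep:
        return from_                          # pre-decay
    if t >= start_timestep + num_timesteps:
        return to_                            # post-decay
    frac = (t - start_timestep) / num_timesteps
    return from_ + frac * (to_ - from_)       # inside the decay window

print([decayed_value(t, 10, 20, 1.0, 0.1) for t in (0, 20, 100)])   # ~[1.0, 0.55, 0.1]
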
Example 22
    def __init__(self, shape, specification=None, **kwargs):
        """
        Args:
            shape (tuple): The shape of the Variables to initialize.
            specification (any): A spec that determines the nature of this initializer.

        Raises:
            RLGraphError: If a fixed shape in `specification` does not match `shape`.
        """
        super(Initializer, self).__init__()

        # The shape of the variable to be initialized.
        self.shape = shape
        # The actual underlying initializer object.
        self.initializer = None

        # Truncated Normal.
        if specification == "truncated_normal":
            if get_backend() == "tf":
                # Use the first dimension (num_rows or batch rank) to figure out the stddev.
                stddev = 1 / math.sqrt(shape[0] if isinstance(
                    shape, (tuple, list)) and len(shape) > 0 else 1.0)
                self.initializer = tf.truncated_normal_initializer(
                    stddev=stddev)
            elif get_backend() == "pytorch":
                stddev = 1 / math.sqrt(shape[0] if isinstance(
                    shape, (tuple, list)) and len(shape) > 0 else 1.0)
                self.initializer = lambda t: torch.nn.init.normal_(tensor=t,
                                                                   std=stddev)

        # No spec -> Leave initializer as None for TF (will then use default;
        #  e.g. for tf weights: Xavier uniform). For PyTorch, still have to set Xavier.
        # TODO this is None or is False is very unclean because TF and PT have different defaults ->
        # change to clean default values for weights and biases.
        elif specification is None or specification is False:
            if get_backend() == "tf":
                pass
            elif get_backend() == "pytorch":
                self.initializer = torch.nn.init.xavier_uniform_

        # Fixed values spec -> Use them, just do sanity checking.
        else:
            # Constant value across the variable.
            if isinstance(specification, (float, int)):
                pass
            # A 1D initializer (e.g. for biases).
            elif isinstance(specification, list):
                array = np.asarray(specification,
                                   dtype=convert_dtype("float32", "np"))
                if array.shape != self.shape:
                    raise RLGraphError(
                        "ERROR: Number/shape of given items ({}) not identical with shape ({})!"
                        .format(array.shape, self.shape))
            # A nD initializer (numpy-array).
            elif isinstance(specification, np.ndarray):
                if specification.shape != self.shape:
                    raise RLGraphError(
                        "ERROR: Shape of given items ({}) not identical with shape ({})!"
                        .format(specification.shape, self.shape))
            # Unknown type.
            else:
                raise RLGraphError(
                    "ERROR: Bad specification given ({}) for Initializer object!"
                    .format(specification))

            # Create the backend initializer object.
            if get_backend() == "tf":
                self.initializer = tf.constant_initializer(
                    value=specification, dtype=convert_dtype("float32"))
            elif get_backend() == "pytorch":
                self.initializer = lambda t: torch.nn.init.constant_(
                    tensor=t, val=specification)
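
The truncated-normal branch above scales the stddev with the first dimension (a 1/sqrt(fan_in) heuristic); a tiny sketch:

import math

for shape in [(256, 64), (10,), ()]:
    fan_in = shape[0] if isinstance(shape, (tuple, list)) and len(shape) > 0 else 1.0
    print(shape, "-> stddev", 1 / math.sqrt(fan_in))
# (256, 64) -> stddev 0.0625
# (10,)     -> stddev ~0.316
# ()        -> stddev 1.0
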
Example 23
        def call(*args):
            if isinstance(self.output_spaces, dict):
                assert method_name in self.output_spaces, "ERROR: Method '{}' not specified in output_spaces: {}!".\
                    format(method_name, self.output_spaces)
                specs = self.output_spaces[method_name]
            else:
                specs = self.output_spaces(method_name)

            if specs is None:
                raise RLGraphError(
                    "No Space information received for method '{}:{}'".format(
                        self.specifiable_class.__name__, method_name))

            dtypes = []
            shapes = []
            return_slots = []
            for i, space in enumerate(force_list(specs)):
                assert not isinstance(space, ContainerSpace)
                # Expecting an op (space 0).
                if space == 0:
                    dtypes.append(0)
                    shapes.append(0)
                    return_slots.append(i)
                # Expecting a tensor.
                elif space is not None:
                    dtypes.append(convert_dtype(space.dtype))
                    shapes.append(space.shape)
                    return_slots.append(i)

            if get_backend() == "tf":
                # This function will send the method-call command via the out-pipe to the remote (server) Specifiable
                # object - all in-graph - and return the results to be used further by other graph ops.
                def py_call(*call_args):
                    call_args = [
                        arg.decode('UTF-8') if isinstance(arg, bytes) else arg
                        for arg in call_args
                    ]
                    try:
                        self.out_pipe.send(call_args)
                        received_results = self.out_pipe.recv()

                        # If an error occurred, it'll be passed back through the pipe.
                        if isinstance(received_results, Exception):
                            raise received_results
                        elif received_results is not None:
                            return received_results

                    except Exception as e:
                        if isinstance(e, IOError):
                            raise StopIteration()  # Clean exit.
                        else:
                            print("ERROR: Sent={} Exception={}".format(
                                call_args, e))
                            raise

                results = tf.py_func(py_call, (method_name, ) + tuple(args),
                                     dtypes,
                                     name=method_name)

                # Force known shapes on the returned tensors.
                for i, (result, shape) in enumerate(zip(results, shapes)):
                    # Not an op (which have shape=0).
                    if shape != 0:
                        result.set_shape(shape)
            else:
                raise NotImplementedError

            return results[0] if len(dtypes) == 1 else tuple(results)
Example 24
 def _graph_fn_get_distribution(self, parameters):
     if get_backend() == "tf":
         return tf.distributions.Categorical(
             probs=parameters, dtype=util.convert_dtype("int"))
     elif get_backend() == "pytorch":
         return torch.distributions.Categorical(probs=parameters)
Example 25
def get_space_from_op(op):
    """
    Tries to re-create a Space object given some DataOp (e.g. a tf op).
    This is useful for shape inference on returned ops after having run through a graph_fn.

    Args:
        op (DataOp): The op to create a corresponding Space for.

    Returns:
        Space: The inferred Space object.
    """
    # a Dict
    if isinstance(op, dict):  # DataOpDict
        spec = {}
        add_batch_rank = False
        add_time_rank = False
        for key, value in op.items():
            spec[key] = get_space_from_op(value)
            if spec[key].has_batch_rank:
                add_batch_rank = True
            if spec[key].has_time_rank:
                add_time_rank = True
        return Dict(spec,
                    add_batch_rank=add_batch_rank,
                    add_time_rank=add_time_rank)
    # a Tuple
    elif isinstance(op, tuple):  # DataOpTuple
        spec = []
        add_batch_rank = False
        add_time_rank = False
        for i in op:
            space = get_space_from_op(i)
            if space == 0:
                return 0
            spec.append(space)
            if spec[-1].has_batch_rank:
                add_batch_rank = True
            if spec[-1].has_time_rank:
                add_time_rank = True
        return Tuple(spec,
                     add_batch_rank=add_batch_rank,
                     add_time_rank=add_time_rank)
    # primitive Space -> infer from op dtype and shape
    else:
        # Op itself is a single value, simple python type.
        if isinstance(op, (bool, int, float)):
            return BoxSpace.from_spec(spec=type(op), shape=())
        elif isinstance(op, str):
            raise RLGraphError(
                "Cannot derive Space from non-allowed op ({})!".format(op))
        # A single numpy array.
        elif isinstance(op, np.ndarray):
            return BoxSpace.from_spec(spec=convert_dtype(str(op.dtype), "np"),
                                      shape=op.shape)
        elif isinstance(op, list):
            return try_space_inference_from_list(op)
        # No Space: e.g. the tf.no_op, a distribution (anything that's not a tensor).
        # PyTorch Tensors do not have get_shape so must check backend.
        elif hasattr(op, "dtype") is False or (get_backend() == "tf" and
                                               not hasattr(op, "get_shape")):
            return 0
        # Some tensor: can be converted into a BoxSpace.
        else:
            shape = get_shape(op)
            # Unknown shape (e.g. a cond op).
            if shape is None:
                return 0
            add_batch_rank = False
            add_time_rank = False
            time_major = False
            new_shape = list(shape)

            # New way: Detect via op._batch_rank and op._time_rank properties where these ranks are.
            if hasattr(op, "_batch_rank") and isinstance(op._batch_rank, int):
                add_batch_rank = True
                new_shape[op._batch_rank] = -1

            # elif get_backend() == "pytorch":
            #     if isinstance(op, torch.Tensor):
            #         if op.dim() > 1 and shape[0] == 1:
            #             add_batch_rank = True
            #             new_shape[0] = 1
            if hasattr(op, "_time_rank") and isinstance(op._time_rank, int):
                add_time_rank = True
                if op._time_rank == 0:
                    time_major = True
                new_shape[op._time_rank] = -1
            shape = tuple(n for n in new_shape if n != -1)

            # Old way: Detect automatically whether the first rank(s) are batch and/or time rank.
            if add_batch_rank is False and add_time_rank is False and \
                    shape != () and shape[0] is None:
                if len(shape) > 1 and shape[1] is None:
                    #raise RLGraphError(
                    #    "ERROR: Cannot determine time-major flag if both batch- and time-ranks are in an op w/o saying "
                    #    "which rank goes to which position!"
                    #)
                    shape = shape[2:]
                    add_time_rank = True
                else:
                    shape = shape[1:]
                add_batch_rank = True

            # TODO: If op._batch_rank and/or op._time_rank are not set, set them now.

            base_dtype = op.dtype.base_dtype if hasattr(
                op.dtype, "base_dtype") else op.dtype
            # PyTorch does not have a bool type
            if get_backend() == "pytorch":
                if op.dtype is torch.uint8:
                    base_dtype = bool
            base_dtype_str = str(base_dtype)

            # FloatBox
            if "float" in base_dtype_str:
                return FloatBox(shape=shape,
                                add_batch_rank=add_batch_rank,
                                add_time_rank=add_time_rank,
                                time_major=time_major,
                                dtype=convert_dtype(base_dtype, "np"))
            # IntBox
            elif "int" in base_dtype_str:
                high = getattr(op, "_num_categories", None)
                return IntBox(high,
                              shape=shape,
                              add_batch_rank=add_batch_rank,
                              add_time_rank=add_time_rank,
                              time_major=time_major,
                              dtype=convert_dtype(base_dtype, "np"))
            # a BoolBox
            elif "bool" in base_dtype_str:
                return BoolBox(shape=shape,
                               add_batch_rank=add_batch_rank,
                               add_time_rank=add_time_rank,
                               time_major=time_major)
            # a TextBox
            elif "string" in base_dtype_str:
                return TextBox(shape=shape,
                               add_batch_rank=add_batch_rank,
                               add_time_rank=add_time_rank,
                               time_major=time_major)

    raise RLGraphError(
        "ERROR: Cannot derive Space from op '{}' (unknown type?)!".format(op))
Example 26
 def contains(self, sample):
     if self.shape == ():
         return isinstance(sample, (bool, np.bool_))
     else:
         return convert_dtype(sample.dtype, "np") == np.bool_
Example 27
def get_space_from_op(op,
                      read_key_hints=False,
                      dtype=None,
                      low=None,
                      high=None):
    """
    Tries to re-create a Space object given some DataOp (e.g. a tf op).
    This is useful for shape inference on returned ops after having run through a graph_fn.

    Args:
        op (DataOp): The op to create a corresponding Space for.

        read_key_hints (bool): If True, tries to read type- and low/high-hints from the pattern of the Dict keys (str).
            - Leading "I_": IntBox, "F_": FloatBox, "B_": BoolBox.
            - Trailing "_low=0.0": Low value.
            - Trailing "_high=1.0": High value.
            E.g. Dict key "F_somekey_low=0.0_high=2.0" indicates a FloatBox with low=0.0 and high=2.0.
                 Dict key "I_somekey" indicates an IntBox with no limits.
                 Dict key "I_somekey_high=5" indicates an IntBox with high=5 (values 0-4).

            Default: False.

        dtype (Optional[str]): An optional indicator, what the `dtype` of a BoxSpace should be.
        low (Optional[int,float]): An optional indicator, what the `low` property for a BoxSpace should be.
        high (Optional[int,float]): An optional indicator, what the `high` property for a BoxSpace should be.

    Returns:
        Space: The inferred Space object.
    """
    # a Dict
    if isinstance(op, dict):  # DataOpDict
        spec = {}
        add_batch_rank = False
        add_time_rank = False
        for key, value in op.items():
            # Try to infer hints from the key.
            if read_key_hints is True:
                dtype, low, high = get_space_hints_from_dict_key(key)
            spec[key] = get_space_from_op(value,
                                          dtype=dtype,
                                          low=low,
                                          high=high)
            # Return early if a sub-space could not be inferred.
            if spec[key] == 0:
                return 0
            if spec[key].has_batch_rank:
                add_batch_rank = True
            if spec[key].has_time_rank:
                add_time_rank = True
        return Dict(spec,
                    add_batch_rank=add_batch_rank,
                    add_time_rank=add_time_rank)
    # a Tuple
    elif isinstance(op, tuple):  # DataOpTuple
        spec = []
        add_batch_rank = False
        add_time_rank = False
        for i in op:
            space = get_space_from_op(i)
            if space == 0:
                return 0
            spec.append(space)
            if spec[-1].has_batch_rank:
                add_batch_rank = True
            if spec[-1].has_time_rank:
                add_time_rank = True
        return Tuple(spec,
                     add_batch_rank=add_batch_rank,
                     add_time_rank=add_time_rank)

    # primitive Space -> infer from op dtype and shape
    else:
        low_high = {}
        if high is not None:
            low_high["high"] = high
        if low is not None:
            low_high["low"] = low
        # Op itself is a single value, simple python type.
        if isinstance(op, (bool, int, float)):
            return BoxSpace.from_spec(spec=(dtype or type(op)),
                                      shape=(),
                                      **low_high)
        elif isinstance(op, str):
            raise RLGraphError(
                "Cannot derive Space from non-allowed op ({})!".format(op))
        # A single numpy array.
        elif isinstance(op, np.ndarray):
            return BoxSpace.from_spec(spec=convert_dtype(str(op.dtype), "np"),
                                      shape=op.shape,
                                      **low_high)
        elif isinstance(op, list):
            return try_space_inference_from_list(op, dtype=dtype, **low_high)
        # No Space: e.g. the tf.no_op, a distribution (anything that's not a tensor).
        # PyTorch Tensors do not have get_shape so must check backend.
        elif hasattr(op, "dtype") is False or (get_backend() == "tf" and
                                               not hasattr(op, "get_shape")):
            return 0
        # Some tensor: can be converted into a BoxSpace.
        else:
            shape = get_shape(op)
            # Unknown shape (e.g. a cond op).
            if shape is None:
                return 0
            add_batch_rank = False
            add_time_rank = False
            time_major = False
            new_shape = list(shape)

            # New way: Detect via op._batch_rank and op._time_rank properties where these ranks are.
            if hasattr(op, "_batch_rank") and isinstance(op._batch_rank, int):
                add_batch_rank = True
                new_shape[op._batch_rank] = -1

            # elif get_backend() == "pytorch":
            #     if isinstance(op, torch.Tensor):
            #         if op.dim() > 1 and shape[0] == 1:
            #             add_batch_rank = True
            #             new_shape[0] = 1
            if hasattr(op, "_time_rank") and isinstance(op._time_rank, int):
                add_time_rank = True
                if op._time_rank == 0:
                    time_major = True
                new_shape[op._time_rank] = -1
            shape = tuple(n for n in new_shape if n != -1)

            # Old way: Detect automatically whether the first rank(s) are batch and/or time rank.
            if add_batch_rank is False and add_time_rank is False and \
                    shape != () and shape[0] is None:
                if len(shape) > 1 and shape[1] is None:
                    #raise RLGraphError(
                    #    "ERROR: Cannot determine time-major flag if both batch- and time-ranks are in an op w/o saying "
                    #    "which rank goes to which position!"
                    #)
                    shape = shape[2:]
                    add_time_rank = True
                else:
                    shape = shape[1:]
                add_batch_rank = True

            # TODO: If op._batch_rank and/or op._time_rank are not set, set them now.

            base_dtype = op.dtype.base_dtype if hasattr(
                op.dtype, "base_dtype") else op.dtype
            # PyTorch does not have a bool type
            if get_backend() == "pytorch":
                if op.dtype is torch.uint8:
                    base_dtype = bool
            base_dtype_str = str(base_dtype)

            # FloatBox
            if "float" in base_dtype_str:
                return FloatBox(shape=shape,
                                add_batch_rank=add_batch_rank,
                                add_time_rank=add_time_rank,
                                time_major=time_major,
                                dtype=convert_dtype(base_dtype, "np"))
            # IntBox
            elif "int" in base_dtype_str:
                high_ = high or getattr(op, "_num_categories", None)
                return IntBox(high_,
                              shape=shape,
                              add_batch_rank=add_batch_rank,
                              add_time_rank=add_time_rank,
                              time_major=time_major,
                              dtype=convert_dtype(base_dtype, "np"))
            # a BoolBox
            elif "bool" in base_dtype_str:
                return BoolBox(shape=shape,
                               add_batch_rank=add_batch_rank,
                               add_time_rank=add_time_rank,
                               time_major=time_major)
            # a TextBox
            elif "string" in base_dtype_str:
                return TextBox(shape=shape,
                               add_batch_rank=add_batch_rank,
                               add_time_rank=add_time_rank,
                               time_major=time_major)

    raise RLGraphError(
        "ERROR: Cannot derive Space from op '{}' (unknown type?)!".format(op))