def setup_preprocessor(self, preprocessing_spec, in_space):
    """
    Builds a python-backend PreprocessorStack from a list of layer specs.

    Args:
        preprocessing_spec (Optional[list]): Layer spec dicts; each one is read for its "scope" key.
            If None, no stack is built.
        in_space: The space that is fed into the first layer of the stack.

    Returns:
        Optional[PreprocessorStack]: The stack after its variables were created and `reset()` was called,
            or None if `preprocessing_spec` is None.
    """
    # Nothing to build without a spec.
    if preprocessing_spec is None:
        return None

    # TODO move ingraph for python component assembly.
    # Deep-copy both inputs; the spec copy is modified below.
    layer_specs = deepcopy(preprocessing_spec)
    current_space = deepcopy(in_space)

    # Collect each layer's scope; it is the key into `sub_components` further down.
    layer_scopes = []
    for layer_spec in layer_specs:
        layer_scopes.append(layer_spec["scope"])

    # Force the python backend on every layer.
    for layer_spec in layer_specs:
        layer_spec["backend"] = "python"

    stack = PreprocessorStack(*layer_specs, backend="python")

    # Create variables layer by layer, feeding each layer's output space into the next one.
    for scope in layer_scopes:
        layer = stack.sub_components[scope]
        layer.create_variables(
            input_spaces=dict(preprocessing_inputs=current_space),
            action_space=None
        )
        current_space = layer.get_preprocessed_space(current_space)

    stack.reset()
    return stack
    def __init__(self,
                 preprocessor_spec,
                 policy_spec,
                 exploration_spec=None,
                 **kwargs):
        """
        Assembles the actor from a preprocessor stack, a policy, an exploration component and
        a container merger/splitter pair.

        Args:
            preprocessor_spec (Union[list,dict,PreprocessorSpec]):
                - A dict if the state from the Env will come in as a ContainerSpace (e.g. Dict). In this case,
                    each key in this dict specifies, which value in the incoming dict should go through which
                    PreprocessorStack.
                - A list with layer specs.
                - A PreprocessorStack object.

            policy_spec (Union[dict,Policy]): A specification dict for a Policy object or a Policy object directly.

            exploration_spec (Union[dict,Exploration]): A specification dict for an Exploration object or an
                Exploration object directly.

        Keyword Args:
            scope (str): Scope passed on to the parent constructor (default: "actor-component").
                All remaining kwargs are forwarded to the parent constructor unchanged.
        """
        component_scope = kwargs.pop("scope", "actor-component")
        super(ActorComponent, self).__init__(scope=component_scope, **kwargs)

        # Sub-components built from their specs.
        self.preprocessor = PreprocessorStack.from_spec(preprocessor_spec)
        self.policy = Policy.from_spec(policy_spec)
        # The splitter below is sized by the number of inputs of the policy's neural network.
        self.num_nn_inputs = self.policy.neural_network.num_inputs
        self.exploration = Exploration.from_spec(exploration_spec)

        self.tuple_merger = ContainerMerger(
            is_tuple=True, merge_tuples_into_one=True
        )
        self.tuple_splitter = ContainerSplitter(tuple_length=self.num_nn_inputs)

        sub_components = (
            self.policy,
            self.exploration,
            self.preprocessor,
            self.tuple_merger,
            self.tuple_splitter,
        )
        self.add_components(*sub_components)
    def __init__(self,
                 preprocessor_spec,
                 policy_spec,
                 exploration_spec,
                 max_likelihood=None,
                 **kwargs):
        """
        Assembles the actor from a preprocessor stack, a policy and an exploration component.

        Args:
            preprocessor_spec (Union[list,dict,PreprocessorSpec]):
                - A dict if the state from the Env will come in as a ContainerSpace (e.g. Dict). In this case,
                    each key in this dict specifies, which value in the incoming dict should go through which
                    PreprocessorStack.
                - A list with layer specs.
                - A PreprocessorStack object.
            policy_spec (Union[dict,Policy]): A specification dict for a Policy object or a Policy object directly.
            exploration_spec (Union[dict,Exploration]): A specification dict for an Exploration object or an
                Exploration object directly.
            max_likelihood (Optional[bool]): See Policy's property `max_likelihood`.
                If not None, overwrites the equally named setting in the Policy object (defined by `policy_spec`).

        Keyword Args:
            scope (str): Scope passed on to the parent constructor (default: "actor-component").
                All remaining kwargs are forwarded to the parent constructor unchanged.
        """
        component_scope = kwargs.pop("scope", "actor-component")
        super(ActorComponent, self).__init__(scope=component_scope, **kwargs)

        # Sub-components built from their specs.
        self.preprocessor = PreprocessorStack.from_spec(preprocessor_spec)
        self.policy = Policy.from_spec(policy_spec)
        self.exploration = Exploration.from_spec(exploration_spec)

        # Only stored here; this constructor does not apply it to the policy.
        self.max_likelihood = max_likelihood

        sub_components = (self.policy, self.exploration, self.preprocessor)
        self.add_components(*sub_components)
    def test_actor_component_with_lstm_network(self):
        """
        Steps an ActorComponent with an LSTM policy 1000 times (one time-step per call) and checks
        the preprocessed states, the shapes of the returned internal states and the spread of the actions.
        """
        # state space and internal state space
        state_space = FloatBox(shape=(2,), add_batch_rank=True, add_time_rank=True, time_major=False)
        internal_states_space = Tuple(FloatBox(shape=(3,)), FloatBox(shape=(3,)), add_batch_rank=True)
        time_percentages_space = FloatBox()
        # action_space.
        action_space = IntBox(2, add_batch_rank=True, add_time_rank=True)

        preprocessor = PreprocessorStack.from_spec(
            [dict(type="convert_type", to_dtype="float"), dict(type="divide", divisor=10)]
        )
        policy = Policy(network_spec=config_from_path("configs/test_lstm_nn.json"), action_space=action_space)
        exploration = Exploration(epsilon_spec=dict(decay_spec=dict(
            type="linear_decay", from_=1.0, to_=0.1)
        ))
        actor_component = ActorComponent(preprocessor, policy, exploration)
        test = ComponentTest(
            component=actor_component,
            input_spaces=dict(
                states=state_space,
                other_nn_inputs=Tuple(internal_states_space, add_batch_rank=True),
                time_percentage=time_percentages_space
            ),
            action_space=action_space
        )
        # Some state inputs (batch size=2, seq-len=1000; batch-major).
        np.random.seed(10)
        states = state_space.sample(size=(1000, 2))
        initial_internal_states = internal_states_space.zeros(size=2)  # only batch
        time_percentages = time_percentages_space.sample(1000)

        # Run n times a single time-step to simulate acting and env interaction with an LSTM.
        # Builtin `float`/`int` are used as dtypes: the `np.float`/`np.int` aliases (which pointed to exactly
        # these builtins) were deprecated in NumPy 1.20 and removed in NumPy 1.24.
        preprocessed_states = np.ndarray(shape=(1000, 2, 2), dtype=float)
        actions = np.ndarray(shape=(1000, 2, 1), dtype=int)
        for i, time_percentage in enumerate(time_percentages):
            ret = test.test((
                "get_preprocessed_state_and_action",
                # expand time dim at 1st slot as we are time-major == False
                [np.expand_dims(states[i], 1), tuple([initial_internal_states]), time_percentage]
            ))
            preprocessed_states[i] = ret["preprocessed_state"][:, 0, :]  # take out time-rank again ()
            actions[i] = ret["action"]
            # Check c/h-state shape.
            self.assertEqual(ret["nn_outputs"][1][0].shape, (2, 3))  # batch-size=2, LSTM units=3
            self.assertEqual(ret["nn_outputs"][1][1].shape, (2, 3))

        # Check all preprocessed states (easy: just divided by 10).
        expected_preprocessed_state = states / 10
        recursive_assert_almost_equal(preprocessed_states, expected_preprocessed_state)

        # Check the exploration functionality over the actions.
        # Not checking mean as we are mostly in the non-exploratory region, that's why the stddev should be small.
        stddev_actions = actions.std()
        self.assertGreater(stddev_actions, 0.4)
        self.assertLess(stddev_actions, 0.6)
Exemple #5
0
    def __init__(self, preprocessors, **kwargs):
        """
        Builds one PreprocessorStack per flattened key of `preprocessors` and registers all of them
        as sub-components via the parent constructor.

        Args:
            preprocessors (dict): Maps keys to PreprocessorStack specs. The dict is flattened via `flatten_op`
                and each flat key's spec is turned into its own PreprocessorStack.

        Keyword Args:
            scope (str): Scope for this component (default: "dict-preprocessor-stack").
                `api_methods` is always overwritten with an empty dict.

        Raises:
            RLGraphError: If a sub-component is not a PreprocessLayer object.
        """
        # Create one separate PreprocessorStack per given key.
        # All possibly other keys in an input will be pass through un-preprocessed.
        self.flattened_preprocessors = flatten_op(preprocessors)
        # Replace each spec in place by the stack built from it (keys stay the same).
        for index, (key, stack_spec) in enumerate(self.flattened_preprocessors.items()):
            stack_scope = "preprocessor-stack-{}".format(index)
            self.flattened_preprocessors[key] = PreprocessorStack.from_spec(
                stack_spec, scope=stack_scope
            )

        # NOTE: No automatic API-methods. Define them all ourselves.
        kwargs["api_methods"] = {}
        scope_default = dict(scope=kwargs.pop("scope", "dict-preprocessor-stack"))
        default_dict(kwargs, scope_default)

        stacks = self.flattened_preprocessors.values()
        super(DictPreprocessorStack, self).__init__(*stacks, **kwargs)
Exemple #6
0
    def test_simple_actor_component(self):
        """
        Runs an ActorComponent (multiply-by-2 preprocessor, simple NN policy, default Exploration) and
        compares preprocessed states, greedy actions and action probabilities against values computed
        with numpy from the component's kernel variables.
        """
        hidden_kernel = "actor-component/policy/test-network/hidden-layer/dense/kernel"
        action_kernel = \
            "actor-component/policy/action-adapter-0/action-network/action-layer/dense/kernel"

        def expected_action_layer_output(inputs, params):
            # Expected NN-output (inputs are doubled by the preprocessor first).
            nn_output = np.matmul(inputs * 2, params[hidden_kernel])
            # Raw action layer output.
            return np.matmul(nn_output, params[action_kernel])

        # state_space (NN is a simple single fc-layer relu network (2 units), random biases, random weights).
        state_space = FloatBox(shape=(5, ), add_batch_rank=True)
        # action_space.
        action_space = IntBox(10)

        preprocessor_spec = [
            dict(type="convert_type", to_dtype="float"),
            dict(type="multiply", factor=2)
        ]
        preprocessor = PreprocessorStack.from_spec(preprocessor_spec)
        policy = Policy(
            network_spec=config_from_path("configs/test_simple_nn.json"),
            action_space=action_space
        )
        exploration = Exploration()  # no exploration
        actor_component = ActorComponent(preprocessor, policy, exploration)
        test = ComponentTest(
            component=actor_component,
            input_spaces=dict(states=state_space),
            action_space=action_space
        )

        # --- API-method 1: preprocessed state and action ---
        variables = test.read_variable_values(actor_component.variables)
        # Some state inputs (5 input nodes, batch size=2).
        states = state_space.sample(2)
        logits = expected_action_layer_output(states, variables)
        # Final actions (max-likelihood/greedy pick).
        expected = dict(
            preprocessed_state=states * 2,
            action=np.argmax(logits, axis=-1)
        )
        test.test(
            ("get_preprocessed_state_and_action", states),
            expected_outputs=expected
        )

        # --- API-method 2: additionally returns the action probabilities ---
        states = state_space.sample(5)
        variables = test.read_variable_values(actor_component.variables)
        logits = expected_action_layer_output(states, variables)
        # No reshape necessary (simple action space), softmax to get probs.
        probs = softmax(logits)
        # Final actions (max-likelihood/greedy pick).
        greedy_actions = np.argmax(logits, axis=-1)
        expected = dict(
            preprocessed_state=states * 2,
            action=greedy_actions,
            action_probs=probs
        )
        test.test(
            ("get_preprocessed_state_action_and_action_probs", states),
            expected_outputs=expected
        )