def test_sync_functionality(self):
    """
    Syncs the variables of one Component into a second, synchronizable Component and checks the
    receiving Component's variable values before and after the sync.
    """
    # Source side: only pushes out its values.
    sync_from = MyCompWithVars(scope="sync-from")
    # Target side: starts with its own initial values (8.0 and 7.0) and gets overwritten by the sync.
    sync_to = MyCompWithVars(initializer1=8.0, initializer2=7.0, scope="sync-to", synchronizable=True)

    # Dummy parent Component holding both of the above.
    container = Component(name="container")
    container.add_components(sync_from, sync_to)

    @rlgraph_api(component=container)
    def execute_sync(self):
        source_values = sync_from.variables()
        return sync_to.sync(source_values)

    test = ComponentTest(component=container)

    var_shape = sync_from.space.shape
    first_key = "sync-to/" + VARIABLE_NAMES[0]
    second_key = "sync-to/" + VARIABLE_NAMES[1]

    # Before the sync: the target still holds its own initial values.
    test.variable_test(sync_to.get_variables(VARIABLE_NAMES), {
        first_key: np.full(shape=var_shape, fill_value=8.0),
        second_key: np.full(shape=var_shape, fill_value=7.0)
    })

    # Run the sync.
    test.test("execute_sync", expected_outputs=None)

    # After the sync: the target is expected to hold all-zeros/all-ones.
    test.variable_test(sync_to.get_variables(VARIABLE_NAMES), {
        first_key: np.zeros(shape=var_shape),
        second_key: np.ones(shape=var_shape)
    })
 def read_variable_values(self, variables):
     """
     Reads the values of the given variable(s) via `Component.read_variable` (for test compatibility).

     Args:
         variables: Either a dict (name -> variable), a list of variables, or a single variable.

     Returns:
         A dict (same keys) of read values, a list of read values (same order) or a single read value,
         depending on the type of `variables`.
     """
     # Dict of variables -> dict of values under the same keys.
     if isinstance(variables, dict):
         return {key: Component.read_variable(item) for key, item in variables.items()}
     # List of variables -> list of values in the same order.
     if isinstance(variables, list):
         return [Component.read_variable(item) for item in variables]
     # Anything else: Attempt to read as a single variable.
     return Component.read_variable(variables)
Beispiel #3
0
    def test_exploration_with_continuous_action_space(self):
        """
        Tests an Exploration Component with Gaussian noise (mean=10.0, stddev=2.0) on a continuous
        (float) action space.

        NOTE: This test is currently disabled: It returns right away (see TODO below), so everything
        after the `return` statement is unreachable and is only kept as a reference for a future rewrite.
        """
        # TODO not portable, redo with more general mean/stddev checks over a sample of distributed outputs.
        return
        # 2x2 continuous (float) action space.
        action_space = FloatBox(shape=(2,2), add_batch_rank=True)

        distribution = Normal()
        action_adapter = ActionAdapter(action_space=action_space)

        # Space of the (flat) NN-output that is passed into the `get_action` API below.
        nn_output_space = FloatBox(shape=(13,), add_batch_rank=True)  # 13: Any flat nn-output should be ok.

        exploration = Exploration.from_spec(dict(noise_spec=dict(type="gaussian_noise", mean=10.0, stddev=2.0)))

        # The Component to test.
        exploration_pipeline = Component(scope="continuous-plus-noise")
        # NOTE(review): A `scope` kwarg is passed to `add_components` here although the container already
        # has its own scope - confirm that this is intended/supported when re-enabling the test.
        exploration_pipeline.add_components(action_adapter, distribution, exploration, scope="exploration-pipeline")

        @rlgraph_api(component=exploration_pipeline)
        def get_action(self_, nn_output):
            # Only the second item returned by the action adapter is used to parameterize the distribution.
            _, parameters, _ = action_adapter.get_logits_probabilities_log_probs(nn_output)
            sample_stochastic = distribution.sample_stochastic(parameters)
            sample_deterministic = distribution.sample_deterministic(parameters)
            action = exploration.get_action(sample_stochastic, sample_deterministic)
            return action

        @rlgraph_api(component=exploration_pipeline)
        def get_noise(self_):
            return exploration.noise_component.get_noise()

        test = ComponentTest(component=exploration_pipeline, input_spaces=dict(nn_output=nn_output_space),
                             action_space=action_space)

        # Collect outputs in `collected` list to compare moments.
        collected = list()
        for _ in range_(1000):
            test.test("get_noise", fn_test=lambda component_test, outs: collected.append(outs))

        # The empirical moments of the noise should match the noise spec (mean=10.0, stddev=2.0).
        self.assertAlmostEqual(10.0, np.mean(collected), places=1)
        self.assertAlmostEqual(2.0, np.std(collected), places=1)

        # Fixed seed, so that the sampled input below is reproducible.
        # NOTE(review): The hard-coded expected values presumably depend on the backend's random number
        # generation as well (see the "not portable" TODO above) - confirm before re-enabling.
        np.random.seed(10)
        input_ = nn_output_space.sample(size=3)
        expected = np.array([[[13.163095, 8.46925],
                              [10.375976, 5.4675055]],
                             [[13.239931, 7.990649],
                              [10.03761, 10.465796]],
                             [[10.280741, 7.2384844],
                              [10.040194, 8.248206]]], dtype=np.float32)
        test.test(("get_action", input_), expected_outputs=expected, decimals=3)
Beispiel #4
0
    def test_copying_a_component(self):
        """
        Copies a flattening ReShape Component (scope "A") into a second one (scope "B"), places both
        into the same container and checks that each one flattens a 2x2 input into a (4,) output.
        """
        # A simple 2x2 FloatBox (no batch rank) to be flattened to (4,).
        space = FloatBox(shape=(2, 2), add_batch_rank=False)

        original = ReShape(flatten=True, scope="A")
        clone = original.copy(scope="B")
        container = Component(original, clone)

        @rlgraph_api(component=container)
        def flatten1(self, input_):
            return self.sub_components["A"].call(input_)

        @rlgraph_api(component=container)
        def flatten2(self, input_):
            return self.sub_components["B"].call(input_)

        test = ComponentTest(component=container, input_spaces=dict(input_=space))

        # One (API-name, input, expected flattened output) triple per Component (original and copy).
        cases = (
            ("flatten1", np.array([[0.5, 2.0], [1.0, 2.0]]), np.array([0.5, 2.0, 1.0, 2.0])),
            ("flatten2", np.array([[1.0, 2.0], [3.0, 4.0]]), np.array([1.0, 2.0, 3.0, 4.0])),
        )
        for api_name, sample, flattened in cases:
            test.test((api_name, sample), expected_outputs=flattened)
    def test_sync_between_2_identical_comps_that_have_vars_only_in_their_sub_comps(self):
        """
        Syncs between two Components that each own variables and hold a sub-Component with variables.

        Similar to the Policy scenario, where the Policy Component owns a NeuralNetwork (which has vars)
        and has to be synced with other Policies.
        """
        # Source: A custom Component (with vars) that holds another Component (with vars).
        comp1 = MyCompWithVars(scope="A")
        comp1.add_components(MyCompWithVars(scope="sub-of-A-with-vars"))

        # Target: Same structure, but synchronizable and with different initial values.
        comp2_writable = MyCompWithVars(scope="B", initializer1=3.0, initializer2=4.2, synchronizable=True)
        comp2_writable.add_components(MyCompWithVars(scope="sub-of-B-with-vars", initializer1=5.0, initializer2=6.2))

        container = Component(comp1, comp2_writable, scope="container")

        @rlgraph_api(component=container)
        def execute_sync(self):
            source_values = comp1.variables()
            return comp2_writable.sync(source_values)

        test = ComponentTest(component=container)

        # All variables of the target (its own ones and those of its sub-Component).
        target_var_names = [
            "container/B/variable_to_sync1",
            "container/B/variable_to_sync2",
            "container/B/sub-of-B-with-vars/variable_to_sync1",
            "container/B/sub-of-B-with-vars/variable_to_sync2"
        ]
        var_shape = comp1.space.shape

        # Before the sync: The target still holds its own initial values.
        initial_fills = [3.0, 4.2, 5.0, 6.2]
        test.variable_test(comp2_writable.get_variables(target_var_names), {
            var_name: np.full(shape=var_shape, fill_value=fill, dtype=np.float32)
            for var_name, fill in zip(target_var_names, initial_fills)
        })

        # Run the sync.
        test.test(("execute_sync", None), expected_outputs=None)

        # After the sync: Each "...sync1" variable is expected to be all 0.0, each "...sync2" one all 1.0.
        synced_values = [
            np.zeros(shape=var_shape, dtype=np.float32),
            np.ones(shape=var_shape, dtype=np.float32),
            np.zeros(shape=var_shape, dtype=np.float32),
            np.ones(shape=var_shape, dtype=np.float32)
        ]
        test.variable_test(
            comp2_writable.get_variables(target_var_names), dict(zip(target_var_names, synced_values))
        )
Beispiel #6
0
    def test_call_in_comprehension(self):
        """
        Checks that sub-Components' API methods can be called from within a list comprehension
        inside a container's API method.
        """
        container = Component(scope="container")
        dummies = [Dummy1To1(scope="dummy-{}".format(idx)) for idx in range(3)]
        container.add_components(*dummies)

        # The container's API: Run the input through each sub-Component and sum up all results.
        @rlgraph_api(name="test", component=container)
        def container_test(self_, input_):
            # The calls to the sub-Components deliberately happen inside a comprehension.
            outs = [dummy.run(input_) for dummy in dummies]
            return self_._graph_fn_sum(*outs)

        @graph_fn(component=container)
        def _graph_fn_sum(self_, *inputs):
            return sum(inputs)

        test = ComponentTest(component=container, input_spaces=dict(input_=float))

        # Expected: The sum over all sub-Components' outputs, each one presumably being `input + 1`.
        expected = len(dummies) * (1.23 + 1)
        test.test(("test", 1.23), expected_outputs=expected, decimals=2)
    def test_exploration_with_discrete_action_space(self):
        """
        Tests an epsilon-based Exploration (epsilon linearly decayed from 1.0 to 0.0 over the first
        10000 time steps) on top of deterministic Categorical samples for a discrete 2x2 action space.

        Checks the mean/stddev of the actions for time steps within the decay period and that each
        single action component stays constant for time steps beyond the decay period.
        """
        batch_size = 2
        num_time_steps = 30

        nn_output_space = FloatBox(shape=(13, ), add_batch_rank=True)
        time_step_space = IntBox(10000)
        # 2x2 action-pick, each composite action with 5 categories.
        action_space = IntBox(5, shape=(2, 2), add_batch_rank=True)

        # Our distribution to go into the Exploration object.
        distribution = Categorical()
        action_adapter = ActionAdapter(action_space=action_space)

        exploration = Exploration.from_spec(
            dict(epsilon_spec=dict(decay_spec=dict(type="linear_decay",
                                                   from_=1.0,
                                                   to_=0.0,
                                                   start_timestep=0,
                                                   num_timesteps=10000))))
        # The Component to test.
        exploration_pipeline = Component(action_adapter,
                                         distribution,
                                         exploration,
                                         scope="exploration-pipeline")

        @rlgraph_api(component=exploration_pipeline)
        def get_action(self_, nn_output, time_step):
            out = action_adapter.get_logits_probabilities_log_probs(nn_output)
            sample = distribution.sample_deterministic(out["probabilities"])
            action = exploration.get_action(sample, time_step)
            return action

        test = ComponentTest(component=exploration_pipeline,
                             input_spaces=dict(nn_output=nn_output_space,
                                               time_step=int),
                             action_space=action_space)

        def collect_actions(nn_outputs_, time_steps_):
            # Collects one action-batch (each action being an int box of shape=(2,2)) per time step.
            # Note: The builtin `int` is used as dtype, because the `np.int` alias has been removed
            # from NumPy (deprecated in 1.20, removed in 1.24).
            collected = np.empty(shape=(num_time_steps, batch_size, 2, 2), dtype=int)
            for i, time_step in enumerate(time_steps_):
                collected[i] = test.test(("get_action", [nn_outputs_, time_step]),
                                         expected_outputs=None)
            return collected

        # With exploration: Check, whether actions are equally distributed.
        nn_outputs = nn_output_space.sample(batch_size)
        time_steps = time_step_space.sample(num_time_steps)
        actions = collect_actions(nn_outputs, time_steps)

        # Assert some distribution of the actions.
        mean_action = actions.mean()
        stddev_action = actions.std()
        self.assertAlmostEqual(mean_action, 2.0, places=0)
        self.assertAlmostEqual(stddev_action, 1.0, places=0)

        # Without exploration (epsilon is force-set to 0.0): Check, whether actions are always the same
        # (given same nn_output all the time).
        nn_outputs = nn_output_space.sample(batch_size)
        time_steps = time_step_space.sample(num_time_steps) + 10000
        actions = collect_actions(nn_outputs, time_steps)

        # Assert zero stddev (over all time steps) of some single action components.
        # Each index triple is: (batch item, action-component row, action-component column).
        for batch_item, row, col in ((0, 0, 0), (1, 1, 0), (0, 0, 1), (1, 1, 1)):
            self.assertAlmostEqual(actions[:, batch_item, row, col].std(), 0.0, places=1)
        # Over all batch items and action components taken together, the actions still vary.
        self.assertAlmostEqual(actions.std(), 1.0, places=0)
    def test_exploration_with_discrete_container_action_space(self):
        """
        Tests an epsilon-based Exploration (epsilon linearly decayed from 1.0 to 0.0 over the first
        10000 time steps) on a container (Dict) action space with three discrete sub-actions a, b and c.

        Checks the mean/stddev of each sub-action for time steps within the decay period and that each
        sub-action stays constant (per batch item) for time steps beyond the decay period.
        """
        nn_output_space = FloatBox(shape=(12, ), add_batch_rank=True)
        time_step_space = IntBox(10000)
        # Some container action space.
        action_space = Dict(dict(a=IntBox(3), b=IntBox(2), c=IntBox(4)),
                            add_batch_rank=True)

        # Our distributions (one per sub-action) to go into the Exploration object.
        distribution_a = Categorical(scope="d_a")
        distribution_b = Categorical(scope="d_b")
        distribution_c = Categorical(scope="d_c")
        action_adapter_a = ActionAdapter(action_space=action_space["a"],
                                         scope="aa_a")
        action_adapter_b = ActionAdapter(action_space=action_space["b"],
                                         scope="aa_b")
        action_adapter_c = ActionAdapter(action_space=action_space["c"],
                                         scope="aa_c")

        exploration = Exploration.from_spec(
            dict(epsilon_spec=dict(decay_spec=dict(type="linear_decay",
                                                   from_=1.0,
                                                   to_=0.0,
                                                   start_timestep=0,
                                                   num_timesteps=10000))))
        # The Component to test.
        exploration_pipeline = Component(action_adapter_a,
                                         action_adapter_b,
                                         action_adapter_c,
                                         distribution_a,
                                         distribution_b,
                                         distribution_c,
                                         exploration,
                                         scope="exploration-pipeline")

        @rlgraph_api(component=exploration_pipeline)
        def get_action(self_, nn_output, time_step):
            out_a = action_adapter_a.get_logits_probabilities_log_probs(
                nn_output)
            out_b = action_adapter_b.get_logits_probabilities_log_probs(
                nn_output)
            out_c = action_adapter_c.get_logits_probabilities_log_probs(
                nn_output)
            sample_a = distribution_a.sample_deterministic(
                out_a["probabilities"])
            sample_b = distribution_b.sample_deterministic(
                out_b["probabilities"])
            sample_c = distribution_c.sample_deterministic(
                out_c["probabilities"])
            # Merge the single samples into one container action before exploring.
            sample = self_._graph_fn_merge_actions(sample_a, sample_b,
                                                   sample_c)
            action = exploration.get_action(sample, time_step)
            return action

        @graph_fn(component=exploration_pipeline)
        def _graph_fn_merge_actions(self, a, b, c):
            return DataOpDict(a=a, b=b, c=c)

        test = ComponentTest(component=exploration_pipeline,
                             input_spaces=dict(nn_output=nn_output_space,
                                               time_step=int),
                             action_space=action_space)

        batch_size = 2
        num_time_steps = 30

        def collect_actions(nn_outputs_, time_steps_):
            # Collects one action-batch per time step, separately for each of the sub-actions a, b and c.
            # Note: The builtin `int` is used as dtype, because the `np.int` alias has been removed
            # from NumPy (deprecated in 1.20, removed in 1.24).
            collected_a = np.empty(shape=(num_time_steps, batch_size), dtype=int)
            collected_b = np.empty(shape=(num_time_steps, batch_size), dtype=int)
            collected_c = np.empty(shape=(num_time_steps, batch_size), dtype=int)
            for i, t in enumerate(time_steps_):
                action = test.test(("get_action", [nn_outputs_, t]),
                                   expected_outputs=None)
                collected_a[i] = action["a"]
                collected_b[i] = action["b"]
                collected_c[i] = action["c"]
            return collected_a, collected_b, collected_c

        # With exploration: Check, whether actions are equally distributed.
        nn_outputs = nn_output_space.sample(batch_size)
        time_steps = time_step_space.sample(num_time_steps)
        actions_a, actions_b, actions_c = collect_actions(nn_outputs, time_steps)

        # Assert some distribution of the actions.
        mean_action_a = actions_a.mean()
        stddev_action_a = actions_a.std()
        self.assertAlmostEqual(mean_action_a, 1.0, places=0)
        self.assertAlmostEqual(stddev_action_a, 1.0, places=0)
        mean_action_b = actions_b.mean()
        stddev_action_b = actions_b.std()
        self.assertAlmostEqual(mean_action_b, 0.5, places=0)
        self.assertAlmostEqual(stddev_action_b, 0.5, places=0)
        mean_action_c = actions_c.mean()
        stddev_action_c = actions_c.std()
        self.assertAlmostEqual(mean_action_c, 1.5, places=0)
        self.assertAlmostEqual(stddev_action_c, 1.0, places=0)

        # Without exploration (epsilon is force-set to 0.0): Check, whether actions are always the same
        # (given same nn_output all the time).
        nn_outputs = nn_output_space.sample(batch_size)
        time_steps = time_step_space.sample(num_time_steps) + 10000
        actions_a, actions_b, actions_c = collect_actions(nn_outputs, time_steps)

        # Assert zero stddev (over all time steps) of each sub-action (a, b, c) for each batch item.
        for actions in (actions_a, actions_b, actions_c):
            for batch_item in range(batch_size):
                stddev_action = actions[:, batch_item].std()
                self.assertAlmostEqual(stddev_action, 0.0, places=1)