def test_insert_demos(self):
        """
        Tests inserting into the demo memory.
        """
        env = OpenAIGymEnv.from_spec(self.env_spec)

        agent_config = config_from_path("configs/dqfd_agent_for_cartpole.json")
        agent = DQFDAgent.from_spec(
            agent_config,
            state_space=env.state_space,
            action_space=env.action_space
        )
        terminals = BoolBox(add_batch_rank=True)
        rewards = FloatBox(add_batch_rank=True)

        # Observe a single data point.
        agent.observe_demos(
            preprocessed_states=agent.preprocessed_state_space.with_batch_rank().sample(1),
            actions=env.action_space.with_batch_rank().sample(1),
            rewards=rewards.sample(1),
            next_states=agent.preprocessed_state_space.with_batch_rank().sample(1),
            terminals=terminals.sample(1),
        )

        # Observe a batch of demos.
        agent.observe_demos(
            preprocessed_states=agent.preprocessed_state_space.sample(10),
            actions=env.action_space.sample(10),
            rewards=FloatBox().sample(10),
            terminals=terminals.sample(10),
            next_states=agent.preprocessed_state_space.sample(10)
        )
    def test_post_processing(self):
        """
        Tests external batch post-processing for the PPO agent.
        """
        env = OpenAIGymEnv("Pong-v0",
                           frameskip=4,
                           max_num_noops=30,
                           episodic_life=True)
        agent_config = config_from_path("configs/ppo_agent_for_pong.json")
        agent = PPOAgent.from_spec(agent_config,
                                   state_space=env.state_space,
                                   action_space=env.action_space)
        num_samples = 200
        states = agent.preprocessed_state_space.sample(num_samples)
        reward_space = FloatBox(add_batch_rank=True)
        terminal_space = BoolBox(add_batch_rank=True)
        sequence_indices_space = BoolBox(add_batch_rank=True)

        # GAE is tested separately; here we just check that this API method returns results.
        pg_advantages = agent.post_process(
            dict(states=states,
                 rewards=reward_space.sample(num_samples),
                 terminals=terminal_space.sample(num_samples, fill_value=0),
                 sequence_indices=sequence_indices_space.sample(num_samples,
                                                                fill_value=0)))
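        # `pg_advantages` should hold one advantage estimate per input sample; this test only verifies the call succeeds.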
    def test_impala_actor_compilation(self):
        """
        Tests IMPALA agent compilation (actor).
        """
        try:
            from rlgraph.environments.deepmind_lab import DeepmindLabEnv
        except ImportError:
            print("Deepmind Lab not installed: Will skip this test.")
            return

        agent_config = config_from_path("configs/impala_agent_for_deepmind_lab_env.json")
        env = DeepmindLabEnv(
            level_id="seekavoid_arena_01", observations=["RGB_INTERLEAVED", "INSTR"], frameskip=4
        )

        actor_agent = IMPALAAgent.from_spec(
            agent_config,
            type="actor",
            state_space=env.state_space,
            action_space=env.action_space,
            internal_states_space=Tuple(FloatBox(shape=(256,)), FloatBox(shape=(256,)), add_batch_rank=True),
            # Disable monitoring, which would otherwise make session-creation hang in docker.
            execution_spec=dict(disable_monitoring=True)
        )
        # Start Specifiable Server with Env manually.
        actor_agent.environment_stepper.environment_server.start()
        print("Compiled IMPALA type=actor agent.")
        actor_agent.environment_stepper.environment_server.stop()
    def test_activation_functions(self):
        # Test single activation functions (no other custom computations in layer).
        space = FloatBox(shape=(3, ), add_batch_rank=True)

        # ReLU.
        relu_layer = NNLayer(activation="relu")
        test = ComponentTest(component=relu_layer,
                             input_spaces=dict(inputs=space))

        input_ = space.sample(size=5)
        expected = relu(input_)
        test.test(("apply", input_), expected_outputs=expected)

        # Check again with hand-computed values in case the numpy relu test util itself is broken.
        input_ = np.array([[1.0, 2.0, -5.0], [-10.0, -100.1, 4.5]])
        expected = np.array([[1.0, 2.0, 0.0], [0.0, 0.0, 4.5]])
        test.test(("apply", input_), expected_outputs=expected)

        # Sigmoid.
        sigmoid_layer = NNLayer(activation="sigmoid")
        test = ComponentTest(component=sigmoid_layer,
                             input_spaces=dict(inputs=space))

        input_ = space.sample(size=10)
        expected = sigmoid(input_)
        test.test(("apply", input_), expected_outputs=expected)
    def test_simple_nn_using_layers(self):
        # Space must contain batch dimension (otherwise, NNLayer will complain).
        space = FloatBox(shape=(4, ), add_batch_rank=True)

        # Create a simple neural net from json.
        nn_layers = config_from_path("configs/test_simple_nn.json")
        neural_net = NeuralNetwork(*nn_layers["layers"])

        # Do not seed, we calculate expectations manually.
        test = ComponentTest(component=neural_net,
                             input_spaces=dict(inputs=space))

        # Batch of size=4.
        input_ = space.sample(4)
        # Calculate output manually.
        var_dict = neural_net.get_variables("hidden-layer/dense/kernel",
                                            "hidden-layer/dense/bias",
                                            global_scope=False)
        w1_value = test.read_variable_values(
            var_dict["hidden-layer/dense/kernel"])
        b1_value = test.read_variable_values(
            var_dict["hidden-layer/dense/bias"])

        expected = dense_layer(input_, w1_value, b1_value)
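        # (dense_layer is the numpy test helper; it is assumed to compute inputs.dot(kernel) + bias.)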

        test.test(("call", input_), expected_outputs=expected, decimals=5)

        test.terminate()
    def test_time_rank_folding_for_large_cnn_nn(self):
        width = 86
        height = 86
        time_rank = 20
        input_space = FloatBox(shape=(width, height, 3),
                               add_batch_rank=True,
                               add_time_rank=True,
                               time_major=True)
        base_config = config_from_path("configs/test_3x_cnn_nn.json")
        base_config.insert(0, {"type": "reshape", "fold_time_rank": True})
        base_config.append({
            "type": "reshape",
            "unfold_time_rank": time_rank,
            "time_major": True
        })
        neural_net = NeuralNetwork.from_spec(base_config)

        test = ComponentTest(component=neural_net,
                             input_spaces=dict(nn_input=input_space))

        # Pull a large batch+time ranked sample.
        sample_shape = (time_rank, 256)
        inputs = input_space.sample(sample_shape)
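        # With time_major=True this is a (time=20, batch=256, 86, 86, 3) tensor; after folding, the CNN stack
        # should map each 86x86x3 frame to a 7*7*64 feature vector (see the shape assertion below).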

        out = test.test(("call", inputs), expected_outputs=None)["output"]

        self.assertTrue(out.shape == (time_rank, 256, 7 * 7 * 64))
        self.assertTrue(out.dtype == np.float32)
    def test_maxpool2d_layer(self):
        space = FloatBox(shape=(2, 2, 3),
                         add_batch_rank=True)  # e.g. a simple 3-color image

        # NOTE: Strides shouldn't matter.
        maxpool2d_layer = MaxPool2DLayer(pool_size=2,
                                         strides=2,
                                         padding="valid")
        test = ComponentTest(component=maxpool2d_layer,
                             input_spaces=dict(inputs=space))

        # Batch of 2 samples.
        input_ = space.sample(2)
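        # With a 2x2 input, pool_size=2 and "valid" padding, the pooling window covers the whole image:
        # the output is 1x1 spatially, holding the per-channel max over all four pixels.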
        item0_ch0 = max(input_[0][0][0][0], input_[0][0][1][0],
                        input_[0][1][0][0], input_[0][1][1][0])
        item0_ch1 = max(input_[0][0][0][1], input_[0][0][1][1],
                        input_[0][1][0][1], input_[0][1][1][1])
        item0_ch2 = max(input_[0][0][0][2], input_[0][0][1][2],
                        input_[0][1][0][2], input_[0][1][1][2])
        item1_ch0 = max(input_[1][0][0][0], input_[1][0][1][0],
                        input_[1][1][0][0], input_[1][1][1][0])
        item1_ch1 = max(input_[1][0][0][1], input_[1][0][1][1],
                        input_[1][1][0][1], input_[1][1][1][1])
        item1_ch2 = max(input_[1][0][0][2], input_[1][0][1][2],
                        input_[1][1][0][2], input_[1][1][1][2])
        expected = np.array([[[[item0_ch0, item0_ch1, item0_ch2]]],
                             [[[item1_ch0, item1_ch1, item1_ch2]]]])
        test.test(("apply", input_), expected_outputs=expected)
    def test_add_layer_to_simple_nn(self):
        # Space must contain batch dimension (otherwise, NNLayer will complain).
        space = FloatBox(shape=(3, ), add_batch_rank=True)

        # Create a simple neural net from json.
        neural_net = NeuralNetwork.from_spec(
            config_from_path(
                "configs/test_simple_nn.json"))  # type: NeuralNetwork
        # Add another layer to it.
        neural_net.add_layer(DenseLayer(units=10, scope="last-layer"))

        # Do not seed, we calculate expectations manually.
        test = ComponentTest(component=neural_net,
                             input_spaces=dict(nn_input=space))

        # Batch of size=3.
        input_ = space.sample(3)
        # Calculate output manually.
        var_dict = test.read_variable_values(neural_net.variable_registry)

        expected = dense_layer(
            dense_layer(input_,
                        var_dict["test-network/hidden-layer/dense/kernel"],
                        var_dict["test-network/hidden-layer/dense/bias"]),
            var_dict["test-network/last-layer/dense/kernel"],
            var_dict["test-network/last-layer/dense/bias"])

        test.test(("apply", input_),
                  expected_outputs=dict(output=expected),
                  decimals=5)

        test.terminate()
    def test_keras_style_two_separate_input_spaces(self):
        # Define two input Spaces first. Independently (no container).
        input_space_1 = IntBox(3, add_batch_rank=True)
        input_space_2 = FloatBox(shape=(4,), add_batch_rank=True)

        # One-hot flatten the int tensor.
        flatten_layer_out = ReShape(flatten=True, flatten_categories=True)(input_space_1)
        # Run the float tensor through two dense layers.
        dense_1_out = DenseLayer(units=3, scope="d1")(input_space_2)
        dense_2_out = DenseLayer(units=5, scope="d2")(dense_1_out)
        # Concat everything.
        cat_out = ConcatLayer()(flatten_layer_out, dense_2_out)
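        # IntBox(3) one-hot-flattens to 3 features and the last dense layer outputs 5, so the concat yields 8 features per sample.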

        # Use the `outputs` arg to let the network trace the data flow back to the input spaces.
        neural_net = NeuralNetwork(inputs=[input_space_1, input_space_2], outputs=cat_out)

        test = ComponentTest(component=neural_net, input_spaces=dict(inputs=[input_space_1, input_space_2]))

        var_dict = neural_net.variable_registry
        w1_value = test.read_variable_values(var_dict["neural-network/d1/dense/kernel"])
        b1_value = test.read_variable_values(var_dict["neural-network/d1/dense/bias"])
        w2_value = test.read_variable_values(var_dict["neural-network/d2/dense/kernel"])
        b2_value = test.read_variable_values(var_dict["neural-network/d2/dense/bias"])

        # Batch of size=n.
        input_ = [input_space_1.sample(4), input_space_2.sample(4)]

        expected = np.concatenate([  # concat everything
            one_hot(input_[0]),  # int flattening
            dense_layer(dense_layer(input_[1], w1_value, b1_value), w2_value, b2_value)  # float -> 2 x dense
        ], axis=-1)
        out = test.test(("call", input_), expected_outputs=expected)

        test.terminate()
    def test_keras_style_simple_nn(self):
        # Input Space of the network.
        input_space = FloatBox(shape=(3,), add_batch_rank=True)

        # Create a DenseLayer with a fixed `call` method input space for the arg `inputs`.
        output1 = DenseLayer(units=5, activation="linear", scope="a")(input_space)
        # Create a DenseLayer whose `inputs` arg is the resulting DataOpRec of output1's `call` output.
        output2 = DenseLayer(units=7, activation="relu", scope="b")(output1)

        # This will trace back automatically through the given output DataOpRec(s) and add all components
        # on the way to the input-space to this network.
        neural_net = NeuralNetwork(outputs=output2)

        test = ComponentTest(component=neural_net, input_spaces=dict(inputs=input_space))

        # Batch of size=n.
        input_ = input_space.sample(5)
        # Calculate output manually.
        var_dict = neural_net.get_variables("a/dense/kernel", "a/dense/bias", "b/dense/kernel", "b/dense/bias", global_scope=False)
        w1_value = test.read_variable_values(var_dict["a/dense/kernel"])
        b1_value = test.read_variable_values(var_dict["a/dense/bias"])
        w2_value = test.read_variable_values(var_dict["b/dense/kernel"])
        b2_value = test.read_variable_values(var_dict["b/dense/bias"])

        expected = relu(dense_layer(dense_layer(input_, w1_value, b1_value), w2_value, b2_value))

        test.test(("call", input_), expected_outputs=expected, decimals=5)

        test.terminate()
    def test_functional_api_one_output_is_discarded(self):
        # Input Space of the network.
        input_space = FloatBox(shape=(3, ),
                               add_batch_rank=True,
                               add_time_rank=True)

        # Pass input through an LSTM and get two outputs (output and internal states), only one of which will be used.
        lstm_out, _ = LSTMLayer(units=2, return_sequences=False)(input_space)
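        # With return_sequences=False only the last time step's output is returned; the second return value
        # (the LSTM's internal c/h states) is discarded here.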

        # A NN with 1 output (don't return internal_states of LSTM).
        neural_net = NeuralNetwork(outputs=lstm_out)

        test = ComponentTest(component=neural_net,
                             input_spaces=dict(inputs=input_space))

        # Batch of size=n.
        input_ = input_space.sample((5, 3))
        # Calculate output manually.
        var_dict = neural_net.variable_registry
        w1_value = test.read_variable_values(
            var_dict["neural-network/lstm-layer/lstm-cell/kernel"])
        b1_value = test.read_variable_values(
            var_dict["neural-network/lstm-layer/lstm-cell/bias"])

        expected_out, _ = lstm_layer(input_, w1_value, b1_value)
        expected_out = expected_out[:, -1, :]  # last time step only

        # Don't expect internal states (our NN does not return these as per the functional API definition above).
        test.test(("call", input_), expected_outputs=expected_out, decimals=5)

        test.terminate()
    def test_update_online(self):
        """
        Tests if joint updates from demo and online memory work.
        """
        env = OpenAIGymEnv.from_spec(self.env_spec)
        agent_config = config_from_path("configs/dqfd_agent_for_cartpole.json")
        agent = DQFDAgent.from_spec(
            agent_config,
            state_space=env.state_space,
            action_space=env.action_space
        )
        terminals = BoolBox(add_batch_rank=True)

        # Observe a batch of demos.
        agent.observe_demos(
            preprocessed_states=agent.preprocessed_state_space.sample(32),
            actions=env.action_space.sample(32),
            rewards=FloatBox().sample(32),
            terminals=terminals.sample(32),
            next_states=agent.preprocessed_state_space.sample(32)
        )

        # Observe a batch of online data.
        agent._observe_graph(
            preprocessed_states=agent.preprocessed_state_space.sample(32),
            actions=env.action_space.sample(32),
            rewards=FloatBox().sample(32),
            internals=[],
            terminals=terminals.sample(32),
            next_states=agent.preprocessed_state_space.sample(32)
        )
        # Call update.
        agent.update()
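        # This update draws from both the demo memory and the online memory, exercising DQFD's joint-update path.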
    def test_environment_stepper_on_deterministic_env_with_action_probs_lstm(self):
        internal_states_space = Tuple(FloatBox(shape=(3,)), FloatBox(shape=(3,)))
        preprocessor_spec = [dict(type="multiply", factor=0.1)]
        network_spec = config_from_path("configs/test_lstm_nn.json")
        exploration_spec = None
        actor_component = ActorComponent(
            preprocessor_spec,
            dict(network_spec=network_spec, action_space=self.deterministic_env_action_space),
            exploration_spec
        )
        environment_stepper = EnvironmentStepper(
            environment_spec=dict(type="deterministic_env", steps_to_terminal=3),
            actor_component_spec=actor_component,
            state_space=self.deterministic_env_state_space,
            reward_space="float32",
            internal_states_space=internal_states_space,
            add_action_probs=True,
            action_probs_space=self.deterministic_action_probs_space,
            num_steps=4,
        )

        test = ComponentTest(
            component=environment_stepper,
            action_space=self.deterministic_env_action_space,
        )

        weights = test.read_variable_values(environment_stepper.actor_component.policy.variable_registry)
        policy_scope = "environment-stepper/actor-component/policy/"
        weights_lstm = weights[policy_scope+"test-lstm-network/lstm-layer/lstm-cell/kernel"]
        biases_lstm = weights[policy_scope+"test-lstm-network/lstm-layer/lstm-cell/bias"]
        weights_action = weights[policy_scope+"action-adapter-0/action-network/action-layer/dense/kernel"]
        biases_action = weights[policy_scope+"action-adapter-0/action-network/action-layer/dense/bias"]

        # Step 4 times through the Env and collect results.
        lstm_1 = lstm_layer(np.array([[[0.0]]]), weights_lstm, biases_lstm)
        lstm_2 = lstm_layer(np.array([[[0.1]]]), weights_lstm, biases_lstm, lstm_1[1])
        lstm_3 = lstm_layer(np.array([[[0.2]]]), weights_lstm, biases_lstm, lstm_2[1])
        lstm_4 = lstm_layer(np.array([[[0.0]]]), weights_lstm, biases_lstm, lstm_3[1])
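        # The deterministic env terminates after 3 steps and resets, so step 4 again starts from state 0.0.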
        expected = (
            np.array([False, False, True, False]),
            np.array([[0.0], [1.0], [2.0], [0.0], [1.0]]),  # s' (raw)
            np.array([
                softmax(dense_layer(np.squeeze(lstm_1[0]), weights_action, biases_action)),
                softmax(dense_layer(np.squeeze(lstm_2[0]), weights_action, biases_action)),
                softmax(dense_layer(np.squeeze(lstm_3[0]), weights_action, biases_action)),
                softmax(dense_layer(np.squeeze(lstm_4[0]), weights_action, biases_action)),
            ]),  # action probs
            # internal states
            (
                np.squeeze(np.array([[[0.0, 0.0, 0.0]], lstm_1[1][0], lstm_2[1][0], lstm_3[1][0], lstm_4[1][0]])),
                np.squeeze(np.array([[[0.0, 0.0, 0.0]], lstm_1[1][1], lstm_2[1][1], lstm_3[1][1], lstm_4[1][1]]))
            )
        )
        test.test("step", expected_outputs=expected)

        # Make sure we close the session (to shut down the Env on the server).
        test.terminate()
    def __init__(self, episode_length=5, scale=0.1):
        super(GaussianDensityAsRewardEnvironment, self).__init__(
            state_space=FloatBox(shape=(1,)),
            action_space=FloatBox(shape=(1,), low=-2.0, high=2.0)
        )
        self.episode_length = episode_length
        self.episode_step = 0
        self.loc = None
        self.scale = scale
    def test_dummy_nn_layer(self):
        # Tests simple pass through (no activation, no layer (graph_fn) computation).
        space = FloatBox(shape=(3,), add_batch_rank=True)

        # A pure pass-through layer: no weights, no biases, no activation.
        dummy_layer = NNLayer(activation=None)
        test = ComponentTest(component=dummy_layer, input_spaces=dict(inputs=space))

        input_ = space.sample(size=5)
        test.test(("apply", input_), expected_outputs=input_)
    def test_demos_with_container_actions(self):
        # Tests whether DQFD can fit a set of states to a set of actions.
        vocab_size = 100
        embed_dim = 128
        # ID/state space.
        state_space = IntBox(vocab_size, shape=(10, ))
        # Container action space.
        actions_space = {}
        num_outputs = 3
        for i in range(3):
            actions_space['action_{}'.format(i)] = IntBox(low=0,
                                                          high=num_outputs)
        actions_space = Dict(actions_space)

        agent_config = config_from_path("configs/dqfd_container.json")
        agent_config["network_spec"] = [
            dict(type="embedding", embed_dim=embed_dim, vocab_size=vocab_size),
            dict(type="reshape", flatten=True),
            dict(type="dense",
                 units=embed_dim,
                 activation="relu",
                 scope="dense_1")
        ]
        agent = DQFDAgent.from_spec(agent_config,
                                    state_space=state_space,
                                    action_space=actions_space)
        terminals = BoolBox(add_batch_rank=True)
        rewards = FloatBox(add_batch_rank=True)

        # Create a set of demos.
        demo_states = agent.preprocessed_state_space.with_batch_rank().sample(20)
        demo_actions = actions_space.with_batch_rank().sample(20)
        demo_rewards = rewards.sample(20, fill_value=1.0)
        demo_next_states = agent.preprocessed_state_space.with_batch_rank().sample(20)
        demo_terminals = terminals.sample(20, fill_value=False)

        # Insert.
        agent.observe_demos(
            preprocessed_states=demo_states,
            actions=demo_actions,
            rewards=demo_rewards,
            next_states=demo_next_states,
            terminals=demo_terminals,
        )

        # Fit demos.
        agent.update_from_demos(num_updates=5000, batch_size=20)
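        # After fitting on the fixed demo set, the agent's greedy (non-exploring) actions should reproduce the demo actions.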

        # Evaluate demos:
        agent_actions = agent.get_action(demo_states,
                                         apply_preprocessing=False,
                                         use_exploration=False)
        recursive_assert_almost_equal(agent_actions, demo_actions)
    def test_softmax_on_simple_inputs(self):
        softmax = Softmax()
        input_space = FloatBox(shape=(2, 2, 3), add_batch_rank=True)
        test = ComponentTest(component=softmax,
                             input_spaces=dict(logits=input_space))

        # Batch=5
        inputs = input_space.sample(5)
        expected = softmax_(inputs)
        test.test(("softmax", inputs),
                  expected_outputs=(expected, np.log(expected)))
    def test_multi_lstm_layer(self):
        # Tests a double MultiLSTMLayer.
        return  # TODO: finish this test case (the body below is still leftover code from a VAE test).
        input_spaces = dict(inputs=FloatBox(shape=(3, ),
                                            add_batch_rank=True,
                                            add_time_rank=True),
                            initial_c_and_h_states=Tuple(
                                Tuple(FloatBox(shape=(5, )),
                                      FloatBox(shape=(5, ))),
                                Tuple(FloatBox(shape=(5, )),
                                      FloatBox(shape=(5, ))),
                                add_batch_rank=True))

        multi_lstm_layer = MultiLSTMLayer(
            num_lstms=2,
            units=5,
            # Full skip connections (x goes into both layers, out0 goes into layer1).
            skip_connections=[[True, False], [True, True]])

        # Do not seed, we calculate expectations manually.
        test = ComponentTest(component=multi_lstm_layer,
                             input_spaces=input_spaces)

        # Batch of size=n, time-steps=m.
        input_ = input_spaces["inputs"].sample((2, 3))

        global_scope = "variational-auto-encoder/"
        # Calculate output manually.
        var_dict = test.read_variable_values(
            multi_lstm_layer.variable_registry)

        encoder_network_out = dense_layer(
            input_, var_dict[global_scope +
                             "encoder-network/encoder-layer/dense/kernel"],
            var_dict[global_scope +
                     "encoder-network/encoder-layer/dense/bias"])
        expected_mean = dense_layer(
            encoder_network_out,
            var_dict[global_scope + "mean-layer/dense/kernel"],
            var_dict[global_scope + "mean-layer/dense/bias"])
        expected_stddev = dense_layer(
            encoder_network_out,
            var_dict[global_scope + "stddev-layer/dense/kernel"],
            var_dict[global_scope + "stddev-layer/dense/bias"])
        out = test.test(("encode", input_), expected_outputs=None)
        recursive_assert_almost_equal(out["mean"], expected_mean, decimals=5)
        recursive_assert_almost_equal(out["stddev"],
                                      expected_stddev,
                                      decimals=5)
        self.assertTrue(out["z_sample"].shape == (3, 1))

        test.terminate()
    def get_preprocessed_space(self, space):
        # TODO map of allowed conversions in utils?
        float_dtypes = ("float", "float32", "np.float", "tf.float32", "torch.float32")
        int_dtypes = ("int", "int32", "np.int32", "tf.int32", "torch.int32")

        if isinstance(space, IntBox):
            if self.to_dtype in float_dtypes:
                return FloatBox(shape=space.shape,
                                low=space.low,
                                high=space.high,
                                add_batch_rank=space.has_batch_rank,
                                add_time_rank=space.has_time_rank)
            elif self.to_dtype == "bool":
                if space.low == 0 and space.high == 1:
                    return BoolBox(shape=space.shape,
                                   add_batch_rank=space.has_batch_rank,
                                   add_time_rank=space.has_time_rank)
                else:
                    raise RLGraphError(
                        "ERROR: Conversion from IntBox to BoolBox only allowed if low is 0 and high is 1.")
        elif isinstance(space, BoolBox):
            if self.to_dtype in float_dtypes:
                return FloatBox(shape=space.shape,
                                low=0.0,
                                high=1.0,
                                add_batch_rank=space.has_batch_rank,
                                add_time_rank=space.has_time_rank)
            elif self.to_dtype in int_dtypes:
                return IntBox(shape=space.shape,
                              low=0,
                              high=1,
                              add_batch_rank=space.has_batch_rank,
                              add_time_rank=space.has_time_rank)
        elif isinstance(space, FloatBox):
            if self.to_dtype in int_dtypes:
                return IntBox(shape=space.shape,
                              low=space.low,
                              high=space.high,
                              add_batch_rank=space.has_batch_rank,
                              add_time_rank=space.has_time_rank)

        # Unsupported input space type.
        else:
            raise RLGraphError(
                "ERROR: Space conversion from: {} to type {} not supported".format(space, self.to_dtype))

        # No conversion (target dtype already matches or is not a recognized conversion).
        return space
    def test_time_rank_folding_for_large_dense_nn(self):
        vector_dim = 256
        input_space = FloatBox(shape=(vector_dim, ),
                               add_batch_rank=True,
                               add_time_rank=True)
        base_config = config_from_path("configs/test_large_dense_nn.json")
        neural_net_wo_folding = NeuralNetwork.from_spec(base_config)

        test = ComponentTest(component=neural_net_wo_folding,
                             input_spaces=dict(nn_input=input_space))

        # Pull a large batch+time ranked sample.
        sample_shape = (256, 200)
        inputs = input_space.sample(sample_shape)

        start = time.monotonic()
        runs = 10
        for _ in range(runs):
            print(".", flush=True, end="")
            test.test(("call", inputs), expected_outputs=None)
        runtime_wo_folding = time.monotonic() - start

        print(
            "\nTesting large dense NN w/o time-rank folding: {}x pass through with {}-data took "
            "{}s".format(runs, sample_shape, runtime_wo_folding))

        neural_net_w_folding = NeuralNetwork.from_spec(base_config)

        # Folded space.
        input_space_folded = FloatBox(shape=(vector_dim, ),
                                      add_batch_rank=True)
        inputs = input_space.sample(sample_shape[0] * sample_shape[1])
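        # Same total number of data points (256 * 200), but with the batch and time ranks folded into a single batch rank.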

        test = ComponentTest(component=neural_net_w_folding,
                             input_spaces=dict(nn_input=input_space_folded))

        start = time.monotonic()
        for _ in range(runs):
            print(".", flush=True, end="")
            test.test(("call", inputs), expected_outputs=None)
        runtime_w_folding = time.monotonic() - start

        print(
            "\nTesting large dense NN w/ time-rank folding: {}x pass through with {}-data took "
            "{}s".format(runs, sample_shape, runtime_w_folding))

        recursive_assert_almost_equal(runtime_w_folding,
                                      runtime_wo_folding,
                                      decimals=0)
    def test_update_from_demos(self):
        """
        Tests the separate API method to update from demos.
        """
        env = OpenAIGymEnv.from_spec(self.env_spec)
        agent_config = config_from_path("configs/dqfd_agent_for_cartpole.json")
        agent = DQFDAgent.from_spec(agent_config,
                                    state_space=env.state_space,
                                    action_space=env.action_space)
        terminals = BoolBox(add_batch_rank=True)
        rewards = FloatBox(add_batch_rank=True)
        state_1 = agent.preprocessed_state_space.with_batch_rank().sample(1)
        action_1 = [1]
        state_2 = agent.preprocessed_state_space.with_batch_rank().sample(1)
        action_2 = [0]

        # Insert two states with fixed actions and a few random examples.
        for _ in range(10):
            # State with correct action
            agent.observe_demos(
                preprocessed_states=state_1,
                actions=action_1,
                rewards=rewards.sample(1),
                next_states=agent.preprocessed_state_space.with_batch_rank().sample(1),
                terminals=terminals.sample(1),
            )
            agent.observe_demos(
                preprocessed_states=state_2,
                actions=action_2,
                rewards=rewards.sample(1),
                next_states=agent.preprocessed_state_space.with_batch_rank().sample(1),
                terminals=terminals.sample(1),
            )

        # Update.
        agent.update_from_demos(num_updates=100, batch_size=8)

        # Test if fixed states and actions map.
        action = agent.get_action(states=state_1,
                                  apply_preprocessing=False,
                                  use_exploration=False)
        self.assertEqual(action, action_1)

        action = agent.get_action(states=state_2,
                                  apply_preprocessing=False,
                                  use_exploration=False)
        self.assertEqual(action, action_2)
    def test_softmax_on_complex_inputs(self):
        softmax = Softmax()
        input_space = Dict(dict(a=FloatBox(shape=(4, 5)),
                                b=FloatBox(shape=(3, ))),
                           add_batch_rank=True,
                           add_time_rank=True)
        test = ComponentTest(component=softmax,
                             input_spaces=dict(logits=input_space))

        inputs = input_space.sample(size=(4, 5))
        expected = dict(a=softmax_(inputs["a"]), b=softmax_(inputs["b"]))
        expected_logs = dict(a=np.log(expected["a"]), b=np.log(expected["b"]))
        test.test(("softmax", inputs),
                  expected_outputs=(expected, expected_logs),
                  decimals=5)
    def test_apply_gradients(self):
        component = DummyWithOptimizer(variable_value=2.0)

        test = ComponentTest(
            component=component,
            input_spaces=dict(input_=FloatBox(add_batch_rank=True)))

        expected_grad = 0.69314718
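        # ln(2) ~= 0.69314718; consistent with a loss of ln(x)^2 (see expected_loss below), whose gradient at x=2.0 is 2*ln(2)/2 = ln(2).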
        expected_outputs = [expected_grad, 2.0]
        test.test(("calc_grads"), expected_outputs=expected_outputs)

        # Now apply the grad and check the variable value.
        expected_loss = np.square(np.log(2.0))
        expected_outputs = [None, expected_loss, expected_loss]
        var_values_before = test.read_variable_values(
            component.variable_registry)
        test.test(("step"), expected_outputs=expected_outputs)

        # Check against variable now. Should change by -learning_rate*grad.
        var_values_after = test.read_variable_values(
            component.variable_registry)
        expected_new_value = var_values_before[
            "dummy-with-optimizer/variable"] - (component.learning_rate *
                                                expected_grad)
        recursive_assert_almost_equal(
            var_values_after["dummy-with-optimizer/variable"],
            expected_new_value,
            decimals=5)
    def test_keras_style_one_container_input_space(self):
        # Define one container input Space.
        input_space = Tuple(IntBox(3), FloatBox(shape=(4,)), add_batch_rank=True)

        # One-hot flatten the int tensor.
        flatten_layer_out = ReShape(flatten=True, flatten_categories=True)(input_space[0])
        # Run the float tensor through two dense layers.
        dense_1_out = DenseLayer(units=3, scope="d1")(input_space[1])
        dense_2_out = DenseLayer(units=5, scope="d2")(dense_1_out)
        # Concat everything.
        cat_out = ConcatLayer()(flatten_layer_out, dense_2_out)

        # Use the `outputs` arg to let the network trace the data flow back to the input space.
        # `inputs` is not needed here, as there is only a single input (the Tuple).
        neural_net = NeuralNetwork(outputs=cat_out)

        test = ComponentTest(component=neural_net, input_spaces=dict(inputs=input_space))

        var_dict = neural_net.variable_registry
        w1_value = test.read_variable_values(var_dict["neural-network/d1/dense/kernel"])
        b1_value = test.read_variable_values(var_dict["neural-network/d1/dense/bias"])
        w2_value = test.read_variable_values(var_dict["neural-network/d2/dense/kernel"])
        b2_value = test.read_variable_values(var_dict["neural-network/d2/dense/bias"])

        # Batch of size=n.
        input_ = input_space.sample(4)

        expected = np.concatenate([  # concat everything
            one_hot(input_[0]),  # int flattening
            dense_layer(dense_layer(input_[1], w1_value, b1_value), w2_value, b2_value)  # float -> 2 x dense
        ], axis=-1)
        out = test.test(("call", tuple([input_])), expected_outputs=expected)

        test.terminate()
    def test_sac_2x2_grid_world_with_container_actions(self):
        """
        Creates a SAC agent and runs it via a Runner on a simple 2x2 GridWorld using container actions.
        """
        # ftj = forward + turn + jump
        env_spec = dict(world="2x2", action_type="ftj", state_representation="xy+orientation")
        dummy_env = GridWorld.from_spec(env_spec)
        agent_config = config_from_path("configs/sac_agent_for_2x2_gridworld_with_container_actions.json")
        preprocessing_spec = agent_config.pop("preprocessing_spec")

        agent = SACAgent.from_spec(
            agent_config,
            state_space=FloatBox(shape=(4,)),
            action_space=dummy_env.action_space,
        )

        time_steps = 10000
        worker = SingleThreadedWorker(
            env_spec=lambda: GridWorld.from_spec(env_spec),
            agent=agent,
            preprocessing_spec=preprocessing_spec,
            worker_executes_preprocessing=False,
            render=False
        )
        results = worker.execute_timesteps(time_steps, use_exploration=True)
        print(results)
    def test_simple_nn(self):
        # Space must contain batch dimension (otherwise, NNLayer will complain).
        space = FloatBox(shape=(3, ), add_batch_rank=True)

        # Create a simple neural net from json.
        neural_net = NeuralNetwork.from_spec(
            config_from_path(
                "configs/test_simple_nn.json"))  # type: NeuralNetwork

        # Do not seed, we calculate expectations manually.
        test = ComponentTest(component=neural_net,
                             input_spaces=dict(nn_input=space))

        # Batch of size=3.
        input_ = np.array([[0.1, 0.2, 0.3], [1.0, 2.0, 3.0],
                           [10.0, 20.0, 30.0]])
        # Calculate output manually.
        var_dict = neural_net.get_variables("hidden-layer/dense/kernel",
                                            "hidden-layer/dense/bias",
                                            global_scope=False)
        w1_value = test.read_variable_values(
            var_dict["hidden-layer/dense/kernel"])
        b1_value = test.read_variable_values(
            var_dict["hidden-layer/dense/bias"])

        expected = dense_layer(input_, w1_value, b1_value)

        test.test(("apply", input_),
                  expected_outputs=dict(output=expected),
                  decimals=5)

        test.terminate()
    def test_memory_compilation(self):
        # Builds a memory and returns build stats.
        env = OpenAIGymEnv("Pong-v0",
                           frameskip=4,
                           max_num_noops=30,
                           episodic_life=True)

        record_space = Dict(states=env.state_space,
                            actions=env.action_space,
                            rewards=float,
                            terminals=BoolBox(),
                            add_batch_rank=True)
        input_spaces = dict(
            # insert: records
            records=record_space,
            # get_records: num_records
            num_records=int,
            # update_records: indices, update
            indices=IntBox(add_batch_rank=True),
            update=FloatBox(add_batch_rank=True))

        input_spaces.pop("num_records")
        memory = MemPrioritizedReplay(capacity=20000)
        test = ComponentTest(component=memory,
                             input_spaces=input_spaces,
                             auto_build=False)
        return test.build()
    def test_residual_layer(self):
        # Input space to residual layer (with 2-repeat [simple Conv2D layer]-residual-unit).
        input_space = FloatBox(shape=(2, 2, 3), add_batch_rank=True)

        residual_unit = Conv2DLayer(filters=3, kernel_size=1, strides=1, padding="same",
                                    kernel_spec=0.5, biases_spec=1.0)
        residual_layer = ResidualLayer(residual_unit=residual_unit, repeats=2)
        test = ComponentTest(component=residual_layer, input_spaces=dict(inputs=input_space))

        # Batch of 2 samples.
        inputs = np.array(
            [
                [[[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], [[0.7, 0.8, 0.9], [1.1, 1.2, 1.3]]],
                [[[1.1, 1.2, 1.3], [2.4, 2.5, 2.6]], [[-0.7, -0.8, -0.9], [3.1, 3.2, 3.3]]]
            ]
        )

        """
        Calculation:
        1st_conv2d = sum-over-last-axis(input) * 0.5 + 1.0 -> tile last axis 3x
        2nd_conv2d = sum-over-last-axis(1st_conv2d) * 0.5 + 1.0 -> tile last axis 3x
        output: 2nd_conv2d + input
        """
        conv2d_1 = np.tile(np.sum(inputs, axis=3, keepdims=True) * 0.5 + 1.0, (1, 1, 1, 3))
        conv2d_2 = np.tile(np.sum(conv2d_1, axis=3, keepdims=True) * 0.5 + 1.0, (1, 1, 1, 3))
        expected = conv2d_2 + inputs
        test.test(("apply", inputs), expected_outputs=expected, decimals=5)
    def test_lstm_layer(self):
        # 0th rank=batch-rank; 1st rank=time/sequence-rank; 2nd-nth rank=data.
        batch_size = 3
        sequence_length = 2
        input_space = FloatBox(shape=(3, ),
                               add_batch_rank=True,
                               add_time_rank=True)

        lstm_layer_component = LSTMLayer(units=5)
        test = ComponentTest(component=lstm_layer_component,
                             input_spaces=dict(inputs=input_space))

        # Batch of n samples.
        inputs = np.ones(shape=(batch_size, sequence_length, 3))

        # First matmul the inputs times the LSTM matrix:
        var_values = test.read_variable_values(lstm_layer_component.variables)
        lstm_matrix = var_values["lstm-layer/lstm-cell/kernel"]
        lstm_biases = var_values["lstm-layer/lstm-cell/bias"]

        expected_outputs, expected_internal_states = lstm_layer(
            inputs, lstm_matrix, lstm_biases, time_major=False)

        expected = dict(output=expected_outputs,
                        last_internal_states=expected_internal_states)
        test.test(("apply", inputs), expected_outputs=expected)
    def test_conv2d_layer(self):
        # Space must contain batch dimension (otherwise, NNLayer will complain).
        space = FloatBox(shape=(2, 2, 3),
                         add_batch_rank=True)  # e.g. a simple 3-color image

        conv2d_layer = Conv2DLayer(filters=4,
                                   kernel_size=2,
                                   strides=1,
                                   padding="valid",
                                   kernel_spec=0.5,
                                   biases_spec=False)
        test = ComponentTest(component=conv2d_layer,
                             input_spaces=dict(inputs=space))

        # Batch of 2 samples.
        input_ = np.array([
            [
                [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]],  # sample 1 (2x2x3)
                [[7.0, 8.0, 9.0], [10.0, 11.0, 12.0]]
            ],
            [
                [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]],  # sample 2 (2x2x3)
                [[0.7, 0.8, 0.9], [1.00, 1.10, 1.20]]
            ]
        ])
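        # With every kernel entry fixed at 0.5 and no biases, each of the 4 filters outputs 0.5 * sum of all 12 input values:
        # 0.5 * 78.0 = 39.0 for sample 1 and 0.5 * 7.8 = 3.9 for sample 2.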
        expected = np.array([
            [[[39.0, 39.0, 39.0, 39.0]]],  # output 1 (1x1x4)
            [[[3.9, 3.9, 3.9, 3.9]]],  # output 2 (1x1x4)
        ])
        test.test(("apply", input_), expected_outputs=expected)