コード例 #1
0
    def test_memory_compilation(self):
        """
        Sets up a prioritized replay memory for Pong-shaped records and returns whatever the manually
        triggered component build returns (build stats).
        """
        pong_env = OpenAIGymEnv("Pong-v0", frameskip=4, max_num_noops=30, episodic_life=True)

        # Layout of a batch of records as stored by the memory.
        record_space = Dict(
            states=pong_env.state_space,
            actions=pong_env.action_space,
            rewards=float,
            terminals=BoolBox(),
            add_batch_rank=True
        )

        # Only the spaces for inserting (`records`) and for updating records (`indices`, `update`)
        # are handed to the test; `num_records` (used by get_records) is intentionally left out.
        input_spaces = dict(
            records=record_space,
            indices=IntBox(add_batch_rank=True),
            update=FloatBox(add_batch_rank=True)
        )

        prioritized_memory = MemPrioritizedReplay(capacity=20000)
        # auto_build is switched off so that the build happens in the explicit call below.
        component_test = ComponentTest(
            component=prioritized_memory, input_spaces=input_spaces, auto_build=False
        )
        return component_test.build()
コード例 #2
0
    def test_multi_input_stream_neural_network_with_dict(self):
        """
        Sends a Dict input (float stream "a", int stream "b") through a MultiInputStreamNeuralNetwork and
        compares the output against a manually computed dense layer on the concatenated streams.
        """
        # The space needs a batch rank (NN layers would complain otherwise).
        input_space = Dict(a=FloatBox(shape=(3,)), b=IntBox(4, shape=()), add_batch_rank=True)

        # Stream "a" has no layers of its own; stream "b" goes through a flattening reshape layer.
        stream_specs = dict(
            a=[],
            b=[{"type": "reshape", "flatten": True, "flatten_categories": True}]
        )
        network = MultiInputStreamNeuralNetwork(
            input_network_specs=stream_specs,
            post_network_spec=[{"type": "dense", "units": 2}],
        )

        test = ComponentTest(component=network, input_spaces=dict(inputs=input_space))

        # A batch of 5 samples.
        batch = input_space.sample(5)

        # Manual reference computation: one-hot "b", concat with "a", apply the dense layer's weights.
        weights = test.read_variable_values()
        dense_scope = "multi-input-stream-nn/post-concat-nn/dense-layer/dense/"

        b_one_hot = one_hot(batch["b"], depth=4)
        concatenated = np.concatenate((batch["a"], b_one_hot), axis=-1)
        kernel = weights[dense_scope + "kernel"]
        bias = weights[dense_scope + "bias"]
        expected = dense_layer(concatenated, kernel, bias)

        test.test(("call", batch), expected_outputs=expected)

        test.terminate()
コード例 #3
0
    def create_variables(self, input_spaces, action_space=None):
        """
        Collects the per-device input spaces and creates one set of non-trainable variables per GPU device.

        Args:
            input_spaces (dict): Spaces by API argument name. The consecutive entries "inputs[0]",
                "inputs[1]", ... (up to the first missing index) form the device input space.
            action_space: Passed on unchanged to the parent's `create_variables`.
        """
        super(MultiGpuSynchronizer, self).create_variables(input_spaces, action_space)

        # Gather "inputs[0]", "inputs[1]", ... under the plain keys "0", "1", ...
        collected = {}
        position = 0
        lookup_key = "inputs[{}]".format(position)
        while lookup_key in input_spaces:
            collected[str(position)] = input_spaces[lookup_key]
            position += 1
            lookup_key = "inputs[{}]".format(position)
        # A container space makes the variable creation below a single call per device.
        self.device_input_space = Dict(collected)

        # One variable set per device, created under that device's scope.
        for device_index, device_name in enumerate(self.gpu_devices):
            with tf.device(device_name):
                placeholders = self.get_variable(
                    name="gpu-placeholder-{}".format(device_index),
                    trainable=False,
                    from_space=self.device_input_space,
                    flatten=True,
                    add_batch_rank=self.shard_size,
                    initializer=0
                )
                self.tower_placeholders.append(tuple(placeholders.values()))
コード例 #4
0
ファイル: ppo_agent.py プロジェクト: mugenZebra/rlgraph
    def __init__(self, clip_ratio, memory_spec=None, **kwargs):
        """
        Args:
            clip_ratio: Handed to the PPOLossFunction as its `clip_ratio` argument.
            memory_spec (Optional[dict,Memory]): The spec for the Memory to use for the PPO algorithm.

        Keyword Args:
            name (str): Name of the agent. Default: "ppo-agent".
            All remaining keyword arguments are forwarded to the parent constructor.
        """
        agent_name = kwargs.pop("name", "ppo-agent")
        super(PPOAgent, self).__init__(name=agent_name, **kwargs)

        self.train_time_steps = 0

        # PPO uses a ring buffer.
        self.memory = Memory.from_spec(memory_spec)
        # Space of a single record (no batch rank).
        self.record_space = Dict(
            states=self.state_space,
            actions=self.action_space,
            rewards=float,
            terminals=BoolBox(),
            add_batch_rank=False
        )

        self.policy = Policy(network_spec=self.neural_network, action_adapter_spec=None)

        self.merger = DictMerger(output_space=self.record_space)
        # The splitter gets its own deep copy of the record space.
        self.splitter = ContainerSplitter(input_space=copy.deepcopy(self.record_space))
        self.loss_function = PPOLossFunction(clip_ratio=clip_ratio, discount=self.discount)

        self.define_graph_api()
        if self.auto_build:
            self._build_graph()
            self.graph_built = True
コード例 #5
0
    def test_demos_with_container_actions(self):
        """
        Checks whether DQfD is able to fit a set of demo states to a set of container (Dict) demo actions.
        """
        vocab_size = 100
        embed_dim = 128
        num_outputs = 3

        # ID/state space.
        state_space = IntBox(vocab_size, shape=(10,))
        # Container action space made of three integer sub-actions.
        actions_space = Dict({
            "action_{}".format(i): IntBox(low=0, high=num_outputs) for i in range(3)
        })

        agent_config = config_from_path("configs/dqfd_container.json")
        agent_config["network_spec"] = [
            {"type": "embedding", "embed_dim": embed_dim, "vocab_size": vocab_size},
            {"type": "reshape", "flatten": True},
            {"type": "dense", "units": embed_dim, "activation": "relu", "scope": "dense_1"}
        ]
        agent = DQFDAgent.from_spec(agent_config, state_space=state_space, action_space=actions_space)

        terminal_space = BoolBox(add_batch_rank=True)
        reward_space = FloatBox(add_batch_rank=True)

        # Sample 20 demos: all rewards are 1.0, no demo is terminal.
        demo_states = agent.preprocessed_state_space.with_batch_rank().sample(20)
        demo_actions = actions_space.with_batch_rank().sample(20)
        demo_rewards = reward_space.sample(20, fill_value=1.0)
        demo_next_states = agent.preprocessed_state_space.with_batch_rank().sample(20)
        demo_terminals = terminal_space.sample(20, fill_value=False)

        # Hand the demos to the agent.
        agent.observe_demos(
            preprocessed_states=demo_states,
            actions=demo_actions,
            rewards=demo_rewards,
            next_states=demo_next_states,
            terminals=demo_terminals,
        )

        # Fit the demos.
        agent.update_from_demos(num_updates=5000, batch_size=20)

        # The greedy actions for the demo states must now match the demo actions.
        learned_actions = agent.get_action(demo_states, apply_preprocessing=False, use_exploration=False)
        recursive_assert_almost_equal(learned_actions, demo_actions)
コード例 #6
0
    def test_replay(self):
        """
        Measures and prints the throughput of the replay memory for single-record inserts, chunked inserts
        and sampling.
        """
        record_space = Dict(
            states=self.env.state_space,
            actions=self.env.action_space,
            reward=float,
            terminals=BoolBox(),
            add_batch_rank=True
        )
        input_spaces = dict(insert_records=record_space, get_records=int)

        replay_memory = ReplayMemory(capacity=self.capacity, next_states=True)
        test = ComponentTest(
            component=replay_memory, input_spaces=input_spaces, enable_profiler=self.enable_profiler
        )

        # 1) Single-record inserts (records are sampled up front, outside the timed section).
        single_records = [record_space.sample(size=1) for _ in range(self.inserts)]
        started = time.monotonic()
        for single_record in single_records:
            test.test(("insert_records", single_record), expected_outputs=None)
        elapsed = time.monotonic() - started

        throughput = len(single_records) / elapsed
        print('#### Testing Replay memory ####')
        print('Testing insert performance:')
        print(
            'Inserted {} separate records, throughput: {} records/s, total time: {} s'.format(
                len(single_records), throughput, elapsed
            )
        )

        # 2) Chunked inserts.
        chunks = [record_space.sample(size=self.chunk_size) for _ in range(self.inserts)]
        started = time.monotonic()
        for chunk in chunks:
            test.test(("insert_records", chunk), expected_outputs=None)
        elapsed = time.monotonic() - started

        throughput = len(chunks) * self.chunk_size / elapsed
        print(
            'Inserted {} record chunks of size {}, throughput: {} records/s, total time: {} s'.format(
                len(chunks), self.chunk_size, throughput, elapsed
            )
        )

        # 3) Sampling.
        print('Testing sample performance:')
        started = time.monotonic()
        for _ in range(self.samples):
            test.test(("get_records", self.sample_batch_size), expected_outputs=None)
        elapsed = time.monotonic() - started
        throughput = self.samples / elapsed

        print(
            'Sampled {} batches of size {}, throughput: {} sample-ops/s, total time: {} s'.format(
                self.samples, self.sample_batch_size, throughput, elapsed
            )
        )
コード例 #7
0
    def __init__(self, file_name=None, worker_id=0, base_port=5005, seed=0, docker_training=False, no_graphics=False,
                 timeout_wait=30, train_mode=True, **kwargs):
        """
        Starts a Unity ML-Agents environment, resets it once and derives the state- and action spaces from
        the first entry of the returned brain infos.

        Args:
            file_name (Optional[str]): Name of Unity environment binary.
            base_port (int): Port number to connect to Unity environment. `worker_id` increments on top of this.
            worker_id (int): Number to add to `base_port`. Used for asynchronous agent scenarios.
            seed (int): Forwarded to the UnityEnvironment constructor (presumably its random seed; TODO confirm).
            docker_training (bool): Informs this class, whether the process is being run within a container.
                Default: False.
            no_graphics (bool): Whether to run the Unity simulator in no-graphics mode. Default: False.
            timeout_wait (int): Time (in seconds) to wait for connection from environment.
            train_mode (bool): Whether to run in training mode, speeding up the simulation. Default: True.

        Keyword Args:
            Any further keyword arguments are forwarded to the parent constructor.

        NOTE(review): `timeout_wait` and `train_mode` are accepted, but nothing in this constructor reads them.
        """
        # First create the UnityMLAgentsEnvironment to get state and action spaces, then create RLgraph Environment
        # instance.
        self.mlagents_env = UnityEnvironment(
            file_name, worker_id, base_port, seed, docker_training, no_graphics
        )
        all_brain_info = self.mlagents_env.reset()
        # Get all possible information from AllBrainInfo.
        # TODO: Which scene do we pick?
        # For now: simply the first key of the reset result.
        self.scene_key = next(iter(all_brain_info))
        first_brain_info = all_brain_info[self.scene_key]
        # One (vectorized) environment per agent listed in the brain info.
        num_environments = len(first_brain_info.agents)

        # Collect one sub-space per available observation type; each is derived from the first entry only.
        state_space = {}
        if len(first_brain_info.vector_observations[0]) > 0:
            state_space["vector"] = get_space_from_op(first_brain_info.vector_observations[0])
            # TODO: This is a hack.
            # Forces float32 on a space that was inferred as float64.
            if state_space["vector"].dtype == np.float64:
                state_space["vector"].dtype = np.float32
        if len(first_brain_info.visual_observations) > 0:
            state_space["visual"] = get_space_from_op(first_brain_info.visual_observations[0])
        if first_brain_info.text_observations[0]:
            state_space["text"] = get_space_from_op(first_brain_info.text_observations[0])

        # Exactly one observation type: use its space directly and remember its key.
        # Otherwise (none or several): wrap everything into a Dict space and leave `state_key` unset.
        if len(state_space) == 1:
            self.state_key = next(iter(state_space))
            state_space = state_space[self.state_key]
        else:
            self.state_key = None
            state_space = Dict(state_space)
        # NOTE(review): the action space is inferred from the first action mask; confirm this is intended.
        action_space = get_space_from_op(first_brain_info.action_masks[0])
        # Same float64 -> float32 adjustment as for the vector observations.
        if action_space.dtype == np.float64:
            action_space.dtype = np.float32

        super(MLAgentsEnv, self).__init__(
            num_environments=num_environments, state_space=state_space, action_space=action_space, **kwargs
        )

        # Caches the last observation we made (after stepping or resetting).
        self.last_state = []
コード例 #8
0
    def test_ppo_on_container_state_and_action_spaces_and_very_large_rewards(
            self):
        """
        Tests stability of PPO on an extreme env producing strange container states and large rewards and requiring
        container actions.

        The test only runs the worker for 1e6 timesteps and prints the results; it makes no assertions.
        """
        env = RandomEnv(
            state_space=Dict(
                {"F_position": FloatBox(shape=(2, ), low=0.01, high=0.02)}),
            action_space=Dict({
                "F_direction_low-1.0_high1.0":
                FloatBox(shape=(), low=-1.0, high=1.0),
                "F_forward_direction_low-1.0_high1.0":
                FloatBox(shape=(), low=-1.0, high=1.0),
                "B_jump":
                BoolBox()
            }),
            # Hugely negative rewards. `low` has to be the smaller bound (the bounds used to be swapped,
            # i.e. low=-1000.0 > high=-100000.0, which is not a valid interval).
            reward_space=FloatBox(low=-100000.0, high=-1000.0),
            terminal_prob=0.0000001)

        agent_config = config_from_path(
            "configs/ppo_agent_for_random_env_with_container_spaces.json")
        agent = PPOAgent.from_spec(agent_config,
                                   state_space=env.state_space,
                                   action_space=env.action_space)

        worker = SingleThreadedWorker(
            # The lambda hands the already constructed env instance to the worker.
            env_spec=lambda: env,
            agent=agent,
            preprocessing_spec=None,
            worker_executes_preprocessing=True,
            # For debugging, an episode callback can be enabled:
            #episode_finish_callback=lambda episode_return, duration, timesteps, env_num:
            #print("episode return {}; steps={}".format(episode_return, timesteps))
        )
        results = worker.execute_timesteps(num_timesteps=int(1e6),
                                           use_exploration=True)

        print(results)
コード例 #9
0
ファイル: test_softmax.py プロジェクト: samialabed/rlgraph
    def test_softmax_on_complex_inputs(self):
        """
        Runs the Softmax component on a batched and time-ranked Dict space and compares both of its outputs
        (the softmax values and their logs) against the `softmax_` reference helper.
        """
        softmax_component = Softmax()
        logits_space = Dict(
            {"a": FloatBox(shape=(4, 5)), "b": FloatBox(shape=(3,))},
            add_batch_rank=True,
            add_time_rank=True
        )
        test = ComponentTest(component=softmax_component, input_spaces=dict(logits=logits_space))

        logits = logits_space.sample(size=(4, 5))
        # Reference values per sub-space, then their element-wise logs.
        expected_probs = {key: softmax_(logits[key]) for key in ("a", "b")}
        expected_log_probs = {key: np.log(expected_probs[key]) for key in ("a", "b")}

        test.test(
            ("softmax", logits),
            expected_outputs=(expected_probs, expected_log_probs),
            decimals=5
        )
コード例 #10
0
    def test_update_throughput(self):
        """
        Measures and prints how many samples per second the agent's `update` call processes on
        randomly sampled batches.
        """
        env = Environment.from_spec(self.env_spec)
        # TODO comment in for multi gpu
        # config_from_path("configs/multi_gpu_ray_apex_for_pong.json"),
        config = config_from_path("configs/ray_apex_for_pong.json")

        # Adjust to usable GPUs for test system.
        for gpu_count in [1]:
            gpu_spec = config["execution_spec"]["gpu_spec"]
            gpu_spec["num_gpus"] = gpu_count
            gpu_spec["per_process_gpu_memory_fraction"] = 1.0 / gpu_count

            # TODO replace with config from above
            # (as written, the agent is built from a freshly loaded config, not the adjusted one).
            agent = Agent.from_spec(
                config_from_path("configs/ray_apex_for_pong.json"),
                state_space=env.state_space,
                # Try with "reduced" action space (actually only 3 actions, up, down, no-op)
                action_space=env.action_space
            )

            # Layout of one update batch.
            batch_space = Dict(
                states=agent.preprocessed_state_space,
                actions=env.action_space,
                rewards=FloatBox(),
                next_states=agent.preprocessed_state_space,
                terminals=IntBox(low=0, high=1),
                importance_weights=FloatBox(),
                add_batch_rank=True
            )

            batch_size = 512 * gpu_count
            num_samples = 50
            # All batches are sampled before timing starts.
            batches = [batch_space.sample(batch_size) for _ in range(num_samples)]

            runtimes = []
            throughputs = []
            for batch in batches:
                started = time.perf_counter()
                agent.update(batch)
                elapsed = time.perf_counter() - started
                runtimes.append(elapsed)
                throughputs.append(batch_size / elapsed)

            mean_throughput = np.mean(throughputs)
            std_throughput = np.std(throughputs)
            print("Throughput: {} samples / s ({}) for {} GPUs".format(mean_throughput, std_throughput, gpu_count))
コード例 #11
0
    def test_calculate_gradients(self):
        """
        Currently disabled: the method returns immediately, so nothing below the `return` is executed.

        The unreachable remainder would build a gradient-descent optimizer test on the loss
        `square(log(x))` and print the result of its "calculate_gradients" API call.
        """
        return
        gd_optimizer = GradientDescentOptimizer(learning_rate=0.01)

        x = tf.Variable(2, name='x', dtype=tf.float32)
        loss = tf.square(x=tf.log(x))

        input_spaces = dict(
            loss=FloatBox(),
            variables=Dict({"x": FloatBox()}),
            loss_per_item=FloatBox(add_batch_rank=True),
            grads_and_vars=Tuple(Tuple(float, float))
        )
        test = ComponentTest(component=gd_optimizer, input_spaces=input_spaces)

        result = test.test(("calculate_gradients", [dict(x=x), loss]), expected_outputs=None)
        print(result)
コード例 #12
0
    def get_preprocessed_space(self, space):
        """
        Computes the Space that results from sending `space` through this preprocessor.

        Each flattened sub-space that has an entry in `self.flattened_preprocessors` is replaced by that
        preprocessor's output space; all other sub-spaces are kept as they are.

        Args:
            space (ContainerSpace): The incoming (container) Space object.

        Returns:
            Dict: The Space after preprocessing.
        """
        assert isinstance(space, ContainerSpace)
        preprocessors = self.flattened_preprocessors
        flat_spec = {
            flat_key: (
                preprocessors[flat_key].get_preprocessed_space(sub_space)
                if flat_key in preprocessors else sub_space
            )
            for flat_key, sub_space in space.flatten().items()
        }
        # Restore the nested structure before wrapping everything into a Dict space.
        return Dict(unflatten_op(flat_spec))
コード例 #13
0
    def test_concat_layer_with_dict_input(self):
        """
        Checks that a ConcatLayer with explicit `dict_keys` concatenates the entries of a Dict input along
        the last axis in exactly the given key order.
        """
        # Spaces must contain batch dimension (otherwise, NNlayer will complain).
        input_space = Dict(
            dict(
                a=FloatBox(shape=(2, 3)),
                b=FloatBox(shape=(2, 1)),
                c=FloatBox(shape=(2, 2)),
            ),
            add_batch_rank=True
        )

        # Deliberately not in alphabetical order.
        key_order = ["c", "a", "b"]
        layer = ConcatLayer(dict_keys=key_order)
        test = ComponentTest(component=layer, input_spaces=dict(inputs=input_space))

        # A batch of 4 samples.
        batch = input_space.sample(4)
        expected = np.concatenate((batch["c"], batch["a"], batch["b"]), axis=-1)
        test.test(("apply", (batch,)), expected_outputs=expected)
コード例 #14
0
    def test_sampler_component(self):
        """
        Draws 10 items from a batch of 100 records via the Sampler component and checks that the
        returned container entries have length 10.
        """
        record_space = Dict(
            states=dict(state1=float, state2=float),
            actions=dict(action1=float),
            reward=float,
            terminals=BoolBox(),
            add_batch_rank=True
        )

        sampler_component = Sampler()
        input_spaces = dict(sample_size=int, inputs=record_space)
        test = ComponentTest(component=sampler_component, input_spaces=input_spaces)

        records = record_space.sample(size=100)
        sample = test.test(("sample", [10, records]), expected_outputs=None)

        # Nested and flat entries alike must have been reduced to the requested size.
        self.assertEqual(len(sample["actions"]["action1"]), 10)
        self.assertEqual(len(sample["states"]["state1"]), 10)
        self.assertEqual(len(sample["terminals"]), 10)

        print(sample)
コード例 #15
0
    def build_output_tokens(self):
        """
        Builds the action vocabulary used for the output of the Q-network.

        Results are exposed through `self.actions_spec` and `self.system_spec`:

        - `self.actions_spec` becomes a batched Dict with one entry "index_column{i}" per allowed index
          field (`self.max_fields_per_index` entries), each an IntBox(low=0, high=n_outputs) with
          n_outputs = 1 + self.max_fields_per_index.
        - `self.system_spec` receives the keys 'idxs' (the entry names), 'n_outputs', 'noop_idx' (0) and
          'max_fields_per_index'.

        The action representation maps each index field (a candidate index column) to a decision, where
        0 is the noop. Earlier notes describe a variant that also encodes ASC/DESC per column
        (n_outputs = 1 + 2 * max_fields_per_index); that is not what is built here.

        n.b. actions_spec comes from action branching architectures https://arxiv.org/abs/1711.08946
        TODO dig deeper into that
        """
        noop_idx = 0

        # not sure whether ASC / DESC can be specified
        # see LIFT paper for this representation in particular
        n_outputs = 1 + self.max_fields_per_index  # 1 + 2 * self.max_fields_per_index

        idxs = ['index_column{}'.format(i) for i in range(self.max_fields_per_index)]
        self.actions_spec = {column: IntBox(low=0, high=n_outputs) for column in idxs}
        # Wrap the plain dict into a batched container space.
        self.actions_spec = Dict(self.actions_spec, add_batch_rank=True)

        exposed = (
            ('idxs', idxs),
            ('n_outputs', n_outputs),
            ('noop_idx', noop_idx),
            ('max_fields_per_index', self.max_fields_per_index),
        )
        for spec_key, spec_value in exposed:
            self.system_spec[spec_key] = spec_value
コード例 #16
0
    def test_container_actions(self):
        """
        Builds a DQfD agent with an embedding network and a container (Dict) action space and feeds it
        one sampled demo; no results are checked.
        """
        vocab_size = 100
        embed_dim = 128
        num_outputs = 3

        # ID/state space.
        state_space = IntBox(vocab_size, shape=(10,))
        # Container action space made of three integer sub-actions.
        actions_space = Dict({
            "action_{}".format(i): IntBox(low=0, high=num_outputs) for i in range(3)
        })

        agent_config = config_from_path("configs/dqfd_container.json")
        agent_config["network_spec"] = [
            {"type": "embedding", "embed_dim": embed_dim, "vocab_size": vocab_size},
            {"type": "reshape", "flatten": True},
            {"type": "dense", "units": embed_dim, "activation": "relu", "scope": "dense_1"}
        ]
        agent = DQFDAgent.from_spec(agent_config, state_space=state_space, action_space=actions_space)

        terminal_space = BoolBox(add_batch_rank=True)
        reward_space = FloatBox(add_batch_rank=True)

        # A single demo transition, sampled from the batched spaces.
        demo_states = agent.preprocessed_state_space.with_batch_rank().sample(1)
        demo_actions = actions_space.with_batch_rank().sample(1)
        demo_rewards = reward_space.sample(1)
        demo_next_states = agent.preprocessed_state_space.with_batch_rank().sample(1)
        demo_terminals = terminal_space.sample(1)

        agent.observe_demos(
            preprocessed_states=demo_states,
            actions=demo_actions,
            rewards=demo_rewards,
            next_states=demo_next_states,
            terminals=demo_terminals,
        )
コード例 #17
0
    def create_variables(self, input_spaces, action_space=None):
        """
        Stores the record spaces and sets up the sum- and min segment trees used for the priorities.

        Args:
            input_spaces (dict): Spaces by API argument name; the "records" entry is used as record space.
            action_space: Not used in this method.
        """
        # Keep the record space (plus a flattened, batched version of it) around for convenience.
        records = input_spaces["records"]
        self.record_space = records
        flat_records = records.flatten(custom_scope_separator="/", scope_separator_at_start=False)
        self.record_space_flat = Dict(flat_records, add_batch_rank=True)

        # Smallest power of two that is not below the memory's capacity.
        tree_capacity = 1
        while tree_capacity < self.capacity:
            tree_capacity *= 2
        self.priority_capacity = tree_capacity

        # Both trees hold 2 * capacity values, initialized with the neutral element of their operator.
        num_values = 2 * self.priority_capacity
        sum_tree = MemSegmentTree([0.0] * num_values, self.priority_capacity, operator.add)
        min_tree = MemSegmentTree([float('inf')] * num_values, self.priority_capacity, min)

        self.merged_segment_tree = MinSumSegmentTree(
            sum_tree=sum_tree,
            min_tree=min_tree,
            capacity=self.priority_capacity
        )
コード例 #18
0
    def test_custom_margin_demos_with_container_actions(self):
        """
        Checks that per-sample expert margins can be passed to the DQfD update with an external batch and
        container actions: one and the same state is paired with two different actions and opposite margins.
        The learned action is printed, not asserted.
        """
        vocab_size = 100
        embed_dim = 8
        num_outputs = 3

        # ID/state space.
        state_space = IntBox(vocab_size, shape=(10,))
        # Container action space made of three integer sub-actions.
        actions_space = Dict({
            'action_{}'.format(i): IntBox(low=0, high=num_outputs) for i in range(3)
        })

        agent_config = config_from_path("configs/dqfd_container.json")
        agent_config["network_spec"] = [
            {"type": "embedding", "embed_dim": embed_dim, "vocab_size": vocab_size},
            {"type": "reshape", "flatten": True},
            {"type": "dense", "units": embed_dim, "activation": "relu", "scope": "dense_1"}
        ]
        agent = DQFDAgent.from_spec(agent_config, state_space=state_space, action_space=actions_space)

        terminal_space = BoolBox(add_batch_rank=True)
        reward_space = FloatBox(add_batch_rank=True)

        # Two demos that share the very same state.
        demo_states = agent.preprocessed_state_space.with_batch_rank().sample(2)
        demo_states[1] = demo_states[0]
        demo_actions = actions_space.with_batch_rank().sample(2)

        # Demo 0 picks 0 for every sub-action, demo 1 picks 1.
        for name, _ in actions_space.items():
            demo_actions[name][0] = 0
            demo_actions[name][1] = 1

        # Both demos carry a reward of zero.
        demo_rewards = reward_space.sample(2, fill_value=.0)
        demo_rewards[0] = 0
        demo_rewards[1] = 0

        # One action is encouraged, one is discouraged.
        margins = np.asarray([0.5, -0.5])

        demo_next_states = agent.preprocessed_state_space.with_batch_rank().sample(2)
        demo_terminals = terminal_space.sample(2, fill_value=False)

        # When using margins, need to use external batch.
        batch = {
            "states": demo_states,
            "actions": demo_actions,
            "rewards": demo_rewards,
            "next_states": demo_next_states,
            "importance_weights": np.ones_like(demo_rewards),
            "terminals": demo_terminals,
        }
        # Fit demos with custom margins.
        for _ in range(10000):
            agent.update(batch=batch, update_from_demos=False, apply_demo_loss_to_batch=True, expert_margins=margins)

        # Query the greedy action for the shared state and show it.
        shared_state = np.array([demo_states[0]])
        agent_actions = agent.get_action(shared_state, apply_preprocessing=False, use_exploration=False)
        print("learned action = ", agent_actions)
コード例 #19
0
    def test_functional_api_multi_stream_nn(self):
        """
        Assembles a two-stream (image + text) network via the Keras-style functional API and checks
        shapes and dtypes of its three outputs (final dense out, main LSTM out, main LSTM internal states).
        """
        # Input Space of the network (batched and time-ranked).
        input_space = Dict(
            dict(
                img=FloatBox(shape=(6, 6, 3)),  # some RGB img
                txt=TextBox()  # some text
            ),
            add_batch_rank=True,
            add_time_rank=True
        )

        img, txt = ContainerSplitter("img", "txt")(input_space)

        # ---- Text stream ----
        # Fold text input into single batch rank.
        txt_folded = ReShape(fold_time_rank=True)(txt)
        # String layer will create batched AND time-ranked (individual words) hash outputs (int64).
        hash_out, lengths = StringToHashBucket(num_hash_buckets=5)(txt_folded)
        # Batched and time-ranked embedding output (floats) with embed dim=n.
        embedded = EmbeddingLookup(embed_dim=10, vocab_size=5)(hash_out)
        # Pass embeddings through a text LSTM and use last output (reduce time-rank).
        txt_lstm_out, _ = LSTMLayer(units=2, return_sequences=False, scope="lstm-layer-txt")(
            embedded, sequence_length=lengths
        )
        # Unfold to get original time-rank back.
        txt_stream_out = ReShape(unfold_time_rank=True)(txt_lstm_out, txt)

        # ---- Image stream: 1 CNN layer plus dense ----
        img_folded = ReShape(fold_time_rank=True, scope="img-fold")(img)
        conv_out = Conv2DLayer(filters=1, kernel_size=2, strides=2)(img_folded)
        conv_unfolded = ReShape(unfold_time_rank=True, scope="img-unfold")(conv_out, img)
        conv_flat = ReShape(flatten=True, scope="img-flat")(conv_unfolded)
        img_stream_out = DenseLayer(units=2, scope="dense-0")(conv_flat)

        # ---- Merge both streams ----
        merged = ConcatLayer()(txt_stream_out, img_stream_out)

        # LSTM output has batch+time.
        main_lstm_out, internal_states = LSTMLayer(units=2, scope="lstm-layer-main")(merged)

        # Three dense layers on top of the main LSTM.
        dense1_out = DenseLayer(units=3, scope="dense-1")(main_lstm_out)
        dense2_out = DenseLayer(units=2, scope="dense-2")(dense1_out)
        dense3_out = DenseLayer(units=1, scope="dense-3")(dense2_out)

        # A NN with 3 outputs.
        neural_net = NeuralNetwork(outputs=[dense3_out, main_lstm_out, internal_states])

        test = ComponentTest(component=neural_net, input_spaces=dict(inputs=input_space))

        # Sample with a (4, 2) leading shape (batch and time ranks).
        sample_shape = (4, 2)
        network_input = input_space.sample(sample_shape)

        out = test.test(("call", network_input), expected_outputs=None)
        final_out, lstm_out, lstm_states = out[0], out[1], out[2]

        # Main output (Dense out after LSTM): 1 unit in the last dense layer.
        self.assertTrue(final_out.shape == sample_shape + (1,))
        self.assertTrue(final_out.dtype == np.float32)
        # main-LSTM out: 2 LSTM units.
        self.assertTrue(lstm_out.shape == sample_shape + (2,))
        self.assertTrue(lstm_out.dtype == np.float32)
        # main-LSTM internal-states: both only keep the first rank of the sample shape, 2 LSTM units each.
        for state_index in (0, 1):
            self.assertTrue(lstm_states[state_index].shape == sample_shape[:1] + (2,))
            self.assertTrue(lstm_states[state_index].dtype == np.float32)

        test.terminate()
コード例 #20
0
class TestReplayMemory(unittest.TestCase):
    """
    Exercises the replay memory component: inserting records, the size/index
    bookkeeping once capacity is exceeded, and fetching batches of records.
    """
    # Space describing the records that are inserted into the memory.
    record_space = Dict(
        states=dict(state1=float, state2=float),
        actions=dict(action1=float),
        reward=float,
        terminals=BoolBox(),
        next_states=dict(state1=float, state2=float),
        add_batch_rank=True
    )
    # Names of the internal variables inspected by `test_capacity`.
    memory_variables = ["size", "index"]
    capacity = 10

    # Input spaces handed to the ComponentTest harness.
    input_spaces = dict(records=record_space, num_records=int)

    def test_insert(self):
        """
        Smoke test for `insert_records`: only checks that the calls run, the
        stored content is not inspected.
        """
        harness = ComponentTest(
            component=ReplayMemory(capacity=self.capacity),
            input_spaces=self.input_spaces
        )

        # One single record first, then a batch far larger than the capacity.
        for batch_size in (1, 100):
            records = self.record_space.sample(size=batch_size)
            harness.test(("insert_records", records), expected_outputs=None)

    def test_capacity(self):
        """
        Checks the `size` and `index` variables before and after inserting one
        record more than the memory can hold.
        """
        replay = ReplayMemory(capacity=self.capacity)
        harness = ComponentTest(component=replay, input_spaces=self.input_spaces)

        # Handles to the internal bookkeeping variables.
        variables = replay.get_variables(self.memory_variables, global_scope=False)
        size_var = variables['size']
        index_var = variables['index']

        # Nothing has been inserted yet, so both counters must be zero.
        size, index = harness.read_variable_values(size_var, index_var)
        self.assertEqual(size, 0)
        self.assertEqual(index, 0)

        # Overfill the memory by exactly one record.
        records = self.record_space.sample(size=self.capacity + 1)
        harness.test(("insert_records", records), expected_outputs=None)

        size, index = harness.read_variable_values(size_var, index_var)
        # The size is capped at the capacity ...
        self.assertEqual(size, self.capacity)
        # ... and the index is expected at 1, i.e. (capacity + 1) modulo capacity.
        self.assertEqual(index, 1)

    def test_batch_retrieve(self):
        """
        Checks that `get_records` returns batches of the requested length, both
        when asking for more records than were inserted and after overfilling.
        """
        harness = ComponentTest(
            component=ReplayMemory(capacity=self.capacity),
            input_spaces=self.input_spaces
        )

        # Start with two non-terminal records.
        harness.test(
            ("insert_records", non_terminal_records(self.record_space, 2)),
            expected_outputs=None
        )

        # Exactly those two can be fetched; only the first returned item is used.
        batch, _, _ = harness.test(("get_records", 2), expected_outputs=None)
        print('Result batch = {}'.format(batch))
        self.assertEqual(2, len(batch['terminals']))
        # The next-states entry has to be part of the returned batch.
        self.assertTrue('next_states' in batch)

        # Asking for 5 while only 2 were inserted still has to yield 5 entries
        # (duplicate sampling).
        batch, _, _ = harness.test(("get_records", 5), expected_outputs=None)
        self.assertEqual(5, len(batch['terminals']))

        # Push the total number of inserted records beyond the capacity.
        harness.test(
            ("insert_records", non_terminal_records(self.record_space, self.capacity)),
            expected_outputs=None
        )

        # A batch of exactly `capacity` records must now be available.
        batch, _, _ = harness.test(("get_records", self.capacity), expected_outputs=None)
        self.assertEqual(self.capacity, len(batch['terminals']))
コード例 #21
0
class TestRingBufferMemory(unittest.TestCase):
    """
    Tests for the ring buffer memory.

    Insertion and capacity handling are checked in the same way as for the
    replay memory. In addition, the episode bookkeeping (episode count and
    episode indices) and the retrieval of episodes and of the most recently
    inserted records are covered.
    """

    # Space describing the records that are inserted into the buffer.
    record_space = Dict(
        states=dict(state1=float, state2=float),
        actions=dict(action1=float),
        reward=float,
        terminals=BoolBox(),
        add_batch_rank=True
    )
    # Variable names inspected when episode semantics are disabled.
    memory_variables = ["size", "index"]

    # Variable names inspected when episode semantics are enabled.
    ring_buffer_variables = ["size", "index", "num-episodes", "episode-indices"]
    capacity = 10

    # Input spaces for the harness, with and without the episode argument.
    input_spaces = dict(
        records=record_space,
        num_records=int,
        num_episodes=int
    )
    input_spaces_no_episodes = dict(records=record_space, num_records=int)

    def test_insert_no_episodes(self):
        """
        Smoke test for `insert_records` with episode semantics switched off;
        the stored content is not inspected.
        """
        ring = RingBuffer(capacity=self.capacity, episode_semantics=False)
        harness = ComponentTest(component=ring, input_spaces=self.input_spaces_no_episodes)

        # A single record first, then a batch of 10.
        for batch_size in (1, 10):
            records = self.record_space.sample(size=batch_size)
            harness.test(("insert_records", records), expected_outputs=None)

    def test_capacity_no_episodes(self):
        """
        Checks the `size` and `index` variables before and after inserting one
        record more than the capacity, with episode semantics switched off.
        """
        ring = RingBuffer(capacity=self.capacity, episode_semantics=False)
        harness = ComponentTest(component=ring, input_spaces=self.input_spaces_no_episodes)

        # Handles to the internal bookkeeping variables.
        variables = ring.get_variables(self.memory_variables, global_scope=False)
        size_var = variables['size']
        index_var = variables['index']

        # Nothing has been inserted yet, so both counters must be zero.
        size, index = harness.read_variable_values(size_var, index_var)
        self.assertEqual(size, 0)
        self.assertEqual(index, 0)

        # Overfill the buffer by exactly one record.
        records = self.record_space.sample(size=self.capacity + 1)
        harness.test(("insert_records", records), expected_outputs=None)

        size, index = harness.read_variable_values(size_var, index_var)
        # The size is capped at the capacity ...
        self.assertEqual(size, self.capacity)
        # ... and the index is expected at 1, i.e. (capacity + 1) modulo capacity.
        self.assertEqual(index, 1)

    def test_capacity_with_episodes(self):
        """
        Same capacity check as above but with episode semantics enabled and only
        non-terminal records inserted.

        The episode logic itself is covered by the tests further down; here the
        episode variables are merely expected to stay at zero.
        """
        ring = RingBuffer(capacity=self.capacity, episode_semantics=True)
        harness = ComponentTest(component=ring, input_spaces=self.input_spaces)

        # Handles to the internal variables, in the order they are read back.
        variables = ring.get_variables(self.ring_buffer_variables, global_scope=False)
        tracked = (
            variables["size"],
            variables["index"],
            variables["num-episodes"],
            variables["episode-indices"],
        )

        # Everything starts out at zero.
        size, index, episode_count, episode_positions = harness.read_variable_values(*tracked)
        self.assertEqual(size, 0)
        self.assertEqual(index, 0)
        self.assertEqual(episode_count, 0)
        self.assertEqual(np.sum(episode_positions), 0)

        # Overfill by one record. With episode semantics on it matters whether
        # records are terminal; here none of them is, which checks that the
        # episode index update copes with a batch without any terminal.
        records = non_terminal_records(self.record_space, self.capacity + 1)
        harness.test(("insert_records", records), expected_outputs=None)
        size, index, episode_count, episode_positions = harness.read_variable_values(*tracked)

        # The size is capped at the capacity ...
        self.assertEqual(size, self.capacity)
        # ... the index is expected at 1, i.e. (capacity + 1) modulo capacity ...
        self.assertEqual(index, 1)
        # ... and no episode has been registered.
        self.assertEqual(episode_count, 0)
        self.assertEqual(np.sum(episode_positions), 0)

    def test_episode_indices_when_inserting(self):
        """
        Checks the episode count and the episode indices after inserting
        terminal records.
        """
        ring = RingBuffer(capacity=self.capacity, episode_semantics=True)
        harness = ComponentTest(component=ring, input_spaces=self.input_spaces)

        # Handles to the internal variables, in the order they are read back.
        variables = ring.get_variables(self.ring_buffer_variables, global_scope=False)
        tracked = (
            variables["size"],
            variables["index"],
            variables["num-episodes"],
            variables["episode-indices"],
        )

        # Step 1: a lone terminal record.
        harness.test(
            ("insert_records", terminal_records(self.record_space, 1)),
            expected_outputs=None
        )
        _, _, episode_count, episode_positions = harness.read_variable_values(*tracked)

        # This counts as one episode ...
        self.assertEqual(episode_count, 1)
        # ... located at index 0, hence the episode indices still sum to 0.
        self.assertEqual(sum(episode_positions), 0)

        # Step 2: one non-terminal record followed by one terminal record.
        harness.test(
            ("insert_records", non_terminal_records(self.record_space, 1)),
            expected_outputs=None
        )
        harness.test(
            ("insert_records", terminal_records(self.record_space, 1)),
            expected_outputs=None
        )

        # Two episodes are expected now, the second one ending at index 2.
        _, _, episode_count, episode_positions = harness.read_variable_values(*tracked)
        print('Episode indices after = {}'.format(episode_positions))
        self.assertEqual(episode_count, 2)
        self.assertEqual(episode_positions[1], 2)

    def test_only_terminal_with_episodes(self):
        """
        Edge case: with episode semantics enabled, the buffer is filled
        exclusively with terminal records.
        """
        ring = RingBuffer(capacity=self.capacity, episode_semantics=True)
        harness = ComponentTest(component=ring, input_spaces=self.input_spaces)

        variables = ring.get_variables(self.ring_buffer_variables, global_scope=False)
        episode_count_var = variables["num-episodes"]
        episode_positions_var = variables["episode-indices"]

        harness.test(
            ("insert_records", terminal_records(self.record_space, self.capacity)),
            expected_outputs=None
        )
        episode_count, episode_positions = harness.read_variable_values(
            episode_count_var, episode_positions_var
        )

        # Every record is its own episode ...
        self.assertEqual(episode_count, self.capacity)
        # ... and the i-th episode index equals i.
        for position in range_(self.capacity):
            self.assertEqual(episode_positions[position], position)

    def test_episode_fetching(self):
        """
        Checks `get_episodes` after one episode and after wrapping around the
        buffer with a second episode.
        """
        ring = RingBuffer(capacity=self.capacity, episode_semantics=True)
        harness = ComponentTest(component=ring, input_spaces=self.input_spaces)

        # First episode: 2 non-terminal records closed by 1 terminal record.
        harness.test(
            ("insert_records", non_terminal_records(self.record_space, 2)),
            expected_outputs=None
        )
        harness.test(
            ("insert_records", terminal_records(self.record_space, 1)),
            expected_outputs=None
        )

        # Fetch one episode.
        # NOTE(review): 3 records were inserted for this episode, yet 2 rewards
        # are expected below - confirm this matches the intended slicing.
        episode = harness.test(("get_episodes", 1), expected_outputs=None)
        self.assertTrue(len(episode['reward']) == 2)

        # Asking for two episodes while only one exists must give the same result.
        episode = harness.test(("get_episodes", 2), expected_outputs=None)
        self.assertTrue(len(episode['reward']) == 2)

        # Second episode: 7 non-terminal records plus 1 terminal record. That
        # makes 3 + 8 = 11 inserted records in total, so with a capacity of 10
        # the last terminal record ends up at buffer index 0.
        harness.test(
            ("insert_records", non_terminal_records(self.record_space, 7)),
            expected_outputs=None
        )
        harness.test(
            ("insert_records", terminal_records(self.record_space, 1)),
            expected_outputs=None
        )

        # Both episodes should be retrievable now.
        episodes = harness.test(("get_episodes", 2), expected_outputs=None)

        # Expected result: `capacity` time steps, with terminal flags set at
        # positions 0 and 2 as a consequence of the insertion order.
        self.assertEqual(len(episodes['terminals']), self.capacity)
        self.assertEqual(episodes['terminals'][0], True)
        self.assertEqual(episodes['terminals'][2], True)

    def test_latest_batch(self):
        """
        Checks that `get_records` returns the most recently inserted records.
        """
        ring = RingBuffer(capacity=self.capacity, episode_semantics=True)
        harness = ComponentTest(component=ring, input_spaces=self.input_spaces)

        # Five non-terminal records to begin with.
        harness.test(
            ("insert_records", non_terminal_records(self.record_space, 5)),
            expected_outputs=None
        )

        # Basic check: five records can be fetched.
        batch = harness.test(("get_records", 5), expected_outputs=None)
        self.assertEqual(len(batch['terminals']), 5)

        # Insert another `capacity` records on top.
        latest = non_terminal_records(self.record_space, self.capacity)
        harness.test(("insert_records", latest), expected_outputs=None)

        # Fetching `capacity` records should now return precisely that last
        # batch.
        batch = harness.test(("get_records", self.capacity), expected_outputs=None)

        # Each inserted action has to show up in the result, in any order.
        fetched_actions = batch['actions']['action1']
        for inserted_action in latest['actions']['action1']:
            self.assertTrue(inserted_action in fetched_actions)
コード例 #22
0
class TestPythonMemoryPerformance(unittest.TestCase):
    """
    Throughput benchmarks for Ray's PrioritizedReplayBuffer and RLgraph's
    python ApexMemory.

    Every test times one kind of operation (insert, sample, priority update or
    a combination of all three) with `time.monotonic` and prints the measured
    throughput. Apart from the backend check in the Ray tests, nothing is
    asserted about the results.
    """
    # Space describing the records that are inserted into the memories.
    record_space = Dict(
        states=FloatBox(shape=(4,)),
        actions=FloatBox(shape=(2,)),
        reward=float,
        terminals=BoolBox(),
        add_batch_rank=True
    )

    # Apex params
    capacity = 2000000
    chunksize = 64
    inserts = 1000000

    # Number of sampling/update operations and the batch size used for each.
    samples = 10000
    sample_batch_size = 50

    # NOTE: the three values below are not referenced by any test in this
    # class; the memories are built with alpha=1.0 and sampled with beta=1.0.
    alpha = 0.6
    beta = 0.4
    max_priority = 1.0

    def _make_ray_memory(self):
        """Returns a fresh Ray PrioritizedReplayBuffer of size `capacity`."""
        return PrioritizedReplayBuffer(
            size=self.capacity,
            alpha=1.0,
            clip_rewards=True
        )

    def _make_apex_memory(self):
        """Returns a fresh RLgraph ApexMemory with capacity `capacity`."""
        return ApexMemory(
            capacity=self.capacity,
            alpha=1.0
        )

    def test_ray_prioritized_replay_insert(self):
        """
        Times record-by-record and chunked inserts into Ray's memory.
        """
        assert get_distributed_backend() == "ray"
        memory = self._make_ray_memory()

        # Individual inserts; the records are sampled before the clock starts.
        records = [self.record_space.sample(size=1) for _ in range_(self.inserts)]

        start = time.monotonic()
        for record in records:
            memory.add(
                obs_t=record['states'],
                action=record['actions'],
                reward=record['reward'],
                obs_tp1=record['states'],
                done=record['terminals'],
                weight=None
            )
        elapsed = time.monotonic() - start
        tp = len(records) / elapsed
        print('#### Testing Ray Prioritized Replay memory ####')
        print('Testing insert performance:')
        print('Inserted {} separate records, throughput: {} records/s, total time: {} s'.format(
            len(records), tp, elapsed
        ))

        memory = self._make_ray_memory()

        # Chunked inserts -> done via external for loop in Ray.
        chunks = self.inserts // self.chunksize
        records = [self.record_space.sample(size=self.chunksize) for _ in range_(chunks)]
        start = time.monotonic()
        for chunk in records:
            for i in range_(self.chunksize):
                memory.add(
                    obs_t=chunk['states'][i],
                    action=chunk['actions'][i],
                    reward=chunk['reward'][i],
                    obs_tp1=chunk['states'][i],
                    done=chunk['terminals'][i],
                    weight=None
                )
        elapsed = time.monotonic() - start
        tp = len(records) * self.chunksize / elapsed
        print('Testing chunked insert performance:')
        print('Inserted {} chunks, throughput: {} records/s, total time: {} s'.format(
            len(records), tp, elapsed
        ))

    def test_ray_sampling(self):
        """
        Times sampling of batches from Ray's memory.
        """
        assert get_distributed_backend() == "ray"
        memory = self._make_ray_memory()

        # Fill the memory (not timed); states are stored compressed.
        records = [self.record_space.sample(size=1) for _ in range_(self.inserts)]
        for record in records:
            memory.add(
                obs_t=ray_compress(record['states']),
                action=record['actions'],
                reward=record['reward'],
                obs_tp1=ray_compress(record['states']),
                done=record['terminals'],
                weight=None
            )

        start = time.monotonic()
        for _ in range_(self.samples):
            # Only the call is timed; the sampled batch itself is not needed.
            memory.sample(self.sample_batch_size, beta=1.0)
        elapsed = time.monotonic() - start
        tp = self.samples / elapsed
        print('#### Testing Ray Prioritized Replay memory ####')
        print('Testing sampling performance:')
        print('Sampled {} batches, throughput: {} samples/s, total time: {} s'.format(
            self.samples, tp, elapsed
        ))

    def test_ray_updating(self):
        """
        Times priority updates on Ray's memory.
        """
        assert get_distributed_backend() == "ray"
        memory = self._make_ray_memory()

        # Fill the memory (not timed).
        records = [self.record_space.sample(size=1) for _ in range_(self.inserts)]
        for record in records:
            memory.add(
                obs_t=record['states'],
                action=record['actions'],
                reward=record['reward'],
                obs_tp1=record['states'],
                done=record['terminals'],
                weight=None
            )

        # Random losses and random target indices, one batch per update call.
        loss_values = [np.random.random(size=self.sample_batch_size) for _ in range_(self.samples)]
        indices = [np.random.randint(low=0, high=self.inserts, size=self.sample_batch_size) for _
                   in range_(self.samples)]

        start = time.monotonic()
        for index, loss in zip(indices, loss_values):
            memory.update_priorities(index, loss)
        elapsed = time.monotonic() - start
        tp = len(indices) / elapsed
        print('#### Testing Ray Prioritized Replay memory ####')
        print('Testing updating performance:')
        print('Updates {} loss batches, throughput: {} updates/s, total time: {} s'.format(
            len(indices), tp, elapsed
        ))

    def test_rlgraph_apex_insert(self):
        """
        Times record-by-record and chunked inserts into RLgraph's python memory.
        """
        memory = self._make_apex_memory()

        # Individual inserts; the records are sampled before the clock starts.
        records = [self.record_space.sample(size=1) for _ in range_(self.inserts)]

        start = time.monotonic()
        for record in records:
            memory.insert_records((
                 record['states'],
                 record['actions'],
                 record['reward'],
                 record['terminals'],
                 None
            ))
        elapsed = time.monotonic() - start
        tp = len(records) / elapsed

        print('#### Testing RLGraph python prioritized replay ####')
        print('Testing insert performance:')
        print('Inserted {} separate records, throughput: {} records/s, total time: {} s'.format(
            len(records), tp, elapsed
        ))

        memory = self._make_apex_memory()

        # Chunked inserts, each chunk fed record by record.
        chunks = self.inserts // self.chunksize
        records = [self.record_space.sample(size=self.chunksize) for _ in range_(chunks)]
        start = time.monotonic()
        for chunk in records:
            for i in range_(self.chunksize):
                memory.insert_records((
                    chunk['states'][i],
                    chunk['actions'][i],
                    chunk['reward'][i],
                    chunk['terminals'][i],
                    None
                ))

        elapsed = time.monotonic() - start
        tp = len(records) * self.chunksize / elapsed
        print('Testing chunked insert performance:')
        print('Inserted {} chunks, throughput: {} records/s, total time: {} s'.format(
            len(records), tp, elapsed
        ))

    def test_rlgraph_sampling(self):
        """
        Times sampling of batches from RLgraph's python memory.
        """
        memory = self._make_apex_memory()

        # Fill the memory (not timed); states are stored compressed.
        records = [self.record_space.sample(size=1) for _ in range_(self.inserts)]
        for record in records:
            memory.insert_records((
                 ray_compress(record['states']),
                 record['actions'],
                 record['reward'],
                 record['terminals'],
                 None
            ))

        start = time.monotonic()
        for _ in range_(self.samples):
            # Only the call is timed; the sampled batch itself is not needed.
            memory.get_records(self.sample_batch_size)
        elapsed = time.monotonic() - start
        tp = self.samples / elapsed
        print('#### Testing RLGraph Prioritized Replay memory ####')
        print('Testing sampling performance:')
        print('Sampled {} batches, throughput: {} batches/s, total time: {} s'.format(
            self.samples, tp, elapsed
        ))

    def test_rlgraph_updating(self):
        """
        Times priority updates on RLgraph's python memory.
        """
        memory = self._make_apex_memory()

        # Fill the memory (not timed).
        records = [self.record_space.sample(size=1) for _ in range_(self.inserts)]
        for record in records:
            memory.insert_records((
                 record['states'],
                 record['actions'],
                 record['reward'],
                 record['terminals'],
                 None
            ))

        # Random losses and random target indices, one batch per update call.
        loss_values = [np.random.random(size=self.sample_batch_size) for _ in range_(self.samples)]
        indices = [np.random.randint(low=0, high=self.inserts, size=self.sample_batch_size) for _
                   in range_(self.samples)]

        start = time.monotonic()
        for index, loss in zip(indices, loss_values):
            memory.update_records(index, loss)
        elapsed = time.monotonic() - start
        tp = len(indices) / elapsed
        print('#### Testing RLGraph Prioritized Replay memory ####')
        print('Testing updating performance:')
        print('Updates {} loss batches, throughput: {} updates/s, total time: {} s'.format(
            len(indices), tp, elapsed
        ))

    def test_ray_combined_ops(self):
        """
        Times a combined insert / sample / update workflow on Ray's memory.
        """
        assert get_distributed_backend() == "ray"
        memory = self._make_ray_memory()
        # Local chunk size for this test; deliberately not the class attribute.
        chunksize = 32

        # Chunked inserts -> done via external for loop in Ray.
        chunks = self.inserts // chunksize
        records = [self.record_space.sample(size=chunksize) for _ in range_(chunks)]
        loss_values = [np.random.random(size=self.sample_batch_size) for _ in range_(chunks)]
        start = time.monotonic()

        # One combined op = insert a chunk, sample a batch, update its priorities.
        for chunk, loss in zip(records, loss_values):
            # Insert.
            for i in range_(chunksize):
                memory.add(
                    obs_t=ray_compress(chunk['states'][i]),
                    action=chunk['actions'][i],
                    reward=chunk['reward'][i],
                    obs_tp1=ray_compress(chunk['states'][i]),
                    done=chunk['terminals'][i],
                    weight=None
                )
            # Sample; the last element of the returned tuple is used as indices.
            batch_tuple = memory.sample(self.sample_batch_size, beta=1.0)
            indices = batch_tuple[-1]
            # Update
            memory.update_priorities(indices, loss)

        elapsed = time.monotonic() - start
        tp = len(records) / elapsed
        print('Ray: testing combined insert/sample/update performance:')
        print('Ran {} combined ops, throughput: {} combined ops/s, total time: {} s'.format(
            len(records), tp, elapsed
        ))

    def test_rlgraph_combined_ops(self):
        """
        Times a combined insert / sample / update workflow on RLgraph's python
        memory.
        """
        memory = self._make_apex_memory()

        # Local chunk size for this test; deliberately not the class attribute.
        chunksize = 32
        chunks = self.inserts // chunksize
        records = [self.record_space.sample(size=chunksize) for _ in range_(chunks)]
        loss_values = [np.random.random(size=self.sample_batch_size) for _ in range_(chunks)]

        start = time.monotonic()
        # One combined op = insert a chunk, sample a batch, update its priorities.
        for chunk, loss in zip(records, loss_values):
            # Each record now is a chunk.
            for i in range_(chunksize):
                memory.insert_records((
                    ray_compress(chunk['states'][i]),
                    chunk['actions'][i],
                    chunk['reward'][i],
                    chunk['terminals'][i],
                    None
                ))
            # Only the indices (second returned item) are needed for the update.
            _, indices, _ = memory.get_records(self.sample_batch_size)
            memory.update_records(indices, loss)

        elapsed = time.monotonic() - start
        tp = len(records) / elapsed
        print('RLGraph: Testing combined op performance:')
        print('Ran {} combined ops, throughput: {} combined ops/s, total time: {} s'.format(
            len(records), tp, elapsed
        ))
    def test_keras_style_complex_multi_stream_nn(self):
        """
        Builds a multi-stream network Keras-style from sub-spaces of three
        separate input spaces and checks the shapes and dtypes of its outputs.

        Only some of the sub-spaces (the text and the image) are fed into
        layers, so this also tests whether the network automatically adds the
        correct splitters. The text stream (hash bucket -> embedding -> LSTM)
        and the image stream (conv -> dense) are concatenated and passed through
        a main LSTM followed by three dense layers.
        """
        # The 3 input spaces.
        dict_space = Dict({
            "img": FloatBox(shape=(6, 6, 3)),
            "int": IntBox(3)
        }, add_batch_rank=True, add_time_rank=True)
        float_space = FloatBox(shape=(2,), add_batch_rank=True)
        tuple_space = Tuple(IntBox(2), TextBox(), add_batch_rank=True, add_time_rank=True)
        input_spaces = [dict_space, float_space, tuple_space]

        # --- Text stream (second item of the tuple space) ---
        # Fold the time rank away first.
        text_folded = ReShape(fold_time_rank=True)(tuple_space[1])
        # String layer creates batched AND time-ranked (individual words) hash
        # outputs (int64), plus the lengths.
        hash_buckets, text_lengths = StringToHashBucket(num_hash_buckets=5)(text_folded)
        # Batched and time-ranked embedding output (floats) with embed dim=n.
        embedded_text = EmbeddingLookup(embed_dim=10, vocab_size=5)(hash_buckets)
        # Text LSTM; only the last output is used (time rank reduced).
        text_lstm = LSTMLayer(units=2, return_sequences=False, scope="lstm-layer-txt")
        text_lstm_out, _ = text_lstm(embedded_text, sequence_length=text_lengths)
        # Unfold to get the original time rank back.
        text_stream_out = ReShape(unfold_time_rank=True)(text_lstm_out, tuple_space[1])

        # --- Image stream ("img" entry of the dict space): 1 CNN layer plus dense ---
        img_folded = ReShape(fold_time_rank=True, scope="img-fold")(dict_space["img"])
        conv_out = Conv2DLayer(filters=1, kernel_size=2, strides=2)(img_folded)
        conv_unfolded = ReShape(unfold_time_rank=True, scope="img-unfold")(
            conv_out, dict_space["img"]
        )
        conv_flat = ReShape(flatten=True, scope="img-flat")(conv_unfolded)
        img_stream_out = DenseLayer(units=2, scope="dense-0")(conv_flat)

        # Join both streams.
        merged = ConcatLayer()(text_stream_out, img_stream_out)

        # Main LSTM; its output has batch+time.
        main_lstm_out, internal_states = LSTMLayer(units=2, scope="lstm-layer-main")(merged)

        # Dense stack on top of the main LSTM: 3 -> 2 -> 1 units.
        head = DenseLayer(units=3, scope="dense-1")(main_lstm_out)
        head = DenseLayer(units=2, scope="dense-2")(head)
        head = DenseLayer(units=1, scope="dense-3")(head)

        # A NN with 3 outputs.
        neural_net = NeuralNetwork(
            inputs=input_spaces,
            outputs=[head, main_lstm_out, internal_states]
        )

        test = ComponentTest(component=neural_net, input_spaces=dict(inputs=input_spaces))

        # Sample inputs; the plain float space is sampled with the first entry
        # of the sample shape only.
        sample_shape = (4, 2)
        nn_inputs = (
            dict_space.sample(sample_shape),
            float_space.sample(sample_shape[0]),
            tuple_space.sample(sample_shape),
        )

        out = test.test(("call", nn_inputs), expected_outputs=None)

        # Main output (dense out after LSTM): 1 = 1 unit in the last dense layer.
        self.assertTrue(out[0].shape == sample_shape + (1,))
        self.assertTrue(out[0].dtype == np.float32)
        # Main LSTM out: 2 = 2 LSTM units.
        self.assertTrue(out[1].shape == sample_shape + (2,))
        self.assertTrue(out[1].dtype == np.float32)
        # Main LSTM internal states: both entries use only the first entry of
        # the sample shape, followed by the 2 LSTM units.
        for state_index in (0, 1):
            self.assertTrue(out[2][state_index].shape == sample_shape[:1] + (2,))
            self.assertTrue(out[2][state_index].dtype == np.float32)

        test.terminate()
コード例 #24
0
def run_dqn(exp, steps=25000, combinatorial=False,
            config_path='/Users/jeremywelborn/rlautoindex/conf/dqn.json'):
    """
    Trains a small DQN agent on the queries of an experiment and tracks its reward over time.

    Args:
        exp: Experiment description; passed through unchanged to `data`.
        steps (int): Total number of act/observe steps to run.
        combinatorial (bool): If False, the action space is a Dict of three integer sub-actions
            (one per candidate index column; the value 0 is treated as a noop). If True, the action
            is a single integer that indexes into the list of all permutations (of length 0 to 3)
            of the three query-attribute positions.
        config_path (str): Path of the JSON config file whose "agent" entry is used as the base
            agent spec. Defaults to the location that used to be hard-coded here.

    Returns:
        list: One `(step, mean_reward, elapsed_seconds)` tuple per 1000 steps, where `mean_reward`
            is the mean over the (at most 1000) most recently collected rewards.
    """
    # Can't account for all configurations, but be sure the agent is of a reasonably small size.
    vocab_size = 6
    state_size = 6
    # Number of candidate index columns (= max. number of attribute tokens looked at per query).
    n_columns = 3
    column_keys = ['candidate_index_column{}'.format(i) for i in range(n_columns)]

    # Queries, rewards for actions per query.
    dqn_queries, _, actions = data(exp)
    repr_builder = RepresentationBuilder()
    get_query, get_reward = repr_builder.build_dqn(dqn_queries, actions, K=state_size, prob=0.67)

    # Agent: the config is a bit big to copy, so it is loaded from file.
    import json
    with open(config_path, 'r') as f:
        config = json.load(f)
    agent_config = config['agent']

    # Further adjustments on top of the loaded config.
    agent_config['memory_spec']['type'] = 'replay'
    agent_config['exploration_spec']['epsilon_spec']['decay_spec']['num_timesteps'] = int(steps * .75)

    agent_config['network_spec'][0]['embed_dim'] = 64  # reduce capacity
    agent_config['network_spec'][2]['units'] = 64
    agent_config['network_spec'][0]['vocab_size'] = vocab_size

    # Replicate representations defined in Schema.
    state_spec = IntBox(low=0, high=vocab_size, shape=(state_size,))

    if not combinatorial:
        # One noop value plus one value per attribute position.
        n_outputs = 1 + n_columns
        action_spec = Dict(
            {key: IntBox(low=0, high=n_outputs) for key in column_keys},
            add_batch_rank=True
        )
    else:
        # All permutations of the attribute positions 0..2, for every length 0..3:
        # [[], [0], [1], [2], [0, 1], [0, 2], [1, 0], [1, 2], [2, 0], [2, 1],
        #  [0, 1, 2], [0, 2, 1], [1, 0, 2], [1, 2, 0], [2, 0, 1], [2, 1, 0]]
        perm_idx_2_perm = []
        for r in range(n_columns + 1):
            perm_idx_2_perm.extend(itertools.permutations(range(n_columns), r=r))
        perm_idx_2_perm = list(map(list, perm_idx_2_perm))

        # Action is a scalar corresponding to a particular permutation of query attributes.
        action_spec = IntBox(low=0, high=len(perm_idx_2_perm))

    task_graph = TaskGraph()
    task = Task(agent_config, state_space=state_spec, action_space=action_spec)
    task_graph.add_task(task)
    task_graph.get_task("").unwrap().timesteps = 0
    controller = SystemController(None, None)  # have to have for updates...
    controller.task_graph = task_graph
    controller.set_update_schedule(agent_config["update_spec"])

    print("params: {}".format(task.agent.graph_builder.num_trainable_parameters))  # TODO yikes

    # Hack - same as how query_cols are stored with query in actual training loop.
    # Loop-invariant, hence built once up front.
    attr_tokens = [foo_token, bar_token, baz_token]

    # Train agent.
    step = 0
    record = []
    running_avg_reward = deque(maxlen=1000)
    start = time.time()
    while step < steps:
        step += 1

        if step % 1000 == 0:
            mean_reward = np.mean(running_avg_reward)
            print('running avg reward after {}/{} steps is {}'.format(step, steps, mean_reward))
            record.append((step, mean_reward, time.time() - start))

        query_idx, query = get_query()

        agent_action = task_graph.act_task("", query, apply_preprocessing=True)

        # Replicate representation conversions defined in Converter.
        # Count the leading tokens that are column tokens.
        n_attrs = sum(1 for attr_token in query[:n_columns] if attr_token in attr_tokens)

        action = []
        if not combinatorial:
            for key in column_keys:
                action_val = agent_action[key][0]
                # Skip noops (0) and values pointing past the query's attributes.
                if action_val != 0 and n_attrs > action_val - 1:
                    col = query[:n_attrs][action_val - 1]
                    if col not in action:
                        action.append(col)
        else:
            perm = perm_idx_2_perm[agent_action]

            # Ignore case like query==[foo], permutation of two positions.
            if len(perm) == n_attrs:
                for query_attr_idx in perm:
                    # Ignore case like query==[foo], permutation==[1]: there is only a 0th
                    # attribute, not a 1st one. No repeats are possible in this representation.
                    if n_attrs > query_attr_idx:
                        action.append(query[:n_attrs][query_attr_idx])

        reward = get_reward(query_idx, action)
        running_avg_reward.append(reward)

        # TODO what to do with s_t+1??? (the current query is passed as the next state as well)
        task_graph.observe_task("", query, agent_action, [], reward, query, False)
        controller.update_if_necessary()

    return record
コード例 #25
0
ファイル: impala_agents.py プロジェクト: MegaYEye/rlgraph
    def __init__(self,
                 discount=0.99,
                 fifo_queue_spec=None,
                 architecture="large",
                 environment_spec=None,
                 feed_previous_action_through_nn=True,
                 feed_previous_reward_through_nn=True,
                 weight_pg=None,
                 weight_baseline=None,
                 weight_entropy=None,
                 worker_sample_size=100,
                 **kwargs):
        """
        Sets up an IMPALA agent of type "single", "actor" or "learner" and - for actors and learners with
        `auto_build` - builds its graph.

        Args:
            discount (float): The discount factor gamma.
            fifo_queue_spec (Optional[dict,FIFOQueue]): The spec for the FIFOQueue to use for the IMPALA algorithm.
                Default: A queue with capacity=1.
            architecture (str): Which IMPALA architecture to use. One of "small" or "large". Will be ignored if
                `network_spec` is given explicitly in kwargs. Default: "large".
            environment_spec (dict): The spec for constructing an Environment object for an actor-type IMPALA agent.
                Ignored (set to None) for learners.
            feed_previous_action_through_nn (bool): Whether to add the previous action as another input channel to the
                ActionComponent's (NN's) input at each step. This is only possible if the state space is already a Dict.
                It will be added under the key "previous_action". Default: True.
            feed_previous_reward_through_nn (bool): Whether to add the previous reward as another input channel to the
                ActionComponent's (NN's) input at each step. This is only possible if the state space is already a Dict.
                It will be added under the key "previous_reward". Default: True.
            weight_pg (float): See IMPALALossFunction Component.
            weight_baseline (float): See IMPALALossFunction Component.
            weight_entropy (float): See IMPALALossFunction Component.
            worker_sample_size (int): How many steps the actor will perform in the environment each sample-run.

        Keyword Args:
            type (str): One of "single", "actor" or "learner". Default: "single".
            network_spec, exploration_spec, optimizer_spec, observe_spec, update_spec, preprocessing_spec,
            execution_spec, policy_spec, name: Popped from kwargs, adjusted depending on `type` and passed on
                to the parent constructor.
            action_space: Read (not popped) from kwargs to build the default preprocessing- and policy-specs.
            All remaining kwargs are forwarded to the parent constructor unchanged.
        """
        type_ = kwargs.pop("type", "single")
        assert type_ in ["single", "actor", "learner"]
        self.type = type_
        self.worker_sample_size = worker_sample_size

        # Network-spec by default is a "large architecture" IMPALA network.
        self.network_spec = kwargs.pop(
            "network_spec",
            dict(
                type=
                "rlgraph.components.neural_networks.impala.impala_networks.{}IMPALANetwork"
                .format("Large" if architecture == "large" else "Small")))
        # For IMPALA networks, fill in `worker_sample_size` unless given: Actors use 1,
        # everyone else `worker_sample_size` + 1.
        if isinstance(self.network_spec, dict) and "type" in self.network_spec and \
                "IMPALANetwork" in self.network_spec["type"]:
            self.network_spec = default_dict(
                self.network_spec,
                dict(worker_sample_size=1 if self.type ==
                     "actor" else self.worker_sample_size + 1))

        # Depending on the job-type, remove the pieces from the Agent-spec/graph we won't need.
        self.exploration_spec = kwargs.pop("exploration_spec", None)
        optimizer_spec = kwargs.pop("optimizer_spec", None)
        observe_spec = kwargs.pop("observe_spec", None)

        self.feed_previous_action_through_nn = feed_previous_action_through_nn
        self.feed_previous_reward_through_nn = feed_previous_reward_through_nn

        # Run everything in a single process.
        if self.type == "single":
            environment_spec = environment_spec or self.default_environment_spec
            update_spec = kwargs.pop("update_spec", None)
        # Actors won't need to learn (no optimizer needed in graph).
        elif self.type == "actor":
            optimizer_spec = None
            update_spec = kwargs.pop("update_spec", dict(do_updates=False))
            environment_spec = environment_spec or self.default_environment_spec
        # Learners won't need to explore (act) or observe (insert into Queue).
        else:
            observe_spec = None
            update_spec = kwargs.pop("update_spec", None)
            environment_spec = None

        # Add previous-action/reward preprocessors to env-specific preprocessor spec.
        # TODO: remove this empty hard-coded preprocessor.
        # NOTE(review): the default spec below is always constructed (even if `preprocessing_spec` is
        # given), so `action_space` presumably must always be present in kwargs - confirm.
        self.preprocessing_spec = kwargs.pop(
            "preprocessing_spec",
            dict(
                type="dict-preprocessor-stack",
                preprocessors=dict(
                    # Flatten actions.
                    previous_action=[
                        dict(type="reshape",
                             flatten=True,
                             flatten_categories=kwargs.get(
                                 "action_space").num_categories)
                    ],
                    # Reshape reward to shape (1,), so that it can be concatenated by the Concat layer.
                    previous_reward=[dict(type="reshape", new_shape=(1, ))])))

        # Limit communication in distributed mode between each actor and the learner (never between actors).
        execution_spec = kwargs.pop("execution_spec", None)
        if execution_spec is not None and execution_spec.get(
                "mode") == "distributed":
            default_dict(
                execution_spec["session_config"],
                dict(type="monitored-training-session",
                     allow_soft_placement=True,
                     device_filters=["/job:learner/task:0"] + ([
                         "/job:actor/task:{}".format(
                             execution_spec["distributed_spec"]["task_index"])
                     ] if self.type == "actor" else ["/job:learner/task:0"])))
            # If Actor, make non-chief in either case (even if task idx == 0).
            if self.type == "actor":
                execution_spec["distributed_spec"]["is_chief"] = False
                # Hard-set device to the CPU for actors.
                execution_spec["device_strategy"] = "custom"
                execution_spec[
                    "default_device"] = "/job:{}/task:{}/cpu".format(
                        self.type,
                        execution_spec["distributed_spec"]["task_index"])

        self.policy_spec = kwargs.pop("policy_spec", dict())
        # TODO: Create some auto-setting based on LSTM inside the NN.
        default_dict(
            self.policy_spec,
            dict(type="shared-value-function-policy",
                 deterministic=False,
                 reuse_variable_scope="shared-policy",
                 action_space=kwargs.get("action_space")))

        # Now that we fixed the Agent's spec, call the super constructor.
        super(IMPALAAgent,
              self).__init__(discount=discount,
                             preprocessing_spec=self.preprocessing_spec,
                             network_spec=self.network_spec,
                             policy_spec=self.policy_spec,
                             exploration_spec=self.exploration_spec,
                             optimizer_spec=optimizer_spec,
                             observe_spec=observe_spec,
                             update_spec=update_spec,
                             execution_spec=execution_spec,
                             name=kwargs.pop(
                                 "name", "impala-{}-agent".format(self.type)),
                             **kwargs)
        # Always use 1st learner as the parameter server for all policy variables.
        if self.execution_spec["mode"] == "distributed" and self.execution_spec[
                "distributed_spec"]["cluster_spec"]:
            self.policy.propagate_sub_component_properties(
                dict(device=dict(variables="/job:learner/task:0/cpu")))

        # Check whether we have an RNN.
        self.has_rnn = self.policy.neural_network.has_rnn()
        # Check, whether we are running with GPU.
        self.has_gpu = self.execution_spec["gpu_spec"]["gpus_enabled"] is True and \
            self.execution_spec["gpu_spec"]["num_gpus"] > 0

        # Some FIFO-queue specs.
        # Actions/rewards only get their own queue entry if they are not already part of the states.
        self.fifo_queue_keys = ["terminals", "states"] + \
                               (["actions"] if not self.feed_previous_action_through_nn else []) + \
                               (["rewards"] if not self.feed_previous_reward_through_nn else []) + \
                               ["action_probs"] + \
                               (["initial_internal_states"] if self.has_rnn else [])
        # Define FIFO record space.
        # Note that only states and internal_states (RNN) contain num-steps+1 items, all other sub-records only contain
        # num-steps items.
        self.fifo_record_space = Dict(
            {
                "terminals":
                bool,
                "action_probs":
                FloatBox(shape=(self.action_space.num_categories, )),
            },
            add_batch_rank=False,
            add_time_rank=self.worker_sample_size)
        self.fifo_record_space["states"] = self.state_space.with_time_rank(
            self.worker_sample_size + 1)
        # Add action and rewards to state or do they have an extra channel?
        if self.feed_previous_action_through_nn:
            self.fifo_record_space["states"]["previous_action"] = \
                self.action_space.with_time_rank(self.worker_sample_size + 1)
        else:
            self.fifo_record_space[
                "actions"] = self.action_space.with_time_rank(
                    self.worker_sample_size)
        # Must depend on the reward flag (not the action flag), so that the record space stays
        # consistent with `self.fifo_queue_keys` above when both flags differ.
        if self.feed_previous_reward_through_nn:
            self.fifo_record_space["states"]["previous_reward"] = FloatBox(
                add_time_rank=self.worker_sample_size + 1)
        else:
            self.fifo_record_space["rewards"] = FloatBox(
                add_time_rank=self.worker_sample_size)

        if self.has_rnn:
            self.fifo_record_space[
                "initial_internal_states"] = self.internal_states_space.with_time_rank(
                    False)

        # Create our FIFOQueue (actors will enqueue, learner(s) will dequeue).
        # In distributed mode with a cluster spec, the queue is pinned to the 1st learner's CPU.
        self.fifo_queue = FIFOQueue.from_spec(
            fifo_queue_spec or dict(capacity=1),
            reuse_variable_scope="shared-fifo-queue",
            only_insert_single_records=True,
            record_space=self.fifo_record_space,
            device="/job:learner/task:0/cpu"
            if self.execution_spec["mode"] == "distributed"
            and self.execution_spec["distributed_spec"]["cluster_spec"] else
            None)

        # Remove `states` key from input_spaces: not needed.
        del self.input_spaces["states"]

        # Add all our sub-components to the core.
        if self.type == "single":
            # Nothing is added to the root component here for the single-process type.
            pass

        elif self.type == "actor":
            # No learning, no loss function.
            self.loss_function = None
            # A Dict Splitter to split things from the EnvStepper.
            self.env_output_splitter = ContainerSplitter(
                tuple_length=4, scope="env-output-splitter")

            self.states_dict_splitter = None

            # Slice some data from the EnvStepper (e.g only first internal states are needed).
            self.internal_states_slicer = Slice(scope="internal-states-slicer",
                                                squeeze=True)
            # Merge back to insert into FIFO.
            self.fifo_input_merger = DictMerger(*self.fifo_queue_keys)

            # Dummy Flattener to calculate action-probs space.
            dummy_flattener = ReShape(
                flatten=True,
                flatten_categories=self.action_space.num_categories)
            self.environment_stepper = EnvironmentStepper(
                environment_spec=environment_spec,
                actor_component_spec=ActorComponent(self.preprocessor,
                                                    self.policy,
                                                    self.exploration),
                state_space=self.state_space.with_batch_rank(),
                reward_space=
                float,  # TODO <- float64 for deepmind? may not work for other envs
                internal_states_space=self.internal_states_space,
                num_steps=self.worker_sample_size,
                add_previous_action_to_state=True,
                add_previous_reward_to_state=True,
                add_action_probs=True,
                action_probs_space=dummy_flattener.get_preprocessed_space(
                    self.action_space))
            sub_components = [
                self.environment_stepper, self.env_output_splitter,
                self.internal_states_slicer, self.fifo_input_merger,
                self.fifo_queue
            ]
        # Learner.
        else:
            self.environment_stepper = None

            # A Dict splitter to split up items from the queue.
            self.fifo_input_merger = None
            self.fifo_output_splitter = ContainerSplitter(
                *self.fifo_queue_keys, scope="fifo-output-splitter")
            self.states_dict_splitter = ContainerSplitter(
                *list(self.fifo_record_space["states"].keys()),
                scope="states-dict-splitter")
            self.internal_states_slicer = None

            self.transposer = Transpose(
                scope="transposer", device=dict(ops="/job:learner/task:0/cpu"))
            self.staging_area = StagingArea(num_data=len(self.fifo_queue_keys))

            # Create an IMPALALossFunction with some parameters.
            self.loss_function = IMPALALossFunction(
                discount=self.discount,
                weight_pg=weight_pg,
                weight_baseline=weight_baseline,
                weight_entropy=weight_entropy,
                slice_actions=self.feed_previous_action_through_nn,
                slice_rewards=self.feed_previous_reward_through_nn,
                device="/job:learner/task:0/gpu")

            # Policy variables are assigned the learner's CPU device, policy ops its GPU device.
            self.policy.propagate_sub_component_properties(
                dict(device=dict(variables="/job:learner/task:0/cpu",
                                 ops="/job:learner/task:0/gpu")))
            for component in [
                    self.staging_area, self.preprocessor, self.optimizer
            ]:
                component.propagate_sub_component_properties(
                    dict(device="/job:learner/task:0/gpu"))

            sub_components = [
                self.fifo_output_splitter, self.fifo_queue,
                self.states_dict_splitter, self.transposer, self.staging_area,
                self.preprocessor, self.policy, self.loss_function,
                self.optimizer
            ]

        if self.type != "single":
            # Add all the agent's sub-components to the root.
            self.root_component.add_components(*sub_components)

            # Define the Agent's (root Component's) API.
            self.define_graph_api(*sub_components)

        if self.type != "single" and self.auto_build:
            if self.type == "learner":
                # Learner graphs are built with the 1st learner's CPU as build- and global-variable device.
                build_options = dict(
                    build_device_context="/job:learner/task:0/cpu",
                    pin_global_variable_device="/job:learner/task:0/cpu")
                self._build_graph([self.root_component],
                                  self.input_spaces,
                                  optimizer=self.optimizer,
                                  build_options=build_options)
            else:
                self._build_graph([self.root_component],
                                  self.input_spaces,
                                  optimizer=self.optimizer,
                                  build_options=None)

            self.graph_built = True

            # NOTE(review): this branch looks up the "staging-area" sub-component, which is only added
            # for learners above - presumably GPUs are never enabled for actors; confirm.
            if self.has_gpu:
                # Get 1st return op of API-method `stage` of sub-component `staging-area` (which is the stage-op).
                self.stage_op = self.root_component.sub_components["staging-area"].api_methods["stage"]. \
                    out_op_columns[0].op_records[0].op
                # Initialize the stage.
                self.graph_executor.monitored_session.run_step_fn(
                    lambda step_context: step_context.session.run(self.stage_op
                                                                  ))

                # TODO remove after full refactor.
                self.dequeue_op = self.root_component.sub_components["fifo-queue"].api_methods["get_records"]. \
                    out_op_columns[0].op_records[0].op
            if self.type == "actor":
                self.enqueue_op = self.root_component.sub_components["fifo-queue"].api_methods["insert_records"]. \
                    out_op_columns[0].op_records[0].op
コード例 #26
0
ファイル: test_ring_buffer.py プロジェクト: rosea-tf/surreal
class TestRingBufferMemory(unittest.TestCase):
    """
    Test suite for the RingBuffer memory component.

    Insertion and retrieval are checked much like for the replay memory; on top of that, the
    episode bookkeeping ("num-episodes", "episode-indices") and the "latest records" semantics
    are covered. Every test runs once per backend in `(None, "python")`.
    """

    record_space = Dict(
        states=dict(state1=float, state2=float),
        actions=dict(action1=float),
        rewards=float,
        terminals=BoolBox(),
        sequence_indices=BoolBox(),
        add_batch_rank=True
    )
    # Generic memory variables.
    memory_variables = ["size", "index"]

    # Ring buffer variables
    ring_buffer_variables = ["size", "index", "num-episodes", "episode-indices"]
    capacity = 10

    input_spaces = dict(
        records=record_space,
        num_records=int,
        num_episodes=int
    )
    input_spaces_no_episodes = dict(
        records=record_space,
        num_records=int,
    )

    def _build_test(self, backend):
        """Creates a fresh RingBuffer for `backend` and wraps it into a ComponentTest."""
        buffer = RingBuffer(capacity=self.capacity, backend=backend)
        return ComponentTest(component=buffer, input_spaces=self.input_spaces)

    def _read_variables(self, test):
        """Fetches the current values of all ring buffer variables from `test`."""
        return test.get_variable_values(self.ring_buffer_variables)

    @staticmethod
    def _insert(test, records):
        """Pushes `records` into the buffer via the `insert_records` API."""
        test.test(("insert_records", records), expected_outputs=None)

    @staticmethod
    def _assert_latest_slices(test, inserted):
        """Checks for n=1..5 that `get_records(n)` yields exactly the last n of `inserted`."""
        for n in range(1, 6):
            fetched = test.test(("get_records", n), expected_outputs=None)
            recursive_assert_almost_equal(fetched["actions"]["action1"], inserted["actions"]["action1"][-n:])
            recursive_assert_almost_equal(fetched["states"]["state2"], inserted["states"]["state2"][-n:])
            recursive_assert_almost_equal(fetched["terminals"], inserted["terminals"][-n:])

    def test_capacity_with_episodes(self):
        """
        Inserts capacity+1 non-terminal records and checks size, index and episode variables.

        Episode semantics themselves are covered by the other tests.
        """
        for backend in (None, "python"):
            test = self._build_test(backend)

            # Everything must be zeroed before the first insert.
            before = self._read_variables(test)
            self.assertEqual(before["size"], 0)
            self.assertEqual(before["index"], 0)
            self.assertEqual(before["num-episodes"], 0)
            self.assertEqual(np.sum(before["episode-indices"]), 0)

            # One record more than fits. Unlike in the replay test, it matters here whether
            # records are terminal: this checks that the episode-index update copes with an
            # insert that contains no terminal at all.
            observation = non_terminal_records(self.record_space, self.capacity + 1)
            self._insert(test, observation)

            after = self._read_variables(test)
            # A full buffer reports its capacity as size.
            self.assertEqual(after["size"], self.capacity)
            # The write index wrapped around (modulo capacity) to 1.
            self.assertEqual(after["index"], 1)
            self.assertEqual(after["num-episodes"], 0)
            self.assertEqual(np.sum(after["episode-indices"]), 0)

            # Fetching n records must return exactly the n most recent ones.
            self._assert_latest_slices(test, observation)

    def test_episode_indices_when_inserting(self):
        """
        Checks episode count and episode indices after inserting terminal records.
        """
        for backend in (None, "python"):
            test = self._build_test(backend)

            # Start with one single terminal record.
            self._insert(test, terminal_records(self.record_space, 1))

            variables = self._read_variables(test)
            # That makes one episode ...
            self.assertEqual(variables["num-episodes"], 1)
            # ... whose index is 0, so the index sum stays 0.
            self.assertEqual(sum(variables["episode-indices"]), 0)

            # Follow up with 1 non-terminal and then 1 terminal record.
            self._insert(test, non_terminal_records(self.record_space, 1))
            self._insert(test, terminal_records(self.record_space, 1))

            # Two episodes are expected now, ending at indices 0 and 2.
            variables = self._read_variables(test)
            episode_index_values = variables["episode-indices"]

            print('Episode indices after = {}'.format(episode_index_values))
            self.assertEqual(variables["num-episodes"], 2)
            self.assertEqual(episode_index_values[1], 2)

    def test_only_terminal_with_episodes(self):
        """
        Edge case: the buffer is filled with terminal records only.
        """
        for backend in (None, "python"):
            test = self._build_test(backend)
            self._insert(test, terminal_records(self.record_space, self.capacity))

            variables = self._read_variables(test)
            episode_index_values = variables["episode-indices"]

            self.assertEqual(variables["num-episodes"], self.capacity)
            # The i-th episode must end at position i.
            for position in range(self.capacity):
                self.assertEqual(episode_index_values[position], position)

    def test_episode_fetching(self):
        """
        Checks that the most recent episodes can be retrieved correctly.
        """
        for backend in (None, "python"):
            test = self._build_test(backend)

            # 2 non-terminals followed by 1 terminal -> one episode of length 3.
            self._insert(test, non_terminal_records(self.record_space, 2))
            self._insert(test, terminal_records(self.record_space, 1))

            variables = self._read_variables(test)
            self.assertEqual(variables["num-episodes"], 1)
            # Only the first episode slot is used and points at index 2.
            expected_indices = [2] + [0] * (self.capacity - 1)
            recursive_assert_almost_equal(variables["episode-indices"], expected_indices)

            # Asking for one episode returns the three records.
            episode = test.test(("get_episodes", 1), expected_outputs=None)
            recursive_assert_almost_equal(episode["terminals"], [0, 0, 1])

            # Asking for two episodes still returns just the one that exists.
            episode = test.test(("get_episodes", 2), expected_outputs=None)
            recursive_assert_almost_equal(episode["terminals"], [0, 0, 1])

            # Add 7 non-terminals.
            self._insert(test, non_terminal_records(self.record_space, 7))

            variables = self._read_variables(test)
            # The episode indices must be unchanged.
            recursive_assert_almost_equal(variables["episode-indices"], expected_indices)
            # 2 + 1 + 7 inserts at capacity 10 -> the write index is back at 0.
            self.assertEqual(variables["index"], 0)

            # One more terminal gives the terminal buffer the layout [1 0 1 0 0 0 0 0 0 0].
            self._insert(test, terminal_records(self.record_space, 1))

            variables = self._read_variables(test)
            recursive_assert_almost_equal(variables["num-episodes"], 2)

            # Fetching 2 episodes is expected to yield:
            # - 10 time steps,
            # - 2 terminal values of 1,
            # - the terminals one index apart (due to the insertion order).
            episodes = test.test(("get_episodes", 2), expected_outputs=None)
            terminals = episodes['terminals']
            self.assertEqual(len(terminals), self.capacity)
            self.assertEqual(terminals[0], True)
            self.assertEqual(terminals[2], True)

    def test_latest_batch(self):
        """
        Checks that `get_records` returns the latest steps.
        """
        for backend in (None, "python"):
            test = self._build_test(backend)

            # Put in 5 records and read all of them back.
            observation = non_terminal_records(self.record_space, 5)
            self._insert(test, observation)
            batch = test.test(("get_records", 5), expected_outputs=None)
            recursive_assert_almost_equal(batch, observation)

            # Then add another `capacity` records ...
            observation = non_terminal_records(self.record_space, self.capacity)
            self._insert(test, observation)

            # ... so that fetching `capacity` records yields exactly those last 10.
            batch = test.test(("get_records", self.capacity), expected_outputs=None)
            recursive_assert_almost_equal(batch, observation)

            # Fetching n records must return exactly the n most recent ones.
            self._assert_latest_slices(test, observation)
コード例 #27
0
class TestPythonPrioritizedReplay(unittest.TestCase):
    """
    Tests sampling and insertion behaviour of the mem_prioritized_replay module.

    Both `MemPrioritizedReplay` and `ApexMemory` are driven directly through their Python
    methods here (no `ComponentTest` harness is involved).
    """
    # Record layout used for the MemPrioritizedReplay tests.
    record_space = Dict(states=dict(state1=float, state2=float),
                        actions=dict(action1=float),
                        reward=float,
                        terminals=BoolBox(),
                        add_batch_rank=True)
    # Record layout used for the ApexMemory tests (carries an extra `weights` entry).
    apex_space = Dict(states=FloatBox(shape=(4, )),
                      actions=FloatBox(shape=(2, )),
                      reward=float,
                      terminals=BoolBox(),
                      weights=FloatBox(),
                      add_batch_rank=True)

    memory_variables = ["size", "index", "max-priority"]

    capacity = 10
    alpha = 1.0
    beta = 1.0

    max_priority = 1.0

    input_spaces = dict(
        # insert: records
        records=record_space,
        # get_records: num_records
        num_records=int,
        # update_records: indices, update
        indices=IntBox(add_batch_rank=True),
        update=FloatBox(add_batch_rank=True))

    # TODO These methods are all graph fns now -> unify backend tests.
    def test_insert(self):
        """
        Simply tests insert op without checking internal logic.
        """
        memory = MemPrioritizedReplay(capacity=self.capacity,
                                      next_states=True,
                                      alpha=self.alpha,
                                      beta=self.beta)
        memory.create_variables(self.input_spaces)

        # Single-record insert.
        observation = memory.record_space_flat.sample(size=1)
        memory.insert_records(observation)

        # Test chunked insert
        observation = memory.record_space_flat.sample(size=5)
        memory.insert_records(observation)

        # Also test Apex version
        memory = ApexMemory(capacity=self.capacity,
                            alpha=self.alpha,
                            beta=self.beta)
        observation = self.apex_space.sample(size=5)
        # NOTE(review): this tuple has 6 entries (states are passed twice, presumably as
        # state and next-state) whereas `test_update_records` inserts 5-entry tuples --
        # confirm which layout `ApexMemory.insert_records` expects.
        for i in range_(5):
            memory.insert_records(
                (observation['states'][i], observation['actions'][i],
                 observation['reward'][i], observation['terminals'][i],
                 observation['states'][i], observation["weights"][i]))

    def test_update_records(self):
        """
        Tests update records logic.

        For each memory type: insert records, sample a batch, check that one index is
        returned per requested record and push new priorities for those indices.
        """
        memory = MemPrioritizedReplay(capacity=self.capacity, next_states=True)
        memory.create_variables(self.input_spaces)

        # Insert a few Elements.
        observation = memory.record_space_flat.sample(size=2)
        memory.insert_records(observation)

        # Fetch elements and their indices.
        num_records = 2
        batch = memory.get_records(num_records)
        indices = batch[1]
        self.assertEqual(num_records, len(indices))

        # Does not return anything.
        memory.update_records(indices, np.asarray([0.1, 0.2]))

        # Test apex memory.
        memory = ApexMemory(capacity=self.capacity,
                            alpha=self.alpha,
                            beta=self.beta)
        observation = self.apex_space.sample(size=5)
        for i in range_(5):
            memory.insert_records(
                (ray_compress(observation["states"][i]),
                 observation["actions"][i], observation["reward"][i],
                 observation["terminals"][i], observation["weights"][i]))

        # Fetch elements and their indices.
        num_records = 5
        batch = memory.get_records(num_records)
        indices = batch[1]
        self.assertEqual(num_records, len(indices))

        # Does not return anything. One update value per fetched index (previously 10
        # values were generated for 5 indices).
        memory.update_records(indices, np.random.uniform(size=num_records))

    def test_segment_tree_insert_values(self):
        """
        Tests if segment tree inserts into correct positions.
        """
        memory = MemPrioritizedReplay(capacity=self.capacity,
                                      next_states=True,
                                      alpha=self.alpha,
                                      beta=self.beta)
        memory.create_variables(self.input_spaces)

        # Smallest power of two that is >= capacity; the trees are expected to hold
        # twice that many slots (asserted below).
        priority_capacity = 1
        while priority_capacity < self.capacity:
            priority_capacity *= 2

        sum_segment_values = memory.merged_segment_tree.sum_segment_tree.values
        min_segment_values = memory.merged_segment_tree.min_segment_tree.values

        # Empty trees: sum-tree is all zeros, min-tree sums up to infinity.
        self.assertEqual(sum(sum_segment_values), 0)
        self.assertEqual(sum(min_segment_values), float('inf'))
        self.assertEqual(len(sum_segment_values), 2 * priority_capacity)
        self.assertEqual(len(min_segment_values), 2 * priority_capacity)

        # Insert 1 Element.
        observation = memory.record_space_flat.sample(size=1)
        memory.insert_records(observation)

        # Check insert positions
        # Initial insert is at priority capacity
        print(sum_segment_values)
        print(min_segment_values)
        start = priority_capacity

        # Walk from the first leaf up to the root (index 1) by halving the index.
        while start >= 1:
            self.assertEqual(sum_segment_values[start], 1.0)
            self.assertEqual(min_segment_values[start], 1.0)
            start //= 2

        # Insert another Element.
        observation = memory.record_space_flat.sample(size=1)
        memory.insert_records(observation)

        # Index shifted 1
        start = priority_capacity + 1
        self.assertEqual(sum_segment_values[start], 1.0)
        self.assertEqual(min_segment_values[start], 1.0)
        start //= 2
        while start >= 1:
            # 1 + 1 is 2 on the segment.
            self.assertEqual(sum_segment_values[start], 2.0)
            # min is still 1.
            self.assertEqual(min_segment_values[start], 1.0)
            start //= 2

    def test_tree_insert(self):
        """
        Tests inserting into the segment tree and querying segments.
        """
        memory = ApexMemory(capacity=4)
        tree = memory.merged_segment_tree.sum_segment_tree
        tree.insert(2, 1.0)
        tree.insert(3, 3.0)
        # unittest assertions instead of bare `assert`, so the checks survive `python -O`.
        self.assertTrue(np.isclose(tree.get_sum(), 4.0))
        self.assertTrue(np.isclose(tree.get_sum(0, 2), 0.0))
        self.assertTrue(np.isclose(tree.get_sum(0, 3), 1.0))
        self.assertTrue(np.isclose(tree.get_sum(2, 3), 1.0))
        # NOTE(review): an end of -1 presumably counts from the back of the tree -- confirm
        # against `get_sum`.
        self.assertTrue(np.isclose(tree.get_sum(2, -1), 1.0))
        self.assertTrue(np.isclose(tree.get_sum(2, 4), 4.0))

    def test_prefixsum_idx(self):
        """
        Tests fetching the index corresponding to a prefix sum.
        """
        # Only indices 2 and 3 carry priority mass.
        memory = ApexMemory(capacity=4)
        tree = memory.merged_segment_tree.sum_segment_tree
        tree.insert(2, 1.0)
        tree.insert(3, 3.0)

        self.assertEqual(tree.index_of_prefixsum(0.0), 2)
        self.assertEqual(tree.index_of_prefixsum(0.5), 2)
        self.assertEqual(tree.index_of_prefixsum(0.99), 2)
        self.assertEqual(tree.index_of_prefixsum(1.01), 3)
        self.assertEqual(tree.index_of_prefixsum(3.0), 3)
        self.assertEqual(tree.index_of_prefixsum(4.0), 3)

        # All four indices carry priority mass.
        memory = ApexMemory(capacity=4)
        tree = memory.merged_segment_tree.sum_segment_tree
        tree.insert(0, 0.5)
        tree.insert(1, 1.0)
        tree.insert(2, 1.0)
        tree.insert(3, 3.0)
        self.assertEqual(tree.index_of_prefixsum(0.0), 0)
        self.assertEqual(tree.index_of_prefixsum(0.55), 1)
        self.assertEqual(tree.index_of_prefixsum(0.99), 1)
        self.assertEqual(tree.index_of_prefixsum(1.51), 2)
        self.assertEqual(tree.index_of_prefixsum(3.0), 3)
        self.assertEqual(tree.index_of_prefixsum(5.50), 3)
コード例 #28
0
class TestFIFOQueue(unittest.TestCase):
    """
    Tests insertion and retrieval behaviour of the FIFOQueue class.
    """
    record_space = Dict(
        states=dict(state1=float, state2=float, state3=bool),
        actions=dict(action1=float, action2=Tuple(float, float)),
        reward=float,
        terminals=BoolBox(),
        add_batch_rank=True
    )
    capacity = 10

    input_spaces = dict(records=record_space, num_records=int)

    def test_enqueue_dequeue(self):
        """
        Enqueues 1 + 5 records, dequeues 5 and checks queue size and FIFO order along the way.
        """
        queue = FIFOQueue(capacity=self.capacity, record_space=self.record_space)
        harness = ComponentTest(component=queue, input_spaces=self.input_spaces)

        first_record = self.record_space.sample(size=1)
        harness.test(("insert_records", first_record), expected_outputs=None)
        harness.test("get_size", expected_outputs=1)

        further_records = self.record_space.sample(size=5)
        harness.test(("insert_records", further_records), expected_outputs=None)
        harness.test("get_size", expected_outputs=6)

        # The first five records out are the single first record followed by the first
        # four of the second batch (compared leaf by leaf on the flattened records).
        flat_first = flatten_op(first_record)
        flat_further = flatten_op(further_records)
        flat_expected = {
            key: np.concatenate((head, tail[:4]))
            for (key, head), tail in zip(flat_first.items(), flat_further.values())
        }
        expected = unflatten_op(flat_expected)

        harness.test(("get_records", 5), expected_outputs=expected)
        harness.test("get_size", expected_outputs=1)

    def test_capacity(self):
        """
        Tests if insert correctly blocks when capacity is reached.
        """
        queue = FIFOQueue(capacity=self.capacity, record_space=self.record_space)
        harness = ComponentTest(component=queue, input_spaces=self.input_spaces)

        def dequeue_after_delay(expected_):
            # Give the main thread time to run into the full queue, then pull two
            # records so that it can continue.
            time.sleep(2)
            harness.test(("get_records", 2), expected_outputs=expected_)

        # One more record than the queue can hold.
        records = self.record_space.sample(size=self.capacity + 1)

        # The helper thread is expected to receive the first two inserted records.
        flat_expected = {key: value[:2] for key, value in flatten_op(records).items()}
        expected = unflatten_op(flat_expected)

        # The helper thread saves this one from getting stuck due to capacity overflow.
        helper = threading.Thread(target=dequeue_after_delay, args=(expected, ))
        helper.start()

        print("Going over capacity: blocking ...")
        harness.test(("insert_records", records), expected_outputs=None)
        print("Dequeued some items in another thread. Unblocked.")

        helper.join()

    def test_fifo_queue_with_distributed_tf(self):
        """
        Tests if FIFO is correctly shared between a "source" and a "target" job in distributed tf.

        Each job is driven from its own thread; both queues are placed on the source job's device.
        """
        cluster_spec = dict(source=["localhost:22222"], target=["localhost:22223"])

        def build_harness(job):
            # Same queue definition for both sides; only the job name in the spec differs.
            queue = FIFOQueue(capacity=self.capacity,
                              device="/job:source/task:0/cpu",
                              record_space=self.record_space)
            distributed_spec = dict(job=job, task_index=0, cluster_spec=cluster_spec)
            return ComponentTest(
                component=queue,
                input_spaces=self.input_spaces,
                execution_spec=dict(mode="distributed", distributed_spec=distributed_spec)
            )

        def run_source():
            source_side = build_harness("source")
            # Insert elements from source.
            records = self.record_space.sample(size=self.capacity)
            print("inserting into source-side queue ...")
            source_side.test(("insert_records", records), expected_outputs=None)
            print("size of source-side queue:")
            print(source_side.test("get_size", expected_outputs=None))
            # Pull two records out again.
            print("pulling from source-side queue:")
            print(source_side.test(("get_records", 2), expected_outputs=None))

            source_side.terminate()

        def run_target():
            target_side = build_harness("target")
            # Dequeue elements in target.
            print("size of target-side queue:")
            print(target_side.test("get_size", expected_outputs=None))
            print("pulling from target-side queue:")
            print(target_side.test(("get_records", 5), expected_outputs=None))

            target_side.terminate()

        # Run both sides concurrently and wait for both to finish.
        source_thread = threading.Thread(target=run_source)
        target_thread = threading.Thread(target=run_target)
        source_thread.start()
        target_thread.start()

        source_thread.join()
        target_thread.join()
コード例 #29
0
class TestPrioritizedReplay(unittest.TestCase):
    """
    Tests sampling and insertion behaviour of the prioritized_replay module.
    """
    record_space = Dict(
        states=dict(state1=float, state2=float),
        actions=dict(action1=float),
        reward=float,
        terminals=BoolBox(),
        add_batch_rank=True
    )
    memory_variables = ["size", "index", "max-priority"]

    capacity = 10
    alpha = 1.0
    beta = 1.0

    max_priority = 1.0

    input_spaces = dict(
        records=record_space,                 # insert: records
        num_records=int,                      # get_records: num_records
        indices=IntBox(add_batch_rank=True),  # update_records: indices ...
        update=FloatBox(add_batch_rank=True)  # ... and update
    )

    def test_insert(self):
        """
        Runs a single-record insert; no internal state is inspected.
        """
        replay = PrioritizedReplay(capacity=self.capacity, alpha=self.alpha, beta=self.beta)
        harness = ComponentTest(component=replay, input_spaces=self.input_spaces)

        single_record = self.record_space.sample(size=1)
        harness.test(("insert_records", single_record), expected_outputs=None)

    def test_capacity(self):
        """
        Checks size and write-index bookkeeping when more than `capacity` records are inserted.
        """
        replay = PrioritizedReplay(capacity=self.capacity, alpha=self.alpha, beta=self.beta)
        harness = ComponentTest(component=replay, input_spaces=self.input_spaces)

        # Handles to the memory's internal state variables.
        variables = replay.get_variables(self.memory_variables, global_scope=False)
        size_var = variables['size']
        index_var = variables['index']
        max_priority_var = variables['max-priority']

        # Before any insert: empty buffer, write index at 0, max priority at 1.0.
        size_value, index_value, max_priority_value = harness.read_variable_values(
            size_var, index_var, max_priority_var
        )
        self.assertEqual(size_value, 0)
        self.assertEqual(index_value, 0)
        self.assertEqual(max_priority_value, 1.0)

        # Insert one record more than the memory can hold.
        overflow_batch = self.record_space.sample(size=self.capacity + 1)
        harness.test(("insert_records", overflow_batch), expected_outputs=None)

        size_value, index_value = harness.read_variable_values(size_var, index_var)
        # A full memory reports exactly `capacity` records ...
        self.assertEqual(size_value, self.capacity)
        # ... and the write index has wrapped around (modulo) to 1.
        self.assertEqual(index_value, 1)

    def test_batch_retrieve(self):
        """
        Checks that `get_records` returns as many records as requested, before and after overflow.
        """
        replay = PrioritizedReplay(capacity=self.capacity, alpha=self.alpha, beta=self.beta)
        harness = ComponentTest(component=replay, input_spaces=self.input_spaces)

        # Start with two stored records.
        stored = non_terminal_records(self.record_space, 2)
        harness.test(("insert_records", stored), expected_outputs=None)

        # Fetching two records must work.
        result = harness.test(("get_records", 2), expected_outputs=None)
        records = result[0]
        print('Result batch = {}'.format(records))
        self.assertEqual(2, len(records['terminals']))

        # Sampling may repeat indices, so five records can be drawn from two stored ones.
        result = harness.test(("get_records", 5), expected_outputs=None)
        records = result[0]
        self.assertEqual(5, len(records['terminals']))

        # Insert beyond capacity; all of these records are non-terminal.
        stored = non_terminal_records(self.record_space, self.capacity)
        harness.test(("insert_records", stored), expected_outputs=None)

        # A request for `capacity` records yields exactly that many.
        result = harness.test(("get_records", self.capacity), expected_outputs=None)
        records = result[0]
        self.assertEqual(self.capacity, len(records['terminals']))

    def test_update_records(self):
        """
        Inserts two records, samples them and pushes new priority values for the sampled indices.
        """
        replay = PrioritizedReplay(capacity=self.capacity)
        harness = ComponentTest(component=replay, input_spaces=self.input_spaces)

        stored = non_terminal_records(self.record_space, 2)
        harness.test(("insert_records", stored), expected_outputs=None)

        # Sample the records; the second output holds their indices.
        requested = 2
        result = harness.test(("get_records", requested), expected_outputs=None)
        sampled_indices = result[1]
        self.assertEqual(requested, len(sampled_indices))

        # One update value per sampled index; the call returns nothing.
        update_args = [sampled_indices, np.asarray([0.1, 0.2])]
        harness.test(("update_records", update_args), expected_outputs=None)

    def test_segment_tree_insert_values(self):
        """
        Checks which segment-tree slots change when records are inserted one at a time.
        """
        replay = PrioritizedReplay(capacity=self.capacity, alpha=self.alpha, beta=self.beta)
        harness = ComponentTest(component=replay, input_spaces=self.input_spaces)

        # Smallest power of two that is >= capacity.
        priority_capacity = 1
        while priority_capacity < self.capacity:
            priority_capacity *= 2

        tree_variables = replay.get_variables(
            ["sum-segment-tree", "min-segment-tree"], global_scope=False
        )
        sum_tree_var = tree_variables['sum-segment-tree']
        min_tree_var = tree_variables['min-segment-tree']
        sum_values, min_values = harness.read_variable_values(sum_tree_var, min_tree_var)

        # Empty trees: sum-tree is all zeros, min-tree sums up to infinity; both hold
        # 2 * priority_capacity slots.
        self.assertEqual(sum(sum_values), 0)
        self.assertEqual(sum(min_values), float('inf'))
        self.assertEqual(len(sum_values), 2 * priority_capacity)
        self.assertEqual(len(min_values), 2 * priority_capacity)

        # First insert.
        record = non_terminal_records(self.record_space, 1)
        harness.test(("insert_records", record), expected_outputs=None)

        sum_values, min_values = harness.read_variable_values(sum_tree_var, min_tree_var)
        print(sum_values)
        print(min_values)

        # The first record lands in slot `priority_capacity`; every node on the way up to
        # the root (index 1) must hold priority 1.0 in both trees.
        node = priority_capacity
        while node > 0:
            self.assertEqual(sum_values[node], 1.0)
            self.assertEqual(min_values[node], 1.0)
            node //= 2

        # Second insert.
        record = non_terminal_records(self.record_space, 1)
        harness.test(("insert_records", record), expected_outputs=None)

        sum_values, min_values = harness.read_variable_values(sum_tree_var, min_tree_var)
        print(sum_values)
        print(min_values)

        # The second record lands one slot further to the right.
        node = priority_capacity + 1
        self.assertEqual(sum_values[node], 1.0)
        self.assertEqual(min_values[node], 1.0)
        node //= 2
        while node > 0:
            # Both leaves are summed here: 1 + 1 = 2.
            self.assertEqual(sum_values[node], 2.0)
            # The minimum over both leaves is still 1.
            self.assertEqual(min_values[node], 1.0)
            node //= 2
コード例 #30
0
ファイル: grid_world.py プロジェクト: EmpereurCC/RLgraph_exp
    def __init__(self,
                 world="4x4",
                 save_mode=False,
                 action_type="udlr",
                 reward_function="sparse",
                 state_representation="discrete"):
        """
        Builds the grid map, derives state and action spaces from the chosen options and resets the env.

        Args:
            world (Union[str,List[str]]): Either a string to map into `MAPS` or a list of strings describing the rows
                of the world (e.g. ["S ", " G"] for a two-row/two-column world with start and goal state).

            save_mode (bool): Whether to replace holes ("H") in the map. If True, every "H" field is overwritten
                with "F" (note: the code writes "F", not a wall "W"). Default: False.

            action_type (str): Which action space to use. Choose between "udlr" (up, down, left, right), which is a
                discrete action space and "ftj" (forward + turn + jump), which is a container multi-discrete
                action space. "ftjb" is the same as "ftj", except that sub-action "jump" is a boolean.

            reward_function (str): One of
                sparse: hole=-5, fire=-3, goal=1, all other steps=-0.1
                rich: hole=-100, fire=-10, goal=50, all other steps=-0.1

            state_representation (str):
                - "discrete": An int representing the field on the grid, 0 meaning the upper left field, 1 the one
                    below, etc..
                - "xy": The x and y grid position tuple.
                - "xy+orientation": The x and y grid position tuple plus the orientation (if any) as tuple of 2 values
                    of the actor.
                - "camera": A 3-channel image where each field in the grid-world is one pixel and the 3 channels are
                    used to indicate different items in the scene (walls, holes, the actor, etc..).
        """
        # Build our map: a string is looked up in `self.MAPS`, anything else is used as the rows directly.
        if isinstance(world, str):
            self.description = world
            world = self.MAPS[world]
        else:
            self.description = "custom-map"

        # Turn the rows into a 2D array with one character per field.
        world = np.array(list(map(list, world)))
        # Apply safety switch: with `save_mode` on, every "H" field becomes "F".
        world[world == 'H'] = ("H" if not save_mode else "F")

        # `world` is a 2D numpy array that needs to be indexed using y/x pairs (first row, then column).
        self.world = world
        self.n_row, self.n_col = self.world.shape
        # The single-element unpacking requires exactly one "S" field in the map.
        (start_y, ), (start_x, ) = np.nonzero(self.world == "S")

        # Init pygame (if installed) for visualizations.
        if pygame is not None:
            self.pygame_field_size = 30
            pygame.init()
            self.pygame_agent = pygame.image.load(
                os.path.join(os.path.dirname(os.path.abspath(__file__)),
                             "images/agent.png"))
            # Create basic grid Surface for reusage.
            # NOTE(review): this runs before the attributes further down are set; presumably
            # `grid_to_surface` only needs the map and field size -- confirm.
            self.pygame_basic_surface = self.grid_to_surface()
            self.pygame_display_set = False

        # Figure out our state space.
        assert state_representation in [
            "discrete", "xy", "xy+orientation", "camera"
        ]
        self.state_representation = state_representation
        # Discrete states (single int, one value per grid field).
        if self.state_representation == "discrete":
            state_space = IntBox(self.n_row * self.n_col)
        # x/y position (2 ints).
        elif self.state_representation == "xy":
            state_space = IntBox(low=(0, 0),
                                 high=(self.n_col, self.n_row),
                                 shape=(2, ))
        # x/y position + orientation (4 ints: x, y and 2 orientation values).
        elif self.state_representation == "xy+orientation":
            state_space = IntBox(low=(0, 0, 0, 0),
                                 high=(self.n_col, self.n_row, 1, 1))
        # Camera outputting a 2D color image of the world (one pixel per field, 3 channels).
        else:
            state_space = IntBox(0, 255, shape=(self.n_row, self.n_col, 3))

        # Remember the start field so that it can serve as the default start position.
        self.default_start_pos = self.get_discrete_pos(start_x, start_y)
        self.discrete_pos = self.default_start_pos

        assert reward_function in ["sparse",
                                   "rich"]  # TODO: "potential"-based reward
        self.reward_function = reward_function

        # Store the goal position for proximity calculations (for "potential" reward function).
        # As with the start field, this requires exactly one "G" field in the map.
        (self.goal_y, ), (self.goal_x, ) = np.nonzero(self.world == "G")

        # Specify the actual action spaces: 4 discrete actions for "udlr", otherwise a Dict of
        # forward/turn/jump, where "jump" is an IntBox(2) for "ftj" and a BoolBox for anything else.
        self.action_type = action_type
        action_space = IntBox(4) if self.action_type == "udlr" else Dict(
            dict(forward=IntBox(3),
                 turn=IntBox(3),
                 jump=(IntBox(2) if self.action_type == "ftj" else BoolBox())))

        # Call the super's constructor.
        super(GridWorld, self).__init__(state_space=state_space,
                                        action_space=action_space)

        # Reset ourselves.
        self.state = None
        self.orientation = None  # int: 0, 90, 180, 270
        self.camera_pixels = None  # only used, if state_representation=='camera'
        self.reward = None
        self.is_terminal = None
        self.reset(randomize=False)