Python Dict Examples

Programming Language: Python

Namespace/Package Name: surreal.spaces

Class/Type: Dict

Examples at hotexamples.com: 11

Python Dict - 11 examples found. These are the top-rated real-world Python examples of surreal.spaces.Dict, extracted from open-source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

Dict(11)

Frequently Used Methods

Dict (11)

Example #1

Show file

File: dddqn.py Project: rosea-tf/surreal

 def __init__(self, config, name=None):
     """
     Builds every component of the agent from `config`.

     Args:
         config: Configuration object. Read directly here: `preprocessor`, `state_space`, `action_space`,
             `q_network`, `dueling_a_network`, `dueling_v_network`, `memory_capacity`, `memory_alpha`,
             `memory_beta`, `n_step` and `gamma`. `optimizer` and `epsilon` are read via `self.config`
             (presumably stored by the parent constructor - confirm against the base class).
         name: Optional name, forwarded unchanged to the parent constructor.
     """
     super().__init__(config, name)

     # Preprocessor (Phi) and the batched spaces it connects.
     self.Phi = Preprocessor.make(config.preprocessor)
     raw_state_space = Space.make(config.state_space).with_batch()
     self.x = self.Phi(raw_state_space)  # preprocessed states (x)
     self.a = Space.make(config.action_space).with_batch()  # actions (a)

     # Q-network with two named outputs ("A" over the action space, "V" as a batched Float),
     # each fed through its own pre-network adapter.
     dueling_output_space = Dict(A=self.a, V=Float().with_batch())
     dueling_adapters = {
         "A": {"pre_network": config.dueling_a_network},
         "V": {"pre_network": config.dueling_v_network},
     }
     self.Q = Network.make(
         network=config.q_network,
         input_space=self.x,
         output_space=dueling_output_space,
         adapters=dueling_adapters,
     )
     # Non-trainable copy of the Q-network.
     self.Qt = self.Q.copy(trainable=False)

     # Prioritized replay memory; next-state records are exposed under the "s_" key.
     record_fields = dict(s=self.x, a=self.a, r=float, t=bool, n=int)
     self.memory = PrioritizedReplayBuffer.make(
         record_space=Dict(record_fields, main_axes="B"),
         capacity=config.memory_capacity,
         alpha=config.memory_alpha,
         beta=config.memory_beta,
         next_record_setup={"s": "s_", "n_step": config.n_step},
     )

     # N-step component, loss, optimizer and the epsilon schedule (for epsilon greedy learning).
     self.n_step = NStep(config.gamma, n_step=config.n_step, n_step_only=True)
     self.L = DDDQNLoss()  # double/dueling/n-step Q-loss
     self.optimizer = Optimizer.make(self.config.optimizer)
     self.epsilon = Decay.make(self.config.epsilon)

     # Start out with a clean Preprocessor.
     self.Phi.reset()

Example #2

Show file

File: dads.py Project: rosea-tf/surreal

    def __init__(self, config, name=None):
        """
        Sets up counters, spaces, networks, the episode buffer, the low-level SAC and the supervised
        optimizer/loss from `config`.

        Args:
            config: Configuration object. Attributes read here: `preprocessor`, `state_space`, `action_space`,
                `episode_horizon`, `dim_skill_vectors`, `discrete_skills`, `policy_network`, `q_network`,
                `num_q_experts`, `episode_buffer_capacity`, `sac_config` and `supervised_optimizer`.
            name: Optional name, passed through to the parent constructor.
        """
        super().__init__(config, name)

        # Mode flag and step counters.
        self.inference = False  # True=planning mode. False="supervised+intrinsic-reward+model-learning" mode.
        self.he = 0  # Current step within He (total episode horizon).
        self.hz = 0  # Current step within Hz (repeat horizon for one selected skill)

        # Preprocessor plus batched state/action spaces.
        self.preprocessor = Preprocessor.make(config.preprocessor)
        batched_states = config.state_space.with_batch()
        self.s = self.preprocessor(batched_states)  # preprocessed states
        self.a = config.action_space.with_batch()  # actions (a)

        # Intrinsic rewards in He: a Float space with a single "Episode Horizon" main axis.
        horizon_axis = ("Episode Horizon", config.episode_horizon)
        self.ri = Float(main_axes=[horizon_axis])

        # Skill space (z): a bounded Float vector unless `discrete_skills` is anything other than
        # the literal False, in which case an Int space is used.
        if config.discrete_skills is False:
            self.z = Float(-1.0, 1.0, shape=(config.dim_skill_vectors,), main_axes="B")
        else:
            self.z = Int(config.dim_skill_vectors, main_axes="B")

        # Both networks consume the combined (state, skill) space.
        state_and_skill = dict(s=self.s, z=self.z)
        self.s_and_z = Dict(state_and_skill, main_axes="B")
        self.pi = Network.make(
            input_space=self.s_and_z,
            output_space=self.a,
            **config.policy_network
        )
        mixture_spec = {"type": "mixture", "num_experts": config.num_q_experts}
        self.q = Network.make(
            input_space=self.s_and_z,
            output_space=self.s,
            distributions=mixture_spec,
            **config.q_network
        )

        # FIFO buffer; `self.event_buffer_full` is registered as the `when_full` handler.
        buffer_fields = dict(s=self.s, z=self.z, a=self.a, t=bool)
        self.B = FIFOBuffer(
            Dict(buffer_fields),
            config.episode_buffer_capacity,
            when_full=self.event_buffer_full,
            next_record_setup={"s": "s_"},
        )

        # Low-level SAC.
        self.SAC = SAC(config=config.sac_config, name="SAC-level0")

        # Supervised model optimizer and its negative-log-likelihood loss over a mixture distribution.
        self.q_optimizer = Optimizer.make(config.supervised_optimizer)
        mixture = MixtureDistribution(num_experts=config.num_q_experts)
        self.Lsup = NegLogLikelihoodLoss(distribution=mixture)

        self.preprocessor.reset()

Example #3

Show file

    def __init__(self, config, name=None):
        """
        Creates the preprocessor, spaces, policy, Q-networks (plus non-trainable copies), memory,
        temperature variable, loss bookkeeping and optimizers from `config`.

        Args:
            config: Configuration object. Read directly here: `preprocessor`, `state_space`, `action_space`,
                `bounded_distribution_type`, `gumbel_softmax_temperature`, `policy_network`, `num_q_networks`,
                `q_network`, `n_step`, `memory_spec`, `initial_alpha`, `entropy_target` and `gamma`.
                The optimizer specs (`q_optimizer`, `policy_optimizer`, `alpha_optimizer`) are read via
                `self.config` (presumably stored by the parent constructor - confirm against the base class).
            name: Optional name, passed through to the parent constructor.
        """
        super().__init__(config, name)

        # Preprocessor and spaces.
        self.preprocessor = Preprocessor.make(config.preprocessor)
        batched_states = Space.make(config.state_space).with_batch()
        self.s = self.preprocessor(batched_states)  # preprocessed states (x)
        self.a = Space.make(config.action_space).with_batch()  # actions (a)
        self.a_soft = self.a.as_one_hot_float_space()  # soft-one-hot actions (if Int elements in action space)

        # Policy (π).
        policy_distributions = dict(
            bounded_distribution_type=config.bounded_distribution_type,
            discrete_distribution_type="gumbel-softmax",
            gumbel_softmax_temperature=config.gumbel_softmax_temperature,
        )
        self.pi = Network.make(
            distributions=policy_distributions,
            input_space=self.s,
            output_space=self.a,
            **config.policy_network
        )

        # The Q-networks and one non-trainable copy of each (target q-network(s)).
        self.Q = [
            Network.make(input_space=Dict(s=self.s, a=self.a), output_space=float, **config.q_network)
            for _ in range(config.num_q_networks)
        ]
        self.Qt = [q_net.copy(trainable=False) for q_net in self.Q]

        # Memory records; the "n" field is only added when n-step (> 1) is configured.
        base_fields = dict(s=self.s, a=self.a_soft, r=float, t=bool)
        n_step_fields = {"n": int} if config.n_step > 1 else {}
        record_space = Dict(default_dict(base_fields, n_step_fields), main_axes="B")
        self.memory = Memory.make(record_space=record_space, **config.memory_spec)

        # The temperature parameter α and the entropy target schedule.
        self.alpha = tf.Variable(config.initial_alpha, name="alpha", dtype=tf.float32)
        self.entropy_target = Decay.make(config.entropy_target)
        self.n_step = NStep(config.gamma, n_step=config.n_step, n_step_only=True)

        # SAC loss function and placeholders for the most recent loss values.
        self.L = SACLoss()
        self.Ls_critic = [0, 0]
        self.L_actor = 0
        self.L_alpha = 0

        # TEST
        self.log_pi = 0
        self.entropy_error_term = 0
        self.log_alpha = 0
        # END: TEST

        # One optimizer each for the Q-networks, the policy and alpha.
        self.optimizers = {
            "q": Optimizer.make(self.config.q_optimizer),
            "pi": Optimizer.make(self.config.policy_optimizer),
            "alpha": Optimizer.make(self.config.alpha_optimizer),
        }

        # Start out with a clean Preprocessor.
        self.preprocessor.reset()

Example #4

Show file

class TestFIFOBufferMemory(unittest.TestCase):
    """
    Tests the FIFOBuffer Component.
    """
    # Record layout shared by all tests; "B" is passed as the main axes spec.
    record_space = Dict(states=dict(state1=float, state2=float),
                        actions=dict(action1=float),
                        rewards=float,
                        terminals=bool,
                        main_axes="B")
    capacity = 10

    class _WhenFullCalled(Exception):
        """
        Sentinel raised by `when_full` so the test can verify that the callback itself fired
        (rather than some unrelated error inside the buffer).
        """

    def when_full(self, buffer):
        """
        Callback handed to the FIFOBuffer as `when_full`.

        Args:
            buffer: The object the buffer passes to the callback (only printed here).

        Raises:
            _WhenFullCalled: Always, so the calling test can detect the invocation.
        """
        print("Executing `when_full` on buffer={}".format(buffer))
        raise self._WhenFullCalled  # to catch

    def test_fifo_buffer(self):
        """
        Fills the buffer to one below capacity, then overfills it and expects `when_full` to fire;
        afterwards flushes the buffer and checks the returned data.
        """
        fifo_buffer = FIFOBuffer(record_space=self.record_space,
                                 capacity=self.capacity,
                                 when_full=self.when_full)

        # Not full.
        data = self.record_space.sample(self.capacity - 1)
        fifo_buffer.add_records(data)
        self.assertEqual(fifo_buffer.size, self.capacity - 1)

        # Full: expect `when_full` to be called.
        # NOTE: The previous `try: ...; raise AssertionError; except Exception: pass` construct also
        # swallowed its own AssertionError (a subclass of Exception) and therefore could never fail.
        data = self.record_space.sample(2)
        with self.assertRaises(self._WhenFullCalled):
            fifo_buffer.add_records(data)

        self.assertEqual(fifo_buffer.size, self.capacity)
        all_data = fifo_buffer.flush()
        # Flushing must leave the buffer empty and its index rewound.
        self.assertEqual(fifo_buffer.size, 0)
        self.assertEqual(fifo_buffer.index, 0)

        self.assertEqual(len(all_data["states"]["state1"]), self.capacity)
        self.assertEqual(len(all_data["states"]["state2"]), self.capacity)
        self.assertEqual(len(all_data["rewards"]), self.capacity)
        self.assertEqual(all_data["rewards"].dtype, np.float32)

Example #5

Show file

class TestReplayBuffer(unittest.TestCase):
    """
    Exercises record insertion and sampling on the ReplayBuffer.
    """
    # Record layout shared by all tests.
    record_space = Dict(states=dict(state1=float, state2=float),
                        actions=dict(action1=Int(3, shape=(3, ))),
                        reward=float,
                        terminals=Bool(),
                        next_states=dict(state1=float, state2=float),
                        main_axes="B")

    def test_insert(self):
        """
        Inserts records in several forms and only checks the buffer's `size` and `index` afterwards.
        """
        buffer = ReplayBuffer(record_space=self.record_space, capacity=4)
        # A fresh buffer is empty and points at slot 0.
        self.assertEqual(buffer.size, 0)
        self.assertEqual(buffer.index, 0)

        # A single record without batch rank.
        record = self.record_space.sample()
        buffer.add_records(record)
        self.assertEqual(buffer.size, 1)
        self.assertEqual(buffer.index, 1)

        # A single record with batch rank.
        record = self.record_space.sample(1)
        buffer.add_records(record)
        self.assertEqual(buffer.size, 2)
        self.assertEqual(buffer.index, 2)

        # Two batched records: the buffer (capacity 4) is now full and the index is back at 0.
        records = self.record_space.sample(2)
        buffer.add_records(records)
        self.assertEqual(buffer.size, 4)
        self.assertEqual(buffer.index, 0)

        # A single record without batch rank, but flagged via `single` (for performance reasons):
        # size stays at capacity while the index moves on.
        record = self.record_space.sample()
        buffer.add_records(record, single=True)
        self.assertEqual(buffer.size, 4)
        self.assertEqual(buffer.index, 1)

    def test_insert_over_capacity(self):
        """
        Inserts one record more than fits and checks the resulting `size` and `index`.
        """
        capacity = 10
        buffer = ReplayBuffer(record_space=self.record_space,
                              capacity=capacity)
        # A fresh buffer is empty and points at slot 0.
        self.assertEqual(buffer.size, 0)
        self.assertEqual(buffer.index, 0)

        # One more element than the capacity allows.
        records = self.record_space.sample(size=capacity + 1)
        buffer.add_records(records)

        # Size is capped at capacity ...
        self.assertEqual(buffer.size, capacity)
        # ... and the index has moved on to slot 1.
        self.assertEqual(buffer.index, 1)

    def test_get_records(self):
        """
        Checks record retrieval with one record, two records and a full buffer.
        """
        capacity = 10
        buffer = ReplayBuffer(record_space=self.record_space,
                              capacity=capacity)

        # Store exactly one record.
        inserted = self.record_space.sample(1)
        buffer.add_records(inserted)

        # Fetching a single record must return the one that was stored.
        fetched = buffer.get_records(num_records=1)
        self.assertEqual(len(fetched["terminals"]), 1)
        check(inserted, fetched)

        # Asking for more records than are stored yields duplicates.
        fetched = buffer.get_records(num_records=5)
        self.assertEqual(len(fetched["terminals"]), 5)
        # Only one record in the memory -> returned samples should all be the exact same.
        first_reward = fetched["reward"][0]
        for position in range(1, 5):
            check(first_reward, fetched["reward"][position])

        # Add a second record (w/o batch rank).
        inserted = self.record_space.sample()
        buffer.add_records(inserted)
        # Pull exactly two records and make sure they are NOT(!) the same.
        fetched = buffer.get_records(num_records=2)
        self.assertEqual(len(fetched["terminals"]), 2)
        self.assertNotEqual(fetched["reward"][0], fetched["reward"][1])

        # Push the buffer over its capacity.
        inserted = self.record_space.sample(capacity)
        buffer.add_records(inserted)

        # Exactly `capacity` records can be fetched.
        fetched = buffer.get_records(num_records=capacity)
        self.assertEqual(len(fetched["terminals"]), capacity)

Example #6

Show file

File: test_memories_generically.py Project: rosea-tf/surreal

class TestMemoriesGenerically(unittest.TestCase):
    """
    Tests different generic functionalities of Memories.
    """
    # Record layout with nested states/actions. Not referenced by the two tests visible in this class.
    record_space = Dict(
        states=dict(state1=float, state2=Float(shape=(2,))),
        actions=dict(action1=int),
        reward=float,
        terminals=bool,
        main_axes="B"
    )
    # Record layout without an explicit next-state field; the tests below pass the next state
    # under "s_" via `next_record_setup` instead.
    record_space_no_next_state = Dict(s=dict(s1=float, s2=float), a=dict(a1=Int(10)), r=float, t=Bool(), main_axes="B")

    # Class-level settings; the two tests below define their own local `capacity` and do not
    # read `alpha`, `beta` or `max_priority`.
    capacity = 10
    alpha = 1.0
    beta = 1.0
    max_priority = 1.0

    def test_next_state_handling(self):
        """
        Tests if next-states can be stored efficiently (not using any space!) in the memory.

        NOTE: The memory does not care about terminal signals, it will always return the n-next-in-memory state
        regardless of whether this is a useful state (terminal=False) or not (terminal=True). In case of a
        terminal=True, the next state (whether it be the true terminal state, the reset state, or any other random
        state) does not matter anyway.

        The inserted data is built such that each next state is exactly 0.1 larger than its state, which is
        what the `check` calls below verify.
        """
        capacity = 10
        batch_size = 2

        # Test all classes of memories.
        for class_ in [ReplayBuffer, PrioritizedReplayBuffer]:
            memory = class_(record_space=self.record_space_no_next_state, capacity=capacity,
                            next_record_setup=dict(s="s_"))

            # Insert n records (inserts must always be batch-size).
            data = dict(
                s=dict(s1=np.array([0.0, 1.0]), s2=np.array([2.0, 3.0])),
                a=np.array([0, 1]), r=np.array([-0.0, -1.0]), t=np.array([False, True]),
                s_=dict(s1=np.array([0.1, 1.1]), s2=np.array([2.1, 3.1]))
            )
            memory.add_records(data)

            # Check, whether inserting the wrong batch size raises Exception.
            # (`assert False` raises AssertionError, which the `except SurrealError` below is not
            # expected to catch, so a missing error surfaces as a test failure.)
            try:
                data = self.record_space_no_next_state.sample(batch_size + 1)
                data["s_"] = self.record_space_no_next_state["s"].sample(batch_size)
                memory.add_records(data)
                assert False, "ERROR: Should not get here. Error is expected."
            except SurrealError:
                pass

            # Assert we can now fetch a single record.
            retrieved_data = memory.get_records(num_records=1)
            self.assertEqual(1, len(retrieved_data["t"]))

            # Check the next state: either of the two inserted records may have been returned.
            if retrieved_data["s"]["s1"][0] == 0.0:
                self.assertTrue(retrieved_data["s_"]["s1"] == 0.1 and retrieved_data["s_"]["s2"] == 2.1)
            else:
                self.assertTrue(retrieved_data["s"]["s1"] == 1.0)
                self.assertTrue(retrieved_data["s_"]["s1"] == 1.1 and retrieved_data["s_"]["s2"] == 3.1)

            # Insert another 2xn records and then check for correct next-state returns when getting records.
            data = dict(
                s=dict(s1=np.array([0.1, 1.1]), s2=np.array([2.1, 3.1])),
                a=np.array([2, 3]), r=np.array([-2.0, -3.0]), t=np.array([False, False]),
                s_=dict(s1=np.array([0.2, 1.2]), s2=np.array([2.2, 3.2]))
            )
            memory.add_records(data)
            data = dict(
                s=dict(s1=np.array([0.2, 1.2]), s2=np.array([2.2, 3.2])),
                a=np.array([4, 5]), r=np.array([-4.0, -5.0]), t=np.array([True, True]),
                s_=dict(s1=np.array([0.3, 1.3]), s2=np.array([2.3, 3.3]))
            )
            memory.add_records(data)

            # Sample repeatedly to cover different (random) draws.
            for _ in range(20):
                retrieved_data = memory.get_records(num_records=2)
                self.assertEqual(2, len(retrieved_data["t"]))

                # Check the next states (always 0.1 larger than state).
                for i in range(2):
                    check(retrieved_data["s"]["s1"][i], retrieved_data["s_"]["s1"][i] - 0.1)
                    check(retrieved_data["s"]["s2"][i], retrieved_data["s_"]["s2"][i] - 0.1)

            # 3 inserts of 2 records each so far.
            self.assertTrue(memory.size == 6)

            # Insert up to capacity and check again.
            data = dict(
                s=dict(s1=np.array([0.3, 1.3]), s2=np.array([2.3, 3.3])),
                a=np.array([6, 7]), r=np.array([-6.0, -7.0]), t=np.array([True, False]),
                s_=dict(s1=np.array([0.4, 1.4]), s2=np.array([2.4, 3.4]))
            )
            memory.add_records(data)
            data = dict(
                s=dict(s1=np.array([0.4, 1.4]), s2=np.array([2.4, 3.4])),
                a=np.array([8, 9]), r=np.array([-8.0, -9.0]), t=np.array([False, False]),
                s_=dict(s1=np.array([0.5, 1.5]), s2=np.array([2.5, 3.5]))
            )
            memory.add_records(data)

            for _ in range(20):
                retrieved_data = memory.get_records(num_records=3)
                self.assertEqual(3, len(retrieved_data["t"]))

                # Check the next states (always 0.1 larger than state).
                for i in range(3):
                    check(retrieved_data["s"]["s1"][i], retrieved_data["s_"]["s1"][i] - 0.1)
                    check(retrieved_data["s"]["s2"][i], retrieved_data["s_"]["s2"][i] - 0.1)

            self.assertTrue(memory.size == 10)

            # Go a little bit (one batch) over capacity and check again.
            data = dict(
                s=dict(s1=np.array([0.5, 1.5]), s2=np.array([2.5, 3.5])),
                a=np.array([10, 11]), r=np.array([-10.0, -11.0]), t=np.array([True, True]),
                s_=dict(s1=np.array([0.6, 1.6]), s2=np.array([2.6, 3.6]))
            )
            memory.add_records(data)

            for _ in range(20):
                retrieved_data = memory.get_records(num_records=4)
                self.assertEqual(4, len(retrieved_data["t"]))

                # Check the next states (always 0.1 larger than state).
                for i in range(4):
                    check(retrieved_data["s"]["s1"][i], retrieved_data["s_"]["s1"][i] - 0.1)
                    check(retrieved_data["s"]["s2"][i], retrieved_data["s_"]["s2"][i] - 0.1)

            # Size stays capped at capacity after inserting beyond it.
            self.assertTrue(memory.size == 10)

    def test_next_state_handling_with_n_step(self):
        """
        Tests if next-states can be stored efficiently (not using any space!) in the memory using an n-step memory.

        NOTE: The memory does not care about terminal signals, it will always return the n-next-in-memory state
        regardless of whether this is a useful state (terminal=False) or not (terminal=True). In case of a
        terminal=True, the next state (whether it be the true terminal state, the reset state, or any other random
        state) does not matter anyway.

        The memory is created with `n_step=3` and the inserted data is built such that each (n-step) next state
        is exactly 0.3 larger than its state, which is what the `check` calls below verify.
        """
        capacity = 10
        batch_size = 2
        # Test all classes of memories.
        for class_ in [ReplayBuffer, PrioritizedReplayBuffer]:
            memory = class_(record_space=self.record_space_no_next_state, capacity=capacity,
                            next_record_setup=dict(s="s_", n_step=3))

            # Insert n records (inserts must always be batch-size).
            data = dict(
                s=dict(s1=np.array([0.0, 1.0]), s2=np.array([2.0, 3.0])),
                a=np.array([0, 1]), r=np.array([-0.0, -1.0]), t=np.array([False, True]),
                s_=dict(s1=np.array([0.3, 1.3]), s2=np.array([2.3, 3.3]))  # s' is now the n-step s'
            )
            memory.add_records(data)

            # Check, whether inserting the wrong batch size raises Exception.
            try:
                data = self.record_space_no_next_state.sample(batch_size + 1)
                data["s_"] = self.record_space_no_next_state["s"].sample(batch_size)
                memory.add_records(data)
                assert False, "ERROR: Should not get here. Error is expected."
            except SurrealError:
                pass

            # Assert we cannot pull samples yet. n-step is 3, so we need at least 3 elements in memory.
            try:
                memory.get_records(num_records=1)
                assert False, "ERROR: Should not get here. Error is expected."
            except SurrealError:
                pass

            # Insert another 2xn records and then check for correct next-state returns when getting records.
            data = dict(
                s=dict(s1=np.array([0.1, 1.1]), s2=np.array([2.1, 3.1])),
                a=np.array([2, 3]), r=np.array([-2.0, -3.0]), t=np.array([False, False]),
                s_=dict(s1=np.array([0.4, 1.4]), s2=np.array([2.4, 3.4]))  # s' is now the n-step s'
            )
            memory.add_records(data)
            data = dict(
                s=dict(s1=np.array([0.2, 1.2]), s2=np.array([2.2, 3.2])),
                a=np.array([4, 5]), r=np.array([-4.0, -5.0]), t=np.array([True, True]),
                s_=dict(s1=np.array([0.5, 1.5]), s2=np.array([2.5, 3.5]))  # s' is now the n-step s'
            )
            memory.add_records(data)

            # Sample repeatedly to cover different (random) draws.
            for _ in range(20):
                retrieved_data = memory.get_records(num_records=2)
                self.assertEqual(2, len(retrieved_data["t"]))

                # Check the n-step next states (always 0.3 larger than state).
                for i in range(2):
                    check(retrieved_data["s"]["s1"][i], retrieved_data["s_"]["s1"][i] - 0.3)
                    check(retrieved_data["s"]["s2"][i], retrieved_data["s_"]["s2"][i] - 0.3)

            # 3 inserts of 2 records each so far.
            self.assertTrue(memory.size == 6)

            # Insert up to capacity and check again.
            data = dict(
                s=dict(s1=np.array([0.3, 1.3]), s2=np.array([2.3, 3.3])),
                a=np.array([6, 7]), r=np.array([-6.0, -7.0]), t=np.array([True, False]),
                s_=dict(s1=np.array([0.6, 1.6]), s2=np.array([2.6, 3.6]))
            )
            memory.add_records(data)
            data = dict(
                s=dict(s1=np.array([0.4, 1.4]), s2=np.array([2.4, 3.4])),
                a=np.array([8, 9]), r=np.array([-8.0, -9.0]), t=np.array([False, False]),
                s_=dict(s1=np.array([0.7, 1.7]), s2=np.array([2.7, 3.7]))
            )
            memory.add_records(data)

            for _ in range(20):
                retrieved_data = memory.get_records(num_records=3)
                self.assertEqual(3, len(retrieved_data["t"]))

                # Check the n-step next states (always 0.3 larger than state).
                for i in range(3):
                    check(retrieved_data["s"]["s1"][i], retrieved_data["s_"]["s1"][i] - 0.3)
                    check(retrieved_data["s"]["s2"][i], retrieved_data["s_"]["s2"][i] - 0.3)

            self.assertTrue(memory.size == 10)

            # Go a little bit (two batches) over capacity and check again.
            data = dict(
                s=dict(s1=np.array([0.5, 1.5]), s2=np.array([2.5, 3.5])),
                a=np.array([10, 11]), r=np.array([-10.0, -11.0]), t=np.array([True, True]),
                s_=dict(s1=np.array([0.8, 1.8]), s2=np.array([2.8, 3.8]))
            )
            memory.add_records(data)
            data = dict(
                s=dict(s1=np.array([0.6, 1.6]), s2=np.array([2.6, 3.6])),
                a=np.array([10, 11]), r=np.array([-10.0, -11.0]), t=np.array([False, False]),
                s_=dict(s1=np.array([0.9, 1.9]), s2=np.array([2.9, 3.9]))
            )
            memory.add_records(data)

            for _ in range(20):
                retrieved_data = memory.get_records(num_records=4)
                self.assertEqual(4, len(retrieved_data["t"]))

                # Check the n-step next states (always 0.3 larger than state).
                for i in range(4):
                    check(retrieved_data["s"]["s1"][i], retrieved_data["s_"]["s1"][i] - 0.3)
                    check(retrieved_data["s"]["s2"][i], retrieved_data["s_"]["s2"][i] - 0.3)

            # Size stays capped at capacity after inserting beyond it.
            self.assertTrue(memory.size == 10)

Example #7

Show file

class TestPrioritizedReplayBuffer(unittest.TestCase):
    """
    Tests insertion and (weighted) sampling of the PrioritizedReplayBuffer Component.
    """
    record_space = Dict(
        states=dict(state1=float, state2=Float(shape=(2,))),
        actions=dict(action1=int),
        reward=float,
        terminals=bool,
        main_axes="B"
    )

    capacity = 10
    alpha = 1.0
    beta = 1.0
    max_priority = 1.0

    def test_insert(self):
        memory = PrioritizedReplayBuffer(
            record_space=self.record_space,
            capacity=self.capacity,
            alpha=self.alpha,
            beta=self.beta
        )

        # Assert indices 0 before insert.
        self.assertEqual(memory.size, 0)
        self.assertEqual(memory.index, 0)

        # Insert single record (no batch rank).
        data = self.record_space.sample()
        memory.add_records(data)
        self.assertTrue(memory.size == 1)
        self.assertTrue(memory.index == 1)

        # Insert single record (w/ batch rank).
        data = self.record_space.sample(1)
        memory.add_records(data)
        self.assertTrue(memory.size == 2)
        self.assertTrue(memory.index == 2)

        # Insert batched records.
        data = self.record_space.sample(5)
        memory.add_records(data)
        self.assertTrue(memory.size == 7)
        self.assertTrue(memory.index == 7)

        # Insert over capacity.
        data = self.record_space.sample(100)
        memory.add_records(data)
        self.assertTrue(memory.size == 10)
        self.assertTrue(memory.index == 7)

    def test_update_records(self):
        memory = PrioritizedReplayBuffer(record_space=self.record_space, capacity=self.capacity)

        # Insert record samples.
        num_records = 2
        data = self.record_space.sample(num_records)
        memory.add_records(data)
        self.assertTrue(memory.size == num_records)
        self.assertTrue(memory.index == num_records)

        # Fetch records, their indices and weights.
        batch, indices, weights = memory.get_records_with_indices_and_weights(num_records)
        check(weights, np.ones(shape=(num_records,)))
        self.assertEqual(num_records, len(indices))
        self.assertTrue(memory.size == num_records)
        self.assertTrue(memory.index == num_records)

        # Update weight of index 0 to very small.
        memory.update_records(np.array([0]), np.array([0.01]))
        # Expect to sample almost only index 1 (which still has a weight of 1.0).
        for _ in range(100):
            _, indices, weights = memory.get_records_with_indices_and_weights(num_records=1000)
            self.assertGreaterEqual(np.sum(indices), 970)

        # Update weight of index 1 to very small as well.
        # Expect to sample equally.
        for _ in range(100):
            rand = np.random.random()
            memory.update_records(np.array([0, 1]), np.array([rand, rand]))
            _, indices, _ = memory.get_records_with_indices_and_weights(num_records=1000)
            self.assertGreaterEqual(np.sum(indices), 400)
            self.assertLessEqual(np.sum(indices), 600)

        # Update weights to be 1:2.
        # Expect to sample double as often index 1 over index 0 (1.0 = 2* 0.5).
        for _ in range(100):
            rand = np.random.random() * 10
            memory.update_records(np.array([0, 1]), np.array([rand, rand * 2]))
            _, indices, _ = memory.get_records_with_indices_and_weights(num_records=1000)
            self.assertGreaterEqual(np.sum(indices), 600)
            self.assertLessEqual(np.sum(indices), 750)

        # Update weights to be 1:4.
        # Expect to sample quadruple as often index 1 over index 0.
        for _ in range(100):
            rand = np.random.random() * 10
            memory.update_records(np.array([0, 1]), np.array([rand, rand * 4]))
            _, indices, _ = memory.get_records_with_indices_and_weights(num_records=1000)
            self.assertGreaterEqual(np.sum(indices), 750)
            self.assertLessEqual(np.sum(indices), 850)

        # Update weights to be 1:9.
        # Expect to sample 9 times as often index 1 over index 0.
        for _ in range(100):
            rand = np.random.random() * 10
            memory.update_records(np.array([0, 1]), np.array([rand, rand * 9]))
            _, indices, _ = memory.get_records_with_indices_and_weights(num_records=1000)
            self.assertGreaterEqual(np.sum(indices), 850)
            self.assertLessEqual(np.sum(indices), 950)

        # Insert more record samples.
        num_records = 10
        data = self.record_space.sample(num_records)
        memory.add_records(data)
        self.assertTrue(memory.size == self.capacity)
        self.assertTrue(memory.index == 2)

        # Update weights to be 1.0 to 10.0 and sample a < 10 batch.
        memory.update_records(np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
                              np.array([0.1, 1., 3., 8., 16., 32., 64., 128., 256., 512.]))
        counts = Counter()
        for _ in range(1000):
            _, indices, _ = memory.get_records_with_indices_and_weights(num_records=np.random.randint(1, 6))
            for i in indices:
                counts[i] += 1
        print(counts)
        self.assertTrue(
            counts[9] >= counts[8] >= counts[7] >= counts[6] >= counts[5] >=
            counts[4] >= counts[3] >= counts[2] >= counts[1] >= counts[0]
        )

    def test_segment_tree_insert_values(self):
        """
        Tests if segment tree inserts into correct positions.
        """
        memory = PrioritizedReplayBuffer(
            record_space=self.record_space,
            capacity=self.capacity,
            alpha=self.alpha,
            beta=self.beta
        )

        priority_capacity = 1
        while priority_capacity < self.capacity:
            priority_capacity *= 2

        sum_segment_values = memory.merged_segment_tree.sum_segment_tree.values
        min_segment_values = memory.merged_segment_tree.min_segment_tree.values

        self.assertEqual(sum(sum_segment_values), 0)
        self.assertEqual(sum(min_segment_values), float("inf"))
        self.assertEqual(len(sum_segment_values), 2 * priority_capacity)
        self.assertEqual(len(min_segment_values), 2 * priority_capacity)

        # Insert 1 Element.
        observation = self.record_space.sample(size=1)
        memory.add_records(observation)

        # Check insert positions
        # Initial insert is at priority capacity
        print(sum_segment_values)
        print(min_segment_values)
        start = priority_capacity

        while start >= 1:
            self.assertEqual(sum_segment_values[start], 1.0)
            self.assertEqual(min_segment_values[start], 1.0)
            start = int(start / 2)

        # Insert another Element.
        observation = self.record_space.sample(size=1)
        memory.add_records(observation)

        # Index shifted 1
        start = priority_capacity + 1
        self.assertEqual(sum_segment_values[start], 1.0)
        self.assertEqual(min_segment_values[start], 1.0)
        start = int(start / 2)
        while start >= 1:
            # 1 + 1 is 2 on the segment.
            self.assertEqual(sum_segment_values[start], 2.0)
            # min is still 1.
            self.assertEqual(min_segment_values[start], 1.0)
            start = int(start / 2)

    def test_tree_insert(self):
        """
        Inserts two priorities into the sum-segment-tree and verifies the results of `get_sum` queries.
        """
        buffer = PrioritizedReplayBuffer(capacity=4, record_space=self.record_space)
        sum_tree = buffer.merged_segment_tree.sum_segment_tree

        # Populate slots 2 and 3 only; slots 0 and 1 are never written.
        for slot, priority in ((2, 1.0), (3, 3.0)):
            sum_tree.insert(slot, priority)

        # Pairs of (positional args for `get_sum`, expected result).
        # The first entry calls `get_sum()` without any arguments.
        expectations = (
            ((), 4.0),
            ((0, 2), 0.0),
            ((0, 3), 1.0),
            ((2, 3), 1.0),
            ((2, -1), 1.0),
            ((2, 4), 4.0),
        )
        for bounds, expected_sum in expectations:
            self.assertTrue(np.isclose(sum_tree.get_sum(*bounds), expected_sum))

    def test_prefixsum_idx(self):
        """
        Verifies `index_of_prefixsum` lookups on a partially and on a fully populated sum-segment-tree.
        """
        # Each scenario: (inserts as (slot, priority), lookups as (prefix-sum, expected slot)).
        scenarios = (
            # Only slots 2 and 3 are written.
            (
                ((2, 1.0), (3, 3.0)),
                ((0.0, 2), (0.5, 2), (0.99, 2), (1.01, 3), (3.0, 3), (4.0, 3)),
            ),
            # All four slots are written.
            (
                ((0, 0.5), (1, 1.0), (2, 1.0), (3, 3.0)),
                ((0.0, 0), (0.55, 1), (0.99, 1), (1.51, 2), (3.0, 3), (5.50, 3)),
            ),
        )

        for inserts, lookups in scenarios:
            # Every scenario starts from a freshly constructed buffer.
            buffer = PrioritizedReplayBuffer(capacity=4, record_space=self.record_space)
            sum_tree = buffer.merged_segment_tree.sum_segment_tree
            for slot, priority in inserts:
                sum_tree.insert(slot, priority)
            for prefix_sum, expected_slot in lookups:
                self.assertEqual(sum_tree.index_of_prefixsum(prefix_sum), expected_slot)

Example #8

Show file

    def __init__(self,
                 world="2x2",
                 actors=1,
                 num_cores=1,
                 save_mode=False,
                 action_type="udlr",
                 reward_function="sparse",
                 state_representation="discrete"):
        """
        Builds the grid, derives the state- and action-spaces and passes everything on to the parent constructor.

        Args:
            world (Union[str,List[str]]): Either a key into `self.MAPS` or a list of strings, one string per row
                of the world (e.g. ["S ", " G"] for a two-row/two-column world with start and goal state).

            actors (int): Passed on to the parent constructor; also the number of per-actor state buffers
                allocated in `self.state`.

            num_cores (int): Passed on unchanged to the parent constructor.

            save_mode (bool): If True, every 'H' field of the map is overwritten with 'F' (presumably turning
                holes into fire fields - confirm against the map legend). Default: False.

            action_type (str): Which action space to use. "udlr" (up, down, left, right) yields a single
                discrete action space; any other value yields a container space with the sub-actions
                "forward", "turn" and "jump". With "ftj", "jump" is a 2-valued int, otherwise a boolean
                (documented value for that case: "ftjb").

            reward_function (str): One of
                sparse: hole=-5, fire=-3, goal=1, all other steps=-0.1
                rich: hole=-100, fire=-10, goal=50, all other steps=-0.1

            state_representation (str):
                - "discrete": An int representing the field on the grid, 0 meaning the upper left field, 1 the one
                    below, etc..
                - "xy": The x and y grid position tuple.
                - "xy+orientation": The x and y grid position plus two further 0..1-bounded components for the
                    orientation of the actor.
                - "camera": A 3-channel image where each field in the grid-world is one pixel and the 3 channels are
                    used to indicate different items in the scene (walls, holes, the actor, etc..).
        """
        # Resolve the map: named maps are looked up, anything else is taken as a custom row list.
        is_named_map = isinstance(world, str)
        self.description = world if is_named_map else "custom-map"
        if is_named_map:
            world = self.MAPS[world]

        # One character per grid field.
        world = np.array([list(row) for row in world])
        # Safety switch: 'H' fields stay 'H' unless `save_mode` asks for 'F' instead.
        hole_replacement = "F" if save_mode else "H"
        world[world == 'H'] = hole_replacement

        # The grid is indexed row-first (y), then column (x).
        num_rows, num_cols = world.shape

        # Derive the state space from the requested representation.
        known_representations = ["discrete", "xy", "xy+orientation", "camera"]
        assert state_representation in known_representations
        if state_representation == "discrete":
            # A single int enumerating all fields.
            state_space = Int(num_rows * num_cols)
        elif state_representation == "xy":
            # Two ints: x- and y-position.
            state_space = Int(low=(0, 0), high=(num_cols, num_rows), shape=(2, ))
        elif state_representation == "xy+orientation":
            # Four ints: x- and y-position plus two orientation components.
            state_space = Int(low=(0, 0, 0, 0), high=(num_cols, num_rows, 1, 1))
        else:
            # "camera": one pixel per field, three channels.
            state_space = Int(0, 255, shape=(num_rows, num_cols, 3))

        # Derive the action space.
        if action_type == "udlr":
            action_space = Int(4)
        else:
            forward_space = Int(3)
            turn_space = Int(3)
            if action_type == "ftj":
                jump_space = Int(2)
            else:
                jump_space = Bool()
            action_space = Dict(
                dict(forward=forward_space, turn=turn_space, jump=jump_space))

        # Everything after `process_class` is forwarded to the process constructor.
        super().__init__(
            actors=actors,
            num_cores=num_cores,
            state_space=state_space,
            action_space=action_space,
            process_class=GridWorldEnvProcess,
            world=world,
            n_col=num_cols,
            n_row=num_rows,
            state_representation=state_representation,
            action_type=action_type,
            reward_function=reward_function,
        )

        # One zero-initialized state buffer per actor for the values returned by the processes.
        zero_state = state_space.zeros()
        self.state = np.array([zero_state] * actors)

        # Start from a clean slate.
        self.reset_all()

Example #9

Show file

    def test_joint_cumulative_distribution(self):
        """
        Tests a `JointCumulativeDistribution` assembled from a nested dict of sub-distributions.

        Three things are checked, in this order:
        - deterministic sampling (compared against `expected_mean`),
        - stochastic sampling (the average over many draws is compared against `expected_mean`),
        - `log_prob` (compared against a value assembled from the `beta.pdf`/`norm.pdf` terms below).
        """
        # Parameter space: same nesting/keys as the `distributions` spec further below.
        param_space = Dict(
            {
                "a":
                Float(shape=(4, )),  # 4-discrete
                "b":
                Dict({
                    "ba":
                    Tuple([Float(shape=(3, )),
                           Float(0.1, 1.0, shape=(3, ))]),  # 3-variate normal
                    "bb":
                    Tuple([Float(shape=(2, )),
                           Float(shape=(2, ))]),  # beta -1 to 1
                    "bc":
                    Tuple([Float(shape=(4, )),
                           Float(0.1, 1.0, shape=(4, ))]),  # normal (dim=4)
                })
            },
            main_axes="B")

        # Space of the values that get scored in the log-likelihood part (same nesting/keys).
        values_space = Dict(
            {
                "a":
                Int(4),
                "b":
                Dict({
                    "ba": Float(shape=(3, )),
                    "bb": Float(shape=(2, )),
                    "bc": Float(shape=(4, ))
                })
            },
            main_axes="B")

        # Bounds handed to the Beta sub-distribution; also used for the expected-mean and rescaling math below.
        low, high = -1.0, 1.0
        cumulative_distribution = JointCumulativeDistribution(
            distributions={
                "a": Categorical(),
                "b": {
                    "ba": MultivariateNormal(),
                    "bb": Beta(low=low, high=high),
                    "bc": Normal()
                }
            })

        # Batch of size=2 and deterministic (True).
        input_ = param_space.sample(2)
        # Turn the raw samples for "a" into probabilities.
        input_["a"] = softmax(input_["a"])
        expected_mean = {
            # Deterministic draw for the categorical: the most likely category.
            "a": np.argmax(input_["a"], axis=-1),
            "b": {
                "ba":
                input_["b"]["ba"][0],  # [0]=Mean
                # Mean for a Beta distribution: 1 / [1 + (beta/alpha)] * range + low
                "bb":
                (1.0 / (1.0 + input_["b"]["bb"][1] / input_["b"]["bb"][0])) *
                (high - low) + low,
                "bc":
                input_["b"]["bc"][0],
            }
        }
        # Sample n times, expect always mean value (deterministic draw).
        for _ in range(20):
            out = cumulative_distribution.sample(input_, deterministic=True)
            check(out, expected_mean)
            out = cumulative_distribution.sample_deterministic(input_)
            check(out, expected_mean)

        # Batch of size=1 and non-deterministic -> expect roughly the mean.
        input_ = param_space.sample(1)
        input_["a"] = softmax(input_["a"])
        expected_mean = {
            # Expected category index: probability-weighted sum over the indices 0..3.
            "a": np.sum(input_["a"] * np.array([0, 1, 2, 3])),
            "b": {
                "ba":
                input_["b"]["ba"][0],  # [0]=Mean
                # Mean for a Beta distribution: 1 / [1 + (beta/alpha)] * range + low
                "bb":
                (1.0 / (1.0 + input_["b"]["bb"][1] / input_["b"]["bb"][0])) *
                (high - low) + low,
                "bc":
                input_["b"]["bc"][0],
            }
        }

        # Two draws per iteration: one via `sample()`, one via `sample_stochastic()` (1000 draws in total).
        outs = []
        for _ in range(500):
            out = cumulative_distribution.sample(input_)
            outs.append(out)
            out = cumulative_distribution.sample_stochastic(input_)
            outs.append(out)

        # Compare the per-key average over all draws (first batch item only) with the expected means.
        check(np.mean(np.stack([o["a"][0] for o in outs], axis=0), axis=0),
              expected_mean["a"],
              atol=0.3)
        check(np.mean(np.stack([o["b"]["ba"][0] for o in outs], axis=0),
                      axis=0),
              expected_mean["b"]["ba"][0],
              decimals=1)
        check(np.mean(np.stack([o["b"]["bb"][0] for o in outs], axis=0),
                      axis=0),
              expected_mean["b"]["bb"][0],
              decimals=1)
        check(np.mean(np.stack([o["b"]["bc"][0] for o in outs], axis=0),
                      axis=0),
              expected_mean["b"]["bc"][0],
              decimals=1)

        # Test log-likelihood outputs.
        params = param_space.sample(1)
        params["a"] = softmax(params["a"])
        # The beta log-pdf is computed on the raw (not yet rescaled) sampled values, because the `beta.pdf` call
        # below is not given any scaling arguments. NOTE(review): assumes `values_space.sample` yields values
        # within 0.0 and 1.0 for "bb" - confirm against `Float`'s default bounds.
        values = values_space.sample(1)
        log_prob_beta = np.log(
            beta.pdf(values["b"]["bb"], params["b"]["bb"][0],
                     params["b"]["bb"][1]))
        # Now do the scaling for b/bb (beta values).
        values["b"]["bb"] = values["b"]["bb"] * (high - low) + low
        # Joint log-likelihood = sum of the log-likelihoods of all four sub-distributions.
        expected_log_llh = np.log(params["a"][0][values["a"][0]]) + \
            np.sum(np.log(norm.pdf(values["b"]["ba"][0], params["b"]["ba"][0], params["b"]["ba"][1]))) + \
            np.sum(log_prob_beta) + \
            np.sum(np.log(norm.pdf(values["b"]["bc"][0], params["b"]["bc"][0], params["b"]["bc"][1])))

        out = cumulative_distribution.log_prob(params, values)
        # Only a coarse comparison (decimals=0) is made here.
        check(out, expected_log_llh, decimals=0)

Example #10

Show file

    def test_mixture(self):
        """
        Tests deterministic and stochastic sampling from a `MixtureDistribution` of 3 bivariate normals.

        NOTE: The prob/log-prob section at the end of this method sits behind a bare `return` and is therefore
        currently never executed.
        """
        # Create a mixture distribution consisting of 3 bivariate normals weighted by an internal
        # categorical distribution.
        num_distributions = 3
        num_events_per_multivariate = 2  # 2=bivariate
        # One "categorical" entry (mixture weights before softmax) plus one (mean, diag) tuple per component.
        param_space = Dict(
            {
                "categorical":
                Float(shape=(num_distributions, ), low=-1.5, high=2.3),
                "parameters0":
                Tuple(
                    Float(shape=(num_events_per_multivariate, )),  # mean
                    Float(shape=(num_events_per_multivariate, ),
                          low=0.5,
                          high=1.0),  # diag
                ),
                "parameters1":
                Tuple(
                    Float(shape=(num_events_per_multivariate, )),  # mean
                    Float(shape=(num_events_per_multivariate, ),
                          low=0.5,
                          high=1.0),  # diag
                ),
                "parameters2":
                Tuple(
                    Float(shape=(num_events_per_multivariate, )),  # mean
                    Float(shape=(num_events_per_multivariate, ),
                          low=0.5,
                          high=1.0),  # diag
                ),
            },
            main_axes="B")
        # Only used by the (currently unreachable) prob/log-prob section below.
        values_space = Float(shape=(num_events_per_multivariate, ),
                             main_axes="B")
        # The Component to test.
        mixture = MixtureDistribution(
            # Try different spec types.
            MultivariateNormal(),
            "multi-variate-normal",
            "multivariate_normal")

        # Batch of size=1 and deterministic (True).
        input_ = param_space.sample(1)
        # Make probs for categorical.
        categorical_probs = softmax(input_["categorical"])

        # Note: Usually, the deterministic draw should return the max-likelihood value
        # Max-likelihood for a 3-Mixed Bivariate: mean-of-argmax(categorical)()
        # argmax = np.argmax(input_[0]["categorical"], axis=-1)
        #expected = np.array([input_[0]["parameters{}".format(idx)][0][i] for i, idx in enumerate(argmax)])
        #    input_[0]["categorical"][:, 1:2] * input_[0]["parameters1"][0] + \
        #    input_[0]["categorical"][:, 2:3] * input_[0]["parameters2"][0]

        # The mean value is a 2D vector (bivariate distribution).
        # Here: the probability-weighted sum of the three component means.
        expected = categorical_probs[:, 0:1] * input_["parameters0"][0] + \
            categorical_probs[:, 1:2] * input_["parameters1"][0] + \
            categorical_probs[:, 2:3] * input_["parameters2"][0]

        # Both deterministic entry points must return exactly the expected value on every call.
        for _ in range(20):
            out = mixture.sample(input_, deterministic=True)
            check(out, expected)
            out = mixture.sample_deterministic(input_)
            check(out, expected)

        # Batch of size=1 and non-deterministic -> expect roughly the mean.
        input_ = param_space.sample(1)
        # Make probs for categorical.
        categorical_probs = softmax(input_["categorical"])
        expected = categorical_probs[:, 0:1] * input_["parameters0"][0] + \
            categorical_probs[:, 1:2] * input_["parameters1"][0] + \
            categorical_probs[:, 2:3] * input_["parameters2"][0]
        # Two draws per iteration: one via `sample()`, one via `sample_stochastic()` (1000 draws in total).
        outs = []
        for _ in range(500):
            out = mixture.sample(input_, deterministic=False)
            outs.append(out)
            out = mixture.sample_stochastic(input_)
            outs.append(out)
        check(np.mean(np.array(outs), axis=0), expected, decimals=1)

        # Early exit: everything below this line is unreachable until the TODO is resolved.
        return
        # TODO: prob/log-prob tests for Mixture.

        # Test log-likelihood outputs (against scipy).
        for i in range(20):
            params = param_space.sample(1)
            # Make sure categorical params are softmaxed.
            category_probs = softmax(params["categorical"][0])
            values = values_space.sample(1)
            # Expected mixture density: probability-weighted sum of the component densities.
            expected = 0.0
            v = []
            for j in range(3):
                v.append(
                    multivariate_normal.pdf(
                        values[0],
                        mean=params["parameters{}".format(j)][0][0],
                        cov=params["parameters{}".format(j)][1][0]))
                expected += category_probs[j] * v[-1]
            out = mixture.prob(params, values)
            check(out[0], expected, atol=0.1)

            # Expected mixture log-density: log-sum-exp over (log weight + component log-density).
            expected = np.zeros(shape=(3, ))
            for j in range(3):
                expected[j] = np.log(category_probs[j]) + np.log(
                    multivariate_normal.pdf(
                        values[0],
                        mean=params["parameters{}".format(j)][0][0],
                        cov=params["parameters{}".format(j)][1][0]))
            expected = np.log(np.sum(np.exp(expected)))
            out = mixture.log_prob(params, values)
            print("{}: out={} expected={}".format(i, out, expected))
            check(out, np.array([expected]), atol=0.25)

Example #11

Show file

File: dads.py Project: rosea-tf/surreal

    def __init__(
            self,
            *,
            policy_network,
            q_network,
            state_space,
            action_space,
            sac_config,
            num_q_experts=4,  # 4 used in paper.
            q_predicts_states_diff=False,
            num_denominator_samples_for_ri=250,  # 50-500 used in paper
            dim_skill_vectors=10,
            discrete_skills=False,
            episode_horizon=200,
            skill_horizon=None,
            preprocessor=None,
            supervised_optimizer=None,
            num_steps_per_supervised_update=1,
            episode_buffer_capacity=200,
            summaries=None):
        """
        Normalizes the given settings and stores them via the parent constructor (all arguments are keyword-only).

        Args:
            policy_network (Network): The policy-network (pi) to use as a function approximator for the learnt policy.
                A list, tuple, Keras model or Keras layer is wrapped into `dict(network=...)`.

            q_network (Network): The dynamics-network (q) to use as a function approximator for the learnt env
                dynamics. NOTE: Not to be confused with a Q-learning Q-net! In the paper, the dynamics function is
                called `q`, hence the same nomenclature here.
                A list, tuple, Keras model or Keras layer is wrapped into `dict(network=...)`.

            state_space (Space): The state/observation Space (converted via `Space.make`).
            action_space (Space): The action Space (converted via `Space.make`).

            sac_config (SACConfig): The config for the internal SAC-Algo used to learn the skills using intrinsic
                rewards. It is re-created via `SACConfig.make` with a state space made of `s` (the state space)
                and `z` (the skill vector), the given action space, the shared policy network and a memory of
                capacity 1 / batch size 1.

            num_q_experts (int): The number of experts used in the Mixture distribution output by the q-network to
                predict the next state (s') given s (state) and z (skill vector).

            q_predicts_states_diff (bool): Whether the q-network predicts the difference between s and s' rather than
                s' directly. Default: False.

            num_denominator_samples_for_ri (int): The number of samples to calculate for the denominator of the
                intrinsic reward function (`L` in the paper).

            dim_skill_vectors (int): The number of dimensions of the learnt skill vectors.
            discrete_skills (bool): Whether skill vectors are discrete (one-hot).
            episode_horizon (int): The episode horizon (He) to move within, when gathering episode samples.

            skill_horizon (Optional[int]): The horizon for which to use one skill vector (before sampling a new one).
                Default: Use value of `episode_horizon`.

            preprocessor (Preprocessor): The preprocessor (if any) to use.
            supervised_optimizer (Optimizer): The optimizer to use for the supervised (q) model learning task.

            num_steps_per_supervised_update (int): The number of gradient descent iterations per update
                (each iteration uses the same environment samples).

            episode_buffer_capacity (int): The capacity of the episode (experience) FIFOBuffer.

            summaries (List[any]): A list of summaries to produce if `UseTfSummaries` in debug.json is true.
                In the simplest case, this is a list of `self.[...]`-property names of the SAC object that should
                be tracked after each tick.
        """
        # Clean up network configs to be passable as **kwargs to `make`.
        # Networks are given as sequential config or directly as Keras objects -> prepend "network" key to spec.
        if isinstance(
                policy_network,
            (list, tuple, tf.keras.models.Model, tf.keras.layers.Layer)):
            policy_network = dict(network=policy_network)
        if isinstance(
                q_network,
            (list, tuple, tf.keras.models.Model, tf.keras.layers.Layer)):
            q_network = dict(network=q_network)

        # Make state/action space.
        state_space = Space.make(state_space)
        action_space = Space.make(action_space)

        # Fix SAC config, add correct state- and action-spaces.
        sac_config = SACConfig.make(
            sac_config,
            # SAC sees the env state `s` together with the skill vector `z` (bounded to [-1.0, 1.0]).
            state_space=Dict(s=state_space,
                             z=Float(-1.0, 1.0, shape=(dim_skill_vectors, ))),
            action_space=action_space,
            # Use no memory. Updates are done from DADS' own buffer.
            memory_capacity=1,
            memory_batch_size=1,
            # Share policy network between DADS and underlying learning SAC.
            policy_network=policy_network)

        # Fall back to one skill vector per episode.
        if skill_horizon is None:
            skill_horizon = episode_horizon

        # NOTE: `locals()` is taken at this point, i.e. it holds the values as rewritten above
        # (wrapped networks, made spaces, fixed `sac_config`, resolved `skill_horizon`). Do not introduce or
        # rename local variables above this call without checking what the parent constructor stores.
        super().__init__(
            locals())  # Config will store all c'tor variables automatically.

        # Keep track of which time-step stuff happened. Only important for by-time-step frequencies.
        self.last_update = 0