Example #1
    def test_make_last_sequence_terminal_state_first_in_made_sequence_wrap(
            self):
        batch_size = 10
        state_shape = 2
        sequence_length = 4
        capacity = 30
        rm = replay_memory.SequenceReplayMemory(state_shape, sequence_length,
                                                batch_size, capacity)

        # fill the buffer: capacity - 1 non-terminal tuples, then one terminal tuple
        state = np.ones(state_shape)
        action = 0
        reward = 0
        next_state = np.ones(state_shape)
        terminal = False
        for i in range(capacity - 1):
            rm.store(state, action, reward, terminal)

        terminal = True
        rm.store(state, action, reward, terminal)

        # first tuple of the new episode
        terminal = False
        rm.store(state, action, reward, terminal)

        # second tuple of the new episode
        terminal = False
        rm.store(state, action, reward, terminal)

        actual = rm.make_last_sequence(np.arange(state_shape)).tolist()
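        # the buffer has wrapped past capacity, and the episode boundary falls inside
        # the sequence window: the slot from the finished episode is zeroed, leaving
        # the two new-episode states before the query state [0, 1]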
        expected = [[0, 0], [1, 1], [1, 1], [0, 1]]
        self.assertEquals(actual, expected)
Example #2
    def test_minibatch_sample_shapes_multidimensional_state_broadcast_check(
            self):
        batch_size = 100
        state_shape = (1, 2, 1)
        sequence_length = 2
        capacity = 1000
        rm = replay_memory.SequenceReplayMemory(state_shape, sequence_length,
                                                batch_size, capacity)
        for idx in range(1000):
            state = np.ones(state_shape)
            action = 0
            reward = 0
            next_state = np.ones(state_shape)
            terminal = False
            rm.store(state, action, reward, terminal)

        states, actions, rewards, next_states, terminals = rm.sample_batch()
        expected_states_shape = (batch_size, ) + (
            sequence_length, ) + state_shape
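        # (batch_size,) + (sequence_length,) + state_shape == (100, 2, 1, 2, 1)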

        self.assertEquals(states.shape, expected_states_shape)
        self.assertEquals(actions.shape, (batch_size, 1))
        self.assertEquals(rewards.shape, (batch_size, 1))
        self.assertEquals(next_states.shape, expected_states_shape)
        self.assertEquals(terminals.shape, (batch_size, 1))
Example #3
    def test_minibatch_sample_shapes_1D_state_sequence_length_2(self):
        batch_size = 10
        state_shape = 2
        sequence_length = 2
        capacity = 1000
        rm = replay_memory.SequenceReplayMemory(state_shape, sequence_length,
                                                batch_size, capacity)
        for idx in range(1000):
            state = np.ones(state_shape)
            action = 0
            reward = 0
            next_state = np.ones(state_shape)
            terminal = False
            rm.store(state, action, reward, terminal)

        states, actions, rewards, next_states, terminals = rm.sample_batch()
        self.assertEquals(states.shape,
                          (batch_size, sequence_length, state_shape))
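        # all stored states are ones, so the batch sums to its element count:
        # batch_size * sequence_length * state_shape = 10 * 2 * 2 = 40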
        self.assertEquals(states.sum(),
                          batch_size * sequence_length * state_shape)
        self.assertEquals(actions.shape, (batch_size, 1))
        self.assertEquals(rewards.shape, (batch_size, 1))
        self.assertEquals(next_states.shape,
                          (batch_size, sequence_length, state_shape))
        self.assertEquals(next_states.sum(),
                          batch_size * sequence_length * state_shape)
        self.assertEquals(terminals.shape, (batch_size, 1))
Example #4
    def test_sequence_value_string(self):
        room_size = 3
        num_rooms = 3
        mdp = mdps.MazeMDP(room_size, num_rooms)
        mdp.compute_states()
        mdp.EXIT_REWARD = 1
        mdp.MOVE_REWARD = -0.1
        discount = 1
        sequence_length = 2
        batch_size = 10
        learning_rate = 1e-3
        freeze_interval = 10000
        num_hidden = 4
        eps = .5
        reg = 1e-8
        num_actions = len(mdp.get_actions(None))
        batch_size = 100
        network = recurrent_qnetwork.RecurrentQNetwork(
            input_shape=2 * room_size,
            sequence_length=sequence_length,
            batch_size=batch_size,
            num_actions=4,
            num_hidden=num_hidden,
            discount=discount,
            learning_rate=learning_rate,
            regularization=reg,
            update_rule='adam',
            freeze_interval=freeze_interval,
            network_type='single_layer_lstm',
            rng=None)
        num_epochs = 5
        epoch_length = 10
        test_epoch_length = 0
        max_steps = (room_size * num_rooms)**2
        epsilon_decay = (num_epochs * epoch_length * max_steps) / 2
        adapter = state_adapters.CoordinatesToSingleRoomRowColAdapter(
            room_size=room_size)
        p = policy.EpsilonGreedy(num_actions, eps, 0.05, epsilon_decay)
        rm = replay_memory.SequenceReplayMemory(
            input_shape=2 * room_size,
            sequence_length=sequence_length,
            batch_size=batch_size,
            capacity=50000)
        log = logger.NeuralLogger(agent_name='RecurrentQNetwork')
        a = agent.RecurrentNeuralAgent(network=network,
                                       policy=p,
                                       replay_memory=rm,
                                       log=log,
                                       state_adapter=adapter)
        run_tests = False
        e = experiment.Experiment(mdp,
                                  a,
                                  num_epochs,
                                  epoch_length,
                                  test_epoch_length,
                                  max_steps,
                                  run_tests,
                                  value_logging=True)
        e.log_temporal_value_string()
Example #5
    def test_make_last_sequence_empty(self):
        batch_size = 10
        state_shape = 2
        sequence_length = 4
        capacity = 30
        rm = replay_memory.SequenceReplayMemory(state_shape, sequence_length,
                                                batch_size, capacity)

        actual = rm.make_last_sequence(np.arange(state_shape)).tolist()
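        # nothing has been stored, so every history slot is zero-padded and the
        # query state [0, 1] fills the final position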
        expected = [[0, 0], [0, 0], [0, 0], [0, 1]]
        self.assertEquals(actual, expected)
Example #6
    def test_make_last_sequence_preceding_state_terminal(self):
        batch_size = 10
        state_shape = 2
        sequence_length = 3
        capacity = 30
        rm = replay_memory.SequenceReplayMemory(state_shape, sequence_length,
                                                batch_size, capacity)

        state = np.ones(state_shape)
        action = 0
        reward = 0
        next_state = np.ones(state_shape)
        terminal = False
        rm.store(state, action, reward, terminal)
        terminal = True
        rm.store(state, action, reward, terminal)
        actual = rm.make_last_sequence(np.arange(state_shape)).tolist()
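        # the most recent transition was terminal, so the history is zero-padded
        # and only the query state [0, 1] appears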
        expected = [[0, 0], [0, 0], [0, 1]]
        self.assertEquals(actual, expected)
Example #7
    def test_make_last_sequence_basic_operation(self):
        batch_size = 10
        state_shape = 2
        sequence_length = 3
        capacity = 30
        rm = replay_memory.SequenceReplayMemory(state_shape, sequence_length,
                                                batch_size, capacity)

        for idx in range(4):
            state = np.ones(state_shape)
            action = 0
            reward = 0
            next_state = np.ones(state_shape)
            terminal = False
            rm.store(state, action, reward, terminal)

        actual = rm.make_last_sequence(np.arange(state_shape)).tolist()
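        # the two most recently stored states precede the query state [0, 1]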
        expected = [[1, 1], [1, 1], [0, 1]]
        self.assertEquals(actual, expected)
Example #8
    def test_minibatch_sample_shapes_1D_state_terminal(self):
        batch_size = 200
        state_shape = 2
        sequence_length = 2
        capacity = 1000
        rm = replay_memory.SequenceReplayMemory(state_shape, sequence_length,
                                                batch_size, capacity)
        prev_state_terminal = False
        for idx in range(1, 1001):
            action = 0
            reward = 0
            state = np.ones(state_shape) * idx
            state = state if not prev_state_terminal else np.zeros(state_shape)
            prev_state_terminal = False if np.random.random() < .8 else True
            rm.store(state, action, reward, prev_state_terminal)

        states, actions, rewards, next_states, terminals = rm.sample_batch()
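        # any transition sampled as terminal should have the zero state as the
        # final frame of its next_state sequence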
        for state, next_state, terminal in zip(states, next_states, terminals):
            if terminal:
                self.assertEquals(next_state.tolist()[-1],
                                  np.zeros(state_shape).tolist())
Example #9
    def test_make_last_sequence_insufficient_samples_for_full_sequence(self):
        batch_size = 10
        state_shape = 2
        sequence_length = 4
        capacity = 30
        rm = replay_memory.SequenceReplayMemory(state_shape, sequence_length,
                                                batch_size, capacity)

        # tuple 1
        state = np.ones(state_shape)
        action = 0
        reward = 0
        next_state = np.ones(state_shape)
        terminal = False
        rm.store(state, action, reward, terminal)

        # tuple 2
        terminal = False
        rm.store(state, action, reward, terminal)

        actual = rm.make_last_sequence(np.arange(state_shape)).tolist()
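        # only two states have been stored, so the oldest slot is zero-padded
        # ahead of them and the query state [0, 1]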
        expected = [[0, 0], [1, 1], [1, 1], [0, 1]]
        self.assertEquals(actual, expected)
Example #10
        def run(learning_rate, freeze_interval, num_hidden, reg, seq_len, eps,
                nt, update):
            room_size = 5
            num_rooms = 2
            input_shape = 2 * room_size
            print 'building mdp...'
            mdp = mdps.MazeMDP(room_size, num_rooms)
            mdp.compute_states()
            mdp.EXIT_REWARD = 1
            mdp.MOVE_REWARD = -0.01
            network_type = nt
            discount = 1
            sequence_length = seq_len
            num_actions = len(mdp.get_actions(None))
            batch_size = 100
            update_rule = update
            print 'building network...'
            network = recurrent_qnetwork.RecurrentQNetwork(
                input_shape=input_shape,
                sequence_length=sequence_length,
                batch_size=batch_size,
                num_actions=4,
                num_hidden=num_hidden,
                discount=discount,
                learning_rate=learning_rate,
                regularization=reg,
                update_rule=update_rule,
                freeze_interval=freeze_interval,
                network_type=network_type,
                rng=None)

            # take this many steps per episode because (very loosely):
            # let l be the step length, d the distance between the start and end
            # locations, and N the number of steps needed to travel a distance d;
            # for a random walk, N ~ (d/l)^2, so with l = 1 this gives N ~ d^2
            # the desired distance is to traverse both dimensions of the maze,
            # roughly 2 * num_rooms * room_size, so squaring that gives a loose
            # approximation of the number of steps needed (ignoring that the
            # maze is a lattice with walls)
            # see: http://mathworld.wolfram.com/RandomWalk2-Dimensional.html
            max_steps = (2 * room_size * num_rooms)**2
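            # with room_size = 5 and num_rooms = 2 this gives (2 * 5 * 2)**2 = 400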
            num_epochs = 500
            epoch_length = 1
            test_epoch_length = 0
            epsilon_decay = (num_epochs * epoch_length * max_steps) / 4
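            # here: (500 * 1 * 400) / 4 = 50000 steps over which epsilon decays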
            print 'building adapter...'
            adapter = state_adapters.CoordinatesToSingleRoomRowColAdapter(
                room_size=room_size)
            print 'building policy...'
            p = policy.EpsilonGreedy(num_actions, eps, 0.05, epsilon_decay)
            print 'building replay memory...'
            # want to track at minimum the last 50 episodes
            capacity = max_steps * 50
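            # 400 steps * 50 episodes = 20000 stored transitions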
            rm = replay_memory.SequenceReplayMemory(
                input_shape=input_shape,
                sequence_length=sequence_length,
                batch_size=batch_size,
                capacity=capacity)
            print 'building logger...'
            log = logger.NeuralLogger(agent_name=network_type)
            print 'building agent...'
            a = agent.RecurrentNeuralAgent(network=network,
                                           policy=p,
                                           replay_memory=rm,
                                           log=log,
                                           state_adapter=adapter)
            run_tests = False
            print 'building experiment...'
            e = experiment.Experiment(mdp,
                                      a,
                                      num_epochs,
                                      epoch_length,
                                      test_epoch_length,
                                      max_steps,
                                      run_tests,
                                      value_logging=True)
            print 'running experiment...'
            e.run()

            ak = file_utils.load_key('../access_key.key')
            sk = file_utils.load_key('../secret_key.key')
            bucket = 'hierarchical9'
            try:
                aws_util = aws_s3_utility.S3Utility(ak, sk, bucket)
                aws_util.upload_directory(e.agent.logger.log_dir)
            except Exception as e:
                print 'error uploading to s3: {}'.format(e)