def test_available_actions():
    state = np.array([[[1, 2, 0], [1, 2, 0], [1, 2, 0]],
                      [[1, 2, 0], [1, 2, 0], [1, 2, 0]],
                      [[1, 2, 0], [1, 2, 0], [1, 2, 0]]])
    game = Game(state=state)

    actions = game.available_actions()

    # All actions except left are available
    assert actions == [1, 2, 3, 4, 5]
def test_available_actions_none_available():
    state = np.array([[[1, 2, 3], [5, 6, 7], [1, 2, 3]],
                      [[8, 9, 10], [11, 12, 13], [8, 9, 10]],
                      [[14, 15, 16], [17, 18, 19], [14, 15, 16]]])
    game = Game(state=state)

    actions = game.available_actions()

    # No actions are available, so the game is over
    assert actions == []
    assert game.game_over()
def test_available_actions():
    state = np.array([[1, 2, 3, 0],
                      [1, 2, 3, 0],
                      [1, 2, 3, 0],
                      [1, 2, 3, 0]])
    game = Game(state=state)

    actions = game.available_actions()

    # All actions except left are available
    assert actions == [1, 2, 3]
def test_available_actions_none_available():
    state = np.array([[1, 2, 3, 4],
                      [5, 6, 7, 8],
                      [1, 2, 3, 4],
                      [5, 6, 7, 8]])
    game = Game(state=state)

    actions = game.available_actions()

    # No actions are available, so the game is over
    assert actions == []
    assert game.game_over()
def test_init(choice):
    choice.side_effect = [0,  # First position
                          1,  # First tile
                          1,  # Second position
                          2]  # Second tile

    game = Game()

    choice.assert_has_calls([call(16),
                             call([1, 2], p=[0.9, 0.1]),
                             call(15),
                             call([1, 2], p=[0.9, 0.1])])

    # Assert correct number of 0s, 1s and 2s
    game.print_state()
    assert (np.bincount(game.state().flatten()) == [14, 1, 1]).all()
    assert game.score() == 0
def highest_reward_strategy(state, actions):
    """Strategy that always chooses the action of highest immediate reward.

    If there are any ties, the strategy prefers left over up over right over down.
    """

    sorted_actions = np.sort(actions)[::-1]
    # Materialize the rewards so np.argsort sees a list, not a lazy map object.
    rewards = list(map(lambda action: Game(np.copy(state)).do_action(action),
                       sorted_actions))
    action_index = np.argsort(rewards, kind="mergesort")[-1]
    return sorted_actions[action_index]
def highest_reward_strategy(state, actions):
    """Strategy that always chooses the action of highest immediate reward.

    If there are any ties, the strategy prefers left over up over right over down.
    """

    sorted_actions = np.sort(actions)[::-1]
    rewards = [
        Game(np.copy(state)).do_action(action) for action in sorted_actions
    ]
    # Use a stable sort so ties are broken as documented (left > up > right > down).
    action_index = np.argsort(rewards, kind="stable")[-1]
    return sorted_actions[action_index]
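# For comparison, a minimal baseline with the same (state, actions) interface
# can be sketched as below. The name random_strategy is illustrative and not
# necessarily part of the original module; it assumes numpy is imported as np,
# as in the rest of this file.
def random_strategy(_state, actions):
    """Strategy that picks one of the given actions uniformly at random."""
    return np.random.choice(actions)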
def test_do_action(choice):
    choice.side_effect = [0,  # First position
                          1]  # First tile
    state = np.array([[1, 2, 3, 3],
                      [5, 6, 7, 8],
                      [5, 2, 7, 0],
                      [1, 0, 3, 0]])
    game = Game(state=state)

    game.do_action(3)  # DOWN

    new_state = np.array([[1, 0, 0, 0],
                          [1, 2, 3, 0],
                          [6, 6, 8, 3],
                          [1, 2, 3, 8]])
    game.print_state()
    assert (game.state() == new_state).all()

    # Score is 2 ** 6 + 2 ** 8
    assert game.score() == 320
def test_do_action(choice):
    choice.side_effect = [0,  # First position
                          1]  # First tile
    state = np.array([[[1, 2, 3], [5, 6, 7], [5, 2, 7]],
                      [[1, 2, 3], [5, 6, 7], [5, 2, 7]],
                      [[1, 2, 3], [5, 6, 7], [5, 2, 7]]])
    game = Game(state=state)

    game.do_action(3)  # DOWN

    new_state = np.array([[[0, 2, 0], [1, 6, 3], [6, 2, 8]],
                          [[0, 2, 0], [1, 6, 3], [6, 2, 8]],
                          [[0, 2, 0], [1, 6, 3], [6, 2, 8]]])
    game.print_state()
    assert (game.state() == new_state).all()

    # Score is (2 ** 6 + 2 ** 8) * 3
    assert game.score() == 960
def play(strategy, verbose=False, allow_unavailable_action=True):
    """Plays a single game, using a provided strategy.

    Args:
        strategy: A function that takes as argument a state and a list of
            available actions and returns an action from the list.
        allow_unavailable_action: Boolean, whether the strategy is passed all
            actions or just the available ones.
        verbose: If true, prints game states, actions and scores.

    Returns:
        score, experiences where score is the final score and experiences is
        the list of Experience instances that represent the collected
        experience.
    """

    game = Game()

    state = game.state().copy()
    game_over = game.game_over()
    experiences = []

    while not game_over:
        if verbose:
            print("Score:", game.score())
            game.print_state()

        old_state = state
        next_action = strategy(
            old_state,
            range(4) if allow_unavailable_action
            else game.available_actions())

        if game.is_action_available(next_action):
            reward = game.do_action(next_action)
            state = game.state().copy()
            game_over = game.game_over()

            if verbose:
                print("Action:", ACTION_NAMES[next_action])
                print("Reward:", reward)

            experiences.append(Experience(old_state, next_action, reward,
                                          state, game_over, False,
                                          game.available_actions()))
        else:
            experiences.append(Experience(state, next_action, 0, state, False,
                                          True, game.available_actions()))

    if verbose:
        print("Score:", game.score())
        game.print_state()
        print("Game over.")

    return game.score(), experiences
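# A minimal usage sketch, assuming Game, Experience and ACTION_NAMES are
# defined in this module as used above; the helper name demo_play_once is
# illustrative, not part of the original code.
def demo_play_once():
    """Plays one game with the greedy strategy and prints a short summary."""
    score, experiences = play(highest_reward_strategy,
                              verbose=False,
                              allow_unavailable_action=False)
    print("Final score:", score)
    print("Number of collected experiences:", len(experiences))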
            # Update only the Q-value of the action actually taken, then fit.
            target_f = self.model.predict(state)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)
        # Decay the exploration rate after each replay step.
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def load(self, name):
        self.model.load_weights(name)

    def save(self, name):
        self.model.save_weights(name)


if __name__ == "__main__":
    count = 0
    game = Game()
    env = gym.make('CartPole-v1')
    # state_size = 192
    state_size = env.observation_space.shape[0]  # in our case 4*4*12
    # action_size = 4
    action_size = env.action_space.n  # in our case 4
    agent = DQNAgent(state_size, action_size)
    # agent.load("./save/cartpole-dqn.h5")
    done = False
    batch_size = 32

    for e in range(EPISODES):
        state = game.state()
        count += 1
        # state = env.reset()
        # state = np.reshape(state, [1, state_size])
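# The commented-out state_size of 192 corresponds to one-hot encoding the 4x4
# board with 12 possible tile exponents per cell (0 for empty, 1..11 for tiles
# up to 2048). A minimal encoding sketch under that assumption; the helper name
# encode_state is illustrative and not part of the original code.
def encode_state(board, num_exponents=12):
    """One-hot encodes a 4x4 board of tile exponents into a (1, 192) vector."""
    one_hot = np.zeros((4, 4, num_exponents))
    for row in range(4):
        for col in range(4):
            one_hot[row, col, int(board[row, col])] = 1.0
    return one_hot.reshape(1, -1)  # shape (1, 4 * 4 * 12) == (1, 192)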