def play(strategy, verbose=False, allow_unavailable_action=True):
    """Play one game to completion with the given strategy.

    Args:
      strategy: Callable taking (state, actions) and returning an action
        from the given actions.
      verbose: If true, prints game states, actions and scores.
      allow_unavailable_action: If true, the strategy is offered all four
        actions; otherwise only the currently available ones.

    Returns:
      (score, experiences): the final score and the list of Experience
      instances collected while playing.
    """
    game = Game()
    state = game.state().copy()
    game_over = game.game_over()
    experiences = []

    while not game_over:
        if verbose:
            print("Score:", game.score())
            game.print_state()

        old_state = state
        offered = range(4) if allow_unavailable_action else game.available_actions()
        next_action = strategy(old_state, offered)

        if not game.is_action_available(next_action):
            # Illegal move: zero reward, state unchanged, flagged unavailable.
            experiences.append(Experience(state, next_action, 0, state, False,
                                          True, game.available_actions()))
            continue

        reward = game.do_action(next_action)
        state = game.state().copy()
        game_over = game.game_over()
        if verbose:
            print("Action:", ACTION_NAMES[next_action])
            print("Reward:", reward)
        experiences.append(Experience(old_state, next_action, reward, state,
                                      game_over, False,
                                      game.available_actions()))

    if verbose:
        print("Score:", game.score())
        game.print_state()
        print("Game over.")

    return game.score(), experiences
def play(strategy, verbose=False, allow_unavailable_action=True):
    """Run a single game driven by *strategy* and record the experience.

    Args:
      strategy: Callable (state, actions) -> action chosen from actions.
      verbose: If true, prints game states, actions and scores.
      allow_unavailable_action: If true, the strategy sees all four actions;
        otherwise only the actions currently available.

    Returns:
      (score, experiences): final score plus the Experience list gathered.
    """
    game = Game()
    current = game.state().copy()
    done = game.game_over()
    collected = []
    while not done:
        if verbose:
            print("Score:", game.score())
            game.print_state()
        previous = current
        if allow_unavailable_action:
            choices = range(4)
        else:
            choices = game.available_actions()
        action = strategy(previous, choices)
        if game.is_action_available(action):
            reward = game.do_action(action)
            current = game.state().copy()
            done = game.game_over()
            if verbose:
                print("Action:", ACTION_NAMES[action])
                print("Reward:", reward)
            collected.append(Experience(previous, action, reward, current,
                                        done, False,
                                        game.available_actions()))
        else:
            # The chosen action is not legal in this state: record it with
            # zero reward and the not-available flag set.
            collected.append(Experience(current, action, 0, current, False,
                                        True, game.available_actions()))
    if verbose:
        print("Score:", game.score())
        game.print_state()
        print("Game over.")
    return game.score(), collected
def test_available_actions_none_available():
    # Board in which no move is possible: expects no available actions and
    # game over. NOTE(review): this state is a 3x3x3 array while the sibling
    # test uses a flat 4x4 board — confirm Game accepts this shape.
    # NOTE(review): this function shares its name with another test in the
    # module; the later definition shadows this one — consider renaming.
    state = np.array([[[1, 2, 3], [5, 6, 7], [1, 2, 3]],
                      [[8, 9, 10], [11, 12, 13], [8, 9, 10]],
                      [[14, 15, 16], [17, 18, 19], [14, 15, 16]]])
    game = Game(state=state)
    actions = game.available_actions()
    # No actions should be available (earlier comment claiming "all except
    # left" was stale), and the game must therefore be over.
    assert actions == []
    assert game.game_over()
def test_available_actions_none_available():
    # Alternating rows with no equal neighbors: nothing can slide or merge,
    # so no action is available and the game is over.
    blocked_rows = [[1, 2, 3, 4],
                    [5, 6, 7, 8]]
    state = np.array(blocked_rows * 2)
    game = Game(state=state)
    assert game.available_actions() == []
    assert game.game_over()
# --- DQN experience-collection loop (script level) ---
# Sizes the agent from the CartPole env spaces (stand-ins noted inline) and
# plays the 2048 Game with random actions, feeding transitions to the agent.
env = gym.make('CartPole-v1')
state_size = env.observation_space.shape[0]  # 4 here; board encoding would be 4*4*12
action_size = env.action_space.n             # 4 moves: left/up/right/down
agent = DQNAgent(state_size, action_size)
# agent.load("./save/cartpole-dqn.h5")
batch_size = 32
count = 0  # episodes started; previously incremented without being initialized here

for e in range(EPISODES):
    # Fresh board each episode. Previously the same finished `game` was
    # reused, so every episode after the first did nothing.
    game = Game()
    state = game.state()
    count += 1
    done = game.game_over()
    while not done:
        action = randint(0, 3)  # TODO: replace with epsilon-greedy agent.act(state)
        reward = game.do_action(action)
        next_state = game.state()
        # Previously `done` was never updated, so remember() always saw
        # done=False and the episode-end print/break never fired.
        done = game.game_over()
        agent.remember(state, action, reward, next_state, done)
        state = next_state
        if done:
            # Previously the format string had 4 placeholders but only 3
            # arguments, raising IndexError; the score argument was missing.
            print("episode: {}/{}, score: {}, e: {:.2}".format(
                e, EPISODES, game.score(), agent.epsilon))
            break
    print("bla")