def test_validatePlacementNonIsolable(self):
    """A placement on "a1" combined with "b2" must fail validation.

    On a freshly created game both fields are empty, yet the first element
    of ``Placement.validate``'s result is expected to be falsy.
    NOTE(review): going by the test name, "b2" is presumably a field that
    cannot be isolated/removed -- confirm against the game rules.
    """
    game = Game()
    board = game.getBoard()

    # Precondition: both fields involved start out empty.
    self.assertEqual(FieldState.EMPTY, board.getState("a1"))
    self.assertEqual(FieldState.EMPTY, board.getState("b2"))

    m1 = Placement(BallColors.BLACK, "a1", "b2")

    # validate() returns an indexable result; index 0 carries the verdict.
    self.assertFalse(m1.validate(game)[0])
def test_simplePlacementWithNoRemoval(self):
    """Executing a placement without a removal target only changes one field.

    A black ball is placed on "a1" with ``None`` as the third ``Placement``
    argument; afterwards "a1" must be BLACK while "a2" is still EMPTY.
    """
    game = Game()
    board = game.getBoard()

    # Precondition: both observed fields start out empty.
    self.assertEqual(FieldState.EMPTY, board.getState("a1"))
    self.assertEqual(FieldState.EMPTY, board.getState("a2"))

    m1 = Placement(BallColors.BLACK, "a1", None)
    m1.execute(game)

    # Only the target field changed; the neighbouring field is untouched.
    self.assertEqual(FieldState.BLACK, board.getState("a1"))
    self.assertEqual(FieldState.EMPTY, board.getState("a2"))
def test_validatePlacementOnNonEmptyField(self):
    """Re-validating an already executed placement must fail.

    The placement puts a black ball on "a1" and names "a2" as its third
    argument. After ``execute`` the board reports "a1" as BLACK and "a2" as
    REMOVED, so validating the very same placement again is expected to
    yield a falsy first result element.
    """
    game = Game()
    board = game.getBoard()

    # Precondition: both fields start out empty.
    self.assertEqual(FieldState.EMPTY, board.getState("a1"))
    self.assertEqual(FieldState.EMPTY, board.getState("a2"))

    m1 = Placement(BallColors.BLACK, "a1", "a2")
    m1.execute(game)

    # The move has been applied: ball placed, second field removed.
    self.assertEqual(FieldState.BLACK, board.getState("a1"))
    self.assertEqual(FieldState.REMOVED, board.getState("a2"))

    # The same move is no longer valid on the modified board.
    self.assertFalse(m1.validate(game)[0])
#takes the one action which was selected in batch target_f[0][action] = target #trains the model self.model.fit(state, target_f, epochs=1, verbose=0) if self.epsilon > self.epsilon_min: self.epsilon *= self.epsilon_decay def load(self, name): self.model.load_weights(name) def save(self, name): self.model.save(name) if __name__ == "__main__": game = Game() agent = DQNAgent() # agent.load("./save/file") done = False batch_size = agent.batch_size debug = False save_maxvalues = True output_list = [] for e in range(EPISODES): game.new_game() state = game.state() state = np.reshape(state, [1, agent.state_size]) while not game.game_over(): action = agent.act(state) reward = (game.do_action(action))**2
def main():
    """Build a ``Game`` with the arguments ``(800, 600, 20)`` and run its main loop."""
    Game(800, 600, 20).main_loop()
def play_single_game():
    """Play a single game using the latest model snapshot.

    Loads the model stored at ``path + "/data/checkpoint"``, starts a new
    game and, until the game is over, performs the available action with
    the highest predicted Q-value. After every move the chosen action, the
    squared reward and the board are printed; once no action is left a
    final summary (score and maximum value) is printed.
    """
    state_size = 16
    num_actions = 4

    game = Game()
    model = load_model(path + "/data/checkpoint")

    game.new_game()
    state = np.reshape(game.state(), [1, state_size])

    while not game.game_over():
        # One predicted Q-value per action for the current state.
        act_values = model.predict(state)

        # Exclude actions that are currently unavailable. -inf is used
        # instead of a finite sentinel so that an unavailable action can
        # never win the argmax, however low the remaining Q-values are.
        available = game.available_actions()
        for candidate in range(num_actions):
            if candidate not in available:
                act_values[0][candidate] = -np.inf

        # Greedy choice: the action with the highest remaining Q-value.
        action = np.argmax(act_values[0])
        reward = (game.do_action(action))**2

        state = np.reshape(game.state(), [1, state_size])
        # The episode ends as soon as no further action is possible.
        done = len(game.available_actions()) == 0

        print("Action:", ACTION_NAMES[action])
        print("Reward:", reward)
        game.print_state()

        if done:
            # NOTE(review): the state entries are presumably exponents of
            # two (hence 2**max_value below) -- confirm against Game.state().
            max_value = np.amax(state[0])
            print("Score:", game.score())
            print("Max Value: " + str(2**max_value))
            print("Game over.")
            break
def main():
    """Build a ``Game`` with the arguments ``(160, 160, 20)`` and run its main loop."""
    Game(160, 160, 20).main_loop()