def move(self, board):
    # Convert to numpy array
    start_state = self.make_np_array(board)

    # get ally's predicted move
    # ally_predicted_action = self.model_cnn.predict(start_state)
    # print("ally", ally_predicted_action)
    # identify which 5 output nodes to take max from
    # ally_alist = ally_predicted_action[0].tolist()
    # ally_action = ally_alist.index(max(ally_alist))
    # start_index = ally_action * self.num_actions

    # TODO Remove later, add cnn back
    # get ally's next move
    tmp_board = play_round(board, 4)
    ally_action = tmp_board.players[0].action
    start_index = ally_action * self.num_actions

    # Make a new prediction now that the weights have been updated
    all_q_vals = self.model_dqn.predict([start_state, self.mask1])
    if ally_action == self.num_actions - 1:
        q_vals = all_q_vals[0][start_index:]
    else:
        q_vals = all_q_vals[0][start_index:start_index + self.num_actions]
    qlist = q_vals.tolist()
    action = qlist.index(max(qlist))
    self.action = action
    return get_desired_space_from_action(self.position, action)
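# Note on the slicing above (illustrative, not from the original repo): this
# version of move() reads the joint DQN head as num_actions blocks of
# num_actions Q-values, one block per possible ally action. A tiny worked
# example of that indexing with made-up values:
num_actions = 5
all_q_vals = [[0.0] * (num_actions * num_actions)]    # one row of 5 * 5 = 25 outputs
ally_action = 3                                       # ally's observed action
start_index = ally_action * num_actions               # -> 15
q_vals = all_q_vals[0][start_index:start_index + num_actions]  # the agent's own 5 Q-values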
def play(labels):
    extended_labels = labels + list(range(max(labels) + 1, million + 1))
    current, index = labels[0], build_index(extended_labels)
    for i in range(10 * million):
        current, index = play_round(current, index)
    return index[1] * index[index[1]]
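# play() above relies on build_index, play_round and million from elsewhere in
# its module; they are not shown here. A minimal sketch of what they could look
# like, assuming the usual successor-map representation (index[label] is the
# label immediately clockwise of `label`) that the final index[1] * index[index[1]]
# lookup suggests. Illustrative only, not the original implementations:
million = 1000000

def build_index(labels):
    # index[label] -> the label that follows `label` in the circle
    index = {}
    for a, b in zip(labels, labels[1:]):
        index[a] = b
    index[labels[-1]] = labels[0]  # close the circle
    return index

def play_round(current, index):
    # Pick up the three labels sitting clockwise of the current one
    a = index[current]
    b = index[a]
    c = index[b]
    index[current] = index[c]
    # Destination: current - 1, wrapping around and skipping the picked-up labels
    dest = current - 1 or len(index)
    while dest in (a, b, c):
        dest = dest - 1 or len(index)
    # Splice the picked-up labels back in right after the destination
    index[c] = index[dest]
    index[dest] = a
    # The new current label is the one now clockwise of the old current
    return index[current], index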
def test_play(student):
    hands = game.deal()
    players = (student, student, student, student)
    discard = []
    lp = 0
    game_over = False
    retval = []
    round = 0
    t_pass = True
    while not game_over:
        round += 1
        retval.append('Round {0}'.format(round))
        retval.append([
            'HANDS',
            ['player{0}: {1}'.format(i, sorted(h)) for i, h in enumerate(hands)]
        ])
        prev_hands = copy.deepcopy(hands)
        prev_lp = lp
        try:
            hands, lp, game_over, discard = game.play_round(
                hands, players, discard, lp, 'raise')
        except game.InvalidAction as e:
            retval.append(['INVALID ACTION', [e, e.call]])
            t_pass = False
            break
        retval.append([
            'PLAYS',
            ['player{0}: {1}'.format((prev_lp + i) % len(players), str(p))
             for i, p in enumerate(discard[-1])]
        ])
        if prev_hands == hands:
            # Nobody played, so we break out or we would be stuck
            game_over = True
            retval.append("GAME OVER: Nobody played")
        else:
            retval.append(['WINNER', ['player{0}'.format(lp)]])
from game import labels, example_labels, play_round, hash_labels

for i in range(100):
    labels = play_round(labels)
    print(hash_labels(labels))
def move(self, board):
    # Convert to numpy array
    start_state = self.make_np_array(board)

    # epsilon goes from 1 down to 0.1
    if self.epsilon > 0.1:
        self.epsilon -= 0.000001

    # get ally's predicted move
    # ally_predicted_action = self.model_cnn.predict(start_state)
    # print("ally", ally_predicted_action)
    # identify which 5 output nodes to take max from
    # ally_alist = ally_predicted_action[0].tolist()
    # ally_action = ally_alist.index(max(ally_alist))
    # start_index = ally_action * self.num_actions

    # TODO Remove later, add cnn back
    # get ally's next move
    tmp_board = play_round(board, 4)
    ally_action = tmp_board.players[0].action
    tmp_list = [0, 0, 0, 0, 0]
    tmp_list[ally_action] = 1
    ally_np = np.array(tmp_list, dtype=np.uint8)
    ally_np = np.reshape(ally_np, (1, 5))

    # get Q values
    all_q_vals = self.model_dqn.predict([start_state, ally_np, self.mask5])

    # Choose your action: random action if < epsilon or best action if > epsilon
    rand_val = random.random()
    if rand_val < self.epsilon:
        action = random.randint(0, 4)
    else:
        qlist = all_q_vals[0].tolist()
        action = qlist.index(max(qlist))

    # Play one game iteration with the chosen action
    new_board = play_round(board, action)
    new_state = self.make_np_array(new_board)

    #######################
    # reward calculation
    #######################
    reward = 0
    # first add actual points (use team scores rather than just the player's score)
    # team 1 score minus team 2 score
    # reward += (new_board.score[0] - board.score[0] + new_board.score[1] - board.score[1] -
    #            (new_board.score[2] - board.score[2] + new_board.score[3] - board.score[3]))
    reward += (new_board.score[1] - board.score[1]) * 10
    # get everyone else's targets
    # find your target based on closest food in direction of action taken (don't consider foods 'behind' you)
    # consider that you could have just eaten your target and it moved
    # if you and ally share same target, lose points, unless any enemy is closer to the same target than your ally
    # very small reward for moving toward center of map
    # other rewards

    # convert action into np
    tmp_list = [0, 0, 0, 0, 0]
    tmp_list[action] = 1
    action_np = np.array(tmp_list, dtype=np.uint8)
    action_np = np.reshape(action_np, (1, 5))

    self.memory.append(
        [start_state, ally_np, action_np, reward, new_state])

    # Fit (when there are enough samples)
    if len(self.memory) > self.min_memory_size:
        batch = random.sample(self.memory, self.batch_size)
        start_states = np.array([np.squeeze(x[0]) for x in batch],
                                dtype=np.uint8)  # remove first dimension
        ally_actions = np.array([np.squeeze(x[1]) for x in batch],
                                dtype=np.uint8)  # remove first dimension
        actions = np.array([np.squeeze(x[2]) for x in batch],
                           dtype=np.uint8)  # remove first dimension
        rewards = np.array([x[3] for x in batch])
        next_states = np.array([np.squeeze(x[4]) for x in batch],
                               dtype=np.uint8)  # remove first dimension
        self.fit_batch(start_states, ally_actions, actions, rewards, next_states)

    self.iteration += 1

    # Make a new prediction now that the weights have been updated
    all_q_vals = self.model_dqn.predict([start_state, ally_np, self.mask5])
    qlist = all_q_vals[0].tolist()
    if self.iteration % 10000 == 1:
        print(qlist)
    action = qlist.index(max(qlist))
    return get_desired_space_from_action(self.position, action)
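# self.fit_batch is not shown in the snippet above. A minimal sketch of a standard
# one-step DQN update that would match the call, assuming the Keras-style
# multi-input model, the one-hot action mask implied by self.mask5, and a discount
# factor self.gamma (all assumptions, not the original implementation; numpy is
# assumed to be imported as np, as in the snippet above):
def fit_batch(self, start_states, ally_actions, actions, rewards, next_states):
    # Q-values of the successor states with every action unmasked
    ones_mask = np.ones(actions.shape, dtype=np.uint8)
    next_q_vals = self.model_dqn.predict([next_states, ally_actions, ones_mask])
    # One-step Bellman targets: r + gamma * max_a' Q(s', a')
    targets = rewards + self.gamma * np.max(next_q_vals, axis=1)
    # With the one-hot mask multiplied into the output layer, only the chosen
    # action's Q-value is non-zero, so fitting against actions * targets updates
    # just that output node for each sample.
    self.model_dqn.fit([start_states, ally_actions, actions],
                       actions * targets[:, None],
                       epochs=1, verbose=0)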
def main():
    """Controlling the game flow"""
    echo.clear()
    print(constants.WELCOME_MESSAGE)

    # getting towns
    towns = init.get_towns()
    num_towns = len(towns)
    rng_towns = range(num_towns)

    # getting hometown and number of managers in hometown
    managers = [0 for town in rng_towns]
    (hometown, num_managers) = init.get_managers(towns)
    managers[towns.index(hometown)] = num_managers

    # getting the number of days to be played
    period = init.get_timeframe()

    # getting possible wins for each city
    potentials = init.get_potentials(towns)

    # getting the network (adjacency)
    network = init.get_network(towns)

    hotels = [0 for town in rng_towns]
    score_of_today = [0 for town in rng_towns]
    score = 0

    # mapping cities to integers (inverse of towns)
    cities = dict((town, towns.index(town)) for town in towns)
    # mapping integers to towns (inverse of cities)
    towns = dict((towns.index(town), town) for town in towns)

    # state represents the most important data structure of the whole program
    # it contains the essential attributes of the game:
    # state[t] = [town_name, managers, hotels, potential, score_of_today, neighbours]
    state = dict((town, [towns[town], managers[town], hotels[town],
                         potentials[town], score_of_today[town], network[town]])
                 for town in rng_towns)

    day = 1
    # main loop over the number of days to be played
    while day < period + 1:
        echo.clear()
        echo.headline(day)
        days_left = period - day
        [state, day_shift, city] = game.play_round(state, period, days_left,
                                                   towns, cities)

        # special case: hire -> day_shift > 1
        if day_shift > 1:
            # new manager is active with the beginning of day d + 2
            # where d is the day he was hired (in the morning)
            for shift in range(day_shift):
                # state enters as state at the end of day + shift
                profit_today = sum([state[town][4] for town in rng_towns])
                if shift > 0:
                    print("Automatic calculation ...\n")
                    time.sleep(.5)
                print("\nStatus at the end of day " + str(day + shift))
                echo.status(state)
                print("Profit on day " + str(day + shift) + ": " +
                      str(profit_today))
                # adding the profit of the current day to the overall score
                score += profit_today
                print("Total profit at the end of day " + str(day + shift) +
                      ": " + str(score) + "\n")
                # resetting the score of the day
                for town in rng_towns:
                    state[town][4] = 0
                # we want the manager to be effective with beginning of
                # day + (shift = 2)
                if shift == 1:
                    state[city][1] += 1
                # turning state into evening mode
                state = game.calculate_profit(state)
                shift += 1
                # special case: resetting state to morning mode at the end
                # of the final iteration
                if shift == day_shift:
                    for town in rng_towns:
                        state[town][4] = 0
            input()
            echo.clear()
        # every other case
        else:
            profit_today = sum([state[town][4] for town in rng_towns])
            print("\nStatus at the end of day " + str(day) +
                  " (" + str(period) + "):")
            echo.status(state)
            print("Profit on day " + str(day) + ": " + str(profit_today))
            score += profit_today
            print("Total profit at the end of day " + str(day) + ": " +
                  str(score) + "\n")
            # setting the score of the day to 0 for beginning of next day
            for town in rng_towns:
                state[town][4] = 0
            input()
            echo.clear()

        day += day_shift

    print("Game over!\n")
    print("Total profit in the game: " + str(score) + "\n")
    name = get_name()
    save_score(score, name)
    print(constants.GOODBYE_MESSAGE)