def run_game(self, verbose=True, max_rounds=50):
    """Play one complete game between ``self.ais`` and summarise the result.

    A fresh ``GameState`` is created for ``self.num_players`` players and
    stored on ``self.state``. Each loop iteration shuffles the three card
    decks, asks the AI of the current player for a move, applies it, and
    records the ``move_info`` object returned by the AI.

    The end-of-game check only runs when play returns to player 0, so every
    player gets the same number of turns. The winner is the player with the
    highest score; ties are broken by the fewest played cards. If that still
    leaves several players, the game counts as a draw.

    Args:
        verbose: If true, print the scores each round and every move made.
        max_rounds: Abort the game once the round counter exceeds this
            value. Defaults to 50.

    Returns:
        A ``FinishedGameInfo``. For a decisive win it carries the round
        number, the winner's index and the number of tier 1/2/3 cards the
        winner played. For a draw or an aborted game the first two
        arguments are ``None``. The list of recorded ``move_info`` objects
        is always passed as ``state_vectors``.
    """
    state = GameState(players=self.num_players, init_game=True,
                      validate=self.validate)
    self.state = state

    state_vectors = []
    game_round = 1

    while True:
        if state.current_player_index == 0:
            scores = np.array([player.score for player in state.players])
            if np.any(scores >= self.end_score):
                # Game over: highest score wins, fewest played cards
                # breaks ties.
                max_score = np.max(scores)
                leaders = [(index, player)
                           for index, player in enumerate(state.players)
                           if player.score == max_score]
                min_num_cards = min(len(player.cards_played)
                                    for _, player in leaders)
                winning_players = [
                    (index, player) for index, player in leaders
                    if len(player.cards_played) == min_num_cards]

                print('## players {} win with {} points after {} rounds'.
                      format([index for index, _ in winning_players],
                             max_score, game_round))

                # Debugging hook kept from the original implementation: an
                # inconsistent final state drops into an interactive
                # debugger instead of aborting the run.
                try:
                    state.verify_state()
                except AssertionError:
                    import traceback
                    traceback.print_exc()
                    import ipdb
                    ipdb.set_trace()

                if len(winning_players) > 1:
                    print('Draw!')
                    return FinishedGameInfo(None, None,
                                            state_vectors=state_vectors)

                winner_index, winner = winning_players[0]

                # One-hot outcome vector, rotated so that each player sees
                # it from their own position.
                winner_value = np.zeros(self.num_players)
                winner_value[winner_index] = 1.
                # Guard: no move has been recorded if the end condition
                # already held before the first turn.
                if state_vectors:
                    final_info = state_vectors[-1]
                    for player_index in range(state.num_players):
                        final_info.post_move_values[player_index] = np.roll(
                            winner_value, -player_index)

                bought_by_tier = {
                    tier: sum(1 for card in winner.cards_played
                              if card.tier == tier)
                    for tier in (1, 2, 3)}
                return FinishedGameInfo(
                    game_round, winner_index,
                    winner_num_t1_bought=bought_by_tier[1],
                    winner_num_t2_bought=bought_by_tier[2],
                    winner_num_t3_bought=bought_by_tier[3],
                    state_vectors=state_vectors)

            # Nobody has reached the end score yet: start the next round.
            game_round += 1
            if verbose:
                print('Round {}: {}'.format(game_round, state.get_scores()))

        if game_round > max_rounds:
            print('Stopped after {} rounds'.format(max_rounds))
            return FinishedGameInfo(None, None, state_vectors=state_vectors)

        # Reshuffle every deck before each move.
        for tier in range(1, 4):
            state.generator.shuffle(state.cards_in_deck(tier))

        mover_index = state.current_player_index
        move, move_info = self.ais[mover_index].make_move(state)
        if verbose:
            # `values` is not defined anywhere in this method; report the
            # evaluation carried by move_info (or move_info itself) instead.
            print('P{}: {}, value {}'.format(
                mover_index, move,
                getattr(move_info, 'post_move_values', move_info)))
        state.make_move(move)

        state_vectors.append(move_info)