def playMatchesBetweenVersions(env, run_version, player1version, player2version, EPISODES, logger, turns_until_tau0, goes_first=0):

    if player1version == -1:
        player1 = User('player1', env.state_size, env.action_size)
    else:
        player1_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, env.input_shape, env.action_size, config.HIDDEN_CNN_LAYERS)

        if player1version > 0:
            player1_network = player1_NN.read(env.name, run_version, player1version)
            player1_NN.model.set_weights(player1_network.get_weights())
        player1 = Agent('player1', env.state_size, env.action_size, config.MCTS_SIMS, config.CPUCT, player1_NN)

    if player2version == -1:
        player2 = User('player2', env.state_size, env.action_size)
    else:
        player2_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, env.input_shape, env.action_size, config.HIDDEN_CNN_LAYERS)

        if player2version > 0:
            player2_network = player2_NN.read(env.name, run_version, player2version)
            player2_NN.model.set_weights(player2_network.get_weights())
        player2 = Agent('player2', env.state_size, env.action_size, config.MCTS_SIMS, config.CPUCT, player2_NN)

    scores, memory, points, sp_scores = playMatches(player1, player2, EPISODES, logger, turns_until_tau0, None, goes_first)

    return (scores, memory, points, sp_scores)
def playMatchesBetweenVersions(env, run_version, player1version, player2version, EPISODES, logger, turns_until_tau0, goes_first=0):

    if player1version == -1:
        player1 = User('player1', env.state_size, env.action_size)
    else:
        player1_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, env.input_shape, env.action_size, config.HIDDEN_CNN_LAYERS)

        if player1version > 0:
            player1_network = player1_NN.read(env.name, run_version, player1version)
            player1_NN.model.set_weights(player1_network.get_weights())
        player1 = Agent('player1', env.state_size, env.action_size, config.MCTS_SIMS, config.CPUCT, player1_NN)

    if player2version == -1:
        player2 = User('player2', env.state_size, env.action_size)
    else:
        player2_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, env.input_shape, env.action_size, config.HIDDEN_CNN_LAYERS)

        if player2version > 0:
            player2_network = player2_NN.read(env.name, run_version, player2version)
            player2_NN.model.set_weights(player2_network.get_weights())
        player2 = Agent('player2', env.state_size, env.action_size, config.MCTS_SIMS, config.CPUCT, player2_NN)

    # show moves whenever a human (version -1) is playing
    printmoves = player1version == -1 or player2version == -1

    scores, memory, points, sp_scores = playMatches(player1, player2, EPISODES, logger, turns_until_tau0, None, goes_first, printmoves)

    return (scores, memory, points, sp_scores)
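# A usage sketch for playMatchesBetweenVersions above, assuming the Game
# environment and loggers module (lg) used elsewhere in these snippets; the
# run and version numbers are illustrative, and -1 selects a human User.
env = Game()
scores, _, points, sp_scores = playMatchesBetweenVersions(
    env, run_version=1, player1version=5, player2version=-1,
    EPISODES=10, logger=lg.logger_tourney, turns_until_tau0=0)
print(scores)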
def playMatchesBetweenVersions(env, run_version, player1version, player2version, EPISODES, logger, turns_until_tau0, goes_first=0):
    env = Game()
    if player1version == -1:
        player1 = User("user1", env.state_size, env.action_size)
    else:
        player1_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, env.input_shape, env.action_size, config.HIDDEN_CNN_LAYERS)

        if player1version > 0:
            name = env.name + "{0:0>4}".format(player1version)
            if Provider.getNetByName(name) == None:
                return
            player1_network = player1_NN.read(env.name, run_version, player1version)
            player1_NN.model.set_weights(player1_network.get_weights())
        netName = env.name + "{0:0>4}".format(player1version)
        player1 = Agent(netName, env.state_size, env.action_size, config.MCTS_SIMS, config.CPUCT, player1_NN)

    if player2version == -1:
        name = input('enter username: ')
        user2 = Provider.getPersonByName(name)
        player2 = User(user2.name, env.state_size, env.action_size)
    else:
        player2_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, env.input_shape, env.action_size, config.HIDDEN_CNN_LAYERS)

        if player2version > 0:
            name = env.name + "{0:0>4}".format(player2version)
            if Provider.getNetByName(name) == None:
                return
            player2_network = player2_NN.read(env.name, run_version, player2version)
            player2_NN.model.set_weights(player2_network.get_weights())
        net2Name = env.name + "{0:0>4}".format(player2version)
        player2 = Agent(net2Name, env.state_size, env.action_size, config.MCTS_SIMS, config.CPUCT, player2_NN)

    scores, memory, points, sp_scores = playMatches(player1, player2, EPISODES, logger, turns_until_tau0, None, goes_first)

    return (scores, memory, points, sp_scores)
def playMatchesBetweenVersions(env, run_version, player1version, player2version, EPISODES, logger, turns_until_tau0, goes_first=0):
    # A version of -1 means a human player
    if player1version == -1:
        player1 = User('player1', env.state_size, env.action_size)
    else:
        # Residual_CNN returns the network object
        player1_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, env.input_shape, env.action_size, config.HIDDEN_CNN_LAYERS)

        if player1version > 0:
            # Not a human player, so load the trained version and its weights
            player1_network = player1_NN.read(env.name, run_version, player1version)
            player1_NN.model.set_weights(player1_network.get_weights())
        # Run simulations with it and build the MCTS tree
        player1 = Agent('player1', env.state_size, env.action_size, config.MCTS_SIMS, config.CPUCT, player1_NN)

    if player2version == -1:
        player2 = User('player2', env.state_size, env.action_size)
    else:
        player2_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, env.input_shape, env.action_size, config.HIDDEN_CNN_LAYERS)

        if player2version > 0:
            player2_network = player2_NN.read(env.name, run_version, player2version)
            player2_NN.model.set_weights(player2_network.get_weights())
        player2 = Agent('player2', env.state_size, env.action_size, config.MCTS_SIMS, config.CPUCT, player2_NN)

    scores, memory, points, sp_scores = playMatches(player1, player2, EPISODES, logger, turns_until_tau0, None, goes_first)

    return (scores, memory, points, sp_scores)
def start(self, data):
    # Generate Game from initial json
    board_json = data[BOARD_KEY]

    # Find grid shape
    self.grid_shape = (board_json[WIDTH_KEY], board_json[HEIGHT_KEY])
    self.w, self.h = self.grid_shape

    # Find the snake positions.
    snakes = board_json[SNAKES_KEY]

    # Find your position
    your_id = data[YOU_KEY][ID_KEY]
    starting_pos_json = []
    for snake in snakes:
        if snake[ID_KEY] == your_id:
            starting_pos_json = [snake[BODY_KEY][0]] + starting_pos_json
        else:
            starting_pos_json = starting_pos_json + [snake[BODY_KEY][0]]

    starting_pos = []
    for pos_json in starting_pos_json:
        starting_pos += [(pos_json['x'], pos_json['y'])]

    self.num_players = len(snakes) + 1
    # Food layer immediately after players
    self.food_layer = self.num_players * 2
    # Other Layer immediately after food
    self.other_layer = self.food_layer + 1
    # Total layers
    self.num_layers = self.other_layer + 1

    # Find the food positions.
    foods = board_json[FOOD_KEY]
    starting_food = []
    for pos_json in foods:
        # Append one (x, y) tuple per food item; the original extended the
        # list with bare ints, which did not match starting_pos above
        starting_food += [(pos_json['x'], pos_json['y'])]

    print("Creating new Game")
    # Create Game
    env = Game(self.grid_shape, self.num_players, starting_pos, starting_food)

    print("Creating new Agent")
    # Create Agent
    player_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, env.input_shape, env.action_size, config.HIDDEN_CNN_LAYERS)
    player_network = player_NN.read(env.name, self.run_version, self.playerversion)
    player_NN.model.set_weights(player_network.get_weights())
    # self.player = Agent('player', env.state_size, env.action_size, config.MCTS_SIMS, config.CPUCT, player_NN)
    self.player = Agent('player', env.state_size, env.action_size, 50, config.CPUCT, player_NN)
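# A minimal example of the JSON this start() handler expects, assuming the
# *_KEY constants map to the standard Battlesnake field names ("board",
# "width", "height", "snakes", "food", "you", "id", "body"); the exact wire
# format depends on the Battlesnake API version, so treat this as a sketch.
example_start_data = {
    "board": {
        "width": 11,
        "height": 11,
        "snakes": [
            {"id": "snake-a", "body": [{"x": 1, "y": 1}]},
            {"id": "snake-b", "body": [{"x": 9, "y": 9}]},
        ],
        "food": [{"x": 5, "y": 5}],
    },
    "you": {"id": "snake-a"},
}
# handler.start(example_start_data)  # hypothetical handler instance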
# Imports implied by this snippet but not shown in the original excerpt
from os import listdir, remove
from random import choice
import pickle


def train_network(agent, train_phase):
    if train_phase[0] == 'start':
        net = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, config.INPUT_START_DIM, config.OUTPUT_START_DIM, config.HIDDEN_CNN_LAYERS)
        net_str = 's'
    elif train_phase[0] == 'general':
        net = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, config.INPUT_DIM, config.OUTPUT_DIM, config.HIDDEN_CNN_LAYERS)
        net_str = 'g'
    net.read(agent, net_str)

    #validation_file = choice(listdir('validation_states'))
    #with open('validation_states\\' + validation_file, 'rb') as input_file:
    #    validation = pickle.load(input_file)

    min_val_error = 10000.0
    for i in range(config.TRAINING_LOOPS):
        print("Iteration #" + str(i))
        # pick a random recorded game and train on it, consuming the file
        game_file = choice(listdir('train_states'))
        with open('train_states\\' + game_file, 'rb') as input_file:
            game_memory = pickle.load(input_file)
        remove('train_states\\' + game_file)

        hist = net.fit(game_memory['batch_states'], game_memory['batch_targets'], config.EPOCHS, 2, 0.0, 32)
        #metric = hist.history['val_loss'][config.EPOCHS - 1]
        #if metric < min_val_error:
        #    min_val_error = metric
        net.write(agent, net_str)

    print("Min Loss: " + str(min_val_error))
def predict():
    current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, (2,) + (6, 7), 42, config.HIDDEN_CNN_LAYERS)
    current_NN.model.set_weights(current_NN.read('connect4', 2, 74).get_weights())
    current_player = Agent('current_player', 84, 42, config.MCTS_SIMS, config.CPUCT, current_NN)

    if request.method == 'POST':
        try:
            data = request.get_json()
            gs = GameState(np.array(json.loads(data["gameState"])), 1)
            #print(gs)
            preds = current_player.get_preds(gs)
            preds = np.array(preds[1]).reshape(6, 7)
            pred_arg = np.unravel_index(preds.argmax(), preds.shape)
        except ValueError:
            return jsonify("Please enter a proper GameState.")
        return jsonify([int(x) for x in pred_arg])
    if request.method == 'GET':
        return "Hello World! GET request"
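# A minimal client for the Flask endpoint above, assuming it is registered at
# /predict on a local development server; the route and port are assumptions.
import json
import requests

board = [0] * 42  # an empty 6x7 Connect 4 board, flattened row by row
resp = requests.post("http://localhost:5000/predict",
                     json={"gameState": json.dumps(board)})
print(resp.json())  # [row, column] of the network's highest-scoring move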
def evaluation_worker(conn):
    import config
    from config import PLAYER_COUNT, TEAM_SIZE, MEMORY_SIZE
    import initialise
    from model import Residual_CNN, import_tf
    import_tf(1024 * 3)
    from game import Game
    from agent import Agent
    from memory import Memory
    from funcs import playMatches
    import loggers as lg
    import logging
    import pickle  # needed for the test-memory dump below (missing in the original excerpt)
    import time

    # initialise new test memory
    test_memories = Memory(int(MEMORY_SIZE / 10))

    env = Game()

    # initialise new models
    # create untrained neural network objects from the config file
    if len(env.grid_shape) == 2:
        shape = (1,) + env.grid_shape
    else:
        shape = env.grid_shape

    if TEAM_SIZE > 1:
        current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape, int(PLAYER_COUNT / TEAM_SIZE), config.HIDDEN_CNN_LAYERS)
        best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape, int(PLAYER_COUNT / TEAM_SIZE), config.HIDDEN_CNN_LAYERS)
    else:
        current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape, PLAYER_COUNT, config.HIDDEN_CNN_LAYERS)
        best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape, PLAYER_COUNT, config.HIDDEN_CNN_LAYERS)

    current_player_version = 0
    best_player_version = 0

    # If loading an existing neural network, set the weights from that model
    if initialise.INITIAL_MODEL_VERSION != None:
        best_player_version = initialise.INITIAL_MODEL_VERSION
        #print('LOADING MODEL VERSION ' + str(initialise.INITIAL_MODEL_VERSION) + '...')
        m_tmp = best_NN.read(env.name, initialise.INITIAL_RUN_NUMBER, initialise.INITIAL_MODEL_VERSION)
        current_NN.model.set_weights(m_tmp.get_weights())
        best_NN.model.set_weights(m_tmp.get_weights())
    # otherwise just ensure the weights on the two players are the same
    else:
        best_NN.model.set_weights(current_NN.model.get_weights())

    current_player = Agent('current_player', config.MCTS_SIMS, config.CPUCT, current_NN)
    best_player = Agent('best_player', config.MCTS_SIMS, config.CPUCT, best_NN)

    time.sleep(20)

    while 1:
        # request current_NN weights
        conn.send(current_player_version)
        # wait indefinitely for current_NN weights
        conn.poll(None)
        data = conn.recv()

        if data:
            # set current_NN weights
            current_NN.model.set_weights(data)
            current_player_version += 1

            # play tournament games
            tourney_players = []
            if TEAM_SIZE > 1:
                for i in range(int(PLAYER_COUNT / TEAM_SIZE)):  # for each team
                    # alternate adding best_players and current_players up to the TEAM_SIZE
                    for k in range(TEAM_SIZE):
                        if k % 2 == 0:
                            tourney_players.append(best_player)
                        else:
                            tourney_players.append(current_player)
            else:
                for i in range(PLAYER_COUNT):
                    if i % 2 == 0:
                        tourney_players.append(best_player)
                    else:
                        tourney_players.append(current_player)

            scores, test_memories = playMatches(tourney_players, config.EVAL_EPISODES, lg.logger_tourney, 0.0, test_memories, evaluation=True)
            test_memories.clear_stmemory()

            # if the current player is significantly better than the best_player replace the best player;
            # the replacement is made by just copying the weights of current_player's nn to best_player's nn
            if scores['current_player'] > scores['best_player'] * config.SCORING_THRESHOLD:
                # if current_NN won send message
                conn.send(((current_player_version, best_player_version), str(scores)))
                best_player_version = best_player_version + 1
                best_NN.model.set_weights(current_NN.model.get_weights())
                best_NN.write(env.name, best_player_version)

            if len(test_memories.ltmemory) == test_memories.MEMORY_SIZE and current_player_version % 5 == 0:
                # run_folder is assumed to come from the project's settings module;
                # the original excerpt dumped an undefined name `memories`, which
                # can only be the test memory collected above
                pickle.dump(test_memories, open(run_folder + "memory/test_memory" + str(current_player_version).zfill(4) + ".p", "wb"))

            #print("Evaluating performance of current_NN")
            #current_player.evaluate_accuracy(test_memories.ltmemory)
            #print('\n')
        else:
            time.sleep(10)
j = 0
# Pit each CNN version against the Res version with the same index, EPISODES games per pairing.
for player_idx, _ in enumerate(version_list_CNN):
    for opponent_idx, _ in enumerate(version_list_Res):
        if player_idx != opponent_idx:
            continue

        m_tmp = player1_NN.read(initialise.INITIAL_RUN_NUMBER, player_idx + 1)
        player1_NN.model.set_weights(m_tmp.get_weights())
        player1 = Agent('player1', env.state_size, env.action_size, config.MCTS_SIMS, config.CPUCT, player1_NN)

        m_tmp = player2_NN.read(initialise.INITIAL_RUN_NUMBER, opponent_idx + 1)
        player2_NN.model.set_weights(m_tmp.get_weights())
        player2 = Agent('player2', env.state_size, env.action_size, config.MCTS_SIMS, config.CPUCT, player2_NN)

        scores, memory, points, sp_scores = funcs.playMatches(
            player1, player2, EPISODES, lg.logger_main, turns_until_tau0=0, goes_first=1)

        print('\n')
        print('-------')
        print('player1: version {}'.format(player_idx))
        print('player2: version {}'.format(opponent_idx))
pool = multiprocessing.Pool(2)
out = zip(pool.map(_selfplay, range(0, 2)))
t = tuple(out)
len(t)

chessenv.action_size
chessenv.state_size
chessenv.grid_shape

current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, (119,) + chessenv.grid_shape, chessenv.action_size, config.HIDDEN_CNN_LAYERS)
best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, (119,) + chessenv.grid_shape, chessenv.action_size, config.HIDDEN_CNN_LAYERS)
best_NN.model.set_weights(current_NN.model.get_weights())

best_player_version = 2
print('LOADING MODEL VERSION ' + str(2) + '...')
m_tmp = best_NN.read(chessenv.name, 2, best_player_version)
current_NN.model.set_weights(m_tmp.get_weights())
best_NN.model.set_weights(m_tmp.get_weights())

current_player = Agent('current_player', chessenv.state_size, chessenv.action_size, config.MCTS_SIMS, config.CPUCT, current_NN)
best_player = Agent('best_player', chessenv.state_size, chessenv.action_size, config.MCTS_SIMS, config.CPUCT, best_NN)

state = chessenv.reset()
state.render(None)

# action, pi, MCTS_value, NN_value = current_player.act(state, 0)
scores, memory, _, _ = playMatches(best_player, best_player, config.EPISODES, lg.logger_main, turns_until_tau0=config.TURNS_UNTIL_TAU0, memory=memory)
current_player.replay(memory.ltmemory)
######## LOAD MODEL IF NECESSARY ########

# create untrained neural network objects from the config file
current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, (2,) + env.grid_shape, env.action_size, config.HIDDEN_CNN_LAYERS)
best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, (2,) + env.grid_shape, env.action_size, config.HIDDEN_CNN_LAYERS)

# If loading an existing neural network, set the weights from that model
if initialize.INITIAL_MODEL_VERSION != None:
    best_player_version = initialize.INITIAL_MODEL_VERSION
    print('LOADING MODEL VERSION ' + str(initialize.INITIAL_MODEL_VERSION) + '...')
    m_tmp = best_NN.read(env.name, initialize.INITIAL_RUN_NUMBER, best_player_version)
    current_NN.model.set_weights(m_tmp.get_weights())
    best_NN.model.set_weights(m_tmp.get_weights())
# otherwise just ensure the weights on the two players are the same
else:
    best_player_version = 0
    best_NN.model.set_weights(current_NN.model.get_weights())

# copy the config file to the run folder
copyfile('./config.py', run_folder + 'config.py')
plot_model(current_NN.model, to_file=run_folder + 'models/model.png', show_shapes=True)

print('\n')
class Agent_NN:
    def __init__(self, enable_cache=False):
        self.nn_start = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, config.INPUT_START_DIM, config.OUTPUT_START_DIM, config.HIDDEN_CNN_LAYERS)
        self.nn = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, config.INPUT_DIM, config.OUTPUT_DIM, config.HIDDEN_CNN_LAYERS)

        self.enable_cache = enable_cache
        self.cache = {}

    def purge_cache(self):
        self.cache = {}

    def nn_read(self, name):
        self.nn_start.read(name, 's')
        self.nn.read(name, 'g')

    def nn_write(self, name):
        self.nn_start.write(name, 's')
        self.nn.write(name, 'g')

    def predict(self, state, perspective, mcts):
        network = self.build_nn_input(state, perspective, mcts=mcts)
        if network.shape[1] == config.INPUT_DIM[0]:
            return self.nn.predict(network)
        else:
            return self.nn_start.predict(network)

    def build_start_nn_input(self, state, perspective):
        nn_input = np.zeros((1, config.INPUT_START_DIM[0], config.INPUT_START_DIM[1], config.INPUT_START_DIM[2]), dtype=np.float32)

        numbers_output = {2: 1, 3: 2, 4: 3, 5: 4, 6: 5, 8: 5, 9: 4, 10: 3, 11: 2, 12: 1}
        rotation = np.random.randint(12)

        if self.enable_cache is True and rotation in self.cache:
            nn_input[:, :11, :, :] = self.cache[rotation]
        else:
            # Resources outputs
            for number, tile in state.numbers:
                resource = state.tiles[tile]
                for vertex in config.tiles_vertex[tile]:
                    nn_input[0, resource - 2,
                             config.vertex_to_nn_input[rotation][vertex][0],
                             config.vertex_to_nn_input[rotation][vertex][1]] += numbers_output[number] / 15.0

            # Ports
            for key, r in enumerate([config.SHEEP, config.ORE, config.BRICK, config.WHEAT, config.WOOD, config.GENERIC]):
                indices = [i for i, x in enumerate(state.ports) if x == r]
                for i in indices:
                    for vertex in config.ports_vertex[i]['vert']:
                        nn_input[0, key + 5,
                                 config.vertex_to_nn_input[rotation][vertex][0],
                                 config.vertex_to_nn_input[rotation][vertex][1]] = 1

            if self.enable_cache is True:
                self.cache[rotation] = nn_input[:, :11, :, :]

        # Settlements, cities, roads
        for p in range(4):
            p_order = (4 + p - perspective) % 4
            for s in state.players[p].settlements:
                nn_input[0, 11 + 2 * p_order,
                         config.vertex_to_nn_input[rotation][s][0],
                         config.vertex_to_nn_input[rotation][s][1]] = 1
            for r in state.players[p].roads:
                nn_input[0, 12 + 2 * p_order,
                         config.vertex_to_nn_input[rotation][r[0]][0],
                         config.vertex_to_nn_input[rotation][r[0]][1]] += 1 / 3.0
                nn_input[0, 12 + 2 * p_order,
                         config.vertex_to_nn_input[rotation][r[1]][0],
                         config.vertex_to_nn_input[rotation][r[1]][1]] += 1 / 3.0

        # Cards
        for p in range(4):
            p_order = (4 + p - perspective) % 4
            for key, r in enumerate([config.SHEEP, config.ORE, config.BRICK, config.WHEAT, config.WOOD]):
                nn_input[0, 19 + key + 5 * p_order, :, :] = state.players[p].cards[r] / 10.0

        # State
        if (state.game_phase == config.PHASE_INITIAL_SETTLEMENT or state.game_phase == config.PHASE_INITIAL_ROAD) and state.initial_phase_decrease == 0:
            nn_input[0, 39, :, :] = 1
        if (state.game_phase == config.PHASE_INITIAL_SETTLEMENT or state.game_phase == config.PHASE_INITIAL_ROAD) and state.initial_phase_decrease == 1:
            nn_input[0, 40, :, :] = 1

        # Player turn
        for p in range(4):
            p_order = (4 + p - perspective) % 4
            if p == state.player_turn:
                nn_input[0, 41 + p_order, :, :] = 1

        return nn_input

    def build_nn_input(self, state, perspective, mcts=None):
        if state.game_phase == config.PHASE_INITIAL_SETTLEMENT or state.game_phase == config.PHASE_INITIAL_ROAD:
            return self.build_start_nn_input(state, perspective)

        nn_input = np.zeros((1, config.INPUT_DIM[0], config.INPUT_DIM[1], config.INPUT_DIM[2]), dtype=np.float32)

        numbers_output = {2: 1, 3: 2, 4: 3, 5: 4, 6: 5, 8: 5, 9: 4, 10: 3, 11: 2, 12: 1}
        rotation = np.random.randint(12)

        if self.enable_cache is True and rotation in self.cache:
            nn_input[:, :11, :, :] = self.cache[rotation]
        else:
            # Resources outputs
            for number, tile in state.numbers:
                resource = state.tiles[tile]
                for vertex in config.tiles_vertex[tile]:
                    nn_input[0, resource - 2,
                             config.vertex_to_nn_input[rotation][vertex][0],
                             config.vertex_to_nn_input[rotation][vertex][1]] += numbers_output[number] / 15.0

            # Ports
            for key, r in enumerate([config.SHEEP, config.ORE, config.BRICK, config.WHEAT, config.WOOD, config.GENERIC]):
                indices = [i for i, x in enumerate(state.ports) if x == r]
                for i in indices:
                    for vertex in config.ports_vertex[i]['vert']:
                        nn_input[0, key + 5,
                                 config.vertex_to_nn_input[rotation][vertex][0],
                                 config.vertex_to_nn_input[rotation][vertex][1]] = 1

            if self.enable_cache is True:
                self.cache[rotation] = nn_input[:, :11, :, :]

        # Settlements, cities, roads
        for p in range(4):
            p_order = (4 + p - perspective) % 4
            for s in state.players[p].settlements:
                nn_input[0, 11 + 3 * p_order,
                         config.vertex_to_nn_input[rotation][s][0],
                         config.vertex_to_nn_input[rotation][s][1]] = 1
            for c in state.players[p].cities:
                nn_input[0, 12 + 3 * p_order,
                         config.vertex_to_nn_input[rotation][c][0],
                         config.vertex_to_nn_input[rotation][c][1]] = 1
            for r in state.players[p].roads:
                nn_input[0, 13 + 3 * p_order,
                         config.vertex_to_nn_input[rotation][r[0]][0],
                         config.vertex_to_nn_input[rotation][r[0]][1]] += 1 / 3.0
                nn_input[0, 13 + 3 * p_order,
                         config.vertex_to_nn_input[rotation][r[1]][0],
                         config.vertex_to_nn_input[rotation][r[1]][1]] += 1 / 3.0

        # Cards
        for p in range(4):
            p_order = (4 + p - perspective) % 4
            for key, r in enumerate([config.SHEEP, config.ORE, config.BRICK, config.WHEAT, config.WOOD]):
                nn_input[0, 23 + key + 5 * p_order, :, :] = state.players[p].cards[r] / 10.0

        # Robber
        for vertex in config.tiles_vertex[state.robber_tile]:
            nn_input[0, 43,
                     config.vertex_to_nn_input[rotation][vertex][0],
                     config.vertex_to_nn_input[rotation][vertex][1]] = 1

        # Army Cards Played
        for p in range(4):
            p_order = (4 + p - perspective) % 4
            nn_input[0, 44 + p_order, :, :] = state.players[p].used_knights / 5.0

        # Army Holder
        for p in range(4):
            p_order = (4 + p - perspective) % 4
            nn_input[0, 48 + p_order, :, :] = state.players[p].largest_army_badge

        # Longest Road Holder
        for p in range(4):
            p_order = (4 + p - perspective) % 4
            nn_input[0, 52 + p_order, :, :] = state.players[p].longest_road_badge

        # Special Cards
        for p in range(4):
            p_order = (4 + p - perspective) % 4
            for key, r in enumerate([config.VICTORY_POINT, config.KNIGHT, config.MONOPOLY, config.ROAD_BUILDING, config.YEAR_OF_PLENTY]):
                nn_input[0, 56 + key + 5 * p_order, :, :] = state.players[p].special_cards.count(r) / 3.0

        # Discarding, initial game phase
        if state.game_phase == config.PHASE_DISCARD:
            nn_input[0, 76, :, :] = 1

        # Player turn
        for p in range(4):
            p_order = (4 + p - perspective) % 4
            if p == state.player_turn:
                nn_input[0, 77 + p_order, :, :] = 1

        # Other game phases
        if state.game_phase == config.PHASE_THROW_DICE:
            nn_input[0, 81, :, :] = 1
        if state.game_phase == config.PHASE_MOVE_ROBBER:
            nn_input[0, 82, :, :] = 1
        if state.game_phase == config.PHASE_STEAL_CARD:
            nn_input[0, 83, :, :] = 1
        if state.game_phase == config.PHASE_ROAD_BUILDING:
            nn_input[0, 84, :, :] = 1
        if state.game_phase == config.PHASE_YEAR_OF_PLENTY:
            nn_input[0, 85, :, :] = 1
        if state.game_phase == config.PHASE_TRADE_RESPOND:
            nn_input[0, 86, :, :] = 1

        for s in range(54):
            if state.available_settlement_spot(s):
                nn_input[0, 87,
                         config.vertex_to_nn_input[rotation][s][0],
                         config.vertex_to_nn_input[rotation][s][1]] = 1

        return nn_input
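# A small illustration of the perspective re-indexing used throughout
# Agent_NN: p_order = (4 + p - perspective) % 4 maps the observing player to
# slot 0, so the network always sees the board from its own seat first.
perspective = 2
for p in range(4):
    print(p, '->', (4 + p - perspective) % 4)
# prints: 0 -> 2, 1 -> 3, 2 -> 0 (the observer), 3 -> 1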
if initialise.INITIAL_MEMORY_VERSION == [None] * DECISION_TYPES:
    for i in range(DECISION_TYPES):
        memories.append(Memory(MEMORY_SIZE[i]))
else:
    for d_t, MEM_VERSION in enumerate(initialise.INITIAL_MEMORY_VERSION):
        print('LOADING MEMORY VERSION ' + str(MEM_VERSION) + '...')
        memories.append(pickle.load(open(run_archive_folder + game.name + '/run' + str(initialise.INITIAL_RUN_NUMBER).zfill(4) +
                                         "/memory/decision_" + str(d_t) + "_memory" + str(MEM_VERSION).zfill(4) + ".p", "rb")))
        if memories[-1].MEMORY_SIZE < MEMORY_SIZE[d_t]:
            memories[-1].extension(MEMORY_SIZE[d_t])

nn = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, game.grid_shape, PLAYER_COUNT, config.HIDDEN_CNN_LAYERS, 0)
m_tmp = nn.read(game.name, initialise.INITIAL_RUN_NUMBER, initialise.INITIAL_MODEL_VERSION[0])
nn.model.set_weights(m_tmp.get_weights())
trained_agent = Agent('trained_agent', game.action_size, config.MCTS_SIMS, config.CPUCT, [nn])

trained_agent.evaluate_accuracy(memories[0].ltmemory, 0)
quit()

if arg == "pred_test":
    nn = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, (1,) + game.grid_shape, [1],
                      config.HIDDEN_CNN_LAYERS, 0)
    player = Agent('player', game.state_size, config.MCTS_SIMS, config.CPUCT, [nn])
    print(player.predict_value(game.gameState))
# (opening of this block reconstructed by symmetry with the else-branch below)
if TEAM_SIZE > 1:
    current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape, int(PLAYER_COUNT / TEAM_SIZE), config.HIDDEN_CNN_LAYERS)
    best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape, int(PLAYER_COUNT / TEAM_SIZE), config.HIDDEN_CNN_LAYERS)
    opponent_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape, int(PLAYER_COUNT / TEAM_SIZE), config.HIDDEN_CNN_LAYERS)
else:
    current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape, PLAYER_COUNT, config.HIDDEN_CNN_LAYERS)
    best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape, PLAYER_COUNT, config.HIDDEN_CNN_LAYERS)
    opponent_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape, PLAYER_COUNT, config.HIDDEN_CNN_LAYERS)

best_player_version = 0

# If loading an existing neural network, set the weights from that model
if initialise.INITIAL_MODEL_VERSION != None:
    best_player_version = initialise.INITIAL_MODEL_VERSION
    print('LOADING MODEL VERSION ' + str(initialise.INITIAL_MODEL_VERSION) + '...')
    m_tmp = best_NN.read(env.name, initialise.INITIAL_RUN_NUMBER, initialise.INITIAL_MODEL_VERSION)
    current_NN.model.set_weights(m_tmp.get_weights())
    best_NN.model.set_weights(m_tmp.get_weights())
# otherwise just ensure the weights on the two players are the same
else:
    best_NN.model.set_weights(current_NN.model.get_weights())

# copy the config file to the run folder
copyfile('./config.py', run_folder + 'config.py')
plot_model(current_NN.model, to_file=run_folder + 'models/model.png', show_shapes=True)

print('\n')
import config

memory = Memory(config.MEMORY_SIZE)

######## LOAD MODEL IF NECESSARY ########

# create untrained neural network objects from the config file
current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, Game.InputShape, Game.ActionSize, config.HIDDEN_CNN_LAYERS)
best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, Game.InputShape, Game.ActionSize, config.HIDDEN_CNN_LAYERS)

# If loading an existing neural network, set the weights from that model
if initialise.INITIAL_MODEL_VERSION != None:
    best_player_version = initialise.INITIAL_MODEL_VERSION
    print('LOADING MODEL VERSION ' + str(initialise.INITIAL_MODEL_VERSION) + '...')
    m_tmp = current_NN.read(Game.Name, initialise.INITIAL_RUN_NUMBER, best_player_version)
    current_NN.model.set_weights(m_tmp.get_weights())
    m_tmp = best_NN.read(Game.Name, initialise.INITIAL_RUN_NUMBER, best_player_version)
    best_NN.model.set_weights(m_tmp.get_weights())
# otherwise just ensure the weights on the two players are the same
else:
    best_player_version = 0
    best_NN.model.set_weights(current_NN.model.get_weights())

# copy the config file to the run folder
copyfile('./config.py', run_folder + 'config.py')
#plot_model(current_NN.model, to_file=run_folder + 'models/model.png', show_shapes=True)

print('\n')

######## CREATE THE PLAYERS ########
class Playing:
    def __init__(self, env, run_version, player1version, player2version, EPISODES, logger, turns_until_tau0, goes_first=0):
        self.EPISODES = EPISODES
        self.turns_until_tau0 = turns_until_tau0
        self.logger = logger
        self.goes_first = goes_first

        if player1version == -1:
            self.player1 = User('player1', env.state_size, env.action_size)
        else:
            self.player1_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, env.input_shape, env.action_size, config.HIDDEN_CNN_LAYERS)
            if player1version > 0:
                self.player1_network = self.player1_NN.read(env.name, run_version, player1version)
                self.player1_NN.model.set_weights(self.player1_network.get_weights())
            self.player1 = Agent('player1', env.state_size, env.action_size, config.MCTS_SIMS, config.CPUCT, self.player1_NN)

        if player2version == -1:
            self.player2 = User('player2', env.state_size, env.action_size)
        else:
            self.player2_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, env.input_shape, env.action_size, config.HIDDEN_CNN_LAYERS)
            if player2version > 0:
                self.player2_network = self.player2_NN.read(env.name, run_version, player2version)
                self.player2_NN.model.set_weights(self.player2_network.get_weights())
            self.player2 = Agent('player2', env.state_size, env.action_size, config.MCTS_SIMS, config.CPUCT, self.player2_NN)

    def play_one_game(self, e):
        self.logger.info('====================')
        self.logger.info('EPISODE %d OF %d', e + 1, self.EPISODES)
        self.logger.info('====================')

        goes_first = self.goes_first
        player1 = self.player1
        logger = self.logger
        player2 = self.player2
        env = self.env
        turns_until_tau0 = self.turns_until_tau0
        memory = self.memory
        scores = self.scores
        sp_scores = self.sp_scores
        points = self.points

        print(str(e + 1) + ' ', end='')

        state = env.reset()

        done = 0
        turn = 0
        player1.mcts = None
        player2.mcts = None

        if goes_first == 0:
            player1Starts = random.randint(0, 1) * 2 - 1
        else:
            player1Starts = goes_first

        if player1Starts == 1:
            players = {1: {"agent": player1, "name": player1.name},
                       -1: {"agent": player2, "name": player2.name}}
            logger.info(player1.name + ' plays as X')
        else:
            players = {1: {"agent": player2, "name": player2.name},
                       -1: {"agent": player1, "name": player1.name}}
            logger.info(player2.name + ' plays as X')

        logger.info('--------------')

        env.gameState.render(logger)

        while done == 0:
            turn = turn + 1

            #### Run the MCTS algo and return an action
            if turn < turns_until_tau0:
                action, pi, MCTS_value, NN_value = players[state.playerTurn]['agent'].act(state, 1)
            else:
                action, pi, MCTS_value, NN_value = players[state.playerTurn]['agent'].act(state, 0)

            if action == "restart":
                break

            if memory != None:
                #### Commit the move to memory
                memory.commit_stmemory(env.identities, state, pi)

            logger.info('action: %d', action)
            for r in range(env.grid_shape[0]):
                logger.info(['----' if x == 0 else '{0:.2f}'.format(np.round(x, 2))
                             for x in pi[env.grid_shape[1] * r:(env.grid_shape[1] * r + env.grid_shape[1])]])
            # logger.info('MCTS perceived value for %s: %f', state.pieces[str(state.playerTurn)], np.round(MCTS_value, 2))
            # logger.info('NN perceived value for %s: %f', state.pieces[str(state.playerTurn)], np.round(NN_value, 2))
            logger.info('====================')

            ### Do the action
            # the value of the newState from the POV of the new playerTurn,
            # i.e. -1 if the previous player played a winning move
            state, value, done, _ = env.step(action)

            print("player turn", env.gameState.playerTurn)
            print(env.gameState.board)
            if env.gameState.playerTurn == -1:
                f = open("./communicate/output.txt", "w")
                temp_board = [str(x) for x in env.gameState.board]
                f.write(",".join(temp_board))
                f.close()

            env.gameState.render(logger)

            if done == 1:
                if memory != None:
                    #### If the game is finished, assign the values correctly to the game moves
                    for move in memory.stmemory:
                        if move['playerTurn'] == state.playerTurn:
                            move['value'] = value
                        else:
                            move['value'] = -value
                    memory.commit_ltmemory()

                if value == 1:
                    logger.info('%s WINS!', players[state.playerTurn]['name'])
                    print('%s WINS!' % (players[state.playerTurn]['name']))
                    scores[players[state.playerTurn]['name']] = scores[players[state.playerTurn]['name']] + 1
                    if state.playerTurn == 1:
                        sp_scores['sp'] = sp_scores['sp'] + 1
                    else:
                        sp_scores['nsp'] = sp_scores['nsp'] + 1
                elif value == -1:
                    logger.info('%s WINS!', players[-state.playerTurn]['name'])
                    print('%s WINS!' % (players[-state.playerTurn]['name']))
                    scores[players[-state.playerTurn]['name']] = scores[players[-state.playerTurn]['name']] + 1
                    if state.playerTurn == 1:
                        sp_scores['nsp'] = sp_scores['nsp'] + 1
                    else:
                        sp_scores['sp'] = sp_scores['sp'] + 1
                else:
                    logger.info('DRAW...')
                    print("DRAW")
                    scores['drawn'] = scores['drawn'] + 1
                    sp_scores['drawn'] = sp_scores['drawn'] + 1

                pts = state.score
                points[players[state.playerTurn]['name']].append(pts[0])
                points[players[-state.playerTurn]['name']].append(pts[1])

    def playMatches(self):
        self.env = Game()
        # no training memory is collected in this playing harness;
        # self.memory was never assigned in the original, which would crash play_one_game
        self.memory = None
        self.scores = {self.player1.name: 0, "drawn": 0, self.player2.name: 0}
        self.sp_scores = {'sp': 0, "drawn": 0, 'nsp': 0}
        self.points = {self.player1.name: [], self.player2.name: []}

        for e in range(self.EPISODES):
            self.play_one_game(e)

        return (self.scores, self.memory, self.points, self.sp_scores)

# if __name__ == '__main__':
#     env = Game()
#     playing = Playing(env, 1, 1, -1, 10, lg.logger_tourney, 0)
#     playing.playMatches()
def playBattleSnake(env, run_version, player1version, player2version, EPISODES, logger, turns_until_tau0, goes_first=0):

    # Initialize Trained Snake
    player1_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, env.input_shape, env.action_size, config.HIDDEN_CNN_LAYERS)

    if player1version > 0:
        player1_network = player1_NN.read(env.name, run_version, player1version)
        player1_NN.model.set_weights(player1_network.get_weights())
    player1 = Agent('player1', env.state_size, env.action_size, config.MCTS_SIMS, config.CPUCT, player1_NN)

    # Initialize BattleSnake Model
    player2 = BattleSnake('player2')

    # Initialize Memory
    memory = None

    # Initialize Game state
    env = Game()

    scores = {player1.name: 0, "drawn": 0, player2.name: 0}
    sp_scores = {'sp': 0, "drawn": 0, 'nsp': 0}
    points = {player1.name: [], player2.name: []}

    state = env.reset()

    done = 0
    turn = 0
    player1.mcts = None
    player2.mcts = None

    if goes_first == 0:
        player1Starts = random.randint(0, 1) * 2 - 1
    else:
        player1Starts = goes_first

    if player1Starts == 1:
        players = {1: {"agent": player1, "name": player1.name},
                   -1: {"agent": player2, "name": player2.name}}
        logger.info(player1.name + ' plays as X')
    else:
        players = {1: {"agent": player2, "name": player2.name},
                   -1: {"agent": player1, "name": player1.name}}
        logger.info(player2.name + ' plays as X')
    logger.info('--------------')

    env.gameState.render(logger)

    while done == 0:
        turn = turn + 1

        #### Run the MCTS algo and return an action
        if turn < turns_until_tau0:
            action, pi, MCTS_value, NN_value = players[state.playerTurn]['agent'].act(state, 1)
        else:
            action, pi, MCTS_value, NN_value = players[state.playerTurn]['agent'].act(state, 0)

        if memory != None:
            #### Commit the move to memory
            memory.commit_stmemory(env.identities, state, pi)

        #logger.info('action: %d', action)
        #for r in range(env.grid_shape[0]):
        #    logger.info(['----' if x == 0 else '{0:.2f}'.format(np.round(x,2)) for x in pi[env.grid_shape[1]*r : (env.grid_shape[1]*r + env.grid_shape[1])]])
        #logger.info('MCTS perceived value for %s: %f', state.pieces[str(state.playerTurn)], np.round(MCTS_value,2))
        #logger.info('NN perceived value for %s: %f', state.pieces[str(state.playerTurn)], np.round(NN_value,2))
        #logger.info('====================')

        ### Do the action
        # the value of the newState from the POV of the new playerTurn,
        # i.e. -1 if the previous player played a winning move
        state, value, done, _ = env.step(action)

        env.gameState.render(logger)

        if done == 1:
            if memory != None:
                #### If the game is finished, assign the values correctly to the game moves
                for move in memory.stmemory:
                    if move['playerTurn'] == state.playerTurn:
                        move['value'] = value
                    else:
                        move['value'] = -value
                memory.commit_ltmemory()

            if value == 1:
                logger.info('%s WINS!', players[state.playerTurn]['name'])
                scores[players[state.playerTurn]['name']] = scores[players[state.playerTurn]['name']] + 1
                if state.playerTurn == 1:
                    sp_scores['sp'] = sp_scores['sp'] + 1
                else:
                    sp_scores['nsp'] = sp_scores['nsp'] + 1
            elif value == -1:
                logger.info('%s WINS!', players[-state.playerTurn]['name'])
                scores[players[-state.playerTurn]['name']] = scores[players[-state.playerTurn]['name']] + 1
                if state.playerTurn == 1:
                    sp_scores['nsp'] = sp_scores['nsp'] + 1
                else:
                    sp_scores['sp'] = sp_scores['sp'] + 1
            else:
                logger.info('DRAW...')
                scores['drawn'] = scores['drawn'] + 1
                sp_scores['drawn'] = sp_scores['drawn'] + 1

            pts = state.score
            points[players[state.playerTurn]['name']].append(pts[0])
            points[players[-state.playerTurn]['name']].append(pts[1])

    return (scores, memory, points, sp_scores)
def retraining_worker(conn):
    from game import Game
    import initialise
    import config
    from config import PLAYER_COUNT, TEAM_SIZE, BATCH_SIZE, TRAINING_LOOPS
    from model import Residual_CNN, import_tf
    import_tf(1024 * 2)
    import numpy as np
    import time

    env = Game()

    ######## LOAD MODEL IF NECESSARY ########
    # create an untrained neural network object from the config file
    if len(env.grid_shape) == 2:
        shape = (1,) + env.grid_shape
    else:
        shape = env.grid_shape

    if TEAM_SIZE > 1:
        current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape, int(PLAYER_COUNT / TEAM_SIZE), config.HIDDEN_CNN_LAYERS)
    else:
        current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape, PLAYER_COUNT, config.HIDDEN_CNN_LAYERS)

    # If loading an existing neural network, set the weights from that model
    if initialise.INITIAL_MODEL_VERSION != None:
        m_tmp = current_NN.read(env.name, initialise.INITIAL_RUN_NUMBER, initialise.INITIAL_MODEL_VERSION)
        current_NN.model.set_weights(m_tmp.get_weights())

    train_overall_loss = []

    while 1:
        # request memory samples
        conn.send((TRAINING_LOOPS, BATCH_SIZE))
        # wait for memory samples
        conn.poll(None)
        data = conn.recv()

        if data:
            # train on sampled memories
            for i, minibatch in enumerate(data):
                training_states = np.array([current_NN.convertToModelInput(row['state']) for row in minibatch])
                training_targets = {'value_head': np.array([row['value'] for row in minibatch])}

                fit = current_NN.fit(training_states, training_targets, epochs=config.EPOCHS, verbose=1,
                                     validation_split=0, batch_size=32)
                if i == 0:
                    init_loss = fit.history['loss'][0]

                train_overall_loss.append(round(fit.history['loss'][config.EPOCHS - 1], 4))

            """display.clear_output(wait=True)
            display.display(pl.gcf())
            pl.gcf().clear()
            time.sleep(.25)

            print('\n')
            current_NN.printWeightAverages()

            print("Max = {0}, Min = {1}, latest = {2}".format(max(self.train_overall_loss), min(self.train_overall_loss), self.train_overall_loss[-1]))
            print("Loss reduction: {}".format(init_loss - fit.history['loss'][0]))"""

            # send new current_NN weights
            conn.send((current_NN.model.get_weights(), train_overall_loss[-1]))
        else:
            time.sleep(10)
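# The matching driver-side sketch for retraining_worker, assuming it runs in
# its own process; sample_minibatch is a hypothetical stand-in for however the
# main process samples positions from replay memory.
import multiprocessing as mp

parent_conn, child_conn = mp.Pipe()
mp.Process(target=retraining_worker, args=(child_conn,), daemon=True).start()

loops, batch_size = parent_conn.recv()  # worker requests memory samples
parent_conn.send([sample_minibatch(batch_size) for _ in range(loops)])
new_weights, last_loss = parent_conn.recv()  # worker returns trained weights and its latest loss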
move39 = move41.copy()

all_dict = [move38, move39, move41, move37, move40]
moves = ['24', '32', '41', '37', '40']
j = 0

state = GameState(
    np.array([0, 0, 0, 0, 0, 0, 0,
              0, 0, 0, 0, 0, 0, 0,
              0, 0, 0, 0, 0, 0, 0,
              0, 0, 0, 0, 0, 0, 0,
              0, 0, 0, -1, -1, 0, 0,
              0, 0, 0, 1, 1, 0, 0], dtype=int), 1)  # np.int was removed in NumPy >= 1.24

# Each version must predict the best move; we plot the prediction score against the version number
for player_idx, _ in enumerate(version_list_CNN):
    m_tmp = player1_NN.read(initialise.INITIAL_RUN_NUMBER, player_idx + 1)
    player1_NN.model.set_weights(m_tmp.get_weights())
    player1 = Agent('player1', env.state_size, env.action_size, config.MCTS_SIMS, config.CPUCT, player1_NN)

    # m_tmp = player2_NN.read(initialise.INITIAL_RUN_NUMBER, player_idx + 1)
    # player2_NN.model.set_weights(m_tmp.get_weights())
    # player2 = Agent('player2', env.state_size, env.action_size, config.MCTS_SIMS, config.CPUCT, player2_NN)

    move37[player_idx] = player1.get_preds(state)[0]
    # move40[player_idx] = player1.get_preds(state)[1][40]
    # move41[player_idx] = player1.get_preds(state)[1][41]
    # move38[player_idx] = player1.get_preds(state)[1][24]
    # move39[player_idx] = player1.get_preds(state)[1][32]

plt.figure()
# (opening of this statement reconstructed from the parallel memory-loading snippet below)
memory = pickle.load(open(run_archive_folder + env.name + '/run' + str(initialise.INITIAL_RUN_NUMBER).zfill(4) + "/memory/memory" +
                          str(initialise.INITIAL_MEMORY_VERSION).zfill(4) + ".p", "rb"))

# Create untrained neural network objects from the config file
current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, (2,) + env.grid_shape, env.action_size, config.HIDDEN_CNN_LAYERS)
best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, (2,) + env.grid_shape, env.action_size, config.HIDDEN_CNN_LAYERS)

# Load existing neural network if needed
if initialise.INITIAL_MODEL_VERSION is not None:
    best_player_version = initialise.INITIAL_MODEL_VERSION
    print('LOADING MODEL VERSION ' + str(initialise.INITIAL_MODEL_VERSION) + '...')
    m_tmp = best_NN.read(initialise.INITIAL_RUN_NUMBER, best_player_version)
    current_NN.model.set_weights(m_tmp.get_weights())
    best_NN.model.set_weights(m_tmp.get_weights())
# Otherwise ensure the initial weights are the same for both players
else:
    best_player_version = 0
    best_NN.model.set_weights(current_NN.model.get_weights())

print('\n')
print(current_NN.model.summary())

# Create players
current_player = Agent('current_player', env.state_size, env.action_size, config.MCTS_SIMS, config.CPUCT, current_NN)
best_player = Agent('best_player', env.state_size, env.action_size, config.MCTS_SIMS, config.CPUCT, best_NN)
# (opening condition reconstructed; the dangling else below can only close it)
if initialise.INITIAL_MEMORY_VERSION == None:
    memory = Memory(config.MEMORY_SIZE)
else:
    print('LOADING MEMORY VERSION ' + str(initialise.INITIAL_MEMORY_VERSION) + '...')
    memory = pickle.load(open(run_archive_folder + env.name + '/run' + str(initialise.INITIAL_RUN_NUMBER).zfill(4) +
                              "/memory/memory" + str(initialise.INITIAL_MEMORY_VERSION).zfill(4) + ".p", "rb"))

######## LOAD MODEL IF NECESSARY ########

# create untrained neural network objects from the config file
current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, (2,) + env.grid_shape, env.action_size, config.HIDDEN_CNN_LAYERS)
best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, (2,) + env.grid_shape, env.action_size, config.HIDDEN_CNN_LAYERS)

# If loading an existing neural network, set the weights from that model
if initialise.INITIAL_MODEL_VERSION != None:
    best_player_version = initialise.INITIAL_MODEL_VERSION
    print('LOADING MODEL VERSION ' + str(initialise.INITIAL_MODEL_VERSION) + '...')
    m_tmp = best_NN.read(env.name, initialise.INITIAL_RUN_NUMBER, best_player_version)
    current_NN.model.set_weights(m_tmp.get_weights())
    best_NN.model.set_weights(m_tmp.get_weights())
# otherwise just ensure the weights on the two players are the same
else:
    best_player_version = 0
    best_NN.model.set_weights(current_NN.model.get_weights())

# copy the config file to the run folder
copyfile('./config.py', run_folder + 'config.py')
plot_model(current_NN.model, to_file=run_folder + 'models/model.png', show_shapes=True)

print('\n')

######## CREATE THE PLAYERS ########