def _make_player(name, version, env, run_version):
    """Build one match participant for the requested model version.

    Args:
        name: Label passed to the ``User`` / ``Agent`` constructor.
        version: ``-1`` creates a ``User``; any other value creates an
            ``Agent`` backed by a fresh ``Residual_CNN``. Saved weights are
            only loaded when ``version > 0``.
        env: Environment supplying ``state_size``, ``action_size``,
            ``input_shape`` and ``name``.
        run_version: Run identifier forwarded to ``Residual_CNN.read``.

    Returns:
        The constructed ``User`` or ``Agent``.
    """
    if version == -1:
        return User(name, env.state_size, env.action_size)

    network = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                           env.input_shape, env.action_size,
                           config.HIDDEN_CNN_LAYERS)
    if version > 0:
        # Version 0 (or any other non-positive value) keeps the freshly
        # initialised weights.
        saved = network.read(env.name, run_version, version)
        network.model.set_weights(saved.get_weights())

    return Agent(name, env.state_size, env.action_size, config.MCTS_SIMS,
                 config.CPUCT, network)


def playMatchesBetweenVersions(env, run_version, player1version, player2version, EPISODES, logger, turns_until_tau0, goes_first = 0):
    """Play a series of matches between two player versions.

    Args:
        env: Game environment shared by both players.
        run_version: Run identifier used when reading saved networks.
        player1version: Version of the first player (``-1`` for a ``User``).
        player2version: Version of the second player (``-1`` for a ``User``).
        EPISODES: Forwarded to ``playMatches``.
        logger: Forwarded to ``playMatches``.
        turns_until_tau0: Forwarded to ``playMatches``.
        goes_first: Forwarded to ``playMatches``.

    Returns:
        The tuple ``(scores, memory, points, sp_scores)`` produced by
        ``playMatches``.
    """
    player1 = _make_player('player1', player1version, env, run_version)
    player2 = _make_player('player2', player2version, env, run_version)

    # No memory object is supplied (fifth positional argument is None).
    scores, memory, points, sp_scores = playMatches(
        player1, player2, EPISODES, logger, turns_until_tau0, None,
        goes_first)
    return (scores, memory, points, sp_scores)
def _selfplay(n):
    """Run one self-play session and return the memory it produced.

    Args:
        n: Identifier of this worker; only used in the progress messages.

    Returns:
        The memory object returned by ``playMatches``.
    """
    chessenv = Game()
    memory = Memory(config.MEMORY_SIZE)

    net_input_shape = (119, ) + chessenv.grid_shape
    current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                              net_input_shape, chessenv.action_size,
                              config.HIDDEN_CNN_LAYERS)
    best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                           net_input_shape, chessenv.action_size,
                           config.HIDDEN_CNN_LAYERS)
    # Both networks start from the same weights.
    best_NN.model.set_weights(current_NN.model.get_weights())

    # current_player is constructed but does not take part in the matches
    # below; best_player plays against itself.
    current_player = Agent('current_player', chessenv.state_size,
                           chessenv.action_size, config.MCTS_SIMS,
                           config.CPUCT, current_NN)
    best_player = Agent('best_player', chessenv.state_size,
                        chessenv.action_size, config.MCTS_SIMS,
                        config.CPUCT, best_NN)

    started = time.perf_counter()
    print('Proc {} start'.format(n))
    _, memory, _, _ = playMatches(best_player,
                                  best_player,
                                  config.EPISODES,
                                  lg.logger_main,
                                  turns_until_tau0=config.TURNS_UNTIL_TAU0,
                                  memory=memory)
    elapsed = time.perf_counter() - started
    print('Proc {} done in {} seconds'.format(n, elapsed))
    return memory
def start(self, data):
    """Create the game and the agent from the initial game JSON.

    Reads the board size, snake heads and food positions from ``data``,
    stores the derived layer layout on ``self`` and creates ``self.player``.

    Args:
        data: Decoded request payload. Must contain ``BOARD_KEY`` (with
            width, height, snakes and food) and ``YOU_KEY`` (with the id of
            the controlled snake).
    """
    board_json = data[BOARD_KEY]

    # Grid shape is stored as (width, height).
    self.grid_shape = (board_json[WIDTH_KEY], board_json[HEIGHT_KEY])
    self.w, self.h = self.grid_shape

    snakes = board_json[SNAKES_KEY]
    your_id = data[YOU_KEY][ID_KEY]

    # Head of the controlled snake goes first; every other head keeps the
    # order in which it appears on the board.
    starting_pos_json = []
    for snake in snakes:
        head = snake[BODY_KEY][0]
        if snake[ID_KEY] == your_id:
            starting_pos_json.insert(0, head)
        else:
            starting_pos_json.append(head)
    starting_pos = [(pos['x'], pos['y']) for pos in starting_pos_json]

    # NOTE(review): one more than the number of snakes on the board —
    # confirm the extra slot is intended.
    self.num_players = len(snakes) + 1
    # Food layer immediately after the player layers.
    self.food_layer = self.num_players * 2
    # "Other" layer immediately after food.
    self.other_layer = self.food_layer + 1
    # Total number of layers.
    self.num_layers = self.other_layer + 1

    # Food positions as (x, y) tuples, matching the format of starting_pos.
    # (`list += tuple` would splice the coordinates in as separate ints.)
    foods = board_json[FOOD_KEY]
    starting_food = [(pos['x'], pos['y']) for pos in foods]

    print("Creating new Game")
    env = Game(self.grid_shape, self.num_players, starting_pos,
               starting_food)

    print("Creating new Agent")
    player_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                             env.input_shape, env.action_size,
                             config.HIDDEN_CNN_LAYERS)
    player_network = player_NN.read(env.name, self.run_version,
                                    self.playerversion)
    player_NN.model.set_weights(player_network.get_weights())
    # The simulation count is fixed at 50 here rather than config.MCTS_SIMS.
    self.player = Agent('player', env.state_size, env.action_size, 50,
                        config.CPUCT, player_NN)
def TTA_fitting(tr_gen, val_gen, test_gen, dummy_test_gen):
    """Train ``N_ENSMBLE`` models and ensemble their TTA predictions.

    For every ensemble member a new model is trained, TTA predictions are
    computed for the validation data (``dummy_test_gen``) and the test data
    (``test_gen``), and from the second member onwards the averaged
    ensemble prediction is scored and written to a submission CSV.

    Relies on names defined outside this function (``tr_x``, ``val_x``,
    ``val_y``, ``savePath``, ``PATH`` and the training hyper-parameters).

    Args:
        tr_gen: Training data generator.
        val_gen: Validation data generator.
        test_gen: Test data generator used for TTA.
        dummy_test_gen: Validation data generator used for TTA.
    """
    fold_val, fold_test = [], []
    for i in range(N_ENSMBLE):
        callbacks = [
            # Stop training once val_loss no longer improves.
            EarlyStopping(monitor='val_loss', patience=30, verbose=1),
            # Lower the learning rate once val_loss no longer improves.
            ReduceLROnPlateau(monitor='val_loss',
                              factor=0.1,
                              patience=10,
                              min_lr=1e-9),
            # Save the model with the best val_categorical_accuracy.
            ModelCheckpoint(savePath,
                            monitor='val_categorical_accuracy',
                            save_best_only=True)
        ]

        RCNN = Residual_CNN(N_BLOCK, N_LAYER, FILTER, KERNEL)
        model = RCNN.build_model()

        # callbacks must be passed by keyword: the second positional
        # parameter of fit_generator is steps_per_epoch.
        model.fit_generator(tr_gen,
                            steps_per_epoch=tr_x.shape[0] // BATCH_SIZE,
                            epochs=EPOCH,
                            callbacks=callbacks,
                            validation_data=val_gen,
                            validation_steps=val_x.shape[0] // BATCH_SIZE)

        # TTA on the validation data.
        val_pred = TTA(model, dummy_test_gen, sub_epoch=10)
        tta_val_acc = accuracy_score(np.argmax(val_y, 1),
                                     np.argmax(val_pred, 1))
        print(f"TTA_vaild_accuracy : {tta_val_acc}")

        # TTA on the test data.
        test_pred = TTA(model, test_gen, sub_epoch=10)

        # Stack the TTA predictions of every member trained so far.
        fold_val.append(val_pred)
        fold_test.append(test_pred)

        # Ensemble results only exist once there are at least two members.
        if i != 0:
            mean_val_pred = np.mean(fold_val, axis=0)
            mean_test_pred = np.mean(fold_test, axis=0)
            ensmble_tta_val_acc = accuracy_score(
                np.argmax(val_y, 1), np.argmax(mean_val_pred, 1))
            print(f"ensmbleTTA_vaild_accuracy : {ensmble_tta_val_acc}")

            # Save the ensemble prediction for the test data. The averaged
            # prediction is per-class, so reduce it to one label per row
            # before building the (1-D) Series.
            submission = pd.Series(np.argmax(mean_test_pred, 1),
                                   name='label')
            submission.to_csv(os.path.join(
                PATH, 'submission_ensemble_' + str(i + 1) + ".csv"),
                              header=True,
                              index_label='id')
def __init__(self, enable_cache=False):
    """Create the two networks and an empty cache.

    Args:
        enable_cache: Stored on the instance as ``enable_cache``.
    """
    shared_args = (config.REG_CONST, config.LEARNING_RATE)
    # Network built with the *_START_DIM input/output dimensions.
    self.nn_start = Residual_CNN(*shared_args, config.INPUT_START_DIM,
                                 config.OUTPUT_START_DIM,
                                 config.HIDDEN_CNN_LAYERS)
    # Network built with the general input/output dimensions.
    self.nn = Residual_CNN(*shared_args, config.INPUT_DIM,
                           config.OUTPUT_DIM, config.HIDDEN_CNN_LAYERS)
    self.enable_cache = enable_cache
    self.cache = dict()
def Fold_fitting(train_x, train_y, test):
    """Train with stratified k-fold CV and write the averaged test prediction.

    A fresh model is trained for every fold, its validation accuracy is
    printed, and the test predictions of all folds are averaged into one
    submission CSV.

    Args:
        train_x: Training inputs, indexable by an array of row indices.
        train_y: One-hot training labels (``argmax`` over axis 1 gives the
            class used for stratification).
        test: Test inputs forwarded to ``get_argment_generator``.
    """
    N_FOLD = 5
    # random_state only has an effect (and is only accepted by recent
    # scikit-learn releases) when shuffling is enabled.
    fold = StratifiedKFold(n_splits=N_FOLD, shuffle=True, random_state=1103)
    pred = []
    labels = np.argmax(train_y, 1)

    for i, (tr_idx, val_idx) in enumerate(fold.split(train_x, labels)):
        callbacks = [
            # Stop training once val_loss no longer improves.
            EarlyStopping(monitor='val_loss', patience=30, verbose=1),
            # Lower the learning rate once val_loss no longer improves.
            ReduceLROnPlateau(monitor='val_loss',
                              factor=0.1,
                              patience=10,
                              min_lr=1e-9),
            # Save the model with the best val_categorical_accuracy.
            ModelCheckpoint(savePath,
                            monitor='val_categorical_accuracy',
                            save_best_only=True)
        ]
        print(f"==========={i}_fold start ================= ")

        # Build a new model per fold so that a fold's validation rows were
        # never seen during an earlier fold's training.
        RCNN = Residual_CNN(N_BLOCK, N_LAYER, FILTER, KERNEL)
        model = RCNN.build_model()

        # Inputs and labels of each split must use the same index set.
        train_split = train_x[tr_idx], train_y[tr_idx]
        val_split = train_x[val_idx], train_y[val_idx]

        # NOTE(review): `train` is not defined in this function; it is
        # assumed to be a module-level name — confirm against the
        # signature of get_argment_generator.
        tr_gen, val_gen, test_gen, dummy_test_gen = get_argment_generator(
            train_split, val_split, train, test, BATCH_SIZE)

        model.fit_generator(tr_gen,
                            epochs=EPOCH,
                            steps_per_epoch=len(tr_idx) // BATCH_SIZE,
                            validation_data=val_gen,
                            validation_steps=len(val_idx) // BATCH_SIZE,
                            callbacks=callbacks)

        val_pred = model.predict(dummy_test_gen)
        test_pred = model.predict(test_gen)

        # Compare class indices with class indices.
        val_score = accuracy_score(np.argmax(val_split[1], 1),
                                   np.argmax(val_pred, 1))
        print(f"fold_{i} val accuracy : {val_score}")
        pred.append(test_pred)

    mean_pred = np.mean(pred, axis=0)
    pred_y = np.argmax(mean_pred, 1)
    submission = pd.Series(pred_y, name='label')
    submission.to_csv(os.path.join(PATH, f"submission_fold{N_FOLD}.csv"),
                      header=True,
                      index_label='id')
def __init__(self, model_number, nbMatches, alphaMaxDepth=4):
    """Set up both players and immediately play ``nbMatches`` matches.

    Args:
        model_number: Identifier of the saved model whose weights are
            loaded into the network player.
        nbMatches: Number of matches to play.
        alphaMaxDepth: Maximum depth handed to the alpha-beta player.
    """
    # Determines both probability of choosing correct move and time to
    # complete.
    self.alphaMaxDepth = alphaMaxDepth

    # Environments are created by reset_envs().
    self.zeroEnv, self.alphaEnv = None, None
    self.reset_envs()

    # Network for the MCTS player and the alpha-beta opponent.
    zero_network = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                                (2, ) + self.zeroEnv.grid_shape,
                                self.zeroEnv.action_size,
                                config.HIDDEN_CNN_LAYERS)
    self.alphaPlayer = AlphaBeta(board=self.alphaEnv,
                                 max_depth=alphaMaxDepth)

    # Load the saved weights, then wrap the network in an agent.
    saved_model = zero_network.read(initialise.INITIAL_RUN_NUMBER,
                                    model_number)
    zero_network.model.set_weights(saved_model.get_weights())
    self.zeroPlayer = Agent('player1', self.zeroEnv.state_size,
                            self.zeroEnv.action_size, config.MCTS_SIMS,
                            config.CPUCT, zero_network)

    # Accumulate the outcome of every match; environments are reset after
    # each one.
    self.results = 0
    for _ in range(nbMatches):
        self.results += self.playMatch()
        self.reset_envs()
def test_play_matches_neural_network(self):
    """Exercise self-play, retraining and a final tournament end to end.

    Two agents backed by random models first fill the memory, player 1 is
    then switched to a real CNN and retrained on that memory, and finally
    both players meet in an evaluation tournament whose scores are printed.
    """
    memory = Memory(config.MEMORY_SIZE)

    board_cells = config.GRID_SHAPE[0] * config.GRID_SHAPE[1]
    n_actions = config.GRID_SHAPE[1]
    net_input_shape = (1,) + config.GRID_SHAPE

    # Start with random models: comparable to an untrained CNN, but quicker.
    # config.MCTS_SIMS is rather low, so the MCTS estimates are poor; the
    # aim is to encourage exploration and put many boards into memory even
    # if the probabilities attached to their actions are wrong.
    # Memory is completed at the end of a game according to the winner:
    # every move of the winner gets value=1, every move of the loser -1.
    # The network learns to predict these probabilities and values; they
    # are wrong at first, but the search and the network are expected to
    # improve each other over time.
    player1 = Agent('cnn_agent_1', board_cells, n_actions,
                    config.MCTS_SIMS, config.CPUCT, GenRandomModel())
    player2 = Agent('cnn_agent_2', board_cells, n_actions,
                    config.MCTS_SIMS, config.CPUCT, GenRandomModel())

    scores, memory, points, sp_scores = play_matches.playMatches(
        player1,
        player2,
        config.EPISODES,
        lg.logger_main,
        turns_until_tau0=config.TURNS_UNTIL_TAU0,
        memory=memory)
    # playMatches() has copied stmemory to ltmemory, so stmemory can be
    # cleared safely.
    memory.clear_stmemory()

    cnn1 = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                        net_input_shape, n_actions,
                        config.HIDDEN_CNN_LAYERS)
    cnn2 = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                        net_input_shape, n_actions,
                        config.HIDDEN_CNN_LAYERS)
    cnn2.model.set_weights(cnn1.model.get_weights())
    cnn1.plot_model()
    # Only player 1 is switched to the CNN; player 2 keeps its random model.
    player1.model = cnn1

    ######## RETRAINING ########
    player1.replay(memory.ltmemory)

    for _ in range(1):
        scores, memory, points, sp_scores = play_matches.playMatches(
            player1,
            player2,
            config.EPISODES,
            lg.logger_main,
            turns_until_tau0=config.TURNS_UNTIL_TAU0,
            memory=memory)
        # playMatches() has copied stmemory to ltmemory, so stmemory can
        # be cleared safely.
        memory.clear_stmemory()
        player1.replay(memory.ltmemory)

    print('TOURNAMENT...')
    scores, _, points, sp_scores = play_matches.playMatches(
        player1,
        player2,
        config.EVAL_EPISODES,
        lg.logger_main,
        turns_until_tau0=0,
        memory=None)

    print('\nSCORES')
    print(scores)
    print('\nSTARTING PLAYER / NON-STARTING PLAYER SCORES')
    print(sp_scores)
def __init__(self, env, run_version, player1version, player2version, EPISODES, logger, turns_until_tau0, goes_first=0):
    """Store the match settings and build both players.

    For each player a version of ``-1`` creates a ``User``; any other
    version creates an ``Agent`` backed by a ``Residual_CNN``, with saved
    weights loaded only when the version is positive. The network is kept
    as ``<player>_NN`` and, when weights were loaded, the loaded model as
    ``<player>_network``.

    Args:
        env: Game environment shared by both players.
        run_version: Run identifier used when reading saved networks.
        player1version: Version of the first player.
        player2version: Version of the second player.
        EPISODES: Stored as ``self.EPISODES``.
        logger: Stored as ``self.logger``.
        turns_until_tau0: Stored as ``self.turns_until_tau0``.
        goes_first: Stored as ``self.goes_first``.
    """
    self.EPISODES = EPISODES
    self.turns_until_tau0 = turns_until_tau0
    self.logger = logger
    self.goes_first = goes_first

    requested = (('player1', player1version), ('player2', player2version))
    for label, version in requested:
        if version == -1:
            setattr(self, label,
                    User(label, env.state_size, env.action_size))
            continue

        network = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                               env.input_shape, env.action_size,
                               config.HIDDEN_CNN_LAYERS)
        setattr(self, label + '_NN', network)

        if version > 0:
            stored = network.read(env.name, run_version, version)
            setattr(self, label + '_network', stored)
            network.model.set_weights(stored.get_weights())

        setattr(self, label,
                Agent(label, env.state_size, env.action_size,
                      config.MCTS_SIMS, config.CPUCT, network))
def main():
    """Set up the environment and two identically-weighted networks.

    NOTE(review): this looks like an unfinished stub — the player
    variables are all set to 0 and the function ends in a loop that never
    exits. Confirm before relying on it.
    """
    env = Game()
    # `memory` is only bound when no initial memory version is configured.
    if initial.INITIAL_MEMORY_VERSION == None:
        memory = Memory(config.MEMORY_SIZE)
    # load neural network
    current_NN = Residual_CNN(config.REG_CONST,config.LEARNING_RATE,(2,) + env.grid_shape, env.action_size, config.HIDDEN_CNN_LAYERS) #????
    best_NN = Residual_CNN(config.REG_CONST,config.LEARNING_RATE,(2,) + env.grid_shape, env.action_size, config.HIDDEN_CNN_LAYERS)
    best_player_version = 0
    # Start both networks from the same weights.
    best_NN.model.set_weights(current_NN.model.get_weights())
    #create players
    # NOTE(review): placeholders only; `iter` also shadows the builtin
    # inside this function.
    current_player = best_player = iter = 0
    # Loops forever without doing any work.
    while 1:
        pass
def playMatchesBetweenVersions(env, run_version, player1version, player2version, EPISODES, logger, turns_until_tau0, goes_first = 0):
    """Play matches between two player versions and return the results.

    A version of ``-1`` creates a ``User``; any other version creates an
    ``Agent`` backed by a ``Residual_CNN`` whose saved weights are loaded
    only when the version is positive. ``playMatches`` is asked to print
    moves whenever at least one side is a ``User``.

    Returns:
        The tuple ``(scores, memory, points, sp_scores)`` from
        ``playMatches``.
    """
    contenders = []
    for label, version in (('player1', player1version),
                           ('player2', player2version)):
        if version == -1:
            contenders.append(User(label, env.state_size, env.action_size))
            continue

        net = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                           env.input_shape, env.action_size,
                           config.HIDDEN_CNN_LAYERS)
        if version > 0:
            stored = net.read(env.name, run_version, version)
            net.model.set_weights(stored.get_weights())
        contenders.append(
            Agent(label, env.state_size, env.action_size, config.MCTS_SIMS,
                  config.CPUCT, net))

    player1, player2 = contenders
    printmoves = -1 in (player1version, player2version)

    scores, memory, points, sp_scores = playMatches(
        player1, player2, EPISODES, logger, turns_until_tau0, None,
        goes_first, printmoves)
    return (scores, memory, points, sp_scores)
def loadAgent(env, config, name):
    """Create an agent whose network carries the weights saved under ``name``.

    Args:
        env: Environment supplying ``input_shape``, ``action_size`` and
            ``state_size``.
        config: Configuration object providing the network and MCTS
            settings.
        name: Agent name; the weights are read from
            ``./models/<name>.h5``.

    Returns:
        The constructed ``Agent``.
    """
    network = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                           env.input_shape, env.action_size,
                           config.HIDDEN_CNN_LAYERS)

    # The saved model references the custom loss by name, so it has to be
    # supplied when loading.
    model_path = "./models/" + name + '.h5'
    custom = {
        'softmax_cross_entropy_with_logits': softmax_cross_entropy_with_logits
    }
    saved_model = load_model(model_path, custom_objects=custom)
    network.model.set_weights(saved_model.get_weights())

    return Agent(name, env.state_size, env.action_size, config.MCTS_SIMS,
                 config.CPUCT, network)
def predict():
    """Answer a move-prediction request.

    GET returns a fixed greeting. POST expects a JSON body whose
    ``"gameState"`` entry is a JSON-encoded board; the reply is the
    ``[row, column]`` index (on a 6x7 grid) of the highest-valued entry of
    the agent's prediction, or an error message when the body cannot be
    interpreted.
    """
    if request.method == 'GET':
        return "Hello World! GET request"

    if request.method == 'POST':
        # The network is only needed for POST, so a GET no longer pays for
        # building the model and reading its weights from disk.
        current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                                  (2, ) + (6, 7), 42,
                                  config.HIDDEN_CNN_LAYERS)
        current_NN.model.set_weights(
            current_NN.read('connect4', 2, 74).get_weights())
        current_player = Agent('current_player', 84, 42, config.MCTS_SIMS,
                               config.CPUCT, current_NN)
        try:
            data = request.get_json()
            gs = GameState(np.array(json.loads(data["gameState"])), 1)
            preds = current_player.get_preds(gs)
            preds = np.array(preds[1]).reshape(6, 7)
            pred_arg = np.unravel_index(preds.argmax(), preds.shape)
        except (ValueError, KeyError, TypeError):
            # ValueError: undecodable JSON or a prediction that cannot be
            # reshaped; KeyError: "gameState" missing; TypeError: no JSON
            # body at all (data is None) or a non-string payload.
            return jsonify("Please enter a proper GameState.")
        return jsonify([int(x) for x in pred_arg])
def train_network(agent, train_phase):
    """Train one of the agent's networks on randomly chosen saved games.

    On every iteration a random file from ``train_states`` is loaded,
    deleted from disk, used to fit the network, and the network is written
    back.

    Args:
        agent: Identifier passed to the network's ``read`` / ``write``.
        train_phase: Sequence whose first element selects the network:
            ``'start'`` or ``'general'``.

    Raises:
        ValueError: If ``train_phase[0]`` is neither ``'start'`` nor
            ``'general'``.
    """
    import os

    phase = train_phase[0]
    if phase == 'start':
        net = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                           config.INPUT_START_DIM, config.OUTPUT_START_DIM,
                           config.HIDDEN_CNN_LAYERS)
        net_str = 's'
    elif phase == 'general':
        net = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                           config.INPUT_DIM, config.OUTPUT_DIM,
                           config.HIDDEN_CNN_LAYERS)
        net_str = 'g'
    else:
        # Previously this fell through and failed later with a NameError.
        raise ValueError("unknown training phase: " + repr(phase))
    net.read(agent, net_str)

    # NOTE(review): validation tracking is disabled, so this value is never
    # updated and the final message always reports the initial 10000.0.
    min_val_error = 10000.0

    for i in range(config.TRAINING_LOOPS):
        print("Iteration #" + str(i))
        game_file = choice(listdir('train_states'))
        # Build the path portably instead of hard-coding a backslash.
        game_path = os.path.join('train_states', game_file)
        # NOTE(review): pickle executes arbitrary code on load; only use
        # with files produced by this project.
        with open(game_path, 'rb') as input_file:
            game_memory = pickle.load(input_file)
        # Each saved game is consumed exactly once.
        remove(game_path)

        net.fit(game_memory['batch_states'], game_memory['batch_targets'],
                config.EPOCHS, 2, 0.0, 32)
        # Persist after every iteration, since the training file is gone.
        net.write(agent, net_str)

    print("Min Loss: " + str(min_val_error))
def _load_versioned_agent(env, run_version, version):
    """Build the network agent for ``version``.

    The agent is named ``env.name`` followed by the zero-padded,
    four-character version. For positive versions the network must be
    known to ``Provider``; its saved weights are then loaded.

    Returns:
        The ``Agent``, or ``None`` when ``Provider`` does not know a
        network with that name.
    """
    network = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                           env.input_shape, env.action_size,
                           config.HIDDEN_CNN_LAYERS)
    net_name = env.name + "{0:0>4}".format(version)

    if version > 0:
        if Provider.getNetByName(net_name) is None:
            return None
        stored = network.read(env.name, run_version, version)
        network.model.set_weights(stored.get_weights())

    return Agent(net_name, env.state_size, env.action_size,
                 config.MCTS_SIMS, config.CPUCT, network)


def playMatchesBetweenVersions(env, run_version, player1version, player2version, EPISODES, logger, turns_until_tau0, goes_first=0):
    """Play matches between two player versions.

    A version of ``-1`` selects a human ``User``: player 1 is always named
    ``"user1"``, while for player 2 a username is read from standard input
    and looked up through ``Provider``.

    Returns:
        ``(scores, memory, points, sp_scores)`` from ``playMatches``, or
        ``None`` when a requested network version is unknown to
        ``Provider``.
    """
    # NOTE(review): the `env` argument is replaced by a fresh Game here, so
    # whatever the caller passes is ignored — confirm this is intended.
    env = Game()

    if player1version == -1:
        player1 = User("user1", env.state_size, env.action_size)
    else:
        player1 = _load_versioned_agent(env, run_version, player1version)
        if player1 is None:
            return None

    if player2version == -1:
        name = input('enter username: ')
        # NOTE(review): if the lookup can return None for an unknown
        # username, the attribute access below fails — verify against
        # Provider.
        user2 = Provider.getPersonByName(name)
        player2 = User(user2.name, env.state_size, env.action_size)
    else:
        player2 = _load_versioned_agent(env, run_version, player2version)
        if player2 is None:
            return None

    scores, memory, points, sp_scores = playMatches(
        player1, player2, EPISODES, logger, turns_until_tau0, None,
        goes_first)
    return (scores, memory, points, sp_scores)
def playMatchesBetweenVersions(env, run_version, player1version, player2version, EPISODES, logger, turns_until_tau0, goes_first=0):
    """Play matches between two player versions and return the results.

    Returns:
        The tuple ``(scores, memory, points, sp_scores)`` from
        ``playMatches``.
    """

    def build(label, version):
        # A version of -1 denotes a human player.
        if version == -1:
            return User(label, env.state_size, env.action_size)

        network = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                               env.input_shape, env.action_size,
                               config.HIDDEN_CNN_LAYERS)
        # For a positive version, read the trained model and copy its
        # weights into the new network.
        if version > 0:
            trained = network.read(env.name, run_version, version)
            network.model.set_weights(trained.get_weights())

        # The agent uses the network for its simulations and for building
        # the MCTS tree.
        return Agent(label, env.state_size, env.action_size,
                     config.MCTS_SIMS, config.CPUCT, network)

    player1 = build('player1', player1version)
    player2 = build('player2', player2version)

    scores, memory, points, sp_scores = playMatches(
        player1, player2, EPISODES, logger, turns_until_tau0, None,
        goes_first)
    return (scores, memory, points, sp_scores)
# Scratch walkthrough: creates the environment and memory, runs two
# self-play workers in a process pool, builds the current/best networks,
# loads saved weights into both and renders the initial state.
# Several statements below are bare expressions whose value is discarded
# when run as a script.
chessenv = Game()
memory = Memory(config.MEMORY_SIZE)
# NOTE(review): `env` is not defined anywhere in this snippet (only
# `chessenv` is) — confirm it exists in the surrounding session.
memory.commit_stmemory(env.identities,env.gameState,env.actionSpace)
memory.stmemory
# Run _selfplay(0) and _selfplay(1) in two worker processes.
pool = multiprocessing.Pool(2)
# zip() over a single iterable wraps every result in a 1-tuple.
out = zip(pool.map(_selfplay, range(0, 2)))
t = tuple(out)
len(t)
chessenv.action_size
chessenv.state_size
chessenv.grid_shape
current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, (119,)+chessenv.grid_shape, chessenv.action_size, config.HIDDEN_CNN_LAYERS)
best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, (119,)+chessenv.grid_shape, chessenv.action_size, config.HIDDEN_CNN_LAYERS)
# These weights are overwritten again below once the saved model is read.
best_NN.model.set_weights(current_NN.model.get_weights())
best_player_version = 2
print('LOADING MODEL VERSION ' + str(2) + '...')
# Read the saved model once and copy its weights into both networks.
m_tmp = best_NN.read(chessenv.name, 2, best_player_version)
current_NN.model.set_weights(m_tmp.get_weights())
best_NN.model.set_weights(m_tmp.get_weights())
current_player = Agent('current_player', chessenv.state_size, chessenv.action_size, config.MCTS_SIMS, config.CPUCT, current_NN)
best_player = Agent('best_player', chessenv.state_size, chessenv.action_size, config.MCTS_SIMS, config.CPUCT, best_NN)
state = chessenv.reset()
state.render(None)
# Start-up section: writes a log banner, creates the environment and
# memory, and prepares the current/best networks.
lg.logger_main.info('=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*')
lg.logger_main.info('=*=*=*=*=*=. NEW LOG =*=*=*=*=*')
lg.logger_main.info('=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*')

env = Game()

# config is imported at this point in the script rather than at the top of
# the file; the position is kept as it was.
import config

memory = Memory(config.MEMORY_SIZE)

######## LOAD MODEL IF NECESSARY ########

# Create untrained neural network objects from the config file.
current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                          Game.InputShape, Game.ActionSize,
                          config.HIDDEN_CNN_LAYERS)
best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                       Game.InputShape, Game.ActionSize,
                       config.HIDDEN_CNN_LAYERS)

# If loading an existing neural network, set the weights from that model.
# (Identity comparison is the idiomatic and safe way to test for None.)
if initialise.INITIAL_MODEL_VERSION is not None:
    best_player_version = initialise.INITIAL_MODEL_VERSION
    print('LOADING MODEL VERSION ' + str(initialise.INITIAL_MODEL_VERSION) +
          '...')
    m_tmp = current_NN.read(Game.Name, initialise.INITIAL_RUN_NUMBER,
                            best_player_version)
    current_NN.model.set_weights(m_tmp.get_weights())
    m_tmp = best_NN.read(Game.Name, initialise.INITIAL_RUN_NUMBER,
                         best_player_version)
    best_NN.model.set_weights(m_tmp.get_weights())
# Otherwise just ensure the weights on the two players are the same.
else:
    best_player_version = 0
    best_NN.model.set_weights(current_NN.model.get_weights())
open(
    # Path of the pickled memory for decision type d_t inside the archived
    # run folder; the enclosing call starts before this excerpt.
    run_archive_folder + env.name + '/run' +
    str(initialise.INITIAL_RUN_NUMBER).zfill(4) + "/memory/decision_" +
    str(d_t) + "_memory" + str(MEM_VERSION).zfill(4) + ".p", "rb")))

######## LOAD MODEL IF NECESSARY ########
# One current/best network pair is kept per decision type.
current_NN = []
best_NN = []
# Create untrained neural network objects from the config file.
for i in range(DECISION_TYPES):
    # Each network gets its own action size and the decision-type index.
    current_NN.append(
        Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                     (1, ) + env.grid_shape, env.action_size[i],
                     config.HIDDEN_CNN_LAYERS, i))
    best_NN.append(
        Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                     (1, ) + env.grid_shape, env.action_size[i],
                     config.HIDDEN_CNN_LAYERS, i))
best_player_version = []
# If loading an existing neural network, set the weights from that model.
# The branch is taken unless every configured version is None.
if initialise.INITIAL_MODEL_VERSION != [None] * DECISION_TYPES:
    for i, version in enumerate(initialise.INITIAL_MODEL_VERSION):
        best_player_version.append(initialise.INITIAL_MODEL_VERSION[i])
        print('LOADING MODEL VERSION ' +
              str(initialise.INITIAL_MODEL_VERSION[i]) + '...')
        # NOTE(review): the excerpt ends here; what is done with m_tmp is
        # not visible.
        m_tmp = best_NN[i].read(env.name, initialise.INITIAL_RUN_NUMBER,
                                version)
str(initialise.INITIAL_MEMORY_VERSION).zfill(4) + ".p", "rb"))
# (The line above closes a call that starts before this excerpt.)
# Adjust the loaded memory when its size differs from the configured one.
if memories.MEMORY_SIZE != MEMORY_SIZE:
    memories.extension(MEMORY_SIZE)

######## LOAD MODEL IF NECESSARY ########
# Create untrained neural network objects from the config file.
# A 2-D grid shape gets a leading dimension of 1; any other shape is used
# unchanged.
if len(env.grid_shape) == 2:
    shape = (1, ) + env.grid_shape
else:
    shape = env.grid_shape

# With teams, the fourth constructor argument is the number of teams;
# otherwise it is the number of players.
if TEAM_SIZE > 1:
    current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape,
                              int(PLAYER_COUNT / TEAM_SIZE),
                              config.HIDDEN_CNN_LAYERS)
    best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape,
                           int(PLAYER_COUNT / TEAM_SIZE),
                           config.HIDDEN_CNN_LAYERS)
    opponent_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape,
                               int(PLAYER_COUNT / TEAM_SIZE),
                               config.HIDDEN_CNN_LAYERS)
else:
    current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape,
                              PLAYER_COUNT, config.HIDDEN_CNN_LAYERS)
    best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape,
                           PLAYER_COUNT, config.HIDDEN_CNN_LAYERS)
    opponent_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape,
                               PLAYER_COUNT, config.HIDDEN_CNN_LAYERS)
# Start-up section: optionally restores the archived config and memory of
# an earlier run, then creates a network and writes it out.
env = Game()

# If loading an existing neural network, copy the config file to root.
if initialise.INITIAL_RUN_NUMBER is not None:
    copyfile(
        run_archive_folder + env.name + '/run' +
        str(initialise.INITIAL_RUN_NUMBER).zfill(4) + '/config.py',
        './config.py')

# Must stay below the copy above so the restored config.py is the one that
# gets imported.
import config

######## LOAD MEMORIES IF NECESSARY ########
if initialise.INITIAL_MEMORY_VERSION is None:
    memory = Memory(config.MEMORY_SIZE)
else:
    print('LOADING MEMORY VERSION ' + str(initialise.INITIAL_MEMORY_VERSION) +
          '...')
    memory_path = (run_archive_folder + env.name + '/run' +
                   str(initialise.INITIAL_RUN_NUMBER).zfill(4) +
                   "/memory/memory" +
                   str(initialise.INITIAL_MEMORY_VERSION).zfill(4) + ".p")
    # Close the file deterministically instead of leaking the handle.
    # NOTE(review): pickle executes arbitrary code on load; only use with
    # files produced by this project.
    with open(memory_path, "rb") as memory_file:
        memory = pickle.load(memory_file)

current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                          (2, ) + env.grid_shape, env.action_size,
                          config.HIDDEN_CNN_LAYERS)
current_NN.write(1, 1)
def evaluation_worker(conn):
    """Evaluate incoming network weights against the current best network.

    Runs forever. On each pass the worker sends its current version number
    over ``conn`` and blocks until a reply arrives. A truthy reply is
    treated as new weights for the current network, which then plays a
    tournament against the best network. When it scores above the
    configured threshold, the result is reported over ``conn`` and the
    best network is replaced and written to disk. A falsy reply makes the
    worker sleep for ten seconds before asking again.

    Args:
        conn: Connection object providing ``send``, ``poll`` and ``recv``.
    """
    import pickle
    import config
    from config import PLAYER_COUNT, TEAM_SIZE, MEMORY_SIZE
    import initialise
    from model import Residual_CNN, import_tf
    import_tf(1024 * 3)
    from game import Game
    from agent import Agent
    from memory import Memory
    from funcs import playMatches
    from settings import run_folder
    import loggers as lg
    import logging
    import time

    # Test memory is a tenth of the configured memory size.
    test_memories = Memory(int(MEMORY_SIZE / 10))

    env = Game()

    # A 2-D grid shape gets a leading dimension of 1.
    if len(env.grid_shape) == 2:
        shape = (1, ) + env.grid_shape
    else:
        shape = env.grid_shape

    # Create untrained networks from the config file. With teams the fourth
    # constructor argument is the number of teams, otherwise the number of
    # players.
    if TEAM_SIZE > 1:
        output_size = int(PLAYER_COUNT / TEAM_SIZE)
    else:
        output_size = PLAYER_COUNT
    current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape,
                              output_size, config.HIDDEN_CNN_LAYERS)
    best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape,
                           output_size, config.HIDDEN_CNN_LAYERS)

    current_player_version = 0
    best_player_version = 0

    # If loading an existing neural network, set the weights from that
    # model; otherwise just make both networks share the same weights.
    if initialise.INITIAL_MODEL_VERSION is not None:
        best_player_version = initialise.INITIAL_MODEL_VERSION
        m_tmp = best_NN.read(env.name, initialise.INITIAL_RUN_NUMBER,
                             initialise.INITIAL_MODEL_VERSION)
        current_NN.model.set_weights(m_tmp.get_weights())
        best_NN.model.set_weights(m_tmp.get_weights())
    else:
        best_NN.model.set_weights(current_NN.model.get_weights())

    current_player = Agent('current_player', config.MCTS_SIMS, config.CPUCT,
                           current_NN)
    best_player = Agent('best_player', config.MCTS_SIMS, config.CPUCT,
                        best_NN)

    time.sleep(20)

    while 1:
        # Request current_NN weights, then wait indefinitely for the reply.
        conn.send(current_player_version)
        conn.poll(None)
        data = conn.recv()

        if data:
            current_NN.model.set_weights(data)
            current_player_version += 1

            # Tournament line-up: best and current player alternate,
            # within each team when teams are used.
            tourney_players = []
            if TEAM_SIZE > 1:
                for i in range(int(PLAYER_COUNT / TEAM_SIZE)):
                    for k in range(TEAM_SIZE):
                        if k % 2 == 0:
                            tourney_players.append(best_player)
                        else:
                            tourney_players.append(current_player)
            else:
                for i in range(PLAYER_COUNT):
                    if i % 2 == 0:
                        tourney_players.append(best_player)
                    else:
                        tourney_players.append(current_player)

            scores, test_memories = playMatches(tourney_players,
                                                config.EVAL_EPISODES,
                                                lg.logger_tourney,
                                                0.0,
                                                test_memories,
                                                evaluation=True)
            test_memories.clear_stmemory()

            # If the current player is significantly better than the best
            # player, report it and copy its weights into the best network.
            if scores['current_player'] > scores[
                    'best_player'] * config.SCORING_THRESHOLD:
                conn.send(((current_player_version, best_player_version),
                           str(scores)))
                best_player_version = best_player_version + 1
                best_NN.model.set_weights(current_NN.model.get_weights())
                best_NN.write(env.name, best_player_version)

            # Every fifth version, persist the test memory once it is full.
            if (len(test_memories.ltmemory) == test_memories.MEMORY_SIZE
                    and current_player_version % 5 == 0):
                dump_path = (run_folder + "memory/test_memory" +
                             str(current_player_version).zfill(4) + ".p")
                # Dump the memory this worker actually owns (the name
                # `memories` is not defined here) and close the file.
                with open(dump_path, "wb") as dump_file:
                    pickle.dump(test_memories, dump_file)
        else:
            time.sleep(10)
def self_play_worker(conn):
    """Generate self-play games and send the resulting memories over ``conn``.

    Runs forever. On each pass the worker sends its best-network version
    over ``conn``, waits for a reply, applies the received weights when the
    reply is truthy, sends back any memories gathered so far, picks the
    line-up of players and plays ``config.EPISODES`` episodes.

    Args:
        conn: Connection object providing ``send``, ``poll`` and ``recv``.
    """
    import os
    import config
    from config import PLAYER_COUNT, TEAM_SIZE, MEMORY_SIZE
    from memory import Memory
    from settings import run_folder, run_archive_folder
    import initialise
    from game import Game, GameState
    from agent import Agent
    from model import Residual_CNN, import_tf
    import_tf(1024 * 3)
    from shutil import copyfile
    from funcs import playMatches
    import loggers as lg
    import logging
    import random

    env = Game()

    ######## LOAD MODEL IF NECESSARY ########
    # create an untrained neural network objects from the config file
    # A 2-D grid shape gets a leading dimension of 1.
    if len(env.grid_shape) == 2:
        shape = (1, ) + env.grid_shape
    else:
        shape = env.grid_shape

    # With teams the fourth constructor argument is the number of teams,
    # otherwise the number of players.
    if TEAM_SIZE > 1:
        best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape, int(PLAYER_COUNT / TEAM_SIZE), config.HIDDEN_CNN_LAYERS)
        opponent_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape, int(PLAYER_COUNT / TEAM_SIZE), config.HIDDEN_CNN_LAYERS)
    else:
        best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape, PLAYER_COUNT, config.HIDDEN_CNN_LAYERS)
        opponent_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape, PLAYER_COUNT, config.HIDDEN_CNN_LAYERS)

    best_player_version = 0
    # Start both networks from the same weights.
    best_NN.model.set_weights(opponent_NN.model.get_weights())

    best_player = Agent('best_player', config.MCTS_SIMS, config.CPUCT, best_NN)
    opponent_player = Agent('selected_opponent', config.MCTS_SIMS, config.CPUCT, opponent_NN)

    if initialise.INITIAL_ITERATION != None:
        iteration = initialise.INITIAL_ITERATION
    else:
        iteration = 0

    memories = Memory(150 * config.EPISODES)

    while 1:
        iteration += 1

        # request best_NN weights
        conn.send(best_player_version)
        # wait indefinitely for best_NN weights
        conn.poll(None)
        data = conn.recv()
        #print('recieved: {}'.format(data))

        # if weights different set weights
        # A truthy reply is indexed as data[0] = version, data[1] = weights.
        if data:
            best_NN.model.set_weights(data[1])
            best_player_version = data[0]

        if len(memories.ltmemory) != 0:  # send new memories (skip first loop)
            conn.send(memories.ltmemory)
            # Start collecting into a fresh memory after sending.
            memories = Memory(150 * config.EPISODES)

        ######## CREATE LIST OF PLAYERS #######
        # for training it is just 2 copies of the best_player vs. 2 copies of another randomly selected model
        filenames = os.listdir('run/models/')
        # Keep only the saved models (.h5 files).
        filenames = [name for name in filenames if '.h5' == name[-3:]]
        if filenames:
            # Load a randomly chosen saved model into the opponent network
            # and alternate best player and opponent in the line-up.
            opponent = random.choice(filenames)
            m_tmp = opponent_NN.read_specific('run/models/' + opponent)
            opponent_NN.model.set_weights(m_tmp.get_weights())
            self_play_players = []
            for i in range(PLAYER_COUNT):
                if i % 2 == 0:
                    self_play_players.append(best_player)
                else:
                    self_play_players.append(opponent_player)
        else:
            # No saved model yet: the best player fills every seat.
            self_play_players = []
            for i in range(PLAYER_COUNT):
                self_play_players.append(best_player)

        #print("Version {} randomly selected to play against version {}".format(int(opponent[-7:-3]), best_player_version))

        ######## SELF PLAY ########
        #epsilon = init_epsilon - iteration * (init_epsilon / 50.0)
        epsilon = 0
        #print('Current epsilon: {}'.format(epsilon))

        print('SELF PLAYING ' + str(config.EPISODES) + ' EPISODES...')
        _, memories = playMatches(self_play_players, config.EPISODES, lg.logger_main, epsilon, memory=memories)
class Agent_NN:
    """Two residual CNNs plus the feature encoders that feed them.

    ``nn_start`` is built from ``config.INPUT_START_DIM`` /
    ``config.OUTPUT_START_DIM`` and ``nn`` from ``config.INPUT_DIM`` /
    ``config.OUTPUT_DIM``.  States in the initial settlement/road phases are
    encoded by ``build_start_nn_input``; every other state is encoded by
    ``build_nn_input``.  ``predict`` chooses the network from the channel
    count of the encoded tensor.

    When ``enable_cache`` is ``True`` the first 11 channels (resource and
    port planes) are cached per board rotation in ``self.cache``.
    """

    # Weight given to a tile's dice number on the resource planes (divided
    # by 15.0 when written).  The values equal the number of two-dice
    # combinations producing that roll.
    _NUMBER_WEIGHTS = {
        2: 1, 3: 2, 4: 3, 5: 4, 6: 5,
        8: 5, 9: 4, 10: 3, 11: 2, 12: 1,
    }

    def __init__(self, enable_cache=False):
        self.nn_start = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                                     config.INPUT_START_DIM,
                                     config.OUTPUT_START_DIM,
                                     config.HIDDEN_CNN_LAYERS)
        self.nn = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                               config.INPUT_DIM, config.OUTPUT_DIM,
                               config.HIDDEN_CNN_LAYERS)
        self.enable_cache = enable_cache
        self.cache = {}

    def purge_cache(self):
        """Drop every cached board encoding."""
        self.cache = {}

    def nn_read(self, name):
        """Read both networks for ``name`` ('s' = start net, 'g' = main net)."""
        self.nn_start.read(name, 's')
        self.nn.read(name, 'g')

    def nn_write(self, name):
        """Write both networks for ``name`` ('s' = start net, 'g' = main net)."""
        self.nn_start.write(name, 's')
        self.nn.write(name, 'g')

    def predict(self, state, perspective, mcts):
        """Encode ``state`` and return the matching network's prediction.

        The main network is used when the encoded tensor has
        ``config.INPUT_DIM[0]`` channels, the start network otherwise.
        """
        network = self.build_nn_input(state, perspective, mcts=mcts)
        if network.shape[1] == config.INPUT_DIM[0]:
            return self.nn.predict(network)
        return self.nn_start.predict(network)

    # ------------------------------------------------------------------
    # Private helpers shared by both encoders
    # ------------------------------------------------------------------

    def _fill_board_planes(self, nn_input, state, rotation):
        """Write channels 0-10 (resource and port planes) into ``nn_input``.

        Uses the per-rotation cache when enabled.  The cache stores a copy,
        so later changes to ``nn_input`` (which is returned to the caller)
        cannot alter cached data and the cache does not keep the full input
        tensor alive.
        """
        if self.enable_cache is True and rotation in self.cache:
            nn_input[:, :11, :, :] = self.cache[rotation]
            return

        coords = config.vertex_to_nn_input[rotation]

        # Resources outputs: channel is ``resource - 2``, accumulated over
        # every vertex of every numbered tile.
        for number, tile in state.numbers:
            resource = state.tiles[tile]
            weight = self._NUMBER_WEIGHTS[number] / 15.0
            for vertex in config.tiles_vertex[tile]:
                nn_input[0, resource - 2,
                         coords[vertex][0], coords[vertex][1]] += weight

        # Ports: one channel (5-10) per port kind.
        port_kinds = [
            config.SHEEP, config.ORE, config.BRICK, config.WHEAT,
            config.WOOD, config.GENERIC
        ]
        for key, kind in enumerate(port_kinds):
            for i, port in enumerate(state.ports):
                if port != kind:
                    continue
                for vertex in config.ports_vertex[i]['vert']:
                    nn_input[0, key + 5,
                             coords[vertex][0], coords[vertex][1]] = 1

        if self.enable_cache is True:
            self.cache[rotation] = nn_input[:, :11, :, :].copy()

    @staticmethod
    def _mark_vertices(nn_input, plane, coords, vertices):
        """Set ``plane`` to 1 at the grid cell of every vertex given."""
        for vertex in vertices:
            nn_input[0, plane, coords[vertex][0], coords[vertex][1]] = 1

    @staticmethod
    def _add_roads(nn_input, plane, coords, roads):
        """Add 1/3 to ``plane`` at both end vertices of every road."""
        for road in roads:
            for end in (road[0], road[1]):
                nn_input[0, plane, coords[end][0], coords[end][1]] += 1 / 3.0

    # ------------------------------------------------------------------
    # Encoders
    # ------------------------------------------------------------------

    def build_start_nn_input(self, state, perspective):
        """Encode ``state`` for the start network.

        Channel layout (player blocks are ordered relative to
        ``perspective``, i.e. ``(4 + p - perspective) % 4``):

        * 0-10   resource and port planes
        * 11-18  per player: settlements, roads
        * 19-38  per player: five resource card counts / 10
        * 39-40  initial phase with ``initial_phase_decrease`` 0 / 1
        * 41-44  player to move

        A board rotation is drawn with ``np.random.randint(12)`` on every
        call.

        Returns:
            float32 array of shape ``(1,) + config.INPUT_START_DIM``.
        """
        nn_input = np.zeros(
            (1, config.INPUT_START_DIM[0], config.INPUT_START_DIM[1],
             config.INPUT_START_DIM[2]),
            dtype=np.float32)

        rotation = np.random.randint(12)
        self._fill_board_planes(nn_input, state, rotation)
        coords = config.vertex_to_nn_input[rotation]

        card_kinds = [
            config.SHEEP, config.ORE, config.BRICK, config.WHEAT, config.WOOD
        ]
        for p in range(4):
            p_order = (4 + p - perspective) % 4
            player = state.players[p]

            # Settlements, roads
            self._mark_vertices(nn_input, 11 + 2 * p_order, coords,
                                player.settlements)
            self._add_roads(nn_input, 12 + 2 * p_order, coords, player.roads)

            # Cards
            for key, kind in enumerate(card_kinds):
                nn_input[0, 19 + key + 5 * p_order, :, :] = \
                    player.cards[kind] / 10.0

            # Player turn
            if p == state.player_turn:
                nn_input[0, 41 + p_order, :, :] = 1

        # State
        in_initial_phase = state.game_phase in (
            config.PHASE_INITIAL_SETTLEMENT, config.PHASE_INITIAL_ROAD)
        if in_initial_phase and state.initial_phase_decrease == 0:
            nn_input[0, 39, :, :] = 1
        if in_initial_phase and state.initial_phase_decrease == 1:
            nn_input[0, 40, :, :] = 1

        return nn_input

    def build_nn_input(self, state, perspective, mcts=None):
        """Encode ``state`` for whichever network handles its phase.

        Initial settlement/road phases are delegated to
        ``build_start_nn_input``.  Otherwise the channel layout is (player
        blocks ordered relative to ``perspective``):

        * 0-10   resource and port planes
        * 11-22  per player: settlements, cities, roads
        * 23-42  per player: five resource card counts / 10
        * 43     vertices of the robber tile
        * 44-47  knights used / 5
        * 48-51  largest army badge
        * 52-55  longest road badge
        * 56-75  per player: five special card counts / 3
        * 76     discard phase
        * 77-80  player to move
        * 81-86  throw dice, move robber, steal card, road building,
                 year of plenty, trade respond
        * 87     available settlement spots

        ``mcts`` is accepted for interface compatibility and is not used.

        Returns:
            float32 array of shape ``(1,) + config.INPUT_DIM`` (or the start
            network's shape for the initial phases).
        """
        if state.game_phase in (config.PHASE_INITIAL_SETTLEMENT,
                                config.PHASE_INITIAL_ROAD):
            return self.build_start_nn_input(state, perspective)

        nn_input = np.zeros(
            (1, config.INPUT_DIM[0], config.INPUT_DIM[1],
             config.INPUT_DIM[2]),
            dtype=np.float32)

        rotation = np.random.randint(12)
        self._fill_board_planes(nn_input, state, rotation)
        coords = config.vertex_to_nn_input[rotation]

        card_kinds = [
            config.SHEEP, config.ORE, config.BRICK, config.WHEAT, config.WOOD
        ]
        special_kinds = [
            config.VICTORY_POINT, config.KNIGHT, config.MONOPOLY,
            config.ROAD_BUILDING, config.YEAR_OF_PLENTY
        ]
        for p in range(4):
            p_order = (4 + p - perspective) % 4
            player = state.players[p]

            # Settlements, cities, roads
            self._mark_vertices(nn_input, 11 + 3 * p_order, coords,
                                player.settlements)
            self._mark_vertices(nn_input, 12 + 3 * p_order, coords,
                                player.cities)
            self._add_roads(nn_input, 13 + 3 * p_order, coords, player.roads)

            # Cards
            for key, kind in enumerate(card_kinds):
                nn_input[0, 23 + key + 5 * p_order, :, :] = \
                    player.cards[kind] / 10.0

            # Army Cards Played
            nn_input[0, 44 + p_order, :, :] = player.used_knights / 5.0
            # Army Holder
            nn_input[0, 48 + p_order, :, :] = player.largest_army_badge
            # Longest Road Holder
            nn_input[0, 52 + p_order, :, :] = player.longest_road_badge

            # Special Cards
            for key, kind in enumerate(special_kinds):
                nn_input[0, 56 + key + 5 * p_order, :, :] = \
                    player.special_cards.count(kind) / 3.0

            # Player turn
            if p == state.player_turn:
                nn_input[0, 77 + p_order, :, :] = 1

        # Robber
        self._mark_vertices(nn_input, 43, coords,
                            config.tiles_vertex[state.robber_tile])

        # Game phase flags: each matching phase fills its whole plane.
        phase_planes = (
            (config.PHASE_DISCARD, 76),
            (config.PHASE_THROW_DICE, 81),
            (config.PHASE_MOVE_ROBBER, 82),
            (config.PHASE_STEAL_CARD, 83),
            (config.PHASE_ROAD_BUILDING, 84),
            (config.PHASE_YEAR_OF_PLENTY, 85),
            (config.PHASE_TRADE_RESPOND, 86),
        )
        for phase, plane in phase_planes:
            if state.game_phase == phase:
                nn_input[0, plane, :, :] = 1

        # Vertices where a settlement can currently be placed
        for s in range(54):
            if state.available_settlement_spot(s):
                nn_input[0, 87, coords[s][0], coords[s][1]] = 1

        return nn_input
# If loading an existing neural network, copy the config file to root
#if INITIAL_RUN_NUMBER != None:
#copyfile(run_archive_folder + env.name + '/run' + str(INITIAL_RUN_NUMBER).zfill(4) + '/config.py', './config.py')

# Start from an empty replay memory unless an archived version was requested.
if INITIAL_MEMORY_VERSION is None:
    memory = Memory(config.MEMORY_SIZE)
else:
    print('LOADING MEMORY VERSION ' + str(INITIAL_MEMORY_VERSION) + '...')
    memory_path = (run_archive_folder + env.name + '/run' +
                   str(INITIAL_RUN_NUMBER).zfill(4) + "/memory/memory" +
                   str(INITIAL_MEMORY_VERSION).zfill(4) + ".p")
    # NOTE: pickle.load can execute arbitrary code; only load archives that
    # were produced by this project.
    # The context manager closes the file even if unpickling fails.
    with open(memory_path, "rb") as memory_file:
        memory = pickle.load(memory_file)

# Two identically shaped networks: the one being trained and the current best.
current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                          (2, ) + env.grid_shape, env.action_size,
                          config.HIDDEN_CNN_LAYERS)
best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                       (2, ) + env.grid_shape, env.action_size,
                       config.HIDDEN_CNN_LAYERS)

best_player_version = 0
# Make both players start from the same weights.
best_NN.model.set_weights(current_NN.model.get_weights())

# Keep a copy of the configuration next to the run's outputs.
copyfile('./config.py', run_folder + 'config.py')
# plot_model(current_NN.model, to_file=run_folder + 'models/model.png', show_shapes = True)

current_player = Agent('current_player', env.state_size, env.action_size,
                       config.MCTS_SIMS, config.CPUCT, current_NN)
best_player = Agent('best_player', env.state_size, env.action_size,
                    config.MCTS_SIMS, config.CPUCT, best_NN)
idx_end = mem_size memory_x[idx_start:idx_end] = x memory_y_value[idx_start:idx_end] = y_value memory_y_policy[idx_start:idx_end] = y_policy idx_start += len(x) n_games += 1 pbar.update(len(x)) pbar.set_postfix(game=n_games, tot_games=i + 1) # Build Model model = Residual_CNN( reg_const=0.0001, learning_rate=0.01, input_dim=(9, 9, 7), output_dim=9 * 9 + 1, hidden_layers=[{ 'filters': 128, 'kernel_size': (3, 3) }] * 10, ) try: model.load('Go', '0.1') print("Loaded Model") except: print("Training New Model") # Train Model model.model.compile( loss={ 'value_head': 'binary_crossentropy', 'policy_head': 'categorical_crossentropy'
import config

######## LOAD MEMORIES IF NECESSARY ########

# Start from an empty replay memory unless an archived version was requested.
if initialise.INITIAL_MEMORY_VERSION is None:
    memory = Memory(config.MEMORY_SIZE)
else:
    print('LOADING MEMORY VERSION ' + str(initialise.INITIAL_MEMORY_VERSION) + '...')
    memory_path = (run_archive_folder + env.name + '/run' +
                   str(initialise.INITIAL_RUN_NUMBER).zfill(4) +
                   "/memory/memory" +
                   str(initialise.INITIAL_MEMORY_VERSION).zfill(4) + ".p")
    # NOTE: pickle.load can execute arbitrary code; only load archives that
    # were produced by this project.
    # The context manager closes the file even if unpickling fails.
    with open(memory_path, "rb") as memory_file:
        memory = pickle.load(memory_file)

######## LOAD MODEL IF NECESSARY ########

# create untrained neural network objects from the config file
current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                          (2,) + env.grid_shape, env.action_size,
                          config.HIDDEN_CNN_LAYERS)
best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                       (2,) + env.grid_shape, env.action_size,
                       config.HIDDEN_CNN_LAYERS)

# If loading an existing neural network, set the weights from that model
if initialise.INITIAL_MODEL_VERSION is not None:
    best_player_version = initialise.INITIAL_MODEL_VERSION
    print('LOADING MODEL VERSION ' + str(initialise.INITIAL_MODEL_VERSION) + '...')
    m_tmp = best_NN.read(env.name, initialise.INITIAL_RUN_NUMBER,
                         best_player_version)
    current_NN.model.set_weights(m_tmp.get_weights())
    best_NN.model.set_weights(m_tmp.get_weights())
# otherwise just ensure the weights on the two players are the same
else:
    best_player_version = 0
    best_NN.model.set_weights(current_NN.model.get_weights())

# copy the config file to the run folder
copyfile('./config.py', run_folder + 'config.py')
def retraining_worker(conn):
    """Train the current network forever on minibatches received over ``conn``.

    Each loop iteration sends ``(TRAINING_LOOPS, BATCH_SIZE)`` as a request,
    blocks until a reply arrives, and:

    * if the reply is truthy, iterates over it as a sequence of minibatches
      (each an iterable of rows with ``'state'`` and ``'value'`` keys), fits
      the network on every minibatch, then sends back
      ``(weights, last_recorded_loss)``;
    * otherwise sleeps 10 seconds before asking again.

    Args:
        conn: connection object providing ``send``, ``poll`` and ``recv``.

    This function never returns.
    """
    from game import Game
    import initialise
    import config
    from config import PLAYER_COUNT, TEAM_SIZE, BATCH_SIZE, TRAINING_LOOPS
    from model import Residual_CNN, import_tf
    # presumably configures TensorFlow before any model is built -- TODO
    # confirm the meaning of the argument in model.import_tf
    import_tf(1024 * 2)
    import numpy as np
    import time

    env = Game()

    ######## LOAD MODEL IF NECESSARY ########
    # A 2-D grid gets a leading axis of length 1; other shapes are used as-is.
    grid_shape = env.grid_shape
    shape = (1,) + grid_shape if len(grid_shape) == 2 else grid_shape

    # With teams the network has one output per team, otherwise one per player.
    output_dim = int(PLAYER_COUNT / TEAM_SIZE) if TEAM_SIZE > 1 else PLAYER_COUNT

    # create an untrained neural network object from the config file
    current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape,
                              output_dim, config.HIDDEN_CNN_LAYERS)

    # If loading an existing neural network, set the weights from that model
    if initialise.INITIAL_MODEL_VERSION is not None:
        m_tmp = current_NN.read(env.name, initialise.INITIAL_RUN_NUMBER,
                                initialise.INITIAL_MODEL_VERSION)
        current_NN.model.set_weights(m_tmp.get_weights())

    # Rounded final-epoch loss of every minibatch fitted so far.
    train_overall_loss = []

    while True:
        # request memory samples
        conn.send((TRAINING_LOOPS, BATCH_SIZE))
        # wait for memory samples
        conn.poll(None)
        data = conn.recv()

        if not data:
            # nothing to train on yet: back off before asking again
            time.sleep(10)
            continue

        # train on sampled memories
        for minibatch in data:
            training_states = np.array(
                [current_NN.convertToModelInput(row['state'])
                 for row in minibatch])
            training_targets = {
                'value_head': np.array([row['value'] for row in minibatch])
            }

            fit = current_NN.fit(training_states, training_targets,
                                 epochs=config.EPOCHS, verbose=1,
                                 validation_split=0, batch_size=32)
            # record the loss of the last epoch of this fit
            train_overall_loss.append(round(fit.history['loss'][-1], 4))

        # send new current_NN weights together with the latest loss
        conn.send((current_NN.model.get_weights(), train_overall_loss[-1]))
# The idea is as follows: # player1 has to play. His Monte Carlo Tree Search does N simulations in order to evaluate the best possible move. # If N is very big, like 5000+, the estimation should be quite accurate. But if N is smaller, like 50, the estimate # will be wrong because the MCTS will stop after 2 or 3 moves maximum, and the "expectations" at the leaves corresponding # to these game states will not be estimated correctly. # This is where the neural networks arrives: if trained on a big number of states (with correct expectations), # it will be able to predict correctly the state of the leaves of the MCTS, and the global estimation of the MCTS # will be much better. # As the beginning, the predictions of the neural network will be wrong, so the results of the MCTS will be wrong as well, # but after enough games, both the neural network and the MCTS will improve and converge. env = Game() # create an untrained neural network objects from the config file current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, (1,) + config.GRID_SHAPE, config.GRID_SHAPE[1], config.HIDDEN_CNN_LAYERS) best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, (1,) + config.GRID_SHAPE, config.GRID_SHAPE[1], config.HIDDEN_CNN_LAYERS) best_NN.model.set_weights(current_NN.model.get_weights()) current_player = Agent(CURRENT_PLAYER_NAME, config.GRID_SHAPE[0] * config.GRID_SHAPE[1], config.GRID_SHAPE[1], config.MCTS_SIMS, config.CPUCT, current_NN) best_player = Agent(BEST_PLAYER_NAME, config.GRID_SHAPE[0] * config.GRID_SHAPE[1], config.GRID_SHAPE[1], config.MCTS_SIMS, config.CPUCT, best_NN) best_player_version = 0 iteration = 0 while 1: iteration += 1 lg.logger_main.info('ITERATION NUMBER ' + str(iteration))
mem_version = initialise.INITIAL_MEMORY_VERSION
# Resize the loaded memory when its capacity differs from the configured one.
if memories.MEMORY_SIZE != MEMORY_SIZE:
    memories.extension(MEMORY_SIZE)

######## LOAD MODEL IF NECESSARY ########

# A 2-D grid gets a leading axis of length 1; other shapes are used as-is.
shape = (1, ) + env.grid_shape if len(env.grid_shape) == 2 else env.grid_shape

# create an untrained neural network object from the config file; with teams
# the network has one output per team, otherwise one per player
tmp_NN = Residual_CNN(
    config.REG_CONST, config.LEARNING_RATE, shape,
    int(PLAYER_COUNT / TEAM_SIZE) if TEAM_SIZE > 1 else PLAYER_COUNT,
    config.HIDDEN_CNN_LAYERS)

# If loading an existing neural network, set the weights from that model
if initialise.INITIAL_MODEL_VERSION is not None:
    best_player_version = initialise.INITIAL_MODEL_VERSION
    print('LOADING MODEL VERSION ' + str(initialise.INITIAL_MODEL_VERSION) + '...')
    m_tmp = tmp_NN.read(env.name, initialise.INITIAL_RUN_NUMBER,
                        initialise.INITIAL_MODEL_VERSION)
    # get_weights() is called once per holder, exactly as before, so the two
    # containers never share one returned object.
    current_weights = {0: m_tmp.get_weights()}
    best_weights = m_tmp.get_weights()
# Load memory if necessary
if initialise.INITIAL_MEMORY_VERSION is None:
    memory = Memory(config.MEMORY_SIZE)
else:
    print('LOADING MEMORY VERSION ' + str(initialise.INITIAL_MEMORY_VERSION) + '...')
    memory_path = (run_archive_folder + 'Model_' +
                   str(initialise.INITIAL_RUN_NUMBER) + "/memory/memory" +
                   str(initialise.INITIAL_MEMORY_VERSION).zfill(4) + ".p")
    # NOTE: pickle.load can execute arbitrary code; only load archives that
    # were produced by this project.
    # The context manager closes the file even if unpickling fails.
    with open(memory_path, "rb") as memory_file:
        memory = pickle.load(memory_file)

# Create untrained neural network objects from the config file
current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                          (2, ) + env.grid_shape, env.action_size,
                          config.HIDDEN_CNN_LAYERS)
best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                       (2, ) + env.grid_shape, env.action_size,
                       config.HIDDEN_CNN_LAYERS)

# Load existing neural network if needed
if initialise.INITIAL_MODEL_VERSION is not None:
    best_player_version = initialise.INITIAL_MODEL_VERSION
    print('LOADING MODEL VERSION ' + str(initialise.INITIAL_MODEL_VERSION) + '...')
    m_tmp = best_NN.read(initialise.INITIAL_RUN_NUMBER, best_player_version)
    current_NN.model.set_weights(m_tmp.get_weights())
    best_NN.model.set_weights(m_tmp.get_weights())
# Otherwise ensure the initial weights are the same for both players
import pickle
import numpy as np
import initialise


def grp(pat, txt):
    """Return the first match of regex ``pat`` in ``txt``, or ``'&'`` if none."""
    r = re.search(pat, txt)
    return r.group(0) if r else '&'


def _version_sort_key(filename):
    """Return the first run of digits in ``filename`` as an int (-1 if none)."""
    digits = grp(r'[0-9]+', filename)
    return int(digits) if digits != '&' else -1


color = ['b', 'r', 'c', 'm', 'g']
env = Game()

# create an untrained neural network object from the config file
player1_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                          (2, ) + env.grid_shape, env.action_size,
                          config.HIDDEN_CNN_LAYERS)
# player2_NN = CNN(config.REG_CONST, config.LEARNING_RATE, (2,) + env.grid_shape, env.action_size, config.HIDDEN_CNN_LAYERS)

path1 = './run/models/'
path3 = './Versions/'
path2 = './results/figures/valeur_double_menace/'
EPISODES = 5

# Make sure the figure directory exists (parents included).  A failure is
# reported instead of being silently swallowed, but it is still non-fatal.
try:
    os.makedirs(path2, exist_ok=True)
except OSError as exc:
    print('Could not create {}: {}'.format(path2, exc))

version_list_CNN = os.listdir(path1)
# Order the model files by the version number embedded in their names.  The
# previous pattern '(0-9+)' only matched the literal text "0-9", so every key
# was '&' and the list was left in os.listdir order.
version_list_CNN.sort(key=_version_sort_key)