def main():
    """Load the trained Atari-Go model and let it play four black moves.

    The board is seeded via ``init_game`` at the fixed coordinate (1, 6)
    and printed after every move.
    """
    net = keras.models.load_model('src/learn/RL_Atari/test_model_1.h5')
    game = Game()
    col_coord, row_coord = 1, 6
    game = init_game(game, col_coord, row_coord)
    print('new game')
    print(game)
    for _ in range(4):
        q_values = net.predict(board2input(game, 'b'), batch_size=1)
        masked_q = copy.copy(q_values)
        best = Move.from_flat_idx(np.argmax(q_values))
        cell = best.to_matrix_location()
        # If the top-scoring move targets an occupied point, mask it with an
        # arbitrarily low value and fall through to the next-best entry.
        while game.board[cell] != EMPTY:
            masked_q[0][np.argmax(masked_q)] = -100
            best = Move.from_flat_idx(np.argmax(masked_q))
            cell = best.to_matrix_location()
        game.play(best, 'b')
        print(game)
def play(args):
    """Play one 9x9 game between two players.

    ``args`` is a ``(player1, player2, verbose)`` tuple.  Players alternate
    starting with black; the game ends after two consecutive passes and the
    result of the final ``game.play`` call is returned.
    """
    black, white, verbose = args
    mover, waiter = black, white
    mover_col, waiter_col = 'b', 'w'
    previous = None
    game = Game({'SZ': 9})
    while True:
        if verbose:
            print(game)
        move = mover.genmove(mover_col, game)
        result = game.play(move, player=mover_col)
        # Two consecutive passes end the game.
        if previous is not None and previous.is_pass and move.is_pass:
            if verbose:
                print(mover_col, mover)
                print(waiter_col, waiter)
            return result
        mover, waiter = waiter, mover
        mover_col, waiter_col = waiter_col, mover_col
        previous = move
def replay_game(sgf_line, func):
    """Recreate a game from an SGF string, applying ``func`` after each move.

    Returns ``None`` for games without a decisive B/W result; otherwise a
    stacked numpy array of ``func(game, color, black_win)`` outputs, one row
    per move.

    More of a proof-of-concept or example than really a necessary function.
    We will use some modified version of this to create the training data.
    """
    collection = sgf.parse(sgf_line)
    # This all only works if the SGF contains only one game.
    game_tree = collection.children[0]
    properties = game_tree.nodes[0].properties
    outcome = properties['RE'][0]
    if not outcome.startswith(('B', 'W')):
        return None
    black_win = outcome.startswith('B')
    game = Game(properties)
    samples = []
    for node in game_tree.nodes[1:]:
        color = list(node.properties.keys())[0]
        move = Move.from_sgf(str(node.properties[color][0]))
        game.play(move, color.lower(), checking=False)
        samples.append(func(game, color.lower(), black_win))
    return np.stack(samples)
def __init__(self, player1type, player2type, logging_level, end_of_turn_sleep_time):
    """Wire up a controller thread driving two GTP engines against each other.

    :param player1type: engine type string sent to player 1 ('b') via
        the ``set_player_type`` GTP extension command
    :param player2type: engine type string sent to player 2 ('w')
    :param logging_level: forwarded to the file logger and both players
    :param end_of_turn_sleep_time: pause (seconds) between turns
    """
    threading.Thread.__init__(self)
    self.logger = Utils.get_unique_file_logger(self, logging_level)
    self.end_of_turn_sleep_time = end_of_turn_sleep_time
    self.game = Game()
    self.view = None  # optional GUI view, attached later by the owner
    self.player1 = Player('b', logging_level)
    self.player1.engine.controller = self
    self.player2 = Player('w', logging_level)
    self.player2.engine.controller = self
    # Reverse lookup engine -> player, used when engines call back into
    # the controller (see handle_input_from_engine).
    self.map = {
        self.player1.engine: self.player1,
        self.player2.engine: self.player2,
    }
    self.send_to_player(self.player1, 'set_player_type ' + player1type)
    self.send_to_player(self.player2, 'set_player_type ' + player2type)
    # GTP success responses are prefixed with "= "; [2:] strips that prefix.
    self.player1.name = self.wait_for_response(self.player1, 'name')[2:]
    self.player2.name = self.wait_for_response(self.player2, 'name')[2:]
    self.current_player = self.player1
    self.other_player = self.player2
# NOTE(review): fragment — `path`, `root_dir` and the loop that collects
# `sgf_files` begin before this chunk; indentation below is reconstructed.
sgf_files.append(path)

# Convert each collected SGF game into a per-game CSV board history.
for file in sgf_files:
    with open(file, 'r') as f:
        content = f.read()
    try:
        collection = sgf.parse(content)
    except Exception as e:
        # Best-effort: skip unparseable files rather than aborting the batch.
        print('Failed to parse ' + file + ' as sgf-collection')
        continue
    # Assume the sgf file contains one game
    game_tree = collection.children[0]
    n_0 = game_tree.nodes[0]
    # n_0.properties contains the initial game setup
    game_id = n_0.properties['GN'][0]
    out_file = os.path.join(root_dir, game_id + '.csv')
    if os.path.isfile(out_file):
        # Remove stale output first, since board states are appended ('a').
        os.remove(out_file)
    # very similar to play_from_sgf.py, unify these parts TODO
    board_size = int(n_0.properties['SZ'][0])
    game = Game(n_0.properties, show_each_turn=True)
    for n in game_tree.nodes[1:]:
        player_color = list(n.properties.keys())[0]
        move_str = str(n.properties[player_color][0])
        move = str2move(move_str, board_size)
        game.play(move, player_color.lower())
        # presumably one board snapshot appended per move — confirm
        game.board2file(out_file, 'a')
def main():
    """Train a dense Q-network on a single fixed Atari-Go capture task.

    A 243-input / 81-output MLP learns, via Q-updates with an epsilon-greedy
    policy, to capture the white group seeded at the fixed coordinate (1, 6).
    The trained model is saved to 'test_model_1.h5'.
    """
    model = Sequential()
    model.add(
        Dense(units=200,
              kernel_initializer='uniform',
              activation='relu',
              input_shape=(243, )))
    model.add(Dense(units=400, kernel_initializer='uniform', activation='relu'))
    model.add(Dense(units=200, kernel_initializer='uniform', activation='relu'))
    model.add(
        Dense(units=81, kernel_initializer='uniform', activation='linear'))
    rms = RMSprop()
    model.compile(loss='mse', optimizer=rms)

    col_coord, row_coord = 1, 6  # fixed white-group seed for every episode
    epochs = 10
    gamma = 0.9   # discount factor
    epsilon = 1   # exploration rate, annealed after each episode
    for i in range(epochs):
        game = Game()
        game = init_game(game, col_coord, row_coord)
        status = 1  # 1 while the episode is in progress
        while status == 1:
            qval = model.predict(board2input(game, 'b'), batch_size=1)
            if random.random() < epsilon:
                # Explore: pick a random non-pass legal move.
                valid_moves = game.get_playable_locations('b')
                move = random.choice(valid_moves)
                while move.is_pass:
                    move = random.choice(valid_moves)
                new_game = copy.deepcopy(game)
                new_game.play(move, 'b')
                move = move.to_flat_idx()
            else:
                # Exploit: best Q-value whose board point is still empty.
                temp_qval = copy.copy(qval)
                move = Move.from_flat_idx(np.argmax(temp_qval))
                new_game = copy.deepcopy(game)
                location = move.to_matrix_location()
                while new_game.board[location] != EMPTY:
                    # Mask occupied points with an arbitrarily low value so
                    # argmax falls through to the next-best move.
                    temp_qval[0][np.argmax(temp_qval)] = -100
                    move = Move.from_flat_idx(np.argmax(temp_qval))
                    location = move.to_matrix_location()
                new_game.play(move, 'b')
                move = move.to_flat_idx()

            if check_dead_group(new_game, col_coord, row_coord):
                reward = 10  # capture achieved: terminal state
                status = 0
            else:
                reward = -1

            # Q-learning target: r + gamma * max_a' Q(s', a') for non-terminal
            # states, plain r for terminal ones.
            # FIX: predict on the successor state (new_game, not game) and
            # take the max over all actions rather than Q of the chosen one.
            newQ = model.predict(board2input(new_game, 'b'), batch_size=1)
            maxQ = np.max(newQ)
            if reward == -1:  # non-terminal state
                update = reward + (gamma * maxQ)
            else:  # terminal state
                update = reward

            # y equals the predicted Q-values except at the taken action,
            # which is nudged toward the bootstrapped target.
            y = np.zeros((1, 81))
            y[:] = qval[:]
            y[0][move] = update
            # FIX: `nb_epoch` is the long-deprecated Keras 1 spelling; the
            # replay trainer in this file already uses `epochs`.
            model.fit(board2input(game, 'b'), y, batch_size=1, epochs=1,
                      verbose=0)
            game = copy.copy(new_game)
        print('game ' + str(i) + ' ends here')
        if epsilon > 0.1:
            epsilon -= (1 / epochs)
    model.save('test_model_1.h5')
def main():
    """Train a dense Q-network on random Atari-Go capture tasks with
    experience replay.

    Each episode seeds a white group at a random coordinate.  Transitions
    ``(state, action, reward, next_state)`` fill a fixed-size ring buffer;
    once full, the network is fitted on a random minibatch per step.
    Checkpoints are saved every 5000 episodes and a final model at the end.
    """
    model = Sequential()
    model.add(
        Dense(units=200,
              kernel_initializer='uniform',
              activation='relu',
              input_shape=(243, )))
    model.add(Dense(units=400, kernel_initializer='uniform', activation='relu'))
    model.add(Dense(units=200, kernel_initializer='uniform', activation='relu'))
    model.add(
        Dense(units=81, kernel_initializer='uniform', activation='linear'))
    rms = RMSprop()
    model.compile(loss='mse', optimizer=rms)

    epochs = 50000
    gamma = 0.975   # discount factor
    epsilon = 1     # exploration rate, annealed per episode
    batchSize = 50
    buffer = 100    # replay-buffer capacity
    replay = []     # ring buffer of (state, action, reward, next_state)
    h = 0           # ring-buffer write index
    for i in range(epochs):
        col_coord, row_coord = random.randint(0, 8), random.randint(0, 8)
        game = Game()
        game = init_game(game, col_coord, row_coord)
        status = 1
        reward = -1  # by default at game start
        # game in progress
        while status == 1:
            qval = model.predict(board2input(game, 'b'), batch_size=1)
            if reward == -1:
                if random.random() < epsilon:
                    # Explore: random non-pass legal move.
                    valid_moves = game.get_playable_locations(BLACK)
                    # FIX: the empty-list check must come *before* sampling;
                    # random.choice on an empty list raises IndexError, so
                    # the original check was unreachable.
                    if len(valid_moves) == 0:
                        print('end it')
                    move = random.choice(valid_moves)
                    while move.is_pass:
                        move = random.choice(valid_moves)
                    new_game = copy.deepcopy(game)
                    new_game.play(move, 'b')
                    move = move.to_flat_idx()
                else:
                    # Exploit: best Q-value on a still-empty board point.
                    temp_qval = copy.copy(qval)
                    move = Move.from_flat_idx(np.argmax(temp_qval))
                    new_game = copy.deepcopy(game)
                    location = move.to_matrix_location()
                    while new_game.board[location] != EMPTY:
                        # Mask occupied points with an arbitrarily low value
                        # so argmax falls through to the next-best move.
                        temp_qval[0][np.argmax(temp_qval)] = -100
                        move = Move.from_flat_idx(np.argmax(temp_qval))
                        location = move.to_matrix_location()
                    new_game.play(move, 'b')
                    move = move.to_flat_idx()

            if check_dead_group(new_game, col_coord, row_coord):
                reward = 50  # capture achieved: terminal
            else:
                reward = -1

            # experience replay storage
            if len(replay) < buffer:
                replay.append((board2input(game, 'b'), move, reward,
                               board2input(new_game, 'b')))
            else:
                # Buffer full: overwrite the oldest slot, then train on a
                # random minibatch.
                if h < (buffer - 1):
                    h += 1
                else:
                    h = 0
                replay[h] = (board2input(game, 'b'), move, reward,
                             board2input(new_game, 'b'))
                minibatch = random.sample(replay, batchSize)
                X_train = []
                y_train = []
                for memory in minibatch:
                    (m_game, m_move, m_reward, m_new_game) = memory
                    oldqval = model.predict(m_game, batch_size=1)
                    # FIX: the bootstrap target must use the *successor*
                    # state's maximum Q-value; the original took the old
                    # state's Q of the chosen action and never used
                    # m_new_game at all.
                    newqval = model.predict(m_new_game, batch_size=1)
                    maxq = np.max(newqval)
                    y = np.zeros(81)
                    y[:] = oldqval
                    if m_reward == 50:  # terminal: capture reward only
                        update = m_reward
                    else:
                        update = m_reward + gamma * maxq
                    y[m_move] = update
                    X_train.append(m_game)
                    y_train.append(y)
                X_train = np.stack(X_train)
                y_train = np.stack(y_train)
                model.fit(X_train, y_train, batch_size=batchSize, epochs=1,
                          verbose=0)
            game = copy.copy(new_game)
            if reward == 50:
                status = 0
                print('game ' + str(i) + ' ends here')
        if epsilon > 0.1:
            epsilon -= (1 / epochs)
        if i % 5000 == 0 and i > 0:
            name = 'src/learn/RL_Atari/hard_atari_' + str(i) + '.h5'
            model.save(name)
    model.save('src/learn/RL_Atari/test_model_final.h5')
class GTPcontroller(threading.Thread):
    """Referee thread that drives two GTP engines through a full game.

    Moves are requested from the current player with ``genmove``, relayed to
    the opponent with ``play``, and mirrored on an internal ``Game`` until it
    ends, after which both engines receive ``quit``.
    """

    def __init__(self, player1type, player2type, logging_level,
                 end_of_turn_sleep_time):
        """Set up both players, exchange handshake commands, and record names.

        :param player1type: engine type string for player 1 ('b')
        :param player2type: engine type string for player 2 ('w')
        :param logging_level: forwarded to the file logger and both players
        :param end_of_turn_sleep_time: pause (seconds) between turns
        """
        threading.Thread.__init__(self)
        self.logger = Utils.get_unique_file_logger(self, logging_level)
        self.end_of_turn_sleep_time = end_of_turn_sleep_time
        self.game = Game()
        self.view = None  # optional GUI view, attached later by the owner
        self.player1 = Player('b', logging_level)
        self.player1.engine.controller = self
        self.player2 = Player('w', logging_level)
        self.player2.engine.controller = self
        # Reverse lookup engine -> player for callbacks from the engines.
        self.map = {
            self.player1.engine: self.player1,
            self.player2.engine: self.player2,
        }
        self.send_to_player(self.player1, 'set_player_type ' + player1type)
        self.send_to_player(self.player2, 'set_player_type ' + player2type)
        # GTP success responses start with "= "; [2:] strips that prefix.
        self.player1.name = self.wait_for_response(self.player1, 'name')[2:]
        self.player2.name = self.wait_for_response(self.player2, 'name')[2:]
        self.current_player = self.player1
        self.other_player = self.player2

    def log_and_print(self, message):
        """Log *message* to the file logger and echo it to stdout."""
        self.logger.info(message)
        print(message)

    def send_to_player(self, player, command):
        """Forward a GTP *command* to *player*'s engine, logging it."""
        self.log_and_print(' send to ' + player.name + ' (' + player.color +
                           '): ' + command)
        player.engine.handle_input_from_controller(command)

    def broadcast(self, command):
        """Send *command* to both players.

        FIX: the original ignored *command* and always sent 'quit'; the only
        caller passes 'quit', so existing behavior is preserved.
        """
        self.send_to_player(self.player1, command)
        self.send_to_player(self.player2, command)

    def wait_for_response(self, player, message):
        """Send *message* to *player* and block until a response arrives.

        NOTE(review): this busy-waits (spin loop) on ``latest_response``;
        acceptable for a local referee, but a condition variable would be
        kinder to the CPU.
        """
        self.send_to_player(player, message)
        while player.latest_response is None:
            pass
        return player.get_latest_response()

    def run(self):
        """Main referee loop: alternate genmove/play until the game ends."""
        self.game.start()
        while self.game.is_running:
            print('\nnext turn\n')
            response = self.wait_for_response(
                self.current_player, 'genmove ' + self.current_player.color)
            # GTP error responses start with '?': abort the game.
            if response.startswith('?'):
                self.log_and_print(
                    'player ' + self.current_player.name +
                    ' responded with an error, aborting the game: ' + '"' +
                    response[2:] + '"')
                break
            move = response[2:]  # strip away the "= "
            self.send_to_player(
                self.other_player,
                'play ' + self.current_player.color + ' ' + move)
            self.game.play(Move().from_gtp(move, self.game.size),
                           self.current_player.color)
            print('\n' + self.game.__str__())
            time.sleep(self.end_of_turn_sleep_time)
            # swap players for next turn
            if self.current_player == self.player1:
                self.current_player = self.player2
                self.other_player = self.player1
            else:
                self.current_player = self.player1
                self.other_player = self.player2
        self.broadcast('quit')
        print('\n' + self.game.__str__())
        # if self.view is not None:
        #     self.view.game_ended()
        # else:
        print('Final result:', self.game.evaluate_points())
        sys.exit(0)

    def handle_input_from_engine(self, engine, input):
        """Callback from an engine: record its latest response.

        NOTE(review): `input` shadows the builtin, but renaming would change
        the callback signature engines may rely on.
        """
        input = input.strip()
        player = self.map[engine]
        self.log_and_print('received from ' + player.name + ' (' +
                           player.color + '): ' + input)
        player.latest_response = input

    def receive_move_from_gui(self, move):
        """Validate a GUI-entered move and hand it to the human player's bot."""
        human = self.current_player.engine.bot
        if type(human) is HumanGui:
            try:
                # testing=True: validate the move without committing it.
                self.game.play(move, self.current_player.color, testing=True)
                human.move = move
            except InvalidMove_Error as e:
                print('\ninvalid move')