def main():
    connection = connect()
    config.set_device(args.gpu)
    # while True:
    name = None
    while name is None:
        name = connection.recv()
    model = Model('model_' + name)
    model.forced_restore()
    # print(model)
    agent = Agent(model)
    simulations = None
    while simulations is None:
        simulations = connection.recv()
    simulations = int(simulations)
    config.simulations = simulations
    env = GameEnv()
    init = deal_init(connection)
    env.load(init)
    game = Game(env, args.player)
    game_start(agent, simulations, game, connection)
def __call__(self):
    if self.empty():
        return None
    file, init, player, p, v = self.data[self.iter]
    env = GameEnv(False)
    env.load(init)
    game = Game(env, player)
    self.iter += 1
    return game, p, v
def unpack(init, ps, player):
    env = GameEnv(False)
    env.load(init)
    game = Game(env, player)
    histories = []
    for action in ps:
        state = game.to_model_input()
        histories.append((histories[-1] if len(histories) else []) + [state])
        game.move(action)
    for i in range(len(histories)):
        histories[i] = np.array(histories[i], dtype=np.int8)
    return histories
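# --- Hypothetical usage sketch (not part of the original source) ---
# A minimal driver showing how unpack() could be fed from one pickled
# self-play record, mirroring the evaluation script further below; the file
# name is the one mentioned there and the gen/test layout is an assumption.
import numpy as np
from utils import pickle_load

player = 2
data = pickle_load('gen/test/201912101326_85d94af6fe1a588b.pkl')
init, actions, _v = data[player]            # deal, action sequence, final value
histories = unpack(init, actions, player)
assert len(histories) == len(actions)       # one cumulative history per move
print(histories[0].shape, histories[-1].shape)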
def evaluate_candidate(candidate_player, reference_player):
    wins = 0
    draws = 0
    losses = 0
    # Always choose best move
    candidate_player.tryhard_mode = True
    for _ in range(400):
        env = GameEnv()
        current_player = candidate_player if random.random() < 0.5 else reference_player
        while True:
            move = current_player.select_move(env)
            env.play_move(*move)  # Current player flips
            env.check_win()
            if env.winner and current_player is candidate_player:
                wins += 1
                break
            elif env.winner and current_player is reference_player:
                losses += 1
                break
            elif env.draw:
                draws += 1
                break
            else:
                current_player = reference_player if current_player is candidate_player else candidate_player
    candidate_player.tryhard_mode = False
    return wins, draws, losses
import os
import re
import shutil
import sys

# When run from inside the gen/ directory, make the project root importable.
if re.search('gen$', os.getcwd()):
    sys.path.append('..')

import numpy as np

from game import Game
from game import GameEnv
from utils import dump

path = 'train'
for cnt in range(30):
    env = GameEnv()
    games = [Game(env, player) for player in range(3)]
    datas = [[env.hand_cards(), None, None] for _ in range(3)]
    while not env.game_over():
        period = env.period()
        if period != 3:
            curr_player = env.curr_player()
            game = games[curr_player]
            action = np.random.choice(game.action_list())
            for game in games:
                game.move(action)
            env.move(action)
        else:
            for player, game in enumerate(games):
                response = env.response(player)
                game.move(response)
            env.move(-1)  # advance past the bottom-card period (same call as in the test script below)
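# --- Hypothetical continuation (not part of the original source) ---
# The loop above fills `datas` with the initial deals but never stores the
# played policies or saves the episode.  A plausible continuation, assuming
# Game.policy() / Game.eval() as used by the other scripts in this section
# and plain pickling as in the human-match script; the file naming scheme
# here is made up.
import pickle

for player, game in enumerate(games):
    datas[player][1] = game.policy()   # action sequence from this player's view
    datas[player][2] = game.eval()     # final outcome for this player
with open('%s/episode_%04d.pkl' % (path, cnt), 'wb') as f:
    pickle.dump(datas, f)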
def test_once():
    env = GameEnv()
    # cards = [[3, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0],
    #          [0, 2, 2, 2, 2, 2, 2, 2, 3, 0, 0, 0, 0, 0, 0],
    #          [0, 0, 0, 0, 0, 0, 0, 0, 1, 4, 4, 4, 4, 0, 0]]
    # env.load(np.array(cards))
    # history = [355, 352, 352]
    # history = [355, 352, 352, 337, 350, 351, 109, 124, 0, 14]
    history = []
    games = [Game(env, 0), Game(env, 1), Game(env, 2)]
    print(games[0].period())
    return  # NOTE: early return; everything below is skipped unless this line is removed
    for action in history:
        games[0].move(action)
        games[1].move(action)
        games[2].move(action)
        env.move(action)
    # print(games[0])
    # print(games[0].curr_player())
    # print(games[1])
    # print(games[1].curr_player())
    # print(games[2])
    # print(games[2].curr_player())
    # print(env)
    print('=================')
    # env.load(np.array(cards, dtype=np.int32))
    # print(games[0], games[1], games[2])
    print(env.bottom())
    while not env.game_over():
        period = env.period()
        curr_player = env.curr_player()
        print('-----------------------------------')
        print('period:', period)
        print('curr_player:', curr_player)
        if period == 3:  # bottom
            print('response:', env.response(0), env.response(1), env.response(2))
            print(games[0].action_list())
            print('remaining cards:')
            print(games[0].remaining_cards())
            print(games[1].remaining_cards())
            print(games[2].remaining_cards())
            games[0].move(env.response(0))
            games[1].move(env.response(1))
            games[2].move(env.response(2))
            print('bottom:')
            print(games[0].bottom())
            print(games[1].bottom())
            print(games[2].bottom())
            env.move(-1)
            print(env.hand_cards_num())
            model_input = games[0].to_model_input()
            # for channel in range(18):
            #     print(channel)
            #     print(model_input[channel])
        else:
            action_list = games[curr_player].action_list()
            action = np.random.choice(action_list)
            print('action_list:')
            print(games[0].action_list())
            print(games[1].action_list())
            print(games[2].action_list())
            action_c = Action(action)
            print('action:', action)
            print('', Action(action).to_array())
            games[0].move(action)
            games[1].move(action)
            games[2].move(action)
            env.move(action)
            print(env.hand_cards_num())
            # if action_c.need_attach() or action_c.is_attach() or action_c.is_bottom() or action_c.is_bid():
            #     model_input = games[0].to_model_input()
            #     for channel in range(18):
            #         print(channel)
            #         print(model_input[channel])
    print(env)
    print(games[0].eval())
    print(games[1].eval())
    print(games[2].eval())
from time import sleep
from collections import deque
import os
import time

import numpy as np
import keras
from keras.models import Model, Sequential, load_model
from keras.layers import Input, Dense, Conv2D, Flatten
from keras.optimizers import Adam, RMSprop
import matplotlib.pyplot as plt
from skimage.transform import resize
from skimage.color import rgb2gray

from game import GameEnv

# os.environ["SDL_VIDEODRIVER"] = "dummy"

env = GameEnv()
stateSize = len(env.reset())
fakeState = env.reset()
print(stateSize)
env.getGameState()

# PREPROCESSING HYPERPARAMETERS
stack_size = 8   # Number of frames stacked

# MODEL HYPERPARAMETERS
action_size = 4  # 4 possible actions

# Initialize deque with zero-images, one array for each stacked frame
stacked_frames = deque(
    [np.zeros(stateSize, dtype=int) for i in range(stack_size)],
    maxlen=stack_size)
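# --- Hypothetical helper (not part of the original source) ---
# The deque above only holds zero-initialised frames; the usual frame-stacking
# pattern it implies needs a small helper that pushes each new state and
# returns the stacked network input.  This is a sketch of that pattern, not
# the project's actual preprocessing code.
def stack_states(stacked_frames, state, is_new_episode):
    if is_new_episode:
        # Refill the whole stack with the first state so frames never mix episodes.
        for _ in range(stack_size):
            stacked_frames.append(state)
    else:
        stacked_frames.append(state)
    # Shape (stack_size, stateSize), oldest frame first.
    return np.stack(stacked_frames, axis=0)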
def match(AIs):
    def read(popen):
        while True:
            output = popen.stdout.readline()
            if output is not None:
                return output.decode('utf-8')

    def write(popen, obj):
        # Never write to the human player's slot.
        if popen == AIs[args.p]:
            return
        popen.stdin.write((str(obj) + '\n').encode('utf-8'))
        popen.stdin.flush()

    (name0, simulation0), (name1, simulation1), (name2, simulation2) = players_attr
    write(AIs[0], name0)
    write(AIs[0], simulation0)
    write(AIs[1], name1)
    write(AIs[1], simulation1)
    write(AIs[2], name2)
    write(AIs[2], simulation2)
    env = GameEnv()
    hand_cards = env.hand_cards()
    data.append(env.hand_cards())
    game = Game(env, args.p)
    for player in range(3):
        write(AIs[player], list(hand_cards.flatten()))
    slot = []
    ig = False
    print('Filename:', filename)
    print('Your position:', args.p)
    print('Your hand cards:', to_chars(hand_cards[args.p]))
    while not env.game_over():
        period = env.period()
        player = env.curr_player()
        if period == 1 and player == args.p:
            print('Your position:', args.p)
            print('Your hand cards:', to_chars(env.hand_cards()[args.p]))
            print('Hand cards num of 3 players:', env.hand_cards_num())
            print('-----')
        if period == 1 or period == 2:
            if player == args.p:
                if period == 1:
                    _game = game.copy()
                    print('Your turn:', end=' ')
                    chars = input()
                    try:
                        if chars == 'pass':
                            actions = [0]
                        else:
                            chars = chars.upper().rstrip().replace(',', '').split(' ')
                            while '' in chars:
                                chars.remove('')
                            # for char in chars:
                            #     if char not in card_dict:
                            #         raise KeyError('input error')
                            actions = to_action(chars)
                        for action in actions:
                            if action in _game.action_list():
                                _game.move(action)
                            else:
                                raise RuntimeError('couldn\'t move')
                    except (RuntimeError, KeyError):
                        print('Invalid action! Please retry.')
                        print('=====')
                        continue
                    game = _game
                    for action in actions:
                        env.move(action)
                        for target in range(3):
                            write(AIs[target], action)
                    print('=====')
                if period == 2:
                    print('Your bet:', end=' ')
                    action = int(input()) + 352
                    if action not in game.action_list():
                        print('Invalid action! Please retry.')
                        continue
                    env.move(action)
                    game.move(action)
                    for target in range(3):
                        write(AIs[target], action)
            else:
                action = int(read(AIs[player]))
                # print(action)
                env.move(action)
                game.move(action)
                for target in range(3):
                    if target != player:
                        write(AIs[target], action)
                if period == 1:
                    slot += to_chars(Action(action).to_array()).replace('[', '').replace(']', '').replace(',', '').split(' ')
                    if env.curr_player() != player:
                        if action == 0:
                            print('Player%d pass' % player)
                        else:
                            print('Player%d\'s turn:' % player, str(slot).replace('\'', ''))
                        print('=====')
                        slot = []
                if period == 2:
                    print('Player%d\'s bet: %d' % (player, action - 352))
            if period == 2 and env.period() == 3:
                print('Landlord is Player%d' % env.lord_player())
                print('Bottom cards:', to_chars(env.bottom()))
                print('===== Game Start =====')
        if period == 3:
            for player in range(3):
                write(AIs[player], env.response(player))
            game.move(env.response(args.p))
            env.move(-1)
    evals = [None, None, None]
    for player in range(3):
        if player != args.p:
            evals[player] = (float(read(AIs[player])) + 1) / 2
            AIs[player].terminate()
    # print(evals)
    data.append(game.policy())
    with open('human/%s.pkl' % filename, 'wb') as f:
        pickle.dump(data, f)
    if args.p == env.lord_player():
        is_winner = env.hand_cards_num()[args.p] == 0
    else:
        is_winner = env.hand_cards_num()[env.lord_player()] != 0
    if game.lord_player() == -1:
        print('<<<<<<<<<<<<<<< DRAW >>>>>>>>>>>>>>>')
    elif is_winner:
        print('<<<<<<<<<<<<<<< YOU WIN >>>>>>>>>>>>>>>')
    else:
        print('<<<<<<<<<<<<<<< YOU LOSE >>>>>>>>>>>>>>>')
def main():
    model_name = "models/model_conv.h5"
    reference_player = TinyBrain()
    # candidate_player = BigBrain(load_model=model_name, tryhard_mode=False)  # For continuing training
    candidate_player = BigBrain(tryhard_mode=False)  # For starting training
    episode = 1
    while True:
        env = GameEnv()
        first_move = True
        # immediate store is all the information we have immediately after a move:
        # (current state, move, whether the move was exploratory)
        immediate_store = []
        # delayed store is all the information we only get after the next move:
        # (next possible actions, reward, next state, terminated)
        delayed_store = []
        # Randomly choose who goes first
        current_player = candidate_player if random.random() < 0.5 else reference_player
        while True:
            state = env.state()
            move = current_player.select_move(env)
            if current_player is candidate_player:
                if first_move:
                    first_move = False
                else:
                    # Finish providing information for candidate player's last move
                    possible_actions = env.possible_actions()
                    delayed_store.append((possible_actions, 0, state, False))
                # Provide starting information for candidate player's current move
                # (the move chosen above is replaced by an explore-aware selection)
                do_explore = random.random() < 0.3
                move = current_player.select_move(env, explore=do_explore)
                immediate_store.append((state, move, do_explore))
            env.play_move(*move)  # Current player flips
            env.check_win()
            if env.winner or env.draw:
                # If the game has ended, record the terminal reward for the candidate's final transition
                if env.draw:
                    delayed_store.append((None, DRAW_REWARD, None, True))
                elif current_player is candidate_player:
                    # Winner is always whoever played last
                    delayed_store.append((None, WIN_REWARD, None, True))
                else:
                    delayed_store.append((None, LOSS_REWARD, None, True))
                for immediate, delayed in zip(immediate_store, delayed_store):
                    state, move, do_explore = immediate
                    if not do_explore:
                        candidate_player.store(state, move, *delayed)
                break
            current_player = reference_player if current_player is candidate_player else candidate_player
        if episode % GAMES_PER_SET == 0:
            print(f"Training after {episode} episodes")
            candidate_player.retrain(batch_size=TRAINING_SIZE)
            candidate_player.align_target_model()
            candidate_player.save(model_name)
            wins, draws, losses = evaluate_candidate(candidate_player, TinyBrain())
            print(f"{wins}, {draws}, {losses}")
            if wins + losses > 0:
                percentage_wins = wins / (wins + losses)
            else:
                percentage_wins = 0
            print(f"percentage wins: {percentage_wins}")
            # The freshly trained network becomes the new reference opponent.
            reference_player = candidate_player
            reference_player.tryhard_mode = False
            candidate_player = BigBrain(tryhard_mode=False)
            candidate_player.q_network = reference_player.q_network
            candidate_player.align_target_model()
        episode += 1
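# --- Hypothetical constants (not defined in this excerpt) ---
# The training loop above refers to reward and schedule constants that the
# project defines elsewhere; the values below are placeholders only, chosen
# to make the snippet self-contained.
WIN_REWARD = 1.0
DRAW_REWARD = 0.0
LOSS_REWARD = -1.0
GAMES_PER_SET = 100    # episodes between retraining passes
TRAINING_SIZE = 64     # replay batch size passed to retrain()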
def print_diagnostics(candidate_player, episode):
    test_env_start = GameEnv()
    test_env_about_to_win = GameEnv()
    test_env_about_to_win.play_move(0, 0)
    test_env_about_to_win.play_move(2, 2)
    test_env_about_to_win.play_move(0, 1)
    test_env_about_to_win.play_move(1, 1)
    test_env_about_to_win_p2 = GameEnv()
    test_env_about_to_win_p2.play_move(0, 0)
    test_env_about_to_win_p2.play_move(2, 2)
    test_env_about_to_win_p2.play_move(1, 1)
    test_env_about_to_win_p2.play_move(2, 1)
    test_env_about_to_win_p2.play_move(1, 0)
    initial_preferences = candidate_player.move_probabilities(
        test_env_start.state(), test_env_start.possible_actions())
    about_to_win_preferences = candidate_player.move_probabilities(
        test_env_about_to_win.state(), test_env_about_to_win.possible_actions())
    about_to_win_preferences_p2 = candidate_player.move_probabilities(
        test_env_about_to_win_p2.state(), test_env_about_to_win_p2.possible_actions())
    print(f"Initial move preference after {episode} games")
    print(initial_preferences)
    print("Preferences when winning in top right")
    print(about_to_win_preferences)
    print("Preferences when winning in bottom left")
    print(about_to_win_preferences_p2)
def main():
    model: Model
    config.set_device(1)
    env = GameEnv()
    # model = Model('model')
    model = Model('model_201912080009')
    # model = Model('model_tencent0824')
    model.forced_restore()
    agent = Agent(model)
    init = [[2, 1, 2, 1, 0, 1, 3, 1, 2, 1, 0, 1, 2, 0, 0],
            [2, 1, 1, 2, 1, 1, 0, 3, 1, 2, 0, 2, 0, 0, 1],
            [0, 1, 1, 1, 3, 2, 0, 0, 1, 1, 3, 1, 2, 1, 0]]
    actions = [
        352, 352, 353, 338, 343, 347, 123, 0, 0, 20, 22, 23, 24, 26, 0, 28, 0,
        29, 0, 0, 39, 0, 0, 116, 0, 0, 76, 324, 0, 0, 41, 42, 0, 0, 92, 317,
        320, 0, 0, 31
    ]  # , 42, 0, 0, 15, 18]
    init = np.array(init, dtype=np.int32)
    env.load(init)
    print(env)
    root, player = new_Root(), 2
    game = Game(env, player)
    # Replay the recorded actions, advancing the search-tree root alongside the game.
    for action in actions:
        mct = MCT(game, root)
        agent.simulates([mct], store_rnn_states='Always')
        root.root(action)
        game.move(action)
    print(game)
    print('GAUSS', game.gauss())
    print(game.curr_player(), game.my_player())
    print('====')
    print(game.curr_player())
    print(game.my_player())
    print(game.lord_player())
    print(game.hand_cards_num())
    print(game.bottom())
    print('====')
    mct = MCT(game, root)
    for cnt in range(2000):
        agent.simulates([mct], store_rnn_states='Always')
        # if cnt == 0:
        #     history, rnn_state = root.get_model_input()
        #     print(history)
        if (cnt + 1) % 10 == 0:
            print(cnt + 1)
    for action, P, son in mct.root.edges():
        print('%d: %.8f %d %.3f' % (action, P, son.N(), son.Q()))
    print('-------------------------')
    t = 1.0
    s = np.array([son.N() for action, P, son in mct.root.edges()])
    p = np.array([P for action, P, son in mct.root.edges()])
    print(s)
    print(np.mean(s))
    w = s + 0.001 - np.mean(s)
    w[w < 0] = 0
    w = (w**t) / (w**t).sum()
    print(w)
    print(s / s.sum())
    print(p)
    mct.json()
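# --- Standalone numeric check (not part of the original source) ---
# The tail of the script converts raw visit counts into a play distribution by
# subtracting the mean count before applying the temperature.  With made-up
# counts this shows how the scheme suppresses below-average moves compared
# with the plain visit frequencies s / s.sum():
import numpy as np

t = 1.0
s = np.array([120.0, 40.0, 25.0, 15.0])   # hypothetical visit counts, mean = 50

w = s + 0.001 - np.mean(s)   # centre on the mean visit count
w[w < 0] = 0                 # below-average moves are zeroed out
w = (w ** t) / (w ** t).sum()

print(s / s.sum())   # [0.6   0.2   0.125 0.075]  raw visit frequencies
print(w)             # [1. 0. 0. 0.]              only the above-average move survives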
from game import GameEnv
import random

env = GameEnv(1 / 60)
env.stepReward = 0
env.playerShotReward = 0
env.enemyShotReward = 0
env.missedShotReward = 0
env.allDeadReward = 0
env.invasionReward = 0
env.bottomReward = 0
env.underReward = 0
env.anchorReward = 0
env.cornerReward = -0.001
env.reset()
actions = [i for i in range(4)]
done = False
# print(env.reset())
env.loop()


def randMove():
    done = False
    while not done:
        state, reward, done, win = env.step(random.sample(actions, 1)[0])


# for i in range(4):
#     env.reset()
#     randMove()
def main():
    np.set_printoptions(precision=2, linewidth=128, suppress=True)
    path = 'test'
    config.device = torch.device('cuda:1')
    config.set_device(-1)
    model = Model('model_201912080009')
    model.restore()
    files = os.listdir('gen/%s/' % path)
    files.sort(key=lambda k: k.split('_')[0], reverse=True)
    # print(files[-1])
    file = np.random.choice(files[:100])
    # file = '201912101326_85d94af6fe1a588b.pkl'
    print(file)
    data = pickle_load('gen/%s/%s' % (path, file))
    # data = pickle_load('gen/test/' + files[-2])
    # np.random.shuffle(data)
    player = 2
    init, actions, _v = [None, [], -1.0]
    init = [[2, 1, 2, 1, 0, 1, 3, 1, 2, 1, 0, 1, 2, 0, 0],
            [2, 1, 1, 2, 1, 1, 0, 3, 1, 2, 0, 2, 0, 0, 1],
            [0, 1, 1, 1, 3, 2, 0, 0, 1, 1, 3, 1, 2, 1, 0]]
    actions = [
        352, 352, 353, 338, 343, 347, 123, 0, 0, 20, 22, 23, 24, 26, 0, 28, 0,
        29, 0, 0, 39, 0, 0, 116, 0, 0, 76, 324, 0, 0, 41, 42, 0, 0, 92, 317,
        320, 0, 0, 31, 42, 0, 0, 15, 18
    ]
    init = np.array(init, dtype=np.int32)
    # actions = [353, 352, 352, 339, 349, 349, 15]
    # init = [[2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 0, 0, 0, 0, 0],
    #         [2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 0, 0, 0, 0, 0],
    #         [0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 4, 4, 4, 1, 1]]
    # init = np.array(init, dtype=np.int32)
    # actions = [353, 352, 352, 344, 345, 346, 151]
    # init, actions, _v = data[player]
    print(_v, player)
    print(init)
    print(actions)
    print('=============================================')
    print('player:', player)
    histories = unpack(init, actions, player)
    histories, lengths = history_regularize(histories)
    histories, lengths = to_cuda(histories, lengths)
    vs, ps, _ = model(histories, lengths, None)
    env = GameEnv(False)
    env.load(init)
    game = Game(env, player)
    for v, p, action in zip(vs, ps, actions):
        print('----------------------------------')
        print('my_player: %d, curr_player: %d' % (player, game.curr_player()))
        # for action, _dist in enumerate(dist):
        #     print(action, _dist)
        idx = np.argsort(p)[::-1]
        for i in range(8):
            print(game.could_move(idx[i]), end=' ')
            print('(%d, %.2f%%)' % (idx[i], p[idx[i]] * 100), end='\n')
        print('action: %d, %.2f%%' % (action, p[action] * 100))
        if idx[0] == 60 and p[idx[0]] > 0.3:
            print(game)
            print(game.policy())
            print(game.hand_cards_num())
            print(game.bottom())
            print(game.curr_player(), game.lord_player())
            return 0
        # model_input = game.to_model_input()
        # for channel in range(26, 28):
        #     print(channel)
        #     print(model_input[channel])
        print('%.1f, %.3f' % (_v, v[0]))
        game.move(action)
        print(game)
    print('Gauss:', game.gauss())
import random

from game import GameEnv
from DeepQNet import createDQN
from Memory import Memory

gamma = 0.9
num_episode = 100
num_frames = 4
epsilon = 0.2
memoryPool = Memory()
batch_size = 32
checkpoint_freq = 10

for episode in range(num_episode):
    gameEnv = GameEnv((20, 20))
    model = createDQN(num_frames, gameEnv)  # note: a fresh network is built at the start of every episode
    over = False
    loss = 0.0
    last_frames = gameEnv.getLastFrames(4)
    total_reward = 0
    while not over:
        # Epsilon-greedy action selection
        decision = random.random()
        direction = 0
        if decision < epsilon:
            direction = random.randint(-1, 1)
        else:
            shape = (1, ) + last_frames.shape
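# --- Hypothetical continuation (not part of the original excerpt) ---
# The greedy branch above is cut off after computing `shape`.  For a Keras DQN
# it would typically evaluate the Q-values for the stacked frames and map the
# argmax back onto the -1/0/+1 direction encoding used by the random branch;
# the predict call and the offset here are assumptions, not the project's code.
import numpy as np

q_values = model.predict(last_frames.reshape(shape), verbose=0)
direction = int(np.argmax(q_values[0])) - 1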
import os
import time

import numpy as np

from game import Game, GameEnv
from utils import pickle_load

path = 'data/'
files = os.listdir(path)
file = np.random.choice(files)
print(file)
data = pickle_load(path + file)
init, policy, v = data[0]
env = GameEnv()
print(init)
env.load(init)
print(policy)
for action in policy:
    print(env.curr_player(), env.lord_player())
    print(action)
    if env.period() != 3:
        env.move(action)
    else:
        env.move(-1)
    print(env)
    print('---------------------')
    time.sleep(0.5)
print(v)
        # (tail of the play/render loop; the start of this function is not included in this excerpt)
        pygame.display.update()
        gameEnv.stepForward(turn)
        over = gameEnv.isOver()
        display.fill(window_color)
        display.blit(apple_image, (gameEnv.fruitLocation[0] * 10,
                                   gameEnv.fruitLocation[1] * 10))
        clock.tick(10)
    return gameEnv.score


model_name = 'dqn-00000090.model'
model = load_model(model_name)
width = 20
height = 20
gameEnv = GameEnv((height, width))
display_width = width * 10
display_height = height * 10
green = (0, 255, 0)
red = (255, 0, 0)
black = (0, 0, 0)
window_color = (200, 200, 200)
apple_image = pygame.image.load('apple.jpg')
clock = pygame.time.Clock()
pygame.init()
display = pygame.display.set_mode((display_height, display_width))
display.fill(window_color)