async def main() -> None:
    """Run Stockfish self-play games forever, publishing each state to RabbitMQ.

    One iteration = one complete game: spawn an engine, publish the initial
    position, play until game over, then publish the result.
    """
    while True:
        transport, engine = await chess.engine.popen_uci("/usr/games/stockfish")
        pub = RMQPublisher(game_id=str(uuid.uuid4()))
        board = chess.Board()
        # Initial position: ply 0, white to move, no move yet.
        pub.send(GameState(0, 'WHITE', None, board.fen()))
        try:
            while not board.is_game_over():
                result = await engine.play(board, chess.engine.Limit(time=5))
                board.push(result.move)
                # NOTE: after push() the turn has flipped, so this label is the
                # side to move NEXT, not the side that just moved — presumably
                # intentional, but confirm against the consumer.
                pub.send(GameState(board.ply(),
                                   'WHITE' if board.turn else 'BLACK',
                                   result.move.uci(),
                                   board.fen()))
        finally:
            # FIX: the original only quit the engine on the happy path, leaking
            # a Stockfish subprocess if anything above raised.
            await engine.quit()
        pub.send(ResultEvent(board.result()))
def create_game(self):
    """Persist a fresh single-player GameState row and return its primary key."""
    with app.app_context():
        state = GameState()
        state.nr_players = 1
        app.db.session.add(state)
        app.db.session.commit()
        # id is populated by the database on commit.
        return state.id
def new_game():
    """Create a new game, register the caller as player 1, and redirect to it."""
    state = GameState()
    state.nr_players = 1
    db.session.add(state)
    db.session.commit()
    # Remember which game and which seat this browser session owns.
    session["game_id"] = state.id
    session["player_id"] = GameState.Player1
    return redirect(url_for('game', game_id=state.id))
def test_make_move_asserts(self):
    """make_move must reject out-of-range coordinates and occupied cells."""
    game = GameState()
    # Coordinates outside the 3x3 board.
    with self.assertRaises(AssertionError):
        game.make_move(-1, 0)
    with self.assertRaises(AssertionError):
        game.make_move(0, 3)
    # Fully occupied board: every move is illegal.
    game = GameState.create_from_string("oxoxxoxox")
    with self.assertRaises(AssertionError):
        game.make_move(0, 0)
    # Only (0, 0) is free; moving onto an occupied cell must fail.
    game = GameState.create_from_string(" xoxxoxox")
    with self.assertRaises(AssertionError):
        game.make_move(2, 2)
def index(request: Request):
    """Replace the WAMP component's game state with the request's JSON body.

    Replies "ok" on success; on a malformed payload dumps it to stdout and
    replies with the legacy sentinel "wew".
    """
    payload = json.loads(request.content.read())
    try:
        parsed = GameState(**payload)
    except Exception as exc:
        # Best-effort debugging output; the caller only sees the sentinel.
        print(payload)
        print(exc)
        return "wew"
    wamp_component.state = parsed
    print(wamp_component.state.map.game_state)
    return "ok"
def index(request: Request):
    """Replace the WAMP component's game state with the request's JSON body.

    Returns "ok" on success, or the legacy sentinel "wew" when the payload
    cannot be turned into a GameState.
    """
    # FIX: dropped a leftover debug print("h") and stale commented-out code.
    content = json.loads(request.content.read())
    try:
        new_state = GameState(**content)
    except Exception as e:
        # Dump the offending payload for debugging; keep the legacy reply.
        print(content)
        print(e)
        return "wew"
    wamp_component.state = new_state
    return "ok"
def save_game(self):
    """Snapshot the running game into a GameState and write it to disk.

    Raises RuntimeError if the assembled state fails its own validity check.
    """
    state = GameState()
    state.next_player = self.player_order[0]
    state.wind_index = self.game_board.wind_direction.index(0)
    # Export players in a deterministic order.
    for name in sorted(self.players):
        state.player_data[name] = self.players[name].export()
    # One tavern entry per empire capital.
    for empire in Empires:
        capital = empire.value.capital
        state.taverns[capital] = self.engine.varostar[capital].export_matroz()
    state.card_decks = [
        self.engine.eventdeck,
        self.engine.eventstack,
        self.engine.kincspakli,
        self.engine.treasurestack,
    ]
    state.is_grog_lord_defeated = self.engine.grogbaroLegyozve.get()
    state.is_lieutenant_found = self.engine.hadnagyElokerult.get()
    if not state.check():
        raise RuntimeError('Invalid game state.')
    self.save_handler.write_save(state)
def load_saved_state(self):
    """Ask the user for a save file and deserialize it into a GameState.

    Returns the populated GameState, or None if the file dialog was
    cancelled.
    """
    current_state = GameState()
    file_name = askopenfilename(defaultextension=self.extension,
                                filetypes=self.type,
                                initialdir='saved')
    if file_name == '':
        # Dialog cancelled by the user.
        return None
    xml_content = parse(file_name)
    save = xml_content.getroot()
    # One <player id=...> element per player.
    for player in save.findall('player'):
        player_id = player.get('id')
        current_state.player_data[player_id] = self._load_player(player)
    current_state.next_player = save.find('currentPlayer').text
    current_state.wind_index = int(save.find('windDirection').text)
    current_state.is_lieutenant_found = True if save.find(
        'firstMateFound').text == 'True' else False
    # NOTE(review): this reads the 'firstMateFound' tag AGAIN — almost
    # certainly a copy-paste bug; the grog-lord flag presumably has its own
    # tag in the save file. Verify against the save writer before changing.
    current_state.is_grog_lord_defeated = True if save.find(
        'firstMateFound').text == 'True' else False
    tavern_tag = save.find('taverns')
    # <tavern port=... sailors=...> entries.
    for tavern in tavern_tag.findall('tavern'):
        current_state.taverns[tavern.get('port')] = int(
            tavern.get('sailors'))
    current_state.card_decks = self._load_cards(save)
    return current_state
def test_creation(self):
    """A GameState can be constructed with no arguments (smoke test)."""
    # FIX: dropped the unused local binding; construction itself is the test.
    GameState()
def test_make_move(self):
    """A single legal opening move leaves the game in the Ongoing state."""
    game = GameState()
    game.make_move(0, 0)
    assert game.state == GameState.Ongoing
def train(args):
    """Train (or purely exploit) a DQN agent on the Chrome dino game.

    Epsilon-greedy exploration over an experience-replay deque, with periodic
    logging and checkpointing of both the network weights and the replay
    cache ("cache.p"). With ``args.exploiting`` set, the agent plays greedily
    and nothing is persisted. Runs until KeyboardInterrupt.
    """
    chrome_driver_path = args.chrome_driver_path
    checkpoint_path = args.checkpoint_path
    nb_actions = args.nb_actions
    initial_epsilon = args.initial_epsilon
    epsilon = initial_epsilon
    final_epsilon = args.final_epsilon
    gamma = args.gamma
    nb_memory = args.nb_memory
    nb_expolre = args.nb_expolre  # (sic) steps over which epsilon is annealed
    is_debug = args.is_debug
    batch_size = args.batch_size
    nb_observation = args.nb_observation
    desired_fps = args.desired_fps
    is_cuda = bool(args.use_cuda and torch.cuda.is_available())
    log_frequency = args.log_frequency
    save_frequency = args.save_frequency
    ratio_of_win = args.ratio_of_win
    if args.exploiting:
        # Pure exploitation: skip the observation phase, minimal exploration.
        nb_observation = -1
        epsilon = final_epsilon
    seed = 22
    np.random.seed(seed)
    memory = deque()
    env = DinoSeleniumEnv(chrome_driver_path, speed=args.game_speed)
    agent = Agent(env)
    game_state = GameState(agent, debug=is_debug)
    qnetwork = QNetwork(nb_actions)
    if is_cuda:
        qnetwork.cuda()
    optimizer = torch.optim.Adam(qnetwork.parameters(), 1e-4)
    # Template parameter used only to allocate tensors on the right device/dtype.
    tmp_param = next(qnetwork.parameters())
    try:
        m = torch.load(checkpoint_path)
        qnetwork.load_state_dict(m["qnetwork"])
        optimizer.load_state_dict(m["optimizer"])
    except Exception:  # FIX: was a bare except; resume-from-scratch is fine
        logger.warning("No model found in {}".format(checkpoint_path))
    loss_fcn = torch.nn.MSELoss()
    action_indx = 0  # do nothing as the first action
    screen, reward, is_gameover, score = game_state.get_state(action_indx)
    current_state = np.expand_dims(screen, 0)
    # Stack the first frame IMAGE_CHANNELS times:
    # [IMAGE_CHANNELS, IMAGE_WIDTH, IMAGE_HEIGHT]
    current_state = np.tile(current_state, (IMAGE_CHANNELS, 1, 1))
    initial_state = current_state
    t = 0
    last_time = 0
    sum_scores = 0
    total_loss = 0
    max_score = 0
    qvalues = np.array([0, 0])
    lost_action = []
    win_actions = []
    action_random = 0
    action_greedy = 0
    episodes = 0
    nb_episodes = 0
    if not args.exploiting:
        try:
            # FIX: close the cache file (original leaked the handle).
            with open("cache.p", "rb") as fh:
                t, memory, epsilon, nb_episodes = pickle.load(fh)
        except Exception:  # FIX: was a bare except
            logger.warning("Could not load cache file! Starting from scratch.")
    try:
        while True:
            qnetwork.eval()
            if np.random.random() < epsilon:  # epsilon greedy
                action_indx = np.random.randint(nb_actions)
                action_random += 1
            else:
                action_greedy += 1
                tensor = torch.from_numpy(current_state).float().unsqueeze(0)
                with torch.no_grad():
                    qvalues = qnetwork(tensor).squeeze()
                _, action_indx = qvalues.max(-1)
                action_indx = action_indx.item()
            if epsilon > final_epsilon and t > nb_observation:
                # Linearly anneal epsilon once the observation phase is over.
                epsilon -= (initial_epsilon - final_epsilon) / nb_expolre
            screen, reward, is_gameover, score = game_state.get_state(
                action_indx)
            if is_gameover:
                episodes += 1
                nb_episodes += 1
                lost_action.append(action_indx)
                sum_scores += score
            else:
                win_actions.append(action_indx)
            if score > max_score:
                max_score = score
            if last_time:
                fps = 1 / (time.time() - last_time)
                if fps > desired_fps:
                    # Throttle so the game loop doesn't exceed desired_fps.
                    time.sleep(1 / desired_fps - 1 / fps)
            if last_time and t % log_frequency == 0:
                logger.info('fps: {0}'.format(1 / (time.time() - last_time)))
            last_time = time.time()
            screen = np.expand_dims(screen, 0)
            # Newest frame first; drop the oldest stacked channel.
            next_state = np.append(screen,
                                   current_state[:IMAGE_CHANNELS - 1, :, :],
                                   axis=0)
            # Subsample non-terminal transitions to balance the replay memory.
            if not args.exploiting and (is_gameover
                                        or np.random.random() < ratio_of_win):
                memory.append((current_state, action_indx, reward, next_state,
                               is_gameover))
                if len(memory) > nb_memory:
                    memory.popleft()
            if nb_observation > 0 and t > nb_observation:
                indxes = np.random.choice(len(memory), batch_size,
                                          replace=False)
                minibatch = [memory[b] for b in indxes]
                inputs = tmp_param.new(batch_size, IMAGE_CHANNELS, IMAGE_WIDTH,
                                       IMAGE_HEIGHT).zero_()
                targets = tmp_param.new(batch_size, nb_actions).zero_()
                for i, (state_t, action_t, reward_t, state_t1,
                        is_gameover_t1) in enumerate(minibatch):
                    inputs[i] = torch.from_numpy(state_t).float()
                    tensor = inputs[i].unsqueeze(0)
                    with torch.no_grad():
                        qvalues = qnetwork(tensor).squeeze()
                    targets[i] = qvalues
                    if is_gameover_t1:
                        # Terminal transition: target is the raw reward.
                        assert reward_t == -1
                        targets[i, action_t] = reward_t
                    else:
                        tensor = torch.from_numpy(state_t1).float().unsqueeze(0)
                        with torch.no_grad():
                            qvalues = qnetwork(tensor).squeeze()
                        qvalues = qvalues.cpu().numpy()
                        # Standard Q-learning bootstrap target.
                        targets[i, action_t] = reward_t + gamma * qvalues.max()
                qnetwork.train()
                qnetwork.zero_grad()
                q_values = qnetwork(inputs)
                loss = loss_fcn(q_values, targets)
                loss.backward()
                optimizer.step()
                total_loss += loss.item()
            current_state = initial_state if is_gameover else next_state
            t += 1
            if t % log_frequency == 0:
                logger.info(
                    "For t {}: mean score is {} max score is {} mean loss: {} number of episode: {}"
                    .format(t, sum_scores / (episodes + 0.1), max_score,
                            total_loss / 1000, episodes))
                logger.info(
                    "t: {} action_index: {} reward: {} max qvalue: {} total number of eposodes so far: {}"
                    .format(t, action_indx, reward, qvalues.max(),
                            nb_episodes))
                tmp = np.array(lost_action)
                dnc = (tmp == 0).sum()
                logger.info(
                    "Lost actions do_nothing: {} jump: {} length of memory {}".
                    format(dnc, len(tmp) - dnc, len(memory)))
                tmp = np.array(win_actions)
                dnc = (tmp == 0).sum()
                logger.info("Win actions do_nothing: {} jump: {}".format(
                    dnc, len(tmp) - dnc))
                logger.info("Greedy action {} Random action {}".format(
                    action_greedy, action_random))
                action_greedy = 0
                action_random = 0
                lost_action = []
                win_actions = []
                if episodes != 0:
                    sum_scores = 0
                    total_loss = 0
                    episodes = 0
            # FIX: the original condition was `t % save_frequency`, which is
            # truthy on every step EXCEPT multiples of save_frequency — i.e.
            # it checkpointed almost every iteration and skipped the intended
            # ones. Match the `t % log_frequency == 0` pattern used above.
            if t % save_frequency == 0 and not args.exploiting:
                env.pause_game()
                with open("cache.p", "wb") as fh:
                    pickle.dump((t, memory, epsilon, nb_episodes), fh)
                gc.collect()
                torch.save(
                    {
                        "qnetwork": qnetwork.state_dict(),
                        "optimizer": optimizer.state_dict()
                    }, checkpoint_path)
                env.resume_game()
    except KeyboardInterrupt:
        # Final checkpoint on manual stop.
        if not args.exploiting:
            torch.save(
                {
                    "qnetwork": qnetwork.state_dict(),
                    "optimizer": optimizer.state_dict()
                }, checkpoint_path)
            with open("cache.p", "wb") as fh:
                pickle.dump((t, memory, epsilon, nb_episodes), fh)