def replay(self, wps, pi_mcts, board_logs, plus_turns, weights,
           batch_size: int, beta: float) -> None:
    inputs = np.zeros((batch_size, 7, 5, 3))
    policy_true = np.zeros((batch_size, 315))
    values_true = np.zeros(batch_size)
    input_weights = np.zeros(batch_size)
    indices = np.random.choice(np.arange(len(wps)), size=batch_size, replace=False)
    mini_batch = [(wps[i], pi_mcts[i], board_logs[i], plus_turns[i], weights[i])
                  for i in indices]

    for i, (winner, pi, board, plus_turn, weight) in enumerate(mini_batch):
        gs = GameState()
        gs.board = board
        inputs[i] = gs.to_inputs(flip=not plus_turn)  # shape=(7, 5, 3), matching the inputs array above
        policy_true[i] = pi**beta
        values_true[i] = winner
        input_weights[i] = weight

    # epochs is the number of passes over the training data; verbose=0 disables output
    self.model.fit(inputs, [policy_true, values_true],
                   sample_weight=input_weights,
                   epochs=1, verbose=0, shuffle=True)
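# A minimal standalone sketch (not from this codebase) of the pi**beta line in
# replay() above: beta acts as a temperature-style exponent on the MCTS policy
# target. pi here is a toy probability vector; note that replay() uses the raw
# pi**beta without renormalizing.
import numpy as np

pi = np.array([0.1, 0.2, 0.3, 0.4])  # toy MCTS visit distribution
for beta in (0.5, 1.0, 2.0):
    target = pi ** beta
    # renormalized here only to make the effect visible: beta > 1 concentrates
    # mass on the most-visited move, beta < 1 flattens the distribution
    print(beta, np.round(target / target.sum(), 3))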
class Game:
    def __init__(self):
        # initialize imported components
        pygame.init()
        # number of frames rendered per second
        # (equal to number of loop turns)
        self.FPS = 60
        # clock to synchronize ticks with desired FPS
        self.clock = pygame.time.Clock()
        # main loop condition
        self.running = True
        # initialize subsystems
        self.drawer = Drawer()
        self.handler = EventHandler(self)
        self.game_state = GameState()
        self.manager = Manager()
        self.ticker = Ticker(self)

    def initialize(self):
        # create game objects from scaffold
        # and add them to proper subsystems
        scaffold = Scaffold()
        for obj in scaffold.objects:
            self.manager.add_object(obj)
        for obs in scaffold.observers:
            self.game_state.add_observer(obs)
        for e in scaffold.recurring_events:
            self.ticker.add_event(e)

    def shutdown(self):
        # this will break the main loop
        self.running = False

    def exit(self):
        # close all imported components
        pygame.quit()
        # force exit program
        exit(1)

    def loop(self):
        self.initialize()
        # main loop
        while self.running:
            # synchronize
            self.clock.tick(self.FPS)
            # notify ticker
            self.ticker.tick()
            # render game objects
            self.drawer.draw(self.manager)
            # respond to events
            self.handler.handle()
        self.exit()
class TestGameState(unittest.TestCase):
    def setUp(self):
        self.gs = GameState()

    def test_outputs_to_move_max(self):
        outputs = np.linspace(0.0, 1.0, 100)
        self.gs.outputs_to_move_max(outputs)
        self.assertTrue((self.gs.board == np.array(
            [[-1, -1, -2, -1, -1],
             [0, 0, 0, 0, 1],
             [0, 0, 0, 0, 0],
             [0, 0, 0, 0, 0],
             [1, 1, 2, 1, 0]])).all())
        self.gs.outputs_to_move_max(outputs)
        self.assertFalse((self.gs.board == np.array(
            [[-1, -1, -2, -1, -1],
             [0, 0, 0, 0, 1],
             [0, 0, 0, 0, 0],
             [0, 0, 0, 0, 0],
             [1, 1, 2, 1, 0]])).all())

    def test_outputs_to_move_random(self):
        outputs = np.linspace(0.0, 1.0, 100)
        outputs /= np.sum(outputs)
        self.gs.outputs_to_move_random(outputs)

    def test_flip(self):
        self.assertTrue((self.gs.to_inputs() == self.gs.to_inputs(True)).all())
def save_used_hash(self, gs: GameState, uct_nodes: List[UctNode], index):
    self.node_hash[index].flag = True
    self.used += 1

    current_node = uct_nodes[index]
    child_index = current_node.child_index
    child_move = current_node.child_move
    child_num = current_node.child_num
    for i in range(child_num):
        if child_index[i] != NOT_EXPANDED and not self.node_hash[child_index[i]].flag:
            gs.move_with_id(child_move[i])
            self.save_used_hash(gs, uct_nodes, child_index[i])
            gs.pop()
def new_game(self, layout, pacman_agent, ghost_agents, display,
             quiet=False, catch_exceptions=False):
    agents = [pacman_agent] + ghost_agents[:layout.get_ghost_count()]
    init_state = GameState()
    init_state.initialize(layout, len(ghost_agents))
    game = Game(agents, display, self, catch_exceptions=catch_exceptions)
    game.state = init_state
    self.initial_state = init_state.deep_copy()
    self.quiet = quiet
    return game
def main():
    window = arcade.Window(constants.SCREEN_WIDTH, constants.SCREEN_HEIGHT,
                           constants.SCREEN_TITLE)
    views = {}
    game = Director(views)
    game_state = GameState()
    start_screen = StartScreen(views)
    controls = ControlScreen(views)
    game_over = GameOverView(views)
    win_screen = WinScreen(views)

    views['window'] = window
    views['game'] = game
    views['start_screen'] = start_screen
    views['controls'] = controls
    views['game_over'] = game_over
    views['win_screen'] = win_screen
    views['game_state'] = game_state

    views['window'].show_view(views['start_screen'])
    game_view = views['start_screen']
    game_view.setup()
    arcade.run()
def get_input():
    k, m, n = map(int, input().split())
    space = Space(n, m, k)
    initial_state = GameState()
    for i in range(k):
        input_string = input()
        column = Column()
        if input_string != '#':
            for card in input_string.split():
                column.add_card(Card(card[-1], int(card[:-1])))
        initial_state.add_column(column)
    space.add_state(initial_state)
    return space
def take_action_eps_greedy(board: np.ndarray, episode: int, mainQN: QNetwork,
                           gs: GameState) -> Tuple[Winner, int]:
    """Return the action at t+1.

    board must be given in the input format (see README).
    Returns the win judgment and the move that was played."""
    # epsilon-greedy: gradually shift toward taking only the optimal action
    epsilon = 0.001 + 0.9 / (1.0 + episode)
    if epsilon <= np.random.uniform(0, 1):
        retTargetQs = mainQN.model.predict(board)[0]
        s = gs.outputs_to_move_max(retTargetQs)  # pick the action with the highest predicted reward
    else:
        s = gs.random_play()  # act randomly
    return s
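# For intuition (standalone sketch, not part of the original code): the epsilon
# schedule above starts near 0.9 (mostly random play) and decays toward its
# 0.001 floor as episodes accumulate.
for episode in (0, 1, 9, 99, 999):
    epsilon = 0.001 + 0.9 / (1.0 + episode)
    print(episode, round(epsilon, 4))
# 0 -> 0.901, 1 -> 0.451, 9 -> 0.091, 99 -> 0.01, 999 -> 0.0019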
def __init__(self, player1: Player, player2: Player):
    self.__player1 = player1
    self.__player2 = player2
    self.__set_move_strategy(player1)
    self.__set_move_strategy(player2)
    self.__board_builder = Connect4BoardBuilder()
    self.__game_state = GameState(self.__board_builder, player1)
def successors(game_state):
    """Return a dict of {state: action} pairs.
    A state is a (wizard, boss, timed_effects) tuple,
    wizard: (hit_points, armor, mana); boss: (hit_points)"""
    result = {}
    w, b, in_use_spells_state = game_state
    spells_in_use = [name for name, timer in in_use_spells_state if timer > 1]
    # apply_effects gets called once before launch_spell is called;
    # this clears up spells that have just timer=1 left
    spells = [magic_missile, drain, shield, poison, recharge]
    available_spells = [s for s in spells if s().name not in spells_in_use]
    for available_spell in available_spells:
        boss = Boss(damage=boss_damage, hit_points=b)
        h, a, m = w
        wizard = Wizard(mana=m, hit_points=h, armor=a)
        gs = GameState(wizard, boss)
        candidate_spell = available_spell()
        # restore timed spells
        for name, timer in in_use_spells_state:
            gs.spells.append(get_spell_by_name(name)(timer))
        gs.apply_effects()
        if not boss.is_alive():
            result[represent(gs)] = None  # boss is killed by existing spells
        else:
            wizard.launch_spell(gs, spell=candidate_spell)
            if not wizard.is_alive():
                continue
            if not boss.is_alive():  # boss killed by spell
                result[represent(gs)] = candidate_spell.name
            else:
                gs.apply_effects()
                if not boss.is_alive():  # boss killed by timed effect
                    result[represent(gs)] = candidate_spell.name
                else:
                    boss.attack(wizard)
                    if wizard.is_alive():
                        result[represent(gs)] = candidate_spell.name
    return result
def delete_old_hash(self, gs: GameState, uct_nodes: List[UctNode]):
    """Delete old hash entries."""
    # delete every position except those rooted at the current position
    root = self.find_same_hash_index(gs.board_hash(), gs.turn, gs.n_turns)
    self.used = 0
    for i in range(UCT_HASH_SIZE):
        self.node_hash[i].flag = False
    if root != UCT_HASH_SIZE:
        self.save_used_hash(gs, uct_nodes, root)
    self.enough_size = True
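# Together, delete_old_hash and save_used_hash (above) amount to a
# mark-and-sweep pass over the transposition table: clear every flag, then
# recursively re-mark the subtree reachable from the new root so only those
# entries survive. A generic, self-contained illustration of the idea; the
# dict-based tree and names below are hypothetical, not this repo's API.
def mark_reachable(nodes, flags, index):
    """Mark `index` and everything reachable from it as still in use."""
    flags[index] = True
    for child in nodes[index]["children"]:
        if child is not None and not flags[child]:
            mark_reachable(nodes, flags, child)

# a tiny tree: 0 -> {1, 2}, 1 -> {3}; node 4 is stale
nodes = {
    0: {"children": [1, 2]},
    1: {"children": [3]},
    2: {"children": []},
    3: {"children": []},
    4: {"children": []},
}
flags = {i: False for i in nodes}  # sweep: clear every flag
mark_reachable(nodes, flags, 1)    # mark: keep only the new root's subtree
print(flags)  # {0: False, 1: True, 2: False, 3: True, 4: False}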
def simulate(self, do_draw, **kwargs):
    self.game.init_physics()
    for i in range(self.repeats):
        clock = pygame.time.Clock()
        self.game.reset()
        self.game.init_physics()
        while not self.game.finished:
            first_team_state = GameState.from_game(self.game)
            second_team_state = first_team_state.get_complement()
            home_moves = self.first_model.get_next_moves(first_team_state)
            away_moves = self.second_model.get_next_moves(second_team_state)
            self.game.execute_commands(home_moves, away_moves)
            self.game.update(self.time_step)
            if do_draw:
                surface = kwargs["surface"]
                camera = kwargs["camera"]
                surface.fill(BLACK)
                self.game.draw(surface, camera)
                fps = int(clock.get_fps())
                font = pygame.font.SysFont(pygame.font.get_default_font(), 24)
                text_surface = font.render(f"FPS: {fps}", True, (255, 255, 255))
                surface.blit(text_surface, (50, 5))
                pygame.display.flip()
            clock.tick()
        self.final_scores.append((self.game.home_score, self.game.away_score))
class A3CActorThread(object):
    def __init__(self, thread_index, global_network, initial_learning_rate,
                 learning_rate_input, optimizer, max_global_time_step, device):
        self.thread_index = thread_index
        self.learning_rate_input = learning_rate_input
        self.max_global_time_step = max_global_time_step

        if USE_LSTM:
            self.local_network = A3CLSTMNetwork(STATE_DIM, STATE_CHN, ACTION_DIM,
                                                device, thread_index)
        else:
            self.local_network = A3CFFNetwork(STATE_DIM, STATE_CHN, ACTION_DIM,
                                              device, thread_index)
        self.local_network.create_loss(ENTROPY_BETA)

        self.gradients = tf.gradients(self.local_network.total_loss,
                                      self.local_network.get_vars())
        clip_accum_grads = [tf.clip_by_norm(accum_grad, 10.0)
                            for accum_grad in self.gradients]
        self.apply_gradients = optimizer.apply_gradients(
            zip(clip_accum_grads, global_network.get_vars()))
        # self.apply_gradients = optimizer.apply_gradients(zip(self.gradients, global_network.get_vars()))

        self.sync = self.local_network.sync_from(global_network)
        self.game_state = GameState(thread_index)
        self.local_t = 0
        self.initial_learning_rate = initial_learning_rate

        # for log
        self.episode_reward = 0.0
        self.episode_start_time = 0.0
        self.prev_local_t = 0
        return

    def _anneal_learning_rate(self, global_time_step):
        learning_rate = self.initial_learning_rate * \
            (self.max_global_time_step - global_time_step) / self.max_global_time_step
        if learning_rate < 0.0:
            learning_rate = 0.0
        return learning_rate

    def choose_action(self, policy_output):
        return np.random.choice(range(len(policy_output)), p=policy_output)

    def _record_log(self, sess, global_t, summary_writer, summary_op,
                    reward_input, reward, time_input, living_time):
        summary_str = sess.run(summary_op, feed_dict={
            reward_input: reward,
            time_input: living_time
        })
        summary_writer.add_summary(summary_str, global_t)
        summary_writer.flush()
        return

    def _discount_accum_reward(self, rewards, running_add=0.0, gamma=0.99):
        """Discount the rewards using gamma."""
        discounted_r = np.zeros_like(rewards, dtype=np.float32)
        for t in reversed(range(len(rewards))):
            running_add = rewards[t] + running_add * gamma
            discounted_r[t] = running_add
        return list(discounted_r)

    def process(self, sess, global_t, summary_writer, summary_op,
                reward_input, time_input):
        batch_state = []
        batch_action = []
        batch_reward = []

        terminal_end = False

        # reduce the influence of socket connecting time
        if self.episode_start_time == 0.0:
            self.episode_start_time = timestamp()

        # copy weights from the global network
        sess.run(self.sync)
        start_local_t = self.local_t

        start_lstm_state = None
        if USE_LSTM:
            start_lstm_state = self.local_network.lstm_state_out

        for i in range(LOCAL_T_MAX):
            policy_ = self.local_network.run_policy(sess, self.game_state.s_t)
            if self.thread_index == 0 and self.local_t % 1000 == 0:
                print('policy=', policy_)

            action_id = self.choose_action(policy_)
            action_onehot = np.zeros([ACTION_DIM])
            action_onehot[action_id] = 1

            batch_state.append(self.game_state.s_t)
            batch_action.append(action_onehot)

            self.game_state.process(action_id)
            reward = self.game_state.reward
            terminal = self.game_state.terminal

            self.episode_reward += reward
            batch_reward.append(np.clip(reward, -1.0, 1.0))
            self.local_t += 1

            # s_t1 -> s_t
            self.game_state.update()

            if terminal:
                terminal_end = True
                episode_end_time = timestamp()
                living_time = episode_end_time - self.episode_start_time
                self._record_log(sess, global_t, summary_writer, summary_op,
                                 reward_input, self.episode_reward,
                                 time_input, living_time)
                print("global_t=%d / reward=%.2f / living_time=%.4f" % (
                    global_t, self.episode_reward, living_time))

                # reset variables
                self.episode_reward = 0.0
                self.episode_start_time = episode_end_time
                self.game_state.reset()
                if USE_LSTM:
                    self.local_network.reset_lstm_state()
                break

            # log
            if self.local_t % 40 == 0:
                living_time = timestamp() - self.episode_start_time
                self._record_log(sess, global_t, summary_writer, summary_op,
                                 reward_input, self.episode_reward,
                                 time_input, living_time)

        # ----------- end of batch (LOCAL_T_MAX) --------------------
        R = 0.0
        if not terminal_end:
            R = self.local_network.run_value(sess, self.game_state.s_t)

        batch_value = self.local_network.run_batch_value(sess, batch_state,
                                                         start_lstm_state)
        batch_R = self._discount_accum_reward(batch_reward, R, GAMMA)
        batch_td = np.array(batch_R) - np.array(batch_value)
        cur_learning_rate = self._anneal_learning_rate(global_t)

        if USE_LSTM:
            sess.run(self.apply_gradients, feed_dict={
                self.local_network.state_input: batch_state,
                self.local_network.action_input: batch_action,
                self.local_network.td: batch_td,
                self.local_network.R: batch_R,
                self.local_network.step_size: [len(batch_state)],
                self.local_network.initial_lstm_state: start_lstm_state,
                self.learning_rate_input: cur_learning_rate
            })
        else:
            sess.run(self.apply_gradients, feed_dict={
                self.local_network.state_input: batch_state,
                self.local_network.action_input: batch_action,
                self.local_network.td: batch_td,
                self.local_network.R: batch_R,
                self.learning_rate_input: cur_learning_rate
            })

        diff_local_t = self.local_t - start_local_t
        return diff_local_t
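# Standalone check (not from the original code) of the n-step discounted-return
# recurrence implemented by _discount_accum_reward above:
# R_t = r_t + gamma * R_{t+1}, seeded with the bootstrap value of the state
# reached after the batch.
import numpy as np

def discount(rewards, bootstrap=0.0, gamma=0.99):
    out = np.zeros(len(rewards), dtype=np.float32)
    running = bootstrap
    for t in reversed(range(len(rewards))):
        running = rewards[t] + gamma * running
        out[t] = running
    return out

print(discount([1.0, 0.0, 1.0], bootstrap=0.5))
# approximately [2.4652 1.4801 1.495 ]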
from sys import argv

from classifier.digit_classifier import DigitClassifier
import cv2

if __name__ == "__main__":
    if len(argv) < 2:
        print("Provide an index")
        raise SystemExit(1)
    test_img_index = int(argv[1])
    img = cv2.imread(f"test_data/frame_{test_img_index}.png", cv2.IMREAD_COLOR)
    game_data_handler = TestGameData(test_img_index)
    game_classifier = GameClassifier()
    game_classifier.load()
    digit_classifier = DigitClassifier()
    digit_classifier.load()
    game_state = GameState(game_data_handler, digit_classifier)
    state, data = game_state.get_game_state(img)
    game_data, my_team = data
    classifier_input = game_dataset.shape_input(game_data, game_data_handler)
    outcome = game_classifier.predict(classifier_input)
    if my_team == "red":
        outcome = 1 - outcome
    pct = f"{outcome * 100:.2f}"
    print(f"Probability of win: {pct}%", flush=True)
    shape = (img.shape[1] // 2, img.shape[0] // 2)
    resized = cv2.resize(img, shape, interpolation=cv2.INTER_AREA)
def learn(model_config_path=None, weight_path=None):
    config = Config()
    qc = config.Qlearn
    total_reward_vec = np.zeros(qc.num_consecutive_iterations)  # stores the reward of each trial

    # create the Q-networks, memory, and actor --------------------------------
    if model_config_path is None or weight_path is None:
        mainQN = QNetwork(config)    # main Q-network
        mainQN.build()
        targetQN = QNetwork(config)  # Q-network used for value estimation
        targetQN.build()
    else:
        mainQN = QNetwork(config)
        success_load = mainQN.load(model_config_path, weight_path)
        if not success_load:
            raise FileNotFoundError(
                f"Could not load {model_config_path} {weight_path}")
        targetQN = QNetwork(config)
        targetQN.load(model_config_path, weight_path)
    memory = Memory(max_size=qc.memory_size)

    for episode in trange(qc.num_episodes):  # repeat for the number of trials
        gs = GameState()
        state = gs.random_play()  # take an arbitrary action on the first step
        episode_reward = 0

        targetQN.model.set_weights(
            mainQN.model.get_weights())  # sync the action-selection and value networks

        for t in range(qc.max_number_of_steps):  # loop over pairs of moves
            board = gs.to_inputs()
            state, action = take_action_eps_greedy(
                board, episode, mainQN, gs)  # decide the action at time t

            if state == Winner.minus:
                reward = qc.reward_win  # reward
            else:
                reward = 0

            next_board = gs.to_inputs()

            # handling at the end of a trial
            if state != Winner.not_ended:
                episode_reward += reward  # update the total reward
                memory.add((board, action, reward, next_board))  # update memory

                # learn/update the Q-network weights (replay)
                if len(memory) > qc.batch_size:  # and not islearned:
                    mainQN.replay(memory, qc.batch_size, qc.gamma, targetQN)
                if qc.DQN_MODE:
                    targetQN.model.set_weights(
                        mainQN.model.get_weights())  # sync the action-selection and value networks

                total_reward_vec = np.hstack(
                    (total_reward_vec[1:], episode_reward))  # record the reward
                print('%d/%d: Episode finished after %d time steps / mean %f winner: %s' % (
                    episode + 1, qc.num_episodes, t + 1, total_reward_vec.mean(),
                    'plus' if state == Winner.plus else 'minus'))
                break

            state, _ = gs.random_play()  # the opposing side plays a random move
            if state == Winner.plus:
                reward = qc.reward_lose
            else:
                reward = 0
            episode_reward += reward  # update the total reward

            memory.add((board, action, reward, next_board))  # update memory

            # learn/update the Q-network weights (replay)
            if len(memory) > qc.batch_size:  # and not islearned:
                mainQN.replay(memory, qc.batch_size, qc.gamma, targetQN)
            if qc.DQN_MODE:
                targetQN.model.set_weights(
                    mainQN.model.get_weights())  # sync the action-selection and value networks

            # handling at the end of a trial
            if state != Winner.not_ended:
                total_reward_vec = np.hstack(
                    (total_reward_vec[1:], episode_reward))  # record the reward
                print('%d/%d: Episode finished after %d time steps / mean %f winner: %s' % (
                    episode + 1, qc.num_episodes, t + 1, total_reward_vec.mean(),
                    'plus' if state == Winner.plus else 'minus'))
                break

        if episode % qc.save_interval == qc.save_interval - 1:
            d = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
            mainQN.save(f"results/001_QLearning/{d}-mainQN.json",
                        f"results/001_QLearning/{d}-mainQN.h5")
            with open(f"results/001_QLearning/{d}-config.json", 'x') as f:
                json.dump(config._to_dict(), f, indent=4)

    # save at the end (skipped if we saved just above)
    if episode % qc.save_interval != qc.save_interval - 1:
        d = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
        mainQN.save(f"results/001_QLearning/{d}-mainQN.json",
                    f"results/001_QLearning/{d}-mainQN.h5")
        with open(f"results/001_QLearning/{d}-config.json", 'x') as f:
            json.dump(config._to_dict(), f, indent=4)
atexit.register(cleanup)

get_nicknames(clients)
print('The players playing are: ', end='')
for i, client in enumerate(clients):
    print(client[NAME], end='')
    if i != len(clients) - 1:
        print(', ', end='')
    else:
        print()  # newline

maze = random_maze(MAZE_WIDTH, server_config.MAP_COMPLEXITY,
                   server_config.MAP_DENSITY, PLAYERS)
game = GameState(maze)

for id_, client in enumerate(clients):
    player = Player(maze.starting_locations[id_], client[NAME])
    client['id'] = player.id
    client[PLAYER] = player
    game.add_player(player)

init_player_data = []
for _, player in game.players.items():
    init_player_data.append(player.serializable_init())

print("Sending maze data...")
# Send the maze to all clients
for client in clients:
    network.message.send_msg(client[SOCKET][0], str.encode(maze.as_json()))
class A3CActorThread(object):
    def __init__(self, thread_index, global_network, initial_learning_rate,
                 learning_rate_input, optimizer, max_global_time_step, device):
        self.thread_index = thread_index
        self.learning_rate_input = learning_rate_input
        self.max_global_time_step = max_global_time_step

        if USE_LSTM:
            self.local_network = A3CLSTMNetwork(STATE_DIM, STATE_CHN, ACTION_DIM,
                                                device, thread_index)
        else:
            self.local_network = A3CFFNetwork(STATE_DIM, STATE_CHN, ACTION_DIM,
                                              device, thread_index)
        self.local_network.create_loss(ENTROPY_BETA)

        self.gradients = tf.gradients(self.local_network.total_loss,
                                      self.local_network.get_vars())
        clip_accum_grads = [tf.clip_by_norm(accum_grad, 10.0)
                            for accum_grad in self.gradients]
        self.apply_gradients = optimizer.apply_gradients(
            zip(clip_accum_grads, global_network.get_vars()))
        # self.apply_gradients = optimizer.apply_gradients(zip(self.gradients, global_network.get_vars()))

        self.sync = self.local_network.sync_from(global_network)
        self.game_state = GameState(thread_index)
        self.local_t = 0
        self.initial_learning_rate = initial_learning_rate

        # for log
        self.episode_reward = 0.0
        self.episode_start_time = 0.0
        self.prev_local_t = 0
        return

    def _anneal_learning_rate(self, global_time_step):
        learning_rate = self.initial_learning_rate * \
            (self.max_global_time_step - global_time_step) / self.max_global_time_step
        if learning_rate < 0.0:
            learning_rate = 0.0
        return learning_rate

    def choose_action(self, policy_output):
        return np.random.choice(range(len(policy_output)), p=policy_output)

    def _record_log(self, sess, global_t, summary_writer, summary_op,
                    reward_input, reward, time_input, living_time):
        summary_str = sess.run(summary_op, feed_dict={
            reward_input: reward,
            time_input: living_time
        })
        summary_writer.add_summary(summary_str, global_t)
        summary_writer.flush()
        return

    def process(self, sess, global_t, summary_writer, summary_op,
                reward_input, time_input):
        states = []
        actions = []
        rewards = []
        values = []

        terminal_end = False

        # reduce the influence of socket connecting time
        if self.episode_start_time == 0.0:
            self.episode_start_time = timestamp()

        # copy weights from the global network
        sess.run(self.sync)
        start_local_t = self.local_t

        if USE_LSTM:
            start_lstm_state = self.local_network.lstm_state_out

        for i in range(LOCAL_T_MAX):
            policy_, value_ = self.local_network.run_policy_and_value(
                sess, self.game_state.s_t)
            if self.thread_index == 0 and self.local_t % 1000 == 0:
                print('policy=', policy_)
                print('value=', value_)

            action_id = self.choose_action(policy_)

            states.append(self.game_state.s_t)
            actions.append(action_id)
            values.append(value_)

            self.game_state.process(action_id)
            reward = self.game_state.reward
            terminal = self.game_state.terminal

            self.episode_reward += reward
            rewards.append(np.clip(reward, -1.0, 1.0))
            self.local_t += 1

            # s_t1 -> s_t
            self.game_state.update()

            if terminal:
                terminal_end = True
                episode_end_time = timestamp()
                living_time = episode_end_time - self.episode_start_time
                self._record_log(sess, global_t, summary_writer, summary_op,
                                 reward_input, self.episode_reward,
                                 time_input, living_time)
                print("global_t=%d / reward=%.2f / living_time=%.4f" % (
                    global_t, self.episode_reward, living_time))

                # reset variables
                self.episode_reward = 0.0
                self.episode_start_time = episode_end_time
                self.game_state.reset()
                if USE_LSTM:
                    self.local_network.reset_lstm_state()
                break

            # log
            if self.local_t % 40 == 0:
                living_time = timestamp() - self.episode_start_time
                self._record_log(sess, global_t, summary_writer, summary_op,
                                 reward_input, self.episode_reward,
                                 time_input, living_time)

        # ----------- end of batch (LOCAL_T_MAX) --------------------
        R = 0.0
        if not terminal_end:
            R = self.local_network.run_value(sess, self.game_state.s_t)

        states.reverse()
        actions.reverse()
        rewards.reverse()
        values.reverse()

        batch_state = []
        batch_action = []
        batch_td = []
        batch_R = []
        # accumulate the n-step return and TD error from the last step backwards
        for (ai, ri, si, Vi) in zip(actions, rewards, states, values):
            R = ri + GAMMA * R
            td = R - Vi
            action = np.zeros([ACTION_DIM])
            action[ai] = 1

            batch_state.append(si)
            batch_action.append(action)
            batch_td.append(td)
            batch_R.append(R)

        cur_learning_rate = self._anneal_learning_rate(global_t)

        if USE_LSTM:
            batch_state.reverse()
            batch_action.reverse()
            batch_td.reverse()
            batch_R.reverse()
            sess.run(self.apply_gradients, feed_dict={
                self.local_network.state_input: batch_state,
                self.local_network.action_input: batch_action,
                self.local_network.td: batch_td,
                self.local_network.R: batch_R,
                self.local_network.step_size: [len(batch_state)],
                self.local_network.initial_lstm_state: start_lstm_state,
                self.learning_rate_input: cur_learning_rate
            })
        else:
            sess.run(self.apply_gradients, feed_dict={
                self.local_network.state_input: batch_state,
                self.local_network.action_input: batch_action,
                self.local_network.td: batch_td,
                self.local_network.R: batch_R,
                self.learning_rate_input: cur_learning_rate
            })

        diff_local_t = self.local_t - start_local_t
        return diff_local_t
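# Standalone sketch (not from the original code): the reversed single-pass
# accumulation above computes the same n-step returns as the forward
# _discount_accum_reward helper in the previous variant, plus the TD errors
# R_t - V(s_t) used as advantage estimates.
GAMMA = 0.99
rewards, values = [1.0, 0.0, 1.0], [0.9, 0.8, 0.7]
R = 0.5  # bootstrap value of the state after the batch

batch_td, batch_R = [], []
for ri, Vi in zip(reversed(rewards), reversed(values)):
    R = ri + GAMMA * R
    batch_td.append(R - Vi)
    batch_R.append(R)
batch_R.reverse()
batch_td.reverse()
print(batch_R)   # approximately [2.4652, 1.4801, 1.495], matching the forward helper
print(batch_td)  # advantage estimates R_t - V(s_t)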
sys.path.append(path.dirname(path.dirname(path.abspath(__file__))))
from game.spells import Effect, TimedEffect
from game.spells import magic_missile, drain, shield, poison, recharge
from game.characters import Boss, Wizard
from game.strategies import SelectSpellByPredefinedOrder
from game.game_state import GameState


def hit_armor_mana(wizard):
    return (wizard.hit_points, wizard.armor, wizard.mana)


# Now, suppose the same initial conditions, except that the boss has 14 hit points instead:
mock_order = [recharge(), shield(), drain(), poison(), magic_missile()]
wizard = Wizard(hit_points=10, mana=250,
                spell_selection_strategy=SelectSpellByPredefinedOrder(mock_order))
boss = Boss(hit_points=14, damage=8)
game_state = GameState(wizard, boss)

# -- Player turn --
# - Player has 10 hit points, 0 armor, 250 mana
assert hit_armor_mana(wizard) == (10, 0, 250)
# - Boss has 14 hit points
assert boss.hit_points == 14
# Player casts Recharge.
s = wizard.launch_spell(game_state)
assert s.name == 'Recharge'

# -- Boss turn --
# - Player has 10 hit points, 0 armor, 21 mana (250 - 229, Recharge's mana cost)
assert hit_armor_mana(wizard) == (10, 0, 21)
# - Boss has 14 hit points
assert boss.hit_points == 14
class Game:
    def __init__(self, settings: Settings) -> None:
        pygame.init()
        self.settings = settings
        self.font = pygame.font.SysFont("Arial", 18)
        self.display = pygame.display.set_mode(
            (self.settings.screen_width, self.settings.screen_height))
        self.clock = pygame.time.Clock()
        pygame.display.set_caption(self.settings.window_caption)
        self.running = False
        self.current_level = LevelFactory.create(1)
        self.camera_position = self.current_level.initial_position
        self.game_state = GameState(self.current_level)
        self.level_renderer = MapRenderer(
            self.game_state,
            self.display,
            self.current_level.sprite,
        )

    def run(self) -> None:
        self.running = True
        while self.running:
            self.handle_input()
            self.update()
            self.render()
            self.clock.tick(self.settings.frames_per_second)
        pygame.quit()
        quit()

    def update(self) -> None:
        self.game_state.update_camera(self.camera_position)

    def update_fps(self) -> pygame.surface.Surface:
        fps = str(int(self.clock.get_fps()))
        fps_text = self.font.render(fps, True, pygame.Color("coral"))
        return fps_text

    def render(self) -> None:
        self.display.fill((0, 0, 0))
        self.display.blit(self.update_fps(), (10, 0))
        self.level_renderer.render(self.game_state)
        pygame.display.update()

    def handle_input(self) -> None:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                self.running = False
            if event.type == pygame.KEYDOWN:
                if event.key == pygame.K_ESCAPE:
                    self.running = False
                    return
                elif event.key == pygame.K_RIGHT:
                    self.camera_position = (
                        self.camera_position[0] + 1,
                        self.camera_position[1],
                    )
                elif event.key == pygame.K_LEFT:
                    self.camera_position = (
                        self.camera_position[0] - 1,
                        self.camera_position[1],
                    )
                elif event.key == pygame.K_DOWN:
                    self.camera_position = (
                        self.camera_position[0],
                        self.camera_position[1] + 1,
                    )
                elif event.key == pygame.K_UP:
                    self.camera_position = (
                        self.camera_position[0],
                        self.camera_position[1] - 1,
                    )
        return
def PlayGame(stop_flag, attach_target):
    gs = GameState()
    items = ["spellthief's edge", "Tear of the Goddess", "kindlegem",
             "amplifying Tome", "amplifying Tome", "Blasting Wand", "EverFrost"]
    loop_count = 1
    ff_time = 0
    first_run = True
    s_time = time.time()
    increase_loop_dur = random.randint(3, 7)

    while Client.is_league_game_running():
        gs.update()
        if gs.has_game_started() and not stop_flag['val']:
            if first_run:
                time.sleep(1)
                Actions.cast_spell('ctrl+4')
                Actions.cast_spell('y')
                time.sleep(1)
                Actions.purchase_recommend()
                first_run = False
                ff_time = time.time() + 60 * 15
                Actions.action_troll_ward(gs.get_my_team_side())
            if time.time() > ff_time:
                Actions.type_in_chat("/ff")
                ff_time += 60
            if not gs.is_adc_dead() and not gs.is_i_dead():
                if gs.is_yummi_attached():
                    if gs.is_adc_hp_low():
                        Actions.cast_spell('e')
                    if gs.is_adc_hp_critical():
                        coord = gs.get_general_enemy_dir_coords()
                        Actions.cast_spell('d')
                        mouse.move(coord.x, coord.y)
                        time.sleep(0.01)
                        Actions.cast_spell('r')
                        time.sleep(0.01)
                        Actions.cast_spell('q')
                else:
                    Actions.yummi_attach(attach_target['val'])
            if gs.is_i_dead():
                Actions.purchase_recommend()
                if random.randint(0, 15) == 10:
                    Actions.type_shit_in_chat()
            if gs.is_adc_dead() and not gs.is_i_dead():
                if gs.get_fountain_coords() is not None:
                    Actions.retreat(gs.get_fountain_coords())
            if time.time() - s_time > increase_loop_dur:
                loop_count = loop_count + 1
                increase_loop_dur = random.randint(3, 7)
                s_time = time.time()
            if loop_count % 3 == 0:
                if random.randint(0, 1) == 1:
                    Actions.random_mouse_movement()
                    time.sleep(0.15)
            if loop_count % 4 == 0:
                if random.randint(0, 1) == 1:
                    Actions.level_all_spells('r', 'q', 'w', 'e')
            if loop_count % 15 == 0:
                if random.randint(0, 1) == 1:
                    if gs.is_yummi_attached():
                        Actions.cast_spell('4')
                        Actions.cast_spell('1')
            if loop_count % 15 == 0:
                if random.randint(0, 1) == 1:
                    if gs.is_yummi_attached():
                        Actions.cast_spell('ctrl+4')
        time.sleep(0.04)
def successors_2(game_state):
    """Return a dict of {state: action} pairs.
    A state is a (wizard, boss, timed_effects) tuple,
    wizard: (hit_points, armor, mana); boss: (hit_points)"""
    result = {}
    w, b, in_use_spells_state = game_state
    spells_in_use = [name for name, timer in in_use_spells_state if timer > 1]
    # apply_effects gets called once before launch_spell is called;
    # this clears up spells that have just timer=1 left
    spells = [magic_missile, drain, shield, poison, recharge]
    available_spells = [s for s in spells if s().name not in spells_in_use]
    for available_spell in available_spells:
        boss = Boss(damage=boss_damage, hit_points=b)
        h, a, m = w
        wizard = Wizard(mana=m, hit_points=h, armor=a)
        gs = GameState(wizard, boss, level='hard')
        candidate_spell = available_spell()
        # restore timed spells
        for name, timer in in_use_spells_state:
            gs.spells.append(get_spell_by_name(name)(timer))
        gs.apply_effects('before_wizard')
        if not wizard.is_alive():  # can't happen at easy level
            continue
        if not boss.is_alive():
            # if the boss dies from existing effects it is not necessary
            # to add a new spell
            result[represent(gs)] = None  # boss is killed by existing spells
        else:
            # here the boss is alive
            wizard.launch_spell(gs, spell=candidate_spell)
            if not wizard.is_alive():
                continue
            if not boss.is_alive():  # boss killed by spell
                result[represent(gs)] = candidate_spell.name
            else:
                gs.apply_effects('before_boss')
                if not wizard.is_alive():
                    continue
                if not boss.is_alive():  # boss killed by timed effect
                    result[represent(gs)] = candidate_spell.name
                else:
                    boss.attack(wizard)
                    if wizard.is_alive():
                        # only log the launched spell if the wizard survives;
                        # otherwise this state is a dead end
                        result[represent(gs)] = candidate_spell.name
    return result
    classifier_input = shape_input(game_data, game_data_handler)
    outcome = game_classifier.predict(classifier_input)
    if my_team == "red":
        outcome = 1 - outcome
    return outcome


TAB_PRESSED = False
ALT_PRESSED = False

game_data_handler = GameData()
game_classifier = GameClassifier().cpu()
game_classifier.load()
digit_classifier = DigitClassifier().cpu()
digit_classifier.load()
game_state_handler = GameState(game_data_handler, digit_classifier)
auth = json.load(open("data/auth.json", encoding="utf-8"))

while True:
    if not ALT_PRESSED and alt_pressed():
        ALT_PRESSED = True
    elif ALT_PRESSED and not alt_pressed():
        ALT_PRESSED = False
    if not TAB_PRESSED and tab_pressed():
        TAB_PRESSED = True
        if not ALT_PRESSED:
            try:
                state, data = get_game_state(game_state_handler)
{ "championId": self.champions[8], "teamId": 200, "summonerId": self.summ_ids[8], "spell1Id": self.get_summoner_spell_index(self.summs[8][0]), # Exhaust "spell2Id": self.get_summoner_spell_index(self.summs[8][1]) # Flash }, { "championId": self.champions[9], "teamId": 200, "summonerId": self.summ_ids[9], "spell1Id": self.get_summoner_spell_index(self.summs[9][0]), # Ignite "spell2Id": self.get_summoner_spell_index(self.summs[9][1]) # Flash } ] } if __name__ == "__main__": test_img_index = 4 img = cv2.imread(f"test_data/frame_{test_img_index}.png", cv2.IMREAD_COLOR) champion_data = TestGameData(test_img_index) digit_classifier = DigitClassifier() digit_classifier.load() game_state_handler = GameState(champion_data, digit_classifier) state, data = game_state_handler.get_game_state(img) game_data, my_team = data for team in game_data: print(f"====== {team.upper()} TEAM ======") print(f"Towers destroyed: {game_data[team]['towers_destroyed']}") print(f"Dragons: {game_data[team]['dragons']}") for player_data in game_data[team]["players"]: print(player_data) print("***********************************************")
def new_game():
    print("NEW GAME")
    g = GameState(3, 3, 3)
    play(g)