Code example #1
    def replay(self, wps, pi_mcts, board_logs, plus_turns, weights,
               batch_size: int, beta: float) -> None:
        inputs = np.zeros((batch_size, 7, 5, 3))
        policy_true = np.zeros((batch_size, 315))
        values_true = np.zeros((batch_size))
        input_weights = np.zeros((batch_size))
        indices = np.random.choice(np.arange(len(wps)),
                                   size=batch_size,
                                   replace=False)
        mini_batch = [(wps[i], pi_mcts[i], board_logs[i], plus_turns[i],
                       weights[i]) for i in indices]

        for i, (winner, pi, board, plus_turn, weight) in enumerate(mini_batch):
            gs = GameState()
            gs.board = board
            inputs[i] = gs.to_inputs(flip=not plus_turn)  # shape=(7, 5, 3)
            policy_true[i] = pi**beta
            values_true[i] = winner
            input_weights[i] = weight

        # epochs is the number of passes over the training data; verbose=0 disables progress output
        self.model.fit(inputs, [policy_true, values_true],
                       sample_weight=input_weights,
                       epochs=1,
                       verbose=0,
                       shuffle=True)
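
The sampling step above draws batch_size positions from the self-play logs without replacement and rebuilds the per-position tuples before filling the training arrays. Below is a minimal standalone sketch of just that sampling pattern, using hypothetical dummy buffers shaped like the arrays in the snippet (policy targets of length 315, 5x5 boards); the buffer contents are illustrative only.

import numpy as np

# hypothetical replay buffers mirroring the arguments of replay()
n = 50                                     # number of stored positions (assumed)
wps = np.random.choice([-1, 1], size=n)    # winner label per position
pi_mcts = np.random.rand(n, 315)           # MCTS policy targets
board_logs = np.random.randint(-2, 3, size=(n, 5, 5))
plus_turns = np.random.rand(n) > 0.5
weights = np.ones(n)

batch_size = 8
# sample without replacement, exactly as replay() does
indices = np.random.choice(np.arange(len(wps)), size=batch_size, replace=False)
mini_batch = [(wps[i], pi_mcts[i], board_logs[i], plus_turns[i], weights[i])
              for i in indices]
print(len(mini_batch))  # 8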
Code example #2
class Game:
    def __init__(self):
        # initialize imported components
        pygame.init()
        # number of frames rendered per second
        # (equal to number of loop turns)
        self.FPS = 60
        # clock to synchronize ticks with desired FPS
        self.clock = pygame.time.Clock()
        # main loop condition
        self.running = True

        # initialize subsystems
        self.drawer = Drawer()
        self.handler = EventHandler(self)
        self.game_state = GameState()
        self.manager = Manager()
        self.ticker = Ticker(self)

    def initialize(self):
        # create game objects from scaffold
        # and add them to proper subsystems
        scaffold = Scaffold()
        for obj in scaffold.objects:
            self.manager.add_object(obj)

        for obs in scaffold.observers:
            self.game_state.add_observer(obs)

        for e in scaffold.recurring_events:
            self.ticker.add_event(e)

    def shutdown(self):
        # this will break main loop
        self.running = False

    def exit(self):
        # close all imported components
        pygame.quit()
        # force exit program
        exit(1)

    def loop(self):
        self.initialize()

        # main loop
        while self.running:
            # synchronize
            self.clock.tick(self.FPS)
            # notify ticker
            self.ticker.tick()

            # render game objects
            self.drawer.draw(self.manager)
            # respond to events
            self.handler.handle()

        self.exit()
Code example #3
File: test_game_state.py Project: bababax11/slipe
class TestGameState(unittest.TestCase):
    def setUp(self):
        self.gs = GameState()

    def test_outputs_to_move_max(self):
        outputs = np.linspace(0.0, 1.0, 100)
        self.gs.outputs_to_move_max(outputs)
        self.assertTrue((self.gs.board == np.array([[-1, -1, -2, -1, -1],
                                                    [0, 0, 0, 0, 1],
                                                    [0, 0, 0, 0, 0],
                                                    [0, 0, 0, 0, 0],
                                                    [1, 1, 2, 1, 0]])).all())
        self.gs.outputs_to_move_max(outputs)
        self.assertFalse((self.gs.board == np.array([[-1, -1, -2, -1, -1],
                                                     [0, 0, 0, 0, 1],
                                                     [0, 0, 0, 0, 0],
                                                     [0, 0, 0, 0, 0],
                                                     [1, 1, 2, 1, 0]])).all())

    def test_outputs_to_move_random(self):
        outputs = np.linspace(0.0, 1.0, 100)
        outputs /= np.sum(outputs)
        self.gs.outputs_to_move_random(outputs)

    def test_flip(self):
        self.assertTrue((self.gs.to_inputs() == self.gs.to_inputs(True)).all())
Code example #4
    def save_used_hash(self, gs: GameState, uct_nodes: List[UctNode], index):
        self.node_hash[index].flag = True
        self.used += 1

        current_node = uct_nodes[index]
        child_index = current_node.child_index
        child_move = current_node.child_move
        child_num = current_node.child_num
        for i in range(child_num):
            if child_index[i] != NOT_EXPANDED and self.node_hash[
                    child_index[i]].flag == False:
                gs.move_with_id(child_move[i])
                self.save_used_hash(gs, uct_nodes, child_index[i])
                gs.pop()
Code example #5
 def new_game(self,
              layout,
              pacman_agent,
              ghost_agents,
              display,
              quiet=False,
              catch_exceptions=False):
     agents = [pacman_agent] + ghost_agents[:layout.get_ghost_count()]
     init_state = GameState()
     init_state.initialize(layout, len(ghost_agents))
     game = Game(agents, display, self, catch_exceptions=catch_exceptions)
     game.state = init_state
     self.initial_state = init_state.deep_copy()
     self.quiet = quiet
     return game
Code example #6
def main():

    window = arcade.Window(constants.SCREEN_WIDTH, constants.SCREEN_HEIGHT,
                           constants.SCREEN_TITLE)

    views = {}

    game = Director(views)
    game_state = GameState()
    start_screen = StartScreen(views)
    controls = ControlScreen(views)
    game_over = GameOverView(views)
    win_screen = WinScreen(views)

    views['window'] = window
    views['game'] = game
    views['start_screen'] = start_screen
    views['controls'] = controls
    views['game_over'] = game_over
    views['win_screen'] = win_screen
    views['game_state'] = game_state

    views['window'].show_view(views['start_screen'])
    game_view = views['start_screen']
    game_view.setup()
    arcade.run()
Code example #7
File: utils.py Project: aut-ce/CE251-AI
def get_input():
    k, m, n = map(int, input().split())

    space = Space(n, m, k)
    initial_state = GameState()

    for i in range(k):
        input_string = input()
        column = Column()
        if input_string != '#':
            for card in input_string.split():
                column.add_card(Card(card[-1], int(card[:-1])))
        initial_state.add_column(column)

    space.add_state(initial_state)
    return space
Code example #8
File: model.py Project: bababax11/slipe
def take_action_eps_greedy(board: np.ndarray, episode: int, mainQN: QNetwork,
                           gs: GameState) -> Tuple[Winner, int]:
    """t+1での行動を返す
    boardは入力の型(README参照)で与えること
    returnは勝利判定と打った手"""
    # 徐々に最適行動のみをとる、ε-greedy法
    epsilon = 0.001 + 0.9 / (1.0 + episode)

    if epsilon <= np.random.uniform(0, 1):
        retTargetQs = mainQN.model.predict(board)[0]
        s = gs.outputs_to_move_max(retTargetQs)  # choose the action with the highest predicted reward

    else:
        s = gs.random_play()  # act randomly

    return s
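
The schedule above makes ε start near 0.9 and decay toward the 0.001 floor as episodes accumulate, so early episodes explore and later ones mostly exploit the network's policy. A quick standalone check of the decay using the same formula (episode values chosen for illustration):

# epsilon = 0.001 + 0.9 / (1.0 + episode), as in take_action_eps_greedy
for episode in (0, 1, 9, 99, 999):
    epsilon = 0.001 + 0.9 / (1.0 + episode)
    print(episode, round(epsilon, 4))
# prints: 0 0.901 / 1 0.451 / 9 0.091 / 99 0.01 / 999 0.0019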
Code example #9
 def __init__(self, player1: Player, player2: Player):
     self.__player1 = player1
     self.__player2 = player2
     self.__set_move_strategy(player1)
     self.__set_move_strategy(player2)
     self.__board_builder = Connect4BoardBuilder()
     self.__game_state = GameState(self.__board_builder, player1)
Code example #10
    def __init__(self):
        # initialize imported components
        pygame.init()
        # number of frames rendered per second
        # (equal to number of loop turns)
        self.FPS = 60
        # clock to synchronize ticks with desired FPS
        self.clock = pygame.time.Clock()
        # main loop condition
        self.running = True

        # initialize subsystems
        self.drawer = Drawer()
        self.handler = EventHandler(self)
        self.game_state = GameState()
        self.manager = Manager()
        self.ticker = Ticker(self)
Code example #11
def successors(game_state):
	"""Return a dict of {state:action} pairs.  A state is a (wizard, boss, timed_effects) tuple,
	wizard: (hit_points, armor, mana); boss: (hit_points)"""
	result = {}
	w, b, in_use_spells_state = game_state
	spells_in_use = [name for name, timer in in_use_spells_state if timer > 1]
	# apply_effects gets called once before launch_spell is called;
	# this clears up spells that have only timer=1 left
	spells = [magic_missile, drain, shield, poison, recharge]
	available_spells = [s for s in spells if s().name not in spells_in_use]

	for available_spell in available_spells:
		boss = Boss(damage=boss_damage, hit_points=b)
		h,a,m = w
		wizard = Wizard(mana=m,
			hit_points=h,
			armor=a)
		gs = GameState(wizard, boss)

		candidate_spell = available_spell()

		# restore timed spells
		for name, timer in in_use_spells_state:
			gs.spells.append(get_spell_by_name(name)(timer))
			
		gs.apply_effects()
		if not boss.is_alive():
			result[represent(gs)] = None # boss is killed by existing spells
		else:
			wizard.launch_spell(gs, spell=candidate_spell)
			if not wizard.is_alive():
				continue
			if not boss.is_alive(): # boss killed by spell
				result[represent(gs)] = candidate_spell.name
			else:
				gs.apply_effects()
				if not boss.is_alive(): # boss killed by timed effect
					result[represent(gs)] = candidate_spell.name
				else:
					boss.attack(wizard)

					if wizard.is_alive():
						result[represent(gs)] = candidate_spell.name

	return result
Code example #12
    def __init__(self, settings: Settings) -> None:
        pygame.init()

        self.settings = settings
        self.font = pygame.font.SysFont("Arial", 18)

        self.display = pygame.display.set_mode(
            (self.settings.screen_width, self.settings.screen_height))
        self.clock = pygame.time.Clock()
        pygame.display.set_caption(self.settings.window_caption)
        self.running = False
        self.current_level = LevelFactory.create(1)
        self.camera_position = self.current_level.initial_position
        self.game_state = GameState(self.current_level)
        self.level_renderer = MapRenderer(
            self.game_state,
            self.display,
            self.current_level.sprite,
        )
Code example #13
    def delete_old_hash(self, gs: GameState, uct_nodes: List[UctNode]):
        """古いハッシュを削除"""
        # 現在の局面をルートとする局面以外を削除する
        root = self.find_same_hash_index(gs.board_hash(), gs.turn, gs.n_turns)

        self.used = 0
        for i in range(UCT_HASH_SIZE):
            self.node_hash[i].flag = False

        if root != UCT_HASH_SIZE:
            self.save_used_hash(gs, uct_nodes, root)

        self.enough_size = True
Code example #14
    def __init__(self, thread_index, global_network, initial_learning_rate,
                 learning_rate_input, optimizer, max_global_time_step, device):

        self.thread_index = thread_index
        self.learning_rate_input = learning_rate_input
        self.max_global_time_step = max_global_time_step

        if USE_LSTM:
            self.local_network = A3CLSTMNetwork(STATE_DIM, STATE_CHN,
                                                ACTION_DIM, device,
                                                thread_index)
        else:
            self.local_network = A3CFFNetwork(STATE_DIM, STATE_CHN, ACTION_DIM,
                                              device, thread_index)
        self.local_network.create_loss(ENTROPY_BETA)
        self.gradients = tf.gradients(self.local_network.total_loss,
                                      self.local_network.get_vars())

        clip_accum_grads = [
            tf.clip_by_norm(accum_grad, 10.0) for accum_grad in self.gradients
        ]
        self.apply_gradients = optimizer.apply_gradients(
            zip(clip_accum_grads, global_network.get_vars()))
        # self.apply_gradients = optimizer.apply_gradients(zip(self.gradients, global_network.get_vars()))

        self.sync = self.local_network.sync_from(global_network)

        self.game_state = GameState(thread_index)

        self.local_t = 0
        self.initial_learning_rate = initial_learning_rate

        # for log
        self.episode_reward = 0.0
        self.episode_start_time = 0.0
        self.prev_local_t = 0
        return
Code example #15
    def simulate(self, do_draw, **kwargs):

        self.game.init_physics()

        for i in range(self.repeats):
            clock = pygame.time.Clock()
            self.game.reset()
            self.game.init_physics()

            while not self.game.finished:
                first_team_state = GameState.from_game(self.game)
                second_team_state = first_team_state.get_complement()

                home_moves = self.first_model.get_next_moves(first_team_state)
                away_moves = self.second_model.get_next_moves(
                    second_team_state)

                self.game.execute_commands(home_moves, away_moves)

                self.game.update(self.time_step)

                if do_draw:
                    surface = kwargs["surface"]
                    camera = kwargs["camera"]

                    surface.fill(BLACK)
                    self.game.draw(surface, camera)

                    fps = int(clock.get_fps())
                    font = pygame.font.SysFont(pygame.font.get_default_font(),
                                               24)
                    text_surface = font.render(f"FPS: {fps}", True,
                                               (255, 255, 255))
                    surface.blit(text_surface, (50, 5))

                    pygame.display.flip()

                clock.tick()

            self.final_scores.append(
                (self.game.home_score, self.game.away_score))
Code example #16
class A3CActorThread(object):
    def __init__(self, thread_index, global_network, initial_learning_rate,
                 learning_rate_input, optimizer, max_global_time_step, device):

        self.thread_index = thread_index
        self.learning_rate_input = learning_rate_input
        self.max_global_time_step = max_global_time_step

        if USE_LSTM:
            self.local_network = A3CLSTMNetwork(STATE_DIM, STATE_CHN,
                                                ACTION_DIM, device,
                                                thread_index)
        else:
            self.local_network = A3CFFNetwork(STATE_DIM, STATE_CHN, ACTION_DIM,
                                              device, thread_index)
        self.local_network.create_loss(ENTROPY_BETA)
        self.gradients = tf.gradients(self.local_network.total_loss,
                                      self.local_network.get_vars())

        clip_accum_grads = [
            tf.clip_by_norm(accum_grad, 10.0) for accum_grad in self.gradients
        ]
        self.apply_gradients = optimizer.apply_gradients(
            zip(clip_accum_grads, global_network.get_vars()))
        # self.apply_gradients = optimizer.apply_gradients(zip(self.gradients, global_network.get_vars()))

        self.sync = self.local_network.sync_from(global_network)

        self.game_state = GameState(thread_index)

        self.local_t = 0
        self.initial_learning_rate = initial_learning_rate

        # for log
        self.episode_reward = 0.0
        self.episode_start_time = 0.0
        self.prev_local_t = 0
        return

    def _anneal_learning_rate(self, global_time_step):
        learning_rate = self.initial_learning_rate * \
            (self.max_global_time_step - global_time_step) / self.max_global_time_step
        if learning_rate < 0.0:
            learning_rate = 0.0
        return learning_rate

    def choose_action(self, policy_output):
        return np.random.choice(range(len(policy_output)), p=policy_output)

    def _record_log(self, sess, global_t, summary_writer, summary_op,
                    reward_input, reward, time_input, living_time):
        summary_str = sess.run(summary_op,
                               feed_dict={
                                   reward_input: reward,
                                   time_input: living_time
                               })
        summary_writer.add_summary(summary_str, global_t)
        summary_writer.flush()
        return

    def _discount_accum_reward(self, rewards, running_add=0.0, gamma=0.99):
        """ discounted the reward using gamma
        """
        discounted_r = np.zeros_like(rewards, dtype=np.float32)
        for t in reversed(range(len(rewards))):
            running_add = rewards[t] + running_add * gamma
            discounted_r[t] = running_add

        return list(discounted_r)

    def process(self, sess, global_t, summary_writer, summary_op, reward_input,
                time_input):
        batch_state = []
        batch_action = []
        batch_reward = []

        terminal_end = False
        # reduce the influence of socket connecting time
        if self.episode_start_time == 0.0:
            self.episode_start_time = timestamp()

        # copy weight from global network
        sess.run(self.sync)

        start_local_t = self.local_t
        if USE_LSTM:
            start_lstm_state = self.local_network.lstm_state_out

        for i in range(LOCAL_T_MAX):
            policy_ = self.local_network.run_policy(sess, self.game_state.s_t)
            if self.thread_index == 0 and self.local_t % 1000 == 0:
                print('policy=', policy_)

            action_id = self.choose_action(policy_)

            action_onehot = np.zeros([ACTION_DIM])
            action_onehot[action_id] = 1
            batch_state.append(self.game_state.s_t)
            batch_action.append(action_onehot)

            self.game_state.process(action_id)
            reward = self.game_state.reward
            terminal = self.game_state.terminal

            self.episode_reward += reward
            batch_reward.append(np.clip(reward, -1.0, 1.0))

            self.local_t += 1

            # s_t1 -> s_t
            self.game_state.update()

            if terminal:
                terminal_end = True
                episode_end_time = timestamp()
                living_time = episode_end_time - self.episode_start_time

                self._record_log(sess, global_t, summary_writer, summary_op,
                                 reward_input, self.episode_reward, time_input,
                                 living_time)

                print("global_t=%d / reward=%.2f / living_time=%.4f") % (
                    global_t, self.episode_reward, living_time)

                # reset variables
                self.episode_reward = 0.0
                self.episode_start_time = episode_end_time
                self.game_state.reset()
                if USE_LSTM:
                    self.local_network.reset_lstm_state()
                break
            # log
            if self.local_t % 40 == 0:
                living_time = timestamp() - self.episode_start_time
                self._record_log(sess, global_t, summary_writer, summary_op,
                                 reward_input, self.episode_reward, time_input,
                                 living_time)
        # -----------end of batch (LOCAL_T_MAX)--------------------

        R = 0.0
        if not terminal_end:
            R = self.local_network.run_value(sess, self.game_state.s_t)
        # print ('global_t: %d, R: %f') % (global_t, R)

        batch_value = self.local_network.run_batch_value(
            sess, batch_state, start_lstm_state)
        batch_R = self._discount_accum_reward(batch_reward, R, GAMMA)
        batch_td = np.array(batch_R) - np.array(batch_value)
        cur_learning_rate = self._anneal_learning_rate(global_t)

        # print("=" * 60)
        # print(batch_value)
        # print(self.local_network.run_batch_value(sess, batch_state, start_lstm_state))
        # print("=" * 60)
        # import sys
        # sys.exit()

        if USE_LSTM:
            sess.run(self.apply_gradients,
                     feed_dict={
                         self.local_network.state_input: batch_state,
                         self.local_network.action_input: batch_action,
                         self.local_network.td: batch_td,
                         self.local_network.R: batch_R,
                         self.local_network.step_size: [len(batch_state)],
                         self.local_network.initial_lstm_state:
                         start_lstm_state,
                         self.learning_rate_input: cur_learning_rate
                     })
        else:
            sess.run(self.apply_gradients,
                     feed_dict={
                         self.local_network.state_input: batch_state,
                         self.local_network.action_input: batch_action,
                         self.local_network.td: batch_td,
                         self.local_network.R: batch_R,
                         self.learning_rate_input: cur_learning_rate
                     })

        diff_local_t = self.local_t - start_local_t
        return diff_local_t
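
_discount_accum_reward above walks the reward list backwards and accumulates the discounted return, bootstrapping from the value R passed in when the episode did not terminate. A standalone sketch of the same recurrence on a toy reward sequence (numbers purely illustrative):

import numpy as np

def discount(rewards, running_add=0.0, gamma=0.99):
    # same backward recurrence as _discount_accum_reward
    discounted = np.zeros_like(rewards, dtype=np.float32)
    for t in reversed(range(len(rewards))):
        running_add = rewards[t] + running_add * gamma
        discounted[t] = running_add
    return list(discounted)

# three steps with a reward only at the end, bootstrap value 0.5 from the critic
print(discount([0.0, 0.0, 1.0], running_add=0.5, gamma=0.99))
# ≈ [1.4652, 1.48, 1.495]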
Code example #17
from sys import argv

from classifier.digit_classifier import DigitClassifier
import cv2

if __name__ == "__main__":
    if len(argv) < 2:
        print("Provide an index")

    test_img_index = int(argv[1])
    img = cv2.imread(f"test_data/frame_{test_img_index}.png", cv2.IMREAD_COLOR)
    game_data_handler = TestGameData(test_img_index)
    game_classifier = GameClassifier()
    game_classifier.load()
    digit_classifier = DigitClassifier()
    digit_classifier.load()

    game_state = GameState(game_data_handler, digit_classifier)
    state, data = game_state.get_game_state(img)
    game_data, my_team = data
    classifier_input = game_dataset.shape_input(game_data, game_data_handler)

    outcome = game_classifier.predict(classifier_input)
    if my_team == "red":
        outcome = 1 - outcome

    pct = f"{outcome * 100:.2f}"

    print(f"Probability of win: {pct}%", flush=True)

    shape = (img.shape[1] // 2, img.shape[0] // 2)

    resized = cv2.resize(img, shape, interpolation=cv2.INTER_AREA)
Code example #18
File: model.py Project: bababax11/slipe
def learn(model_config_path=None, weight_path=None):
    config = Config()
    qc = config.Qlearn

    total_reward_vec = np.zeros(qc.num_consecutive_iterations)  # stores the reward of each episode
    # create the Q-networks, the replay memory, and the actor -------------------------------------
    if model_config_path is None or weight_path is None:
        mainQN = QNetwork(config)  # main Q-network
        mainQN.build()
        targetQN = QNetwork(config)  # target Q-network used to compute values
        targetQN.build()
    else:
        mainQN = QNetwork(config)
        success_load = mainQN.load(model_config_path, weight_path)
        if not success_load:
            raise FileNotFoundError(
                f"Could not load {model_config_path} {weight_path}")
        targetQN = QNetwork(config)
        targetQN.load(model_config_path, weight_path)
    memory = Memory(max_size=qc.memory_size)

    for episode in trange(qc.num_episodes):  # repeat for the configured number of episodes
        gs = GameState()
        state = gs.random_play()  # take an arbitrary action on the first step
        episode_reward = 0

        targetQN.model.set_weights(
            mainQN.model.get_weights())  # make the action-selection and value-estimation Q-networks identical

        for t in range(qc.max_number_of_steps):  # loop over pairs of moves
            board = gs.to_inputs()

            state, action = take_action_eps_greedy(board, episode, mainQN,
                                                   gs)  # decide the action at time t
            # next_state, reward, done, info = env.step(action)   # compute s_{t+1} and r_t from executing action a_t

            # verbose ==========
            # if t % 10 == 9:
            #     print(gs)
            # ==================

            if state == Winner.minus:
                reward = qc.reward_win  # reward for a win
            else:
                reward = 0

            next_board = gs.to_inputs()

            # board = next_board  # update the state
            # handling at the end of an episode
            if state != Winner.not_ended:
                episode_reward += reward  # update the total reward
                memory.add((board, action, reward, next_board))  # update memory
                # learn/update the Q-network weights (replay)
                if len(memory) > qc.batch_size:  # and not islearned:
                    mainQN.replay(memory, qc.batch_size, qc.gamma, targetQN)
                if qc.DQN_MODE:
                    targetQN.model.set_weights(
                        mainQN.model.get_weights())  # make the action-selection and value-estimation Q-networks identical

                total_reward_vec = np.hstack(
                    (total_reward_vec[1:], episode_reward))  # record the reward
                print(
                    '%d/%d: Episode finished after %d time steps / mean %f winner: %s'
                    % (episode + 1, qc.num_episodes, t + 1,
                       total_reward_vec.mean(),
                       'plus' if state == Winner.plus else 'minus'))
                break

            state, _ = gs.random_play()

            if state == Winner.plus:
                reward = qc.reward_lose
            else:
                reward = 0

            episode_reward += reward  # update the total reward
            memory.add((board, action, reward, next_board))  # update memory

            # learn/update the Q-network weights (replay)
            if len(memory) > qc.batch_size:  # and not islearned:
                mainQN.replay(memory, qc.batch_size, qc.gamma, targetQN)

            if qc.DQN_MODE:
                targetQN.model.set_weights(
                    mainQN.model.get_weights())  # make the action-selection and value-estimation Q-networks identical

            # handling at the end of an episode
            if state != Winner.not_ended:
                total_reward_vec = np.hstack(
                    (total_reward_vec[1:], episode_reward))  # record the reward
                print(
                    '%d/%d: Episode finished after %d time steps / mean %f winner: %s'
                    % (episode + 1, qc.num_episodes, t + 1,
                       total_reward_vec.mean(),
                       'plus' if state == Winner.plus else 'minus'))
                break

        # judge termination by the mean reward over several episodes
        # if total_reward_vec.mean() >= goal_average_reward:
        #     print('Episode %d train agent successfully!' % episode)
        # islearned = True
        if episode % qc.save_interval == qc.save_interval - 1:
            d = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
            mainQN.save(f"results/001_QLearning/{d}-mainQN.json",
                        f"results/001_QLearning/{d}-mainQN.h5")
            with open(f"results/001_QLearning/{d}-config.json", 'x') as f:
                json.dump(config._to_dict(), f, indent=4)

    # save at the end (skip if we just saved)
    if episode % qc.save_interval != qc.save_interval - 1:
        d = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
        mainQN.save(f"results/001_QLearning/{d}-mainQN.json",
                    f"results/001_QLearning/{d}-mainQN.h5")
        with open(f"results/001_QLearning/{d}-config.json", 'x') as f:
            json.dump(config._to_dict(), f, indent=4)
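
learn() repeatedly copies the weights of mainQN (used to pick actions) into targetQN (used to evaluate them), the standard DQN target-network sync. Below is a minimal sketch of that sync step with a hypothetical tiny stand-in for the project's QNetwork, using only the plain Keras API; the network shape and data are illustrative, not taken from the project.

import numpy as np
from tensorflow import keras

def tiny_q_net():
    # hypothetical stand-in for QNetwork.build(): 4 input features -> 2 action values
    return keras.Sequential([
        keras.Input(shape=(4,)),
        keras.layers.Dense(8, activation="relu"),
        keras.layers.Dense(2),
    ])

mainQN = tiny_q_net()
targetQN = tiny_q_net()
# the sync used throughout learn(): copy main weights into the target network
targetQN.set_weights(mainQN.get_weights())

x = np.random.rand(1, 4).astype("float32")
assert np.allclose(mainQN.predict(x, verbose=0), targetQN.predict(x, verbose=0))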
Code example #19

atexit.register(cleanup)

get_nicknames(clients)
print('The players playing are: ', end='')
for i, client in enumerate(clients):
    print(client[NAME], end='')
    if not i == len(clients) - 1:
        print(', ', end='')
    else:
        print()  # newline

maze = random_maze(MAZE_WIDTH, server_config.MAP_COMPLEXITY,
                   server_config.MAP_DENSITY, PLAYERS)
game = GameState(maze)

for id_, client in enumerate(clients):
    player = Player(maze.starting_locations[id_], client[NAME])
    client['id'] = player.id
    client[PLAYER] = player
    game.add_player(player)

init_player_data = []
for _, player in game.players.items():
    init_player_data.append(player.serializable_init())

print("Sending maze data...")
# Send the maze to all clients
for client in clients:
    network.message.send_msg(client[SOCKET][0], str.encode(maze.as_json()))
Code example #20
File: test_game_state.py Project: bababax11/slipe
 def setUp(self):
     self.gs = GameState()
Code example #21
class A3CActorThread(object):
    def __init__(self, thread_index, global_network, initial_learning_rate,
                 learning_rate_input, optimizer, max_global_time_step, device):

        self.thread_index = thread_index
        self.learning_rate_input = learning_rate_input
        self.max_global_time_step = max_global_time_step

        if USE_LSTM:
            self.local_network = A3CLSTMNetwork(STATE_DIM, STATE_CHN,
                                                ACTION_DIM, device,
                                                thread_index)
        else:
            self.local_network = A3CFFNetwork(STATE_DIM, STATE_CHN, ACTION_DIM,
                                              device, thread_index)
        self.local_network.create_loss(ENTROPY_BETA)
        self.gradients = tf.gradients(self.local_network.total_loss,
                                      self.local_network.get_vars())

        clip_accum_grads = [
            tf.clip_by_norm(accum_grad, 10.0) for accum_grad in self.gradients
        ]
        self.apply_gradients = optimizer.apply_gradients(
            zip(clip_accum_grads, global_network.get_vars()))
        # self.apply_gradients = optimizer.apply_gradients(zip(self.gradients, global_network.get_vars()))

        self.sync = self.local_network.sync_from(global_network)

        self.game_state = GameState(thread_index)

        self.local_t = 0
        self.initial_learning_rate = initial_learning_rate

        # for log
        self.episode_reward = 0.0
        self.episode_start_time = 0.0
        self.prev_local_t = 0
        return

    def _anneal_learning_rate(self, global_time_step):
        learning_rate = self.initial_learning_rate * \
            (self.max_global_time_step - global_time_step) / self.max_global_time_step
        if learning_rate < 0.0:
            learning_rate = 0.0
        return learning_rate

    def choose_action(self, policy_output):
        return np.random.choice(range(len(policy_output)), p=policy_output)

    def _record_log(self, sess, global_t, summary_writer, summary_op,
                    reward_input, reward, time_input, living_time):
        summary_str = sess.run(summary_op,
                               feed_dict={
                                   reward_input: reward,
                                   time_input: living_time
                               })
        summary_writer.add_summary(summary_str, global_t)
        summary_writer.flush()
        return

    def process(self, sess, global_t, summary_writer, summary_op, reward_input,
                time_input):
        states = []
        actions = []
        rewards = []
        values = []

        terminal_end = False
        # reduce the influence of socket connecting time
        if self.episode_start_time == 0.0:
            self.episode_start_time = timestamp()

        # copy weight from global network
        sess.run(self.sync)

        start_local_t = self.local_t
        if USE_LSTM:
            start_lstm_state = self.local_network.lstm_state_out

        for i in range(LOCAL_T_MAX):
            policy_, value_ = self.local_network.run_policy_and_value(
                sess, self.game_state.s_t)
            if self.thread_index == 0 and self.local_t % 1000 == 0:
                print('policy=', policy_)
                print('value=', value_)

            action_id = self.choose_action(policy_)

            states.append(self.game_state.s_t)
            actions.append(action_id)
            values.append(value_)

            self.game_state.process(action_id)
            reward = self.game_state.reward
            terminal = self.game_state.terminal

            self.episode_reward += reward
            rewards.append(np.clip(reward, -1.0, 1.0))

            self.local_t += 1

            # s_t1 -> s_t
            self.game_state.update()

            if terminal:
                terminal_end = True
                episode_end_time = timestamp()
                living_time = episode_end_time - self.episode_start_time

                self._record_log(sess, global_t, summary_writer, summary_op,
                                 reward_input, self.episode_reward, time_input,
                                 living_time)

                print("global_t=%d / reward=%.2f / living_time=%.4f") % (
                    global_t, self.episode_reward, living_time)

                # reset variables
                self.episode_reward = 0.0
                self.episode_start_time = episode_end_time
                self.game_state.reset()
                if USE_LSTM:
                    self.local_network.reset_lstm_state()
                break
            # log
            if self.local_t % 40 == 0:
                living_time = timestamp() - self.episode_start_time
                self._record_log(sess, global_t, summary_writer, summary_op,
                                 reward_input, self.episode_reward, time_input,
                                 living_time)
        # -----------end of batch (LOCAL_T_MAX)--------------------

        R = 0.0
        if not terminal_end:
            R = self.local_network.run_value(sess, self.game_state.s_t)
        # print ('global_t: %d, R: %f') % (global_t, R)

        states.reverse()
        actions.reverse()
        rewards.reverse()
        values.reverse()

        batch_state = []
        batch_action = []
        batch_td = []
        batch_R = []

        for (ai, ri, si, Vi) in zip(actions, rewards, states, values):
            R = ri + GAMMA * R
            td = R - Vi
            action = np.zeros([ACTION_DIM])
            action[ai] = 1

            batch_state.append(si)
            batch_action.append(action)
            batch_td.append(td)
            batch_R.append(R)

        cur_learning_rate = self._anneal_learning_rate(global_t)
        if USE_LSTM:
            batch_state.reverse()
            batch_action.reverse()
            batch_td.reverse()
            batch_R.reverse()
            sess.run(self.apply_gradients,
                     feed_dict={
                         self.local_network.state_input: batch_state,
                         self.local_network.action_input: batch_action,
                         self.local_network.td: batch_td,
                         self.local_network.R: batch_R,
                         self.local_network.step_size: [len(batch_state)],
                         self.local_network.initial_lstm_state:
                         start_lstm_state,
                         self.learning_rate_input: cur_learning_rate
                     })
        else:
            sess.run(self.apply_gradients,
                     feed_dict={
                         self.local_network.state_input: batch_state,
                         self.local_network.action_input: batch_action,
                         self.local_network.td: batch_td,
                         self.local_network.R: batch_R,
                         self.learning_rate_input: cur_learning_rate
                     })

        diff_local_t = self.local_t - start_local_t
        return diff_local_t
Code example #22
    sys.path.append(path.dirname(path.dirname(path.abspath(__file__))))

from game.spells import Effect, TimedEffect
from game.spells import magic_missile, drain, shield, poison, recharge
from game.characters import Boss, Wizard
from game.strategies import SelectSpellByPredefinedOrder
from game.game_state import GameState

def hit_armor_mana(wizard):
	return (wizard.hit_points, wizard.armor, wizard.mana)

# Now, suppose the same initial conditions, except that the boss has 14 hit points instead:
mock_order = [recharge(), shield(), drain(), poison(), magic_missile()]
wizard = Wizard(hit_points=10, mana=250, spell_selection_strategy=SelectSpellByPredefinedOrder(mock_order))
boss = Boss(hit_points=14, damage=8)
game_state = GameState(wizard, boss)

# -- Player turn --
# - Player has 10 hit points, 0 armor, 250 mana
assert hit_armor_mana(wizard) == (10, 0, 250)
# - Boss has 14 hit points
assert boss.hit_points == 14
# Player casts Recharge.
s = wizard.launch_spell(game_state)
assert s.name == 'Recharge'

# -- Boss turn --
# - Player has 10 hit points, 0 armor, 21 mana
assert hit_armor_mana(wizard) == (10, 0, 21)
# - Boss has 14 hit points
assert boss.hit_points == 14
Code example #23
class Game:
    def __init__(self, settings: Settings) -> None:
        pygame.init()

        self.settings = settings
        self.font = pygame.font.SysFont("Arial", 18)

        self.display = pygame.display.set_mode(
            (self.settings.screen_width, self.settings.screen_height))
        self.clock = pygame.time.Clock()
        pygame.display.set_caption(self.settings.window_caption)
        self.running = False
        self.current_level = LevelFactory.create(1)
        self.camera_position = self.current_level.initial_position
        self.game_state = GameState(self.current_level)
        self.level_renderer = MapRenderer(
            self.game_state,
            self.display,
            self.current_level.sprite,
        )

    def run(self) -> None:
        self.running = True

        while self.running:
            self.handle_input()
            self.update()
            self.render()
            self.clock.tick(self.settings.frames_per_second)

        pygame.quit()
        quit()

    def update(self) -> None:
        self.game_state.update_camera(self.camera_position)

    def update_fps(self) -> pygame.surface.Surface:
        fps = str(int(self.clock.get_fps()))
        fps_text = self.font.render(fps, True, pygame.Color("coral"))
        return fps_text

    def render(self) -> None:
        self.display.fill((0, 0, 0))
        self.display.blit(self.update_fps(), (10, 0))
        self.level_renderer.render(self.game_state)
        pygame.display.update()

    def handle_input(self) -> None:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                self.running = False
            if event.type == pygame.KEYDOWN:
                if event.key == pygame.K_ESCAPE:
                    self.running = False
                    return
                elif event.key == pygame.K_RIGHT:
                    self.camera_position = (
                        self.camera_position[0] + 1,
                        self.camera_position[1],
                    )
                elif event.key == pygame.K_LEFT:
                    self.camera_position = (
                        self.camera_position[0] - 1,
                        self.camera_position[1],
                    )
                elif event.key == pygame.K_DOWN:
                    self.camera_position = (
                        self.camera_position[0],
                        self.camera_position[1] + 1,
                    )
                elif event.key == pygame.K_UP:
                    self.camera_position = (
                        self.camera_position[0],
                        self.camera_position[1] - 1,
                    )
            return
Code example #24
def PlayGame(stop_flag, attach_target):
    gs = GameState()
    items = [
        "spellthief's edge", "Tear of the Goddess", "kindlegem",
        "amplifying Tome", "amplifying Tome", "Blasting Wand", "EverFrost"
    ]
    loop_count = 1
    ff_time = 0
    first_run = True

    s_time = time.time()
    increase_loop_dur = random.randint(3, 7)

    while Client.is_league_game_running():
        gs.update()

        if gs.has_game_started() and not stop_flag['val']:
            if first_run is True:
                time.sleep(1)
                Actions.cast_spell('ctrl+4')
                Actions.cast_spell('y')
                time.sleep(1)
                Actions.purchase_recommend()
                first_run = False
                ff_time = time.time() + 60 * 15
                Actions.action_troll_ward(gs.get_my_team_side())

            if time.time() > ff_time:
                Actions.type_in_chat("/ff")
                ff_time += 60

            if not gs.is_adc_dead() and not gs.is_i_dead():
                if gs.is_yummi_attached() is True:
                    if gs.is_adc_hp_low() is True:
                        Actions.cast_spell('e')

                    if gs.is_adc_hp_critical() is True:
                        coord = gs.get_general_enemy_dir_coords()
                        Actions.cast_spell('d')
                        mouse.move(coord.x, coord.y)
                        time.sleep(0.01)
                        Actions.cast_spell('r')
                        time.sleep(0.01)
                        Actions.cast_spell('q')
                else:
                    Actions.yummi_attach(attach_target['val'])

            if gs.is_i_dead():
                Actions.purchase_recommend()

                if random.randint(0, 15) == 10:
                    Actions.type_shit_in_chat()

            if gs.is_adc_dead() and not gs.is_i_dead():
                if gs.get_fountain_coords() is not None:
                    Actions.retreat(gs.get_fountain_coords())

            if time.time() - s_time > increase_loop_dur:
                loop_count = loop_count + 1

                increase_loop_dur = random.randint(3, 7)
                s_time = time.time()

            if loop_count % 3 == 0:
                if random.randint(0, 1) == 1:
                    Actions.random_mouse_movement()
                    time.sleep(0.15)

            if loop_count % 4 == 0:
                if random.randint(0, 1) == 1:
                    Actions.level_all_spells('r', 'q', 'w', 'e')

            if loop_count % 15 == 0:
                if random.randint(0, 1) == 1:
                    if gs.is_yummi_attached():
                        Actions.cast_spell('4')
                        Actions.cast_spell('1')

            if loop_count % 15 == 0:
                if random.randint(0, 1) == 1:
                    if gs.is_yummi_attached():
                        Actions.cast_spell('ctrl+4')

            time.sleep(0.04)
Code example #25
def successors_2(game_state):
	"""Return a dict of {state:action} pairs.  A state is a (wizard, boss, timed_effects) tuple,
	wizard: (hit_points, armor, mana); boss: (hit_points)"""
	result = {}
	w, b, in_use_spells_state = game_state
	spells_in_use = [name for name, timer in in_use_spells_state if timer > 1]
	# apply_effects gets called once before launch_spell is called;
	# this clears up spells that have only timer=1 left
	spells = [magic_missile, drain, shield, poison, recharge]
	available_spells = [s for s in spells if s().name not in spells_in_use]

	# print game_state

	for available_spell in available_spells:
		boss = Boss(damage=boss_damage, hit_points=b)
		h,a,m = w
		wizard = Wizard(mana=m,
			hit_points=h,
			armor=a)
		gs = GameState(wizard, boss, level='hard')

		candidate_spell = available_spell()
		# print 'candidate_spell: %s' % candidate_spell.name

		# restore timed spells
		for name, timer in in_use_spells_state:
			gs.spells.append(get_spell_by_name(name)(timer))
			
		gs.apply_effects('before_wizard')
		if not wizard.is_alive():
			# can't happen at easy level
			# print 'wizard is dead by hard level 1'
			# print represent(gs)
			continue

		if not boss.is_alive():
			# if boss dies by existing effects it is not necessary to add
			# a new spell
			result[represent(gs)] = None # boss is killed by existing spells
		else:
			# here boss is alive
			wizard.launch_spell(gs, spell=candidate_spell)
			if not wizard.is_alive():
				# print 'wizard is dead launching spell'
				# print represent(gs)
				continue
			if not boss.is_alive(): # boss killed by spell
				result[represent(gs)] = candidate_spell.name
			else:
				gs.apply_effects('before_boss')
				if not wizard.is_alive():
					# print 'wizard is dead by hard level 2'
					# print represent(gs)
					continue
				if not boss.is_alive(): # boss killed by timed effect
					result[represent(gs)] = candidate_spell.name
				else:
					boss.attack(wizard)

					if wizard.is_alive():
						# if wizard is alive I log the launched spell
						# or else I don't care about this state
						result[represent(gs)] = candidate_spell.name

	return result
Code example #26
    classifier_input = shape_input(game_data, game_data_handler)
    outcome = game_classifier.predict(classifier_input)
    if my_team == "red":
        outcome = 1 - outcome
    return outcome


TAB_PRESSED = False
ALT_PRESSED = False

game_data_handler = GameData()
game_classifier = GameClassifier().cpu()
game_classifier.load()
digit_classifier = DigitClassifier().cpu()
digit_classifier.load()
game_state_handler = GameState(game_data_handler, digit_classifier)

auth = json.load(open("data/auth.json", encoding="utf-8"))

while True:
    if not ALT_PRESSED and alt_pressed():
        ALT_PRESSED = True
    elif ALT_PRESSED and not alt_pressed():
        ALT_PRESSED = False

    if not TAB_PRESSED and tab_pressed():
        TAB_PRESSED = True
        if not ALT_PRESSED:
            try:
                state, data = get_game_state(game_state_handler)
Code example #27
                {
                    "championId": self.champions[8], "teamId": 200, "summonerId": self.summ_ids[8],
                    "spell1Id": self.get_summoner_spell_index(self.summs[8][0]), # Exhaust
                    "spell2Id": self.get_summoner_spell_index(self.summs[8][1]) # Flash
                },
                {
                    "championId": self.champions[9], "teamId": 200, "summonerId": self.summ_ids[9],
                    "spell1Id": self.get_summoner_spell_index(self.summs[9][0]), # Ignite
                    "spell2Id": self.get_summoner_spell_index(self.summs[9][1]) # Flash
                } 
            ]
        }

if __name__ == "__main__":
    test_img_index = 4
    img = cv2.imread(f"test_data/frame_{test_img_index}.png", cv2.IMREAD_COLOR)
    champion_data = TestGameData(test_img_index)
    digit_classifier = DigitClassifier()
    digit_classifier.load()
    game_state_handler = GameState(champion_data, digit_classifier)
    state, data = game_state_handler.get_game_state(img)
    game_data, my_team = data
    for team in game_data:
        print(f"====== {team.upper()} TEAM ======")
        print(f"Towers destroyed: {game_data[team]['towers_destroyed']}")
        print(f"Dragons: {game_data[team]['dragons']}")
        for player_data in game_data[team]["players"]:
            print(player_data)
            print("***********************************************")
    
Code example #28
def new_game():
    print("NEW GAME")
    g = GameState(3, 3, 3)
    play(g)