def take_action_eps_greedy(board: np.ndarray, episode: int, mainQN: QNetwork, gs: GameState) -> Tuple[Winner, int]:
    """Choose the action for step t+1 using the epsilon-greedy policy.

    ``board`` must already be in the network input format (see README).
    Returns the (win judgement, move played) pair produced by the game state.
    """
    # Exploration probability shrinks as episodes accumulate, so play
    # becomes increasingly greedy over the course of training.
    epsilon = 0.001 + 0.9 / (1.0 + episode)
    draw = np.random.uniform(0, 1)
    if draw < epsilon:
        # Explore: play a random move.
        return gs.random_play()
    # Exploit: play the move with the highest predicted Q-value.
    q_values = mainQN.model.predict(board)[0]
    return gs.outputs_to_move_max(q_values)
def learn(model_config_path=None, weight_path=None):
    """Train the Q-network through self-play episodes.

    When ``model_config_path``/``weight_path`` are omitted, fresh main and
    target networks are built; otherwise both are loaded from those paths.
    Experience tuples are pushed into a replay memory, and the model plus
    config are checkpointed under ``results/001_QLearning/`` every
    ``save_interval`` episodes and once more after the final episode
    (unless that episode was just checkpointed).

    Raises:
        FileNotFoundError: if the given model config/weights cannot be loaded.
    """
    config = Config()
    qc = config.Qlearn
    # Rolling window holding the reward of the most recent episodes.
    total_reward_vec = np.zeros(qc.num_consecutive_iterations)
    # Create the Q-networks, the replay memory, and the actor --------------
    if model_config_path is None or weight_path is None:
        mainQN = QNetwork(config)    # main Q-network (chooses actions)
        mainQN.build()
        targetQN = QNetwork(config)  # target Q-network (evaluates values)
        targetQN.build()
    else:
        mainQN = QNetwork(config)
        success_load = mainQN.load(model_config_path, weight_path)
        if not success_load:
            raise FileNotFoundError(
                f"{model_config_path} {weight_path}が読み込めませんでした")
        targetQN = QNetwork(config)
        targetQN.load(model_config_path, weight_path)
    memory = Memory(max_size=qc.memory_size)

    for episode in trange(qc.num_episodes):  # one iteration per episode
        gs = GameState()
        state = gs.random_play()  # the very first move is random
        episode_reward = 0
        # Sync the target network with the main network at episode start.
        targetQN.model.set_weights(mainQN.model.get_weights())

        for t in range(qc.max_number_of_steps):  # loop over pairs of moves
            board = gs.to_inputs()
            # Decide the action at time t (epsilon-greedy).
            state, action = take_action_eps_greedy(board, episode, mainQN, gs)
            # next_state, reward, done, info = env.step(action)
            # verbose ==========
            # if t % 10 == 9:
            #     print(gs)
            # ==================
            if state == Winner.minus:
                # Reward for winning — assumes the learner plays the
                # "minus" side; TODO confirm against GameState.
                reward = qc.reward_win
            else:
                reward = 0
            next_board = gs.to_inputs()
            # board = next_board  # state update (disabled)

            # End-of-episode handling: our own move finished the game.
            if state != Winner.not_ended:
                episode_reward += reward  # accumulate total reward
                memory.add((board, action, reward, next_board))  # store transition
                # Learn/update the Q-network weights from replayed experience.
                if len(memory) > qc.batch_size:  # and not islearned:
                    mainQN.replay(memory, qc.batch_size, qc.gamma, targetQN)
                if qc.DQN_MODE:
                    # Plain DQN: keep target identical to main every step.
                    targetQN.model.set_weights(mainQN.model.get_weights())
                total_reward_vec = np.hstack(
                    (total_reward_vec[1:], episode_reward))  # record reward
                print(
                    '%d/%d: Episode finished after %d time steps / mean %f winner: %s' %
                    (episode + 1, qc.num_episodes, t + 1,
                     total_reward_vec.mean(),
                     'plus' if state == Winner.plus else 'minus'))
                break

            # Opponent replies with a random move.
            state, _ = gs.random_play()
            if state == Winner.plus:
                reward = qc.reward_lose  # the "plus" side (opponent) won
            else:
                reward = 0
            episode_reward += reward  # accumulate total reward
            # NOTE(review): next_board was captured before the opponent's
            # move — verify this is the intended transition to store.
            memory.add((board, action, reward, next_board))
            # Learn/update the Q-network weights from replayed experience.
            if len(memory) > qc.batch_size:  # and not islearned:
                mainQN.replay(memory, qc.batch_size, qc.gamma, targetQN)
            if qc.DQN_MODE:
                targetQN.model.set_weights(mainQN.model.get_weights())
            # End-of-episode handling: the opponent's move finished the game.
            if state != Winner.not_ended:
                total_reward_vec = np.hstack(
                    (total_reward_vec[1:], episode_reward))  # record reward
                print(
                    '%d/%d: Episode finished after %d time steps / mean %f winner: %s' %
                    (episode + 1, qc.num_episodes, t + 1,
                     total_reward_vec.mean(),
                     'plus' if state == Winner.plus else 'minus'))
                break

        # Stop once the mean reward over recent trials is good enough:
        # if total_reward_vec.mean() >= goal_average_reward:
        #     print('Episode %d train agent successfuly!' % episode)
        #     islearned = True

        # Periodic checkpoint every save_interval episodes.
        if episode % qc.save_interval == qc.save_interval - 1:
            d = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
            mainQN.save(f"results/001_QLearning/{d}-mainQN.json",
                        f"results/001_QLearning/{d}-mainQN.h5")
            with open(f"results/001_QLearning/{d}-config.json", 'x') as f:
                json.dump(config._to_dict(), f, indent=4)

    # Final save (skipped when the last episode was just checkpointed).
    if episode % qc.save_interval != qc.save_interval - 1:
        d = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
        mainQN.save(f"results/001_QLearning/{d}-mainQN.json",
                    f"results/001_QLearning/{d}-mainQN.h5")
        with open(f"results/001_QLearning/{d}-config.json", 'x') as f:
            json.dump(config._to_dict(), f, indent=4)