class Agent:
    """REINFORCE-style policy-gradient agent.

    Buffers (state, action, reward) transitions and, once ``sample_num``
    episodes have been collected, trains the wrapped ``Policy`` using the
    discounted reward-to-go of each step as the per-sample weight.

    NOTE(review): nothing in this class ever appends to ``self.step_list``;
    the caller is presumably expected to record each episode's length there
    before ``train()`` is invoked -- confirm against the calling code.
    """

    def __init__(self, state_size, action_size, sample_num):
        """Build the TF session and policy network and init the buffers.

        Args:
            state_size: dimensionality of the observation vector.
            action_size: number of discrete actions.
            sample_num: number of episodes to accumulate before training.
        """
        sess = tf.Session()
        # Keep a handle on the session so it stays reachable from the agent,
        # not only through the Policy object.
        self.sess = sess
        self.policy = Policy(sess, state_size, action_size, sample_num)
        self.state_batch = []    # one stored state per step
        self.action_batch = []   # one stored action per step
        self.reward_list = []    # one stored reward per step
        self.step_list = []      # per-episode step counts (filled by caller)
        self.weight_batch = []   # discounted returns, built in train()
        self.sample_num = sample_num
        sess.run(tf.global_variables_initializer())

    def choose_action(self, state):
        """Delegate action selection to the underlying policy."""
        return self.policy.choose_action(state)

    def store(self, state, action, reward):
        """Record one (state, action, reward) transition."""
        self.state_batch.append(state)
        self.action_batch.append(action)
        self.reward_list.append(reward)

    def train(self):
        """Train the policy on the buffered episodes, then clear the buffers.

        For episode i (of length ``step_list[i]``) the weight of step t is
        the discounted return G_t = sum_k gamma^k * r_{t+k}.  It is computed
        with an O(T) backward pass (G_t = r_t + gamma * G_{t+1}) instead of
        the original O(T^2) forward double loop.
        """
        state_batch = np.vstack(self.state_batch)
        action_batch = np.vstack(self.action_batch)
        t = 0
        for steps in self.step_list:
            # Backward suffix accumulation of the discounted return.
            returns = [0.0] * steps
            running = 0.0
            for k in range(steps - 1, -1, -1):
                running = self.reward_list[t + k] + gamma * running
                returns[k] = running
            self.weight_batch.extend(returns)
            t += steps
        weight_batch = np.vstack(self.weight_batch)
        self.policy.train(state_batch, action_batch, weight_batch)
        # Reset every buffer for the next batch of episodes.
        self.state_batch = []
        self.action_batch = []
        self.reward_list = []
        self.step_list = []
        self.weight_batch = []
# Fragment of the per-episode training loop: the matching `if` of this
# `else:` (presumably an episode-interval check that enables rendering)
# lies outside the visible chunk -- confirm against the full script.
else:
    render = False
# Periodically print progress; `SHOW_INFOS` is the reporting interval.
if episode % SHOW_INFOS == 1:
    show_infos(episode)
init_game()
over = False
if render:
    show_render()
# Run one episode: act, step the environment, store the transition,
# and train the policy online every step until the episode ends.
while not over:
    action = policy.choose_action()
    env.apply_action(action)
    over = is_over()
    policy.update_replay_memory(over)
    policy.train(over)
    # NOTE(review): `steps_remaining` is decremented but never checked
    # here -- presumably enforced elsewhere; verify a step limit exists.
    steps_remaining -= 1
    if render:
        show_render()
# Linear epsilon decay while the episode index is inside the decay window.
if policy.END_EPSILON_DECAYING >= episode >= policy.START_EPSILON_DECAYING:
    policy.epsilon -= policy.epsilon_decay_value
# Evaluate the current model after the episode.
policy.test_model()