Example 1
import numpy as np
import tensorflow as tf

gamma = 0.99  # discount factor; referenced but not defined in the original listing (value assumed)


class Agent:
    def __init__(self, state_size, action_size, sample_num):
        # Policy is the policy-network wrapper defined elsewhere in the project.
        sess = tf.Session()
        self.policy = Policy(sess, state_size, action_size, sample_num)
        self.state_batch = []
        self.action_batch = []
        self.reward_list = []
        self.step_list = []      # number of steps in each sampled episode (populated elsewhere in the original code)
        self.weight_batch = []   # discounted return weight for every visited state
        self.sample_num = sample_num
        sess.run(tf.global_variables_initializer())

    def choose_action(self, state):
        # Sample an action from the current policy for the given state.
        return self.policy.choose_action(state)

    def store(self, state, action, reward):
        # Record one transition of the current episode.
        self.state_batch.append(state)
        self.action_batch.append(action)
        self.reward_list.append(reward)

    def train(self):
        state_batch = np.vstack(self.state_batch)
        action_batch = np.vstack(self.action_batch)
        # For every visited state, compute the discounted return from that state
        # to the end of its episode (reward-to-go), used as the weight of the
        # corresponding term in the policy update.
        t = 0
        for i in range(self.sample_num):
            tlast = t + self.step_list[i]
            for _ in range(self.step_list[i]):
                weight = 0.0
                for n in range(t, tlast):
                    weight += self.reward_list[n] * np.power(gamma, (n - t))
                self.weight_batch.append(weight)
                t += 1
        weight_batch = np.vstack(self.weight_batch)
        self.policy.train(state_batch, action_batch, weight_batch)
        # Clear the buffers for the next batch of episodes.
        self.state_batch = []
        self.action_batch = []
        self.reward_list = []
        self.step_list = []
        self.weight_batch = []
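
The nested loops in Agent.train assign each visited state a weight equal to the discounted return accumulated from that state to the end of its episode (the reward-to-go used in REINFORCE-style policy gradients). The following self-contained sketch reproduces that computation for a single short episode so the weighting can be inspected in isolation; the reward values and the 0.99 discount factor are illustrative assumptions, not values from the original program.

import numpy as np

gamma = 0.99                      # assumed discount factor
rewards = [1.0, 0.0, 0.5, 2.0]    # illustrative per-step rewards of one episode

# Reward-to-go: weight[t] = sum over n >= t of gamma**(n - t) * rewards[n],
# which is exactly what the inner loops of Agent.train compute per state.
weights = []
for t in range(len(rewards)):
    weight = 0.0
    for n in range(t, len(rewards)):
        weight += rewards[n] * gamma ** (n - t)
    weights.append(weight)

print(np.round(weights, 3))  # approximately [3.431, 2.455, 2.48, 2.0]
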
Example 2
# Fragment of the main training loop; the enclosing `for episode in ...:` loop and
# the `if` branch that enables rendering for selected episodes are omitted in the
# original listing.
    else:
        render = False

    # Periodically print training statistics.
    if episode % SHOW_INFOS == 1:
        show_infos(episode)

    # Reset the environment for a new episode.
    init_game()

    over = False

    if render:
        show_render()

    while not over:

        # Select an action with the current (epsilon-controlled) policy,
        # apply it to the environment and check for episode termination.
        action = policy.choose_action()
        env.apply_action(action)
        over = is_over()

        # Store the transition in the replay memory and run one training step.
        policy.update_replay_memory(over)
        policy.train(over)

        steps_remaining -= 1  # step budget is handled elsewhere in the original code

        if render:
            show_render()

    # Linear epsilon decay between the configured start and end episodes.
    if policy.END_EPSILON_DECAYING >= episode >= policy.START_EPSILON_DECAYING:
        policy.epsilon -= policy.epsilon_decay_value

policy.test_model()
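
The per-episode update in Example 2 suggests a linear epsilon-decay schedule: policy.epsilon_decay_value is presumably the starting exploration rate divided by the number of decaying episodes. A minimal, self-contained sketch of such a schedule follows; all constants (EPISODES, the decay window, the starting epsilon) and the clamping at zero are illustrative assumptions, not values from the original program.

# Minimal sketch of a linear epsilon-decay schedule (all values assumed).
EPISODES = 2000
START_EPSILON_DECAYING = 1
END_EPSILON_DECAYING = EPISODES // 2

epsilon = 1.0  # initial exploration rate
epsilon_decay_value = epsilon / (END_EPSILON_DECAYING - START_EPSILON_DECAYING)

for episode in range(EPISODES):
    # ... one episode of interaction would run here ...
    if END_EPSILON_DECAYING >= episode >= START_EPSILON_DECAYING:
        epsilon = max(epsilon - epsilon_decay_value, 0.0)  # clamp at zero

print(epsilon)  # ~0 once the decaying window has passed
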