Esempi in Python per Policy.calculate_probs

Linguaggio di programmazione: Python

Spazio dei nomi/nome del pacchetto: policy

Classe/tipologia: Policy

Metodo/funzione: calculate_probs

Esempi su hotexamples.com: 1

Policy.calculate_probs in Python: 1 esempio trovato. Questo è il miglior esempio reale in Python per policy.Policy.calculate_probs, estratto da progetti open source. Lo puoi valutare, per aiutarci a migliorare la qualità dei nostri esempi.

Metodi utilizzati di frequente

Mostra Nascondi

Policy(30)

action_prob(20)

__init__(13)

act(12)

checkWin(6)

build_deterministic(5)

action(4)

MakeMove(3)

build(3)

CheckLegal(3)

CRAWLER_NUMBER(2)

query(2)

qFunc(2)

choose_action(2)

fromString(2)

INVALID(2)

epsilonGreedy(2)

check_policy(1)

user(1)

classifier(1)

group(1)

script(1)

set_probability(1)

APPLY_TIME_INTERVAL(1)

actions_probas_from(1)

check(1)

calculate_probs(1)

apply_accumulated_gradients(1)

add_models(1)

B(1)

action_masks(1)

_placeholders(1)

_func(1)

__getitem__(1)

W(1)

TIME_INTERVAL_ST(1)

TIME_INTERVAL_ED(1)

CRAWLER_TYPE(1)

weights(1)

Esempio n. 1

Mostra file

def main():
    """Evaluate a trained policy greedily on a Jericho game and log the result.

    Parses the command-line arguments, loads the policy weights for the
    configured game/round/UCT type/seed, then plays five episodes (environment
    seeds 0-4).  At every step the valid action with the highest probability
    returned by ``policy.calculate_probs`` is executed.  The final score of
    each episode is collected; the list of scores and their mean are printed,
    and a one-line summary (mean and standard deviation) is appended to
    ``outputs/eval_result_<game_name>_<uct_type>.txt``.
    """
    args = parse_args()
    print(args)

    # Maximum lengths handed to utils.state_representation for each field.
    maxlen_obs = 150
    maxlen_look = 150
    maxlen_inv = 50
    max_len_action = 12

    # NOTE(review): `sp` is not referenced again in this function; the load is
    # kept because it fails fast when the model file is missing -- confirm
    # whether it is still needed.
    sp = spm.SentencePieceProcessor()
    sp.Load('../spm_models/unigram_8k.model')

    rom_path = args.rom_path + utils.game_file(args.game_name)

    policy = Policy(args)
    # Alternative weight source kept for reference:
    # policy.model.load_weights('weights/%s_%s_round%s.5000.h5' % (args.game_name, args.uct_type, args.round))
    policy.load_weights(
        'gcp/weights/%s/round_%d/%s_weight_policy_best_seed%d.pickle' %
        (args.game_name, args.round, args.uct_type, args.seed))
    # 63 / 100
    # NOTE(review): this environment is replaced inside the loop below before
    # it is ever stepped; it is kept only to preserve the original side effects.
    env = JerichoEnv(rom_path, 1, args.env_step_limit)
    env.create()

    scores = []

    for seed in range(5):
        # A fresh environment per evaluation seed.
        env = JerichoEnv(rom_path, seed, args.env_step_limit)
        env.create()

        obs, info = env.reset()
        prev_action = '<s>'

        # Optional scripted warm-up actions (disabled):
        # livingroom_steps = ["S", "E"]
        #
        # for action in livingroom_steps:
        #     obs, reward, done, info = env.step(action)
        # prev_action = action

        for step in range(args.max_episode_len):
            print('#################################################')
            print('STEP: %s' % step)
            print()
            print(info['look'])
            print()
            print(info['inv'])
            print()

            # Encode the raw texts, previous action and score for the policy.
            obs, look, inv, prev_action, score = utils.state_representation(
                obs, info['look'], info['inv'], prev_action, info['score'],
                maxlen_obs, maxlen_look, maxlen_inv, max_len_action)
            probs = policy.calculate_probs(obs, look, inv, prev_action, score,
                                           info['valid'])
            print(info['valid'])
            print(probs)

            # Greedy selection; the sampling alternative is kept for reference:
            # idx = int(np.random.choice([i for i in range(probs.shape[0])], 1, p=probs[:,0]))
            idx = np.argmax(probs)
            action = info['valid'][idx]

            # NOTE(review): `done` is ignored, so the episode always runs for
            # args.max_episode_len steps -- confirm that stepping a finished
            # environment is intended.
            obs, reward, done, info = env.step(action)

            print('ACTION: %s' % action)
            print()
            print('Reward: %s, Score: %s' % (reward, info['score']))
            print()
            print(obs + info['look'] + info['inv'])
            print()

            prev_action = action

        # Score reported by the environment after the last step of the episode.
        scores.append(info['score'])

    print(scores)
    print('AVERAGE SCORE: %s' % np.mean(scores))

    # Append the summary; the context manager closes the file even if
    # formatting or writing raises.
    result_path = 'outputs/eval_result_%s_%s.txt' % (args.game_name,
                                                     args.uct_type)
    with open(result_path, 'a') as f:
        f.write("- Round %d (learning) : num_eval=%d, mean_ep_return=%.3f, std_ep_return=%.3f\n" % \
                (args.round, len(scores), np.mean(scores), np.std(scores)))