# Beispiel #1 (Example #1)
# 0
    bar = np.ones(
        (5, 3), dtype=np.uint8) * np.array([255, 255, 0], dtype=np.uint8)
# Benchmark/training run for the 'mouseCl-v0' Gym environment.
# Record wall-clock start time (NOTE(review): elapsed time is never printed in
# this chunk — presumably reported by code outside the visible region; confirm).
st = time.time()
env = gym.make('mouseCl-v0')
o = env.reset()
if args.load:
    # Resume from a checkpoint: the extra arguments restore training progress
    # (global step offset, running reward) and replay-buffer state.
    player = Player(env.observation_space, env.action_space, args.load,
                    args.log_name, cur_loop * total_steps, cur_r, buf_full,
                    load_buffer, buf_count)
else:
    # Fresh agent with default settings.
    player = Player(env.observation_space, env.action_space)
if not args.vm:
    # Skip rendering when running on a (presumably headless) VM — TODO confirm
    # the intent of the --vm flag against the argument parser.
    env.render()
# Main act/step loop over the whole training budget.
for step in trange(total_steps, ncols=50):
    action = player.act(o)
    o, r, d, i = env.step(action)
    # NOTE(review): the POST-step observation `o` is passed to player.step here,
    # unlike the bef_o/aft_o pattern used elsewhere in this file — confirm this
    # matches the Player.step contract for this variant.
    player.step(o, r, d, i)
    if d:
        # Episode finished; start a new one.
        o = env.reset()
    if not args.vm:
        env.render()

# Persist the model; save_model() appears to return an identifier for the next
# checkpoint, used to build the next checkpoint directory path below.
next_save = player.save_model()
if not args.load:
    save_dir = player.save_dir
else:
    # When resuming, keep saving next to the loaded checkpoint.
    save_dir, _ = os.path.split(args.load)
next_dir = os.path.join(save_dir, str(next_save))
# Evaluate on a fresh environment instance and report the score.
score = player.evaluate(gym.make('mouseCl-v0'), vid_type)
print('eval_score:{0}'.format(score))
# Beispiel #2 (Example #2)
# 0
        log_name= args.log_name
    )
else :
    player = Player(
        observation_space= env.observation_space,
        action_space= env.action_space, 
        model_f= model_f,
        tqdm= my_tqdm,
    )
# Optional on-screen rendering of the initial state.
if args.render :
    env.render()

if args.profile:
    # Warm up: run enough steps to pass the learning-start threshold (plus a
    # small margin) so the profiled section below captures steady-state
    # behavior rather than buffer-filling — presumably hp.Learn_start is the
    # replay-buffer fill threshold; confirm against the hyperparameter module.
    for step in range(hp.Learn_start+20):
        action = player.act(bef_o)
        aft_o,r,d,i = env.step(action)
        # Transition is stored with the PRE-step observation (bef_o).
        player.step(bef_o,action,r,d,i)
        if d :
            # Episode done: restart from a fresh observation.
            bef_o = env.reset()
        else:
            # Carry the new observation into the next step.
            bef_o = aft_o
        if args.render :
            env.render()

    with Profile(f'log/{args.log_name}'):
        for step in range(5):
            action = player.act(bef_o)
            aft_o,r,d,i = env.step(action)
            player.step(bef_o,action,r,d,i)
            if d :
#         action = 2
#     elif step%5 == 1 :
#         action = 1
#     elif step%5 == 2 :
#         action = 2
#     elif step%5 == 3 :
#         action = 1
#     elif step%5 == 4 :
#         action = 0
#     o, r, d, i = test_env.step(action)
#     player.step(action, r,d,i)
#     if d :
#         o = test_env.reset()
if args.profile:
    # Warm up past the learning-start threshold so the profiler below records
    # steady-state training steps, not replay-buffer filling.
    for step in trange(hp.Learn_start + 50, ncols=100):
        action = player.act(o, training=True)
        o, r, d, i = test_env.step(action)
        # NOTE(review): this Player variant's step() takes (action, r, d, i)
        # with no observation argument — a different signature than the
        # (obs, ...) variants elsewhere in this file; confirm the API.
        player.step(action, r, d, i)
        if d:
            o = test_env.reset()
    # Capture a short TensorFlow profiler trace (5 steps) to 'log/profile'.
    with tf.profiler.experimental.Profile('log/profile'):
        for step in trange(5, ncols=100):
            action = player.act(o, training=True)
            o, r, d, i = test_env.step(action)
            player.step(action, r, d, i)
            if d:
                o = test_env.reset()

else:
    for step in trange(10000, ncols=100):
        action = player.act(o, training=True)