Beispiel #1
0
# Resume-state read from the command line (string -> int).
cur_r = int(args.cur_r)  # round counter to resume from
load_buffer = args.load_buffer  # presumably a flag/path for restoring a replay buffer -- confirm

# Progress bar shared with Player for the whole loop.
my_tqdm = tqdm(total=total_steps, dynamic_ncols=True)

print('starting loop, {} loops left'.format(total_loop))
if not args.vm :
    # Only create a viewer when not running headless (e.g. on a VM).
    from gym.envs.classic_control.rendering import SimpleImageViewer
    eye_viewer = SimpleImageViewer(maxwidth=1500)
    # 5x3 yellow pixel strip (uint8 RGB), presumably a separator when composing frames -- confirm.
    bar = np.ones((5,3),dtype=np.uint8)*np.array([255,255,0],dtype=np.uint8)
# For benchmark
st = time.time()  # wall-clock start time
env = gym.make(ENVIRONMENT)
bef_o = env.reset()  # observation seen *before* the upcoming action
if args.load :
    # Resuming from a checkpoint: pass restored step/round counters and buffer.
    player = Player(env.observation_space, env.action_space, my_tqdm,
                args.load, args.log_name, cur_loop*total_steps, cur_r, load_buffer)
elif args.log_name:
    # If log directory is explicitly selected
    player = Player(env.observation_space, env.action_space, my_tqdm, 
                log_name=args.log_name)
else :
    # Fresh run: Player picks its own defaults for logging.
    player = Player(env.observation_space, env.action_space, my_tqdm)
if not args.vm :
    env.render()
for step in range(total_steps):
    action = player.act(bef_o)
    aft_o,r,d,i = env.step(action)
    # Player.step receives the pre-step observation plus the transition result.
    player.step(bef_o,action,r,d,i)
    if d :
        # Episode finished: start a new one.
        bef_o = env.reset()
    else:
        # NOTE(review): excerpt is truncated here -- the else branch
        # (presumably `bef_o = aft_o`) lies outside this snippet.
Beispiel #2
0
    # NOTE(review): excerpt starts mid-block -- the guard wrapping these
    # three lines (likely `if not args.vm:`) is outside this snippet.
    from gym.envs.classic_control.rendering import SimpleImageViewer
    eye_viewer = SimpleImageViewer(maxwidth=1500)
    # 5x3 yellow pixel strip (uint8 RGB), presumably a frame separator -- confirm.
    bar = np.ones((5,3),dtype=np.uint8)*np.array([255,255,0],dtype=np.uint8)
# For benchmark
st = time.time()  # wall-clock start time

# Separate training and evaluation environments, both wrapped by the
# project's EnvWrapper (wrapper semantics defined elsewhere -- confirm).
env = tools.EnvWrapper(gym.make(ENVIRONMENT, **env_kwargs))
eval_env = tools.EnvWrapper(gym.make(ENVIRONMENT, **env_kwargs))
bef_o = env.reset()  # observation seen *before* the upcoming action

if args.log_name:
    # If log directory is explicitly selected
    player = Player(
        observation_space= env.observation_space, 
        action_space= env.action_space, 
        model_f= model_f,
        tqdm= my_tqdm,
        log_name= args.log_name
    )
else :
    # Default log location is chosen by Player itself.
    player = Player(
        observation_space= env.observation_space,
        action_space= env.action_space, 
        model_f= model_f,
        tqdm= my_tqdm,
    )
if args.render :
    env.render()

if args.profile:
    # Warm up
    # NOTE(review): snippet is truncated here -- the profiling warm-up body
    # is outside this excerpt.
Beispiel #3
0
# Resume-state read from the command line.
load_buffer = args.load_buffer  # presumably a flag/path for restoring a replay buffer -- confirm
buf_count = int(args.buf_count)  # saved buffer element count (string -> int)

print('starting loop, {} loops left'.format(total_loop))
if not args.vm:
    # Only create a viewer when not running headless (e.g. on a VM).
    from gym.envs.classic_control.rendering import SimpleImageViewer
    eye_viewer = SimpleImageViewer(maxwidth=1500)
    # 5x3 yellow pixel strip (uint8 RGB), presumably a frame separator -- confirm.
    bar = np.ones(
        (5, 3), dtype=np.uint8) * np.array([255, 255, 0], dtype=np.uint8)
# For benchmark
st = time.time()  # wall-clock start time
env = gym.make('mouseCl-v0')
o = env.reset()
if args.load:
    # Resuming from a checkpoint: pass restored counters and buffer state.
    player = Player(env.observation_space, env.action_space, args.load,
                    args.log_name, cur_loop * total_steps, cur_r, buf_full,
                    load_buffer, buf_count)
else:
    # Fresh run: Player picks its own defaults.
    player = Player(env.observation_space, env.action_space)
if not args.vm:
    env.render()
for step in trange(total_steps, ncols=50):
    action = player.act(o)
    o, r, d, i = env.step(action)
    # NOTE: unlike some sibling scripts, `o` here is already the
    # post-step observation when passed to Player.step.
    player.step(o, r, d, i)
    if d:
        # Episode finished: start a new one.
        o = env.reset()
    if not args.vm:
        env.render()

# Persist the model; return value presumably marks the next save point -- confirm.
next_save = player.save_model()
Beispiel #4
0
# For benchmark
st = time.time()  # wall-clock start time


env = gym.make(ENVIRONMENT, **env_kwargs)
if hp.CLASSIC:
    # Classic-control environments get the project's wrapper -- semantics
    # defined elsewhere, confirm.
    env = tools.EnvWrapper(env)
last_obs = env.reset()
render = args.render
if render :
    env.render()

# Player bundles the model, logging, and mixed-precision configuration.
player = Player(
    observation_space= env.observation_space, 
    action_space= env.action_space, 
    model_f= model_f,
    m_dir=args.load,  # checkpoint location to restore from (CLI arg)
    log_name= args.log_name,
    mixed_float=args.mixed_float,
)

# Per-run bookkeeping for the training loop below.
need_to_eval = False
buf = ReplayBuffer(env.observation_space, env.action_space)
reset_buffer = True
cum_reward = 0.0      # reward accumulated over the current round
rounds = 0            # completed episodes so far
per_round_steps = 0   # steps taken in the current round
act_steps = 0

if args.profile:
    for step in range(20):
        # NOTE(review): excerpt truncated mid-statement -- the tuple-unpacking
        # target list continues beyond this snippet (line-continuation below).
        last_obs, cum_reward, rounds, act_steps,\
Beispiel #5
0
    # NOTE(review): excerpt starts mid-block -- the guard and the
    # SimpleImageViewer import above it are outside this snippet.
    eye_viewer = SimpleImageViewer(maxwidth=1500)
    # 5x3 yellow pixel strip (uint8 RGB), presumably a frame separator -- confirm.
    bar = np.ones(
        (5, 3), dtype=np.uint8) * np.array([255, 255, 0], dtype=np.uint8)
# For benchmark
st = time.time()  # wall-clock start time

env = gym.make(ENVIRONMENT, **env_kwargs)
bef_o = env.reset()  # observation seen *before* the upcoming action

if args.load:
    # Resuming from a checkpoint: restore step/round counters and buffer.
    player = Player(
        observation_space=env.observation_space,
        action_space=env.action_space,
        model_f=model_f,
        tqdm=my_tqdm,
        m_dir=args.load,
        log_name=args.log_name,
        start_step=cur_loop * loop_steps,
        start_round=cur_r,
        load_buffer=load_buffer,
    )
elif args.log_name:
    # If log directory is explicitly selected
    player = Player(observation_space=env.observation_space,
                    action_space=env.action_space,
                    model_f=model_f,
                    tqdm=my_tqdm,
                    log_name=args.log_name)
else:
    # NOTE(review): excerpt truncated mid-call -- the remaining keyword
    # arguments of this Player(...) construction are outside this snippet.
    player = Player(
        observation_space=env.observation_space,
# Minimal CLI for this test harness: only a -pf (profile) flag.
parser = argparse.ArgumentParser()
parser.add_argument('-pf', dest='profile', action='store_true', default=False)
args = parser.parse_args()

# Shrink the hyperparameters so a smoke test runs quickly.
hp.Buffer_size = 500
hp.Learn_start = 200      # presumably steps collected before learning starts -- confirm
hp.Batch_size = 32
hp.Target_update = 500    # presumably target-network sync interval -- confirm
hp.epsilon = 1            # initial exploration rate
hp.epsilon_min = 0.01     # exploration floor
hp.epsilon_nstep = 500    # presumably steps over which epsilon decays -- confirm

# EnvTest is built from the real env's observation space; the Player mixes
# the real observation space with the test env's action space.
original_env = gym.make('mouseCl-v0')
test_env = EnvTest(original_env.observation_space)
player = Player(original_env.observation_space, test_env.action_space)
o = test_env.reset()
# (Commented-out scripted rollout kept below for reference.)
# for step in trange(1000) :
#     player.act(o,training=True)
#     if step%5 == 0 :
#         action = 2
#     elif step%5 == 1 :
#         action = 1
#     elif step%5 == 2 :
#         action = 2
#     elif step%5 == 3 :
#         action = 1
#     elif step%5 == 4 :
#         action = 0
#     o, r, d, i = test_env.step(action)
#     player.step(action, r,d,i)
Beispiel #7
0
# Resume-state read from the command line (strings -> ints).
cur_loop = int(args.cur_loop)  # loop counter to resume from
cur_r = int(args.cur_r)        # round counter to resume from
load_buffer = args.load_buffer  # presumably a flag/path for restoring a replay buffer -- confirm

print('starting loop, {} loops left'.format(total_loop))
if not args.vm:
    # Only create a viewer when not running headless (e.g. on a VM).
    from gym.envs.classic_control.rendering import SimpleImageViewer
    eye_viewer = SimpleImageViewer(maxwidth=1500)
    # 5x3 yellow pixel strip (uint8 RGB), presumably a frame separator -- confirm.
    bar = np.ones(
        (5, 3), dtype=np.uint8) * np.array([255, 255, 0], dtype=np.uint8)
# For benchmark
st = time.time()  # wall-clock start time
env = gym.make(ENVIRONMENT)
o = env.reset()
if args.load:
    # Resuming from a checkpoint: pass restored counters and buffer.
    player = Player(env.observation_space, env.action_space, args.load,
                    args.log_name, cur_loop * total_steps, cur_r, load_buffer)
elif args.log_name:
    # If log directory is explicitly selected
    player = Player(env.observation_space,
                    env.action_space,
                    log_name=args.log_name)
else:
    # Fresh run: Player picks its own defaults.
    player = Player(env.observation_space, env.action_space)
if not args.vm:
    env.render()
for step in trange(total_steps, ncols=80):
    action = player.act(o, training=True)
    o, r, d, i = env.step(action)
    # NOTE: `o` already holds the post-step observation here; Player.step
    # receives only the action and the transition result.
    player.step(action, r, d, i)
    if d:
        # Episode finished: start a new one.
        o = env.reset()