cur_r = int(args.cur_r) load_buffer = args.load_buffer my_tqdm = tqdm(total=total_steps, dynamic_ncols=True) print('starting loop, {} loops left'.format(total_loop)) if not args.vm : from gym.envs.classic_control.rendering import SimpleImageViewer eye_viewer = SimpleImageViewer(maxwidth=1500) bar = np.ones((5,3),dtype=np.uint8)*np.array([255,255,0],dtype=np.uint8) # For benchmark st = time.time() env = gym.make(ENVIRONMENT) bef_o = env.reset() if args.load : player = Player(env.observation_space, env.action_space, my_tqdm, args.load, args.log_name, cur_loop*total_steps, cur_r, load_buffer) elif args.log_name: # If log directory is explicitely selected player = Player(env.observation_space, env.action_space, my_tqdm, log_name=args.log_name) else : player = Player(env.observation_space, env.action_space, my_tqdm) if not args.vm : env.render() for step in range(total_steps): action = player.act(bef_o) aft_o,r,d,i = env.step(action) player.step(bef_o,action,r,d,i) if d : bef_o = env.reset() else:
from gym.envs.classic_control.rendering import SimpleImageViewer eye_viewer = SimpleImageViewer(maxwidth=1500) bar = np.ones((5,3),dtype=np.uint8)*np.array([255,255,0],dtype=np.uint8) # For benchmark st = time.time() env = tools.EnvWrapper(gym.make(ENVIRONMENT, **env_kwargs)) eval_env = tools.EnvWrapper(gym.make(ENVIRONMENT, **env_kwargs)) bef_o = env.reset() if args.log_name: # If log directory is explicitely selected player = Player( observation_space= env.observation_space, action_space= env.action_space, model_f= model_f, tqdm= my_tqdm, log_name= args.log_name ) else : player = Player( observation_space= env.observation_space, action_space= env.action_space, model_f= model_f, tqdm= my_tqdm, ) if args.render : env.render() if args.profile: # Warm up
# Training driver: read the resume options, build the environment and the
# player, then run ``total_steps`` environment steps and save the model.
# NOTE(review): block nesting was reconstructed from a whitespace-collapsed
# source line -- confirm the placement of the in-loop render and the final
# save against the original file.
load_buffer = args.load_buffer
buf_count = int(args.buf_count)
print('starting loop, {} loops left'.format(total_loop))

if not args.vm:
    # The viewer is only imported and created when ``args.vm`` is falsy.
    from gym.envs.classic_control.rendering import SimpleImageViewer

    eye_viewer = SimpleImageViewer(maxwidth=1500)
    # (5, 3) uint8 array whose rows are all [255, 255, 0].
    bar = (
        np.ones((5, 3), dtype=np.uint8)
        * np.array([255, 255, 0], dtype=np.uint8)
    )

# For benchmark
st = time.time()

env = gym.make('mouseCl-v0')
o = env.reset()

if not args.load:
    # Fresh run: the player is built from the two spaces only.
    player = Player(env.observation_space, env.action_space)
else:
    # Resumed run: the load path, log name, starting step
    # (``cur_loop * total_steps``), starting round and buffer options are
    # forwarded positionally, in this exact order.
    player = Player(
        env.observation_space,
        env.action_space,
        args.load,
        args.log_name,
        cur_loop * total_steps,
        cur_r,
        buf_full,
        load_buffer,
        buf_count,
    )

if not args.vm:
    env.render()

for step in trange(total_steps, ncols=50):
    action = player.act(o)
    # ``o`` is overwritten with the post-step observation before it is
    # handed to ``player.step``.
    o, r, d, i = env.step(action)
    player.step(o, r, d, i)
    if d:
        # Episode finished: start the next one from a fresh observation.
        o = env.reset()
    if not args.vm:
        env.render()

next_save = player.save_model()
# For benchmark st = time.time() env = gym.make(ENVIRONMENT, **env_kwargs) if hp.CLASSIC: env = tools.EnvWrapper(env) last_obs = env.reset() render = args.render if render : env.render() player = Player( observation_space= env.observation_space, action_space= env.action_space, model_f= model_f, m_dir=args.load, log_name= args.log_name, mixed_float=args.mixed_float, ) need_to_eval = False buf = ReplayBuffer(env.observation_space, env.action_space) reset_buffer = True cum_reward = 0.0 rounds = 0 per_round_steps = 0 act_steps = 0 if args.profile: for step in range(20): last_obs, cum_reward, rounds, act_steps,\
eye_viewer = SimpleImageViewer(maxwidth=1500) bar = np.ones( (5, 3), dtype=np.uint8) * np.array([255, 255, 0], dtype=np.uint8) # For benchmark st = time.time() env = gym.make(ENVIRONMENT, **env_kwargs) bef_o = env.reset() if args.load: player = Player( observation_space=env.observation_space, action_space=env.action_space, model_f=model_f, tqdm=my_tqdm, m_dir=args.load, log_name=args.log_name, start_step=cur_loop * loop_steps, start_round=cur_r, load_buffer=load_buffer, ) elif args.log_name: # If log directory is explicitely selected player = Player(observation_space=env.observation_space, action_space=env.action_space, model_f=model_f, tqdm=my_tqdm, log_name=args.log_name) else: player = Player( observation_space=env.observation_space,
# Test-harness setup: parse the profiling flag, override a handful of
# hyper-parameters on ``hp``, then build the test environment and the player.
parser = argparse.ArgumentParser()
parser.add_argument(
    '-pf',
    dest='profile',
    action='store_true',
    default=False,
)
args = parser.parse_args()

# Hyper-parameter overrides, applied to ``hp`` in the order listed.
_HP_OVERRIDES = {
    'Buffer_size': 500,
    'Learn_start': 200,
    'Batch_size': 32,
    'Target_update': 500,
    'epsilon': 1,
    'epsilon_min': 0.01,
    'epsilon_nstep': 500,
}
for _hp_name, _hp_value in _HP_OVERRIDES.items():
    setattr(hp, _hp_name, _hp_value)

# The real environment supplies the observation space; the action space is
# taken from the ``EnvTest`` instance built on top of it.
original_env = gym.make('mouseCl-v0')
test_env = EnvTest(original_env.observation_space)
player = Player(original_env.observation_space, test_env.action_space)
o = test_env.reset()

# Disabled scripted rollout (left commented out, as in the original):
# for step in trange(1000) :
#     player.act(o,training=True)
#     if step%5 == 0 :
#         action = 2
#     elif step%5 == 1 :
#         action = 1
#     elif step%5 == 2 :
#         action = 2
#     elif step%5 == 3 :
#         action = 1
#     elif step%5 == 4 :
#         action = 0
#     o, r, d, i = test_env.step(action)
#     player.step(action, r,d,i)
# Training driver: read the resume options, build the environment and the
# player, then run ``total_steps`` environment steps.
# NOTE(review): block nesting was reconstructed from a whitespace-collapsed
# source line -- confirm against the original file.
cur_loop = int(args.cur_loop)
cur_r = int(args.cur_r)
load_buffer = args.load_buffer
print('starting loop, {} loops left'.format(total_loop))

if not args.vm:
    # The viewer is only imported and created when ``args.vm`` is falsy.
    from gym.envs.classic_control.rendering import SimpleImageViewer

    eye_viewer = SimpleImageViewer(maxwidth=1500)
    # (5, 3) uint8 array whose rows are all [255, 255, 0].
    bar = (
        np.ones((5, 3), dtype=np.uint8)
        * np.array([255, 255, 0], dtype=np.uint8)
    )

# For benchmark
st = time.time()

env = gym.make(ENVIRONMENT)
o = env.reset()

if args.load:
    # Resumed run: the load path, log name, starting step
    # (``cur_loop * total_steps``), starting round and buffer flag are
    # forwarded positionally, in this exact order.
    player = Player(
        env.observation_space,
        env.action_space,
        args.load,
        args.log_name,
        cur_loop * total_steps,
        cur_r,
        load_buffer,
    )
elif not args.log_name:
    # Fresh run with no log directory given.
    player = Player(env.observation_space, env.action_space)
else:
    # Fresh run with the log directory explicitly selected.
    player = Player(
        env.observation_space,
        env.action_space,
        log_name=args.log_name,
    )

if not args.vm:
    env.render()

for step in trange(total_steps, ncols=80):
    action = player.act(o, training=True)
    o, r, d, i = env.step(action)
    # Unlike ``act``, ``player.step`` is given the action, not the observation.
    player.step(action, r, d, i)
    if d:
        # Episode finished: start the next one from a fresh observation.
        o = env.reset()