# 5x3 yellow RGB bar (255,255,0) used as a fixed marker/color patch.
bar = np.ones((5, 3), dtype=np.uint8) * np.array([255, 255, 0], dtype=np.uint8)

# For benchmark: record the wall-clock start of this training run.
# NOTE(review): `st` is not read again in this chunk — presumably the elapsed
# time is reported further down the file; confirm before removing.
st = time.time()

env = gym.make('mouseCl-v0')
o = env.reset()

# Resume from a checkpoint when --load is given, otherwise start fresh.
# The extra positional arguments restore the replay buffer / counters —
# their exact semantics live in Player's constructor (off-view).
if args.load:
    player = Player(env.observation_space, env.action_space, args.load,
                    args.log_name, cur_loop * total_steps, cur_r,
                    buf_full, load_buffer, buf_count)
else:
    player = Player(env.observation_space, env.action_space)

# Skip rendering on headless (VM) machines.
if not args.vm:
    env.render()

for step in trange(total_steps, ncols=50):
    action = player.act(o)
    o, r, d, i = env.step(action)
    # NOTE(review): `o` was overwritten above, so player.step receives the
    # NEXT observation, not the one `action` was chosen from — confirm this
    # matches Player.step's expected contract (compare the bef_o/aft_o
    # pattern used elsewhere in this file).
    player.step(o, r, d, i)
    if d:
        o = env.reset()
    if not args.vm:
        env.render()

# Persist the model and derive the directory for the next checkpoint.
next_save = player.save_model()
if not args.load:
    save_dir = player.save_dir
else:
    save_dir, _ = os.path.split(args.load)
next_dir = os.path.join(save_dir, str(next_save))

# Evaluate on a fresh environment instance and report the score.
score = player.evaluate(gym.make('mouseCl-v0'), vid_type)
print('eval_score:{0}'.format(score))
log_name= args.log_name ) else : player = Player( observation_space= env.observation_space, action_space= env.action_space, model_f= model_f, tqdm= my_tqdm, ) if args.render : env.render() if args.profile: # Warm up for step in range(hp.Learn_start+20): action = player.act(bef_o) aft_o,r,d,i = env.step(action) player.step(bef_o,action,r,d,i) if d : bef_o = env.reset() else: bef_o = aft_o if args.render : env.render() with Profile(f'log/{args.log_name}'): for step in range(5): action = player.act(bef_o) aft_o,r,d,i = env.step(action) player.step(bef_o,action,r,d,i) if d :
# Commented-out manual action schedule kept from an earlier debugging
# session (cycled actions by step % 5 instead of querying the policy):
#     action = 2
# elif step%5 == 1 :
#     action = 1
# elif step%5 == 2 :
#     action = 2
# elif step%5 == 3 :
#     action = 1
# elif step%5 == 4 :
#     action = 0
# o, r, d, i = test_env.step(action)
# player.step(action, r,d,i)
# if d :
#     o = test_env.reset()

if args.profile:
    # Warm up past Learn_start so the learner is running, then profile a
    # short steady-state burst so the trace excludes one-time setup.
    for step in trange(hp.Learn_start + 50, ncols=100):
        action = player.act(o, training=True)
        o, r, d, i = test_env.step(action)
        player.step(action, r, d, i)
        if d:
            o = test_env.reset()
    with tf.profiler.experimental.Profile('log/profile'):
        for step in trange(5, ncols=100):
            action = player.act(o, training=True)
            o, r, d, i = test_env.step(action)
            player.step(action, r, d, i)
            if d:
                o = test_env.reset()
else:
    # Plain training run; loop body continues past this chunk.
    for step in trange(10000, ncols=100):
        action = player.act(o, training=True)