def simulate_batch(batch_num):
    """Roll out CarRacing episodes and save the normalized frames to disk.

    Runs ``_BATCH_SIZE`` episodes of ``_TIME_STEPS`` steps each. After every
    reset the car is re-created at a randomly chosen track tile. The
    normalized observation returned by each ``env.step`` call is collected
    (the frame returned by ``reset`` is not recorded), and the whole batch is
    written with ``np.save`` to ``../data/obs_data_VAE_<batch_num>``
    (NumPy appends ``.npy``).

    Args:
        batch_num: Identifier formatted into the log message and file name.
    """
    env = CarRacing()
    try:
        obs_data = []
        # Starting action; generate_action() receives the previous action on
        # every step, and the chain is carried across episodes.
        action = env.action_space.sample()

        for _episode in range(_BATCH_SIZE):
            env.reset()

            # Little hack to make the Car start at random positions in the
            # race-track.
            position = np.random.randint(len(env.track))
            env.car = Car(env.world, *env.track[position][1:4])

            for _ in range(_TIME_STEPS):
                if _RENDER:
                    env.render()

                action = generate_action(action)
                # reward / done / info are ignored: every episode runs for
                # exactly _TIME_STEPS steps.
                observation, _reward, _done, _info = env.step(action)
                obs_data.append(normalize_observation(observation))

        print("Saving dataset for batch {}".format(batch_num))
        np.save('../data/obs_data_VAE_{}'.format(batch_num), obs_data)
    finally:
        # Release the simulator even when the rollout or the save fails.
        env.close()
def VAE_trainset_generator(action_function, dst, name_this='rollout_v0', MAX_GAME_TIME=1000, MAX_RUNS=20, on=0, is_render=False, is_vebo=False):
    """Collect (processed frame, action) pairs from CarRacing into one archive.

    For each of ``MAX_RUNS`` runs the environment is re-seeded with the fixed
    seed 5, reset, and stepped ``MAX_GAME_TIME`` times. At every step
    ``action_function`` is called with the raw (unprocessed) state, the state
    is passed through ``_process_frame`` and stored together with the action.
    All runs are written to a single compressed ``.npz`` file with the keys
    ``action`` (float16) and ``state`` (uint8).

    Args:
        action_function: Callable mapping the raw environment state to an
            action accepted by ``env.step``.
        dst: Directory the archive is written into.
        name_this: File-name prefix.
        MAX_GAME_TIME: Number of steps per run.
        MAX_RUNS: Number of runs.
        on: Index formatted into the file name (``<name_this>_<on>.npz``).
        is_render: Render every step when true.
        is_vebo: Print a progress line after every step when true.
    """
    env = CarRacing()
    states = []
    actions = []
    try:
        for run in range(MAX_RUNS):
            env.seed(seed=5)
            state = env.reset()
            env.render()  # must have!

            for game_time in range(MAX_GAME_TIME):
                if is_render:
                    env.render()

                # The policy sees the raw frame; only the stored copy is
                # processed.
                action = action_function(state)
                states.append(_process_frame(state))
                actions.append(action)

                # Reward and done flag are ignored: runs have a fixed length.
                state, _r, _done, _ = env.step(action)

                if is_vebo:
                    print('RUN:{},GT:{},DATA:{}'.format(run, game_time, len(states)))
    finally:
        # Release the simulator even if a rollout step fails.
        env.close()

    states = np.array(states, dtype=np.uint8)
    actions = np.array(actions, dtype=np.float16)

    save_name = name_this + '_{}.npz'.format(on)
    np.savez_compressed(dst + '/' + save_name, action=actions, state=states)
    # Report only after the archive has actually been written.
    print('saved: ' + save_name + ' len:', len(states))
def simulate_batch(batch_num):
    """Roll out CarRacing episodes and save the normalized frames to disk.

    Runs ``batch_size`` episodes of ``steps`` steps each. After every reset
    the car is re-created at a randomly chosen track tile. The normalized
    observation returned by each ``car_env.step`` call is collected (the
    frame returned by ``reset`` is not recorded), and the whole batch is
    written with ``np.save`` to ``data/TR_data_<batch_num>`` (NumPy appends
    ``.npy``).

    Args:
        batch_num: Identifier formatted into the log message and file name.
    """
    car_env = CarRacing()
    try:
        obs_data = []
        # Starting action; create_action() receives the previous action on
        # every step, and the chain is carried across episodes.
        action = car_env.action_space.sample()

        for _item in range(batch_size):
            car_env.reset()

            # Make the car start at a random position on the track.
            position = np.random.randint(len(car_env.track))
            car_env.car = Car(car_env.world, *car_env.track[position][1:4])

            # Time steps of this episode.
            for _ in range(steps):
                if render:
                    car_env.render()

                action = create_action(action)
                # reward / done / info are ignored: every episode runs for
                # exactly `steps` steps.
                en_observ, _reward, _done, _info = car_env.step(action)
                obs_data.append(norm_obse(en_observ))

        print("Saving dataset for batch {}".format(batch_num))
        np.save('data/TR_data_{}'.format(batch_num), obs_data)
    finally:
        # Release the simulator even when the rollout or the save fails.
        car_env.close()
def multiple_runs():
    """Record ``MAX_RUNS`` CarRacing rollouts, one compressed archive per run.

    Each run resets the environment and steps it ``MAX_GAME_TIME`` times with
    actions from ``generate_action()``. The frame seen before each step is
    passed through ``_process_frame`` and stored with the action taken. At
    the end of a run the data is written to ``<dst>/<name_this>_<run>.npz``
    with the keys ``action`` (float16) and ``state`` (uint8).

    ``MAX_RUNS``, ``MAX_GAME_TIME``, ``name_this`` and ``dst`` are read from
    the enclosing module scope.
    """
    env = CarRacing()
    try:
        for run in range(MAX_RUNS):
            # Fresh buffers per run: every run gets its own archive.
            states = []
            actions = []

            state = env.reset()
            env.render()  # must have!

            for game_time in range(MAX_GAME_TIME):
                action = generate_action()
                states.append(_process_frame(state))
                actions.append(action)

                # Reward and done flag are ignored: runs have a fixed length.
                state, _r, _done, _ = env.step(action)

                print('RUN:{},GT:{},DATA:{}'.format(run, game_time, len(states)))

            save_name = name_this + '_{}.npz'.format(run)
            np.savez_compressed(
                dst + '/' + save_name,
                action=np.array(actions, dtype=np.float16),
                state=np.array(states, dtype=np.uint8),
            )
    finally:
        # The original never closed the environment; release it on every path.
        env.close()
def simulate_batch(batch_num, save=True, time_steps=None, reduce_size=True):
    """Roll out CarRacing episodes and return (optionally save) the frames.

    Runs ``_BATCH_SIZE`` episodes. After every reset the car is re-created at
    a randomly chosen track tile. The normalized reset frame and the
    normalized frame returned by each of the ``time_steps`` steps are
    collected, so each episode contributes ``time_steps + 1`` observations.

    Args:
        batch_num: Identifier formatted into the log message and file name.
        save: When true, write the batch with ``np.save`` to
            ``../data/obs_data_VAE_<batch_num:03d>`` (NumPy appends ``.npy``).
        time_steps: Steps per episode; defaults to ``_TIME_STEPS`` when None.
        reduce_size: Forwarded to ``normalize_observation``.

    Returns:
        The list of normalized observations, in collection order.
    """
    if time_steps is None:
        time_steps = _TIME_STEPS

    def _normalize(frame):
        # Single place for the normalization options used by this function.
        return normalize_observation(frame, output_4d=False, reduce_size=reduce_size)

    env = CarRacing()
    try:
        obs_data = []
        # Starting action; generate_action() receives the previous action on
        # every step, and the chain is carried across episodes.
        action = env.action_space.sample()

        for _episode in range(_BATCH_SIZE):
            observation = env.reset()

            # Little hack to make the Car start at random positions in the
            # race-track.
            position = np.random.randint(len(env.track))
            env.car = Car(env.world, *env.track[position][1:4])

            obs_data.append(_normalize(observation))

            for _ in range(time_steps):
                if _RENDER:
                    env.render()

                action = generate_action(action)
                # reward / done / info are ignored: fixed-length episodes.
                observation, _reward, _done, _info = env.step(action)
                obs_data.append(_normalize(observation))

        if save:
            print("Saving dataset for batch {:03d}".format(batch_num))
            np.save('../data/obs_data_VAE_{:03d}'.format(batch_num), obs_data)
    finally:
        # Release the simulator even when the rollout or the save fails.
        env.close()

    return obs_data
def _latest_policy_weights(policy):
    """Return the path of the highest-versioned ``.pkl`` in ``hrl/weights/<policy>``.

    File names are expected to start with ``v<major>.<minor>``; the version
    is compared as a float. Names that do not follow the pattern are skipped.

    Raises:
        ValueError: If the directory holds no versioned ``.pkl`` file.
    """
    weights_dir = os.path.join("hrl/weights", policy)
    versioned = []
    for name in os.listdir(weights_dir):
        if '.pkl' not in name:  # only pkl
            continue
        match = re.match(r'^v(\d+\.\d+)', name)  # capture v#.#
        if match is not None:
            versioned.append((float(match.group(1)), name))
    if not versioned:
        raise ValueError("no versioned .pkl weights found in " + weights_dir)
    # Select the file directly by its parsed version instead of re-matching
    # names against str(float), which breaks on e.g. "v01.5" and treats the
    # "." as a regex wildcard.
    _, newest = max(versioned, key=lambda pair: pair[0])
    return os.path.join(weights_dir, newest)


def load_model(experiment=None, folder='experiments', weights=None, env='Base', full_path=None, policy=None, n_steps=None, tensorboard=False, tag=None, no_render=False, n_ep=None):
    """Load PPO2 weights and run the policy in an environment.

    The weights file is resolved in this order:
      1. ``policy``: newest ``v#.#`` ``.pkl`` under ``hrl/weights/<policy>``.
      2. ``full_path``: used verbatim.
      3. ``<folder>/<experiment>/<weights>``; when ``weights`` is None,
         ``weights_final.pkl`` is preferred, otherwise the highest numbered
         ``weights_<n>.pkl`` in that directory.

    The policy is then stepped until ``n_ep`` episodes have finished,
    ``n_steps`` steps were taken, or Ctrl-C is pressed (it runs forever if
    neither limit is given).

    Args:
        experiment: Experiment sub-directory inside ``folder``.
        folder: Root directory of experiments (a trailing slash is dropped).
        weights: Weights file name inside the experiment directory.
        env: ``"CarRacing_v0"`` or the name of a class in ``hrl.envs.env``.
        full_path: Explicit path to a weights file.
        policy: Sub-directory of ``hrl/weights`` to take the newest file from.
        n_steps: Stop once this many steps have been taken.
        tensorboard: Create an experiment folder and logger for this run.
        tag: Tag forwarded to ``create_experiment_folder``.
        no_render: Disable rendering.
        n_ep: Stop once this many episodes have finished.
    """
    if policy is not None:
        weights_loc = _latest_policy_weights(policy)
    elif full_path is not None:
        weights_loc = full_path
    else:
        if folder.endswith(('\\', '/')):  # remove a trailing separator
            folder = folder[:-1]
        if weights is None:
            # Check what is the last weight.
            experiment_dir = '/'.join([folder, experiment])
            suffixes = [
                s.replace('weights_', '').replace('.pkl', '')
                for s in os.listdir(experiment_dir)
                if "weights_" in s
            ]
            if 'final' in suffixes:
                weights = 'weights_final.pkl'
            else:
                weights = 'weights_' + str(max(int(s) for s in suffixes)) + '.pkl'
        weights_loc = '/'.join([folder, experiment, weights])

    print("**** Using weights", weights_loc)

    tb_logger = None
    experiment_id = experiment_csv = experiment_folder = None
    if tensorboard:
        args = {
            'env': copy(env),
            'train_steps': n_steps,
            'weights': weights_loc,
            'perf': True,
            'tag': tag,
            'n_ep': n_ep
        }
        experiment_id, tb_logger, logs_folder, experiment_csv, experiment_folder = \
            create_experiment_folder(folder=folder, tag=tag, args=args)
        print("***** experiment is", experiment_folder)

    # Get env
    if env == "CarRacing_v0":
        from gym.envs.box2d import CarRacing
        base_env = CarRacing()
    else:
        from hrl.envs import env as environments
        base_env = getattr(environments, env)(tensorboard_logger=tb_logger)

    # Environments without a `high_level` attribute are left untouched.
    if getattr(base_env, 'high_level', False) and not no_render:
        base_env.auto_render = True

    # A dedicated name avoids the lambda late-binding to the vectorized env.
    env = DummyVecEnv([lambda: base_env])

    model = PPO2.load(weights_loc)
    model.set_env(env)

    # TODO: use a real type check instead of matching on the type name.
    if 'interrupting' in str(type(env.envs[0])):
        env.envs[0].set_interrupting_params(ppo=model)

    obs = env.reset()
    done_count = 0
    try:
        for current_step in itertools.count():
            action, _states = model.predict(obs)
            # The episode totals are read before stepping.
            # NOTE(review): presumably because the vectorized env resets the
            # wrapped env when an episode ends — confirm.
            reward = env.get_attr("reward")[0]
            full_reward = env.get_attr("full_reward")[0]
            obs, _rewards, dones, _info = env.step(action)
            if not no_render:
                env.render()

            if any(dones):
                if tb_logger is None:
                    print("reward:", reward, "full_reward:", full_reward)
                if n_ep is not None:
                    done_count += 1
                    # Without tensorboard there is no logger to write to.
                    if tb_logger is not None:
                        tb_logger.log_value("episode/full_reward",
                                            full_reward, current_step)
                    if done_count % 20 == 0:
                        print("episode %i of %i" % (done_count, n_ep))
                    if done_count >= n_ep:
                        break

            if n_steps is not None:
                if current_step % 1000 == 0:
                    # Was `n_step`, an undefined name raising NameError at step 0.
                    print("steps %i of %i" % (current_step, n_steps))
                if current_step >= n_steps:
                    break
    except KeyboardInterrupt:
        if tensorboard and input(
                "Do you want to DELETE this experiment? (Yes/n) ") == "Yes":
            remove_experiment(experiment_folder, folder, experiment_csv,
                              experiment_id)
def play(params, render=True, verbose=False, save_visualization=False, max_len=999):
    """Evaluate controller ``params`` on CarRacing and return a cost to minimize.

    Loads the VAE, then runs up to 8 trials. In each trial the car is placed
    on a random track tile and driven for at most ``max_len`` steps, with
    actions chosen by ``decide_action`` from the VAE embedding of the current
    frame. Each trial's total reward is clamped to at least -100.

    Args:
        params: Controller parameters forwarded to ``decide_action``.
        render: Render every step when true.
        verbose: Print the action and running reward every 200 steps and on
            the last step of a trial.
        save_visualization: After the first trial, pass the collected frames
            to ``network.show_pred`` and stop running further trials.
        max_len: Maximum number of steps per trial.

    Returns:
        The negated sum of trial rewards divided by the trial count (8).
        NOTE(review): the divisor stays 8 even when ``save_visualization``
        stops after one trial — confirm this is intended.
    """
    time_start = datetime.datetime.now()
    print('Agent train run begun ' + str(time_start))

    sess, network = load_vae()
    env = None
    try:
        env = CarRacing()

        # _NUM_TRIALS = 16  # <-- Ha and Schmidhuber
        _NUM_TRIALS = 8

        agent_reward = 0
        for _trial in range(_NUM_TRIALS):
            observation = env.reset()
            observation = network.normalize_observation(observation)

            # Little hack to make the Car start at random positions in the
            # race-track. The global NumPy RNG is re-seeded from three digits
            # of the current time in microseconds.
            np.random.seed(int(str(time.time() * 1000000)[10:13]))
            position = np.random.randint(len(env.track))
            env.car = Car(env.world, *env.track[position][1:4])

            total_reward = 0.0
            steps = 0
            observations = [observation]
            while True:
                if render:
                    env.render()

                # NOTE(review): on the first iteration the reset frame was
                # already normalized above, so it is normalized twice and
                # appears twice in `observations` — confirm this is intended.
                observation = network.normalize_observation(observation)
                observations.append(observation)

                embedding = network.get_embedding(sess, observation)
                action = decide_action(sess, embedding, params)
                observation, r, _done, _info = env.step(action)
                total_reward += r

                # NB: done is not True after 1000 steps when using the hack
                # above for random init of position, so the loop is bounded
                # by max_len instead.
                # The last-step check follows max_len; the former hard-coded
                # `steps == 999` could never fire with the default max_len.
                if verbose and (steps % 200 == 0 or steps == max_len - 1):
                    print("\naction " + str(["{:+0.2f}".format(x) for x in action]))
                    print("step {} total_reward {:+0.2f}".format(
                        steps, total_reward))

                steps += 1
                if steps == max_len:
                    break

                if _IS_TEST and steps > 10:
                    break

            # Clamp very bad trials so one crash does not dominate the sum.
            total_reward = np.maximum(-100, total_reward)
            agent_reward += total_reward

            if save_visualization:
                title = 'train_agent_r{:.2f}'.format(agent_reward)
                print('Saving trajectory:', title)
                network.show_pred(title, np.concatenate(observations, 0))
                break

            print('.', end='')
    finally:
        # Release the session and the simulator on every exit path.
        sess.close()
        if env is not None:
            env.close()

    print('Agent done - ' + str(time_start))
    return -(agent_reward / _NUM_TRIALS)