Example 1
def simulate_batch(batch_num):
    env = CarRacing()

    obs_data = []
    action_data = []
    action = env.action_space.sample()
    for i_episode in range(_BATCH_SIZE):
        observation = env.reset()
        # Little hack to make the Car start at random positions in the race-track
        position = np.random.randint(len(env.track))
        env.car = Car(env.world, *env.track[position][1:4])
        observation = normalize_observation(observation)

        obs_sequence = []

        for _ in range(_TIME_STEPS):
            if _RENDER:
                env.render()

            action = generate_action(action)

            observation, reward, done, info = env.step(action)
            observation = normalize_observation(observation)

            obs_data.append(observation)

    print("Saving dataset for batch {}".format(batch_num))
    np.save('../data/obs_data_VAE_{}'.format(batch_num), obs_data)

    env.close()
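
A minimal driver sketch for the snippet above (not part of the original): it assumes simulate_batch and the module-level names it uses (_BATCH_SIZE, _TIME_STEPS, _RENDER, normalize_observation, generate_action) are defined, and the pool size and batch count are illustrative.

# Hypothetical driver: run several batches in parallel worker processes,
# each one writing its own ../data/obs_data_VAE_<n>.npy file.
from multiprocessing import Pool

if __name__ == '__main__':
    _NUM_BATCHES = 4  # illustrative batch count
    with Pool(processes=4) as pool:
        pool.map(simulate_batch, range(_NUM_BATCHES))
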
def VAE_trainset_generator(action_function,
                           dst,
                           name_this='rollout_v0',
                           MAX_GAME_TIME=1000,
                           MAX_RUNS=20,
                           on=0,
                           is_render=False,
                           is_vebo=False):
    env = CarRacing()
    states = []
    actions = []
    for run in range(MAX_RUNS):
        env.seed(seed=5)
        state = env.reset()
        env.render()  # must have!
        for game_time in range(MAX_GAME_TIME):
            if is_render:
                env.render()
            action = action_function(state)
            state = _process_frame(state)
            states.append(state)
            actions.append(action)
            state, r, done, _ = env.step(action)
            if is_vebo:
                print('RUN:{},GT:{},DATA:{}'.format(run, game_time,
                                                    len(states)))
    env.close()  # close the environment once, after all runs are finished
    states = np.array(states, dtype=np.uint8)
    actions = np.array(actions, dtype=np.float16)
    save_name = name_this + '_{}.npz'.format(on)
    print('saved: ' + save_name + ' len:', len(states))
    np.savez_compressed(dst + '/' + save_name, action=actions, state=states)
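
A hedged usage sketch (not from the original): VAE_trainset_generator only needs a callable that maps a raw observation to a CarRacing action, so a random policy like the one below is enough to produce a first .npz rollout file; the action ranges follow the CarRacing action space (steer in [-1, 1], gas and brake in [0, 1]).

import numpy as np

def random_action(_state):
    # Illustrative policy: sample steer, gas and brake uniformly at random.
    return np.array([np.random.uniform(-1.0, 1.0),
                     np.random.uniform(0.0, 1.0),
                     np.random.uniform(0.0, 1.0)], dtype=np.float32)

VAE_trainset_generator(random_action, dst='data', name_this='rollout_v0', on=0)
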
Example 3
def simulate_batch(batch_num):
    car_env = CarRacing()

    obs_data = []
    action_data = []
    action = car_env.action_space.sample()
    for item in range(batch_size):
        en_observ = car_env.reset()
        # Make the car start at a random position on the track
        position = np.random.randint(len(car_env.track))
        car_env.car = Car(car_env.world, *car_env.track[position][1:4])
        en_observ = norm_obse(en_observ)

        obs_sequence = []

        # time steps
        for i in range(steps):
            if render:
                car_env.render()

            action = create_action(action)

            en_observ, reward, done, info = car_env.step(action)
            en_observ = norm_obse(en_observ)

            obs_data.append(en_observ)

    print("Saving dataset for batch {}".format(batch_num))
    np.save('data/TR_data_{}'.format(batch_num), obs_data)
    
    car_env.close()
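
The norm_obse helper is not included in the snippet; a minimal stand-in, assuming it only rescales the raw 96x96x3 uint8 frame, could look like this (an illustration, not the original implementation):

def norm_obse(frame):
    # Assumed behaviour: convert the uint8 frame to float32 values in [0, 1].
    return frame.astype(np.float32) / 255.0
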
def multiple_runs():
    env = CarRacing()

    states = []
    actions = []
    for run in range(MAX_RUNS):
        state = env.reset()
        env.render()  # must have!
        # done = False
        counter = 0
        for game_time in range(MAX_GAME_TIME):
            # env.render()
            action = generate_action()
            state = _process_frame(state)
            # if game_time == 5:
            #     plt.imshow(state)
            #     plt.show()
            #     state = _process_frame(state)
            #     plt.imshow(state)
            #     plt.show()
            states.append(state)
            actions.append(action)
            state, r, done, _ = env.step(action)

            # print(r)
            print('RUN:{},GT:{},DATA:{}'.format(run, game_time, len(states)))
            # if counter == REST_NUM:
            #
            #     position = np.random.randint(len(env.track))
            #     env.car = Car(env.world, *env.track[position][1:4])
            #     counter = 0
            # counter += 1
        states = np.array(states, dtype=np.uint8)
        actions = np.array(actions, dtype=np.float16)
        save_name = name_this + '_{}.npz'.format(run)
        # np.save(dst + '/' + save_name, frame_and_action)

        np.savez_compressed(dst + '/' + save_name,
                            action=actions,
                            state=states)
        states = []
        actions = []
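
multiple_runs reads its configuration from module scope (it also relies on generate_action and _process_frame being defined); a sketch of the globals it expects, with purely illustrative values, is:

# Illustrative module-level configuration for multiple_runs(); the concrete
# values are assumptions, not taken from the original source.
MAX_RUNS = 20          # number of rollouts to record
MAX_GAME_TIME = 1000   # frames per rollout
name_this = 'rollout'  # file-name prefix for the .npz archives
dst = 'data'           # output directory (must already exist)
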
def simulate_batch(batch_num, save=True, time_steps=None, reduce_size=True):
    env = CarRacing()

    if time_steps is None:
        time_steps = _TIME_STEPS

    obs_data = []
    action_data = []
    action = env.action_space.sample()
    for i_episode in range(_BATCH_SIZE):
        observation = env.reset()
        # Little hack to make the Car start at random positions in the race-track
        position = np.random.randint(len(env.track))
        env.car = Car(env.world, *env.track[position][1:4])
        observation = normalize_observation(observation,
                                            output_4d=False,
                                            reduce_size=reduce_size)
        obs_data.append(observation)

        for _ in range(time_steps):
            if _RENDER:
                env.render()

            action = generate_action(action)

            observation, reward, done, info = env.step(action)
            observation = normalize_observation(observation,
                                                output_4d=False,
                                                reduce_size=reduce_size)

            obs_data.append(observation)

    if save:
        print("Saving dataset for batch {:03d}".format(batch_num))
        np.save('../data/obs_data_VAE_{:03d}'.format(batch_num), obs_data)

    env.close()
    return obs_data
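
Because this variant returns the collected frames, it can also be used in memory; the call below is illustrative and not part of the original.

# Collect a short batch without writing it to disk, e.g. for a quick
# validation split (argument values are placeholders).
val_obs = simulate_batch(batch_num=0, save=False, time_steps=50)
print(len(val_obs), "frames collected")
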
Example 6
def load_model(experiment=None,
               folder='experiments',
               weights=None,
               env='Base',
               full_path=None,
               policy=None,
               n_steps=None,
               tensorboard=False,
               tag=None,
               no_render=False,
               n_ep=None):

    if policy is not None:
        weights_loc = os.path.join("hrl/weights", policy)
        names = [name for name in os.listdir(weights_loc)
                 if '.pkl' in name]  # only pkl
        versions = [
            re.match(r'^(?:v)(\d+\.\d+)(?:_?)', i).group(1) for i in names
        ]  # capture v#.#_
        versions = [float(v) for v in versions]  # Convert to float
        max_v = max(versions)
        w = [n for n in names
             if re.match(r'^v' + str(max_v), n) is not None][0]  # name of the max version
        weights_loc = os.path.join(weights_loc, str(w))
    elif full_path is not None:
        weights_loc = full_path
    else:
        if folder[-1] in '\\/':
            # remove \ from the end
            folder = folder[:-1]

        if weights is None:
            # Check what is the last weight
            weights_lst = [
                s for s in os.listdir('/'.join([folder, experiment]))
                if "weights_" in s
            ]
            weights_lst = [
                s.replace('weights_', '').replace('.pkl', '')
                for s in weights_lst
            ]
            if 'final' in weights_lst:
                weights = 'weights_final.pkl'
            else:
                weights_lst = [int(s) for s in weights_lst]
                weights = 'weights_' + str(max(weights_lst)) + '.pkl'

        weights_loc = '/'.join([folder, experiment, weights])
    print("**** Using weights", weights_loc)

    tb_logger = None
    if tensorboard:
        args = {
            'env': copy(env),
            'train_steps': n_steps,
            'weights': weights_loc,
            'perf': True,
            'tag': tag,
            'n_ep': n_ep
        }
        id, tb_logger, logs_folder, experiment_csv, experiment_folder = \
            create_experiment_folder(folder=folder, tag=tag, args=args)
        print("***** experiment is", experiment_folder)

    # Get env
    if env == "CarRacing_v0":
        from gym.envs.box2d import CarRacing
        env = CarRacing()
    else:
        from hrl.envs import env as environments
        env = getattr(environments, env)(tensorboard_logger=tb_logger)
        if env.high_level and not no_render:
            env.auto_render = True
    env = DummyVecEnv([lambda: env])

    model = PPO2.load(weights_loc)
    model.set_env(env)

    #set_trace()
    if 'interrupting' in str(type(env.envs[0])):  # TODO: check the class with isinstance instead of its name string
        env.envs[0].set_interrupting_params(ppo=model)

    obs = env.reset()
    done_count = 0
    reward = 0
    try:
        for current_step in itertools.count():
            action, _states = model.predict(obs)

            reward = env.get_attr("reward")[0]
            full_reward = env.get_attr("full_reward")[0]

            obs, rewards, dones, info = env.step(action)
            if not no_render:
                env.render()

            if any(dones):

                if tb_logger is None:
                    print("reward:", reward, "full_reward:", full_reward)

                if n_ep is not None:
                    done_count += 1
                    tb_logger.log_value("episode/full_reward", full_reward,
                                        current_step)
                    if done_count % 20 == 0:
                        print("episode %i of %i" % (done_count, n_ep))
                    if done_count >= n_ep:
                        break

            if n_steps is not None:
                if current_step % 1000 == 0:
                    print("steps %i of %i" % (current_step, n_step))
                if current_step >= n_steps:
                    break
    except KeyboardInterrupt:
        if tensorboard and input(
                "Do you want to DELETE this experiment? (Yes/n) ") == "Yes":
            remove_experiment(experiment_folder, folder, experiment_csv, id)
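
A hedged invocation sketch; the experiment name is a placeholder, not a value from the source.

# Replay the newest weights_*.pkl found under experiments/<experiment>/ for
# 10,000 environment steps with rendering on (names are placeholders).
load_model(experiment='my_experiment', folder='experiments', n_steps=10000)
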
def play(params,
         render=True,
         verbose=False,
         save_visualization=False,
         max_len=999):
    time_start = datetime.datetime.now()
    print('Agent train run begun ' + str(time_start))

    sess, network = load_vae()
    env = CarRacing()

    # _NUM_TRIALS = 16  # <-- Ha and Schmidhuber
    _NUM_TRIALS = 8

    agent_reward = 0
    for trial in range(_NUM_TRIALS):
        observation = env.reset()
        observation = network.normalize_observation(observation)
        # Little hack to make the Car start at random positions in the race-track
        np.random.seed(int(str(time.time() * 1000000)[10:13]))
        position = np.random.randint(len(env.track))
        env.car = Car(env.world, *env.track[position][1:4])

        total_reward = 0.0
        steps = 0
        observations = [observation]
        while True:
            if render:
                env.render()
            observation = network.normalize_observation(observation)
            observations.append(observation)

            embedding = network.get_embedding(sess, observation)
            action = decide_action(sess, embedding, params)
            observation, r, done, info = env.step(action)
            total_reward += r
            # NB: done is not True after 1000 steps when using the hack above for
            #       random init of position
            if verbose and (steps % 200 == 0 or steps == 999):
                print("\naction " + str(["{:+0.2f}".format(x)
                                         for x in action]))
                print("step {} total_reward {:+0.2f}".format(
                    steps, total_reward))

            steps += 1
            if steps == max_len:
                break
            # if total_reward < -50:
            #     break
            if _IS_TEST and steps > 10:
                break

        total_reward = np.maximum(-100, total_reward)
        agent_reward += total_reward
        if save_visualization:
            title = 'train_agent_r{:.2f}'.format(agent_reward)
            print('Saving trajectory:', title)
            network.show_pred(title, np.concatenate(observations, 0))
            break
        print('.', end='')

    sess.close()
    env.close()
    print('Agent done - ' + str(time_start))

    return -(agent_reward / _NUM_TRIALS)
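
Since play() returns the negative average trial reward, it can be used directly as the objective of a black-box optimiser; the CMA-ES loop below is only a sketch, and n_params stands in for the controller's parameter count, which the snippet does not define.

import cma

n_params = 867  # placeholder dimensionality for the controller parameters
es = cma.CMAEvolutionStrategy(n_params * [0.0], 0.5)
while not es.stop():
    candidates = es.ask()
    # Evaluate each candidate controller; play() already returns a value to minimise.
    fitnesses = [play(p, render=False) for p in candidates]
    es.tell(candidates, fitnesses)
    es.disp()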