def multiple_runs(on):
    """Collect random-action rollouts from CarRacing and save them compressed.

    Runs ``MAX_RUNS`` episodes of ``MAX_GAME_TIME`` steps each. Before every
    step the current frame is passed through ``_process_frame`` and recorded
    together with the action that is about to be applied. Whenever the step
    counter reaches ``REST_NUM`` the car is re-created at a randomly chosen
    track entry so the data covers more of the track.

    The result is written to ``dst/rollout_v2_<on>.npz`` with the arrays
    ``action`` (float16) and ``state`` (uint8).

    Args:
        on: Identifier embedded in the output file name.
    """
    env = CarRacing()
    states = []
    actions = []
    try:
        for run in range(MAX_RUNS):
            state = env.reset()
            counter = 0
            for game_time in range(MAX_GAME_TIME):
                action = generate_action()
                states.append(_process_frame(state))
                actions.append(action)
                # Reward and termination flag are deliberately ignored: every
                # run lasts exactly MAX_GAME_TIME steps.
                state, _reward, _done, _ = env.step(action)

                if counter == REST_NUM:
                    print('RUN:{},GT:{},DATA:{}'.format(run, game_time,
                                                        len(states)))
                    # Teleport the car to a random point of the track.
                    position = np.random.randint(len(env.track))
                    env.car = Car(env.world, *env.track[position][1:4])
                    counter = 0
                counter += 1
    finally:
        # Release the environment even if a step raises.
        env.close()

    states = np.array(states, dtype=np.uint8)
    actions = np.array(actions, dtype=np.float16)
    save_name = 'rollout_v2_{}.npz'.format(on)
    np.savez_compressed(dst + '/' + save_name, action=actions, state=states)
# Example #2
# 0
def simulate_batch(batch_num):
    """Simulate ``_BATCH_SIZE`` episodes and save the normalised frames.

    Each episode starts with the car re-created at a randomly chosen track
    entry and runs for ``_TIME_STEPS`` steps. Actions come from
    ``generate_action``, which receives the previous action. Only the frames
    returned by ``env.step`` are recorded; the frame returned by
    ``env.reset`` is not part of the dataset.

    The collected frames are written with ``np.save`` to
    ``../data/obs_data_VAE_<batch_num>``.

    Args:
        batch_num: Identifier embedded in the output file name.
    """
    env = CarRacing()
    obs_data = []
    try:
        action = env.action_space.sample()
        for _episode in range(_BATCH_SIZE):
            env.reset()
            # Little hack to make the car start at random positions on the
            # race track.
            position = np.random.randint(len(env.track))
            env.car = Car(env.world, *env.track[position][1:4])

            for _ in range(_TIME_STEPS):
                if _RENDER:
                    env.render()

                action = generate_action(action)
                observation, _reward, _done, _info = env.step(action)
                obs_data.append(normalize_observation(observation))

        print("Saving dataset for batch {}".format(batch_num))
        np.save('../data/obs_data_VAE_{}'.format(batch_num), obs_data)
    finally:
        # Release the environment even if simulation or saving fails.
        env.close()
# Example #3
# 0
def simulate_batch(batch_num):
    """Run ``batch_size`` episodes and store the normalised step frames.

    For every episode the environment is reset, the car is re-created at a
    randomly chosen track entry, and ``steps`` actions produced by
    ``create_action`` (seeded with the previous action) are applied. Each
    frame returned by ``car_env.step`` is normalised with ``norm_obse`` and
    collected; the frame returned by ``reset`` is not recorded.

    The frames are written with ``np.save`` to ``data/TR_data_<batch_num>``.

    Args:
        batch_num: Identifier embedded in the output file name.
    """
    car_env = CarRacing()
    obs_data = []
    try:
        action = car_env.action_space.sample()
        for _item in range(batch_size):
            car_env.reset()
            # Make the car start at a random position on the track.
            position = np.random.randint(len(car_env.track))
            car_env.car = Car(car_env.world, *car_env.track[position][1:4])

            for _step in range(steps):
                if render:
                    car_env.render()

                action = create_action(action)
                en_observ, _reward, _done, _info = car_env.step(action)
                obs_data.append(norm_obse(en_observ))

        print("Saving dataset for batch {}".format(batch_num))
        np.save('data/TR_data_{}'.format(batch_num), obs_data)
    finally:
        # Always release the environment, including on failure.
        car_env.close()
def play(params):
    """Evaluate controller parameters over several CarRacing trials.

    Builds the VAE and MDN-RNN models (loading their checkpoints from
    ``checkpoints/`` when the files exist), wraps ``params`` in a controller
    and drives the car for 16 trials of 999 steps each. Per-trial reward is
    clipped from below at -100 before being accumulated.

    Args:
        params: Parameters handed to ``controller.Controller``.

    Returns:
        The negated mean trial reward (presumably so that a minimising
        optimiser can use it as a cost; confirm against the caller).
    """
    num_trials = 16
    steps_per_trial = 999

    with torch.no_grad():
        block_print()
        device = torch.device("cpu")

        vae_model = vae.ConvVAE(VAE_Z_SIZE, VAE_KL_TOLERANCE)
        if os.path.exists("checkpoints/vae_checkpoint.pth"):
            vae_model.load_state_dict(
                torch.load("checkpoints/vae_checkpoint.pth",
                           map_location=device))
        vae_model = vae_model.eval()
        vae_model.to(device)

        rnn_model = rnn.MDMRNN(MDN_NUM_MIXTURES, MDN_HIDDEN_SIZE,
                               MDN_INPUT_SIZE, MDN_NUM_LAYERS, MDN_BATCH_SIZE,
                               1, MDN_OUTPUT_SIZE)
        if os.path.exists("checkpoints/rnn_checkpoint.pth"):
            rnn_model.load_state_dict(
                torch.load("checkpoints/rnn_checkpoint.pth",
                           map_location=device))
        rnn_model.to(device)
        rnn_model = rnn_model.eval()

        controller_model = controller.Controller(CMA_EMBEDDING_SIZE,
                                                 CMA_NUM_ACTIONS, params)

        env = CarRacing()
        agent_reward = 0
        try:
            for _trial in range(num_trials):
                observation = env.reset()
                # Little hack to make the car start at random positions on
                # the race track. The seed is re-derived from the clock on
                # every trial.
                np.random.seed(int(str(time.time() * 1000000)[10:13]))
                position = np.random.randint(len(env.track))
                env.car = Car(env.world, *env.track[position][1:4])

                hidden_state, cell_state = train_rnn.init_hidden(
                    MDN_NUM_LAYERS, MDN_BATCH_SIZE, MDN_HIDDEN_SIZE, device)

                total_reward = 0.0
                # NB: done is not True after 1000 steps when using the hack
                #     above for random init of position, so the trial length
                #     is bounded explicitly instead of relying on it.
                for _step in range(steps_per_trial):
                    action, hidden_state, cell_state = decide_action(
                        vae_model, rnn_model, controller_model, observation,
                        hidden_state, cell_state, device)
                    observation, r, _done, _info = env.step(action)
                    total_reward += r

                # If reward is out of scale, clip it.
                total_reward = np.maximum(-100, total_reward)
                agent_reward += total_reward
        finally:
            # Release the environment even if a trial raises.
            env.close()
        return -(agent_reward / num_trials)
# Example #5
# 0
def main():
    """Generate cropped 64x64 CarRacing observations from random car poses.

    For each of nine datasets (indices 1..9) the environment is reset and
    1000 frames are captured. Before every frame the car is re-created at a
    randomly chosen track entry whose first two spawn parameters are
    perturbed by random integer offsets in [-20, 20). Each frame is cropped,
    normalised, resized to 64x64 and clipped to [0, 1]. Every 100th frame is
    shown with matplotlib. Each dataset is saved to
    ``data/observations_<idx>.npy`` as float32.
    """
    print("Generating data for env CarRacing-v0")

    # Create the output directory once, without an exists/mkdir race.
    os.makedirs("data", exist_ok=True)

    env = CarRacing()
    try:
        for obs_idx in range(1, 10):
            env.reset()
            observations = []

            for i in range(1000):
                position = np.random.randint(len(env.track))
                angle = np.random.randint(-20, 20)
                x_off = np.random.randint(-20, 20)
                init_data = list(env.track[position][1:4])
                init_data[0] += angle
                init_data[1] += x_off
                env.car = Car(env.world, *init_data)

                # Step without an action just to obtain the current frame.
                observation = env.step(None)[0]

                cropped_obs = normalize_observation(
                    observation[:CROP_SIZE,
                                CROP_W_OFFSET:CROP_SIZE + CROP_W_OFFSET, :])

                cropped_obs = cv2.resize(cropped_obs,
                                         dsize=(64, 64),
                                         interpolation=cv2.INTER_CUBIC).astype(
                                             np.float32)

                # Clip in place to keep values inside [0, 1] after resizing.
                np.clip(cropped_obs, 0.0, 1.0, out=cropped_obs)

                if i % 10 == 0:
                    print(i)

                if i % 100 == 0:
                    plt.imshow(cropped_obs)
                    plt.show()

                observations.append(cropped_obs)

            observations = np.array(observations, dtype=np.float32)
            np.save("data/observations_%d.npy" % obs_idx, observations)
    finally:
        # Release the environment even if generation fails part-way.
        env.close()
# Example #6
# 0
def multiple_runs(on):
    """Collect random-action rollouts and save them as one ``.npy`` file.

    Runs ``MAX_RUNS`` episodes of ``MAX_GAME_TIME`` steps. After each step
    the resulting frame and the action that produced it are stored as a
    ``{'state': ..., 'action': ...}`` dict. Once more than ``REST_NUM`` steps
    have passed since the last relocation, the car is re-created at a
    randomly chosen track entry.

    The list of dicts is written with ``np.save`` to
    ``dst/rollout_<on>.npy``; NumPy stores such a list as a pickled object
    array, so loading it requires ``allow_pickle=True``.

    Args:
        on: Identifier embedded in the output file name.
    """
    env = CarRacing()
    frame_and_action = []
    try:
        for run in range(MAX_RUNS):
            env.reset()
            counter = 0
            for game_time in range(MAX_GAME_TIME):
                action = generate_action()
                # Reward and termination flag are ignored on purpose.
                state, _reward, _done, _ = env.step(action)
                frame_and_action.append({'state': state, 'action': action})
                counter += 1
                if counter > REST_NUM:
                    print('RUN:{},GT:{},DATA:{}'.format(
                        run, game_time, len(frame_and_action)))
                    # Teleport the car to a random point of the track.
                    position = np.random.randint(len(env.track))
                    env.car = Car(env.world, *env.track[position][1:4])
                    counter = 0
    finally:
        # Release the environment even if a step raises.
        env.close()

    save_name = 'rollout_{}.npy'.format(on)
    np.save(dst + '/' + save_name, frame_and_action)
def simulate_batch(batch_num, save=True, time_steps=None, reduce_size=True):
    """Simulate ``_BATCH_SIZE`` episodes and collect normalised frames.

    Each episode records the frame returned by ``env.reset`` followed by the
    frames of ``time_steps`` steps. The car is re-created at a randomly
    chosen track entry at the start of every episode, and actions come from
    ``generate_action``, which receives the previous action.

    Args:
        batch_num: Identifier embedded in the output file name.
        save: When true, write the frames with ``np.save`` to
            ``../data/obs_data_VAE_<batch_num:03d>``.
        time_steps: Steps per episode; defaults to ``_TIME_STEPS`` when
            ``None``.
        reduce_size: Forwarded to ``normalize_observation``.

    Returns:
        The list of normalised observations, in collection order.
    """
    if time_steps is None:
        time_steps = _TIME_STEPS

    env = CarRacing()
    obs_data = []
    try:
        action = env.action_space.sample()
        for _episode in range(_BATCH_SIZE):
            observation = env.reset()
            # Little hack to make the car start at random positions on the
            # race track.
            position = np.random.randint(len(env.track))
            env.car = Car(env.world, *env.track[position][1:4])
            obs_data.append(
                normalize_observation(observation,
                                      output_4d=False,
                                      reduce_size=reduce_size))

            for _ in range(time_steps):
                if _RENDER:
                    env.render()

                action = generate_action(action)
                observation, _reward, _done, _info = env.step(action)
                obs_data.append(
                    normalize_observation(observation,
                                          output_4d=False,
                                          reduce_size=reduce_size))

        if save:
            print("Saving dataset for batch {:03d}".format(batch_num))
            np.save('../data/obs_data_VAE_{:03d}'.format(batch_num), obs_data)
    finally:
        # Release the environment even if simulation or saving fails.
        env.close()
    return obs_data
def simulate_batch(batch_num):
    """Roll out the current controller once and save frames and actions.

    Builds the VAE and MDN-RNN models (loading checkpoints from
    ``checkpoints/`` when the files exist) and a controller whose parameters
    come from ``checkpoints/params.pkl`` if present, otherwise from
    ``controller.get_random_model_params``. The controller then drives the
    car for ``SEQUENCE_LENGTH + 1`` steps from a random track position.

    Every frame is passed through ``process_frame`` exactly once before it
    is recorded and encoded. The processed frames (uint8) and the actions
    (float16) are written to ``data/obs_data_VAE_<batch_num>`` with
    ``np.savez_compressed`` under the keys ``obs`` and ``action``. The total
    wall-clock time is logged at the end.

    Args:
        batch_num: Identifier embedded in the output file name and log line.
    """
    og = time.time()
    block_print()
    with torch.no_grad():

        device = torch.device("cpu")
        vae_model = vae.ConvVAE(VAE_Z_SIZE, VAE_KL_TOLERANCE)
        if os.path.exists("checkpoints/vae_checkpoint.pth"):
            vae_model.load_state_dict(
                torch.load("checkpoints/vae_checkpoint.pth",
                           map_location=device))
        vae_model = vae_model.eval()
        vae_model.to(device)

        rnn_model = rnn.MDMRNN(MDN_NUM_MIXTURES, MDN_HIDDEN_SIZE,
                               MDN_INPUT_SIZE, MDN_NUM_LAYERS, MDN_BATCH_SIZE,
                               1, MDN_OUTPUT_SIZE)
        if os.path.exists("checkpoints/rnn_checkpoint.pth"):
            rnn_model.load_state_dict(
                torch.load("checkpoints/rnn_checkpoint.pth",
                           map_location=device))
        rnn_model.to(device)
        rnn_model = rnn_model.eval()

        if os.path.exists("checkpoints/params.pkl"):
            # NOTE: pickle.load executes arbitrary code from the file; this
            # must only ever read checkpoints written by this project.
            with open('checkpoints/params.pkl', 'rb') as fo:
                params = pickle.load(fo)
            print("Loaded existing params")
        else:
            cma_num_params = CMA_NUM_ACTIONS * CMA_EMBEDDING_SIZE + CMA_NUM_ACTIONS
            params = controller.get_random_model_params(
                cma_num_params,
                np.random.rand() * 0.01)
        controller_model = controller.Controller(CMA_EMBEDDING_SIZE,
                                                 CMA_NUM_ACTIONS, params)

        env = CarRacing()
        try:
            observations = []
            actions = []

            observation = env.reset()

            # Start the car at a random position on the track.
            position = np.random.randint(len(env.track))
            env.car = Car(env.world, *env.track[position][1:4])

            hidden_state, cell_state = train_rnn.init_hidden(
                MDN_NUM_LAYERS, MDN_BATCH_SIZE, MDN_HIDDEN_SIZE, device)

            for _ in range(SEQUENCE_LENGTH + 1):
                # Process each raw frame once; the reset frame used to be
                # processed twice, unlike every later frame.
                frame = process_frame(observation)
                observations.append(frame)

                # Channels-first batch of one for the VAE encoder.
                vae_input = normalize_observation(frame)
                vae_input = np.moveaxis(vae_input, 2, 0)
                vae_input = np.reshape(vae_input, (-1, 3, 64, 64))
                vae_input = torch.tensor(vae_input, device=device)
                mu, log_var = vae_model.encode(vae_input)
                embedding = vae_model.reparameterize(mu, log_var)

                controller_input = torch.cat(
                    (embedding, hidden_state.reshape(1, -1)), dim=1)
                action = controller_model.forward(controller_input)
                actions.append(action)
                observation, _reward, _done, _info = env.step(action)

                # Advance the recurrent state with the embedding and action.
                action_tensor = torch.from_numpy(action).float().to(device)
                action_tensor = action_tensor.view(1, -1)
                rnn_inputs = torch.cat((embedding, action_tensor), dim=1)
                _pi, _mean, _sigma, hidden_state, cell_state = (
                    rnn_model.forward(rnn_inputs, hidden_state, cell_state))

            observations = np.array(observations, dtype=np.uint8)
            actions = np.array(actions, dtype=np.float16)
            np.savez_compressed('data/obs_data_VAE_{}'.format(batch_num),
                                obs=observations,
                                action=actions)
        finally:
            # Release the environment even if the rollout or saving fails.
            env.close()
    end = time.time()
    logging.info("_" + str(batch_num) + " Total: " + str(end - og))
def play(params,
         render=True,
         verbose=False,
         save_visualization=False,
         max_len=999):
    """Evaluate controller parameters on CarRacing using the VAE embedding.

    Runs up to 8 trials. In each trial the car is re-created at a randomly
    chosen track entry and driven for at most ``max_len`` steps (or just
    over 10 steps when ``_IS_TEST`` is set). Every raw frame is normalised
    exactly once, embedded by the VAE and turned into an action by
    ``decide_action``. Per-trial reward is clipped from below at -100.

    Args:
        params: Controller parameters passed to ``decide_action``.
        render: When true, render the environment on every step.
        verbose: When true, print the action and running reward every 200
            steps and on the last step of a full-length trial.
        save_visualization: When true, pass the first trial's frames to
            ``network.show_pred`` and stop after that trial.
        max_len: Maximum number of steps per trial.

    Returns:
        The negated mean clipped reward over the trials that were actually
        run.
    """
    time_start = datetime.datetime.now()
    print('Agent train run begun ' + str(time_start))

    sess, network = load_vae()
    env = CarRacing()

    # 16 trials in Ha and Schmidhuber; reduced here.
    num_trials = 8
    trials_run = 0
    agent_reward = 0
    try:
        for _trial in range(num_trials):
            # Keep the reset frame raw: it is normalised at the top of the
            # step loop like every other frame.
            observation = env.reset()
            # Little hack to make the car start at random positions on the
            # race track.
            np.random.seed(int(str(time.time() * 1000000)[10:13]))
            position = np.random.randint(len(env.track))
            env.car = Car(env.world, *env.track[position][1:4])

            total_reward = 0.0
            steps = 0
            observations = []
            while True:
                if render:
                    env.render()
                observation = network.normalize_observation(observation)
                observations.append(observation)

                embedding = network.get_embedding(sess, observation)
                action = decide_action(sess, embedding, params)
                observation, r, _done, _info = env.step(action)
                total_reward += r
                # NB: done is not True after 1000 steps when using the hack
                #     above for random init of position, so the trial is
                #     bounded by max_len instead.
                if verbose and (steps % 200 == 0 or steps == max_len - 1):
                    print("\naction " + str(["{:+0.2f}".format(x)
                                             for x in action]))
                    print("step {} total_reward {:+0.2f}".format(
                        steps, total_reward))

                steps += 1
                if steps == max_len:
                    break
                if _IS_TEST and steps > 10:
                    break

            total_reward = np.maximum(-100, total_reward)
            agent_reward += total_reward
            trials_run += 1
            if save_visualization:
                title = 'train_agent_r{:.2f}'.format(agent_reward)
                print('Saving trajectory:', title)
                network.show_pred(title, np.concatenate(observations, 0))
                break
            print('.', end='')
    finally:
        # Release the session and environment even if a trial raises.
        sess.close()
        env.close()
    print('Agent done - ' + str(time_start))

    # Average over the trials that actually ran; visualisation mode stops
    # after the first one.
    return -(agent_reward / trials_run)