Example #1
def main(args):

    env_name = args.env_name
    total_episodes = args.total_episodes
    start_batch = args.start_batch
    time_steps = args.time_steps

    obs_data = []
    action_data = []

    env = make_env(env_name)
    s = 0
    batch = start_batch

    while s < total_episodes:

        for i_episode in range(200):  # 200 episodes are collected per saved batch
            print('-----')
            observation = env.reset()
            env.render()
            done = False
            action = env.action_space.sample()
            t = 0
            obs_sequence = []
            action_sequence = []
            while t < time_steps:
                t = t + 1
                
                action = config.generate_data_action(t, action)  # next action from the project's data-collection policy
                observation = config.adjust_obs(observation)  # preprocess the raw frame before storing it
                      
                obs_sequence.append(observation)
                action_sequence.append(action)
                
                observation, reward, done, info = env.step(action)
            
            obs_data.append(obs_sequence)
            action_data.append(action_sequence)
            
            print("Batch {} Episode {} finished after {} timesteps".format(batch, i_episode, t+1))
            print("Current dataset contains {} observations".format(sum(map(len, obs_data))))

            s = s + 1

        print("Saving dataset for batch {}".format(batch))
        np.save('./data/obs_data_' + str(batch), obs_data)  # np.save appends the .npy extension
        np.save('./data/action_data_' + str(batch), action_data)

        batch = batch + 1

        obs_data = []
        action_data = []
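
For context, scripts like this one normally parse their arguments at module level and then call main. A minimal sketch of such an entry point follows; the argument names mirror the attributes read above, and the default values are illustrative assumptions:

import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Collect random rollouts")
    parser.add_argument("--env_name", default="car_racing", help="environment to roll out (illustrative default)")
    parser.add_argument("--total_episodes", type=int, default=2000, help="number of episodes to collect")
    parser.add_argument("--start_batch", type=int, default=0, help="index of the first batch file to write")
    parser.add_argument("--time_steps", type=int, default=300, help="timesteps recorded per episode")
    args = parser.parse_args()
    main(args)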
Example #2
def main(args):

    env_name = args.env_name
    total_episodes = args.total_episodes
    time_steps = args.time_steps
    render = args.render
    run_all_envs = args.run_all_envs
    action_refresh_rate = args.action_refresh_rate

    if run_all_envs:
        envs_to_generate = config.train_envs
    else:
        envs_to_generate = [env_name]

    for current_env_name in envs_to_generate:
        print("Generating data for env {}".format(current_env_name))

        env = make_env(current_env_name)  # <1>
        s = 0

        while s < total_episodes:

            episode_id = random.randint(0, 2**31 - 1)
            filename = DIR_NAME + str(episode_id) + ".npz"

            observation = env.reset()

            env.render()

            t = 0

            obs_sequence = []
            action_sequence = []
            reward_sequence = []
            done_sequence = []

            reward = -0.1
            done = False

            while t < time_steps:  # and not done:
                if t % action_refresh_rate == 0:
                    action = config.generate_data_action(t, env)  # <2>

                observation = config.adjust_obs(observation)  # <3>

                obs_sequence.append(observation)
                action_sequence.append(action)
                reward_sequence.append(reward)
                done_sequence.append(done)

                observation, reward, done, info = env.step(action)  # <4>

                t = t + 1

                if render:
                    env.render()

            print("Episode {} finished after {} timesteps".format(s, t))

            np.savez_compressed(filename,
                                obs=obs_sequence,
                                action=action_sequence,
                                reward=reward_sequence,
                                done=done_sequence)  # <4>

            s = s + 1

        env.close()
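
Each episode is saved as its own compressed .npz archive under DIR_NAME, so a downstream training script can read it back by key. A minimal sketch, with an illustrative file path:

import numpy as np

episode = np.load("./data/rollout/123456789.npz")  # illustrative path under DIR_NAME
obs = episode["obs"]        # stacked, preprocessed observations
action = episode["action"]  # action recorded at each step
reward = episode["reward"]  # per-step rewards as recorded by the loop above
done = episode["done"]      # episode-termination flags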
Example #3
def simulate(model,
             num_episode=5,
             seed=-1,
             max_len=-1,
             generate_data_mode=False,
             render_mode=False):
    reward_list = []
    t_list = []

    max_episode_length = 1000

    if max_len > 0:
        if max_len < max_episode_length:
            max_episode_length = max_len

    if (seed >= 0):
        random.seed(seed)
        np.random.seed(seed)
        model.env.seed(seed)

    for episode in range(num_episode):

        model.reset()

        obs = model.env.reset()
        reward = 0
        action = np.array([0, 0, 0])

        if obs is None:
            obs = np.zeros(model.input_size)

        total_reward = 0.0

        model.env.render("rgb_array")

        for t in range(max_episode_length):

            if obs.shape == model.vae.input_dim:  ### running in real environment
                obs = config.adjust_obs(obs)
                reward = config.adjust_reward(reward)

            if render_mode:
                model.env.render("human")
                if RENDER_DELAY:
                    time.sleep(0.1)
            # else:
            #   model.env.render('rgb_array')

            vae_encoded_obs = model.update(obs, t)

            input_to_rnn = [
                np.array(
                    [[np.concatenate([vae_encoded_obs, action, [reward]])]]),
                np.array([model.hidden]),
                np.array([model.cell_values])
            ]

            out = model.rnn.forward.predict(input_to_rnn)

            y_pred = out[0][0][0]
            model.hidden = out[1][0]
            model.cell_values = out[2][0]

            controller_obs = np.concatenate([vae_encoded_obs, model.hidden])

            if generate_data_mode:
                action = config.generate_data_action(t=t, env=model.env)
            else:
                action = model.get_action(controller_obs,
                                          t=t,
                                          add_noise=ADD_NOISE)

            # print(action)
            # action = [-0.1,1,0]

            obs, reward, done, info = model.env.step(action)

            total_reward += reward

            if done:
                break

        if render_mode:
            print("reward", total_reward, "timesteps", t)

        reward_list.append(total_reward)
        t_list.append(t)
        model.env.close()

    return reward_list, t_list
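
config.adjust_obs and config.adjust_reward are project-specific helpers that are not shown in this listing; for pixel environments, adjust_obs typically just rescales the frame for the VAE. A hypothetical sketch, not the project's actual implementation:

import numpy as np

def adjust_obs(obs):
    # Hypothetical preprocessing: convert uint8 pixels to floats in [0, 1].
    return obs.astype(np.float32) / 255.0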
Example #4
def simulate(model,
             train_mode=False,
             render_mode=True,
             num_episode=5,
             seed=-1,
             max_len=-1,
             generate_data_mode=False):

    reward_list = []
    t_list = []

    max_episode_length = 3000

    if max_len > 0:
        if max_len < max_episode_length:
            max_episode_length = max_len

    if (seed >= 0):
        random.seed(seed)
        np.random.seed(seed)
        model.env.seed(seed)

    for episode in range(num_episode):

        model.reset()

        obs = model.env.reset()
        obs = config.adjust_obs(obs)
        action = model.env.action_space.sample()

        model.env.render("human")

        if obs is None:
            obs = np.zeros(model.input_size)

        total_reward = 0.0

        for t in range(max_episode_length):

            if render_mode:
                model.env.render("human")
                if RENDER_DELAY:
                    time.sleep(0.01)

            vae_encoded_obs = model.update(obs, t)
            controller_obs = np.concatenate([vae_encoded_obs, model.hidden])

            if generate_data_mode:
                action = config.generate_data_action(t=t,
                                                     current_action=action)
            elif MEAN_MODE:
                action = model.get_action(controller_obs,
                                          t=t,
                                          mean_mode=(not train_mode))
            else:
                action = model.get_action(controller_obs, t=t, mean_mode=False)

            obs, reward, done, info = model.env.step(action)
            obs = config.adjust_obs(obs)

            input_to_rnn = [
                np.array([[np.concatenate([vae_encoded_obs, action])]]),
                np.array([model.hidden]),
                np.array([model.cell_values])
            ]
            h, c = model.rnn.forward.predict(input_to_rnn)
            model.hidden = h[0]
            model.cell_values = c[0]

            total_reward += reward

            if done:
                break

        if render_mode:
            print("reward", total_reward, "timesteps", t)

        reward_list.append(total_reward)
        t_list.append(t)
        model.env.close()

    return reward_list, t_list
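
A caller only needs the two lists this function returns; an evaluation script could summarise them as follows (a sketch that assumes a model object has been constructed elsewhere):

import numpy as np

reward_list, t_list = simulate(model, num_episode=5, render_mode=False)
print("mean reward:", np.mean(reward_list), "+/-", np.std(reward_list))
print("mean episode length:", np.mean(t_list))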
Example #5
def simulate(model,
             num_episode=5,
             seed=-1,
             max_len=-1,
             generate_data_mode=False,
             render_mode=False):

    reward_list = []
    t_list = []

    max_episode_length = 100000

    if max_len > 0:
        if max_len < max_episode_length:
            max_episode_length = max_len

    if (seed >= 0):
        random.seed(seed)
        np.random.seed(seed)
        model.env.seed(seed)

    avg_time = [0, 0]
    count_times = [0, 0]

    for episode in range(num_episode):

        # print(f'Episode {episode}')

        model.reset()

        # obs = model.env.reset()
        # action = [0,0]
        obs = model.env.reset()
        reward = 0
        action = np.array([0, 0])

        if obs is None:
            obs = np.zeros(model.input_size)

        total_reward = 0.0

        # Uncomment block
        # model.env.render("rgb_array")

        for t in range(max_episode_length):

            # print(f'Timestep {t}')
            if obs.shape == model.vae.input_dim:  ### running in real environment
                obs = config.adjust_obs(obs)
                reward = config.adjust_reward(reward)

            # Uncomment block
            # if render_mode:
            #   model.env.render("human")
            #   if RENDER_DELAY:
            #     time.sleep(0.1)

            # else:
            #   model.env.render('rgb_array')

            # model.env.render()

            vae_encoded_obs = model.update(obs, t)

            input_to_rnn = [
                np.array(
                    [[np.concatenate([vae_encoded_obs, action, [reward]])]]),
                np.array([model.hidden]),
                np.array([model.cell_values])
            ]
            start = time.process_time()
            out = model.rnn.forward.predict(input_to_rnn)
            y_pred = out[0][0][0]
            model.hidden = out[1][0]
            model.cell_values = out[2][0]

            controller_obs = np.concatenate([vae_encoded_obs, model.hidden])

            if generate_data_mode:
                action = config.generate_data_action(t=t, env=model.env)
            else:
                action = model.get_action(controller_obs,
                                          t=t,
                                          add_noise=ADD_NOISE)

            # print(action)
            # action = [-0.1,1,0]

            new_time = time.process_time() - start
            avg_time[0] = ((avg_time[0] * count_times[0]) +
                           new_time) / (count_times[0] + 1)
            count_times[0] += 1

            start = time.process_time()
            obs, reward, done, _ = model.env.step(action)

            # print(f'action:{action}    reward:{reward}')

            new_time = time.process_time() - start
            avg_time[1] = ((avg_time[1] * count_times[1]) +
                           new_time) / (count_times[1] + 1)
            count_times[1] += 1

            # print(avg_time)

            total_reward += reward

            if done:
                break

        if render_mode:
            print("reward", total_reward, "timesteps", t)

        reward_list.append(total_reward)
        t_list.append(t)
        # model.env.close()

    return reward_list, t_list
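
The avg_time / count_times bookkeeping above maintains running means of the RNN-forward time and the env.step time. The same update can be written incrementally, which avoids re-multiplying by the count on every step; a small equivalent helper:

def update_running_mean(mean, count, new_value):
    # Equivalent to ((mean * count) + new_value) / (count + 1), but numerically tidier.
    return mean + (new_value - mean) / (count + 1)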
Example #6
def simulate(model, train_mode=False, render_mode=True, num_episode=5, seed=-1, max_len=-1, generate_data_mode = False):

  reward_list = []
  t_list = []

  max_episode_length = 3000

  if max_len > 0:
    if max_len < max_episode_length:
      max_episode_length = max_len

  if (seed >= 0):
    random.seed(seed)
    np.random.seed(seed)
    model.env.seed(seed)

  for episode in range(num_episode):

    model.reset()

    obs = model.env.reset()
    obs = config.adjust_obs(obs)
    action = model.env.action_space.sample()

    model.env.render("human")

    if obs is None:
      obs = np.zeros(model.input_size)

    total_reward = 0.0

    for t in range(max_episode_length):

      if render_mode:
        model.env.render("human")
        if RENDER_DELAY:
          time.sleep(0.01)

      vae_encoded_obs = model.update(obs, t)
      controller_obs = np.concatenate([vae_encoded_obs,model.hidden])

      if generate_data_mode:
        action = config.generate_data_action(t=t, current_action = action)
      elif MEAN_MODE:
        action = model.get_action(controller_obs, t=t, mean_mode=(not train_mode))
      else:
        action = model.get_action(controller_obs, t=t, mean_mode=False)

      obs, reward, done, info = model.env.step(action)
      obs = config.adjust_obs(obs)

      input_to_rnn = [np.array([[np.concatenate([vae_encoded_obs, action])]]),np.array([model.hidden]),np.array([model.cell_values])]
      h, c = model.rnn.forward.predict(input_to_rnn)
      model.hidden = h[0]
      model.cell_values = c[0]

      total_reward += reward

      if done:
        break

    if render_mode:
      print("reward", total_reward, "timesteps", t)
    
    reward_list.append(total_reward)
    t_list.append(t)
    model.env.close()

  return reward_list, t_list
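
RENDER_DELAY and MEAN_MODE are module-level flags that this snippet reads but does not define; to run it standalone they would need defaults along these lines (the values shown are assumptions):

RENDER_DELAY = False  # assumed flag: sleep between rendered frames when True
MEAN_MODE = False     # assumed flag: when True, get_action is called with mean_mode=(not train_mode)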
Example #7
def main(args):

    env_name = args.env_name
    total_episodes = args.total_episodes
    start_batch = args.start_batch
    time_steps = args.time_steps
    render = args.render
    batch_size = args.batch_size
    run_all_envs = args.run_all_envs

    if run_all_envs:
        envs_to_generate = config.train_envs
    else:
        envs_to_generate = [env_name]

    for current_env_name in envs_to_generate:
        print("Generating data for env {}".format(current_env_name))

        env = make_env(current_env_name)
        s = 0
        batch = start_batch

        batch_size = min(batch_size, total_episodes)

        while s < total_episodes:
            obs_data = []
            action_data = []

            for i_episode in range(batch_size):
                print('-----')
                observation = env.reset()
                observation = config.adjust_obs(observation)

                # Position car randomly on track
                position = np.random.randint(len(env.track))
                env.car = Car(env.world, *env.track[position][1:4])

                # plt.imshow(observation)
                # plt.show()

                env.render()
                done = False
                action = env.action_space.sample()
                t = 0
                obs_sequence = []
                action_sequence = []

                while t < time_steps:  #and not done:
                    t = t + 1

                    action = config.generate_data_action(t, action)

                    obs_sequence.append(observation)
                    action_sequence.append(action)

                    observation, reward, done, info = env.step(action)
                    observation = config.adjust_obs(observation)

                    if render:
                        env.render()

                obs_data.append(obs_sequence)
                action_data.append(action_sequence)

                print("Batch {} Episode {} finished after {} timesteps".format(
                    batch, i_episode, t + 1))
                print("Current dataset contains {} observations".format(
                    sum(map(len, obs_data))))

                s = s + 1

            print("Saving dataset for batch {}".format(batch))
            np.save('./data/obs_data_' + current_env_name + '_' + str(batch),
                    obs_data)
            np.save(
                './data/action_data_' + current_env_name + '_' + str(batch),
                action_data)

            batch = batch + 1

        env.close()
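
The Car class used above to re-spawn the car at a random track tile comes from Gym's Box2D CarRacing implementation; the snippet assumes a module-level import along these lines:

from gym.envs.box2d.car_dynamics import Car  # Box2D car used by CarRacing in classic Gym versions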
Example #8
def main(args):

    env_name = args.env_name
    total_episodes = int(args.total_episodes)
    time_steps = int(args.time_steps)
    render = args.render
    run_all_envs = args.run_all_envs
    action_refresh_rate = args.action_refresh_rate
    alpha = float(args.alpha)
    model_name = str(args.model_name)

    if run_all_envs:
        envs_to_generate = config.train_envs
    else:
        envs_to_generate = [env_name]

    for current_env_name in envs_to_generate:
        print("Generating data for env {}".format(current_env_name))
        if not os.path.isdir(DIR_NAME + model_name):
            os.mkdir(DIR_NAME + model_name)

        env = make_env(current_env_name)  # <1>
        s = 0

        while s < total_episodes:

            episode_id = random.randint(0, 2**31 - 1)
            filename = DIR_NAME + model_name + '/' + str(episode_id) + ".npz"

            observation = env.reset()

            env.render()

            t = 0

            obs_sequenceS = []
            obs_sequenceB = []
            action_sequence = []
            reward_sequence = []
            done_sequence = []

            reward = -0.1
            done = False
            beta = alpha + np.random.rand() * (1 - alpha)

            while t < time_steps:  # and not done:
                if t % action_refresh_rate == 0:
                    action = config.generate_data_action(t, env)  # <2>

                observation = config.adjust_obs(observation)  # <3>

                obs_sequenceS.append(
                    cv2.resize(crop(observation, alpha * beta),
                               dsize=(WH, WH),
                               interpolation=cv2.INTER_CUBIC))
                obs_sequenceB.append(
                    cv2.resize(crop(observation, beta),
                               dsize=(WH, WH),
                               interpolation=cv2.INTER_CUBIC))
                action_sequence.append(action)
                reward_sequence.append(reward)
                done_sequence.append(done)

                observation, reward, done, info = env.step(action)  # <4>

                t = t + 1

                if render:
                    env.render()

            print("Episode {} finished after {} timesteps".format(s, t))

            np.savez_compressed(filename,
                                obsS=np.asarray(obs_sequenceS),
                                obsB=np.asarray(obs_sequenceB),
                                action=np.asarray(action_sequence),
                                reward=np.asarray(reward_sequence),
                                done=np.asarray(done_sequence))  # <4>

            s = s + 1

        env.close()
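
crop, WH, and DIR_NAME are module-level helpers and constants that the listing relies on but does not define. A hypothetical centre-crop that keeps a given fraction of the frame, with illustrative values for the constants (not the project's actual code):

WH = 64                        # illustrative output resolution expected by the VAE
DIR_NAME = "./data/rollout_"   # illustrative prefix for per-model episode directories

def crop(frame, keep_fraction):
    # Hypothetical centre crop keeping `keep_fraction` of each spatial dimension.
    h, w = frame.shape[:2]
    ch, cw = int(h * keep_fraction), int(w * keep_fraction)
    top, left = (h - ch) // 2, (w - cw) // 2
    return frame[top:top + ch, left:left + cw]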
Example #9
def main(args):

    env_name = args.env_name
    total_episodes = args.total_episodes
    start_batch = args.start_batch
    time_steps = args.time_steps
    render = args.render
    batch_size = args.batch_size
    run_all_envs = args.run_all_envs

    if run_all_envs:
        envs_to_generate = config.train_envs
    else:
        envs_to_generate = [env_name]


    for current_env_name in envs_to_generate:
        print("Generating data for env {}".format(current_env_name))

        env = make_env(current_env_name)
        s = 0
        batch = start_batch

        batch_size = min(batch_size, total_episodes)

        while s < total_episodes:
            obs_data = []
            action_data = []

            for i_episode in range(batch_size):
                print('-----')
                observation = env.reset()
                observation = config.adjust_obs(observation)

                # plt.imshow(observation)
                # plt.show()

                env.render()
                done = False
                action = env.action_space.sample()
                t = 0
                obs_sequence = []
                action_sequence = []

                while t < time_steps: #and not done:
                    t = t + 1
                    
                    action = config.generate_data_action(t, action)
                    
                    obs_sequence.append(observation)
                    action_sequence.append(action)

                    observation, reward, done, info = env.step(action)
                    observation = config.adjust_obs(observation)

                    if render:
                        env.render()

                obs_data.append(obs_sequence)
                action_data.append(action_sequence)
                
                print("Batch {} Episode {} finished after {} timesteps".format(batch, i_episode, t+1))
                print("Current dataset contains {} observations".format(sum(map(len, obs_data))))

                s = s + 1

            print("Saving dataset for batch {}".format(batch))
            np.save('./data/obs_data_' + current_env_name + '_' + str(batch), obs_data)
            np.save('./data/action_data_' + current_env_name + '_' + str(batch), action_data)

            batch = batch + 1

        env.close()
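
Because every episode here runs for the full time_steps (the "and not done" check is commented out), each batch should stack into a regular array and can be loaded straight back for VAE training; a minimal sketch with illustrative file names:

import numpy as np

obs_data = np.load("./data/obs_data_car_racing_0.npy")        # illustrative file name
action_data = np.load("./data/action_data_car_racing_0.npy")
print(obs_data.shape, action_data.shape)  # (episodes, time_steps, ...) each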
Example #10
def main(args):
    env_name = args.env_name
    total_episodes = args.total_episodes
    start_file = args.start_file
    time_steps = args.time_steps
    render = args.render
    file_size = args.file_size
    run_all_envs = args.run_all_envs
    validation = args.validation
    start_frame = args.start_frame

    if validation:
        total_episodes = file_size

    if run_all_envs:
        envs_to_generate = config.train_envs
    else:
        envs_to_generate = [env_name]

    for current_env_name in envs_to_generate:
        print("Generating data for env {}".format(current_env_name))

        env = make_env(current_env_name)
        s = 0
        file = start_file

        file_size = min(file_size, total_episodes)

        while s < total_episodes:
            obs_data = []
            action_data = []

            for i_episode in range(file_size):
                print("-----")
                observation = env.reset()
                observation = config.adjust_obs(observation)

                # essential for saving as well
                env.render()

                done = False
                action = env.action_space.sample()
                time = 0
                obs_sequence = []
                action_sequence = []

                while time < time_steps and not done:
                    time = time + 1

                    action = config.generate_data_action(time, action)

                    observation, reward, done, info = env.step(action)
                    observation = config.adjust_obs(observation)

                    if time > start_frame:
                        obs_sequence.append(observation)  # [:56]?
                        action_sequence.append(action)

                    if render:
                        env.render()

                obs_data.append(obs_sequence)
                action_data.append(action_sequence)

                print("File {} Episode {} finished after {} timesteps".format(
                    file, i_episode, time + 1))
                print("Current dataset contains {} observations".format(
                    sum(map(len, obs_data))))

                s = s + 1

            print("Saving dataset for batch {}".format(file))

            if validation:
                np.savez_compressed("./data/obs_valid_" + current_env_name,
                                    obs_data)
                np.savez_compressed("./data/action_valid_" + current_env_name,
                                    action_data)
            else:
                # np.random.shuffle(obs_data)

                # obs_data, action_data = shuffle(obs_data, action_data)

                np.savez_compressed(
                    "./data/obs_data_" + current_env_name + "_" + str(file),
                    obs_data)
                np.savez_compressed(
                    "./data/action_data_" + current_env_name + "_" + str(file),
                    action_data)

            file = file + 1

        env.close()
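
np.savez_compressed is called here with a positional array, so NumPy stores it under the default key arr_0. Reading a batch back therefore looks like this; the path is illustrative, and allow_pickle=True is only needed if episodes ended early and the ragged data was stored as an object array:

import numpy as np

with np.load("./data/obs_data_car_racing_0.npz", allow_pickle=True) as f:
    obs_data = f["arr_0"]  # positional arguments to np.savez_compressed are saved as arr_0, arr_1, ...
print(len(obs_data), "episodes loaded")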