Esempio n. 1
0
def generate_vae_dataset(
        N=10000, test_p=0.9, use_cached=True, imsize=84, show=False,
        dataset_path=None, env_class=None, env_kwargs=None, init_camera=sawyer_door_env_camera,
):
    """Load or render flattened door-environment images for VAE training.

    The data comes from ``dataset_path`` when given; otherwise from the
    ``/tmp`` cache file when ``use_cached`` is set and that file exists;
    otherwise it is rendered from a new environment. When rendering, the
    first ``int(N / 2)`` rows are images of sampled goals and the remaining
    rows are images taken during random exploration.

    Args:
        N: number of rows to render; also embedded in the cache file name and
            used to compute the split index.
        test_p: fraction of ``N`` that determines the size of the FIRST
            returned split (``dataset[:int(N * test_p)]``).
        use_cached: load the ``/tmp`` cache file if it exists.
        imsize: image side length handed to ``ImageEnv``.
        show: display every rendered image with OpenCV.
        dataset_path: optional path resolved through
            ``local_path_from_s3_or_local_path`` and loaded with ``np.load``.
        env_class: callable building the underlying environment.
        env_kwargs: keyword arguments for ``env_class``; ``None`` means none.
        init_camera: camera initializer forwarded to ``ImageEnv``.

    Returns:
        ``(train_dataset, test_dataset, info)``; ``info`` is an empty dict.
    """
    filename = "/tmp/sawyer_door_push_open_and_reach" + str(N) + ".npy"
    info = {}
    if dataset_path is not None:
        filename = local_path_from_s3_or_local_path(dataset_path)
        dataset = np.load(filename)
    elif use_cached and osp.isfile(filename):
        dataset = np.load(filename)
        print("loaded data from saved file", filename)
    else:
        # NOTE(review): the rendered dataset is never written to `filename`,
        # so the cache branch above only triggers if something else created
        # the file. Confirm whether a np.save() is wanted here.
        if env_kwargs is None:
            # The default of None cannot be unpacked with ** below.
            env_kwargs = {}
        env = env_class(**env_kwargs)
        env = ImageEnv(
            env, imsize,
            transpose=True,
            init_camera=init_camera,
            normalize=True,
        )
        oracle_sampled_data = int(N/2)
        dataset = np.zeros((N, imsize * imsize * 3))
        print('Goal Space Sampling')
        for i in range(oracle_sampled_data):
            goal = env.sample_goal()
            env.set_to_goal(goal)
            img = env._get_flat_img()
            dataset[i, :] = img
            if show:
                # Use the configured size rather than a hard-coded 84 so the
                # preview also works for other values of imsize.
                cv2.imshow('img', img.reshape(3, imsize, imsize).transpose())
                cv2.waitKey(1)
            print(i)
        # Overrides an attribute of the wrapped env before random exploration.
        # NOTE(review): its effect depends on the env implementation.
        env._wrapped_env.min_y_pos=.6
        policy = RandomPolicy(env.action_space)
        es = OUStrategy(action_space=env.action_space, theta=0)
        exploration_policy = PolicyWrappedWithExplorationStrategy(
            exploration_strategy=es,
            policy=policy,
        )
        print('Random Sampling')
        for i in range(oracle_sampled_data, N):
            # Restart the episode (and the exploration noise) every 20 rows.
            if i % 20==0:
                env.reset()
                exploration_policy.reset()
            # Take 10 exploration steps on the wrapped env between captures.
            for _ in range(10):
                action = exploration_policy.get_action()[0]
                env.wrapped_env.step(
                    action
                )
            img = env._get_flat_img()
            dataset[i, :] = img
            if show:
                cv2.imshow('img', img.reshape(3, imsize, imsize).transpose())
                cv2.waitKey(1)
            print(i)
    # The split index is derived from N, not from the loaded row count.
    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
def generate_uniform_dataset_reacher(env_class=None,
                                     env_kwargs=None,
                                     num_imgs=1000,
                                     use_cached_dataset=False,
                                     init_camera=None,
                                     imsize=48,
                                     show=False,
                                     save_file_prefix=None,
                                     env_id=None,
                                     tag='',
                                     dataset_path=None):
    """Return a uint8 array of goal images, one row per environment reset.

    If ``dataset_path`` is given, its contents are returned directly via
    ``load_local_or_remote_file``. Otherwise an ``ImageEnv`` is built (from
    ``env_class``/``env_kwargs`` when both are truthy, else from ``env_id``
    through ``gym.make``). A cached ``/tmp`` file is returned when
    ``use_cached_dataset`` is set and the file exists; if not, ``num_imgs``
    images are rendered, saved to that file and returned.
    """
    if dataset_path is not None:
        return load_local_or_remote_file(dataset_path)

    import gym
    from gym.envs import registration
    # trigger registration
    import multiworld.envs.pygame
    import multiworld.envs.mujoco

    if env_class and env_kwargs:
        base_env = env_class(**env_kwargs)
    else:
        base_env = gym.make(env_id)
    env = ImageEnv(
        base_env,
        imsize,
        init_camera=init_camera,
        transpose=True,
        normalize=True,
    )
    env.non_presampled_goal_img_is_garbage = True

    # Fall back to the environment id for naming the cache file.
    if save_file_prefix is None and env_id is not None:
        save_file_prefix = env_id
    filename = "/tmp/{}_N{}_imsize{}uniform_images_{}.npy".format(
        save_file_prefix,
        str(num_imgs),
        env.imsize,
        tag,
    )

    cache_hit = use_cached_dataset and osp.isfile(filename)
    if cache_hit:
        images = np.load(filename)
        print("Loaded data from {}".format(filename))
        return images

    print('Sampling Uniform Dataset')
    dataset = np.zeros((num_imgs, 3 * env.imsize**2), dtype=np.uint8)
    for row in range(num_imgs):
        # Each reset is followed by moving the env to its current goal, and
        # the resulting frame becomes one row of the dataset.
        env.reset()
        env.set_to_goal(env.get_goal())
        flat_img = env._get_flat_img()
        if show:
            preview = flat_img.reshape(3, env.imsize, env.imsize).transpose()
            preview = preview[::-1, :, ::-1]
            cv2.imshow('img', preview)
            cv2.waitKey(1)
        print(row)
        dataset[row, :] = unormalize_image(flat_img)
    np.save(filename, dataset)
    print("Saving file to {}".format(filename))
    return dataset
Esempio n. 3
0
def generate_vae_dataset(
    N=10000,
    test_p=0.9,
    use_cached=True,
    imsize=84,
    show=False,
    dataset_path=None,
    policy_path=None,
    action_space_sampling=False,
    env_class=SawyerPushAndPullDoorEnv,
    env_kwargs=None,
    action_plus_random_sampling=False,
    init_camera=sawyer_door_env_camera,
    ratio_action_sample_to_random=1 / 2,
    env_id=None,
):
    """Load or render uint8 door images (push-and-pull env) for VAE training.

    The data comes from ``dataset_path`` when given; otherwise from the
    ``/tmp`` cache file when ``use_cached`` is set and the file exists;
    otherwise it is rendered, which is only implemented for
    ``action_plus_random_sampling=True``.

    Args:
        N: number of rows to render; also part of the cache file name and used
            for the split index.
        test_p: fraction of ``N`` that determines the size of the FIRST
            returned split.
        use_cached: load the ``/tmp`` cache file if it exists.
        imsize: image side length handed to ``ImageEnv``.
        show: display every rendered image with OpenCV.
        dataset_path: optional path resolved through
            ``local_path_from_s3_or_local_path``.
        policy_path: only influences which cache file name is used.
        action_space_sampling: only influences which cache file name is used.
        env_class: callable building the environment when ``env_id`` is None.
        env_kwargs: keyword arguments for ``env_class``; ``None`` means none.
        action_plus_random_sampling: enable rendering of a new dataset.
        init_camera: camera initializer forwarded to ``ImageEnv``.
        ratio_action_sample_to_random: fraction of ``N`` rendered from sampled
            goals; the rest comes from random exploration.
        env_id: gym id used instead of ``env_class`` when not None.

    Returns:
        ``(train_dataset, test_dataset, info)``; ``info['env']`` is set only
        when a dataset was rendered.

    Raises:
        NotImplementedError: no dataset could be loaded and
            ``action_plus_random_sampling`` is false.
    """
    if policy_path is not None:
        filename = "/tmp/sawyer_door_push_and_pull_open_oracle+random_policy_data_closer_zoom_action_limited" + str(
            N) + ".npy"
    elif action_space_sampling:
        filename = "/tmp/sawyer_door_push_and_pull_open_zoomed_in_action_space_sampling" + str(
            N) + ".npy"
    else:
        filename = "/tmp/sawyer_door_push_and_pull_open" + str(N) + ".npy"
    info = {}
    if dataset_path is not None:
        filename = local_path_from_s3_or_local_path(dataset_path)
        dataset = np.load(filename)
    elif use_cached and osp.isfile(filename):
        dataset = np.load(filename)
        print("loaded data from saved file", filename)
    elif action_plus_random_sampling:
        # NOTE(review): the rendered dataset is never written to `filename`,
        # so the cache branch above relies on the file being created elsewhere.
        if env_id is not None:
            import gym
            # NOTE(review): an env built from env_id is not wrapped in
            # ImageEnv here; presumably the registered env already provides
            # the image API used below. Confirm.
            env = gym.make(env_id)
        else:
            if env_kwargs is None:
                # The default of None cannot be unpacked with ** below.
                env_kwargs = {}
            env = env_class(**env_kwargs)
            env = ImageEnv(
                env,
                imsize,
                transpose=True,
                init_camera=init_camera,
                normalize=True,
            )
        action_sampled_data = int(N * ratio_action_sample_to_random)
        dataset = np.zeros((N, imsize * imsize * 3), dtype=np.uint8)
        print('Action Space Sampling')
        for i in range(action_sampled_data):
            goal = env.sample_goal()
            env.set_to_goal(goal)
            img = env._get_flat_img()
            dataset[i, :] = unormalize_image(img)
            if show:
                # Use the configured size rather than a hard-coded 84.
                cv2.imshow('img', img.reshape(3, imsize, imsize).transpose())
                cv2.waitKey(1)
            print(i)
        policy = RandomPolicy(env.action_space)
        es = OUStrategy(action_space=env.action_space, theta=0)
        exploration_policy = PolicyWrappedWithExplorationStrategy(
            exploration_strategy=es,
            policy=policy,
        )
        print('Random Sampling')
        for i in range(action_sampled_data, N):
            # Restart the episode (and the exploration noise) every 20 rows.
            if i % 20 == 0:
                env.reset()
                exploration_policy.reset()
            # Take 10 exploration steps on the wrapped env between captures.
            for _ in range(10):
                action = exploration_policy.get_action()[0]
                env.wrapped_env.step(action)
            # After moving the arm, set the door angle from a sampled goal.
            goal = env.sample_goal()
            env.set_to_goal_angle(goal['state_desired_goal'])
            img = env._get_flat_img()
            dataset[i, :] = unormalize_image(img)
            if show:
                cv2.imshow('img', img.reshape(3, imsize, imsize).transpose())
                cv2.waitKey(1)
            print(i)
        env._wrapped_env.min_y_pos = .5
        info['env'] = env
    else:
        raise NotImplementedError()
    # The split index is derived from N, not from the loaded row count.
    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
Esempio n. 4
0
def generate_vae_dataset_from_params(
        env_class=None,
        env_kwargs=None,
        env_id=None,
        N=10000,
        test_p=0.9,
        use_cached=True,
        imsize=84,
        num_channels=1,
        show=False,
        init_camera=None,
        dataset_path=None,
        oracle_dataset=False,
        n_random_steps=100,
        vae_dataset_specific_env_kwargs=None,
        save_file_prefix=None,
):
    """Load or build a shuffled uint8 dataset from presampled goal images.

    Only the oracle mode is supported: the function asserts
    ``oracle_dataset == True``. The data comes from ``dataset_path`` when
    given, otherwise from the ``/tmp`` cache file when ``use_cached`` is set
    and the file exists, otherwise from
    ``env._presampled_goals['image_desired_goal']`` after
    ``setup_pickup_image_env`` has been called on the env. Newly built data is
    shuffled and saved to the cache file.

    Args:
        env_class: callable building the environment when ``env_id`` is None.
        env_kwargs: base keyword arguments for ``env_class``.
        env_id: gym id used instead of ``env_class`` when not None.
        N: number of rows to build; replaced by the row count of the file
            when ``dataset_path`` is given.
        test_p: fraction of ``N`` that determines the size of the FIRST
            returned split.
        use_cached: load the ``/tmp`` cache file if it exists.
        imsize: image side length.
        num_channels: channels per image; each row holds
            ``imsize * imsize * num_channels`` values.
        show: display every image with OpenCV while building.
        init_camera: camera initializer forwarded to ``ImageEnv``.
        dataset_path: optional path resolved through
            ``local_path_from_s3_or_local_path``.
        oracle_dataset: must equal ``True``.
        n_random_steps: not used by this function.
        vae_dataset_specific_env_kwargs: overrides merged on top of
            ``env_kwargs`` (this dict is updated in place).
        save_file_prefix: cache file prefix; defaults to ``env_id`` and then
            to ``env_class.__name__``.

    Returns:
        ``(train_dataset, test_dataset, info)``; ``info['env']`` is set only
        when a dataset was built.
    """
    from multiworld.core.image_env import ImageEnv, unormalize_image
    import time

    assert oracle_dataset == True

    if env_kwargs is None:
        env_kwargs = {}
    if save_file_prefix is None:
        save_file_prefix = env_id
    if save_file_prefix is None:
        save_file_prefix = env_class.__name__
    filename = "/tmp/{}_N{}_{}_imsize{}_oracle{}.npy".format(
        save_file_prefix,
        str(N),
        init_camera.__name__ if init_camera else '',
        imsize,
        oracle_dataset,
    )
    info = {}
    if dataset_path is not None:
        filename = local_path_from_s3_or_local_path(dataset_path)
        dataset = np.load(filename)
        np.random.shuffle(dataset)
        N = dataset.shape[0]
    elif use_cached and osp.isfile(filename):
        dataset = np.load(filename)
        np.random.shuffle(dataset)
        print("loaded data from saved file", filename)
    else:
        now = time.time()

        if env_id is not None:
            import gym
            import multiworld
            multiworld.register_all_envs()
            env = gym.make(env_id)
        else:
            if vae_dataset_specific_env_kwargs is None:
                vae_dataset_specific_env_kwargs = {}
            # Dataset-specific kwargs win; env_kwargs only fills the gaps.
            for key, val in env_kwargs.items():
                if key not in vae_dataset_specific_env_kwargs:
                    vae_dataset_specific_env_kwargs[key] = val
            env = env_class(**vae_dataset_specific_env_kwargs)
        if not isinstance(env, ImageEnv):
            env = ImageEnv(
                env,
                imsize,
                init_camera=init_camera,
                transpose=True,
                normalize=True,
            )
        setup_pickup_image_env(env, num_presampled_goals=N)
        env.reset()
        info['env'] = env

        dataset = np.zeros((N, imsize * imsize * num_channels), dtype=np.uint8)
        for i in range(N):
            img = env._presampled_goals['image_desired_goal'][i]
            dataset[i, :] = unormalize_image(img)
            if show:
                # Reshape with the configured channel count; a hard-coded 3
                # cannot match rows of imsize * imsize * num_channels values
                # unless num_channels happens to be 3.
                img = img.reshape(num_channels, imsize, imsize).transpose()
                img = img[::-1, :, ::-1]
                cv2.imshow('img', img)
                cv2.waitKey(1)
                time.sleep(.2)
        print("done making training data", filename, time.time() - now)
        np.random.shuffle(dataset)
        np.save(filename, dataset)

    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
Esempio n. 5
0
    # Fragment of a demo routine: the enclosing `def` is not visible here.
    # Alternative environment ids are kept as commented-out choices.
    # env_name = 'SawyerDoorHookResetFreeEnv-v1'
    env_name = 'SawyerPushHurdle-v0'
    # env_name = 'SawyerPushNIPSFull-v0'
    # env_name = 'SawyerPushNIPSEasy-v0'
    # env_name = 'SawyerPushHurdleResetFreeEnv-v0'
    multiworld.register_all_envs()
    imsize = 48

    # presampled_goals_path = 'data/local/goals/SawyerDoorHookResetFreeEnv-v1-goal.npy'
    # presampled_goals_path = 'data/local/goals/SawyerPickupEnvYZEasy-v0-goal-500.npy'
    # presampled_goals = np.load(presampled_goals_path, allow_pickle=True).item()

    # NOTE(review): `env` is wrapped here but is not assigned anywhere in the
    # visible lines; presumably it is created from `env_name` in code that is
    # missing from this fragment. Confirm.
    env = ImageEnv(env,
                   imsize,
                   init_camera=sawyer_init_camera_zoomed_in,
                   transpose=True,
                   normalize=True,
                   presampled_goals=None)

    print(env.action_space.low)
    print(env.action_space.high)

    # 50 episodes of 50 uniformly sampled actions each; every resulting image
    # observation is passed to show_obs.
    for i in range(50):
        o = env.reset()
        for t in range(50):
            print(t)
            action = env.action_space.sample()
            s, r, _, _ = env.step(action)
            img = s['image_observation']
            show_obs(img, imsize=imsize, name=env_name)
def generate_vae_dataset(
    N=10000,
    test_p=0.9,
    use_cached=True,
    imsize=84,
    show=False,
    dataset_path=None,
    action_space_sampling=False,
    init_camera=None,
    env_class=None,
    env_kwargs=None,
):
    """Load or render uint8 images of the xyz position-control env for a VAE.

    The data comes from ``dataset_path`` when given; otherwise from the
    ``/tmp`` cache file when ``use_cached`` is set and the file exists;
    otherwise ``N`` images are rendered and saved to that cache file.

    Args:
        N: number of rows to render; also part of the cache file name and used
            for the split index.
        test_p: fraction of ``N`` that determines the size of the FIRST
            returned split.
        use_cached: load the ``/tmp`` cache file if it exists.
        imsize: image side length handed to ``ImageEnv``.
        show: display every rendered image with OpenCV.
        dataset_path: optional path resolved through
            ``local_path_from_s3_or_local_path``.
        action_space_sampling: if true, render images of sampled goals
            (``env.set_to_goal(env.sample_goal())``); otherwise render images
            along a random exploration rollout.
        init_camera: camera initializer forwarded to ``ImageEnv``.
        env_class: callable building the underlying environment.
        env_kwargs: keyword arguments for ``env_class``; ``None`` means none.

    Returns:
        ``(train_dataset, test_dataset, info)``; ``info['env']`` is set only
        when rendering with ``action_space_sampling=True``.
    """
    filename = "/tmp/sawyer_xyz_pos_control_new_zoom_cam" + str(N) + '.npy'
    info = {}
    if dataset_path is not None:
        filename = local_path_from_s3_or_local_path(dataset_path)
        dataset = np.load(filename)
    elif use_cached and osp.isfile(filename):
        dataset = np.load(filename)
        print("loaded data from saved file", filename)
    else:
        now = time.time()
        if env_kwargs is None:
            env_kwargs = dict()
        env = env_class(**env_kwargs)
        env = ImageEnv(
            env,
            imsize,
            transpose=True,
            init_camera=init_camera,
            normalize=True,
        )
        dataset = np.zeros((N, imsize * imsize * 3), dtype=np.uint8)
        if action_space_sampling:
            for i in range(N):
                env.set_to_goal(env.sample_goal())
                img = env._get_flat_img()
                dataset[i, :] = unormalize_image(img)
                if show:
                    # Use the configured size rather than a hard-coded 84.
                    cv2.imshow('img', img.reshape(3, imsize, imsize).transpose())
                    cv2.waitKey(1)
                print(i)
            info['env'] = env
        else:
            policy = RandomPolicy(env.action_space)
            es = OUStrategy(action_space=env.action_space, theta=0)
            exploration_policy = PolicyWrappedWithExplorationStrategy(
                exploration_strategy=es,
                policy=policy,
            )
            for i in range(N):
                # Move the goal out of the image
                env.wrapped_env.set_goal(np.array([100, 100, 100]))
                if i % 50 == 0:
                    print('Reset')
                    env.reset()
                    exploration_policy.reset()
                for _ in range(1):
                    action = exploration_policy.get_action()[0] * 10
                    env.wrapped_env.step(action)
                img = env.step(env.action_space.sample())[0]
                # The env is built with normalize=True while the dataset is
                # uint8, so convert exactly as the goal-sampling branch does;
                # a raw assignment would truncate the float image to ~0.
                dataset[i, :] = unormalize_image(img)
                if show:
                    cv2.imshow('img', img.reshape(3, imsize, imsize).transpose())
                    cv2.waitKey(1)
                print(i)

        print("done making training data", time.time() - now)
        np.save(filename, dataset)

    # The split index is derived from N, not from the loaded row count.
    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
Esempio n. 7
0
# Script: roll out random actions in an image-wrapped env and dump every
# observation as a PNG file.
multiworld.register_all_envs()
# Alternative environments:
#env = gym.make('SawyerPickupMultiobj-v0')
#env = gym.make('SawyerPickupWideEnv-v0')
env = gym.make('SawyerMultiObj-v0')
#env = gym.make('SawyerPushNIPS-v0')

env = ImageEnv(
    env,
    imsize=imsize,
    init_camera=sawyer_pusher_camera_upright_v3,
    transpose=True,
    normalize=True,
)
i = 0

env.reset()
for j in range(0, 2000000):
    # Each action component is drawn uniformly from [-0.5, 0.5).
    action = np.array(
        [random.random() - .5,
         random.random() - .5,
         random.random() - .5])
    obs = env.step(action)[0]['image_observation']
    # Reshape with the size the env was built with instead of a hard-coded
    # 480, which only works when imsize == 480.
    obs_img = 255 * obs.reshape(3, imsize, imsize).transpose()
    # The last axis is reversed before writing (presumably RGB -> BGR for
    # OpenCV; confirm against ImageEnv's channel order).
    cv2.imwrite('/home/lab/imgs/obs' + str(j) + '.png', obs_img[..., ::-1])

#goal = env.sample_goal()
#obs_img = 255*goal['desired_goal'].reshape(3, 480, 480).transpose()
#cv2.imwrite('a.png', obs_img)
#cv2.imshow('window', obs_img)
#cv2.waitKey()
def generate_vae_dataset(variant):
    """Load a VAE dataset, or dump (goal image, new image, instruction) files.

    All settings are read from ``variant`` with ``dict.get`` defaults. When
    ``dataset_path`` is set the dataset is loaded from it; otherwise a cached
    ``/tmp`` file is used when allowed and present. If neither applies, an
    environment is built and, for 10000 iterations, a sample is written to
    ``/home/xiaomin/Downloads/IFIG_DATA_1/<i>.npy`` whenever
    ``oracle_dataset_using_set_to_goal`` is true.

    NOTE(review): in the generation path the returned ``dataset`` array is
    allocated and saved but never filled, the loop count is fixed at 10000
    rather than ``N``, and preview windows are opened regardless of ``show``.
    Confirm whether this is intended.

    Args:
        variant: dict of options (``env_class``, ``env_kwargs``, ``env_id``,
            ``N``, ``test_p``, ``use_cached``, ``imsize``, ``num_channels``,
            ``init_camera``, ``dataset_path``, ``policy_file``, ``tag``, ...).

    Returns:
        ``(train_dataset, test_dataset, info)``; the first split holds
        ``int(N * test_p)`` rows and ``info['env']`` is set when an env was
        built.
    """
    env_class = variant.get('env_class', None)
    env_kwargs = variant.get('env_kwargs', None)
    env_id = variant.get('env_id', None)
    N = variant.get('N', 10000)
    test_p = variant.get('test_p', 0.9)
    use_cached = variant.get('use_cached', True)
    imsize = variant.get('imsize', 84)
    num_channels = variant.get('num_channels', 3)
    show = variant.get('show', False)
    init_camera = variant.get('init_camera', None)
    dataset_path = variant.get('dataset_path', None)
    oracle_dataset_using_set_to_goal = variant.get(
        'oracle_dataset_using_set_to_goal', False)
    random_rollout_data = variant.get('random_rollout_data', False)
    random_and_oracle_policy_data = variant.get(
        'random_and_oracle_policy_data', False)
    random_and_oracle_policy_data_split = variant.get(
        'random_and_oracle_policy_data_split', 0)
    policy_file = variant.get('policy_file', None)
    n_random_steps = variant.get('n_random_steps', 100)
    vae_dataset_specific_env_kwargs = variant.get(
        'vae_dataset_specific_env_kwargs', None)
    save_file_prefix = variant.get('save_file_prefix', None)
    non_presampled_goal_img_is_garbage = variant.get(
        'non_presampled_goal_img_is_garbage', None)
    tag = variant.get('tag', '')
    from multiworld.core.image_env import ImageEnv, unormalize_image
    import rlkit.torch.pytorch_util as ptu
    info = {}
    if dataset_path is not None:
        dataset = load_local_or_remote_file(dataset_path)
        N = dataset.shape[0]
    else:
        if env_kwargs is None:
            env_kwargs = {}
        if save_file_prefix is None:
            save_file_prefix = env_id
        if save_file_prefix is None:
            save_file_prefix = env_class.__name__
        filename = "/tmp/{}_N{}_{}_imsize{}_random_oracle_split_{}{}.npy".format(
            save_file_prefix,
            str(N),
            init_camera.__name__ if init_camera else '',
            imsize,
            random_and_oracle_policy_data_split,
            tag,
        )
        if use_cached and osp.isfile(filename):
            dataset = np.load(filename)
            print("loaded data from saved file", filename)
        else:
            now = time.time()

            if env_id is not None:
                import gym
                import multiworld
                multiworld.register_all_envs()
                env = gym.make(env_id)
            else:
                if vae_dataset_specific_env_kwargs is None:
                    vae_dataset_specific_env_kwargs = {}
                # Dataset-specific kwargs win; env_kwargs only fills the gaps.
                for key, val in env_kwargs.items():
                    if key not in vae_dataset_specific_env_kwargs:
                        vae_dataset_specific_env_kwargs[key] = val
                env = env_class(**vae_dataset_specific_env_kwargs)
            if not isinstance(env, ImageEnv):
                env = ImageEnv(
                    env,
                    imsize,
                    init_camera=init_camera,
                    transpose=True,
                    normalize=True,
                    non_presampled_goal_img_is_garbage=
                    non_presampled_goal_img_is_garbage,
                )
            else:
                imsize = env.imsize
                env.non_presampled_goal_img_is_garbage = non_presampled_goal_img_is_garbage
            env.reset()
            info['env'] = env
            if random_and_oracle_policy_data:
                policy_file = load_local_or_remote_file(policy_file)
                policy = policy_file['policy']
                policy.to(ptu.device)
            if random_rollout_data:
                from rlkit.exploration_strategies.ou_strategy import OUStrategy
                policy = OUStrategy(env.action_space)
            dataset = np.zeros((N, imsize * imsize * num_channels),
                               dtype=np.uint8)
            for i in range(10000):
                NP = []
                if oracle_dataset_using_set_to_goal:
                    print(i)
                    # First image: the env placed at a sampled goal.
                    goal = env.sample_goal()
                    env.set_to_goal(goal)
                    obs = env._get_obs()
                    img_1 = obs['image_observation']
                    NP.append(img_1)
                    img_1 = img_1.reshape(3, imsize, imsize).transpose()
                    if i % 3 == 0:
                        cv2.imshow('img1', img_1)
                        cv2.waitKey(1)
                    # Second image: after a reset and generate_new_state(goal),
                    # which also returns the instruction stored with the pair.
                    env.reset()
                    instr = env.generate_new_state(goal)
                    if i % 3 == 0:
                        print(instr)
                    obs = env._get_obs()
                    img_2 = obs['image_observation']
                    NP.append(img_2)
                    NP.append(instr)
                    img_2 = img_2.reshape(3, imsize, imsize).transpose()
                    if i % 3 == 0:
                        cv2.imshow('img2', img_2)
                        cv2.waitKey(1)
                    NP = np.array(NP)
                    print(NP)
                    idx = str(i)
                    name = "/home/xiaomin/Downloads/IFIG_DATA_1/" + idx + ".npy"
                    # Close the handle deterministically instead of leaking
                    # one open file per iteration.
                    with open(name, 'wb') as out_file:
                        np.save(out_file, NP)
            print("done making training data", filename, time.time() - now)
            np.save(filename, dataset)

    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
def generate_vae_dataset(variant):
    """Load or render a uint8 image dataset for VAE training.

    All settings are read from ``variant`` with ``dict.get`` defaults. When
    ``dataset_path`` is set the dataset is loaded from it (and ``N`` becomes
    its row count); otherwise a cached ``/tmp`` file is used when
    ``use_cached`` is true and the file exists. If neither applies, ``N``
    images are rendered and saved to the cache file. Rendering mode, checked
    in this order:

    * ``random_and_oracle_policy_data``: the first
      ``int(N * random_and_oracle_policy_data_split)`` rows come from random
      rollouts, the rest from rollouts of the policy loaded from
      ``policy_file``;
    * ``oracle_dataset_using_set_to_goal``: the env is set to a sampled goal;
    * ``random_rollout_data``: one step per row with OU-strategy actions, the
      env being set to a new rollout goal every ``n_random_steps`` rows;
    * otherwise: reset followed by ``n_random_steps`` random actions.

    Args:
        variant: dict of options (``env_class``, ``env_kwargs``, ``env_id``,
            ``N``, ``test_p``, ``use_cached``, ``imsize``, ``num_channels``,
            ``show``, ``init_camera``, ``dataset_path``, ``policy_file``,
            ``n_random_steps``, ``vae_dataset_specific_env_kwargs``,
            ``save_file_prefix``, ``non_presampled_goal_img_is_garbage``,
            ``tag`` and the mode flags above).

    Returns:
        ``(train_dataset, test_dataset, info)``; the first split holds
        ``int(N * test_p)`` rows and ``info['env']`` is set when an env was
        built.
    """
    env_class = variant.get('env_class', None)
    env_kwargs = variant.get('env_kwargs', None)
    env_id = variant.get('env_id', None)
    N = variant.get('N', 10000)
    test_p = variant.get('test_p', 0.9)
    use_cached = variant.get('use_cached', True)
    imsize = variant.get('imsize', 84)
    num_channels = variant.get('num_channels', 3)
    show = variant.get('show', False)
    init_camera = variant.get('init_camera', None)
    dataset_path = variant.get('dataset_path', None)
    oracle_dataset_using_set_to_goal = variant.get(
        'oracle_dataset_using_set_to_goal', False)
    random_rollout_data = variant.get('random_rollout_data', False)
    random_and_oracle_policy_data = variant.get(
        'random_and_oracle_policy_data', False)
    random_and_oracle_policy_data_split = variant.get(
        'random_and_oracle_policy_data_split', 0)
    policy_file = variant.get('policy_file', None)
    n_random_steps = variant.get('n_random_steps', 100)
    vae_dataset_specific_env_kwargs = variant.get(
        'vae_dataset_specific_env_kwargs', None)
    save_file_prefix = variant.get('save_file_prefix', None)
    non_presampled_goal_img_is_garbage = variant.get(
        'non_presampled_goal_img_is_garbage', None)
    tag = variant.get('tag', '')
    from multiworld.core.image_env import ImageEnv, unormalize_image
    import rlkit.torch.pytorch_util as ptu
    info = {}
    if dataset_path is not None:
        dataset = load_local_or_remote_file(dataset_path)
        N = dataset.shape[0]
    else:
        if env_kwargs is None:
            env_kwargs = {}
        if save_file_prefix is None:
            save_file_prefix = env_id
        if save_file_prefix is None:
            save_file_prefix = env_class.__name__
        filename = "/tmp/{}_N{}_{}_imsize{}_random_oracle_split_{}{}.npy".format(
            save_file_prefix,
            str(N),
            init_camera.__name__ if init_camera else '',
            imsize,
            random_and_oracle_policy_data_split,
            tag,
        )
        if use_cached and osp.isfile(filename):
            dataset = np.load(filename)
            print("loaded data from saved file", filename)
        else:
            now = time.time()

            if env_id is not None:
                import gym
                import multiworld
                multiworld.register_all_envs()
                env = gym.make(env_id)
            else:
                if vae_dataset_specific_env_kwargs is None:
                    vae_dataset_specific_env_kwargs = {}
                # Dataset-specific kwargs win; env_kwargs only fills the gaps.
                for key, val in env_kwargs.items():
                    if key not in vae_dataset_specific_env_kwargs:
                        vae_dataset_specific_env_kwargs[key] = val
                env = env_class(**vae_dataset_specific_env_kwargs)
            if not isinstance(env, ImageEnv):
                env = ImageEnv(
                    env,
                    imsize,
                    init_camera=init_camera,
                    transpose=True,
                    normalize=True,
                    non_presampled_goal_img_is_garbage=
                    non_presampled_goal_img_is_garbage,
                )
            else:
                # An already-wrapped env dictates the image size.
                imsize = env.imsize
                env.non_presampled_goal_img_is_garbage = non_presampled_goal_img_is_garbage
            env.reset()
            info['env'] = env
            if random_and_oracle_policy_data:
                policy_file = load_local_or_remote_file(policy_file)
                policy = policy_file['policy']
                policy.to(ptu.device)
                # Loop-invariant: number of leading rows taken from random
                # rollouts rather than from the loaded policy.
                num_random_steps = int(N *
                                       random_and_oracle_policy_data_split)
            if random_rollout_data:
                from rlkit.exploration_strategies.ou_strategy import OUStrategy
                policy = OUStrategy(env.action_space)
            dataset = np.zeros((N, imsize * imsize * num_channels),
                               dtype=np.uint8)
            for i in range(N):
                if random_and_oracle_policy_data:
                    if i < num_random_steps:
                        env.reset()
                        for _ in range(n_random_steps):
                            obs = env.step(env.action_space.sample())[0]
                    else:
                        obs = env.reset()
                        policy.reset()
                        for _ in range(n_random_steps):
                            # The policy input is state and goal concatenated.
                            policy_obs = np.hstack((
                                obs['state_observation'],
                                obs['state_desired_goal'],
                            ))
                            action, _ = policy.get_action(policy_obs)
                            obs, _, _, _ = env.step(action)
                elif oracle_dataset_using_set_to_goal:
                    print(i)
                    goal = env.sample_goal()
                    env.set_to_goal(goal)
                    obs = env._get_obs()
                elif random_rollout_data:
                    if i % n_random_steps == 0:
                        g = dict(
                            state_desired_goal=env.sample_goal_for_rollout())
                        env.set_to_goal(g)
                        policy.reset()
                    u = policy.get_action_from_raw_action(
                        env.action_space.sample())
                    obs = env.step(u)[0]
                else:
                    env.reset()
                    for _ in range(n_random_steps):
                        obs = env.step(env.action_space.sample())[0]
                img = obs['image_observation']
                dataset[i, :] = unormalize_image(img)
                if show:
                    # Reshape with the configured channel count; a hard-coded
                    # 3 fails whenever num_channels differs from 3.
                    img = img.reshape(num_channels, imsize, imsize).transpose()
                    img = img[::-1, :, ::-1]
                    cv2.imshow('img', img)
                    cv2.waitKey(1)
            print("done making training data", filename, time.time() - now)
            np.save(filename, dataset)

    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
Esempio n. 10
0
def generate_vae_dataset(
    env_class,
    N=10000,
    test_p=0.9,
    use_cached=True,
    imsize=84,
    show=False,
    init_camera=sawyer_init_camera_zoomed_in,
    dataset_path=None,
    env_kwargs=None,
    oracle_dataset=False,
    n_random_steps=100,
):
    """Load or render image observations of ``env_class`` for VAE training.

    The data comes from ``dataset_path`` when given (``N`` then becomes its
    row count); otherwise from the ``/tmp`` cache file when ``use_cached`` is
    set and the file exists; otherwise ``N`` images are rendered and saved to
    that cache file.

    Args:
        env_class: callable building the underlying environment; its
            ``__name__`` is part of the cache file name.
        N: number of rows to render.
        test_p: fraction of ``N`` that determines the size of the FIRST
            returned split.
        use_cached: load the ``/tmp`` cache file if it exists.
        imsize: image side length handed to ``ImageEnv``.
        show: display every rendered image with OpenCV.
        init_camera: camera initializer forwarded to ``ImageEnv``; its
            ``__name__`` is part of the cache file name.
        dataset_path: optional path resolved through
            ``local_path_from_s3_or_local_path``.
        env_kwargs: keyword arguments for ``env_class``; ``None`` means none.
        oracle_dataset: if true, set the env to a sampled goal before each
            capture; otherwise reset and take ``n_random_steps`` random steps.
        n_random_steps: random steps per row in the non-oracle mode.

    Returns:
        ``(train_dataset, test_dataset, info)``; ``info['env']`` is set only
        when a dataset was rendered.
    """
    if env_kwargs is None:
        env_kwargs = {}
    filename = "/tmp/{}_{}_{}_oracle{}.npy".format(
        env_class.__name__,
        str(N),
        init_camera.__name__,
        oracle_dataset,
    )
    info = {}
    if dataset_path is not None:
        filename = local_path_from_s3_or_local_path(dataset_path)
        dataset = np.load(filename)
        N = dataset.shape[0]
    elif use_cached and osp.isfile(filename):
        dataset = np.load(filename)
        print("loaded data from saved file", filename)
    else:
        now = time.time()
        env = env_class(**env_kwargs)
        env = ImageEnv(
            env,
            imsize,
            init_camera=init_camera,
            transpose=True,
            normalize=True,
        )
        env.reset()
        info['env'] = env

        dataset = np.zeros((N, imsize * imsize * 3))
        for i in range(N):
            if oracle_dataset:
                goal = env.sample_goal()
                env.set_to_goal(goal)
            else:
                env.reset()
                for _ in range(n_random_steps):
                    obs = env.step(env.action_space.sample())[0]
            # One more random step in both modes yields the observation that
            # is actually stored.
            obs = env.step(env.action_space.sample())[0]
            img = obs['image_observation']
            dataset[i, :] = img
            if show:
                # Use the configured size rather than a hard-coded 84 so the
                # preview also works for other values of imsize.
                img = img.reshape(3, imsize, imsize).transpose()
                img = img[::-1, :, ::-1]
                cv2.imshow('img', img)
                cv2.waitKey(1)
        print("done making training data", filename, time.time() - now)
        np.save(filename, dataset)

    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
def generate_vae_dataset(variant):
    """
    Default dataset collector used for VAE pre-training when no custom
    dataset-generation function is supplied.

    All settings are read from ``variant`` with ``.get`` and a default.

    Two sources are supported:

    * ``dataset_path`` given: the file ``$PJHOME/<dataset_path>`` is loaded
      with ``np.load(..., allow_pickle=True).item()``; if the result is a
      dict its ``'image_desired_goal'`` entry is used. The data is passed
      through ``unormalize_image`` and ``N`` becomes the number of rows.
    * otherwise: a cache file under ``/tmp`` is loaded when ``use_cached``
      is true and the file exists; if not, an environment is built
      (``gym.make(env_id)`` or ``env_class(**kwargs)``), wrapped in
      ``ImageEnv`` when it is not one already, and ``N`` images are
      collected using the first enabled mode among
      ``random_and_oracle_policy_data``,
      ``oracle_dataset_using_set_to_goal``, ``random_rollout_data`` and,
      as a fallback, purely random actions. The result is saved to the
      cache file.

    Returns:
        ``(train_dataset, test_dataset, info)``. The first
        ``int(N * test_p)`` rows form the train split (so ``test_p`` is in
        fact the *train* fraction); the remainder is the test split.
        ``info['env']`` holds the environment only when a new dataset was
        collected.
    """
    import rlkit.torch.pytorch_util as ptu
    import gym
    import multiworld
    multiworld.register_all_envs()

    print("generating vae dataset with original images")

    # Configuration; every key is optional and falls back to the default
    # shown here.
    env_class = variant.get('env_class', None)
    env_kwargs = variant.get('env_kwargs', None)
    env_id = variant.get('env_id', None)
    N = variant.get('N', 10000)
    test_p = variant.get('test_p', 0.9)
    use_cached = variant.get('use_cached', True)
    imsize = variant.get('imsize', 84)
    num_channels = variant.get('num_channels', 3)
    # NOTE(review): `show` is read but never used in this function.
    show = variant.get('show', False)
    init_camera = variant.get('init_camera', None)
    dataset_path = variant.get('dataset_path', None)
    oracle_dataset_using_set_to_goal = variant.get(
        'oracle_dataset_using_set_to_goal', False)
    random_rollout_data = variant.get('random_rollout_data', False)
    random_and_oracle_policy_data = variant.get(
        'random_and_oracle_policy_data', False)
    random_and_oracle_policy_data_split = variant.get(
        'random_and_oracle_policy_data_split', 0)
    policy_file = variant.get('policy_file', None)
    n_random_steps = variant.get('n_random_steps', 100)
    vae_dataset_specific_env_kwargs = variant.get(
        'vae_dataset_specific_env_kwargs', None)
    save_file_prefix = variant.get('save_file_prefix', None)
    non_presampled_goal_img_is_garbage = variant.get(
        'non_presampled_goal_img_is_garbage', None)
    tag = variant.get('tag', '')

    info = {}
    if dataset_path is not None:
        # Pre-collected dataset; the path is relative to $PJHOME (raises
        # KeyError if that environment variable is not set).
        print('load vae training dataset from: ', dataset_path)
        pjhome = os.environ['PJHOME']
        dataset = np.load(osp.join(pjhome, dataset_path),
                          allow_pickle=True).item()
        if isinstance(dataset, dict):
            dataset = dataset['image_desired_goal']
        # presumably the stored images are normalized floats and this
        # converts them back to pixel values -- TODO confirm against
        # unormalize_image
        dataset = unormalize_image(dataset)
        N = dataset.shape[0]
    else:
        if env_kwargs is None:
            env_kwargs = {}
        # Cache-file prefix: explicit prefix, else env id, else the env
        # class name.
        if save_file_prefix is None:
            save_file_prefix = env_id
        if save_file_prefix is None:
            save_file_prefix = env_class.__name__
        filename = "/tmp/{}_N{}_{}_imsize{}_random_oracle_split_{}{}.npy".format(
            save_file_prefix,
            str(N),
            init_camera.__name__ if init_camera else '',
            imsize,
            random_and_oracle_policy_data_split,
            tag,
        )
        if use_cached and osp.isfile(filename):
            dataset = np.load(filename)
            print("loaded data from saved file", filename)
        else:
            # NOTE(review): `now` is assigned but not used afterwards.
            now = time.time()

            if env_id is not None:
                import gym
                import multiworld
                multiworld.register_all_envs()
                env = gym.make(env_id)
            else:
                # Dataset-specific kwargs take precedence; general
                # env_kwargs only fill in keys that are missing. Note that
                # this mutates the dict taken from `variant` in place.
                if vae_dataset_specific_env_kwargs is None:
                    vae_dataset_specific_env_kwargs = {}
                for key, val in env_kwargs.items():
                    if key not in vae_dataset_specific_env_kwargs:
                        vae_dataset_specific_env_kwargs[key] = val
                env = env_class(**vae_dataset_specific_env_kwargs)
            if not isinstance(env, ImageEnv):
                env = ImageEnv(
                    env,
                    imsize,
                    init_camera=init_camera,
                    transpose=True,
                    normalize=True,
                    non_presampled_goal_img_is_garbage=
                    non_presampled_goal_img_is_garbage,
                )
            else:
                # Already an ImageEnv: adopt its image size so the dataset
                # buffer below has the matching width.
                imsize = env.imsize
                env.non_presampled_goal_img_is_garbage = non_presampled_goal_img_is_garbage
            env.reset()
            info['env'] = env
            if random_and_oracle_policy_data:
                # `policy_file` is rebound from a path to the loaded
                # object, which is indexed with 'policy'.
                policy_file = load_local_or_remote_file(policy_file)
                policy = policy_file['policy']
                policy.to(ptu.device)
            if random_rollout_data:
                # Overrides `policy` if both flags are set.
                from rlkit.exploration_strategies.ou_strategy import OUStrategy
                policy = OUStrategy(env.action_space)

            # One flattened image per row, stored as uint8.
            dataset = np.zeros((N, imsize * imsize * num_channels),
                               dtype=np.uint8)

            for i in range(N):
                if random_and_oracle_policy_data:
                    # The first int(N * split) samples come from random
                    # actions, the rest from rolling out the loaded policy.
                    num_random_steps = int(N *
                                           random_and_oracle_policy_data_split)
                    if i < num_random_steps:
                        env.reset()
                        for _ in range(n_random_steps):
                            obs = env.step(env.action_space.sample())[0]
                    else:
                        obs = env.reset()
                        policy.reset()
                        for _ in range(n_random_steps):
                            # The policy input is state and desired goal
                            # concatenated.
                            policy_obs = np.hstack((
                                obs['state_observation'],
                                obs['state_desired_goal'],
                            ))
                            action, _ = policy.get_action(policy_obs)
                            obs, _, _, _ = env.step(action)
                elif oracle_dataset_using_set_to_goal:
                    # Put the env directly into a sampled goal state and
                    # read the observation without stepping.
                    print(i)
                    goal = env.sample_goal()
                    env.set_to_goal(goal)
                    obs = env._get_obs()

                elif random_rollout_data:
                    # Every n_random_steps samples, restart from a freshly
                    # sampled goal state; otherwise continue the rollout.
                    if i % n_random_steps == 0:
                        g = dict(
                            state_desired_goal=env.sample_goal_for_rollout())
                        env.set_to_goal(g)
                        policy.reset()
                        # env.reset()
                    u = policy.get_action_from_raw_action(
                        env.action_space.sample())
                    obs = env.step(u)[0]
                else:
                    # Fallback: n_random_steps random actions with no
                    # reset between samples. The message is printed once
                    # per sample.
                    # NOTE(review): `obs` stays unbound on the first sample
                    # if n_random_steps is 0.
                    print("using totally random rollouts")
                    for _ in range(n_random_steps):
                        obs = env.step(env.action_space.sample())[0]

                img = obs[
                    'image_observation']  # NOTE (original author, yufei): this is already a normalized image, of dtype np.float64.

                dataset[i, :] = unormalize_image(img)

            np.save(filename, dataset)

    # Split point: the first int(N * test_p) rows go to the train split.
    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
Esempio n. 12
0
def generate_uniform_dataset_door(num_imgs=1000,
                                  use_cached_dataset=False,
                                  init_camera=None,
                                  imsize=48,
                                  policy_file=None,
                                  show=False,
                                  path_length=100,
                                  save_file_prefix=None,
                                  env_id=None,
                                  tag='',
                                  dataset_path=None):
    """Load or collect end-of-rollout images produced by a saved policy.

    Resolution order:

    1. ``dataset_path`` given: return whatever
       ``load_local_or_remote_file`` yields for it, without building an env.
    2. ``use_cached_dataset`` is true and the ``/tmp`` cache file exists:
       return the cached array.
    3. Otherwise roll out the policy stored under ``'policy'`` in
       ``policy_file`` for ``path_length`` steps, ``num_imgs`` times, keep
       the last image of every rollout, save the array to the cache file
       and return it.

    Args:
        num_imgs: number of rollouts, i.e. rows in the dataset.
        use_cached_dataset: allow loading a previously saved ``/tmp`` file.
        init_camera: forwarded to ``ImageEnv``.
        imsize: image size forwarded to ``ImageEnv``.
        policy_file: location passed to ``load_local_or_remote_file``.
        show: display each final image with OpenCV.
        path_length: number of policy steps per rollout.
        save_file_prefix: cache-file prefix; defaults to ``env_id``.
        env_id: gym id passed to ``gym.make``.
        tag: suffix appended to the cache-file name.
        dataset_path: optional location of a ready-made dataset.

    Returns:
        The loaded object, or a ``uint8`` array with one flattened image
        per row, of shape ``(num_imgs, 3 * env.imsize ** 2)``.
    """
    # A ready-made dataset short-circuits everything, env creation included.
    if dataset_path is not None:
        return load_local_or_remote_file(dataset_path)

    import gym
    from gym.envs import registration
    # Importing these sub-packages triggers environment registration.
    import multiworld.envs.pygame
    import multiworld.envs.mujoco

    env = ImageEnv(
        gym.make(env_id),
        imsize,
        init_camera=init_camera,
        transpose=True,
        normalize=True,
    )
    env.non_presampled_goal_img_is_garbage = True

    if env_id is not None and save_file_prefix is None:
        save_file_prefix = env_id
    cache_file = "/tmp/{}_N{}_imsize{}uniform_images_{}.npy".format(
        save_file_prefix,
        str(num_imgs),
        env.imsize,
        tag,
    )

    if use_cached_dataset and osp.isfile(cache_file):
        cached_images = np.load(cache_file)
        print("Loaded data from {}".format(cache_file))
        return cached_images

    policy = load_local_or_remote_file(policy_file)['policy']
    policy.to(ptu.device)
    print('Sampling Uniform Dataset')
    dataset = np.zeros((num_imgs, 3 * env.imsize**2), dtype=np.uint8)
    for sample_idx in range(num_imgs):
        obs = env.reset()
        policy.reset()
        for _step in range(path_length):
            # The policy consumes state and desired goal concatenated.
            policy_input = np.hstack((
                obs['state_observation'],
                obs['state_desired_goal'],
            ))
            action, _ = policy.get_action(policy_input)
            obs, _, _, _ = env.step(action)

        # Only the image at the end of the rollout is kept.
        final_img = obs['image_observation']
        if show:
            preview = final_img.reshape(3, env.imsize, env.imsize).transpose()
            preview = preview[::-1, :, ::-1]
            cv2.imshow('img', preview)
            cv2.waitKey(1)
        print(sample_idx)
        dataset[sample_idx, :] = unormalize_image(final_img)

    # Do one reset with reset_free switched on, then restore the env's
    # original setting.
    previous_reset_free = env.reset_free
    env.reset_free = True
    env.reset()
    env.reset_free = previous_reset_free

    np.save(cache_file, dataset)
    print("Saving file to {}".format(cache_file))
    return dataset
Esempio n. 13
0
def _write_rgb_and_mask_frames(env, back_sub, out_dir, num_frames):
    """Step ``env`` randomly and write an RGB frame plus foreground mask per step.

    For each of ``num_frames`` random actions the resulting observation is
    un-normalized, run through ``back_sub`` with ``learningRate=0`` (so the
    background model is not updated), and written to
    ``<out_dir>/rgb_<i>.jpg`` and ``<out_dir>/mask_<i>.jpg``.
    """
    for i in range(num_frames):
        action = env.action_space.sample()
        next_obs, _, _, _ = env.step(action)
        image = unnormalize_image(next_obs['observation'])
        fg_mask = back_sub.apply(image, learningRate=0)
        # Scale the mask by its maximum. When no foreground is detected the
        # maximum is 0; dividing would produce a NaN mask, so keep the
        # all-zero mask (as float, to match the dtype of the scaled case).
        peak = fg_mask.max()
        if peak > 0:
            fg_mask = fg_mask / peak
        else:
            fg_mask = fg_mask.astype(float)
        cv2.imwrite(out_dir + "/rgb_" + str(i) + ".jpg", image)
        cv2.imwrite(out_dir + "/mask_" + str(i) + ".jpg", fg_mask)


def main(training_data_dir, validation_data_dir, test_data_dir, imsize):
    """Generate RGB / foreground-mask image pairs for train, validation and test.

    A MOG2 background subtractor is first fed 10 frames from the
    'SawyerPushHurdlePuckAndRobotInvisible-v0' env, then 1000 training,
    200 validation and 200 test frames are taken from
    'SawyerPushHurdlePuckInvisible-v0' and written together with their
    foreground masks. The hurdle env is not reset between the three splits,
    so they form one continuous random rollout.

    Args:
        training_data_dir: output directory for the training pairs.
        validation_data_dir: output directory for the validation pairs.
        test_data_dir: output directory for the test pairs.
        imsize: image size forwarded to ``ImageEnv``.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    for out_dir in (training_data_dir, validation_data_dir, test_data_dir):
        os.makedirs(out_dir, exist_ok=True)

    backSub = cv2.createBackgroundSubtractorMOG2(history=10)

    # Registering required multiworld environments.
    multiworld.register_all_envs()
    base_env_background = gym.make('SawyerPushHurdlePuckAndRobotInvisible-v0')
    env_background = ImageEnv(base_env_background,
                              imsize=imsize,
                              init_camera=camera,
                              transpose=True,
                              normalize=True)
    env_background.reset()

    base_env_hurdle = gym.make('SawyerPushHurdlePuckInvisible-v0')
    env_hurdle = ImageEnv(base_env_hurdle,
                          imsize=imsize,
                          init_camera=camera,
                          transpose=True,
                          normalize=True)
    env_hurdle.reset()

    # Feed frames from the background env to the subtractor;
    # learningRate=-1 lets OpenCV choose the learning rate automatically.
    print("Training background subctractor")
    for _ in range(10):
        action = env_background.action_space.sample()
        next_obs, _, _, _ = env_background.step(action)
        image = unnormalize_image(next_obs['observation'])
        backSub.apply(image, learningRate=-1)

    print("Generating training data")
    _write_rgb_and_mask_frames(env_hurdle, backSub, training_data_dir, 1000)

    print("Generating validation data")
    _write_rgb_and_mask_frames(env_hurdle, backSub, validation_data_dir, 200)

    print("Generating testing data")
    _write_rgb_and_mask_frames(env_hurdle, backSub, test_data_dir, 200)

    print("Completed generating data at:", training_data_dir)
Esempio n. 14
0
def generate_vae_dataset(variant):
    """Collect (obs, action, next_obs) transitions and save them to disk.

    Settings are read from ``variant``:

    * ``env_id`` or ``env_class`` (+ ``env_kwargs`` /
      ``vae_dataset_specific_env_kwargs``): which environment to build.
      Dataset-specific kwargs win; ``env_kwargs`` only fill missing keys.
    * ``N`` (default 10000): number of transitions.
    * ``use_images`` (default True): store un-normalized images as uint8;
      otherwise store state observations as float32.
    * ``imsize``, ``init_camera``, ``non_presampled_goal_img_is_garbage``:
      forwarded to ``ImageEnv`` when the env is not one already.
    * ``oracle_dataset`` (default False): start each sample from a sampled
      goal state (with an env reset every 100 samples) instead of from a
      reset.
    * ``n_random_steps``: random actions taken before recording; defaults
      to 3 for oracle datasets and 100 otherwise.
    * ``show`` (default False): display every next-observation image.

    The dataset is a dict with keys ``obs``, ``actions``, ``next_obs``,
    ``obs_state`` and ``next_obs_state`` and is written with ``np.save`` to
    ``vae_dataset.npy`` inside the logger's snapshot directory. Nothing is
    returned.
    """
    import cv2
    from multiworld.core.image_env import ImageEnv, unormalize_image
    from railrl.core import logger

    env_class = variant.get('env_class', None)
    env_kwargs = variant.get('env_kwargs', None)
    env_id = variant.get('env_id', None)
    N = variant.get('N', 10000)

    use_images = variant.get('use_images', True)

    imsize = variant.get('imsize', 84)
    show = variant.get('show', False)
    init_camera = variant.get('init_camera', None)
    oracle_dataset = variant.get('oracle_dataset', False)
    n_random_steps = variant.get('n_random_steps',
                                 3 if oracle_dataset else 100)
    vae_dataset_specific_env_kwargs = variant.get(
        'vae_dataset_specific_env_kwargs', None)
    non_presampled_goal_img_is_garbage = variant.get(
        'non_presampled_goal_img_is_garbage', None)

    logdir = logger.get_snapshot_dir()
    filename = osp.join(logdir, "vae_dataset.npy")

    now = time.time()

    if env_id is not None:
        import gym
        env = gym.make(env_id)
    else:
        # A missing env_kwargs is treated as empty instead of crashing on
        # `.items()`.
        if env_kwargs is None:
            env_kwargs = {}
        if vae_dataset_specific_env_kwargs is None:
            vae_dataset_specific_env_kwargs = {}
        for key, val in env_kwargs.items():
            if key not in vae_dataset_specific_env_kwargs:
                vae_dataset_specific_env_kwargs[key] = val
        env = env_class(**vae_dataset_specific_env_kwargs)
    if not isinstance(env, ImageEnv):
        env = ImageEnv(
            env,
            imsize,
            init_camera=init_camera,
            transpose=True,
            normalize=True,
            non_presampled_goal_img_is_garbage=
            non_presampled_goal_img_is_garbage,
        )
    else:
        # Already an ImageEnv: use its image size for the preview reshape.
        imsize = env.imsize
        env.non_presampled_goal_img_is_garbage = non_presampled_goal_img_is_garbage
    env.reset()

    obs_spaces = env.observation_space.spaces
    state_size = len(obs_spaces['state_observation'].low)
    if use_images:
        data_size = len(obs_spaces['image_observation'].low)
        dtype = np.uint8
    else:
        data_size = state_size
        dtype = np.float32

    dataset = {
        'obs': np.zeros((N, data_size), dtype=dtype),
        'actions': np.zeros((N, len(env.action_space.low)), dtype=np.float32),
        'next_obs': np.zeros((N, data_size), dtype=dtype),
        'obs_state': np.zeros((N, state_size), dtype=np.float32),
        'next_obs_state': np.zeros((N, state_size), dtype=np.float32),
    }

    # Report progress roughly 50 times; integer arithmetic avoids relying on
    # float-modulo equality and on N being a multiple of 50.
    progress_every = max(N // 50, 1)

    for i in range(N):
        if i % progress_every == 0:
            print(i)
        if oracle_dataset:
            if i % 100 == 0:
                env.reset()
            goal = env.sample_goal()
            env.set_to_goal(goal)
        else:
            env.reset()
        # Perturb the starting state with a few random actions.
        for _ in range(n_random_steps):
            env.step(env.action_space.sample())

        obs = env._get_obs()
        if use_images:
            dataset['obs'][i, :] = unormalize_image(obs['image_observation'])
        else:
            dataset['obs'][i, :] = obs['state_observation']
        dataset['obs_state'][i, :] = obs['state_observation']

        action = env.action_space.sample()
        dataset['actions'][i, :] = action

        obs = env.step(action)[0]
        img = obs['image_observation']
        if use_images:
            dataset['next_obs'][i, :] = unormalize_image(img)
        else:
            dataset['next_obs'][i, :] = obs['state_observation']
        dataset['next_obs_state'][i, :] = obs['state_observation']
        # Honor the configured `show` flag (a leftover debug override used
        # to force the preview, and its one-second wait, on every sample).
        if show:
            # Channel-first flat image -> H x W x C, then reverse the
            # channel order for OpenCV display.
            img = img.reshape(3, imsize, imsize).transpose((1, 2, 0))
            img = img[::, :, ::-1]
            cv2.imshow('img', img)
            cv2.waitKey(1000)

    print("keys and shapes:")
    for k in dataset.keys():
        print(k, dataset[k].shape)
    print("done making training data", filename, time.time() - now)
    np.save(filename, dataset)
def generate_LSTM_vae_only_dataset(variant,
                                   segmented=False,
                                   segmentation_method='color'):
    """Build (or load from cache) the image dataset used to pre-train the LSTM VAE.

    Settings read from ``variant``: ``env_id`` (required), ``N`` (default
    500), ``test_p`` (default 0.9), ``imsize`` (default 48),
    ``num_channels`` (default 3), ``init_camera``, ``occlusion_prob``
    (default 0), ``occlusion_level`` (default 0.5) and
    ``segmentation_kwargs``. If ``segmentation_kwargs['segment']`` is not
    None it overrides the ``segmented`` argument.

    Data source:

    * a cached ``.npy`` file under ``$PJHOME/data/local/pre-train-lstm`` is
      used when it exists and holds at least ``N`` samples;
    * for 'SawyerDoorHookResetFreeEnv-v1' and 'SawyerPickupEnvYZEasy-v0'
      images and object states come from pre-sampled goal files under
      ``$PJHOME/data/local/pre-train-vae``;
    * for the supported push envs, samples are generated by the matching
      ``_generate_*_dataset`` helper; any other env id raises
      ``NotImplementedError``.

    When ``segmented`` is true only ``segmentation_method == 'unet'`` is
    supported (``NotImplementedError`` otherwise). With probability
    ``occlusion_prob`` a segmented image additionally gets random pixels
    zeroed out.

    Args:
        variant: dict-like configuration, see above.
        segmented: whether to segment images before storing them.
        segmentation_method: name of the segmentation method.

    Returns:
        ``(train_dataset, test_dataset, info)``. Freshly built datasets
        have shape ``(samples, 1, imsize * imsize * num_channels)`` and
        dtype uint8; the split point is ``int(N * test_p)``.
        ``info['obj_state']`` holds the object states (door angle or puck
        position); ``info['env']`` is set only when an env was created.

    Raises:
        AssertionError: if ``env_id`` is missing.
        KeyError: if the ``PJHOME`` environment variable is not set.
        NotImplementedError: unsupported segmentation method or env id.
    """
    from multiworld.core.image_env import ImageEnv, unormalize_image

    env_id = variant.get('env_id', None)
    N = variant.get('N', 500)
    test_p = variant.get('test_p', 0.9)
    imsize = variant.get('imsize', 48)
    num_channels = variant.get('num_channels', 3)
    init_camera = variant.get('init_camera', None)
    occlusion_prob = variant.get('occlusion_prob', 0)
    occlusion_level = variant.get('occlusion_level', 0.5)
    segmentation_kwargs = variant.get('segmentation_kwargs', {})
    if segmentation_kwargs.get('segment') is not None:
        segmented = segmentation_kwargs.get('segment')

    assert env_id is not None, 'you must provide an env id!'

    is_door_env = env_id == 'SawyerDoorHookResetFreeEnv-v1'
    obj = 'door-angle' if is_door_env else 'puck-pos'

    pjhome = os.environ['PJHOME']
    if segmented:
        if 'unet' in segmentation_method:
            seg_name = 'seg-unet'
        else:
            seg_name = 'seg-' + segmentation_method
    else:
        seg_name = 'no-seg'

    if is_door_env:
        seg_name += '-2'

    # Cache files are keyed by env, segmentation, size and occlusion
    # settings.
    data_file_path = osp.join(
        pjhome, 'data/local/pre-train-lstm',
        'vae-only-{}-{}-{}-{}-{}.npy'.format(env_id, seg_name, N,
                                             occlusion_prob, occlusion_level))
    obj_state_path = osp.join(
        pjhome, 'data/local/pre-train-lstm',
        'vae-only-{}-{}-{}-{}-{}-{}.npy'.format(env_id, seg_name, N,
                                                occlusion_prob,
                                                occlusion_level, obj))

    print(data_file_path)
    if osp.exists(data_file_path):
        all_data = np.load(data_file_path)
        if len(all_data) >= N:
            print("load stored data at: ", data_file_path)
            # The split is taken over everything stored in the cache.
            n = int(len(all_data) * test_p)
            train_dataset = all_data[:n]
            test_dataset = all_data[n:]
            obj_states = np.load(obj_state_path)
            info = {'obj_state': obj_states}
            return train_dataset, test_dataset, info

    if segmented:
        print(
            "generating lstm vae pretrain only dataset with segmented images using method: ",
            segmentation_method)
        if segmentation_method == 'unet':
            segment_func = segment_image_unet
        else:
            raise NotImplementedError
    else:
        print("generating lstm vae pretrain only dataset with original images")

    info = {}
    dataset = np.zeros((N, imsize * imsize * num_channels), dtype=np.uint8)
    imgs = []
    obj_states = None

    if is_door_env:
        # Pre-sampled goals: the object state is the last entry of the
        # goal state.
        pre_sampled_goal_path = osp.join(
            pjhome, 'data/local/pre-train-vae/door_original_dataset.npy')
        goal_dict = np.load(pre_sampled_goal_path, allow_pickle=True).item()
        imgs = goal_dict['image_desired_goal']
        door_angles = goal_dict['state_desired_goal'][:, -1]
        obj_states = door_angles[:, np.newaxis]
    elif env_id == 'SawyerPickupEnvYZEasy-v0':
        from rlkit.util.io import load_local_or_remote_file
        pre_sampled_goal_path = osp.join(
            pjhome, 'data/local/pre-train-vae/pickup-original-dataset.npy')
        goal_dict = load_local_or_remote_file(pre_sampled_goal_path).item()
        imgs = goal_dict['image_desired_goal']
        # The object state is everything from index 3 of the goal state on.
        obj_states = goal_dict['state_desired_goal'][:, 3:]

    else:
        import gym
        import multiworld
        multiworld.register_all_envs()
        env = gym.make(env_id)

        if not isinstance(env, ImageEnv):
            env = ImageEnv(
                env,
                imsize,
                init_camera=init_camera,
                transpose=True,
                normalize=True,
            )
        env.reset()
        info['env'] = env

        # np.float64 instead of the `np.float` alias, which was removed in
        # NumPy 1.24 (same dtype).
        puck_pos = np.zeros((N, 2), dtype=np.float64)
        for i in range(N):
            print("lstm vae pretrain only dataset generation, number: ", i)
            if env_id == 'SawyerPushHurdle-v0':
                obs, puck_p = _generate_sawyerhurdle_dataset(
                    env, return_puck_pos=True, segmented=segmented)
            elif env_id == 'SawyerPushHurdleMiddle-v0':
                obs, puck_p = _generate_sawyerhurdlemiddle_dataset(
                    env, return_puck_pos=True)
            elif env_id == 'SawyerPushNIPSEasy-v0':
                obs, puck_p = _generate_sawyerpushnipseasy_dataset(
                    env, return_puck_pos=True)
            elif env_id == 'SawyerPushHurdleResetFreeEnv-v0':
                obs, puck_p = _generate_sawyerhurldeblockresetfree_dataset(
                    env, return_puck_pos=True)
            else:
                raise NotImplementedError
            # NOTE (original author): this is already a normalized image,
            # of dtype np.float64.
            img = obs['image_observation']
            imgs.append(img)
            puck_pos[i] = puck_p

        obj_states = puck_pos

    # Segment (or just un-normalize) the images into the uint8 buffer.
    # NOTE(review): the pre-sampled branches assume the goal file holds at
    # least N images -- confirm.
    for i in range(N):
        print("segmenting image ", i)
        img = imgs[i]
        if segmented:
            dataset[i, :] = segment_func(img,
                                         normalize=False,
                                         **segmentation_kwargs)
            # Randomly occlude some images: with probability
            # occlusion_prob, zero every pixel whose uniform draw is not
            # above occlusion_level.
            p = np.random.rand()
            if p < occlusion_prob:
                mask = (np.random.uniform(low=0, high=1, size=(imsize, imsize))
                        > occlusion_level).astype(np.uint8)
                # Use num_channels (not a hard-coded 3) so the reshape
                # matches the buffer width.
                img = dataset[i].reshape(num_channels, imsize,
                                         imsize).transpose()
                img[mask < 1] = 0
                dataset[i] = img.transpose().flatten()

        else:
            dataset[i, :] = unormalize_image(img)

    # Add the trajectory dimension (length 1).
    dataset = dataset[:, np.newaxis, :]  # batch_size x 1 x image_length
    obj_states = obj_states[:, np.newaxis, :]  # batch_size x 1 x state_dim
    info['obj_state'] = obj_states

    n = int(N * test_p)
    train_dataset = dataset[:n]
    test_dataset = dataset[n:]

    # Only datasets with at least 500 samples are written to the cache.
    if N >= 500:
        print('save data to: ', data_file_path)
        all_data = np.concatenate([train_dataset, test_dataset], axis=0)
        np.save(data_file_path, all_data)
        np.save(obj_state_path, obj_states)

    return train_dataset, test_dataset, info
Esempio n. 16
0
                    right_x=6,
                    thickness=0.5
                )
            ]

if __name__ == "__main__":
    # Manual smoke test: drive the ball to the right through the maze layout
    # for 10 episodes of 50 steps, rendering and grabbing an image each step.
    import matplotlib.pyplot as plt
    import time

    maze_kwargs = dict(
        wall_shape='maze',
        render_size=256,
        ball_radius=0.25,
        boundary_dist=10,
        action_limit=0.25,
        fixed_goal=False,
        randomize_position_on_reset=False,
        target_radius=0,
        initial_position=(-8, -8),
    )
    e = ImageEnv(Point2DWallEnv(**maze_kwargs), imsize=256)

    for _episode in range(10):
        e.reset()
        for _step in range(50):
            # Constant action [1, 0]; a fresh array is built for every step.
            e.step(np.array([1, 0]))
            e.render()
            im = e.get_image()
            # Slow the loop down so the rendering can be followed by eye.
            time.sleep(0.1)
Esempio n. 17
0
def getdata(variant):
    """Debug/visualization script around a trained VAE environment.

    Steps performed, in order:

    1. Preprocess ``variant['skewfit_variant']``, force ``render`` on and
       build the VAE environment with ``get_envs``.
    2. Read the dataset-generation settings from ``variant``.
    3. If ``dataset_path`` is given, load it and stop (nothing else is done
       with the loaded data here).
    4. Otherwise build an image environment and, for 10 iterations, sample
       a goal, capture an image at the goal and another after
       ``env.generate_new_state(goal)``, encode/decode both through the VAE
       environment, and save the collected arrays to a hard-coded
       directory under ``/home/xiaomin/Downloads``.
    5. Serialize the VAE environment's ``_get_encoded`` and ``_get_img``
       with dill + pickle to hard-coded files, read them back, run one
       encode/decode round trip and show the result, blocking until a key
       is pressed.
    6. Save ``dataset`` to the ``/tmp`` cache file. The per-sample
       assignment is commented out, so the saved array is all zeros.

    The function has no return statement, so it returns ``None``.
    """
    skewfit_variant = variant['skewfit_variant']
    print('-------------------------------')
    skewfit_preprocess_variant(skewfit_variant)
    skewfit_variant['render'] = True
    vae_environment = get_envs(skewfit_variant)
    print('done loading vae_env')

    # Dataset-generation settings; several of them (test_p, use_cached,
    # show, n_random_steps, ...) are read but not used further below.
    env_class = variant.get('env_class', None)
    env_kwargs = variant.get('env_kwargs', None)
    env_id = variant.get('env_id', None)
    N = variant.get('N', 10000)
    test_p = variant.get('test_p', 0.9)
    use_cached = variant.get('use_cached', True)
    imsize = variant.get('imsize', 84)
    num_channels = variant.get('num_channels', 3)
    show = variant.get('show', False)
    init_camera = variant.get('init_camera', None)
    dataset_path = variant.get('dataset_path', None)
    oracle_dataset_using_set_to_goal = variant.get(
        'oracle_dataset_using_set_to_goal', False)
    random_rollout_data = variant.get('random_rollout_data', False)
    random_and_oracle_policy_data = variant.get(
        'random_and_oracle_policy_data', False)
    random_and_oracle_policy_data_split = variant.get(
        'random_and_oracle_policy_data_split', 0)
    policy_file = variant.get('policy_file', None)
    n_random_steps = variant.get('n_random_steps', 100)
    vae_dataset_specific_env_kwargs = variant.get(
        'vae_dataset_specific_env_kwargs', None)
    save_file_prefix = variant.get('save_file_prefix', None)
    non_presampled_goal_img_is_garbage = variant.get(
        'non_presampled_goal_img_is_garbage', None)
    tag = variant.get('tag', '')
    from multiworld.core.image_env import ImageEnv, unormalize_image
    import rlkit.torch.pytorch_util as ptu
    info = {}
    if dataset_path is not None:
        dataset = load_local_or_remote_file(dataset_path)
        N = dataset.shape[0]
    else:
        if env_kwargs is None:
            env_kwargs = {}
        # Cache-file prefix: explicit prefix, else env id, else the env
        # class name.
        if save_file_prefix is None:
            save_file_prefix = env_id
        if save_file_prefix is None:
            save_file_prefix = env_class.__name__
        filename = "/tmp/{}_N{}_{}_imsize{}_random_oracle_split_{}{}.npy".format(
            save_file_prefix,
            str(N),
            init_camera.__name__ if init_camera else '',
            imsize,
            random_and_oracle_policy_data_split,
            tag,
        )
        # The cache check is disabled: data is always regenerated.
        if True:
            now = time.time()

            if env_id is not None:
                import gym
                import multiworld
                multiworld.register_all_envs()
                env = gym.make(env_id)
            else:
                # Dataset-specific kwargs take precedence; general
                # env_kwargs only fill in keys that are missing.
                if vae_dataset_specific_env_kwargs is None:
                    vae_dataset_specific_env_kwargs = {}
                for key, val in env_kwargs.items():
                    if key not in vae_dataset_specific_env_kwargs:
                        vae_dataset_specific_env_kwargs[key] = val
                env = env_class(**vae_dataset_specific_env_kwargs)
            if not isinstance(env, ImageEnv):
                print("using(ImageEnv)")
                env = ImageEnv(
                    env,
                    imsize,
                    init_camera=init_camera,
                    transpose=True,
                    normalize=True,
                    non_presampled_goal_img_is_garbage=
                    non_presampled_goal_img_is_garbage,
                )
            else:
                imsize = env.imsize
                env.non_presampled_goal_img_is_garbage = non_presampled_goal_img_is_garbage
            env.reset()
            info['env'] = env
            # NOTE(review): `policy` is set up here but not used in the
            # loop below.
            if random_and_oracle_policy_data:
                policy_file = load_local_or_remote_file(policy_file)
                policy = policy_file['policy']
                policy.to(ptu.device)
            if random_rollout_data:
                from rlkit.exploration_strategies.ou_strategy import OUStrategy
                policy = OUStrategy(env.action_space)
            # Allocated for N images but never written to (see the
            # commented-out assignments below); saved as zeros at the end.
            dataset = np.zeros((N, imsize * imsize * num_channels),
                               dtype=np.uint8)

            # Fixed 10 iterations, independent of N.
            for i in range(10):
                # Per-iteration record: [img_1, encoded_1, img_1_reconstruct,
                # img_2, encoded_2, img_2_reconstruct, instr].
                NP = []
                if True:
                    print(i)
                    #print('th step')
                    goal = env.sample_goal()
                    # print("goal___________________________")
                    # print(goal)
                    # print("goal___________________________")
                    # First image: the env placed directly at the sampled
                    # goal.
                    env.set_to_goal(goal)
                    obs = env._get_obs()
                    #img = img.reshape(3, imsize, imsize).transpose()
                    # img = img[::-1, :, ::-1]
                    # cv2.imshow('img', img)
                    # cv2.waitKey(1)
                    img_1 = obs['image_observation']
                    img_1 = img_1.reshape(3, imsize, imsize).transpose()
                    NP.append(img_1)
                    # Windows are only refreshed on every third iteration.
                    if i % 3 == 0:
                        cv2.imshow('img1', img_1)
                        cv2.waitKey(1)
                    #img_1_reconstruct = vae_environment._reconstruct_img(obs['image_observation']).transpose()
                    # Encode the flat image and decode it again.
                    encoded_1 = vae_environment._get_encoded(
                        obs['image_observation'])
                    print(encoded_1)
                    NP.append(encoded_1)
                    img_1_reconstruct = vae_environment._get_img(
                        encoded_1).transpose()
                    NP.append(img_1_reconstruct)
                    #dataset[i, :] = unormalize_image(img)
                    # img_1 = img_1.reshape(3, imsize, imsize).transpose()
                    if i % 3 == 0:
                        cv2.imshow('img1_reconstruction', img_1_reconstruct)
                        cv2.waitKey(1)
                    # Second image: reset, then derive a new state from the
                    # goal. presumably `instr` describes that change --
                    # TODO confirm against env.generate_new_state
                    env.reset()
                    instr = env.generate_new_state(goal)
                    if i % 3 == 0:
                        print(instr)
                    obs = env._get_obs()
                    # obs = env._get_obs()
                    img_2 = obs['image_observation']
                    img_2 = img_2.reshape(3, imsize, imsize).transpose()
                    NP.append(img_2)
                    if i % 3 == 0:
                        cv2.imshow('img2', img_2)
                        cv2.waitKey(1)
                    #img_2_reconstruct = vae_environment._reconstruct_img(obs['image_observation']).transpose()
                    encoded_2 = vae_environment._get_encoded(
                        obs['image_observation'])
                    NP.append(encoded_2)
                    img_2_reconstruct = vae_environment._get_img(
                        encoded_2).transpose()
                    NP.append(img_2_reconstruct)
                    NP.append(instr)
                    # img_2 = img_2.reshape(3, imsize, imsize).transpose()
                    if i % 3 == 0:
                        cv2.imshow('img2_reconstruct', img_2_reconstruct)
                        cv2.waitKey(1)
                    # NOTE(review): the list mixes images, encodings and
                    # `instr`, which likely differ in shape; recent NumPy
                    # versions may refuse to build such a ragged array
                    # without dtype=object -- confirm.
                    NP = np.array(NP)
                    idx = str(i)
                    # Hard-coded, user-specific output directory.
                    name = "/home/xiaomin/Downloads/IFIG_DATA_1/" + idx + ".npy"
                    # NOTE(review): the file object opened here is never
                    # explicitly closed.
                    np.save(open(name, 'wb'), NP)
                    # radius = input('waiting...')

                # #get the in between functions
            import dill
            import pickle
            # Round trip of the encoder: dill-serialize the bound method,
            # pickle the resulting bytes to disk, read them back and
            # rebuild the callable.
            get_encoded = dill.dumps(vae_environment._get_encoded)
            with open(
                    "/home/xiaomin/Downloads/IFIG_encoder_decoder/get_encoded_1000_epochs_one_puck.txt",
                    "wb") as fp:
                pickle.dump(get_encoded, fp)
            # The file read here was written just above by this function;
            # pickle/dill must not be pointed at untrusted files.
            with open(
                    "/home/xiaomin/Downloads/IFIG_encoder_decoder/get_encoded_1000_epochs_one_puck.txt",
                    "rb") as fp:
                b = pickle.load(fp)
            func_get_encoded = dill.loads(b)
            # Uses the observation left over from the last loop iteration.
            encoded = func_get_encoded(obs['image_observation'])
            print(encoded)
            print('------------------------------')
            # Same round trip for the decoder.
            get_img = dill.dumps(vae_environment._get_img)
            with open(
                    "/home/xiaomin/Downloads/IFIG_encoder_decoder/get_img_1000_epochs_one_puck.txt",
                    "wb") as fp:
                pickle.dump(get_img, fp)
            with open(
                    "/home/xiaomin/Downloads/IFIG_encoder_decoder/get_img_1000_epochs_one_puck.txt",
                    "rb") as fp:
                c = pickle.load(fp)
            func_get_img = dill.loads(c)

            img_1_reconstruct = func_get_img(encoded).transpose()
            print(img_1_reconstruct)
            #dataset[i, :] = unormalize_image(img)
            # img_1 = img_1.reshape(3, imsize, imsize).transpose()
            # waitKey(0) blocks until a key is pressed in the window.
            cv2.imshow('test', img_1_reconstruct)
            cv2.waitKey(0)

            print("done making training data", filename, time.time() - now)
            np.save(filename, dataset)
Esempio n. 18
0
def generate_vae_dataset(variant):
    """Load or generate an image dataset for VAE training and split it.

    The dataset comes from one of three places, checked in this order:

    1. ``variant['dataset_path']`` -- a single path (str), a list of paths
       (merged with ``concatenate_datasets``) or a dict with ``'train'`` and
       ``'test'`` entries, each a str or a list.
    2. A cached ``/tmp/*.npy`` file whose name is derived from the variant
       (only when ``use_cached`` is true and the file exists).
    3. Fresh rollouts in an environment built from ``env_id`` (via
       ``gym.make``) or ``env_class`` / ``env_kwargs``; the result is saved
       to the cache file.

    How frames are collected is selected by the first true flag among
    ``random_and_oracle_policy_data``, ``random_rollout_data`` and
    ``oracle_dataset_using_set_to_goal``; with none set, each sample is the
    last observation after ``n_random_steps`` random actions from a reset.

    The wrapper class for the train/test split is selected by
    ``use_linear_dynamics``, ``enviorment_dataset`` (spelling is the key the
    callers use) and ``conditional_vae_dataset``; with none set, a flat
    ``ImageObservationDataset`` split is returned.

    Args:
        variant: dict of options; every key is optional and read with
            ``variant.get`` using the defaults visible below.

    Returns:
        ``(train_dataset, test_dataset, info)``. ``info`` always has empty
        ``'train_labels'`` / ``'test_labels'`` lists and additionally
        ``'env'`` when data was generated in this call.

    Raises:
        AssertionError: if ``N`` is not a multiple of ``n_random_steps``
            (checked on the variant values, before any dataset is loaded).
    """
    print(variant)
    from tqdm import tqdm
    env_class = variant.get('env_class', None)
    env_kwargs = variant.get('env_kwargs', None)
    env_id = variant.get('env_id', None)
    N = variant.get('N', 10000)
    batch_size = variant.get('batch_size', 128)
    test_p = variant.get('test_p', 0.9)
    use_cached = variant.get('use_cached', True)
    imsize = variant.get('imsize', 84)
    num_channels = variant.get('num_channels', 3)
    show = variant.get('show', False)
    init_camera = variant.get('init_camera', None)
    dataset_path = variant.get('dataset_path', None)
    augment_data = variant.get('augment_data', False)
    data_filter_fn = variant.get('data_filter_fn', lambda x: x)
    delete_after_loading = variant.get('delete_after_loading', False)
    oracle_dataset_using_set_to_goal = variant.get(
        'oracle_dataset_using_set_to_goal', False)
    random_rollout_data = variant.get('random_rollout_data', False)
    random_rollout_data_set_to_goal = variant.get(
        'random_rollout_data_set_to_goal', True)
    random_and_oracle_policy_data = variant.get(
        'random_and_oracle_policy_data', False)
    random_and_oracle_policy_data_split = variant.get(
        'random_and_oracle_policy_data_split', 0)
    policy_file = variant.get('policy_file', None)
    n_random_steps = variant.get('n_random_steps', 100)
    vae_dataset_specific_env_kwargs = variant.get(
        'vae_dataset_specific_env_kwargs', None)
    save_file_prefix = variant.get('save_file_prefix', None)
    non_presampled_goal_img_is_garbage = variant.get(
        'non_presampled_goal_img_is_garbage', None)

    conditional_vae_dataset = variant.get('conditional_vae_dataset', False)
    use_env_labels = variant.get('use_env_labels', False)
    use_linear_dynamics = variant.get('use_linear_dynamics', False)
    enviorment_dataset = variant.get('enviorment_dataset', False)
    save_trajectories = variant.get('save_trajectories', False)
    # Trajectory-shaped storage is required by both the dynamics and the
    # conditional dataset wrappers, so those flags imply it.
    save_trajectories = save_trajectories or use_linear_dynamics or conditional_vae_dataset
    tag = variant.get('tag', '')

    assert N % n_random_steps == 0, "Fix N/horizon or dataset generation will fail"

    from multiworld.core.image_env import ImageEnv, unormalize_image
    import rlkit.torch.pytorch_util as ptu
    from rlkit.util.io import load_local_or_remote_file
    from rlkit.data_management.dataset import (
        TrajectoryDataset, ImageObservationDataset, InitialObservationDataset,
        EnvironmentDataset, ConditionalDynamicsDataset,
        InitialObservationNumpyDataset, InfiniteBatchLoader,
        InitialObservationNumpyJitteringDataset)
    info = {}
    use_test_dataset = False
    if dataset_path is not None:
        # Pre-recorded data: N and n_random_steps are re-derived from the
        # (trajectories, steps, ...) shape of the stored observations.
        if isinstance(dataset_path, str):
            dataset = load_local_or_remote_file(
                dataset_path, delete_after_loading=delete_after_loading)
            dataset = dataset.item()
            N = dataset['observations'].shape[0] * dataset[
                'observations'].shape[1]
            n_random_steps = dataset['observations'].shape[1]
        if isinstance(dataset_path, list):
            dataset = concatenate_datasets(dataset_path)
            N = dataset['observations'].shape[0] * dataset[
                'observations'].shape[1]
            n_random_steps = dataset['observations'].shape[1]
        if isinstance(dataset_path, dict):

            if isinstance(dataset_path['train'], str):
                dataset = load_local_or_remote_file(
                    dataset_path['train'],
                    delete_after_loading=delete_after_loading)
                dataset = dataset.item()
            elif isinstance(dataset_path['train'], list):
                dataset = concatenate_datasets(dataset_path['train'])

            if isinstance(dataset_path['test'], str):
                test_dataset = load_local_or_remote_file(
                    dataset_path['test'],
                    delete_after_loading=delete_after_loading)
                test_dataset = test_dataset.item()
            elif isinstance(dataset_path['test'], list):
                test_dataset = concatenate_datasets(dataset_path['test'])

            N = dataset['observations'].shape[0] * dataset[
                'observations'].shape[1]
            n_random_steps = dataset['observations'].shape[1]
            use_test_dataset = True
    else:
        if env_kwargs is None:
            env_kwargs = {}
        # Cache-file prefix: explicit prefix, else env id, else class name.
        if save_file_prefix is None:
            save_file_prefix = env_id
        if save_file_prefix is None:
            save_file_prefix = env_class.__name__
        filename = "/tmp/{}_N{}_{}_imsize{}_random_oracle_split_{}{}.npy".format(
            save_file_prefix,
            str(N),
            init_camera.__name__
            if init_camera and hasattr(init_camera, '__name__') else '',
            imsize,
            random_and_oracle_policy_data_split,
            tag,
        )
        if use_cached and osp.isfile(filename):
            dataset = load_local_or_remote_file(
                filename, delete_after_loading=delete_after_loading)
            if conditional_vae_dataset:
                # The conditional dataset is stored as a dict wrapped in a
                # 0-d object array; unwrap it.
                dataset = dataset.item()
            print("loaded data from saved file", filename)
        else:
            now = time.time()
            if env_id is not None:
                import gym
                import multiworld
                multiworld.register_all_envs()
                env = gym.make(env_id)
            else:
                if vae_dataset_specific_env_kwargs is None:
                    vae_dataset_specific_env_kwargs = {}
                # Dataset-specific kwargs take precedence; env_kwargs only
                # fills in keys that are not already present.
                for key, val in env_kwargs.items():
                    if key not in vae_dataset_specific_env_kwargs:
                        vae_dataset_specific_env_kwargs[key] = val
                env = env_class(**vae_dataset_specific_env_kwargs)
            if not isinstance(env, ImageEnv):
                env = ImageEnv(
                    env,
                    imsize,
                    init_camera=init_camera,
                    transpose=True,
                    normalize=True,
                    non_presampled_goal_img_is_garbage=
                    non_presampled_goal_img_is_garbage,
                )
            else:
                # An env that is already image-wrapped dictates the size.
                imsize = env.imsize
                env.non_presampled_goal_img_is_garbage = non_presampled_goal_img_is_garbage
            env.reset()
            info['env'] = env
            if random_and_oracle_policy_data:
                policy_file = load_local_or_remote_file(policy_file)
                policy = policy_file['policy']
                policy.to(ptu.device)
            if random_rollout_data:
                from rlkit.exploration_strategies.ou_strategy import OUStrategy
                policy = OUStrategy(env.action_space)

            if save_trajectories:
                dataset = {
                    'observations':
                    np.zeros((N // n_random_steps, n_random_steps,
                              imsize * imsize * num_channels),
                             dtype=np.uint8),
                    # np.float was removed in NumPy 1.24; it was an alias of
                    # the builtin float, i.e. float64.
                    'actions':
                    np.zeros((N // n_random_steps, n_random_steps,
                              env.action_space.shape[0]),
                             dtype=np.float64),
                    'env':
                    np.zeros(
                        (N // n_random_steps, imsize * imsize * num_channels),
                        dtype=np.uint8),
                }
            else:
                dataset = np.zeros((N, imsize * imsize * num_channels),
                                   dtype=np.uint8)
            labels = []
            for i in tqdm(range(N)):
                if random_and_oracle_policy_data:
                    # The first `split` fraction of samples uses random
                    # actions, the remainder uses the loaded policy.
                    num_random_steps = int(N *
                                           random_and_oracle_policy_data_split)
                    if i < num_random_steps:
                        env.reset()
                        for _ in range(n_random_steps):
                            obs = env.step(env.action_space.sample())[0]
                    else:
                        obs = env.reset()
                        policy.reset()
                        for _ in range(n_random_steps):
                            policy_obs = np.hstack((
                                obs['state_observation'],
                                obs['state_desired_goal'],
                            ))
                            action, _ = policy.get_action(policy_obs)
                            obs, _, _, _ = env.step(action)
                elif random_rollout_data:  #ADD DATA WHERE JUST PUCK MOVES
                    # One sample per env step; a new trajectory (reset and
                    # fresh `env_img`) starts every n_random_steps samples.
                    if i % n_random_steps == 0:
                        env.reset()
                        policy.reset()
                        env_img = env._get_obs()['image_observation']
                        if random_rollout_data_set_to_goal:
                            env.set_to_goal(env.get_goal())
                    # The observation is captured before the step, so the
                    # stored (obs, u) pair is "state, action taken from it".
                    obs = env._get_obs()
                    u = policy.get_action_from_raw_action(
                        env.action_space.sample())
                    env.step(u)
                elif oracle_dataset_using_set_to_goal:
                    print(i)

                    goal = env.sample_goal()
                    env.set_to_goal(goal)
                    obs = env._get_obs()
                else:
                    env.reset()
                    for _ in range(n_random_steps):
                        obs = env.step(env.action_space.sample())[0]

                img = obs['image_observation']
                if use_env_labels:
                    labels.append(obs['label'])
                if save_trajectories:
                    # NOTE(review): `u` and `env_img` are only assigned in
                    # the random_rollout_data branch above; combining
                    # save_trajectories with another collection mode looks
                    # like it would raise NameError here -- confirm.
                    dataset['observations'][
                        i // n_random_steps,
                        i % n_random_steps, :] = unormalize_image(img)
                    dataset['actions'][i // n_random_steps,
                                       i % n_random_steps, :] = u
                    dataset['env'][i // n_random_steps, :] = unormalize_image(
                        env_img)
                else:
                    dataset[i, :] = unormalize_image(img)

                if show:
                    img = img.reshape(3, imsize, imsize).transpose()
                    img = img[::-1, :, ::-1]
                    cv2.imshow('img', img)
                    cv2.waitKey(1)
                    # radius = input('waiting...')
            print("done making training data", filename, time.time() - now)
            np.save(filename, dataset)
            #np.save(filename[:-4] + 'labels.npy', np.array(labels))

    info['train_labels'] = []
    info['test_labels'] = []

    dataset = data_filter_fn(dataset)
    if use_linear_dynamics and conditional_vae_dataset:
        # Random split over whole trajectories. (A sequential split used to
        # be built first and then immediately overwritten; it was dead code
        # and indexed dataset['env'] outside the try, which prevented the
        # no-'env' fallback below from ever handling a missing key.)
        num_trajectories = N // n_random_steps
        n = int(num_trajectories * test_p)
        indices = np.arange(num_trajectories)
        np.random.shuffle(indices)
        train_i, test_i = indices[:n], indices[n:]

        try:
            train_dataset = ConditionalDynamicsDataset({
                'observations':
                dataset['observations'][train_i, :, :],
                'actions':
                dataset['actions'][train_i, :, :],
                'env':
                dataset['env'][train_i, :]
            })
            test_dataset = ConditionalDynamicsDataset({
                'observations':
                dataset['observations'][test_i, :, :],
                'actions':
                dataset['actions'][test_i, :, :],
                'env':
                dataset['env'][test_i, :]
            })
        except Exception:
            # Best-effort fallback: build the datasets without the 'env'
            # entry (presumably for recordings that lack it -- confirm).
            # Narrowed from a bare except so that KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            train_dataset = ConditionalDynamicsDataset({
                'observations':
                dataset['observations'][train_i, :, :],
                'actions':
                dataset['actions'][train_i, :, :],
            })
            test_dataset = ConditionalDynamicsDataset({
                'observations':
                dataset['observations'][test_i, :, :],
                'actions':
                dataset['actions'][test_i, :, :],
            })
    elif use_linear_dynamics:
        # Sequential (unshuffled) split over trajectories.
        num_trajectories = N // n_random_steps
        n = int(num_trajectories * test_p)
        train_dataset = TrajectoryDataset({
            'observations':
            dataset['observations'][:n, :, :],
            'actions':
            dataset['actions'][:n, :, :]
        })
        test_dataset = TrajectoryDataset({
            'observations':
            dataset['observations'][n:, :, :],
            'actions':
            dataset['actions'][n:, :, :]
        })
    elif enviorment_dataset:
        # Split along the time axis: every trajectory contributes its first
        # n steps to train and the remaining steps to test.
        n = int(n_random_steps * test_p)
        train_dataset = EnvironmentDataset({
            'observations':
            dataset['observations'][:, :n, :],
        })
        test_dataset = EnvironmentDataset({
            'observations':
            dataset['observations'][:, n:, :],
        })
    elif conditional_vae_dataset:
        num_trajectories = N // n_random_steps
        n = int(num_trajectories * test_p)
        indices = np.arange(num_trajectories)
        np.random.shuffle(indices)
        train_i, test_i = indices[:n], indices[n:]

        if augment_data:
            dataset_class = InitialObservationNumpyJitteringDataset
        else:
            dataset_class = InitialObservationNumpyDataset

        # Without a stored 'env' image, the first frame of each trajectory
        # is used in its place.
        if 'env' not in dataset:
            dataset['env'] = dataset['observations'][:, 0]
        if use_test_dataset and ('env' not in test_dataset):
            test_dataset['env'] = test_dataset['observations'][:, 0]

        if use_test_dataset:
            # An explicit test set was supplied: use both sets whole.
            train_dataset = dataset_class({
                'observations':
                dataset['observations'],
                'env':
                dataset['env']
            })

            test_dataset = dataset_class({
                'observations':
                test_dataset['observations'],
                'env':
                test_dataset['env']
            })
        else:
            train_dataset = dataset_class({
                'observations':
                dataset['observations'][train_i, :, :],
                'env':
                dataset['env'][train_i, :]
            })

            test_dataset = dataset_class({
                'observations':
                dataset['observations'][test_i, :, :],
                'env':
                dataset['env'][test_i, :]
            })

        train_batch_loader_kwargs = variant.get(
            'train_batch_loader_kwargs',
            dict(
                batch_size=batch_size,
                num_workers=0,
            ))
        test_batch_loader_kwargs = variant.get(
            'test_batch_loader_kwargs',
            dict(
                batch_size=batch_size,
                num_workers=0,
            ))

        train_data_loader = data.DataLoader(train_dataset,
                                            shuffle=True,
                                            drop_last=True,
                                            **train_batch_loader_kwargs)
        test_data_loader = data.DataLoader(test_dataset,
                                           shuffle=True,
                                           drop_last=True,
                                           **test_batch_loader_kwargs)

        train_dataset = InfiniteBatchLoader(train_data_loader)
        test_dataset = InfiniteBatchLoader(test_data_loader)
    else:
        # Flat image array: sequential split over individual frames.
        n = int(N * test_p)
        train_dataset = ImageObservationDataset(dataset[:n, :])
        test_dataset = ImageObservationDataset(dataset[n:, :])
    return train_dataset, test_dataset, info
Esempio n. 19
0
def generate_sawyerhurdle_dataset(variant,
                                  segmented=False,
                                  segmentation_method='unet'):
    """Load or generate a Sawyer hurdle image dataset for VAE pre-training.

    A cached array under ``$PJHOME/data/local/pre-train-vae`` is returned
    when it exists and holds at least ``N`` rows. Otherwise ``N`` images are
    collected from the ``env_id`` environment, optionally segmented, and
    (for ``N >= 2000``) written back to that cache together with the puck
    positions.

    Args:
        variant: dict of options read with ``variant.get``: ``env_id``,
            ``N`` (default 10000), ``test_p`` (default 0.9), ``imsize``
            (default 84), ``num_channels`` (default 3), ``init_camera`` and
            ``segmentation_kwargs``.
        segmented: store segmented images instead of raw ones.
        segmentation_method: segmentation backend; only ``'unet'`` is
            supported.

    Returns:
        ``(train_dataset, test_dataset, info)`` where the datasets are the
        first ``test_p`` fraction and the remainder of the image array.
        ``info['puck_pos']`` holds the puck positions; ``info['env']`` is
        only present when the data was generated in this call.

    Raises:
        KeyError: if the ``PJHOME`` environment variable is not set.
        NotImplementedError: for an unsupported ``segmentation_method`` or
            ``env_id``.
        AssertionError: if data must be generated and ``env_id`` is None.
    """
    env_id = variant.get('env_id', None)
    N = variant.get('N', 10000)
    test_p = variant.get('test_p', 0.9)
    imsize = variant.get('imsize', 84)
    num_channels = variant.get('num_channels', 3)
    init_camera = variant.get('init_camera', None)
    segmentation_kwargs = variant.get('segmentation_kwargs', {})

    pjhome = os.environ['PJHOME']
    seg_name = 'seg-' + segmentation_method if segmented else 'no-seg'
    data_file_path = osp.join(pjhome, 'data/local/pre-train-vae',
                              '{}-{}-{}.npy'.format(env_id, seg_name, N))
    puck_pos_path = osp.join(
        pjhome, 'data/local/pre-train-vae',
        '{}-{}-{}-puck-pos.npy'.format(env_id, seg_name, N))

    if osp.exists(data_file_path):
        all_data = np.load(data_file_path)
        # A cache file with fewer than N rows is ignored and regenerated.
        if len(all_data) >= N:
            print("load stored data at: ", data_file_path)
            n = int(len(all_data) * test_p)
            train_dataset = all_data[:n]
            test_dataset = all_data[n:]
            puck_pos = np.load(puck_pos_path)
            info = {'puck_pos': puck_pos}
            return train_dataset, test_dataset, info

    # The simulator stack is only imported once we know data has to be
    # generated, so cached datasets can be loaded without it installed.
    from multiworld.core.image_env import ImageEnv, unormalize_image

    if segmented:
        print("generating vae dataset with segmented images using method: ",
              segmentation_method)
        if segmentation_method == 'unet':
            segment_func = segment_image_unet
        else:
            raise NotImplementedError
    else:
        print("generating vae dataset with original images")

    assert env_id is not None
    import gym
    import multiworld
    multiworld.register_all_envs()
    env = gym.make(env_id)

    if not isinstance(env, ImageEnv):
        env = ImageEnv(
            env,
            imsize,
            init_camera=init_camera,
            transpose=True,
            normalize=True,
        )

    info = {}
    env.reset()
    info['env'] = env

    dataset = np.zeros((N, imsize * imsize * num_channels), dtype=np.uint8)
    # np.float was removed in NumPy 1.24; it was an alias of the builtin
    # float, i.e. float64.
    puck_pos = np.zeros((N, 2), dtype=np.float64)

    for i in range(N):
        print("sawyer hurdle custom vae data set generation, number: ", i)
        if env_id == 'SawyerPushHurdle-v0':
            obs, puck_p = _generate_sawyerhurdle_dataset(env,
                                                         return_puck_pos=True)
        elif env_id == 'SawyerPushHurdleMiddle-v0':
            obs, puck_p = _generate_sawyerhurdlemiddle_dataset(
                env, return_puck_pos=True)
        else:
            raise NotImplementedError
        img = obs[
            'image_observation']  # NOTE yufei: this is already normalized image, of detype np.float64.

        if segmented:
            dataset[i, :] = segment_func(img,
                                         normalize=False,
                                         **segmentation_kwargs)
        else:
            dataset[i, :] = unormalize_image(img)
        puck_pos[i] = puck_p

    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]

    info['puck_pos'] = puck_pos

    # Small datasets are not written to the cache.
    if N >= 2000:
        print('save data to: ', data_file_path)
        all_data = np.concatenate([train_dataset, test_dataset], axis=0)
        np.save(data_file_path, all_data)
        np.save(puck_pos_path, puck_pos)

    return train_dataset, test_dataset, info