Example #1
0
def main():
    """Collect a dataset of rendered random-reset states of a multiworld
    pick-and-place env and save it as one .npy array.

    Side effects: writes a 12800 x C x 28 x 28 array to a hard-coded path.
    """
    register_all_envs()
    # The env kwargs (ball radius, reward type, etc.) are baked into the
    # registered id below; see the multiworld registry for the exact config.
    env = gym.make('OneObject-PickAndPlace-BigBall-RandomInit-2D-v1')

    renderer = EnvRenderer(
        output_image_format='CHW',
        width=28,
        height=28,
    )
    n = 12800
    imgs = []
    for _ in range(n):
        env.reset()
        # One CHW image of the freshly reset state per sample.
        imgs.append(renderer(env))
    imgs = np.array(imgs)
    np.save(
        '/home/vitchyr/mnt/log/manual-upload/sets/OneObject-PickAndPlace-BigBall-RandomInit-2D-v1-ungrouped-train-28x28.npy',
        imgs,
    )
Example #2
0
    def __init__(self):
        """Wrap the multiworld 'HalfCheetahGoal-v0' env behind this adapter."""
        multiworld.register_all_envs()
        self.internal_world = gym.make('HalfCheetahGoal-v0')
        # Presumably the cap on target speed used by the goal logic —
        # TODO confirm against the wrapped env.
        self.max_speed = 6
        # Mirror the wrapped env's spaces so callers can treat this object
        # as a drop-in gym-style env.
        self.action_space = self.internal_world.action_space
        self.goal_space = self.internal_world.goal_space
        # NOTE(review): reads `obs_space`, not the gym-conventional
        # `observation_space` — confirm the wrapped env exposes this name.
        self.observation_space = self.internal_world.obs_space

        # Goals closer than this count as reached — TODO confirm units.
        self.distance_threshold = 0.1

        self.seed()
        self.reset()
Example #3
0
def get_gym_env(env_id, env_class=None, env_kwargs=None):
    """Instantiate an environment from a gym id or from a class.

    Args:
        env_id: gym registry id; takes precedence when truthy.
        env_class: fallback constructor used when ``env_id`` is falsy.
        env_kwargs: kwargs for ``env_class`` (ignored when ``env_id`` is used).

    Returns:
        The constructed environment instance.

    Raises:
        ValueError: if neither ``env_id`` nor ``env_class`` is provided.
    """
    if env_kwargs is None:
        env_kwargs = {}

    if not (env_id or env_class):
        # Explicit error instead of `assert`: asserts vanish under `python -O`.
        raise ValueError('Must provide either env_id or env_class.')
    if env_id:
        # Imported lazily so this module stays importable without gym.
        import gym
        import multiworld
        multiworld.register_all_envs()
        env = gym.make(env_id)
    else:
        env = env_class(**env_kwargs)
    return env
Example #4
0
 def __init__(self):
     """Wrap the multiworld 'AntXY-v0' env as a goal-conditioned task."""
     multiworld.register_all_envs()
     self.internal_world = gym.make('AntXY-v0')
     # Drop contact forces from the state; the 29-dim observation space
     # below matches this reduced state — TODO confirm dimension.
     self.internal_world.include_contact_forces_in_state = False

     # Goals closer than this count as reached — TODO confirm units.
     self.distance_threshold = 0.1
     self.goal_limit = 2
     # NOTE(review): low and high are both +goal_limit, so the goal box is
     # the single point (2, 2); low was probably meant to be negated.
     self.goal_low = self.goal_limit * np.ones(2)
     self.goal_high = self.goal_limit * np.ones(2)
     self.goal_ = None
     self.goal_space = Box(self.goal_low, self.goal_high)
     self.action_space = self.internal_world.action_space
     # Unbounded 29-dim observation space.
     self.observation_space_low = np.array([-np.inf for _ in range(29)])
     self.observation_space_high = np.array([np.inf for _ in range(29)])
     self.observation_space = Box(self.observation_space_low, self.observation_space_high)

     self.seed()
     self.reset()
Example #5
0
def get_gym_env(
        env_id,
        env_class=None,
        env_kwargs=None,
        unwrap_timed_envs=False,
):
    """Instantiate an environment from a gym id or from a class.

    Args:
        env_id: gym registry id; takes precedence when truthy.
        env_class: fallback constructor used when ``env_id`` is falsy.
        env_kwargs: kwargs for ``env_class`` (ignored when ``env_id`` is used).
        unwrap_timed_envs: if True, strip a wrapping ``TimeLimit`` so the
            returned env does not truncate episodes.

    Returns:
        The constructed (and possibly unwrapped) environment.

    Raises:
        ValueError: if neither ``env_id`` nor ``env_class`` is provided.
    """
    if env_kwargs is None:
        env_kwargs = {}

    if not (env_id or env_class):
        # Explicit error instead of `assert`: asserts vanish under `python -O`.
        raise ValueError('Must provide either env_id or env_class.')
    if env_id:
        # Imported lazily so this module stays importable without gym.
        import gym
        import multiworld
        multiworld.register_all_envs()
        env = gym.make(env_id)
    else:
        env = env_class(**env_kwargs)
    # Check the cheap flag first so the TimeLimit test is skipped entirely
    # when unwrapping was not requested.
    if unwrap_timed_envs and isinstance(env, TimeLimit):
        env = env.env
    return env
def train_bg(variant):
    """Train the OpenCV background-subtraction model on the object-invisible
    twin of the configured environment."""
    import numpy as np

    # Each training env id maps to a twin env where the manipulated object
    # is rendered invisible, so a clean background model can be learned.
    id_to_invisible = {
        'SawyerPushNIPSEasy-v0': "SawyerPushNIPSPuckInvisible-v0",
        'SawyerPushHurdle-v0': 'SawyerPushHurdlePuckInvisible-v0',
        'SawyerPushHurdleMiddle-v0': 'SawyerPushHurdleMiddlePuckInvisible-v0',
        'SawyerDoorHookResetFreeEnv-v1':
        'SawyerDoorHookResetFreeEnvDoorInvisible-v0',
        'SawyerPickupEnvYZEasy-v0': 'SawyerPickupResetFreeEnvBallInvisible-v0',
    }

    print("training opencv background model!")
    invisible_id = id_to_invisible[variant.get('env_id', None)]

    import gym
    import multiworld
    from multiworld.core.image_env import ImageEnv
    multiworld.register_all_envs()
    base_env = gym.make(invisible_id)
    camera = variant.get('init_camera', None)

    goals = None
    if variant.get("presampled_goals_path") is not None:
        goals = np.load(variant['presampled_goals_path'],
                        allow_pickle=True).item()
        print("presampled goal path is: ", variant['presampled_goals_path'])

    wrapped_env = ImageEnv(
        base_env,
        variant.get('imsize'),
        init_camera=camera,
        transpose=True,
        normalize=True,
        presampled_goals=goals,
    )
    train_bgsb(wrapped_env)
def generate_vae_dataset(variant):
    """Build (or load) an image dataset for VAE pretraining.

    Data comes from one of: a saved dataset file (``dataset_path``), a
    cached /tmp file keyed by the settings, or fresh env interaction.

    Returns:
        (train_dataset, test_dataset, info): uint8 arrays of flattened
        images split by ``test_p`` (first fraction is train), plus an info
        dict that may contain the env used.
    """
    # ---- unpack settings from the variant dict --------------------------
    env_class = variant.get('env_class', None)
    env_kwargs = variant.get('env_kwargs', None)
    env_id = variant.get('env_id', None)
    N = variant.get('N', 10000)
    # Despite the name, test_p is the fraction assigned to the TRAIN split
    # (see the slicing at the bottom).
    test_p = variant.get('test_p', 0.9)
    use_cached = variant.get('use_cached', True)
    imsize = variant.get('imsize', 84)
    num_channels = variant.get('num_channels', 3)
    show = variant.get('show', False)
    init_camera = variant.get('init_camera', None)
    dataset_path = variant.get('dataset_path', None)
    oracle_dataset_using_set_to_goal = variant.get(
        'oracle_dataset_using_set_to_goal', False)
    random_rollout_data = variant.get('random_rollout_data', False)
    random_and_oracle_policy_data = variant.get(
        'random_and_oracle_policy_data', False)
    random_and_oracle_policy_data_split = variant.get(
        'random_and_oracle_policy_data_split', 0)
    policy_file = variant.get('policy_file', None)
    n_random_steps = variant.get('n_random_steps', 100)
    vae_dataset_specific_env_kwargs = variant.get(
        'vae_dataset_specific_env_kwargs', None)
    save_file_prefix = variant.get('save_file_prefix', None)
    non_presampled_goal_img_is_garbage = variant.get(
        'non_presampled_goal_img_is_garbage', None)
    tag = variant.get('tag', '')
    from multiworld.core.image_env import ImageEnv, unormalize_image
    import rlkit.torch.pytorch_util as ptu
    info = {}
    if dataset_path is not None:
        # Pre-built dataset: load it and take its true size.
        dataset = load_local_or_remote_file(dataset_path)
        N = dataset.shape[0]
    else:
        if env_kwargs is None:
            env_kwargs = {}
        if save_file_prefix is None:
            save_file_prefix = env_id
        if save_file_prefix is None:
            # env_id was also None: fall back to the env class name.
            save_file_prefix = env_class.__name__
        # Cache filename encodes the settings that affect the data.
        filename = "/tmp/{}_N{}_{}_imsize{}_random_oracle_split_{}{}.npy".format(
            save_file_prefix,
            str(N),
            init_camera.__name__ if init_camera else '',
            imsize,
            random_and_oracle_policy_data_split,
            tag,
        )
        if use_cached and osp.isfile(filename):
            dataset = np.load(filename)
            print("loaded data from saved file", filename)
        else:
            now = time.time()

            if env_id is not None:
                import gym
                import multiworld
                multiworld.register_all_envs()
                env = gym.make(env_id)
            else:
                # Merge env_kwargs into the dataset-specific kwargs without
                # overriding explicitly-set dataset-specific values.
                if vae_dataset_specific_env_kwargs is None:
                    vae_dataset_specific_env_kwargs = {}
                for key, val in env_kwargs.items():
                    if key not in vae_dataset_specific_env_kwargs:
                        vae_dataset_specific_env_kwargs[key] = val
                env = env_class(**vae_dataset_specific_env_kwargs)
            if not isinstance(env, ImageEnv):
                env = ImageEnv(
                    env,
                    imsize,
                    init_camera=init_camera,
                    transpose=True,
                    normalize=True,
                    non_presampled_goal_img_is_garbage=
                    non_presampled_goal_img_is_garbage,
                )
            else:
                # Already image-wrapped: respect the wrapper's image size.
                imsize = env.imsize
                env.non_presampled_goal_img_is_garbage = non_presampled_goal_img_is_garbage
            env.reset()
            info['env'] = env
            if random_and_oracle_policy_data:
                policy_file = load_local_or_remote_file(policy_file)
                policy = policy_file['policy']
                policy.to(ptu.device)
            if random_rollout_data:
                from rlkit.exploration_strategies.ou_strategy import OUStrategy
                policy = OUStrategy(env.action_space)
            dataset = np.zeros((N, imsize * imsize * num_channels),
                               dtype=np.uint8)
            # NOTE(review): this loop iterates a hard-coded 10000 times
            # instead of N, never writes into `dataset` (the fill line is
            # commented out), and saves per-sample .npy files to a
            # hard-coded home directory — it looks like leftover one-off
            # data-collection code. The cached `dataset` saved below will
            # be all zeros; confirm before relying on it.
            for i in range(10000):
                # NP collects [goal image, post-reset image, instruction].
                NP = []
                if oracle_dataset_using_set_to_goal:
                    print(i)
                    #print('th step')
                    # Snapshot of the env teleported to a sampled goal.
                    goal = env.sample_goal()
                    env.set_to_goal(goal)
                    obs = env._get_obs()
                    #img = img.reshape(3, imsize, imsize).transpose()
                    # img = img[::-1, :, ::-1]
                    # cv2.imshow('img', img)
                    # cv2.waitKey(1)
                    img_1 = obs['image_observation']
                    NP.append(img_1)
                    #dataset[i, :] = unormalize_image(img)
                    img_1 = img_1.reshape(3, imsize, imsize).transpose()
                    if i % 3 == 0:
                        # Debug preview of every third sample.
                        cv2.imshow('img1', img_1)
                        cv2.waitKey(1)
                    env.reset()
                    # Instruction describing the goal relative to the fresh
                    # state — TODO confirm generate_new_state semantics.
                    instr = env.generate_new_state(goal)
                    if i % 3 == 0:
                        print(instr)
                    obs = env._get_obs()
                    # obs = env._get_obs()
                    img_2 = obs['image_observation']
                    NP.append(img_2)
                    NP.append(instr)
                    img_2 = img_2.reshape(3, imsize, imsize).transpose()
                    if i % 3 == 0:
                        cv2.imshow('img2', img_2)
                        cv2.waitKey(1)
                    NP = np.array(NP)
                    print(NP)
                    idx = str(i)
                    name = "/home/xiaomin/Downloads/IFIG_DATA_1/" + idx + ".npy"
                    np.save(open(name, 'wb'), NP)
                    # radius = input('waiting...')
            print("done making training data", filename, time.time() - now)
            np.save(filename, dataset)

    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
Example #8
0
def HER_baseline_td3_experiment(variant):
    """HER + TD3 baseline on image observations.

    Builds an image-wrapped goal env, CNN Q-functions and policies whose
    image input stacks the observation and goal images channel-wise, a
    goal-relabeling replay buffer, and then trains HerTd3.
    """
    import rlkit.torch.pytorch_util as ptu
    from rlkit.data_management.obs_dict_replay_buffer import \
        ObsDictRelabelingBuffer
    from rlkit.exploration_strategies.base import (
        PolicyWrappedWithExplorationStrategy)
    from rlkit.torch.her.her_td3 import HerTd3
    from rlkit.torch.networks import MergedCNN, CNNPolicy
    import torch
    from multiworld.core.image_env import ImageEnv
    from rlkit.misc.asset_loader import load_local_or_remote_file

    init_camera = variant.get("init_camera", None)
    presample_goals = variant.get('presample_goals', False)
    presampled_goals_path = get_presampled_goals_path(
        variant.get('presampled_goals_path', None))

    # ---- environment construction ---------------------------------------
    if 'env_id' in variant:
        import gym
        import multiworld
        multiworld.register_all_envs()
        env = gym.make(variant['env_id'])
    else:
        env = variant["env_class"](**variant['env_kwargs'])
    image_env = ImageEnv(
        env,
        variant.get('imsize'),
        reward_type='image_sparse',
        init_camera=init_camera,
        transpose=True,
        normalize=True,
    )
    if presample_goals:
        # Either generate a goal dataset with the provided function or load
        # one from disk, then rebuild the ImageEnv with those goals and an
        # image-distance reward instead of the sparse one above.
        if presampled_goals_path is None:
            image_env.non_presampled_goal_img_is_garbage = True
            presampled_goals = variant['generate_goal_dataset_fctn'](
                env=image_env, **variant['goal_generation_kwargs'])
        else:
            presampled_goals = load_local_or_remote_file(
                presampled_goals_path).item()
        del image_env
        env = ImageEnv(
            env,
            variant.get('imsize'),
            reward_type='image_distance',
            init_camera=init_camera,
            transpose=True,
            normalize=True,
            presampled_goals=presampled_goals,
        )
    else:
        env = image_env

    es = get_exploration_strategy(variant, env)

    observation_key = variant.get('observation_key', 'image_observation')
    desired_goal_key = variant.get('desired_goal_key', 'image_desired_goal')
    achieved_goal_key = desired_goal_key.replace("desired", "achieved")
    imsize = variant['imsize']
    action_dim = env.action_space.low.size
    # ---- networks: obs + goal images stacked -> 3 * 2 input channels -----
    qf1 = MergedCNN(input_width=imsize,
                    input_height=imsize,
                    output_size=1,
                    input_channels=3 * 2,
                    added_fc_input_size=action_dim,
                    **variant['cnn_params'])
    qf2 = MergedCNN(input_width=imsize,
                    input_height=imsize,
                    output_size=1,
                    input_channels=3 * 2,
                    added_fc_input_size=action_dim,
                    **variant['cnn_params'])

    policy = CNNPolicy(
        input_width=imsize,
        input_height=imsize,
        added_fc_input_size=0,
        output_size=action_dim,
        input_channels=3 * 2,
        output_activation=torch.tanh,
        **variant['cnn_params'],
    )
    # Target networks mirror the online networks (TD3 requires both).
    target_qf1 = MergedCNN(input_width=imsize,
                           input_height=imsize,
                           output_size=1,
                           input_channels=3 * 2,
                           added_fc_input_size=action_dim,
                           **variant['cnn_params'])
    target_qf2 = MergedCNN(input_width=imsize,
                           input_height=imsize,
                           output_size=1,
                           input_channels=3 * 2,
                           added_fc_input_size=action_dim,
                           **variant['cnn_params'])

    target_policy = CNNPolicy(
        input_width=imsize,
        input_height=imsize,
        added_fc_input_size=0,
        output_size=action_dim,
        input_channels=3 * 2,
        output_activation=torch.tanh,
        **variant['cnn_params'],
    )
    exploration_policy = PolicyWrappedWithExplorationStrategy(
        exploration_strategy=es,
        policy=policy,
    )

    replay_buffer = ObsDictRelabelingBuffer(
        env=env,
        observation_key=observation_key,
        desired_goal_key=desired_goal_key,
        achieved_goal_key=achieved_goal_key,
        **variant['replay_buffer_kwargs'])
    # Inject runtime objects into the nested algo kwargs in place.
    algo_kwargs = variant['algo_kwargs']
    algo_kwargs['replay_buffer'] = replay_buffer
    base_kwargs = algo_kwargs['base_kwargs']
    base_kwargs['training_env'] = env
    base_kwargs['render'] = variant["render"]
    base_kwargs['render_during_eval'] = variant["render"]
    her_kwargs = algo_kwargs['her_kwargs']
    her_kwargs['observation_key'] = observation_key
    her_kwargs['desired_goal_key'] = desired_goal_key
    algorithm = HerTd3(env,
                       qf1=qf1,
                       qf2=qf2,
                       policy=policy,
                       target_qf1=target_qf1,
                       target_qf2=target_qf2,
                       target_policy=target_policy,
                       exploration_policy=exploration_policy,
                       **variant['algo_kwargs'])

    algorithm.to(ptu.device)
    algorithm.train()
def generate_vae_dataset(variant):
    """Build (or load) an image dataset for VAE pretraining.

    Data comes from one of: a saved dataset file (``dataset_path``), a
    cached /tmp file keyed by the settings, or fresh env interaction using
    one of four collection modes (mixed random/policy, oracle set-to-goal,
    random rollouts, or plain random steps).

    Returns:
        (train_dataset, test_dataset, info): uint8 arrays of flattened
        images split by ``test_p`` (first fraction is train), plus an info
        dict that may contain the env used.
    """
    # ---- unpack settings from the variant dict --------------------------
    env_class = variant.get('env_class', None)
    env_kwargs = variant.get('env_kwargs', None)
    env_id = variant.get('env_id', None)
    N = variant.get('N', 10000)
    # Despite the name, test_p is the fraction assigned to the TRAIN split
    # (see the slicing at the bottom).
    test_p = variant.get('test_p', 0.9)
    use_cached = variant.get('use_cached', True)
    imsize = variant.get('imsize', 84)
    num_channels = variant.get('num_channels', 3)
    show = variant.get('show', False)
    init_camera = variant.get('init_camera', None)
    dataset_path = variant.get('dataset_path', None)
    oracle_dataset_using_set_to_goal = variant.get(
        'oracle_dataset_using_set_to_goal', False)
    random_rollout_data = variant.get('random_rollout_data', False)
    random_and_oracle_policy_data = variant.get(
        'random_and_oracle_policy_data', False)
    random_and_oracle_policy_data_split = variant.get(
        'random_and_oracle_policy_data_split', 0)
    policy_file = variant.get('policy_file', None)
    n_random_steps = variant.get('n_random_steps', 100)
    vae_dataset_specific_env_kwargs = variant.get(
        'vae_dataset_specific_env_kwargs', None)
    save_file_prefix = variant.get('save_file_prefix', None)
    non_presampled_goal_img_is_garbage = variant.get(
        'non_presampled_goal_img_is_garbage', None)
    tag = variant.get('tag', '')
    from multiworld.core.image_env import ImageEnv, unormalize_image
    import rlkit.torch.pytorch_util as ptu
    info = {}
    if dataset_path is not None:
        # Pre-built dataset: load it and take its true size.
        dataset = load_local_or_remote_file(dataset_path)
        N = dataset.shape[0]
    else:
        if env_kwargs is None:
            env_kwargs = {}
        if save_file_prefix is None:
            save_file_prefix = env_id
        if save_file_prefix is None:
            # env_id was also None: fall back to the env class name.
            save_file_prefix = env_class.__name__
        # Cache filename encodes the settings that affect the data.
        filename = "/tmp/{}_N{}_{}_imsize{}_random_oracle_split_{}{}.npy".format(
            save_file_prefix,
            str(N),
            init_camera.__name__ if init_camera else '',
            imsize,
            random_and_oracle_policy_data_split,
            tag,
        )
        if use_cached and osp.isfile(filename):
            dataset = np.load(filename)
            print("loaded data from saved file", filename)
        else:
            now = time.time()

            if env_id is not None:
                import gym
                import multiworld
                multiworld.register_all_envs()
                env = gym.make(env_id)
            else:
                # Merge env_kwargs into the dataset-specific kwargs without
                # overriding explicitly-set dataset-specific values.
                if vae_dataset_specific_env_kwargs is None:
                    vae_dataset_specific_env_kwargs = {}
                for key, val in env_kwargs.items():
                    if key not in vae_dataset_specific_env_kwargs:
                        vae_dataset_specific_env_kwargs[key] = val
                env = env_class(**vae_dataset_specific_env_kwargs)
            if not isinstance(env, ImageEnv):
                env = ImageEnv(
                    env,
                    imsize,
                    init_camera=init_camera,
                    transpose=True,
                    normalize=True,
                    non_presampled_goal_img_is_garbage=
                    non_presampled_goal_img_is_garbage,
                )
            else:
                # Already image-wrapped: respect the wrapper's image size.
                imsize = env.imsize
                env.non_presampled_goal_img_is_garbage = non_presampled_goal_img_is_garbage
            env.reset()
            info['env'] = env
            if random_and_oracle_policy_data:
                # Load a pretrained policy for the non-random portion.
                policy_file = load_local_or_remote_file(policy_file)
                policy = policy_file['policy']
                policy.to(ptu.device)
            if random_rollout_data:
                from rlkit.exploration_strategies.ou_strategy import OUStrategy
                policy = OUStrategy(env.action_space)
            dataset = np.zeros((N, imsize * imsize * num_channels),
                               dtype=np.uint8)
            for i in range(N):
                if random_and_oracle_policy_data:
                    # First `split` fraction of samples: random actions;
                    # rest: rollouts from the loaded policy on state obs.
                    num_random_steps = int(N *
                                           random_and_oracle_policy_data_split)
                    if i < num_random_steps:
                        env.reset()
                        for _ in range(n_random_steps):
                            obs = env.step(env.action_space.sample())[0]
                    else:
                        obs = env.reset()
                        policy.reset()
                        for _ in range(n_random_steps):
                            policy_obs = np.hstack((
                                obs['state_observation'],
                                obs['state_desired_goal'],
                            ))
                            action, _ = policy.get_action(policy_obs)
                            obs, _, _, _ = env.step(action)
                elif oracle_dataset_using_set_to_goal:
                    # Teleport the env directly to a sampled goal state.
                    print(i)
                    goal = env.sample_goal()
                    env.set_to_goal(goal)
                    obs = env._get_obs()
                elif random_rollout_data:
                    # OU-noise rollouts, re-seeded at a fresh goal every
                    # n_random_steps steps.
                    if i % n_random_steps == 0:
                        g = dict(
                            state_desired_goal=env.sample_goal_for_rollout())
                        env.set_to_goal(g)
                        policy.reset()
                        # env.reset()
                    u = policy.get_action_from_raw_action(
                        env.action_space.sample())
                    obs = env.step(u)[0]
                else:
                    # Default: reset then take random steps.
                    env.reset()
                    for _ in range(n_random_steps):
                        obs = env.step(env.action_space.sample())[0]
                img = obs['image_observation']
                # Store as uint8 (images are normalized floats in obs).
                dataset[i, :] = unormalize_image(img)
                if show:
                    # Debug preview window.
                    img = img.reshape(3, imsize, imsize).transpose()
                    img = img[::-1, :, ::-1]
                    cv2.imshow('img', img)
                    cv2.waitKey(1)
                    # radius = input('waiting...')
            print("done making training data", filename, time.time() - now)
            np.save(filename, dataset)

    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
Example #10
0
from PIL import Image
import timeit
from rlkit.torch.her.her import HERTrainer
from rlkit.torch.sac.policies import MakeDeterministic
from rlkit.torch.sac.sac import SACTrainer
from rlkit.util.io import load_local_or_remote_file
from rlkit.util.video import dump_video
import gym
import multiworld
from rlkit.torch.vae.vae_trainer import ConvVAETrainer

from multiworld.core.image_env import ImageEnv, unormalize_image
from rlkit.samplers.data_collector.vae_env import VAEWrappedEnvPathCollector
from rlkit.torch.skewfit.online_vae_algorithm import OnlineVaeAlgorithm

# Register all multiworld envs at import time so gym.make calls work below.
multiworld.register_all_envs()


def skewfit_full_experiment(variant):
    """Run the full Skew-Fit pipeline: (pre)train the VAEs, then run RL.

    Note: variant preprocessing is assumed to have been done by the caller.
    """
    train_vae_and_update_variant(variant)
    skewfit_experiment(variant)


def full_experiment_variant_preprocess(variant):
    """Extract sub-variants and shared settings from ``variant``.

    NOTE(review): as written this only binds locals and returns None — it
    looks truncated (propagation of env_id / init_camera / image_size into
    the sub-variants appears to be missing). Confirm against the original
    implementation. Raises KeyError if the required keys are absent.
    """
    train_vae_variant = variant['train_vae_variant']
    skewfit_variant = variant['skewfit_variant']
    env_id = variant['env_id']
    init_camera = variant.get('init_camera', None)
    image_size = variant.get('image_size', 84)
def train_vae_and_update_variant(variant):  # actually pretrain vae and ROLL.
    """Pretrain the scene-VAE and segmented (ROLL) models, storing the
    results (and optionally their datasets) back into the skewfit variant.
    """
    skewfit_variant = variant['skewfit_variant']
    train_vae_variant = variant['train_vae_variant']

    # prepare the background subtractor needed to perform segmentation
    if 'unet' in skewfit_variant['segmentation_method']:
        print("training opencv background model!")
        v = train_vae_variant['generate_lstm_dataset_kwargs']
        env_id = v.get('env_id', None)
        # Twin env with the object invisible, for learning the background.
        env_id_invis = invisiable_env_id[env_id]
        import gym
        import multiworld
        multiworld.register_all_envs()
        obj_invisible_env = gym.make(env_id_invis)
        init_camera = v.get('init_camera', None)

        presampled_goals = None
        if skewfit_variant.get("presampled_goals_path") is not None:
            presampled_goals = load_local_or_remote_file(
                skewfit_variant['presampled_goals_path']).item()
            print("presampled goal path is: ",
                  skewfit_variant['presampled_goals_path'])

        obj_invisible_env = ImageEnv(
            obj_invisible_env,
            v.get('imsize'),
            init_camera=init_camera,
            transpose=True,
            normalize=True,
            presampled_goals=presampled_goals,
        )

        # Push tasks need fewer background-model training frames.
        train_num = 2000 if 'Push' in env_id else 4000
        train_bgsb(obj_invisible_env, train_num=train_num)

    if skewfit_variant.get('vae_path', None) is None:  # train new vaes
        # Suspend progress.csv logging while VAE training runs.
        logger.remove_tabular_output('progress.csv',
                                     relative_to_snapshot_dir=True)

        vaes, vae_train_datas, vae_test_datas = train_vae(
            train_vae_variant,
            skewfit_variant=skewfit_variant,
            return_data=True)  # one original vae, one segmented ROLL.
        if skewfit_variant.get('save_vae_data', False):
            skewfit_variant['vae_train_data'] = vae_train_datas
            skewfit_variant['vae_test_data'] = vae_test_datas

        logger.add_tabular_output(
            'progress.csv',
            relative_to_snapshot_dir=True,
        )
        skewfit_variant['vae_path'] = vaes  # just pass the VAE directly
    else:  # load pre-trained vaes
        print("load pretrain scene-/objce-VAE from: {}".format(
            skewfit_variant['vae_path']))
        data = torch.load(osp.join(skewfit_variant['vae_path'], 'params.pkl'))
        vae_original = data['vae_original']
        vae_segmented = data['lstm_segmented']
        # Order convention: [segmented, original].
        skewfit_variant['vae_path'] = [vae_segmented, vae_original]

        # NOTE(review): datasets are regenerated only on this load path,
        # not when training fresh VAEs above — confirm this is intended.
        generate_vae_dataset_fctn = train_vae_variant.get(
            'generate_vae_data_fctn', generate_vae_dataset)
        generate_lstm_dataset_fctn = train_vae_variant.get(
            'generate_lstm_data_fctn')
        assert generate_lstm_dataset_fctn is not None, "Must provide a custom generate lstm pretraining dataset function!"

        train_data_lstm, test_data_lstm, info_lstm = generate_lstm_dataset_fctn(
            train_vae_variant['generate_lstm_dataset_kwargs'],
            segmented=True,
            segmentation_method=skewfit_variant['segmentation_method'])

        train_data_ori, test_data_ori, info_ori = generate_vae_dataset_fctn(
            train_vae_variant['generate_vae_dataset_kwargs'])

        train_datas = [train_data_lstm, train_data_ori]
        test_datas = [test_data_lstm, test_data_ori]

        if skewfit_variant.get('save_vae_data', False):
            skewfit_variant['vae_train_data'] = train_datas
            skewfit_variant['vae_test_data'] = test_datas
Example #12
0
def state_td3bc_experiment(variant):
    """TD3(+BC) with HER on state observations.

    Builds exploration/evaluation envs, MLP Q-functions and tanh policies
    over state+goal inputs, relabeling replay/demo buffers, and trains a
    TorchBatchRLAlgorithm; optionally records evaluation/exploration videos
    and pretrains from demos.
    """
    # ---- envs ------------------------------------------------------------
    if variant.get('env_id', None):
        import gym
        import multiworld
        multiworld.register_all_envs()
        eval_env = gym.make(variant['env_id'])
        expl_env = gym.make(variant['env_id'])
    else:
        eval_env_kwargs = variant.get('eval_env_kwargs', variant['env_kwargs'])
        eval_env = variant['env_class'](**eval_env_kwargs)
        expl_env = variant['env_class'](**variant['env_kwargs'])

    observation_key = 'state_observation'
    desired_goal_key = 'state_desired_goal'
    achieved_goal_key = desired_goal_key.replace("desired", "achieved")
    # ---- exploration noise ----------------------------------------------
    es_strat =  variant.get('es', 'ou')
    if es_strat == 'ou':
        es = OUStrategy(
            action_space=expl_env.action_space,
            max_sigma=variant['exploration_noise'],
            min_sigma=variant['exploration_noise'],
        )
    elif es_strat == 'gauss_eps':
        es = GaussianAndEpislonStrategy(
            action_space=expl_env.action_space,
            max_sigma=.2,
            min_sigma=.2,  # constant sigma
            epsilon=.3,
        )
    else:
        raise ValueError("invalid exploration strategy provided")
    # ---- networks: inputs are state obs concatenated with goal ----------
    obs_dim = expl_env.observation_space.spaces['observation'].low.size
    goal_dim = expl_env.observation_space.spaces['desired_goal'].low.size
    action_dim = expl_env.action_space.low.size
    qf1 = FlattenMlp(
        input_size=obs_dim + goal_dim + action_dim,
        output_size=1,
        **variant['qf_kwargs']
    )
    qf2 = FlattenMlp(
        input_size=obs_dim + goal_dim + action_dim,
        output_size=1,
        **variant['qf_kwargs']
    )
    target_qf1 = FlattenMlp(
        input_size=obs_dim + goal_dim + action_dim,
        output_size=1,
        **variant['qf_kwargs']
    )
    target_qf2 = FlattenMlp(
        input_size=obs_dim + goal_dim + action_dim,
        output_size=1,
        **variant['qf_kwargs']
    )
    policy = TanhMlpPolicy(
        input_size=obs_dim + goal_dim,
        output_size=action_dim,
        **variant['policy_kwargs']
    )
    target_policy = TanhMlpPolicy(
        input_size=obs_dim + goal_dim,
        output_size=action_dim,
        **variant['policy_kwargs']
    )
    expl_policy = PolicyWrappedWithExplorationStrategy(
        exploration_strategy=es,
        policy=policy,
    )
    # ---- buffers: main replay plus demo train/test buffers --------------
    replay_buffer = ObsDictRelabelingBuffer(
        env=eval_env,
        observation_key=observation_key,
        desired_goal_key=desired_goal_key,
        achieved_goal_key=achieved_goal_key,
        **variant['replay_buffer_kwargs']
    )
    demo_train_buffer = ObsDictRelabelingBuffer(
        env=eval_env,
        observation_key=observation_key,
        desired_goal_key=desired_goal_key,
        achieved_goal_key=achieved_goal_key,
        max_size=variant['replay_buffer_kwargs']['max_size']
    )
    demo_test_buffer = ObsDictRelabelingBuffer(
        env=eval_env,
        observation_key=observation_key,
        desired_goal_key=desired_goal_key,
        achieved_goal_key=achieved_goal_key,
        max_size=variant['replay_buffer_kwargs']['max_size'],
    )
    # ---- trainer: TD3+BC (default) or plain TD3 -------------------------
    if variant.get('td3_bc', True):
        td3_trainer = TD3BCTrainer(
            env=expl_env,
            policy=policy,
            qf1=qf1,
            qf2=qf2,
            replay_buffer=replay_buffer,
            demo_train_buffer=demo_train_buffer,
            demo_test_buffer=demo_test_buffer,
            target_qf1=target_qf1,
            target_qf2=target_qf2,
            target_policy=target_policy,
            **variant['td3_bc_trainer_kwargs']
        )
    else:
        td3_trainer = TD3(
            policy=policy,
            qf1=qf1,
            qf2=qf2,
            target_qf1=target_qf1,
            target_qf2=target_qf2,
            target_policy=target_policy,
            **variant['td3_trainer_kwargs']
        )
    trainer = HERTrainer(td3_trainer)
    eval_path_collector = GoalConditionedPathCollector(
        eval_env,
        policy,
        observation_key=observation_key,
        desired_goal_key=desired_goal_key,
    )
    expl_path_collector = GoalConditionedPathCollector(
        expl_env,
        expl_policy,
        observation_key=observation_key,
        desired_goal_key=desired_goal_key,
    )
    algorithm = TorchBatchRLAlgorithm(
        trainer=trainer,
        exploration_env=expl_env,
        evaluation_env=eval_env,
        exploration_data_collector=expl_path_collector,
        evaluation_data_collector=eval_path_collector,
        replay_buffer=replay_buffer,
        **variant['algo_kwargs']
    )

    # ---- optional video dumping after each train epoch ------------------
    if variant.get("save_video", True):
        if variant.get("presampled_goals", None):
            variant['image_env_kwargs']['presampled_goals'] = load_local_or_remote_file(variant['presampled_goals']).item()
        image_eval_env = ImageEnv(eval_env, **variant["image_env_kwargs"])
        image_eval_path_collector = GoalConditionedPathCollector(
            image_eval_env,
            policy,
            observation_key='state_observation',
            desired_goal_key='state_desired_goal',
        )
        image_expl_env = ImageEnv(expl_env, **variant["image_env_kwargs"])
        image_expl_path_collector = GoalConditionedPathCollector(
            image_expl_env,
            expl_policy,
            observation_key='state_observation',
            desired_goal_key='state_desired_goal',
        )
        video_func = VideoSaveFunction(
            image_eval_env,
            variant,
            image_expl_path_collector,
            image_eval_path_collector,
        )
        algorithm.post_train_funcs.append(video_func)

    algorithm.to(ptu.device)
    # ---- optional demo loading / pretraining before RL ------------------
    if variant.get('load_demos', False):
        td3_trainer.load_demos()
    if variant.get('pretrain_policy', False):
        td3_trainer.pretrain_policy_with_bc()
    if variant.get('pretrain_rl', False):
        td3_trainer.pretrain_q_with_bc_data()
    algorithm.train()
def experiment(variant):
    """Train SAC from images on the fixed-goal Point2D environment.

    Builds CNN-based twin Q-functions (optionally sharing one conv trunk),
    a CNN-based tanh-Gaussian policy, and then runs either batch or online
    training depending on ``variant['collection_mode']``.

    :param variant: dict of hyperparameters; must contain ``cnn_params``,
        ``qf_kwargs``, ``policy_kwargs``, ``trainer_kwargs``, ``algo_kwargs``,
        ``replay_buffer_size``, ``shared_qf_conv``, ``collection_mode``, and
        the path-collector kwargs used below.
    """
    import multiworld
    multiworld.register_all_envs()
    env = gym.make('Point2DEnv-ImageFixedGoal-v0')
    input_width, input_height = env.image_shape

    action_dim = int(np.prod(env.action_space.shape))
    cnn_params = variant['cnn_params']
    # output_conv_channels=True / output_size=None: the CNNs emit raw conv
    # feature maps; the MLP heads consume the flattened features.
    cnn_params.update(
        input_width=input_width,
        input_height=input_height,
        input_channels=3,
        output_conv_channels=True,
        output_size=None,
    )
    if variant['shared_qf_conv']:
        # qf1 and qf2 share one conv trunk; the two targets share another.
        qf_cnn = CNN(**cnn_params)
        qf_obs_processor = nn.Sequential(qf_cnn, Flatten())
        qf1 = MlpQfWithObsProcessor(obs_processor=qf_obs_processor,
                                    output_size=1,
                                    input_size=action_dim +
                                    qf_cnn.conv_output_flat_size,
                                    **variant['qf_kwargs'])
        qf2 = MlpQfWithObsProcessor(obs_processor=qf_obs_processor,
                                    output_size=1,
                                    input_size=action_dim +
                                    qf_cnn.conv_output_flat_size,
                                    **variant['qf_kwargs'])
        target_qf_cnn = CNN(**cnn_params)
        target_qf_obs_processor = nn.Sequential(target_qf_cnn, Flatten())
        target_qf1 = MlpQfWithObsProcessor(
            obs_processor=target_qf_obs_processor,
            output_size=1,
            input_size=action_dim + qf_cnn.conv_output_flat_size,
            **variant['qf_kwargs'])
        target_qf2 = MlpQfWithObsProcessor(
            obs_processor=target_qf_obs_processor,
            output_size=1,
            input_size=action_dim + qf_cnn.conv_output_flat_size,
            **variant['qf_kwargs'])
    else:
        # Each Q-function (and each target) gets its own conv trunk.
        qf1_cnn = CNN(**cnn_params)
        cnn_output_dim = qf1_cnn.conv_output_flat_size
        qf1 = MlpQfWithObsProcessor(obs_processor=nn.Sequential(
            qf1_cnn, Flatten()),
                                    output_size=1,
                                    input_size=action_dim + cnn_output_dim,
                                    **variant['qf_kwargs'])
        qf2 = MlpQfWithObsProcessor(obs_processor=nn.Sequential(
            CNN(**cnn_params), Flatten()),
                                    output_size=1,
                                    input_size=action_dim + cnn_output_dim,
                                    **variant['qf_kwargs'])
        target_qf1 = MlpQfWithObsProcessor(
            obs_processor=nn.Sequential(CNN(**cnn_params), Flatten()),
            output_size=1,
            input_size=action_dim + cnn_output_dim,
            **variant['qf_kwargs'])
        target_qf2 = MlpQfWithObsProcessor(
            obs_processor=nn.Sequential(CNN(**cnn_params), Flatten()),
            output_size=1,
            input_size=action_dim + cnn_output_dim,
            **variant['qf_kwargs'])
    # NOTE(review): action_dim was already computed above; this recomputation
    # is redundant but harmless.
    action_dim = int(np.prod(env.action_space.shape))
    policy_cnn = CNN(**cnn_params)
    policy = TanhGaussianPolicyAdapter(nn.Sequential(policy_cnn, Flatten()),
                                       policy_cnn.conv_output_flat_size,
                                       action_dim, **variant['policy_kwargs'])
    # Exploration and evaluation happen in the same env instance.
    eval_env = expl_env = env

    eval_policy = MakeDeterministic(policy)
    eval_path_collector = MdpPathCollector(
        eval_env, eval_policy, **variant['eval_path_collector_kwargs'])
    replay_buffer = EnvReplayBuffer(
        variant['replay_buffer_size'],
        expl_env,
    )
    trainer = SACTrainer(env=eval_env,
                         policy=policy,
                         qf1=qf1,
                         qf2=qf2,
                         target_qf1=target_qf1,
                         target_qf2=target_qf2,
                         **variant['trainer_kwargs'])
    if variant['collection_mode'] == 'batch':
        expl_path_collector = MdpPathCollector(
            expl_env, policy, **variant['expl_path_collector_kwargs'])
        algorithm = TorchBatchRLAlgorithm(
            trainer=trainer,
            exploration_env=expl_env,
            evaluation_env=eval_env,
            exploration_data_collector=expl_path_collector,
            evaluation_data_collector=eval_path_collector,
            replay_buffer=replay_buffer,
            **variant['algo_kwargs'])
    elif variant['collection_mode'] == 'online':
        # Online mode collects one step at a time instead of whole paths.
        expl_path_collector = MdpStepCollector(
            expl_env, policy, **variant['expl_path_collector_kwargs'])
        algorithm = TorchOnlineRLAlgorithm(
            trainer=trainer,
            exploration_env=expl_env,
            evaluation_env=eval_env,
            exploration_data_collector=expl_path_collector,
            evaluation_data_collector=eval_path_collector,
            replay_buffer=replay_buffer,
            **variant['algo_kwargs'])
    algorithm.to(ptu.device)
    algorithm.train()
def main(training_data_dir, validation_data_dir, test_data_dir, imsize):
    """Generate paired RGB / foreground-mask image datasets for segmentation.

    A MOG2 background subtractor is first adapted on frames from an
    environment in which both puck and robot are invisible, so the learned
    background model contains only the static scene.  Frames from a second
    environment (puck visible) are then passed through the frozen subtractor
    to obtain foreground masks, and (rgb, mask) JPEG pairs are written into
    the train / validation / test directories.

    :param training_data_dir: output directory for 1000 training pairs.
    :param validation_data_dir: output directory for 200 validation pairs.
    :param test_data_dir: output directory for 200 test pairs.
    :param imsize: square image resolution used by the ImageEnv wrappers.
    """
    for out_dir in (training_data_dir, validation_data_dir, test_data_dir):
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)

    backSub = cv2.createBackgroundSubtractorMOG2(history=10)

    # Registering required multiworld environments.
    multiworld.register_all_envs()
    base_env_background = gym.make('SawyerPushHurdlePuckAndRobotInvisible-v0')
    env_background = ImageEnv(base_env_background,
                              imsize=imsize,
                              init_camera=camera,
                              transpose=True,
                              normalize=True)
    env_background.reset()

    base_env_hurdle = gym.make('SawyerPushHurdlePuckInvisible-v0')
    env_hurdle = ImageEnv(base_env_hurdle,
                          imsize=imsize,
                          init_camera=camera,
                          transpose=True,
                          normalize=True)
    env_hurdle.reset()

    # Adapt the background model on a few object-free frames
    # (learningRate=-1 lets MOG2 choose its own update rate).
    print("Training background subctractor")
    for _ in range(10):
        action = env_background.action_space.sample()
        next_obs, reward, done, info = env_background.step(action)
        image = unnormalize_image(next_obs['observation'])
        backSub.apply(image, learningRate=-1)

    print("Generating training data")
    _save_rgb_and_mask_pairs(env_hurdle, backSub, 1000, training_data_dir)

    print("Generating validation data")
    _save_rgb_and_mask_pairs(env_hurdle, backSub, 200, validation_data_dir)

    print("Generating testing data")
    _save_rgb_and_mask_pairs(env_hurdle, backSub, 200, test_data_dir)

    print("Completed generating data at:", training_data_dir)


def _save_rgb_and_mask_pairs(env, back_sub, n, out_dir):
    """Roll out ``n`` random steps in ``env`` and write rgb_<i>.jpg /
    mask_<i>.jpg pairs into ``out_dir``; learningRate=0 keeps the background
    model frozen so only segmentation is performed."""
    for i in range(n):
        action = env.action_space.sample()
        next_obs, reward, done, info = env.step(action)
        image = unnormalize_image(next_obs['observation'])
        fg_mask = back_sub.apply(image, learningRate=0)
        # NOTE(review): dividing by max() yields float values in [0, 1];
        # cv2.imwrite truncates floats to uint8, so the saved mask is
        # 0/1-valued (visually near-black), and max()==0 on an empty mask
        # would divide by zero.  Behavior kept as-is — confirm downstream
        # readers threshold the mask rather than expecting 0/255 values.
        fg_mask = fg_mask / fg_mask.max()
        cv2.imwrite(out_dir + "/rgb_" + str(i) + ".jpg", image)
        cv2.imwrite(out_dir + "/mask_" + str(i) + ".jpg", fg_mask)
def experiment(variant):
    """Train HER + SAC on state observations of the SawyerPushXYZ env.

    Sets up a goal-relabeling replay buffer, twin Q-functions with target
    networks, a tanh-Gaussian policy, and a HER-wrapped SAC trainer, then
    runs batch RL training.

    :param variant: dict of hyperparameters; must contain
        ``replay_buffer_kwargs``, ``qf_kwargs``, ``policy_kwargs``,
        ``sac_trainer_kwargs``, and ``algo_kwargs``.
    """
    import multiworld
    multiworld.register_all_envs()
    # unwrap the TimeLimitEnv wrapper since we manually terminate after 50 steps
    eval_env = gym.make('SawyerPushXYZEnv-v0')
    expl_env = gym.make('SawyerPushXYZEnv-v0')

    observation_key = 'state_observation'
    desired_goal_key = 'desired_goal'
    # Relabeling buffer needs the achieved-goal counterpart of the goal key.
    achieved_goal_key = desired_goal_key.replace("desired", "achieved")
    replay_buffer = ObsDictRelabelingBuffer(
        env=eval_env,
        observation_key=observation_key,
        desired_goal_key=desired_goal_key,
        achieved_goal_key=achieved_goal_key,
        **variant['replay_buffer_kwargs']
    )
    obs_dim = eval_env.observation_space.spaces['observation'].low.size
    action_dim = eval_env.action_space.low.size
    goal_dim = eval_env.observation_space.spaces['desired_goal'].low.size
    # Q-networks consume (observation, action, goal) concatenated.
    qf1 = FlattenMlp(
        input_size=obs_dim + action_dim + goal_dim,
        output_size=1,
        **variant['qf_kwargs']
    )
    qf2 = FlattenMlp(
        input_size=obs_dim + action_dim + goal_dim,
        output_size=1,
        **variant['qf_kwargs']
    )
    target_qf1 = FlattenMlp(
        input_size=obs_dim + action_dim + goal_dim,
        output_size=1,
        **variant['qf_kwargs']
    )
    target_qf2 = FlattenMlp(
        input_size=obs_dim + action_dim + goal_dim,
        output_size=1,
        **variant['qf_kwargs']
    )
    policy = TanhGaussianPolicy(
        obs_dim=obs_dim + goal_dim,
        action_dim=action_dim,
        **variant['policy_kwargs']
    )
    eval_policy = MakeDeterministic(policy)
    trainer = SACTrainer(
        env=eval_env,
        policy=policy,
        qf1=qf1,
        qf2=qf2,
        target_qf1=target_qf1,
        target_qf2=target_qf2,
        **variant['sac_trainer_kwargs']
    )
    # Wrap SAC with hindsight experience replay relabeling.
    trainer = HERTrainer(trainer)
    eval_path_collector = GoalConditionedPathCollector(
        eval_env,
        eval_policy,
        observation_key=observation_key,
        desired_goal_key=desired_goal_key,
    )
    expl_path_collector = GoalConditionedPathCollector(
        expl_env,
        policy,
        observation_key=observation_key,
        desired_goal_key=desired_goal_key,
    )
    algorithm = TorchBatchRLAlgorithm(
        trainer=trainer,
        exploration_env=expl_env,
        evaluation_env=eval_env,
        exploration_data_collector=expl_path_collector,
        evaluation_data_collector=eval_path_collector,
        replay_buffer=replay_buffer,
        **variant['algo_kwargs']
    )
    algorithm.to(ptu.device)
    algorithm.train()
# Example #16
def generate_vae_dataset_from_params(
        env_class=None,
        env_kwargs=None,
        env_id=None,
        N=10000,
        test_p=0.9,
        use_cached=True,
        imsize=84,
        num_channels=1,
        show=False,
        init_camera=None,
        dataset_path=None,
        oracle_dataset=False,
        n_random_steps=100,
        vae_dataset_specific_env_kwargs=None,
        save_file_prefix=None,
):
    """Build (train, test) image datasets of presampled goals for VAE training.

    The dataset is either loaded from ``dataset_path``, loaded from a /tmp
    cache file (when ``use_cached`` and the file exists), or collected fresh
    by presampling ``N`` goal images from the environment.

    :param env_class: env constructor used when ``env_id`` is not given.
    :param env_kwargs: base kwargs for ``env_class``.
    :param env_id: gym id of a registered multiworld env (takes precedence).
    :param N: number of images to collect.
    :param test_p: fraction of the data placed in the *first* (train) split —
        despite the name, this is the train proportion (see below).
    :param use_cached: reuse a previously generated /tmp cache if present.
    :param imsize: square image resolution.
    :param num_channels: image channels (flattened into each dataset row).
    :param show: if True, display each goal image while collecting.
    :param init_camera: camera-init function; its name is part of the cache key.
    :param dataset_path: optional pre-generated dataset (local or S3 path).
    :param oracle_dataset: must be truthy; only oracle collection is supported.
    :param n_random_steps: unused here; kept for signature compatibility.
    :param vae_dataset_specific_env_kwargs: overrides merged over env_kwargs.
    :param save_file_prefix: cache-file prefix; defaults to env_id or class name.
    :return: (train_dataset, test_dataset, info) where datasets are uint8
        arrays of shape (n_rows, imsize*imsize*num_channels).
    """
    from multiworld.core.image_env import ImageEnv, unormalize_image
    import time

    # Only the oracle (presampled-goal) collection path is implemented.
    assert oracle_dataset

    if env_kwargs is None:
        env_kwargs = {}
    # Fall back from env_id to the env class name for the cache-file prefix.
    if save_file_prefix is None:
        save_file_prefix = env_id
    if save_file_prefix is None:
        save_file_prefix = env_class.__name__
    filename = "/tmp/{}_N{}_{}_imsize{}_oracle{}.npy".format(
        save_file_prefix,
        str(N),
        init_camera.__name__ if init_camera else '',
        imsize,
        oracle_dataset,
    )
    info = {}
    if dataset_path is not None:
        # Load a pre-generated dataset instead of collecting one.
        filename = local_path_from_s3_or_local_path(dataset_path)
        dataset = np.load(filename)
        np.random.shuffle(dataset)
        N = dataset.shape[0]
    elif use_cached and osp.isfile(filename):
        dataset = np.load(filename)
        np.random.shuffle(dataset)
        print("loaded data from saved file", filename)
    else:
        now = time.time()

        if env_id is not None:
            import gym
            import multiworld
            multiworld.register_all_envs()
            env = gym.make(env_id)
        else:
            if vae_dataset_specific_env_kwargs is None:
                vae_dataset_specific_env_kwargs = {}
            # Dataset-specific kwargs take precedence over the base kwargs.
            for key, val in env_kwargs.items():
                if key not in vae_dataset_specific_env_kwargs:
                    vae_dataset_specific_env_kwargs[key] = val
            env = env_class(**vae_dataset_specific_env_kwargs)
        if not isinstance(env, ImageEnv):
            env = ImageEnv(
                env,
                imsize,
                init_camera=init_camera,
                transpose=True,
                normalize=True,
            )
        setup_pickup_image_env(env, num_presampled_goals=N)
        env.reset()
        info['env'] = env

        dataset = np.zeros((N, imsize * imsize * num_channels), dtype=np.uint8)
        for i in range(N):
            img = env._presampled_goals['image_desired_goal'][i]
            # Goal images are stored normalized; convert back to uint8 rows.
            dataset[i, :] = unormalize_image(img)
            if show:
                img = img.reshape(3, imsize, imsize).transpose()
                img = img[::-1, :, ::-1]
                cv2.imshow('img', img)
                cv2.waitKey(1)
                time.sleep(.2)
                # radius = input('waiting...')
        print("done making training data", filename, time.time() - now)
        np.random.shuffle(dataset)
        np.save(filename, dataset)

    # NOTE(review): despite its name, test_p acts as the *train* fraction —
    # the first int(N * test_p) rows become the training split.
    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
def generate_vae_dataset(variant):
    """
    If not provided a pre-train vae dataset generation function, this function will be used to collect
    the dataset for training vae.

    Collection strategy (mutually exclusive, chosen by the variant flags):
      * ``dataset_path``           — load an existing dataset from disk.
      * cached /tmp file           — reuse a previously generated dataset.
      * ``random_and_oracle_policy_data`` — mix of random steps and steps
        from a loaded policy, split by ``random_and_oracle_policy_data_split``.
      * ``oracle_dataset_using_set_to_goal`` — sample a goal, set the env to
        it, and record the resulting observation.
      * ``random_rollout_data``    — OU-noise rollouts, resetting the goal
        every ``n_random_steps`` steps.
      * otherwise                  — purely random actions.

    Returns ``(train_dataset, test_dataset, info)`` with uint8 image rows.
    """
    import rlkit.torch.pytorch_util as ptu
    import gym
    import multiworld
    multiworld.register_all_envs()

    print("generating vae dataset with original images")

    env_class = variant.get('env_class', None)
    env_kwargs = variant.get('env_kwargs', None)
    env_id = variant.get('env_id', None)
    N = variant.get('N', 10000)
    test_p = variant.get('test_p', 0.9)
    use_cached = variant.get('use_cached', True)
    imsize = variant.get('imsize', 84)
    num_channels = variant.get('num_channels', 3)
    show = variant.get('show', False)
    init_camera = variant.get('init_camera', None)
    dataset_path = variant.get('dataset_path', None)
    oracle_dataset_using_set_to_goal = variant.get(
        'oracle_dataset_using_set_to_goal', False)
    random_rollout_data = variant.get('random_rollout_data', False)
    random_and_oracle_policy_data = variant.get(
        'random_and_oracle_policy_data', False)
    random_and_oracle_policy_data_split = variant.get(
        'random_and_oracle_policy_data_split', 0)
    policy_file = variant.get('policy_file', None)
    n_random_steps = variant.get('n_random_steps', 100)
    vae_dataset_specific_env_kwargs = variant.get(
        'vae_dataset_specific_env_kwargs', None)
    save_file_prefix = variant.get('save_file_prefix', None)
    non_presampled_goal_img_is_garbage = variant.get(
        'non_presampled_goal_img_is_garbage', None)
    tag = variant.get('tag', '')

    info = {}
    if dataset_path is not None:
        # Load a previously generated dataset relative to $PJHOME.
        print('load vae training dataset from: ', dataset_path)
        pjhome = os.environ['PJHOME']
        dataset = np.load(osp.join(pjhome, dataset_path),
                          allow_pickle=True).item()
        if isinstance(dataset, dict):
            dataset = dataset['image_desired_goal']
        dataset = unormalize_image(dataset)
        N = dataset.shape[0]
    else:
        if env_kwargs is None:
            env_kwargs = {}
        # Fall back from env_id to the env class name for the cache prefix.
        if save_file_prefix is None:
            save_file_prefix = env_id
        if save_file_prefix is None:
            save_file_prefix = env_class.__name__
        filename = "/tmp/{}_N{}_{}_imsize{}_random_oracle_split_{}{}.npy".format(
            save_file_prefix,
            str(N),
            init_camera.__name__ if init_camera else '',
            imsize,
            random_and_oracle_policy_data_split,
            tag,
        )
        if use_cached and osp.isfile(filename):
            dataset = np.load(filename)
            print("loaded data from saved file", filename)
        else:
            now = time.time()

            if env_id is not None:
                import gym
                import multiworld
                multiworld.register_all_envs()
                env = gym.make(env_id)
            else:
                if vae_dataset_specific_env_kwargs is None:
                    vae_dataset_specific_env_kwargs = {}
                # Dataset-specific kwargs take precedence over base kwargs.
                for key, val in env_kwargs.items():
                    if key not in vae_dataset_specific_env_kwargs:
                        vae_dataset_specific_env_kwargs[key] = val
                env = env_class(**vae_dataset_specific_env_kwargs)
            if not isinstance(env, ImageEnv):
                env = ImageEnv(
                    env,
                    imsize,
                    init_camera=init_camera,
                    transpose=True,
                    normalize=True,
                    non_presampled_goal_img_is_garbage=
                    non_presampled_goal_img_is_garbage,
                )
            else:
                imsize = env.imsize
                env.non_presampled_goal_img_is_garbage = non_presampled_goal_img_is_garbage
            env.reset()
            info['env'] = env
            if random_and_oracle_policy_data:
                policy_file = load_local_or_remote_file(policy_file)
                policy = policy_file['policy']
                policy.to(ptu.device)
            if random_rollout_data:
                from rlkit.exploration_strategies.ou_strategy import OUStrategy
                policy = OUStrategy(env.action_space)

            dataset = np.zeros((N, imsize * imsize * num_channels),
                               dtype=np.uint8)

            for i in range(N):
                if random_and_oracle_policy_data:
                    # First fraction of samples: random actions; rest: policy.
                    num_random_steps = int(N *
                                           random_and_oracle_policy_data_split)
                    if i < num_random_steps:
                        env.reset()
                        for _ in range(n_random_steps):
                            obs = env.step(env.action_space.sample())[0]
                    else:
                        obs = env.reset()
                        policy.reset()
                        for _ in range(n_random_steps):
                            policy_obs = np.hstack((
                                obs['state_observation'],
                                obs['state_desired_goal'],
                            ))
                            action, _ = policy.get_action(policy_obs)
                            obs, _, _, _ = env.step(action)
                elif oracle_dataset_using_set_to_goal:
                    print(i)
                    goal = env.sample_goal()
                    env.set_to_goal(goal)
                    obs = env._get_obs()

                elif random_rollout_data:
                    # Re-seed the goal (and OU noise) every n_random_steps.
                    if i % n_random_steps == 0:
                        g = dict(
                            state_desired_goal=env.sample_goal_for_rollout())
                        env.set_to_goal(g)
                        policy.reset()
                        # env.reset()
                    u = policy.get_action_from_raw_action(
                        env.action_space.sample())
                    obs = env.step(u)[0]
                else:
                    print("using totally random rollouts")
                    for _ in range(n_random_steps):
                        obs = env.step(env.action_space.sample())[0]

                img = obs[
                    'image_observation']  # NOTE yufei: this is already normalized image, of detype np.float64.

                dataset[i, :] = unormalize_image(img)

            np.save(filename, dataset)

    # NOTE(review): as elsewhere in this file, test_p is effectively the
    # *train* fraction — the first int(N * test_p) rows are the train split.
    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
def generate_LSTM_vae_only_dataset(variant,
                                   segmented=False,
                                   segmentation_method='color'):
    """Generate a (train, test) image dataset for LSTM-VAE pretraining.

    Images come from one of three sources: a pre-sampled goal file for the
    door env, a pre-sampled goal file for the pickup env, or fresh rollouts
    in one of the supported pushing envs.  Optionally, images are passed
    through a segmentation function and randomly occluded.  Results (and the
    matching object states, e.g. puck position or door angle) are cached on
    disk under $PJHOME when N >= 500.

    :param variant: dict with ``env_id`` (required), ``N``, ``test_p``,
        ``imsize``, ``num_channels``, ``init_camera``, ``occlusion_prob``,
        ``occlusion_level``, ``segmentation_kwargs``.
    :param segmented: segment images before storing (may be overridden by
        ``segmentation_kwargs['segment']``).
    :param segmentation_method: only 'unet' is supported when segmenting.
    :return: (train_dataset, test_dataset, info); datasets have shape
        (n_rows, 1, imsize*imsize*num_channels) — a trajectory dim of 1 —
        and ``info['obj_state']`` holds the per-image object states.
    """
    from multiworld.core.image_env import ImageEnv, unormalize_image

    env_id = variant.get('env_id', None)
    N = variant.get('N', 500)
    test_p = variant.get('test_p', 0.9)
    imsize = variant.get('imsize', 48)
    num_channels = variant.get('num_channels', 3)
    init_camera = variant.get('init_camera', None)
    occlusion_prob = variant.get('occlusion_prob', 0)
    occlusion_level = variant.get('occlusion_level', 0.5)
    segmentation_kwargs = variant.get('segmentation_kwargs', {})
    # An explicit 'segment' entry in segmentation_kwargs wins over the arg.
    if segmentation_kwargs.get('segment') is not None:
        segmented = segmentation_kwargs.get('segment')

    assert env_id is not None, 'you must provide an env id!'

    obj = 'puck-pos'
    if env_id == 'SawyerDoorHookResetFreeEnv-v1':
        obj = 'door-angle'

    pjhome = os.environ['PJHOME']
    if segmented:
        if 'unet' in segmentation_method:
            seg_name = 'seg-unet'
        else:
            seg_name = 'seg-' + segmentation_method
    else:
        seg_name = 'no-seg'

    if env_id == 'SawyerDoorHookResetFreeEnv-v1':
        seg_name += '-2'

    data_file_path = osp.join(
        pjhome, 'data/local/pre-train-lstm',
        'vae-only-{}-{}-{}-{}-{}.npy'.format(env_id, seg_name, N,
                                             occlusion_prob, occlusion_level))
    obj_state_path = osp.join(
        pjhome, 'data/local/pre-train-lstm',
        'vae-only-{}-{}-{}-{}-{}-{}.npy'.format(env_id, seg_name, N,
                                                occlusion_prob,
                                                occlusion_level, obj))

    print(data_file_path)
    # Reuse cached data when a large-enough file already exists.
    if osp.exists(data_file_path):
        all_data = np.load(data_file_path)
        if len(all_data) >= N:
            print("load stored data at: ", data_file_path)
            n = int(len(all_data) * test_p)
            train_dataset = all_data[:n]
            test_dataset = all_data[n:]
            obj_states = np.load(obj_state_path)
            info = {'obj_state': obj_states}
            return train_dataset, test_dataset, info

    if segmented:
        print(
            "generating lstm vae pretrain only dataset with segmented images using method: ",
            segmentation_method)
        if segmentation_method == 'unet':
            segment_func = segment_image_unet
        else:
            raise NotImplementedError
    else:
        print("generating lstm vae pretrain only dataset with original images")

    info = {}
    dataset = np.zeros((N, imsize * imsize * num_channels), dtype=np.uint8)
    imgs = []
    obj_states = None

    if env_id == 'SawyerDoorHookResetFreeEnv-v1':
        # Door env: use the pre-sampled goal images; object state is the
        # door angle (last state dimension).
        from rlkit.util.io import load_local_or_remote_file
        pjhome = os.environ['PJHOME']
        pre_sampled_goal_path = osp.join(
            pjhome, 'data/local/pre-train-vae/door_original_dataset.npy')
        goal_dict = np.load(pre_sampled_goal_path, allow_pickle=True).item()
        imgs = goal_dict['image_desired_goal']
        door_angles = goal_dict['state_desired_goal'][:, -1]
        obj_states = door_angles[:, np.newaxis]
    elif env_id == 'SawyerPickupEnvYZEasy-v0':
        # Pickup env: pre-sampled goals; object state is the puck position.
        from rlkit.util.io import load_local_or_remote_file
        pjhome = os.environ['PJHOME']
        pre_sampled_goal_path = osp.join(
            pjhome, 'data/local/pre-train-vae/pickup-original-dataset.npy')
        goal_dict = load_local_or_remote_file(pre_sampled_goal_path).item()
        imgs = goal_dict['image_desired_goal']
        puck_pos = goal_dict['state_desired_goal'][:, 3:]
        obj_states = puck_pos

    else:
        # Pushing envs: roll out and collect images + puck positions.
        import gym
        import multiworld
        multiworld.register_all_envs()
        env = gym.make(env_id)

        if not isinstance(env, ImageEnv):
            env = ImageEnv(
                env,
                imsize,
                init_camera=init_camera,
                transpose=True,
                normalize=True,
            )
        env.reset()
        info['env'] = env

        # BUG FIX: np.float was a deprecated alias for the builtin float
        # (removed in NumPy 1.24); use np.float64 explicitly.
        puck_pos = np.zeros((N, 2), dtype=np.float64)
        for i in range(N):
            print("lstm vae pretrain only dataset generation, number: ", i)
            if env_id == 'SawyerPushHurdle-v0':
                obs, puck_p = _generate_sawyerhurdle_dataset(
                    env, return_puck_pos=True, segmented=segmented)
            elif env_id == 'SawyerPushHurdleMiddle-v0':
                obs, puck_p = _generate_sawyerhurdlemiddle_dataset(
                    env, return_puck_pos=True)
            elif env_id == 'SawyerPushNIPSEasy-v0':
                obs, puck_p = _generate_sawyerpushnipseasy_dataset(
                    env, return_puck_pos=True)
            elif env_id == 'SawyerPushHurdleResetFreeEnv-v0':
                obs, puck_p = _generate_sawyerhurldeblockresetfree_dataset(
                    env, return_puck_pos=True)
            else:
                raise NotImplementedError
            img = obs[
                'image_observation']  # NOTE: this is already normalized image, of detype np.float64.
            imgs.append(img)
            puck_pos[i] = puck_p

        obj_states = puck_pos

    # now we segment the images
    for i in range(N):
        print("segmenting image ", i)
        img = imgs[i]
        if segmented:
            dataset[i, :] = segment_func(img,
                                         normalize=False,
                                         **segmentation_kwargs)
            p = np.random.rand(
            )  # manually drop some images, so as to make occlusions
            if p < occlusion_prob:
                # Zero out pixels where a uniform sample falls below the
                # occlusion level, simulating partial observability.
                mask = (np.random.uniform(low=0, high=1, size=(imsize, imsize))
                        > occlusion_level).astype(np.uint8)
                img = dataset[i].reshape(3, imsize, imsize).transpose()
                img[mask < 1] = 0
                dataset[i] = img.transpose().flatten()

        else:
            dataset[i, :] = unormalize_image(img)

    # add the trajectory dimension
    dataset = dataset[:, np.newaxis, :]  # batch_size x traj_len = 1 x imlen
    obj_states = obj_states[:,
                            np.newaxis, :]  # batch_size x traj_len = 1 x imlen
    info['obj_state'] = obj_states

    n = int(N * test_p)
    train_dataset = dataset[:n]
    test_dataset = dataset[n:]

    if N >= 500:
        print('save data to: ', data_file_path)
        all_data = np.concatenate([train_dataset, test_dataset], axis=0)
        np.save(data_file_path, all_data)
        np.save(obj_state_path, obj_states)

    return train_dataset, test_dataset, info
# Example #19
def _disentangled_her_twin_sac_experiment_v2(
        max_path_length,
        encoder_kwargs,
        disentangled_qf_kwargs,
        qf_kwargs,
        twin_sac_trainer_kwargs,
        replay_buffer_kwargs,
        policy_kwargs,
        evaluation_goal_sampling_mode,
        exploration_goal_sampling_mode,
        algo_kwargs,
        save_video=True,
        env_id=None,
        env_class=None,
        env_kwargs=None,
        observation_key='state_observation',
        desired_goal_key='state_desired_goal',
        achieved_goal_key='state_achieved_goal',
        # Video parameters
        latent_dim=2,
        save_video_kwargs=None,
        **kwargs
):
    """Run HER + twin SAC with disentangled (per-latent-dimension) Q-functions.

    Goals are encoded into a ``latent_dim``-dimensional space by a shared
    encoder; each Q-function decomposes its value across latent dimensions
    (DisentangledMlpQf).  Target networks use a detached copy of the encoder
    so target updates do not backprop into it.  Optionally saves evaluation
    and exploration videos with per-dimension value heatmaps.

    Either ``env_id`` or ``env_class`` must be provided.  Remaining kwargs
    are accepted (and ignored) for launcher compatibility.
    """
    import rlkit.samplers.rollout_functions as rf
    import rlkit.torch.pytorch_util as ptu
    from rlkit.data_management.obs_dict_replay_buffer import \
        ObsDictRelabelingBuffer
    from rlkit.torch.networks import ConcatMlp
    from rlkit.torch.sac.policies import TanhGaussianPolicy
    from rlkit.torch.torch_rl_algorithm import TorchBatchRLAlgorithm

    if save_video_kwargs is None:
        save_video_kwargs = {}

    if env_kwargs is None:
        env_kwargs = {}
    assert env_id or env_class

    if env_id:
        import gym
        import multiworld
        multiworld.register_all_envs()
        train_env = gym.make(env_id)
        eval_env = gym.make(env_id)
    else:
        eval_env = env_class(**env_kwargs)
        train_env = env_class(**env_kwargs)

    obs_dim = train_env.observation_space.spaces[observation_key].low.size
    goal_dim = train_env.observation_space.spaces[desired_goal_key].low.size
    action_dim = train_env.action_space.low.size

    # Shared goal encoder: maps goals into the latent space used by the
    # disentangled Q-functions.
    encoder = ConcatMlp(
        input_size=goal_dim,
        output_size=latent_dim,
        **encoder_kwargs
    )

    qf1 = DisentangledMlpQf(
        encoder=encoder,
        preprocess_obs_dim=obs_dim,
        action_dim=action_dim,
        qf_kwargs=qf_kwargs,
        **disentangled_qf_kwargs
    )
    qf2 = DisentangledMlpQf(
        encoder=encoder,
        preprocess_obs_dim=obs_dim,
        action_dim=action_dim,
        qf_kwargs=qf_kwargs,
        **disentangled_qf_kwargs
    )
    # Targets wrap the encoder in Detach so target-network updates do not
    # propagate gradients into the shared encoder.
    target_qf1 = DisentangledMlpQf(
        encoder=Detach(encoder),
        preprocess_obs_dim=obs_dim,
        action_dim=action_dim,
        qf_kwargs=qf_kwargs,
        **disentangled_qf_kwargs
    )
    target_qf2 = DisentangledMlpQf(
        encoder=Detach(encoder),
        preprocess_obs_dim=obs_dim,
        action_dim=action_dim,
        qf_kwargs=qf_kwargs,
        **disentangled_qf_kwargs
    )

    policy = TanhGaussianPolicy(
        obs_dim=obs_dim + goal_dim,
        action_dim=action_dim,
        **policy_kwargs
    )

    replay_buffer = ObsDictRelabelingBuffer(
        env=train_env,
        observation_key=observation_key,
        desired_goal_key=desired_goal_key,
        achieved_goal_key=achieved_goal_key,
        **replay_buffer_kwargs
    )
    sac_trainer = SACTrainer(
        env=train_env,
        policy=policy,
        qf1=qf1,
        qf2=qf2,
        target_qf1=target_qf1,
        target_qf2=target_qf2,
        **twin_sac_trainer_kwargs
    )
    trainer = HERTrainer(sac_trainer)

    eval_path_collector = GoalConditionedPathCollector(
        eval_env,
        MakeDeterministic(policy),
        max_path_length,
        observation_key=observation_key,
        desired_goal_key=desired_goal_key,
        goal_sampling_mode=evaluation_goal_sampling_mode,
    )
    expl_path_collector = GoalConditionedPathCollector(
        train_env,
        policy,
        max_path_length,
        observation_key=observation_key,
        desired_goal_key=desired_goal_key,
        goal_sampling_mode=exploration_goal_sampling_mode,
    )

    algorithm = TorchBatchRLAlgorithm(
        trainer=trainer,
        exploration_env=train_env,
        evaluation_env=eval_env,
        exploration_data_collector=expl_path_collector,
        evaluation_data_collector=eval_path_collector,
        replay_buffer=replay_buffer,
        max_path_length=max_path_length,
        **algo_kwargs,
    )
    algorithm.to(ptu.device)

    if save_video:
        save_vf_heatmap = save_video_kwargs.get('save_vf_heatmap', True)

        def v_function(obs):
            # V(s) estimated by evaluating qf1 at the policy's action,
            # with per-latent-dimension values for the heatmaps.
            action = policy.get_actions(obs)
            obs, action = ptu.from_numpy(obs), ptu.from_numpy(action)
            return qf1(obs, action, return_individual_q_vals=True)
        add_heatmap = partial(add_heatmap_imgs_to_o_dict, v_function=v_function)
        rollout_function = rf.create_rollout_function(
            rf.multitask_rollout,
            max_path_length=max_path_length,
            observation_key=observation_key,
            desired_goal_key=desired_goal_key,
            full_o_postprocess_func=add_heatmap if save_vf_heatmap else None,
        )

        # One overall value image plus one per latent dimension.
        img_keys = ['v_vals'] + [
            'v_vals_dim_{}'.format(dim) for dim
            in range(latent_dim)
        ]
        eval_video_func = get_save_video_function(
            rollout_function,
            eval_env,
            MakeDeterministic(policy),
            tag="eval",
            get_extra_imgs=partial(get_extra_imgs, img_keys=img_keys),
            **save_video_kwargs
        )
        train_video_func = get_save_video_function(
            rollout_function,
            train_env,
            policy,
            tag="train",
            get_extra_imgs=partial(get_extra_imgs, img_keys=img_keys),
            **save_video_kwargs
        )
        # NOTE(review): this decoder is constructed and moved to the device
        # but never used below — presumably left over from the commented-out
        # post-train hooks; confirm before removing.
        decoder = ConcatMlp(
            input_size=obs_dim,
            output_size=obs_dim,
            hidden_sizes=[128, 128],
        )
        decoder.to(ptu.device)

        # algorithm.post_train_funcs.append(train_decoder(variant, encoder, decoder))
        # algorithm.post_train_funcs.append(plot_encoder_function(variant, encoder))
        # algorithm.post_train_funcs.append(plot_buffer_function(
            # save_video_period, 'state_achieved_goal'))
        # algorithm.post_train_funcs.append(plot_buffer_function(
            # save_video_period, 'state_desired_goal'))
        algorithm.post_train_funcs.append(eval_video_func)
        algorithm.post_train_funcs.append(train_video_func)



    algorithm.train()
# Example #20
def _disentangled_grill_her_twin_sac_experiment(
        max_path_length,
        encoder_kwargs,
        disentangled_qf_kwargs,
        qf_kwargs,
        twin_sac_trainer_kwargs,
        replay_buffer_kwargs,
        policy_kwargs,
        vae_evaluation_goal_sampling_mode,
        vae_exploration_goal_sampling_mode,
        base_env_evaluation_goal_sampling_mode,
        base_env_exploration_goal_sampling_mode,
        algo_kwargs,
        env_id=None,
        env_class=None,
        env_kwargs=None,
        observation_key='state_observation',
        desired_goal_key='state_desired_goal',
        achieved_goal_key='state_achieved_goal',
        latent_dim=2,
        vae_wrapped_env_kwargs=None,
        vae_path=None,
        vae_n_vae_training_kwargs=None,
        vectorized=False,
        save_video=True,
        save_video_kwargs=None,
        have_no_disentangled_encoder=False,
        **kwargs):
    """Run HER + twin SAC on VAE-wrapped goal envs, optionally with a
    disentangled Q-function.

    Builds a train/eval env pair (from ``env_id`` or ``env_class``), wraps
    both in a ``VAEWrappedEnv`` (the VAE is loaded from ``vae_path`` or
    trained via ``get_n_train_vae``), constructs the twin critics, policy,
    relabeling replay buffer and path collectors, then calls
    ``algorithm.train()``.  With ``save_video`` set, per-epoch video hooks
    (including value-function heatmaps) are appended as post-train functions.

    Fix over the previous revision: the optional kwarg dicts that default to
    ``None`` (``vae_wrapped_env_kwargs``, ``vae_n_vae_training_kwargs``,
    ``save_video_kwargs``) are normalized to ``{}`` up front.  Previously,
    calling with the defaults crashed on ``**None`` expansion or
    ``None.get(...)`` — sibling experiment launchers in this file already
    guard these the same way.
    """
    if env_kwargs is None:
        env_kwargs = {}
    # Normalize optional kwarg dicts so the ``**`` expansions and the
    # ``save_video_kwargs.get`` call below cannot fail on None defaults.
    if vae_wrapped_env_kwargs is None:
        vae_wrapped_env_kwargs = {}
    if vae_n_vae_training_kwargs is None:
        vae_n_vae_training_kwargs = {}
    if save_video_kwargs is None:
        save_video_kwargs = {}
    assert env_id or env_class

    if env_id:
        import gym
        import multiworld
        multiworld.register_all_envs()
        train_env = gym.make(env_id)
        eval_env = gym.make(env_id)
    else:
        eval_env = env_class(**env_kwargs)
        train_env = env_class(**env_kwargs)

    # Goal sampling of the *base* (state) env; the VAE-level modes are
    # applied later on the path collectors.
    train_env.goal_sampling_mode = base_env_exploration_goal_sampling_mode
    eval_env.goal_sampling_mode = base_env_evaluation_goal_sampling_mode

    if vae_path:
        vae = load_local_or_remote_file(vae_path)
    else:
        vae = get_n_train_vae(latent_dim=latent_dim,
                              env=eval_env,
                              **vae_n_vae_training_kwargs)

    train_env = VAEWrappedEnv(train_env,
                              vae,
                              imsize=train_env.imsize,
                              **vae_wrapped_env_kwargs)
    eval_env = VAEWrappedEnv(eval_env,
                             vae,
                             imsize=train_env.imsize,
                             **vae_wrapped_env_kwargs)

    obs_dim = train_env.observation_space.spaces[observation_key].low.size
    goal_dim = train_env.observation_space.spaces[desired_goal_key].low.size
    action_dim = train_env.action_space.low.size

    # Shared goal encoder used by all four critics (when disentangled).
    encoder = FlattenMlp(input_size=obs_dim,
                         output_size=latent_dim,
                         **encoder_kwargs)

    def make_qf():
        # One factory so qf1/qf2 and their targets share an architecture.
        if have_no_disentangled_encoder:
            return FlattenMlp(
                input_size=obs_dim + goal_dim + action_dim,
                output_size=1,
                **qf_kwargs,
            )
        else:
            return DisentangledMlpQf(goal_processor=encoder,
                                     preprocess_obs_dim=obs_dim,
                                     action_dim=action_dim,
                                     qf_kwargs=qf_kwargs,
                                     vectorized=vectorized,
                                     **disentangled_qf_kwargs)

    qf1 = make_qf()
    qf2 = make_qf()
    target_qf1 = make_qf()
    target_qf2 = make_qf()

    policy = TanhGaussianPolicy(obs_dim=obs_dim + goal_dim,
                                action_dim=action_dim,
                                **policy_kwargs)

    replay_buffer = ObsDictRelabelingBuffer(
        env=train_env,
        observation_key=observation_key,
        desired_goal_key=desired_goal_key,
        achieved_goal_key=achieved_goal_key,
        vectorized=vectorized,
        **replay_buffer_kwargs)
    sac_trainer = SACTrainer(env=train_env,
                             policy=policy,
                             qf1=qf1,
                             qf2=qf2,
                             target_qf1=target_qf1,
                             target_qf2=target_qf2,
                             **twin_sac_trainer_kwargs)
    trainer = HERTrainer(sac_trainer)

    eval_path_collector = VAEWrappedEnvPathCollector(
        eval_env,
        MakeDeterministic(policy),
        max_path_length,
        observation_key=observation_key,
        desired_goal_key=desired_goal_key,
        goal_sampling_mode=vae_evaluation_goal_sampling_mode,
    )
    expl_path_collector = VAEWrappedEnvPathCollector(
        train_env,
        policy,
        max_path_length,
        observation_key=observation_key,
        desired_goal_key=desired_goal_key,
        goal_sampling_mode=vae_exploration_goal_sampling_mode,
    )
    algorithm = TorchBatchRLAlgorithm(
        trainer=trainer,
        exploration_env=train_env,
        evaluation_env=eval_env,
        exploration_data_collector=expl_path_collector,
        evaluation_data_collector=eval_path_collector,
        replay_buffer=replay_buffer,
        max_path_length=max_path_length,
        **algo_kwargs,
    )
    algorithm.to(ptu.device)

    if save_video:
        save_vf_heatmap = save_video_kwargs.get('save_vf_heatmap', True)

        if have_no_disentangled_encoder:

            def v_function(obs):
                # V(s) estimated by evaluating qf1 at the policy's action.
                action = policy.get_actions(obs)
                obs, action = ptu.from_numpy(obs), ptu.from_numpy(action)
                return qf1(obs, action)

            add_heatmap = partial(add_heatmap_img_to_o_dict,
                                  v_function=v_function)
        else:

            def v_function(obs):
                # Disentangled critic: also return the per-dimension Q values
                # so each latent dimension gets its own heatmap.
                action = policy.get_actions(obs)
                obs, action = ptu.from_numpy(obs), ptu.from_numpy(action)
                return qf1(obs, action, return_individual_q_vals=True)

            add_heatmap = partial(
                add_heatmap_imgs_to_o_dict,
                v_function=v_function,
                vectorized=vectorized,
            )
        rollout_function = rf.create_rollout_function(
            rf.multitask_rollout,
            max_path_length=max_path_length,
            observation_key=observation_key,
            desired_goal_key=desired_goal_key,
            full_o_postprocess_func=add_heatmap if save_vf_heatmap else None,
        )
        img_keys = ['v_vals'] + [
            'v_vals_dim_{}'.format(dim) for dim in range(latent_dim)
        ]
        eval_video_func = get_save_video_function(rollout_function,
                                                  eval_env,
                                                  MakeDeterministic(policy),
                                                  get_extra_imgs=partial(
                                                      get_extra_imgs,
                                                      img_keys=img_keys),
                                                  tag="eval",
                                                  **save_video_kwargs)
        train_video_func = get_save_video_function(rollout_function,
                                                   train_env,
                                                   policy,
                                                   get_extra_imgs=partial(
                                                       get_extra_imgs,
                                                       img_keys=img_keys),
                                                   tag="train",
                                                   **save_video_kwargs)
        algorithm.post_train_funcs.append(eval_video_func)
        algorithm.post_train_funcs.append(train_video_func)
    algorithm.train()
Example #21
0
def her_sac_experiment(
        max_path_length,
        qf_kwargs,
        twin_sac_trainer_kwargs,
        replay_buffer_kwargs,
        policy_kwargs,
        evaluation_goal_sampling_mode,
        exploration_goal_sampling_mode,
        algo_kwargs,
        save_video=True,
        env_id=None,
        env_class=None,
        env_kwargs=None,
        observation_key='state_observation',
        desired_goal_key='state_desired_goal',
        achieved_goal_key='state_achieved_goal',
        # Video parameters
        save_video_kwargs=None,
        exploration_policy_kwargs=None,
        **kwargs
):
    """Goal-conditioned twin SAC with hindsight experience replay (HER).

    Constructs the train/eval env pair, the twin critics and tanh-Gaussian
    policy, wires everything into a HER-wrapped SAC trainer with an obs-dict
    relabeling buffer, optionally registers video-saving post-train hooks,
    and finally runs ``algorithm.train()``.
    """
    if exploration_policy_kwargs is None:
        exploration_policy_kwargs = {}
    import rlkit.samplers.rollout_functions as rf
    import rlkit.torch.pytorch_util as ptu
    from rlkit.data_management.obs_dict_replay_buffer import \
        ObsDictRelabelingBuffer
    from rlkit.torch.networks import ConcatMlp
    from rlkit.torch.sac.policies import TanhGaussianPolicy
    from rlkit.torch.torch_rl_algorithm import TorchBatchRLAlgorithm
    # Normalize the remaining optional kwarg dicts.
    save_video_kwargs = save_video_kwargs or {}
    env_kwargs = env_kwargs or {}

    assert env_id or env_class
    if env_id:
        import gym
        import multiworld
        multiworld.register_all_envs()
        train_env = gym.make(env_id)
        eval_env = gym.make(env_id)
    else:
        eval_env = env_class(**env_kwargs)
        train_env = env_class(**env_kwargs)

    # Networks see the state observation concatenated with the goal.
    obs_spaces = train_env.observation_space.spaces
    obs_dim = (
            obs_spaces[observation_key].low.size
            + obs_spaces[desired_goal_key].low.size
    )
    action_dim = train_env.action_space.low.size

    def _new_critic():
        # All four critics (twin + targets) share one architecture.
        return ConcatMlp(
            input_size=obs_dim + action_dim,
            output_size=1,
            **qf_kwargs
        )

    qf1, qf2 = _new_critic(), _new_critic()
    target_qf1, target_qf2 = _new_critic(), _new_critic()
    policy = TanhGaussianPolicy(
        obs_dim=obs_dim,
        action_dim=action_dim,
        **policy_kwargs
    )

    replay_buffer = ObsDictRelabelingBuffer(
        env=train_env,
        observation_key=observation_key,
        desired_goal_key=desired_goal_key,
        achieved_goal_key=achieved_goal_key,
        **replay_buffer_kwargs
    )
    # HER wraps the base SAC trainer to relabel goals in sampled batches.
    trainer = HERTrainer(SACTrainer(
        env=train_env,
        policy=policy,
        qf1=qf1,
        qf2=qf2,
        target_qf1=target_qf1,
        target_qf2=target_qf2,
        **twin_sac_trainer_kwargs
    ))

    eval_path_collector = GoalConditionedPathCollector(
        eval_env,
        MakeDeterministic(policy),
        max_path_length,
        observation_key=observation_key,
        desired_goal_key=desired_goal_key,
        goal_sampling_mode=evaluation_goal_sampling_mode,
    )
    exploration_policy = create_exploration_policy(
        train_env, policy, **exploration_policy_kwargs)
    expl_path_collector = GoalConditionedPathCollector(
        train_env,
        exploration_policy,
        max_path_length,
        observation_key=observation_key,
        desired_goal_key=desired_goal_key,
        goal_sampling_mode=exploration_goal_sampling_mode,
    )

    algorithm = TorchBatchRLAlgorithm(
        trainer=trainer,
        exploration_env=train_env,
        evaluation_env=eval_env,
        exploration_data_collector=expl_path_collector,
        evaluation_data_collector=eval_path_collector,
        replay_buffer=replay_buffer,
        max_path_length=max_path_length,
        **algo_kwargs
    )
    algorithm.to(ptu.device)

    if save_video:
        rollout_function = rf.create_rollout_function(
            rf.multitask_rollout,
            max_path_length=max_path_length,
            observation_key=observation_key,
            desired_goal_key=desired_goal_key,
            return_dict_obs=True,
        )
        # One video hook per (tag, env, policy) triple; eval first, then expl.
        video_specs = [
            ("eval", eval_env, MakeDeterministic(policy)),
            ("expl", train_env, exploration_policy),
        ]
        for tag, video_env, video_policy in video_specs:
            algorithm.post_train_funcs.append(get_save_video_function(
                rollout_function,
                video_env,
                video_policy,
                tag=tag,
                **save_video_kwargs
            ))

    algorithm.train()
Example #22
0
        self.hand_and_puck_space = Box(hand_and_puck_low, hand_and_puck_high, dtype=np.float32)
        self.observation_space = Dict([
            ('observation', self.hand_and_puck_space),
            ('desired_goal', self.hand_and_puck_space),
            ('achieved_goal', self.hand_and_puck_space),
            ('state_observation', self.hand_and_puck_space),
            ('state_desired_goal', self.hand_and_puck_space),
            ('state_achieved_goal', self.hand_and_puck_space),
            ('proprio_observation', self.hand_space),
            ('proprio_desired_goal', self.hand_space),
            ('proprio_achieved_goal', self.hand_space),
        ])

    def step(self, action):
        """Augment the incoming action with a z-component that servos the
        mocap hand back toward ``self.hand_z_position``, then defer to the
        parent environment's step.
        """
        z_correction = self.hand_z_position - self.data.mocap_pos[0, 2]
        return super().step(np.hstack((action, z_correction)))


if __name__ == '__main__':
    # Manual smoke test: random start states and goals, random actions,
    # rendered on screen.
    register_all_envs()
    env = gym.make('SawyerPushAndReachArenaEnv-v0', goal_type='puck', dense_reward=True, task_agnostic=False)
    for episode in range(100):
        # env.reset_to_new_start_state(start_pos=[.2, .8, 0.07, .1, 0.6])
        random_start = np.random.uniform(env.goal_low, env.goal_high)
        random_goal = np.random.uniform(env.goal_low, env.goal_high)
        env.reset_to_new_start_state(start_pos=random_start)
        env.set_goal(random_goal)
        for _ in range(10):
            env.step(np.random.uniform([-1, -1], [1, 1]))
        env.render()

Example #23
0
def getdata(variant):
    """Generate paired (state image, VAE encoding, reconstruction) samples.

    Loads a VAE-wrapped env from ``variant['skewfit_variant']``, then for a
    handful of sampled goals saves per-sample ``.npy`` bundles and pickled
    (dill-serialized) encoder/decoder callables to hard-coded local paths.

    NOTE(review): this looks like a one-off data-collection/debug script —
    it contains hard-coded absolute output paths, ``if True:`` leftovers,
    and ``cv2.imshow`` debugging; confirm before reusing outside the
    original machine.
    """
    skewfit_variant = variant['skewfit_variant']
    print('-------------------------------')
    skewfit_preprocess_variant(skewfit_variant)
    skewfit_variant['render'] = True
    # Env wrapped with the trained VAE; provides _get_encoded/_get_img below.
    vae_environment = get_envs(skewfit_variant)
    print('done loading vae_env')

    # Pull all configuration out of the variant dict with defaults.
    env_class = variant.get('env_class', None)
    env_kwargs = variant.get('env_kwargs', None)
    env_id = variant.get('env_id', None)
    N = variant.get('N', 10000)
    test_p = variant.get('test_p', 0.9)
    use_cached = variant.get('use_cached', True)
    imsize = variant.get('imsize', 84)
    num_channels = variant.get('num_channels', 3)
    show = variant.get('show', False)
    init_camera = variant.get('init_camera', None)
    dataset_path = variant.get('dataset_path', None)
    oracle_dataset_using_set_to_goal = variant.get(
        'oracle_dataset_using_set_to_goal', False)
    random_rollout_data = variant.get('random_rollout_data', False)
    random_and_oracle_policy_data = variant.get(
        'random_and_oracle_policy_data', False)
    random_and_oracle_policy_data_split = variant.get(
        'random_and_oracle_policy_data_split', 0)
    policy_file = variant.get('policy_file', None)
    n_random_steps = variant.get('n_random_steps', 100)
    vae_dataset_specific_env_kwargs = variant.get(
        'vae_dataset_specific_env_kwargs', None)
    save_file_prefix = variant.get('save_file_prefix', None)
    non_presampled_goal_img_is_garbage = variant.get(
        'non_presampled_goal_img_is_garbage', None)
    tag = variant.get('tag', '')
    from multiworld.core.image_env import ImageEnv, unormalize_image
    import rlkit.torch.pytorch_util as ptu
    info = {}
    if dataset_path is not None:
        # Pre-existing dataset: just load it and take its size.
        dataset = load_local_or_remote_file(dataset_path)
        N = dataset.shape[0]
    else:
        if env_kwargs is None:
            env_kwargs = {}
        # Prefer env_id for the filename prefix; fall back to the class name.
        if save_file_prefix is None:
            save_file_prefix = env_id
        if save_file_prefix is None:
            save_file_prefix = env_class.__name__
        filename = "/tmp/{}_N{}_{}_imsize{}_random_oracle_split_{}{}.npy".format(
            save_file_prefix,
            str(N),
            init_camera.__name__ if init_camera else '',
            imsize,
            random_and_oracle_policy_data_split,
            tag,
        )
        # NOTE(review): ``if True:`` is a leftover guard (likely replaced a
        # cache check on ``use_cached``/``filename``); it always runs.
        if True:
            now = time.time()

            if env_id is not None:
                import gym
                import multiworld
                multiworld.register_all_envs()
                env = gym.make(env_id)
            else:
                if vae_dataset_specific_env_kwargs is None:
                    vae_dataset_specific_env_kwargs = {}
                # Fill dataset-specific kwargs with the generic env kwargs.
                for key, val in env_kwargs.items():
                    if key not in vae_dataset_specific_env_kwargs:
                        vae_dataset_specific_env_kwargs[key] = val
                env = env_class(**vae_dataset_specific_env_kwargs)
            if not isinstance(env, ImageEnv):
                print("using(ImageEnv)")
                env = ImageEnv(
                    env,
                    imsize,
                    init_camera=init_camera,
                    transpose=True,
                    normalize=True,
                    non_presampled_goal_img_is_garbage=
                    non_presampled_goal_img_is_garbage,
                )
            else:
                # Already image-wrapped: adopt its imsize instead of ours.
                imsize = env.imsize
                env.non_presampled_goal_img_is_garbage = non_presampled_goal_img_is_garbage
            env.reset()
            info['env'] = env
            if random_and_oracle_policy_data:
                policy_file = load_local_or_remote_file(policy_file)
                policy = policy_file['policy']
                policy.to(ptu.device)
            if random_rollout_data:
                from rlkit.exploration_strategies.ou_strategy import OUStrategy
                policy = OUStrategy(env.action_space)
            # NOTE(review): ``dataset`` is allocated here but never written
            # into below — the final ``np.save(filename, dataset)`` stores
            # all zeros.  Confirm whether that save is still wanted.
            dataset = np.zeros((N, imsize * imsize * num_channels),
                               dtype=np.uint8)

            # Collect 10 samples: for each, a goal image, its VAE encoding,
            # its reconstruction, plus a second (post-reset) state triple.
            for i in range(10):
                NP = []
                if True:
                    print(i)
                    #print('th step')
                    goal = env.sample_goal()
                    # print("goal___________________________")
                    # print(goal)
                    # print("goal___________________________")
                    env.set_to_goal(goal)
                    obs = env._get_obs()
                    #img = img.reshape(3, imsize, imsize).transpose()
                    # img = img[::-1, :, ::-1]
                    # cv2.imshow('img', img)
                    # cv2.waitKey(1)
                    img_1 = obs['image_observation']
                    img_1 = img_1.reshape(3, imsize, imsize).transpose()
                    NP.append(img_1)
                    if i % 3 == 0:
                        cv2.imshow('img1', img_1)
                        cv2.waitKey(1)
                    #img_1_reconstruct = vae_environment._reconstruct_img(obs['image_observation']).transpose()
                    encoded_1 = vae_environment._get_encoded(
                        obs['image_observation'])
                    print(encoded_1)
                    NP.append(encoded_1)
                    img_1_reconstruct = vae_environment._get_img(
                        encoded_1).transpose()
                    NP.append(img_1_reconstruct)
                    #dataset[i, :] = unormalize_image(img)
                    # img_1 = img_1.reshape(3, imsize, imsize).transpose()
                    if i % 3 == 0:
                        cv2.imshow('img1_reconstruction', img_1_reconstruct)
                        cv2.waitKey(1)
                    env.reset()
                    # Second sample: a fresh state derived from the same goal.
                    instr = env.generate_new_state(goal)
                    if i % 3 == 0:
                        print(instr)
                    obs = env._get_obs()
                    # obs = env._get_obs()
                    img_2 = obs['image_observation']
                    img_2 = img_2.reshape(3, imsize, imsize).transpose()
                    NP.append(img_2)
                    if i % 3 == 0:
                        cv2.imshow('img2', img_2)
                        cv2.waitKey(1)
                    #img_2_reconstruct = vae_environment._reconstruct_img(obs['image_observation']).transpose()
                    encoded_2 = vae_environment._get_encoded(
                        obs['image_observation'])
                    NP.append(encoded_2)
                    img_2_reconstruct = vae_environment._get_img(
                        encoded_2).transpose()
                    NP.append(img_2_reconstruct)
                    NP.append(instr)
                    # img_2 = img_2.reshape(3, imsize, imsize).transpose()
                    if i % 3 == 0:
                        cv2.imshow('img2_reconstruct', img_2_reconstruct)
                        cv2.waitKey(1)
                    NP = np.array(NP)
                    idx = str(i)
                    # Hard-coded per-sample output path (machine-specific).
                    name = "/home/xiaomin/Downloads/IFIG_DATA_1/" + idx + ".npy"
                    np.save(open(name, 'wb'), NP)
                    # radius = input('waiting...')

                # #get the in between functions
            # Serialize the encoder/decoder bound methods with dill so they
            # can be reloaded standalone; immediately round-trip as a check.
            import dill
            import pickle
            get_encoded = dill.dumps(vae_environment._get_encoded)
            with open(
                    "/home/xiaomin/Downloads/IFIG_encoder_decoder/get_encoded_1000_epochs_one_puck.txt",
                    "wb") as fp:
                pickle.dump(get_encoded, fp)
            with open(
                    "/home/xiaomin/Downloads/IFIG_encoder_decoder/get_encoded_1000_epochs_one_puck.txt",
                    "rb") as fp:
                b = pickle.load(fp)
            func_get_encoded = dill.loads(b)
            encoded = func_get_encoded(obs['image_observation'])
            print(encoded)
            print('------------------------------')
            get_img = dill.dumps(vae_environment._get_img)
            with open(
                    "/home/xiaomin/Downloads/IFIG_encoder_decoder/get_img_1000_epochs_one_puck.txt",
                    "wb") as fp:
                pickle.dump(get_img, fp)
            with open(
                    "/home/xiaomin/Downloads/IFIG_encoder_decoder/get_img_1000_epochs_one_puck.txt",
                    "rb") as fp:
                c = pickle.load(fp)
            func_get_img = dill.loads(c)

            img_1_reconstruct = func_get_img(encoded).transpose()
            print(img_1_reconstruct)
            #dataset[i, :] = unormalize_image(img)
            # img_1 = img_1.reshape(3, imsize, imsize).transpose()
            cv2.imshow('test', img_1_reconstruct)
            cv2.waitKey(0)

            print("done making training data", filename, time.time() - now)
            np.save(filename, dataset)
Example #24
0
def _use_disentangled_encoder_distance(
        max_path_length,
        encoder_kwargs,
        disentangled_qf_kwargs,
        qf_kwargs,
        sac_trainer_kwargs,
        replay_buffer_kwargs,
        policy_kwargs,
        evaluation_goal_sampling_mode,
        exploration_goal_sampling_mode,
        algo_kwargs,
        env_id=None,
        env_class=None,
        env_kwargs=None,
        encoder_key_prefix='encoder',
        encoder_input_prefix='state',
        latent_dim=2,
        reward_mode=EncoderWrappedEnv.ENCODER_DISTANCE_REWARD,
        # Video parameters
        save_video=True,
        save_video_kwargs=None,
        save_vf_heatmap=True,
        **kwargs):
    """Run HER + SAC where the reward is a distance in an encoder's space.

    Wraps the raw envs in ``EncoderWrappedEnv`` so observations/goals are
    encoder outputs, builds disentangled Q-functions, path collectors, and
    the batch RL algorithm, then calls ``algorithm.train()``.  With
    ``save_video`` set, video hooks with per-dimension value heatmaps are
    attached as post-train functions.
    """
    if save_video_kwargs is None:
        save_video_kwargs = {}
    if env_kwargs is None:
        env_kwargs = {}
    assert env_id or env_class
    # Vectorized rewards are used iff the reward mode is the vectorized
    # encoder-distance variant.
    vectorized = (
        reward_mode == EncoderWrappedEnv.VECTORIZED_ENCODER_DISTANCE_REWARD)

    if env_id:
        import gym
        import multiworld
        multiworld.register_all_envs()
        raw_train_env = gym.make(env_id)
        raw_eval_env = gym.make(env_id)
    else:
        raw_eval_env = env_class(**env_kwargs)
        raw_train_env = env_class(**env_kwargs)

    raw_train_env.goal_sampling_mode = exploration_goal_sampling_mode
    raw_eval_env.goal_sampling_mode = evaluation_goal_sampling_mode

    raw_obs_dim = (
        raw_train_env.observation_space.spaces['state_observation'].low.size)
    action_dim = raw_train_env.action_space.low.size

    # NOTE(review): the ConcatMlp encoder built here is immediately
    # overwritten by an Identity (so the "encoder" is a no-op pass-through
    # of the raw state, and encoder_kwargs/latent_dim are effectively
    # unused).  This looks like a deliberate debug/ablation override —
    # confirm before removing either line.
    encoder = ConcatMlp(input_size=raw_obs_dim,
                        output_size=latent_dim,
                        **encoder_kwargs)
    encoder = Identity()
    encoder.input_size = raw_obs_dim
    encoder.output_size = raw_obs_dim

    # Numpy-facing wrapper around the torch encoder for the env wrappers.
    np_encoder = EncoderFromNetwork(encoder)
    train_env = EncoderWrappedEnv(
        raw_train_env,
        np_encoder,
        encoder_input_prefix,
        key_prefix=encoder_key_prefix,
        reward_mode=reward_mode,
    )
    eval_env = EncoderWrappedEnv(
        raw_eval_env,
        np_encoder,
        encoder_input_prefix,
        key_prefix=encoder_key_prefix,
        reward_mode=reward_mode,
    )
    # Observation-dict keys produced by EncoderWrappedEnv, e.g.
    # 'encoder_observation' for the default prefix.
    observation_key = '{}_observation'.format(encoder_key_prefix)
    desired_goal_key = '{}_desired_goal'.format(encoder_key_prefix)
    achieved_goal_key = '{}_achieved_goal'.format(encoder_key_prefix)
    obs_dim = train_env.observation_space.spaces[observation_key].low.size
    goal_dim = train_env.observation_space.spaces[desired_goal_key].low.size

    def make_qf():
        # Shared factory so the twin critics and targets match.
        return DisentangledMlpQf(encoder=encoder,
                                 preprocess_obs_dim=obs_dim,
                                 action_dim=action_dim,
                                 qf_kwargs=qf_kwargs,
                                 vectorized=vectorized,
                                 **disentangled_qf_kwargs)

    qf1 = make_qf()
    qf2 = make_qf()
    target_qf1 = make_qf()
    target_qf2 = make_qf()

    policy = TanhGaussianPolicy(obs_dim=obs_dim + goal_dim,
                                action_dim=action_dim,
                                **policy_kwargs)

    replay_buffer = ObsDictRelabelingBuffer(
        env=train_env,
        observation_key=observation_key,
        desired_goal_key=desired_goal_key,
        achieved_goal_key=achieved_goal_key,
        vectorized=vectorized,
        **replay_buffer_kwargs)
    sac_trainer = SACTrainer(env=train_env,
                             policy=policy,
                             qf1=qf1,
                             qf2=qf2,
                             target_qf1=target_qf1,
                             target_qf2=target_qf2,
                             **sac_trainer_kwargs)
    trainer = HERTrainer(sac_trainer)

    eval_path_collector = GoalConditionedPathCollector(
        eval_env,
        MakeDeterministic(policy),
        max_path_length,
        observation_key=observation_key,
        desired_goal_key=desired_goal_key,
        goal_sampling_mode='env',
    )
    expl_path_collector = GoalConditionedPathCollector(
        train_env,
        policy,
        max_path_length,
        observation_key=observation_key,
        desired_goal_key=desired_goal_key,
        goal_sampling_mode='env',
    )
    algorithm = TorchBatchRLAlgorithm(
        trainer=trainer,
        exploration_env=train_env,
        evaluation_env=eval_env,
        exploration_data_collector=expl_path_collector,
        evaluation_data_collector=eval_path_collector,
        replay_buffer=replay_buffer,
        max_path_length=max_path_length,
        **algo_kwargs)
    algorithm.to(ptu.device)

    if save_video:

        def v_function(obs):
            # V(s) estimate: evaluate qf1 at the policy's chosen action,
            # returning per-dimension Q values for the heatmaps.
            action = policy.get_actions(obs)
            obs, action = ptu.from_numpy(obs), ptu.from_numpy(action)
            return qf1(obs, action, return_individual_q_vals=True)

        add_heatmap = partial(
            add_heatmap_imgs_to_o_dict,
            v_function=v_function,
            vectorized=vectorized,
        )
        rollout_function = rf.create_rollout_function(
            rf.multitask_rollout,
            max_path_length=max_path_length,
            observation_key=observation_key,
            desired_goal_key=desired_goal_key,
            full_o_postprocess_func=add_heatmap if save_vf_heatmap else None,
        )
        img_keys = ['v_vals'] + [
            'v_vals_dim_{}'.format(dim) for dim in range(latent_dim)
        ]
        eval_video_func = get_save_video_function(rollout_function,
                                                  eval_env,
                                                  MakeDeterministic(policy),
                                                  get_extra_imgs=partial(
                                                      get_extra_imgs,
                                                      img_keys=img_keys),
                                                  tag="eval",
                                                  **save_video_kwargs)
        train_video_func = get_save_video_function(rollout_function,
                                                   train_env,
                                                   policy,
                                                   get_extra_imgs=partial(
                                                       get_extra_imgs,
                                                       img_keys=img_keys),
                                                   tag="train",
                                                   **save_video_kwargs)
        algorithm.post_train_funcs.append(eval_video_func)
        algorithm.post_train_funcs.append(train_video_func)
    algorithm.train()
Example #25
0
# 10/09/19    Tim Liu    changed variable bit_size to num_bits
# 10/14/19    Tim Liu    began modifying for SawyerReach environment
# 10/16/19    Tim Liu    added take_action
# 10/16/19    Tim Liu    changed num_epochs to 150, STEPS_PER to 50, and done_threshold -0.01
# 10/21/19    Tim Liu    added rendering argument

import numpy as np
import tensorflow as tf
# import tensorflow.contrib.slim as slim
import tf_slim as slim
from buffers import Buffer
from matplotlib import pyplot as plt
import multiworld
import gym

multiworld.register_all_envs()  # register the multiworld environment

# Command-line configuration via the TF1-style flags API.
# NOTE(review): `tf.app.flags` was removed in TensorFlow 2 — this module
# presumably runs under TF1 or a compat shim; confirm before upgrading.
flags = tf.app.flags
flags.DEFINE_string(
    "HER", "None",
    "different strategies of choosing goal. Possible values are :- future, final, episode or None. If None HER is not used"
)
flags.DEFINE_integer("num_epochs", 150, "Number of epochs to run training for")
flags.DEFINE_integer("log_interval", 5, "Epochs between printing log info")
flags.DEFINE_integer("opt_steps", 40, "Optimization steps in each epoch")
flags.DEFINE_integer("steps_per_episode", 50, "Number of steps per epoch")
flags.DEFINE_bool("render", False, "render the Sawyer arm")

# Parsed flag values, read elsewhere in the script.
FLAGS = flags.FLAGS

# ************   Define global variables and initialize    ************ #
def get_envs(variant):
    """Build the environment described by ``variant``, optionally wrapped.

    Resolution order:
      1. ``variant['env_id']`` via ``gym.make`` if present, else
         ``variant['env_class'](**variant['env_kwargs'])``.
      2. If ``do_state_exp`` is False, wrap in ``ImageEnv`` (unless it already
         is one) and then in ``VAEWrappedEnv``, optionally presampling goals.

    Returns the final (possibly VAE-wrapped) environment.
    """
    from multiworld.core.image_env import ImageEnv
    from rlkit.envs.vae_wrapper import VAEWrappedEnv
    from rlkit.util.io import load_local_or_remote_file

    render = variant.get('render', False)
    vae_path = variant.get("vae_path", None)
    reward_params = variant.get("reward_params", dict())
    init_camera = variant.get("init_camera", None)
    do_state_exp = variant.get("do_state_exp", False)
    presample_goals = variant.get('presample_goals', False)
    presample_image_goals_only = variant.get('presample_image_goals_only',
                                             False)
    presampled_goals_path = variant.get('presampled_goals_path', None)

    # vae_path may be either a path (str) to load or an already-built VAE.
    vae = load_local_or_remote_file(
        vae_path) if type(vae_path) is str else vae_path
    if 'env_id' in variant:
        import gym
        import multiworld
        multiworld.register_all_envs()
        env = gym.make(variant['env_id'])
    else:
        env = variant["env_class"](**variant['env_kwargs'])
    if not do_state_exp:
        if isinstance(env, ImageEnv):
            image_env = env
        else:
            image_env = ImageEnv(
                env,
                variant.get('imsize'),
                init_camera=init_camera,
                transpose=True,
                normalize=True,
            )
        if presample_goals:
            """
            This will fail for online-parallel as presampled_goals will not be
            serialized. Also don't use this for online-vae.
            """
            if presampled_goals_path is None:
                # No saved goals: build a throwaway VAE env just to generate
                # them, then discard it and rebuild with the goals baked in.
                image_env.non_presampled_goal_img_is_garbage = True
                vae_env = VAEWrappedEnv(image_env,
                                        vae,
                                        imsize=image_env.imsize,
                                        decode_goals=render,
                                        render_goals=render,
                                        render_rollouts=render,
                                        reward_params=reward_params,
                                        **variant.get('vae_wrapped_env_kwargs',
                                                      {}))
                presampled_goals = variant['generate_goal_dataset_fctn'](
                    env=vae_env,
                    env_id=variant.get('env_id', None),
                    **variant['goal_generation_kwargs'])
                del vae_env
            else:
                presampled_goals = load_local_or_remote_file(
                    presampled_goals_path).item()
            # Rebuild the image env with the presampled goals attached.
            del image_env
            image_env = ImageEnv(env,
                                 variant.get('imsize'),
                                 init_camera=init_camera,
                                 transpose=True,
                                 normalize=True,
                                 presampled_goals=presampled_goals,
                                 **variant.get('image_env_kwargs', {}))
            vae_env = VAEWrappedEnv(image_env,
                                    vae,
                                    imsize=image_env.imsize,
                                    decode_goals=render,
                                    render_goals=render,
                                    render_rollouts=render,
                                    reward_params=reward_params,
                                    presampled_goals=presampled_goals,
                                    **variant.get('vae_wrapped_env_kwargs',
                                                  {}))
            print("Presampling all goals only")
        else:
            vae_env = VAEWrappedEnv(image_env,
                                    vae,
                                    imsize=image_env.imsize,
                                    decode_goals=render,
                                    render_goals=render,
                                    render_rollouts=render,
                                    reward_params=reward_params,
                                    **variant.get('vae_wrapped_env_kwargs',
                                                  {}))
            if presample_image_goals_only:
                # Sample image goals on the inner ImageEnv only.
                presampled_goals = variant['generate_goal_dataset_fctn'](
                    image_env=vae_env.wrapped_env,
                    **variant['goal_generation_kwargs'])
                image_env.set_presampled_goals(presampled_goals)
                print("Presampling image goals only")
            else:
                print("Not using presampled goals")

        env = vae_env

    return env
Example #27
0
def generate_vae_dataset(variant):
    """Generate or load an image dataset for VAE (pre-)training.

    The data comes from one of three places:
      (a) ``variant['dataset_path']`` — a str path, a list of paths, or a
          ``{'train': ..., 'test': ...}`` dict of either;
      (b) a cached ``/tmp`` file (when ``use_cached`` and the file exists);
      (c) fresh environment rollouts (random, oracle set-to-goal, or a mix).

    Returns:
        (train_dataset, test_dataset, info): the concrete dataset classes
        depend on ``use_linear_dynamics`` / ``conditional_vae_dataset`` /
        ``enviorment_dataset``; ``info`` may contain the env and label lists.

    Fixes vs. the original:
      * ``dtype=np.float`` -> ``np.float64`` (the ``np.float`` alias was
        removed in NumPy 1.24; same dtype either way).
      * Removed a dead sequential train/test split that was immediately
        overwritten by the shuffled split — and that accessed
        ``dataset['env']`` *outside* the try/except meant to handle a
        missing ``'env'`` key, so the fallback could never fire.
      * Bare ``except:`` narrowed to ``except KeyError:`` (the missing
        ``'env'`` key is the case the fallback handles).
    """
    print(variant)
    from tqdm import tqdm
    # ---- configuration -------------------------------------------------
    env_class = variant.get('env_class', None)
    env_kwargs = variant.get('env_kwargs', None)
    env_id = variant.get('env_id', None)
    N = variant.get('N', 10000)
    batch_size = variant.get('batch_size', 128)
    test_p = variant.get('test_p', 0.9)
    use_cached = variant.get('use_cached', True)
    imsize = variant.get('imsize', 84)
    num_channels = variant.get('num_channels', 3)
    show = variant.get('show', False)
    init_camera = variant.get('init_camera', None)
    dataset_path = variant.get('dataset_path', None)
    augment_data = variant.get('augment_data', False)
    data_filter_fn = variant.get('data_filter_fn', lambda x: x)
    delete_after_loading = variant.get('delete_after_loading', False)
    oracle_dataset_using_set_to_goal = variant.get(
        'oracle_dataset_using_set_to_goal', False)
    random_rollout_data = variant.get('random_rollout_data', False)
    random_rollout_data_set_to_goal = variant.get(
        'random_rollout_data_set_to_goal', True)
    random_and_oracle_policy_data = variant.get(
        'random_and_oracle_policy_data', False)
    random_and_oracle_policy_data_split = variant.get(
        'random_and_oracle_policy_data_split', 0)
    policy_file = variant.get('policy_file', None)
    n_random_steps = variant.get('n_random_steps', 100)
    vae_dataset_specific_env_kwargs = variant.get(
        'vae_dataset_specific_env_kwargs', None)
    save_file_prefix = variant.get('save_file_prefix', None)
    non_presampled_goal_img_is_garbage = variant.get(
        'non_presampled_goal_img_is_garbage', None)

    conditional_vae_dataset = variant.get('conditional_vae_dataset', False)
    use_env_labels = variant.get('use_env_labels', False)
    use_linear_dynamics = variant.get('use_linear_dynamics', False)
    enviorment_dataset = variant.get('enviorment_dataset', False)
    save_trajectories = variant.get('save_trajectories', False)
    # Trajectory structure is required by both of these dataset kinds.
    save_trajectories = save_trajectories or use_linear_dynamics or conditional_vae_dataset
    tag = variant.get('tag', '')

    assert N % n_random_steps == 0, "Fix N/horizon or dataset generation will fail"

    from multiworld.core.image_env import ImageEnv, unormalize_image
    import rlkit.torch.pytorch_util as ptu
    from rlkit.util.io import load_local_or_remote_file
    from rlkit.data_management.dataset import (
        TrajectoryDataset, ImageObservationDataset, InitialObservationDataset,
        EnvironmentDataset, ConditionalDynamicsDataset,
        InitialObservationNumpyDataset, InfiniteBatchLoader,
        InitialObservationNumpyJitteringDataset)
    info = {}
    use_test_dataset = False
    if dataset_path is not None:
        # ---- case (a): load pre-saved data; infer N / horizon from shape.
        if type(dataset_path) == str:
            dataset = load_local_or_remote_file(
                dataset_path, delete_after_loading=delete_after_loading)
            dataset = dataset.item()
            N = dataset['observations'].shape[0] * dataset[
                'observations'].shape[1]
            n_random_steps = dataset['observations'].shape[1]
        if isinstance(dataset_path, list):
            dataset = concatenate_datasets(dataset_path)
            N = dataset['observations'].shape[0] * dataset[
                'observations'].shape[1]
            n_random_steps = dataset['observations'].shape[1]
        if isinstance(dataset_path, dict):
            # Separate, explicitly provided train and test files.
            if type(dataset_path['train']) == str:
                dataset = load_local_or_remote_file(
                    dataset_path['train'],
                    delete_after_loading=delete_after_loading)
                dataset = dataset.item()
            elif isinstance(dataset_path['train'], list):
                dataset = concatenate_datasets(dataset_path['train'])

            if type(dataset_path['test']) == str:
                test_dataset = load_local_or_remote_file(
                    dataset_path['test'],
                    delete_after_loading=delete_after_loading)
                test_dataset = test_dataset.item()
            elif isinstance(dataset_path['test'], list):
                test_dataset = concatenate_datasets(dataset_path['test'])

            N = dataset['observations'].shape[0] * dataset[
                'observations'].shape[1]
            n_random_steps = dataset['observations'].shape[1]
            use_test_dataset = True
    else:
        if env_kwargs is None:
            env_kwargs = {}
        if save_file_prefix is None:
            save_file_prefix = env_id
        if save_file_prefix is None:
            save_file_prefix = env_class.__name__
        filename = "/tmp/{}_N{}_{}_imsize{}_random_oracle_split_{}{}.npy".format(
            save_file_prefix,
            str(N),
            init_camera.__name__
            if init_camera and hasattr(init_camera, '__name__') else '',
            imsize,
            random_and_oracle_policy_data_split,
            tag,
        )
        if use_cached and osp.isfile(filename):
            # ---- case (b): reuse a previously generated /tmp cache.
            dataset = load_local_or_remote_file(
                filename, delete_after_loading=delete_after_loading)
            if conditional_vae_dataset:
                dataset = dataset.item()
            print("loaded data from saved file", filename)
        else:
            # ---- case (c): collect data by rolling out the environment.
            now = time.time()
            if env_id is not None:
                import gym
                import multiworld
                multiworld.register_all_envs()
                env = gym.make(env_id)
            else:
                if vae_dataset_specific_env_kwargs is None:
                    vae_dataset_specific_env_kwargs = {}
                for key, val in env_kwargs.items():
                    if key not in vae_dataset_specific_env_kwargs:
                        vae_dataset_specific_env_kwargs[key] = val
                env = env_class(**vae_dataset_specific_env_kwargs)
            if not isinstance(env, ImageEnv):
                env = ImageEnv(
                    env,
                    imsize,
                    init_camera=init_camera,
                    transpose=True,
                    normalize=True,
                    non_presampled_goal_img_is_garbage=
                    non_presampled_goal_img_is_garbage,
                )
            else:
                imsize = env.imsize
                env.non_presampled_goal_img_is_garbage = non_presampled_goal_img_is_garbage
            env.reset()
            info['env'] = env
            if random_and_oracle_policy_data:
                policy_file = load_local_or_remote_file(policy_file)
                policy = policy_file['policy']
                policy.to(ptu.device)
            if random_rollout_data:
                from rlkit.exploration_strategies.ou_strategy import OUStrategy
                policy = OUStrategy(env.action_space)

            if save_trajectories:
                dataset = {
                    'observations':
                    np.zeros((N // n_random_steps, n_random_steps,
                              imsize * imsize * num_channels),
                             dtype=np.uint8),
                    'actions':
                    # np.float was removed in NumPy 1.24; float64 is the
                    # dtype it always aliased.
                    np.zeros((N // n_random_steps, n_random_steps,
                              env.action_space.shape[0]),
                             dtype=np.float64),
                    'env':
                    np.zeros(
                        (N // n_random_steps, imsize * imsize * num_channels),
                        dtype=np.uint8),
                }
            else:
                dataset = np.zeros((N, imsize * imsize * num_channels),
                                   dtype=np.uint8)
            labels = []
            # NOTE(review): `u` and `env_img` are only assigned in the
            # random_rollout_data branch, but are read below whenever
            # save_trajectories is True — other data modes with trajectories
            # would raise NameError. Preserved as-is; confirm intended usage.
            for i in tqdm(range(N)):
                if random_and_oracle_policy_data:
                    num_random_steps = int(N *
                                           random_and_oracle_policy_data_split)
                    if i < num_random_steps:
                        env.reset()
                        for _ in range(n_random_steps):
                            obs = env.step(env.action_space.sample())[0]
                    else:
                        obs = env.reset()
                        policy.reset()
                        for _ in range(n_random_steps):
                            policy_obs = np.hstack((
                                obs['state_observation'],
                                obs['state_desired_goal'],
                            ))
                            action, _ = policy.get_action(policy_obs)
                            obs, _, _, _ = env.step(action)
                elif random_rollout_data:  #ADD DATA WHERE JUST PUCK MOVES
                    if i % n_random_steps == 0:
                        env.reset()
                        policy.reset()
                        env_img = env._get_obs()['image_observation']
                        if random_rollout_data_set_to_goal:
                            env.set_to_goal(env.get_goal())
                    obs = env._get_obs()
                    u = policy.get_action_from_raw_action(
                        env.action_space.sample())
                    env.step(u)
                elif oracle_dataset_using_set_to_goal:
                    print(i)

                    goal = env.sample_goal()
                    env.set_to_goal(goal)
                    obs = env._get_obs()
                else:
                    env.reset()
                    for _ in range(n_random_steps):
                        obs = env.step(env.action_space.sample())[0]

                img = obs['image_observation']
                if use_env_labels:
                    labels.append(obs['label'])
                if save_trajectories:
                    dataset['observations'][
                        i // n_random_steps,
                        i % n_random_steps, :] = unormalize_image(img)
                    dataset['actions'][i // n_random_steps,
                                       i % n_random_steps, :] = u
                    dataset['env'][i // n_random_steps, :] = unormalize_image(
                        env_img)
                else:
                    dataset[i, :] = unormalize_image(img)

                if show:
                    img = img.reshape(3, imsize, imsize).transpose()
                    img = img[::-1, :, ::-1]
                    cv2.imshow('img', img)
                    cv2.waitKey(1)
                    # radius = input('waiting...')
            print("done making training data", filename, time.time() - now)
            np.save(filename, dataset)
            #np.save(filename[:-4] + 'labels.npy', np.array(labels))

    info['train_labels'] = []
    info['test_labels'] = []

    dataset = data_filter_fn(dataset)
    # ---- wrap the raw arrays in the requested dataset classes -----------
    if use_linear_dynamics and conditional_vae_dataset:
        # Shuffled trajectory-level train/test split.
        num_trajectories = N // n_random_steps
        n = int(num_trajectories * test_p)
        indices = np.arange(num_trajectories)
        np.random.shuffle(indices)
        train_i, test_i = indices[:n], indices[n:]

        try:
            train_dataset = ConditionalDynamicsDataset({
                'observations':
                dataset['observations'][train_i, :, :],
                'actions':
                dataset['actions'][train_i, :, :],
                'env':
                dataset['env'][train_i, :]
            })
            test_dataset = ConditionalDynamicsDataset({
                'observations':
                dataset['observations'][test_i, :, :],
                'actions':
                dataset['actions'][test_i, :, :],
                'env':
                dataset['env'][test_i, :]
            })
        except KeyError:
            # Loaded data without an 'env' array: fall back to obs+actions.
            train_dataset = ConditionalDynamicsDataset({
                'observations':
                dataset['observations'][train_i, :, :],
                'actions':
                dataset['actions'][train_i, :, :],
            })
            test_dataset = ConditionalDynamicsDataset({
                'observations':
                dataset['observations'][test_i, :, :],
                'actions':
                dataset['actions'][test_i, :, :],
            })
    elif use_linear_dynamics:
        # Sequential trajectory-level split.
        num_trajectories = N // n_random_steps
        n = int(num_trajectories * test_p)
        train_dataset = TrajectoryDataset({
            'observations':
            dataset['observations'][:n, :, :],
            'actions':
            dataset['actions'][:n, :, :]
        })
        test_dataset = TrajectoryDataset({
            'observations':
            dataset['observations'][n:, :, :],
            'actions':
            dataset['actions'][n:, :, :]
        })
    elif enviorment_dataset:
        # Split along the within-trajectory (timestep) axis.
        n = int(n_random_steps * test_p)
        train_dataset = EnvironmentDataset({
            'observations':
            dataset['observations'][:, :n, :],
        })
        test_dataset = EnvironmentDataset({
            'observations':
            dataset['observations'][:, n:, :],
        })
    elif conditional_vae_dataset:
        num_trajectories = N // n_random_steps
        n = int(num_trajectories * test_p)
        indices = np.arange(num_trajectories)
        np.random.shuffle(indices)
        train_i, test_i = indices[:n], indices[n:]

        if augment_data:
            dataset_class = InitialObservationNumpyJitteringDataset
        else:
            dataset_class = InitialObservationNumpyDataset

        # Default the conditioning frame to each trajectory's first obs.
        if 'env' not in dataset:
            dataset['env'] = dataset['observations'][:, 0]
        if use_test_dataset and ('env' not in test_dataset):
            test_dataset['env'] = test_dataset['observations'][:, 0]

        if use_test_dataset:
            # Explicit test file provided: no shuffled split needed.
            train_dataset = dataset_class({
                'observations':
                dataset['observations'],
                'env':
                dataset['env']
            })

            test_dataset = dataset_class({
                'observations':
                test_dataset['observations'],
                'env':
                test_dataset['env']
            })
        else:
            train_dataset = dataset_class({
                'observations':
                dataset['observations'][train_i, :, :],
                'env':
                dataset['env'][train_i, :]
            })

            test_dataset = dataset_class({
                'observations':
                dataset['observations'][test_i, :, :],
                'env':
                dataset['env'][test_i, :]
            })

        train_batch_loader_kwargs = variant.get(
            'train_batch_loader_kwargs',
            dict(
                batch_size=batch_size,
                num_workers=0,
            ))
        test_batch_loader_kwargs = variant.get(
            'test_batch_loader_kwargs',
            dict(
                batch_size=batch_size,
                num_workers=0,
            ))

        train_data_loader = data.DataLoader(train_dataset,
                                            shuffle=True,
                                            drop_last=True,
                                            **train_batch_loader_kwargs)
        test_data_loader = data.DataLoader(test_dataset,
                                           shuffle=True,
                                           drop_last=True,
                                           **test_batch_loader_kwargs)

        train_dataset = InfiniteBatchLoader(train_data_loader)
        test_dataset = InfiniteBatchLoader(test_data_loader)
    else:
        # Flat image dataset: simple sequential split over N images.
        n = int(N * test_p)
        train_dataset = ImageObservationDataset(dataset[:n, :])
        test_dataset = ImageObservationDataset(dataset[n:, :])
    return train_dataset, test_dataset, info
Example #28
0
def experiment(variant):
    """Train TD3 with HER relabeling on SawyerPushXYZEnv-v0.

    ``variant`` supplies kwargs dicts for the Q-functions, policy, replay
    buffer, trainer, and algorithm. Builds twin Q-networks with targets, a
    Tanh policy with target, Gaussian+epsilon exploration, an HER relabeling
    buffer, and runs the batch RL loop.
    """
    import multiworld
    multiworld.register_all_envs()

    # Identical env instances for evaluation and exploration.
    eval_env = gym.make('SawyerPushXYZEnv-v0')
    expl_env = gym.make('SawyerPushXYZEnv-v0')

    observation_key = 'state_observation'
    desired_goal_key = 'state_desired_goal'
    achieved_goal_key = desired_goal_key.replace("desired", "achieved")

    es = GaussianAndEpislonStrategy(
        action_space=expl_env.action_space,
        max_sigma=.2,
        min_sigma=.2,  # constant sigma
        epsilon=.3,
    )

    obs_dim = expl_env.observation_space.spaces['observation'].low.size
    goal_dim = expl_env.observation_space.spaces['desired_goal'].low.size
    action_dim = expl_env.action_space.low.size

    def _build_qf():
        # Q(s, g, a) -> scalar value.
        return FlattenMlp(
            input_size=obs_dim + goal_dim + action_dim,
            output_size=1,
            **variant['qf_kwargs']
        )

    def _build_policy():
        # Deterministic goal-conditioned policy pi(s, g) -> a.
        return TanhMlpPolicy(
            input_size=obs_dim + goal_dim,
            output_size=action_dim,
            **variant['policy_kwargs']
        )

    # Construction order matches the original (qf1, qf2, targets, policies)
    # so any RNG-dependent initialization is unchanged.
    qf1 = _build_qf()
    qf2 = _build_qf()
    target_qf1 = _build_qf()
    target_qf2 = _build_qf()
    policy = _build_policy()
    target_policy = _build_policy()

    expl_policy = PolicyWrappedWithExplorationStrategy(
        exploration_strategy=es,
        policy=policy,
    )
    replay_buffer = ObsDictRelabelingBuffer(
        env=eval_env,
        observation_key=observation_key,
        desired_goal_key=desired_goal_key,
        achieved_goal_key=achieved_goal_key,
        **variant['replay_buffer_kwargs']
    )
    trainer = HERTrainer(TD3Trainer(
        policy=policy,
        qf1=qf1,
        qf2=qf2,
        target_qf1=target_qf1,
        target_qf2=target_qf2,
        target_policy=target_policy,
        **variant['trainer_kwargs']
    ))
    eval_path_collector = GoalConditionedPathCollector(
        eval_env,
        policy,
        observation_key=observation_key,
        desired_goal_key=desired_goal_key,
    )
    expl_path_collector = GoalConditionedPathCollector(
        expl_env,
        expl_policy,
        observation_key=observation_key,
        desired_goal_key=desired_goal_key,
    )
    algorithm = TorchBatchRLAlgorithm(
        trainer=trainer,
        exploration_env=expl_env,
        evaluation_env=eval_env,
        exploration_data_collector=expl_path_collector,
        evaluation_data_collector=eval_path_collector,
        replay_buffer=replay_buffer,
        **variant['algo_kwargs']
    )
    algorithm.to(ptu.device)
    algorithm.train()
Example #29
0
def generate_sawyerhurdle_dataset(variant,
                                  segmented=False,
                                  segmentation_method='unet'):
    """Generate (or load cached) VAE training images for SawyerPushHurdle envs.

    Args:
        variant: config dict; reads env_id, N, test_p, imsize, num_channels,
            init_camera, segmentation_kwargs.
        segmented: if True, run each image through the segmentation function
            before storing it.
        segmentation_method: only 'unet' is supported.

    Returns:
        (train_dataset, test_dataset, info) where the datasets are flat
        uint8 arrays of shape (n, imsize*imsize*num_channels) and info
        contains the sampled puck positions (and the env when regenerated).

    Fix vs. original: ``dtype=np.float`` -> ``np.float64`` (the ``np.float``
    alias was removed in NumPy 1.24 and raises AttributeError there).
    """
    from multiworld.core.image_env import ImageEnv, unormalize_image

    env_id = variant.get('env_id', None)
    N = variant.get('N', 10000)
    test_p = variant.get('test_p', 0.9)
    imsize = variant.get('imsize', 84)
    num_channels = variant.get('num_channels', 3)
    init_camera = variant.get('init_camera', None)
    segmentation_kwargs = variant.get('segmentation_kwargs', {})

    # Cache files live under $PJHOME, keyed by env, segmentation mode, and N.
    pjhome = os.environ['PJHOME']
    seg_name = 'seg-' + segmentation_method if segmented else 'no-seg'
    data_file_path = osp.join(pjhome, 'data/local/pre-train-vae',
                              '{}-{}-{}.npy'.format(env_id, seg_name, N))
    puck_pos_path = osp.join(
        pjhome, 'data/local/pre-train-vae',
        '{}-{}-{}-puck-pos.npy'.format(env_id, seg_name, N))

    # Fast path: reuse a cached dataset if it is large enough.
    if osp.exists(data_file_path):
        all_data = np.load(data_file_path)
        if len(all_data) >= N:
            print("load stored data at: ", data_file_path)
            n = int(len(all_data) * test_p)
            train_dataset = all_data[:n]
            test_dataset = all_data[n:]
            puck_pos = np.load(puck_pos_path)
            info = {'puck_pos': puck_pos}
            return train_dataset, test_dataset, info

    if segmented:
        print("generating vae dataset with segmented images using method: ",
              segmentation_method)
        if segmentation_method == 'unet':
            segment_func = segment_image_unet
        else:
            raise NotImplementedError
    else:
        print("generating vae dataset with original images")

    assert env_id is not None
    import gym
    import multiworld
    multiworld.register_all_envs()
    env = gym.make(env_id)

    if not isinstance(env, ImageEnv):
        env = ImageEnv(
            env,
            imsize,
            init_camera=init_camera,
            transpose=True,
            normalize=True,
        )

    info = {}
    env.reset()
    info['env'] = env

    dataset = np.zeros((N, imsize * imsize * num_channels), dtype=np.uint8)
    # np.float was removed in NumPy 1.24; float64 is the dtype it aliased.
    puck_pos = np.zeros((N, 2), dtype=np.float64)

    for i in range(N):
        print("sawyer hurdle custom vae data set generation, number: ", i)
        # Env-specific sample helpers return an observation plus puck xy.
        if env_id == 'SawyerPushHurdle-v0':
            obs, puck_p = _generate_sawyerhurdle_dataset(env,
                                                         return_puck_pos=True)
        elif env_id == 'SawyerPushHurdleMiddle-v0':
            obs, puck_p = _generate_sawyerhurdlemiddle_dataset(
                env, return_puck_pos=True)
        else:
            raise NotImplementedError
        img = obs[
            'image_observation']  # NOTE yufei: this is already normalized image, of detype np.float64.

        if segmented:
            dataset[i, :] = segment_func(img,
                                         normalize=False,
                                         **segmentation_kwargs)
        else:
            dataset[i, :] = unormalize_image(img)
        puck_pos[i] = puck_p

    # Sequential train/test split over the N collected images.
    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]

    info['puck_pos'] = puck_pos

    # Persist large runs so future calls can take the fast path above.
    if N >= 2000:
        print('save data to: ', data_file_path)
        all_data = np.concatenate([train_dataset, test_dataset], axis=0)
        np.save(data_file_path, all_data)
        np.save(puck_pos_path, puck_pos)

    return train_dataset, test_dataset, info