Example #1
def load_vae_meta_data(variant):
    from railrl.misc.asset_loader import local_path_from_s3_or_local_path
    import os.path as osp
    import json

    rl_variant = variant['rl_variant']
    if 'vae_path' in rl_variant:
        local_path = local_path_from_s3_or_local_path(
            osp.join(rl_variant['vae_path'], 'variant.json'))
        with open(local_path) as f:
            data = json.load(f)
            variant['vae_exp_prefix'] = data['exp_prefix']
            variant['vae_exp_id'] = data['exp_id']
            variant['vae_seed'] = data['seed']
            if 'vae_variant' in data:
                variant['vae_variant'] = data['vae_variant']
            else:
                variant['vae_variant'] = data['train_vae_variant']
    if 'reproj_vae_path' in rl_variant:
        local_path = local_path_from_s3_or_local_path(
            osp.join(rl_variant['reproj_vae_path'], 'variant.json'))
        with open(local_path) as f:
            data = json.load(f)
            variant['reproj_vae_exp_prefix'] = data['exp_prefix']
            variant['reproj_vae_exp_id'] = data['exp_id']
            variant['reproj_vae_seed'] = data['seed']
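A minimal usage sketch for load_vae_meta_data, assuming a hypothetical experiment directory whose variant.json was written by an earlier VAE run (the path and dictionary layout below are placeholders, not outputs of the original repository):

# Hypothetical input; 'vae_path' must point at a directory containing variant.json
# with at least exp_prefix, exp_id, seed, and a (train_)vae_variant entry.
variant = {
    'rl_variant': {
        'vae_path': '/path/to/vae-experiment-dir',
    },
}
load_vae_meta_data(variant)
# The call copies the VAE run's metadata into the top-level dict:
# variant['vae_exp_prefix'], variant['vae_exp_id'], variant['vae_seed'], variant['vae_variant']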
Example #2
    def update_networks_func(algo, epoch):
        if epoch % algo.epoch_freq != 0 and epoch != algo.num_epochs - 1:
            exit()
        if epoch == algo.num_epochs - 1:
            filename = local_path_from_s3_or_local_path(osp.join(variant['ckpt'], 'params.pkl'))
        else:
            filename = local_path_from_s3_or_local_path(osp.join(variant['ckpt'], 'itr_%d.pkl' % epoch))
        print("updating networks from {}".format(filename))
        data = joblib.load(filename)
        assert (data["epoch"] == epoch)
        algo.qf1 = data['qf1']
        algo.qf2 = data['qf2']
        algo.policy = data['trained_policy']
        algo.target_policy = data["target_policy"]
        algo.exploration_policy = data["exploration_policy"]

        if 'n_env_steps_total' in data:
            algo._n_env_steps_total = data["n_env_steps_total"]

        if isinstance(algo.eval_policy, SubgoalPlanner):
            algo.eval_policy.qf = algo.qf1
            algo.eval_policy.mf_policy = algo.policy
        else:
            algo.eval_policy = data["eval_policy"]

        if ptu.gpu_enabled():
            algo.cuda()
        if hasattr(algo, "update_sampler_and_rollout_function"):
            algo.update_sampler_and_rollout_function()
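The hook above is meant to be registered as a post-epoch callback, mirroring the algorithm.post_epoch_funcs.append(get_update_networks_func(variant)) call that appears in the tdm_td3_experiment example further down. A sketch of that wiring, assuming get_update_networks_func(variant) simply returns the closure defined above:

# Sketch of registering the checkpoint-reloading hook (names as assumed above).
update_networks = get_update_networks_func(variant)
algorithm.post_epoch_funcs.append(update_networks)  # invoked as func(algorithm, epoch) after each epoch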
Example #3
def generate_vae_dataset(
        N=10000, test_p=0.9, use_cached=True, imsize=84, show=False,
        dataset_path=None, policy_path=None, action_space_sampling=False, env_class=SawyerDoorEnv, env_kwargs=None,
        init_camera=sawyer_door_env_camera_v2,
):
    if policy_path is not None:
        filename = "/tmp/sawyer_door_pull_open_oracle+random_policy_data_closer_zoom_action_limited" + str(N) + ".npy"
    elif action_space_sampling:
        filename = "/tmp/sawyer_door_pull_open_zoomed_in_action_space_sampling" + str(N) + ".npy"
    else:
        filename = "/tmp/sawyer_door_pull_open" + str(N) + ".npy"
    info = {}
    if dataset_path is not None:
        filename = local_path_from_s3_or_local_path(dataset_path)
        dataset = np.load(filename)
    elif use_cached and osp.isfile(filename):
        dataset = np.load(filename)
        print("loaded data from saved file", filename)
    else:
        now = time.time()
        if env_kwargs is None:
            env_kwargs = {}
        env = env_class(**env_kwargs)
        env = ImageEnv(
            env, imsize,
            transpose=True,
            init_camera=init_camera,
            normalize=True,
        )
        info['env'] = env
        policy = RandomPolicy(env.action_space)
        es = OUStrategy(action_space=env.action_space, theta=0)
        exploration_policy = PolicyWrappedWithExplorationStrategy(
            exploration_strategy=es,
            policy=policy,
        )
        env.wrapped_env.reset()
        dataset = np.zeros((N, imsize * imsize * 3), dtype=np.uint8)
        for i in range(N):
            if i % 20 == 0:
                env.reset_model()
                exploration_policy.reset()
            for _ in range(10):
                action = exploration_policy.get_action()[0]
                env.wrapped_env.step(
                    action
                )
            # env.set_to_goal_angle(env.get_goal()['state_desired_goal'])
            img = env._get_flat_img()
            dataset[i, :] = unormalize_image(img)
            if show:
                cv2.imshow('img', img.reshape(3, 84, 84).transpose())
                cv2.waitKey(1)
            print(i)
        print("done making training data", filename, time.time() - now)
        np.save(filename, dataset)

    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
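An illustrative call to the generator above, using the defaults from its signature (SawyerDoorEnv with sawyer_door_env_camera_v2); note that test_p is effectively the training fraction here, since the first int(N * test_p) rows become train_dataset. The small N is only to keep the sketch cheap:

# Illustrative call; requires a working MuJoCo/multiworld setup.
train_dataset, test_dataset, info = generate_vae_dataset(
    N=1000,
    test_p=0.9,        # 900 training rows, 100 test rows given the split logic above
    use_cached=False,
    show=False,
)
print(train_dataset.shape, test_dataset.shape)  # (900, 84 * 84 * 3), (100, 84 * 84 * 3)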
Example #4
def process_variant(variant):
    rl_variant = variant['rl_variant']

    if args.debug:
        rl_variant['algo_kwargs']['base_kwargs']['num_rollouts_per_eval'] = 1
        rl_variant['vis_kwargs']['num_samples_for_video'] = 2
        rl_variant['vae_wrapped_env_kwargs'][
            'num_samples_for_latent_histogram'] = 100

    assert rl_variant['eval_algo'] in [
        'mf-tdm',
        'mb-tdm',
    ]

    if 'ckpt_and_vae_path' in rl_variant:
        rl_variant['ckpt'] = rl_variant['ckpt_and_vae_path'][0]
        rl_variant['vae_path'] = rl_variant['ckpt_and_vae_path'][1]
        del rl_variant['ckpt_and_vae_path']

    update_variant_from_ckpt(variant)
    update_variant_from_vae(variant)

    local_path = local_path_from_s3_or_local_path(
        osp.join(rl_variant['ckpt'], 'variant.json'))
    with open(local_path) as f:
        ckpt_variant = json.load(f)
    ckpt_rl_variant = ckpt_variant.get('rl_variant', ckpt_variant)
    if 'mb' in rl_variant['eval_algo']:
        if 'max_tau' not in rl_variant['algo_kwargs']['tdm_kwargs']:
            rl_variant['algo_kwargs']['tdm_kwargs']['max_tau'] = \
                rl_variant['algo_kwargs']['base_kwargs']['max_path_length'] - 1
            if 'extra_time' in rl_variant['SubgoalPlanner_kwargs']:
                rl_variant['algo_kwargs']['tdm_kwargs']['max_tau'] -= \
                    rl_variant['SubgoalPlanner_kwargs']['extra_time']
        if 'max_tau_per_subprob' not in rl_variant['SubgoalPlanner_kwargs']:
            rl_variant['SubgoalPlanner_kwargs']['max_tau_per_subprob'] = \
                ckpt_rl_variant['algo_kwargs']['tdm_kwargs']['max_tau']
    else:
        if 'max_tau' not in rl_variant['algo_kwargs']['tdm_kwargs']:
            rl_variant['algo_kwargs']['tdm_kwargs']['max_tau'] = \
                ckpt_rl_variant['algo_kwargs']['tdm_kwargs']['max_tau']

    eval_algo = rl_variant['eval_algo']
    if eval_algo == 'mb-tdm':
        rl_variant['eval_policy'] = 'SubgoalPlanner'
        rl_variant['do_state_exp'] = False
    elif eval_algo == 'mf-tdm':
        pass

    variant['eval_algo_base'] = rl_variant['eval_algo']
    variant['eval_algo_tag'] = 'mt=' + str(
        rl_variant['algo_kwargs']['tdm_kwargs']['max_tau'])
    if 'mb' in rl_variant['eval_algo']:
        variant['eval_algo_tag'] = '-'.join([
            variant['eval_algo_tag'], 'mtps=' +
            str(rl_variant['SubgoalPlanner_kwargs']['max_tau_per_subprob'])
        ])
    variant['eval_algo'] = '-'.join(
        [variant['eval_algo_base'], variant['eval_algo_tag']])
def generate_vae_dataset(
    N=10000,
    test_p=0.9,
    use_cached=True,
    imsize=84,
    show=False,
    init_camera=sawyer_init_camera_zoomed_in,
    dataset_path=None,
    env_kwargs=None,
):
    if env_kwargs is None:
        env_kwargs = {}
    filename = "/tmp/sawyer_push_variable{}_{}.npy".format(
        str(N),
        init_camera.__name__,
    )
    info = {}
    if dataset_path is not None:
        filename = local_path_from_s3_or_local_path(dataset_path)
        dataset = np.load(filename)
        N = dataset.shape[0]
    elif use_cached and osp.isfile(filename):
        dataset = np.load(filename)
        print("loaded data from saved file", filename)
    else:
        now = time.time()
        env = SawyerPushXYVariableEnv(hide_goal=True, **env_kwargs)
        env = ImageMujocoEnv(
            env,
            imsize,
            transpose=True,
            init_camera=init_camera,
            normalize=True,
        )
        info['env'] = env

        dataset = np.zeros((N, imsize * imsize * 3))
        for i in range(N):
            goal = env.sample_goal_for_rollout()
            hand_pos = env.sample_hand_xy()
            env.set_to_goal(goal, reset_hand=False)
            env.set_hand_xy(hand_pos)
            # img = env.reset()
            img = env.step(env.action_space.sample())[0]
            dataset[i, :] = img
            if show:
                img = img.reshape(3, 84, 84).transpose()
                img = img[::-1, :, ::-1]
                cv2.imshow('img', img)
                cv2.waitKey(1)
                # radius = input('waiting...')
        print("done making training data", filename, time.time() - now)
        np.save(filename, dataset)

    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
def generate_vae_dataset(
    N=10000,
    test_p=0.9,
    use_cached=True,
    imsize=84,
    show=False,
    init_camera=sawyer_init_camera_zoomed_in,
    dataset_path=None,
    env_kwargs=None,
):
    """
    Oracle means that we use `set_to_goal` rather than doing random rollouts.
    """
    if env_kwargs is None:
        env_kwargs = {}
    filename = "/tmp/sawyer_reset_free_push{}_{}.npy".format(
        str(N),
        init_camera.__name__,
    )
    info = {}
    if dataset_path is not None:
        filename = local_path_from_s3_or_local_path(dataset_path)
        dataset = np.load(filename)
        N = dataset.shape[0]
    elif use_cached and osp.isfile(filename):
        dataset = np.load(filename)
        print("loaded data from saved file", filename)
    else:
        now = time.time()
        env = SawyerResetFreePushEnv(hide_goal=True, **env_kwargs)
        env = ImageMujocoEnv(
            env,
            imsize,
            transpose=True,
            init_camera=init_camera,
            normalize=True,
        )
        info['env'] = env

        dataset = np.zeros((N, imsize * imsize * 3))
        for i in range(N):
            goal = env.sample_goal_for_rollout()
            env.set_to_goal(goal)
            img = env.reset()
            dataset[i, :] = img
            if show:
                img = img.reshape(3, 84, 84).transpose()
                img = img[::-1, :, ::-1]
                cv2.imshow('img', img)
                cv2.waitKey(1)
        print("done making training data", filename, time.time() - now)
        np.save(filename, dataset)

    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
Example #7
def update_variant_from_ckpt(variant):
    rl_variant = variant['rl_variant']
    local_path = local_path_from_s3_or_local_path(osp.join(rl_variant['ckpt'], 'variant.json'))
    with open(local_path) as f:
        ckpt_variant = json.load(f)
    ckpt_rl_variant = ckpt_variant.get('rl_variant', None)
    if ckpt_rl_variant is None:
        ckpt_rl_variant = ckpt_variant.get('grill_variant', ckpt_variant) # backwards compatibility

    env_kwargs = ckpt_variant['env_kwargs']
    env_kwargs.update(variant['env_kwargs'])
    variant['env_kwargs'] = env_kwargs

    rl_variant['algorithm'] = ckpt_rl_variant['algorithm']
    variant['ckpt_exp_prefix'] = ckpt_variant['exp_prefix']
    variant['ckpt_exp_id'] = ckpt_variant['exp_id']
    variant['ckpt_seed'] = ckpt_variant['seed']

    if 'vae_path' in ckpt_rl_variant:
        rl_variant['vae_path'] = ckpt_rl_variant['vae_path']

    if 'vae_variant' in ckpt_variant:
        variant['vae_variant'] = ckpt_variant['vae_variant']
    elif 'train_vae_variant' in ckpt_variant:  # backwards compatibility
        variant['vae_variant'] = ckpt_variant['train_vae_variant']

    if 'num_updates_per_env_step' in ckpt_rl_variant['algo_kwargs']['base_kwargs']:
        rl_variant['algo_kwargs']['base_kwargs']['num_updates_per_env_step'] = \
            ckpt_rl_variant['algo_kwargs']['base_kwargs']['num_updates_per_env_step']

    if 'max_path_length' not in rl_variant['algo_kwargs']['base_kwargs']:
        rl_variant['algo_kwargs']['base_kwargs']['max_path_length'] = \
            ckpt_rl_variant['algo_kwargs']['base_kwargs']['max_path_length']

    if rl_variant.get('test_ckpt', False) and rl_variant['algo_kwargs']['base_kwargs']['num_epochs'] == 1:
        rl_variant['algo_kwargs']['base_kwargs']['num_epochs'] = \
            ckpt_rl_variant['algo_kwargs']['base_kwargs']['num_epochs']

    rl_variant['exploration_type'] = ckpt_rl_variant['exploration_type']
    rl_variant['exploration_noise'] = ckpt_rl_variant['exploration_noise']

    if 'reward_params' in ckpt_rl_variant:
        rl_variant['reward_params'] = ckpt_rl_variant['reward_params']
    if 'vae_wrapped_env_kwargs' in ckpt_rl_variant:
        for k in ckpt_rl_variant['vae_wrapped_env_kwargs']:
            if k in ['test_noisy_encoding', 'num_samples_for_latent_histogram'] \
                    and k in rl_variant['vae_wrapped_env_kwargs']:
                pass
            else:
                rl_variant['vae_wrapped_env_kwargs'][k] = \
                    ckpt_rl_variant['vae_wrapped_env_kwargs'][k]

    rl_variant['algo_kwargs']['base_kwargs']['reward_scale'] = \
        ckpt_rl_variant['algo_kwargs']['base_kwargs'].get('reward_scale', 1.0)

    if 'env_class' not in variant and 'env_id' not in variant and 'env_id' in ckpt_variant:
        variant['env_id'] = ckpt_variant['env_id'].replace('Train', 'Test')
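A skeletal sketch of the variant dictionary this function expects before it is called: an rl_variant with a ckpt directory holding variant.json, plus the nested algo_kwargs entries it writes into. All paths and values here are placeholders:

# Hypothetical skeleton; the checkpoint's variant.json must itself contain the
# rl_variant/grill_variant entries (exploration_type, algo_kwargs, ...) read above.
variant = {
    'env_kwargs': {},
    'rl_variant': {
        'ckpt': '/path/to/rl-checkpoint-dir',
        'algo_kwargs': {'base_kwargs': {'num_epochs': 1}},
        'vae_wrapped_env_kwargs': {},
    },
}
update_variant_from_ckpt(variant)  # merges checkpoint metadata and settings into variant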
Example #8
def compute_sampled_latents(vae_env):
    vae_env.num_active_dims = 0
    for std in vae_env.vae.dist_std:
        if std > 0.15:
            vae_env.num_active_dims += 1

    vae_env.active_dims = vae_env.vae.dist_std.argsort()[-vae_env.num_active_dims:][::-1]
    vae_env.inactive_dims = vae_env.vae.dist_std.argsort()[:-vae_env.num_active_dims][::-1]

    if vae_env.use_vae_dataset and vae_env.vae_dataset_path is not None:
        from multiworld.core.image_env import normalize_image
        from railrl.misc.asset_loader import local_path_from_s3_or_local_path
        filename = local_path_from_s3_or_local_path(vae_env.vae_dataset_path)
        dataset = np.load(filename).item()
        vae_env.num_samples_for_latent_histogram = min(dataset['next_obs'].shape[0], vae_env.num_samples_for_latent_histogram)
        sampled_idx = np.random.choice(dataset['next_obs'].shape[0], vae_env.num_samples_for_latent_histogram)
        if vae_env.vae_input_key_prefix == 'state':
            vae_dataset_samples = dataset['next_obs'][sampled_idx]
        else:
            vae_dataset_samples = normalize_image(dataset['next_obs'][sampled_idx])
        del dataset
    else:
        vae_dataset_samples = None

    n = vae_env.num_samples_for_latent_histogram

    if vae_dataset_samples is not None:
        imgs = vae_dataset_samples
    else:
        if vae_env.vae_input_key_prefix == 'state':
            imgs = vae_env.wrapped_env.wrapped_env.sample_goals(n)['state_desired_goal']
        else:
            imgs = vae_env.wrapped_env.sample_goals(n)['image_desired_goal']

    batch_size = 2500
    latents, latents_noisy, latents_reproj = None, None, None
    for i in range(0, n, batch_size):
        batch_latents_mean, batch_latents_logvar = vae_env.encode_imgs(imgs[i:i + batch_size], clip_std=False)
        batch_latents_noisy = vae_env.reparameterize(batch_latents_mean, batch_latents_logvar, noisy=True)
        if vae_env.use_reprojection_network:
            batch_latents_reproj = ptu.get_numpy(vae_env.reproject_encoding(ptu.np_to_var(batch_latents_noisy)))
        if latents is None:
            latents = batch_latents_mean
            latents_noisy = batch_latents_noisy
            if vae_env.use_reprojection_network:
                latents_reproj = batch_latents_reproj
        else:
            latents = np.concatenate((latents, batch_latents_mean), axis=0)
            latents_noisy = np.concatenate((latents_noisy, batch_latents_noisy), axis=0)
            if vae_env.use_reprojection_network:
                latents_reproj = np.concatenate((latents_reproj, batch_latents_reproj), axis=0)

    vae_env.sampled_latents = latents
    vae_env.sampled_latents_noisy = latents_noisy
    vae_env.sampled_latents_reproj = latents_reproj
def generate_vae_dataset(
        env_class,
        N=10000,
        test_p=0.9,
        use_cached=True,
        observation_key='observation',
        init_camera=None,
        dataset_path=None,
        env_kwargs=None,
        oracle_dataset=False,
        n_random_steps=100,
):
    if env_kwargs is None:
        env_kwargs = {}
    filename = "/tmp/{}_{}_{}_oracle{}.npy".format(
        env_class.__name__,
        str(N),
        init_camera.__name__ if init_camera else '',
        oracle_dataset,
    )
    info = {}
    if dataset_path is not None:
        filename = local_path_from_s3_or_local_path(dataset_path)
        dataset = np.load(filename)
        N = dataset.shape[0]
    elif use_cached and osp.isfile(filename):
        dataset = np.load(filename)
        print("loaded data from saved file", filename)
    else:
        now = time.time()
        env = env_class(**env_kwargs)
        env.reset()
        info['env'] = env
        observation_dim = env.observation_space.spaces[observation_key].low.size
        dataset = np.zeros((N, observation_dim))
        for i in range(N):
            if oracle_dataset:
                goal = env.sample_goal()
                env.set_to_goal(goal)
            else:
                env.reset()
                for _ in range(n_random_steps):
                    env.step(env.action_space.sample())[0]
            obs = env.step(env.action_space.sample())[0][observation_key]
            dataset[i, :] = obs
            print(i)
        print("done making training data", filename, time.time() - now)
        np.save(filename, dataset)

    n = int(N * test_p)
    np.random.shuffle(dataset)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
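A hedged call sketch for the state-observation variant above, reusing SawyerReachXYEnv (which appears in a later example); whether 'observation' is the appropriate key for a given environment is an assumption to verify per env:

# Illustrative call; with oracle_dataset=True the env is teleported to sampled goals
# via set_to_goal instead of taking n_random_steps random actions first.
train_dataset, test_dataset, info = generate_vae_dataset(
    env_class=SawyerReachXYEnv,
    N=500,
    observation_key='observation',
    oracle_dataset=True,
    use_cached=False,
)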
Example #10
def generate_vae_dataset(
    N=10000,
    test_p=0.9,
    use_cached=True,
    imsize=84,
    show=False,
    init_camera=sawyer_init_camera_zoomed_in,
    dataset_path=None,
):
    filename = "/tmp/sawyer_push_new_easy{}_{}.npy".format(
        str(N),
        init_camera.__name__,
    )
    info = {}
    if dataset_path is not None:
        filename = local_path_from_s3_or_local_path(dataset_path)
        dataset = np.load(filename)
        N = dataset.shape[0]
    elif use_cached and osp.isfile(filename):
        dataset = np.load(filename)
        print("loaded data from saved file", filename)
    else:
        now = time.time()
        env = SawyerPushXYEasyEnv(hide_goal=True)
        env = ImageMujocoEnv(
            env,
            imsize,
            transpose=True,
            init_camera=init_camera,
            normalize=True,
        )
        info['env'] = env

        dataset = np.zeros((N, imsize * imsize * 3))
        for i in range(N):
            env.reset()
            for _ in range(100):
                action = env.wrapped_env.action_space.sample()
                # action[0] = 0
                # action[1] = 1
                env.wrapped_env.step(action)
            img = env.step(env.action_space.sample())[0]
            dataset[i, :] = img
            print(i)
            if show:
                cv2.imshow('img', img.reshape(3, 84, 84).transpose())
                cv2.waitKey(1)
        print("done making training data", filename, time.time() - now)
        np.save(filename, dataset)

    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
Example #11
def generate_vae_dataset(
    N=10000,
    test_p=0.9,
    use_cached=True,
    imsize=84,
    show=False,
    dataset_path=None,
):
    filename = "/tmp/sawyer_push_new_easy_wider2_" + str(N) + ".npy"
    info = {}
    if dataset_path is not None:
        filename = local_path_from_s3_or_local_path(dataset_path)
        dataset = np.load(filename)
    elif use_cached and osp.isfile(filename):
        dataset = np.load(filename)
        print("loaded data from saved file", filename)
    else:
        now = time.time()
        env = SawyerPushXYEasyEnv(hide_goal=True)
        env = ImageMujocoEnv(
            env,
            imsize,
            transpose=True,
            init_camera=sawyer_init_camera_zoomed_in,
            # init_camera=sawyer_init_camera,
            normalize=True,
        )
        info['env'] = env
        policy = OUStrategy(env.action_space)

        dataset = np.zeros((N, imsize * imsize * 3))
        for i in range(N):
            # env.reset()
            if i % 100 == 0:
                g = env.sample_goal_for_rollout()
                env.set_goal(g)
                policy.reset()
            u = policy.get_action_from_raw_action(env.action_space.sample())
            img = env.step(u)[0]
            dataset[i, :] = img
            if show:
                # env.render()
                cv2.imshow('img', img.reshape(3, 84, 84).transpose())
                cv2.waitKey(1)
        print("done making training data", filename, time.time() - now)
        np.save(filename, dataset)

    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
Example #12
    def pretrain(self):
        if self.oracle_transition_data is not None:
            filename = local_path_from_s3_or_local_path(
                self.oracle_transition_data)
            data = np.load(filename).item()
            print("adding data to replay buffer...")

            states = data['states']
            actions = data['actions']
            next_states = data['next_states']
            idx = np.random.permutation(len(states))
            states, actions, next_states = states[idx], actions[idx], next_states[idx]
            cap = self.replay_buffer.max_size
            states, actions, next_states = states[:cap], actions[:cap], next_states[:cap]

            dummy_goal = self.env.sample_goal_for_rollout()
            for (s, a, next_s, i) in zip(states, actions, next_states,
                                         range(len(states))):
                if i % 10000 == 0:
                    print(i)
                obs = dict(
                    observation=s,
                    desired_goal=dummy_goal['desired_goal'],
                    achieved_goal=s,
                    state_observation=s,
                    state_desired_goal=dummy_goal['state_desired_goal'],
                    state_achieved_goal=s,
                )
                next_obs = dict(
                    observation=next_s,
                    desired_goal=dummy_goal['desired_goal'],
                    achieved_goal=next_s,
                    state_observation=next_s,
                    state_desired_goal=dummy_goal['state_desired_goal'],
                    state_achieved_goal=next_s,
                )

                self._handle_step(
                    obs,
                    a,
                    np.array([0]),
                    next_obs,
                    np.array([0]),
                    agent_info={},
                    env_info={},
                )
                self._handle_rollout_ending()
Example #13
def dump_reconstructions(vae_env, epoch, n_recon=16):
    from railrl.core import logger
    import os.path as osp
    from torchvision.utils import save_image

    if vae_env.use_vae_dataset and vae_env.vae_dataset_path is not None:
        from multiworld.core.image_env import normalize_image
        from railrl.misc.asset_loader import local_path_from_s3_or_local_path
        filename = local_path_from_s3_or_local_path(vae_env.vae_dataset_path)
        dataset = np.load(filename).item()
        sampled_idx = np.random.choice(dataset['next_obs'].shape[0], n_recon)
        if vae_env.vae_input_key_prefix == 'state':
            states = dataset['next_obs'][sampled_idx]
            imgs = ptu.np_to_var(
                vae_env.wrapped_env.states_to_images(states)
            )
            recon_samples, _, _ = vae_env.vae(ptu.np_to_var(states))
            recon_imgs = ptu.np_to_var(
                vae_env.wrapped_env.states_to_images(ptu.get_numpy(recon_samples))
            )
        else:
            imgs = ptu.np_to_var(
                normalize_image(dataset['next_obs'][sampled_idx])
            )
            recon_imgs, _, _, _ = vae_env.vae(imgs)
        del dataset
    else:
        return

    comparison = torch.cat([
        imgs.narrow(start=0, length=vae_env.wrapped_env.image_length, dimension=1).contiguous().view(
            -1,
            vae_env.wrapped_env.channels,
            vae_env.wrapped_env.imsize,
            vae_env.wrapped_env.imsize
        ),
        recon_imgs.contiguous().view(
            n_recon,
            vae_env.wrapped_env.channels,
            vae_env.wrapped_env.imsize,
            vae_env.wrapped_env.imsize
        )[:n_recon]
    ])

    if epoch is not None:
        save_dir = osp.join(logger.get_snapshot_dir(), 'r_%d.png' % epoch)
    else:
        save_dir = osp.join(logger.get_snapshot_dir(), 'r.png')
    save_image(comparison.data.cpu(), save_dir, nrow=n_recon)
Example #14
def generate_vae_dataset(
    N=10000,
    test_p=0.9,
    use_cached=True,
    imsize=84,
    show=False,
    dataset_path=None,
):
    filename = "/tmp/sawyer_xy_pos_control_imgs" + str(N) + ".npy"
    info = {}
    if dataset_path is not None:
        filename = local_path_from_s3_or_local_path(dataset_path)
        dataset = np.load(filename)
    elif use_cached and osp.isfile(filename):
        dataset = np.load(filename)
        print("loaded data from saved file", filename)
    else:
        now = time.time()
        env = SawyerReachXYEnv(hide_goal_markers=True)
        env = ImageEnv(
            env,
            imsize,
            transpose=True,
            init_camera=init_sawyer_camera_v1,
            normalize=True,
        )
        info['env'] = env
        dataset = np.zeros((N, imsize * imsize * 3))
        for i in range(N):
            # Move the goal out of the image
            env.reset()
            for _ in range(50):
                env.wrapped_env.step(env.wrapped_env.action_space.sample())
            img = env.step(env.action_space.sample())[0]['image_observation']

            dataset[i, :] = img
            if show:
                cv2.imshow('img', img.reshape(3, 84, 84).transpose())
                cv2.waitKey(1)
            print(i)
        print("done making training data", filename, time.time() - now)
        np.save(filename, dataset)

    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
Example #15
def update_variant_from_vae(variant):
    rl_variant = variant['rl_variant']
    if 'vae_path' in rl_variant:
        local_path = local_path_from_s3_or_local_path(osp.join(rl_variant['vae_path'], 'variant.json'))
        with open(local_path) as f:
            data = json.load(f)
            variant['vae_exp_prefix'] = data['exp_prefix']
            variant['vae_exp_id'] = data['exp_id']
            variant['vae_seed'] = data['seed']
            if 'vae_variant' in data:
                data_vae_variant = data['vae_variant']
            else:
                data_vae_variant = data['train_vae_variant'] # backwards compatibility
            variant['vae_variant'] = data_vae_variant
            vae_wrapped_env_kwargs = rl_variant['vae_wrapped_env_kwargs']
            vae_wrapped_env_kwargs['vae_dataset_path'] = \
                data_vae_variant['generate_vae_dataset_kwargs']['dataset_path']
def generate_vae_dataset(
    N=10000,
    test_p=0.9,
    use_cached=True,
    imsize=84,
    show=False,
    dataset_path=None,
    action_space_sampling=False,
    init_camera=None,
    env_class=None,
    env_kwargs=None,
):
    filename = "/tmp/sawyer_xyz_pos_control_new_zoom_cam" + str(N) + '.npy'
    info = {}
    if dataset_path is not None:
        filename = local_path_from_s3_or_local_path(dataset_path)
        dataset = np.load(filename)
    elif use_cached and osp.isfile(filename):
        dataset = np.load(filename)
        print("loaded data from saved file", filename)
    else:
        now = time.time()
        if env_kwargs is None:
            env_kwargs = dict()
        env = env_class(**env_kwargs)
        env = ImageEnv(
            env,
            imsize,
            transpose=True,
            init_camera=init_camera,
            normalize=True,
        )
        dataset = np.zeros((N, imsize * imsize * 3), dtype=np.uint8)
        if action_space_sampling:
            action_space = Box(np.array([-.1, .5, 0]), np.array([.1, .7, .5]))
            for i in range(N):
                env.set_to_goal(env.sample_goal())
                img = env._get_flat_img()
                dataset[i, :] = unormalize_image(img)
                if show:
                    cv2.imshow('img', img.reshape(3, 84, 84).transpose())
                    cv2.waitKey(1)
                print(i)
            info['env'] = env
        else:
            policy = RandomPolicy(env.action_space)
            es = OUStrategy(action_space=env.action_space, theta=0)
            exploration_policy = PolicyWrappedWithExplorationStrategy(
                exploration_strategy=es,
                policy=policy,
            )
            for i in range(N):
                # Move the goal out of the image
                env.wrapped_env.set_goal(np.array([100, 100, 100]))
                if i % 50 == 0:
                    print('Reset')
                    env.reset()
                    exploration_policy.reset()
                for _ in range(1):
                    action = exploration_policy.get_action()[0] * 10
                    env.wrapped_env.step(action)
                img = env.step(env.action_space.sample())[0]
                dataset[i, :] = img
                if show:
                    cv2.imshow('img', img.reshape(3, 84, 84).transpose())
                    cv2.waitKey(1)
                print(i)

        print("done making training data", time.time() - now)
        np.save(filename, dataset)

    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
Example #17
def tdm_td3_experiment(variant):
    import railrl.samplers.rollout_functions as rf
    import railrl.torch.pytorch_util as ptu
    from railrl.data_management.obs_dict_replay_buffer import \
        ObsDictRelabelingBuffer
    from railrl.exploration_strategies.base import (
        PolicyWrappedWithExplorationStrategy)
    from railrl.state_distance.tdm_networks import TdmQf, TdmPolicy
    from railrl.state_distance.tdm_td3 import TdmTd3
    from railrl.my_td3 import Actor, Critic, MY_TD3
    from railrl.state_distance.subgoal_planner import SubgoalPlanner
    from railrl.misc.asset_loader import local_path_from_s3_or_local_path
    from railrl.my_tdm_td3 import MyTdmTd3
    import joblib

    preprocess_rl_variant(variant)
    env = get_envs(variant)
    es = get_exploration_strategy(variant, env)

    observation_key = variant.get('observation_key', 'latent_observation')
    desired_goal_key = variant.get('desired_goal_key', 'latent_desired_goal')
    achieved_goal_key = desired_goal_key.replace("desired", "achieved")

    vectorized = 'vectorized' in env.reward_type
    variant['algo_kwargs']['tdm_kwargs']['vectorized'] = vectorized
    variant['replay_buffer_kwargs']['vectorized'] = vectorized

    args = {'latent_dim': 16, 'device': 'cuda'}  #OWN

    if 'ckpt' in variant:
        if 'ckpt_epoch' in variant:
            epoch = variant['ckpt_epoch']
            filename = local_path_from_s3_or_local_path(
                osp.join(variant['ckpt'], 'itr_%d.pkl' % epoch))
        else:
            filename = local_path_from_s3_or_local_path(
                osp.join(variant['ckpt'], 'params.pkl'))
        print("Loading ckpt from", filename)
        data = joblib.load(filename)
        qf1 = data['qf1']
        qf2 = data['qf2']
        policy = data['policy']
        variant['algo_kwargs']['base_kwargs'][
            'reward_scale'] = policy.reward_scale
    else:
        obs_dim = (env.observation_space.spaces[observation_key].low.size)
        goal_dim = (env.observation_space.spaces[desired_goal_key].low.size)
        action_dim = env.action_space.low.size
        max_action = env.action_space.high

        variant['qf_kwargs']['vectorized'] = vectorized
        norm_order = env.norm_order
        variant['qf_kwargs']['norm_order'] = norm_order
        env.reset()
        _, rew, _, _ = env.step(env.action_space.sample())
        if hasattr(rew, "__len__"):
            variant['qf_kwargs']['output_dim'] = len(rew)
        '''qf1 = TdmQf(
            env=env,
            observation_dim=obs_dim,
            goal_dim=goal_dim,
            action_dim=action_dim,
            **variant['qf_kwargs']
        )
        qf2 = TdmQf(
            env=env,
            observation_dim=obs_dim,
            goal_dim=goal_dim,
            action_dim=action_dim,
            **variant['qf_kwargs']
        )
        policy = TdmPolicy(
            env=env,
            observation_dim=obs_dim,
            goal_dim=goal_dim,
            action_dim=action_dim,
            reward_scale=variant['algo_kwargs']['base_kwargs'].get('reward_scale', 1.0),
            **variant['policy_kwargs']
        )'''
        policy = Actor(obs_dim,
                       action_dim,
                       goal_dim,
                       1,
                       max_action=max_action,
                       device=args['device'],
                       reward_scale=10.0,
                       networks_hidden=[400, 300]).cuda()

        qf1 = Critic(obs_dim, action_dim, goal_dim, 1, 4, args['device'],
                     [400, 300]).cuda()
        qf2 = Critic(obs_dim, action_dim, goal_dim, 1, 4, args['device'],
                     [400, 300]).cuda()

    eval_policy = None
    if variant.get('eval_policy', None) == 'SubgoalPlanner':
        eval_policy = SubgoalPlanner(
            env,
            qf1,
            policy,
            observation_key=observation_key,
            desired_goal_key=desired_goal_key,
            achieved_goal_key=achieved_goal_key,
            state_based=variant.get("do_state_exp", False),
            max_tau=variant['algo_kwargs']['tdm_kwargs']['max_tau'],
            reward_scale=variant['algo_kwargs']['base_kwargs'].get(
                'reward_scale', 1.0),
            **variant['SubgoalPlanner_kwargs'])

    exploration_policy = PolicyWrappedWithExplorationStrategy(
        exploration_strategy=es,
        policy=policy,
    )

    replay_buffer = ObsDictRelabelingBuffer(
        env=env,
        observation_key=observation_key,
        desired_goal_key=desired_goal_key,
        achieved_goal_key=achieved_goal_key,
        **variant['replay_buffer_kwargs'])

    algo_kwargs = variant['algo_kwargs']
    algo_kwargs['replay_buffer'] = replay_buffer
    base_kwargs = algo_kwargs['base_kwargs']
    base_kwargs['training_env'] = env
    base_kwargs['render'] = variant.get("render", False)
    base_kwargs['render_during_eval'] = variant.get("render_during_eval",
                                                    False)
    tdm_kwargs = algo_kwargs['tdm_kwargs']
    tdm_kwargs['observation_key'] = observation_key
    tdm_kwargs['desired_goal_key'] = desired_goal_key
    '''algorithm = TdmTd3(
        env,
        qf1=qf1,
        qf2=qf2,
        policy=policy,
        exploration_policy=exploration_policy,
        eval_policy=eval_policy,
        **variant['algo_kwargs']
    )'''
    algorithm = MyTdmTd3(actor=policy,
                         critic1=qf1,
                         critic2=qf2,
                         max_action=max_action,
                         args=args,
                         env=env,
                         exploration_policy=exploration_policy,
                         eval_policy=eval_policy,
                         **variant['algo_kwargs'])

    if variant.get("test_ckpt", False):
        algorithm.post_epoch_funcs.append(get_update_networks_func(variant))

    vis_variant = variant.get('vis_kwargs', {})
    vis_list = vis_variant.get('vis_list', [])
    if vis_variant.get("save_video", True):
        rollout_function = rf.create_rollout_function(
            rf.tdm_rollout,
            init_tau=algorithm._sample_max_tau_for_rollout(),
            decrement_tau=algorithm.cycle_taus_for_rollout,
            cycle_tau=algorithm.cycle_taus_for_rollout,
            max_path_length=algorithm.max_path_length,
            observation_key=algorithm.observation_key,
            desired_goal_key=algorithm.desired_goal_key,
            vis_list=vis_list,
            dont_terminate=True,
        )
        video_func = get_video_save_func(
            rollout_function,
            env,
            variant,
        )
        algorithm.post_epoch_funcs.append(video_func)

    if ptu.gpu_enabled():
        print("using GPU")
        algorithm.cuda()
        if not variant.get("do_state_exp", False):
            env.vae.cuda()

    env.reset()
    if not variant.get("do_state_exp", False):
        env.dump_samples(epoch=None)
        env.dump_reconstructions(epoch=None)
        env.dump_latent_plots(epoch=None)

    algorithm.train()
Example #18
def process_variant(variant):
    rl_variant = variant['rl_variant']

    if args.debug:
        rl_variant['algo_kwargs']['base_kwargs']['num_rollouts_per_eval'] = 1
        rl_variant['vis_kwargs']['num_samples_for_video'] = 2
        rl_variant['vae_wrapped_env_kwargs']['num_samples_for_latent_histogram'] = 100
        variant['train_reprojection_network_variant']['num_epochs'] = 1
        variant['train_reprojection_network_variant']['generate_reprojection_network_dataset_kwargs']['N'] = int(2 ** 8)

        if 'env_kwargs' in variant and 'num_goals_presampled' in variant['env_kwargs']:
            variant['env_kwargs']['num_goals_presampled'] = 10
        if 'goal_generation_kwargs' in rl_variant and \
                'num_goals_presampled' in rl_variant['goal_generation_kwargs']:
            rl_variant['goal_generation_kwargs']['num_goals_presampled'] = 10

    assert rl_variant['eval_algo'] in [
        'mb-tdm',
        'mf-tdm',
    ]

    update_variant_from_ckpt(variant)

    ckpt_path = local_path_from_s3_or_local_path(osp.join(rl_variant['ckpt'], 'variant.json'))
    with open(ckpt_path) as f:
        ckpt_variant = json.load(f)
    if 'rl_variant' in ckpt_variant:
        ckpt_rl_variant = ckpt_variant['rl_variant']
    else:
        ckpt_rl_variant = ckpt_variant['grill_variant'] # backwards compatibility
    if 'mb' in rl_variant['eval_algo']:
        if 'max_tau' not in rl_variant['algo_kwargs']['tdm_kwargs']:
            rl_variant['algo_kwargs']['tdm_kwargs']['max_tau'] = \
                rl_variant['algo_kwargs']['base_kwargs']['max_path_length'] - 1
            if 'extra_time' in rl_variant['SubgoalPlanner_kwargs']:
                rl_variant['algo_kwargs']['tdm_kwargs']['max_tau'] -= \
                    rl_variant['SubgoalPlanner_kwargs']['extra_time']
        if 'max_tau_per_subprob' not in rl_variant['SubgoalPlanner_kwargs']:
            rl_variant['SubgoalPlanner_kwargs']['max_tau_per_subprob'] = \
                ckpt_rl_variant['algo_kwargs']['tdm_kwargs']['max_tau']
    else:
        if 'max_tau' not in rl_variant['algo_kwargs']['tdm_kwargs']:
            rl_variant['algo_kwargs']['tdm_kwargs']['max_tau'] = \
                ckpt_rl_variant['algo_kwargs']['tdm_kwargs']['max_tau']

    eval_algo = rl_variant['eval_algo']
    if eval_algo == 'mb-tdm':
        rl_variant['eval_policy'] = 'SubgoalPlanner'
        rl_variant['SubgoalPlanner_kwargs']['reproject_encoding'] = True
    elif eval_algo == 'mf-tdm':
        pass

    rl_variant['eval_algo_base'] = eval_algo
    rl_variant['eval_algo_tag'] = 'mt=' + str(rl_variant['algo_kwargs']['tdm_kwargs']['max_tau'])
    if 'mb' in rl_variant['eval_algo']:
        rl_variant['eval_algo_tag'] = '-'.join([
            rl_variant['eval_algo_tag'],
            'mtps=' + str(rl_variant['SubgoalPlanner_kwargs']['max_tau_per_subprob'])
        ])
    rl_variant['eval_algo'] = '-'.join([
        rl_variant['eval_algo_base'],
        rl_variant['eval_algo_tag']
    ])
    variant['eval_algo_base'] = rl_variant['eval_algo_base']
    variant['eval_algo_tag'] = rl_variant['eval_algo_tag']
    variant['eval_algo'] = rl_variant['eval_algo']
def generate_vae_dataset(
    N=10000,
    test_p=0.9,
    use_cached=True,
    imsize=84,
    show=False,
    dataset_path=None,
    policy_path=None,
    ratio_oracle_policy_data_to_random=1 / 2,
    action_space_sampling=False,
    env_class=None,
    env_kwargs=None,
    action_plus_random_sampling=False,
    init_camera=sawyer_door_env_camera,
):
    if policy_path is not None:
        filename = "/tmp/sawyer_door_push_open_oracle+random_policy_data_closer_zoom_action_limited" + str(
            N) + ".npy"
    elif action_space_sampling:
        filename = "/tmp/sawyer_door_push_open_zoomed_in_action_space_sampling" + str(
            N) + ".npy"
    else:
        filename = "/tmp/sawyer_door_push_open" + str(N) + ".npy"
    info = {}
    if dataset_path is not None:
        filename = local_path_from_s3_or_local_path(dataset_path)
        dataset = np.load(filename)
    elif use_cached and osp.isfile(filename):
        dataset = np.load(filename)
        print("loaded data from saved file", filename)
    elif action_space_sampling:
        env = SawyerDoorPushOpenEnv(**env_kwargs)
        env = ImageEnv(
            env,
            imsize,
            transpose=False,
            init_camera=sawyer_door_env_camera,
            normalize=False,
        )
        action_space = Box(np.array([-env.max_x_pos, .5, .06]),
                           np.array([env.max_x_pos, env.max_y_pos, .06]))
        dataset = np.zeros((N, imsize * imsize * 3))
        for i in range(N):
            env.set_to_goal_pos(action_space.sample())  #move arm to spot
            goal = env.sample_goal()
            env.set_to_goal(goal)
            img = env.get_image().flatten()
            dataset[i, :] = img
            if show:
                cv2.imshow('img', img.reshape(3, 84, 84).transpose())
                cv2.waitKey(1)
            print(i)
        info['env'] = env
    elif action_plus_random_sampling:
        env = env_class(**env_kwargs)
        env = ImageEnv(
            env,
            imsize,
            transpose=True,
            init_camera=init_camera,
            normalize=True,
        )
        action_space = Box(np.array([-env.max_x_pos, .5, .06]),
                           np.array([env.max_x_pos, .6, .06]))
        action_sampled_data = int(N / 2)
        dataset = np.zeros((N, imsize * imsize * 3))
        print('Action Space Sampling')
        for i in range(action_sampled_data):
            env.set_to_goal_pos(action_space.sample())  # move arm to spot
            goal = env.sample_goal()
            env.set_to_goal(goal)
            img = env._get_flat_img()
            dataset[i, :] = img
            if show:
                cv2.imshow('img', img.reshape(3, 84, 84).transpose())
                cv2.waitKey(1)
            print(i)
        env._wrapped_env.min_y_pos = .6
        policy = RandomPolicy(env.action_space)
        es = OUStrategy(action_space=env.action_space, theta=0)
        exploration_policy = PolicyWrappedWithExplorationStrategy(
            exploration_strategy=es,
            policy=policy,
        )
        print('Random Sampling')
        for i in range(action_sampled_data, N):
            if i % 20 == 0:
                env.reset()
                exploration_policy.reset()
            for _ in range(10):
                action = exploration_policy.get_action()[0]
                env.wrapped_env.step(action)
            img = env._get_flat_img()
            dataset[i, :] = img
            if show:
                cv2.imshow('img', img.reshape(3, 84, 84).transpose())
                cv2.waitKey(1)
            print(i)
        env._wrapped_env.min_y_pos = .5
        info['env'] = env
    else:
        now = time.time()
        env = SawyerDoorPushOpenEnv(max_angle=.5)
        env = ImageEnv(
            env,
            imsize,
            transpose=True,
            init_camera=sawyer_door_env_camera,
            normalize=True,
        )
        info['env'] = env
        policy = RandomPolicy(env.action_space)
        es = OUStrategy(action_space=env.action_space, theta=0)
        exploration_policy = PolicyWrappedWithExplorationStrategy(
            exploration_strategy=es,
            policy=policy,
        )
        dataset = np.zeros((N, imsize * imsize * 3))
        for i in range(N):
            if i % 100 == 0:
                env.reset()
                exploration_policy.reset()
            for _ in range(25):
                # env.wrapped_env.step(
                #     env.wrapped_env.action_space.sample()
                # )
                action = exploration_policy.get_action()[0]
                env.wrapped_env.step(action)
            goal = env.sample_goal_for_rollout()
            env.set_to_goal(goal)
            img = env.step(env.action_space.sample())[0]
            dataset[i, :] = img
            if show:
                cv2.imshow('img', img.reshape(3, 84, 84).transpose())
                cv2.waitKey(1)
            print(i)
        print("done making training data", filename, time.time() - now)
        np.save(filename, dataset)

    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
Example #20
def generate_vae_dataset(
        N=10000, test_p=0.9, use_cached=True, imsize=84, show=False,
        dataset_path=None, policy_path=None, action_space_sampling=False, env_class=SawyerPushAndPullDoorEnv, env_kwargs=None,
        action_plus_random_sampling=False, init_camera=sawyer_door_env_camera, ratio_action_sample_to_random=1 / 2, env_id=None,
):
    if policy_path is not None:
        filename = "/tmp/sawyer_door_push_and_pull_open_oracle+random_policy_data_closer_zoom_action_limited" + str(N) + ".npy"
    elif action_space_sampling:
        filename = "/tmp/sawyer_door_push_and_pull_open_zoomed_in_action_space_sampling" + str(N) + ".npy"
    else:
        filename = "/tmp/sawyer_door_push_and_pull_open" + str(N) + ".npy"
    info = {}
    if dataset_path is not None:
        filename = local_path_from_s3_or_local_path(dataset_path)
        dataset = np.load(filename)
    elif use_cached and osp.isfile(filename):
        dataset = np.load(filename)
        print("loaded data from saved file", filename)
    elif action_plus_random_sampling:
        if env_id is not None:
            import gym
            env = gym.make(env_id)
        else:
            env = env_class(**env_kwargs)
            env = ImageEnv(
                env, imsize,
                transpose=True,
                init_camera=init_camera,
                normalize=True,
            )
        action_sampled_data = int(N*ratio_action_sample_to_random)
        dataset = np.zeros((N, imsize * imsize * 3), dtype=np.uint8)
        print('Action Space Sampling')
        for i in range(action_sampled_data):
            goal = env.sample_goal()
            env.set_to_goal(goal)
            img = env._get_flat_img()
            dataset[i, :] = unormalize_image(img)
            if show:
                cv2.imshow('img', img.reshape(3, 84, 84).transpose())
                cv2.waitKey(1)
            print(i)
        policy = RandomPolicy(env.action_space)
        es = OUStrategy(action_space=env.action_space, theta=0)
        exploration_policy = PolicyWrappedWithExplorationStrategy(
            exploration_strategy=es,
            policy=policy,
        )
        print('Random Sampling')
        for i in range(action_sampled_data, N):
            if i % 20 == 0:
                env.reset()
                exploration_policy.reset()
            for _ in range(10):
                action = exploration_policy.get_action()[0]
                env.wrapped_env.step(
                    action
                )
            goal = env.sample_goal()
            env.set_to_goal_angle(goal['state_desired_goal'])
            img = env._get_flat_img()
            dataset[i, :] = unormalize_image(img)
            if show:
                cv2.imshow('img', img.reshape(3, 84, 84).transpose())
                cv2.waitKey(1)
            print(i)
        env._wrapped_env.min_y_pos = .5
        info['env'] = env
    else:
        raise NotImplementedError()
    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
Example #21
def generate_vae_dataset(
        env_class,
        N=10000,
        test_p=0.9,
        use_cached=True,
        imsize=84,
        show=False,
        init_camera=sawyer_init_camera_zoomed_in,
        dataset_path=None,
        env_kwargs=None,
        oracle_dataset=False,
        n_random_steps=100,
):
    if env_kwargs is None:
        env_kwargs = {}
    filename = "/tmp/{}_{}_{}_oracle{}.npy".format(
        env_class.__name__,
        str(N),
        init_camera.__name__,
        oracle_dataset,
    )
    info = {}
    if dataset_path is not None:
        filename = local_path_from_s3_or_local_path(dataset_path)
        dataset = np.load(filename)
        N = dataset.shape[0]
    elif use_cached and osp.isfile(filename):
        dataset = np.load(filename)
        print("loaded data from saved file", filename)
    else:
        now = time.time()
        env = env_class(**env_kwargs)
        env = ImageEnv(
            env,
            imsize,
            init_camera=init_camera,
            transpose=True,
            normalize=True,
        )
        env.reset()
        info['env'] = env

        dataset = np.zeros((N, imsize * imsize * 3))
        for i in range(N):
            if oracle_dataset:
                goal = env.sample_goal()
                env.set_to_goal(goal)
            else:
                env.reset()
                for _ in range(n_random_steps):
                    obs = env.step(env.action_space.sample())[0]
            obs = env.step(env.action_space.sample())[0]
            img = obs['image_observation']
            dataset[i, :] = img
            if show:
                img = img.reshape(3, 84, 84).transpose()
                img = img[::-1, :, ::-1]
                cv2.imshow('img', img)
                cv2.waitKey(1)
                # radius = input('waiting...')
        print("done making training data", filename, time.time() - now)
        np.save(filename, dataset)

    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
Example #22
def ih_td3_experiment(variant):
    import railrl.samplers.rollout_functions as rf
    import railrl.torch.pytorch_util as ptu
    from railrl.data_management.obs_dict_replay_buffer import \
        ObsDictRelabelingBuffer
    from railrl.exploration_strategies.base import (
        PolicyWrappedWithExplorationStrategy
    )
    from railrl.misc.asset_loader import local_path_from_s3_or_local_path
    import joblib
    from railrl.torch.her.her_td3 import HerTd3
    from railrl.torch.networks import FlattenMlp, TanhMlpPolicy
    from railrl.state_distance.subgoal_planner import InfiniteHorizonSubgoalPlanner

    preprocess_rl_variant(variant)
    env = get_envs(variant)
    es = get_exploration_strategy(variant, env)

    observation_key = variant.get('observation_key', 'latent_observation')
    desired_goal_key = variant.get('desired_goal_key', 'latent_desired_goal')
    achieved_goal_key = desired_goal_key.replace("desired", "achieved")

    vectorized = 'vectorized' in env.reward_type
    variant['replay_buffer_kwargs']['vectorized'] = vectorized

    if 'ckpt' in variant:
        if 'ckpt_epoch' in variant:
            epoch = variant['ckpt_epoch']
            filename = local_path_from_s3_or_local_path(osp.join(variant['ckpt'], 'itr_%d.pkl' % epoch))
        else:
            filename = local_path_from_s3_or_local_path(osp.join(variant['ckpt'], 'params.pkl'))
        print("Loading ckpt from", filename)
        data = joblib.load(filename)
        qf1 = data['qf1']
        qf2 = data['qf2']
        policy = data['policy']
    else:
        obs_dim = (
                env.observation_space.spaces[observation_key].low.size
                + env.observation_space.spaces[desired_goal_key].low.size
        )
        action_dim = env.action_space.low.size

        env.reset()
        _, rew, _, _ = env.step(env.action_space.sample())
        if hasattr(rew, "__len__"):
            output_size = len(rew)
        else:
            output_size = 1

        qf1 = FlattenMlp(
            input_size=obs_dim + action_dim,
            output_size=output_size,
            **variant['qf_kwargs']
        )
        qf2 = FlattenMlp(
            input_size=obs_dim + action_dim,
            output_size=output_size,
            **variant['qf_kwargs']
        )
        policy = TanhMlpPolicy(
            input_size=obs_dim,
            output_size=action_dim,
            **variant['policy_kwargs']
        )
        policy.reward_scale = variant['algo_kwargs']['base_kwargs'].get('reward_scale', 1.0)

    eval_policy = None
    if variant.get('eval_policy', None) == 'SubgoalPlanner':
        eval_policy = InfiniteHorizonSubgoalPlanner(
            env,
            qf1,
            policy,
            observation_key=observation_key,
            desired_goal_key=desired_goal_key,
            achieved_goal_key=achieved_goal_key,
            state_based=variant.get("do_state_exp", False),
            max_tau=variant['algo_kwargs']['base_kwargs']['max_path_length'] - 1,
            reward_scale=variant['algo_kwargs']['base_kwargs'].get('reward_scale', 1.0),
            **variant['SubgoalPlanner_kwargs']
        )

    exploration_policy = PolicyWrappedWithExplorationStrategy(
        exploration_strategy=es,
        policy=policy,
    )

    replay_buffer = ObsDictRelabelingBuffer(
        env=env,
        observation_key=observation_key,
        desired_goal_key=desired_goal_key,
        achieved_goal_key=achieved_goal_key,
        **variant['replay_buffer_kwargs']
    )

    algo_kwargs = variant['algo_kwargs']
    algo_kwargs['replay_buffer'] = replay_buffer
    base_kwargs = algo_kwargs['base_kwargs']
    base_kwargs['training_env'] = env
    base_kwargs['render'] = variant.get("render", False)
    base_kwargs['render_during_eval'] = variant.get("render_during_eval", False)
    her_kwargs = algo_kwargs['her_kwargs']
    her_kwargs['observation_key'] = observation_key
    her_kwargs['desired_goal_key'] = desired_goal_key
    algorithm = HerTd3(
        env,
        qf1=qf1,
        qf2=qf2,
        policy=policy,
        exploration_policy=exploration_policy,
        eval_policy=eval_policy,
        **variant['algo_kwargs']
    )

    if variant.get("test_ckpt", False):
        algorithm.post_epoch_funcs.append(get_update_networks_func(variant))

    vis_variant = variant.get('vis_kwargs', {})
    vis_list = vis_variant.get('vis_list', [])
    if vis_variant.get("save_video", True):
        rollout_function = rf.create_rollout_function(
            rf.multitask_rollout,
            max_path_length=algorithm.max_path_length,
            observation_key=algorithm.observation_key,
            desired_goal_key=algorithm.desired_goal_key,
            vis_list=vis_list,
            dont_terminate=True,
        )
        video_func = get_video_save_func(
            rollout_function,
            env,
            variant,
        )
        algorithm.post_epoch_funcs.append(video_func)

    if ptu.gpu_enabled():
        print("using GPU")
        algorithm.cuda()
        if not variant.get("do_state_exp", False):
            env.vae.cuda()

    env.reset()
    if not variant.get("do_state_exp", False):
        env.dump_samples(epoch=None)
        env.dump_latent_plots(epoch=None)

    algorithm.train()
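
A minimal sketch (not taken from the source) of the variant dictionary this experiment snippet reads; only the key names are grounded in the lookups above, and every value is a placeholder:

# Hypothetical variant layout for the HER-TD3 experiment above; values are placeholders.
variant = dict(
    qf_kwargs=dict(hidden_sizes=[400, 300]),
    policy_kwargs=dict(hidden_sizes=[400, 300]),
    replay_buffer_kwargs=dict(),
    SubgoalPlanner_kwargs=dict(),
    algo_kwargs=dict(
        base_kwargs=dict(max_path_length=100, reward_scale=1.0),
        her_kwargs=dict(),
    ),
    eval_policy='SubgoalPlanner',   # omit to evaluate with the raw policy
    do_state_exp=True,
    render=False,
    render_during_eval=False,
    test_ckpt=False,
    vis_kwargs=dict(save_video=True, vis_list=[]),
)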
Ejemplo n.º 23
0
def generate_vae_dataset_from_params(
    env_class=None,
    env_kwargs=None,
    env_id=None,
    N=10000,
    test_p=0.9,
    use_cached=True,
    imsize=84,
    num_channels=1,
    show=False,
    init_camera=None,
    dataset_path=None,
    oracle_dataset=False,
    n_random_steps=100,
    vae_dataset_specific_env_kwargs=None,
    save_file_prefix=None,
    use_linear_dynamics=False,
):
    from multiworld.core.image_env import ImageEnv, unormalize_image
    from railrl.misc.asset_loader import local_path_from_s3_or_local_path
    import os.path as osp
    import numpy as np
    import cv2
    import time

    assert oracle_dataset, "this helper only builds oracle (goal-image) datasets"

    if env_kwargs is None:
        env_kwargs = {}
    if save_file_prefix is None:
        save_file_prefix = env_id
    if save_file_prefix is None:
        save_file_prefix = env_class.__name__
    filename = "/tmp/{}_N{}_{}_imsize{}_oracle{}.npy".format(
        save_file_prefix,
        str(N),
        init_camera.__name__ if init_camera else '',
        imsize,
        oracle_dataset,
    )
    info = {}
    if dataset_path is not None:
        filename = local_path_from_s3_or_local_path(dataset_path)
        dataset = np.load(filename)
        np.random.shuffle(dataset)
        N = dataset.shape[0]
    elif use_cached and osp.isfile(filename):
        dataset = np.load(filename)
        np.random.shuffle(dataset)
        print("loaded data from saved file", filename)
    else:
        now = time.time()

        if env_id is not None:
            import gym
            import multiworld
            multiworld.register_all_envs()
            env = gym.make(env_id)
        else:
            if vae_dataset_specific_env_kwargs is None:
                vae_dataset_specific_env_kwargs = {}
            for key, val in env_kwargs.items():
                if key not in vae_dataset_specific_env_kwargs:
                    vae_dataset_specific_env_kwargs[key] = val
            env = env_class(**vae_dataset_specific_env_kwargs)
        if not isinstance(env, ImageEnv):
            env = ImageEnv(
                env,
                imsize,
                init_camera=init_camera,
                transpose=True,
                normalize=True,
            )
        setup_pickup_image_env(env, num_presampled_goals=N)
        env.reset()
        info['env'] = env

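        # Fill the dataset directly from the env's presampled goal images.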
        dataset = np.zeros((N, imsize * imsize * num_channels), dtype=np.uint8)
        for i in range(N):
            img = env._presampled_goals['image_desired_goal'][i]
            dataset[i, :] = unormalize_image(img)
            if show:
                img = img.reshape(3, imsize, imsize).transpose()
                img = img[::-1, :, ::-1]
                cv2.imshow('img', img)
                cv2.waitKey(1)
                time.sleep(.2)
                # radius = input('waiting...')
        print("done making training data", filename, time.time() - now)
        np.random.shuffle(dataset)
        np.save(filename, dataset)

    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
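
A hedged usage sketch for the helper above; the env id is a placeholder (the function presamples goals via setup_pickup_image_env, so a pickup-style multiworld env is assumed):

# Hypothetical call; 'YourPickupEnv-v0' stands in for any registered multiworld env id.
train_data, test_data, info = generate_vae_dataset_from_params(
    env_id='YourPickupEnv-v0',
    N=1000,
    test_p=0.9,
    imsize=84,
    num_channels=3,
    oracle_dataset=True,   # required: the function asserts oracle data
    use_cached=False,
)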
Ejemplo n.º 24
0
def generate_vae_dataset(
    N=10000,
    test_p=0.9,
    use_cached=True,
    imsize=84,
    show=False,
    dataset_path=None,
    env_class=None,
    env_kwargs=None,
    init_camera=sawyer_door_env_camera,
):
    filename = "/tmp/sawyer_door_push_open_and_reach" + str(N) + ".npy"
    info = {}
    if dataset_path is not None:
        filename = local_path_from_s3_or_local_path(dataset_path)
        dataset = np.load(filename)
    elif use_cached and osp.isfile(filename):
        dataset = np.load(filename)
        print("loaded data from saved file", filename)
    else:
        env = env_class(**env_kwargs)
        env = ImageEnv(
            env,
            imsize,
            transpose=True,
            init_camera=init_camera,
            normalize=True,
        )
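        # First half of the dataset: images rendered at sampled goal configurations.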
        oracle_sampled_data = int(N / 2)
        dataset = np.zeros((N, imsize * imsize * 3))
        print('Goal Space Sampling')
        for i in range(oracle_sampled_data):
            goal = env.sample_goal()
            env.set_to_goal(goal)
            img = env._get_flat_img()
            dataset[i, :] = img
            if show:
                cv2.imshow('img', img.reshape(3, imsize, imsize).transpose())
                cv2.waitKey(1)
            print(i)
        env._wrapped_env.min_y_pos = .6
        policy = RandomPolicy(env.action_space)
        es = OUStrategy(action_space=env.action_space, theta=0)
        exploration_policy = PolicyWrappedWithExplorationStrategy(
            exploration_strategy=es,
            policy=policy,
        )
        print('Random Sampling')
        for i in range(oracle_sampled_data, N):
            if i % 20 == 0:
                env.reset()
                exploration_policy.reset()
            for _ in range(10):
                action = exploration_policy.get_action()[0]
                env.wrapped_env.step(action)
            img = env._get_flat_img()
            dataset[i, :] = img
            if show:
                cv2.imshow('img', img.reshape(3, imsize, imsize).transpose())
                cv2.waitKey(1)
            print(i)
    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
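
A hedged usage sketch for the helper above, assuming SawyerDoorEnv is already imported at module level (it is not defined in this snippet); sawyer_door_env_camera is the default camera from the signature:

# Hypothetical call; env_class and env_kwargs are assumptions, not values from the source.
train, test, info = generate_vae_dataset(
    N=2000,
    test_p=0.9,
    imsize=84,
    env_class=SawyerDoorEnv,
    env_kwargs={},
    init_camera=sawyer_door_env_camera,
    use_cached=False,
)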
Ejemplo n.º 25
0
def train_vae(variant):
    from railrl.misc.ml_util import PiecewiseLinearSchedule
    from railrl.torch.vae.conv_vae import ConvVAE
    from railrl.torch.vae.conv_vae_trainer import ConvVAETrainer
    from railrl.core import logger
    import railrl.torch.pytorch_util as ptu
    from multiworld.core.image_env import ImageEnv
    from railrl.envs.vae_wrappers import VAEWrappedEnv
    from railrl.misc.asset_loader import local_path_from_s3_or_local_path

    logger.remove_tabular_output('progress.csv', relative_to_snapshot_dir=True)
    logger.add_tabular_output('vae_progress.csv',
                              relative_to_snapshot_dir=True)

    env_id = variant['generate_vae_dataset_kwargs'].get('env_id', None)
    if env_id is not None:
        import gym
        env = gym.make(env_id)
    else:
        env_class = variant['generate_vae_dataset_kwargs']['env_class']
        env_kwargs = variant['generate_vae_dataset_kwargs']['env_kwargs']
        env = env_class(**env_kwargs)

    representation_size = variant["representation_size"]
    beta = variant["beta"]
    if 'beta_schedule_kwargs' in variant:
        beta_schedule = PiecewiseLinearSchedule(
            **variant['beta_schedule_kwargs'])
    else:
        beta_schedule = None

    # obtain training and testing data
    dataset_path = variant['generate_vae_dataset_kwargs'].get(
        'dataset_path', None)
    test_p = variant['generate_vae_dataset_kwargs'].get('test_p', 0.9)
    filename = local_path_from_s3_or_local_path(dataset_path)
    dataset = np.load(filename, allow_pickle=True).item()
    N = dataset['obs'].shape[0]
    n = int(N * test_p)
    train_data = {}
    test_data = {}
    for k in dataset.keys():
        train_data[k] = dataset[k][:n, :]
        test_data[k] = dataset[k][n:, :]

    # setup vae
    variant['vae_kwargs']['action_dim'] = train_data['actions'].shape[1]
    if variant.get('vae_type', None) == "VAE-state":
        from railrl.torch.vae.vae import VAE
        input_size = train_data['obs'].shape[1]
        variant['vae_kwargs']['input_size'] = input_size
        m = VAE(representation_size, **variant['vae_kwargs'])
    elif variant.get('vae_type', None) == "VAE2":
        from railrl.torch.vae.conv_vae2 import ConvVAE2
        variant['vae_kwargs']['imsize'] = variant['imsize']
        m = ConvVAE2(representation_size, **variant['vae_kwargs'])
    else:
        variant['vae_kwargs']['imsize'] = variant['imsize']
        m = ConvVAE(representation_size, **variant['vae_kwargs'])
    if ptu.gpu_enabled():
        m.cuda()

    # setup vae trainer
    if variant.get('vae_type', None) == "VAE-state":
        from railrl.torch.vae.vae_trainer import VAETrainer
        t = VAETrainer(train_data,
                       test_data,
                       m,
                       beta=beta,
                       beta_schedule=beta_schedule,
                       **variant['algo_kwargs'])
    else:
        t = ConvVAETrainer(train_data,
                           test_data,
                           m,
                           beta=beta,
                           beta_schedule=beta_schedule,
                           **variant['algo_kwargs'])

    # visualization
    vis_variant = variant.get('vis_kwargs', {})
    save_video = vis_variant.get('save_video', False)
    if isinstance(env, ImageEnv):
        image_env = env
    else:
        image_env = ImageEnv(
            env,
            variant['generate_vae_dataset_kwargs'].get('imsize'),
            init_camera=variant['generate_vae_dataset_kwargs'].get(
                'init_camera'),
            transpose=True,
            normalize=True,
        )
    render = variant.get('render', False)
    reward_params = variant.get("reward_params", dict())
    vae_env = VAEWrappedEnv(image_env,
                            m,
                            imsize=image_env.imsize,
                            decode_goals=render,
                            render_goals=render,
                            render_rollouts=render,
                            reward_params=reward_params,
                            **variant.get('vae_wrapped_env_kwargs', {}))
    vae_env.reset()
    vae_env.add_mode("video_env", 'video_env')
    vae_env.add_mode("video_vae", 'video_vae')
    if save_video:
        import railrl.samplers.rollout_functions as rf
        from railrl.policies.simple import RandomPolicy
        random_policy = RandomPolicy(vae_env.action_space)
        rollout_function = rf.create_rollout_function(
            rf.multitask_rollout,
            max_path_length=100,
            observation_key='latent_observation',
            desired_goal_key='latent_desired_goal',
            vis_list=vis_variant.get('vis_list', []),
            dont_terminate=True,
        )

        # Store the kwargs back into the variant so visualization_post_processing
        # (which reads variant['dump_video_kwargs']) can find them later.
        dump_video_kwargs = variant.setdefault("dump_video_kwargs", dict())
        dump_video_kwargs['imsize'] = vae_env.imsize
        dump_video_kwargs['vis_list'] = [
            'image_observation',
            'reconstr_image_observation',
            'image_latent_histogram_2d',
            'image_latent_histogram_mu_2d',
            'image_plt',
            'image_rew',
            'image_rew_euclidean',
            'image_rew_mahalanobis',
            'image_rew_logp',
            'image_rew_kl',
            'image_rew_kl_rev',
        ]

    def visualization_post_processing(save_vis, save_video, epoch):
        vis_list = vis_variant.get('vis_list', [])

        if save_vis:
            if vae_env.vae_input_key_prefix == 'state':
                vae_env.dump_reconstructions(epoch,
                                             n_recon=vis_variant.get(
                                                 'n_recon', 16))
            vae_env.dump_samples(epoch,
                                 n_samples=vis_variant.get('n_samples', 64))
            if 'latent_representation' in vis_list:
                vae_env.dump_latent_plots(epoch)
            if any(elem in vis_list for elem in [
                    'latent_histogram', 'latent_histogram_mu',
                    'latent_histogram_2d', 'latent_histogram_mu_2d'
            ]):
                vae_env.compute_latent_histogram()
            if not save_video and ('latent_histogram' in vis_list):
                vae_env.dump_latent_histogram(epoch=epoch,
                                              noisy=True,
                                              use_true_prior=True)
            if not save_video and ('latent_histogram_mu' in vis_list):
                vae_env.dump_latent_histogram(epoch=epoch,
                                              noisy=False,
                                              use_true_prior=True)

        if save_video and save_vis:
            from railrl.envs.vae_wrappers import temporary_mode
            from railrl.misc.video_gen import dump_video
            from railrl.core import logger

            vae_env.compute_goal_encodings()

            logdir = logger.get_snapshot_dir()
            filename = osp.join(logdir,
                                'video_{epoch}.mp4'.format(epoch=epoch))
            variant['dump_video_kwargs']['epoch'] = epoch
            temporary_mode(vae_env,
                           mode='video_env',
                           func=dump_video,
                           args=(vae_env, random_policy, filename,
                                 rollout_function),
                           kwargs=variant['dump_video_kwargs'])
            if not vis_variant.get('save_video_env_only', True):
                filename = osp.join(
                    logdir, 'video_{epoch}_vae.mp4'.format(epoch=epoch))
                temporary_mode(vae_env,
                               mode='video_vae',
                               func=dump_video,
                               args=(vae_env, random_policy, filename,
                                     rollout_function),
                               kwargs=variant['dump_video_kwargs'])

    # train vae
    for epoch in range(variant['num_epochs']):
        save_vis = (epoch % vis_variant['save_period'] == 0
                    or epoch == variant['num_epochs'] - 1)
        save_vae = (epoch % variant['snapshot_gap'] == 0
                    or epoch == variant['num_epochs'] - 1)

        t.train_epoch(epoch)

        t.test_epoch(
            epoch,
            save_reconstruction=save_vis,
            save_interpolation=save_vis,
            save_vae=save_vae,
        )
        if epoch % 300 == 0 or epoch == variant['num_epochs'] - 1:
            visualization_post_processing(save_vis, save_video, epoch)

    logger.save_extra_data(m, 'vae.pkl', mode='pickle')
    logger.remove_tabular_output(
        'vae_progress.csv',
        relative_to_snapshot_dir=True,
    )
    logger.add_tabular_output(
        'progress.csv',
        relative_to_snapshot_dir=True,
    )
    print("VAE training finished")

    return m
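
A hedged sketch of a variant dictionary that satisfies the lookups in train_vae above. Note that the dataset at dataset_path is expected to be a pickled dict containing at least 'obs' and 'actions' arrays; the path, env id, and numbers below are placeholders:

# Hypothetical variant; key names follow the lookups in train_vae, values are placeholders.
variant = dict(
    generate_vae_dataset_kwargs=dict(
        env_id='YourMultiworldEnv-v0',           # placeholder; any registered gym id works
        dataset_path='path/to/vae_dataset.npy',  # placeholder path to a dict-style .npy
        test_p=0.9,
        imsize=84,
        init_camera=None,
    ),
    representation_size=16,
    beta=1.0,
    imsize=84,
    vae_kwargs=dict(),
    algo_kwargs=dict(),
    num_epochs=100,
    snapshot_gap=50,
    vis_kwargs=dict(save_period=25, save_video=False, vis_list=[]),
    reward_params=dict(),
    vae_wrapped_env_kwargs=dict(),
    dump_video_kwargs=dict(),
)
vae = train_vae(variant)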