Example #1
def generate_vae_dataset(
        N=10000, test_p=0.9, use_cached=True, imsize=84, show=False,
        dataset_path=None, policy_path=None, action_space_sampling=False, env_class=SawyerDoorEnv, env_kwargs=None,
        init_camera=sawyer_door_env_camera_v2,
):
    if policy_path is not None:
        filename = "/tmp/sawyer_door_pull_open_oracle+random_policy_data_closer_zoom_action_limited" + str(N) + ".npy"
    elif action_space_sampling:
        filename = "/tmp/sawyer_door_pull_open_zoomed_in_action_space_sampling" + str(N) + ".npy"
    else:
        filename = "/tmp/sawyer_door_pull_open" + str(N) + ".npy"
    info = {}
    if dataset_path is not None:
        filename = local_path_from_s3_or_local_path(dataset_path)
        dataset = np.load(filename)
    elif use_cached and osp.isfile(filename):
        dataset = np.load(filename)
        print("loaded data from saved file", filename)
    else:
        now = time.time()
        env = env_class(**env_kwargs)
        env = ImageEnv(
            env, imsize,
            transpose=True,
            init_camera=init_camera,
            normalize=True,
        )
        info['env'] = env
        policy = RandomPolicy(env.action_space)
        es = OUStrategy(action_space=env.action_space, theta=0)
        exploration_policy = PolicyWrappedWithExplorationStrategy(
            exploration_strategy=es,
            policy=policy,
        )
        env.wrapped_env.reset()
        dataset = np.zeros((N, imsize * imsize * 3), dtype=np.uint8)
        for i in range(N):
            if i % 20 == 0:
                env.reset_model()
                exploration_policy.reset()
            for _ in range(10):
                action = exploration_policy.get_action()[0]
                env.wrapped_env.step(
                    action
                )
            # env.set_to_goal_angle(env.get_goal()['state_desired_goal'])
            img = env._get_flat_img()
            dataset[i, :] = unormalize_image(img)
            if show:
                cv2.imshow('img', img.reshape(3, 84, 84).transpose())
                cv2.waitKey(1)
            print(i)
        print("done making training data", filename, time.time() - now)
        np.save(filename, dataset)

    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
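
A minimal invocation sketch for the function above (a usage sketch, not part of the original source; the keyword values are illustrative and assume the SawyerDoorEnv and camera imports used by this example are importable):

# Hypothetical usage sketch; parameter values are illustrative.
train_data, test_data, info = generate_vae_dataset(
    N=1000,
    test_p=0.9,
    use_cached=False,
    imsize=84,
    env_kwargs=dict(),  # env_class defaults to SawyerDoorEnv
)
# Each row is a flattened uint8 image of length imsize * imsize * 3 = 21168.
print(train_data.shape, test_data.shape)  # (900, 21168), (100, 21168)
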
def generate_goal_data_set(env=None, num_goals=1000, use_cached_dataset=False,
                           action_scale=1 / 10):
    if use_cached_dataset and osp.isfile(
            '/tmp/goals' + str(num_goals) + '.npy'):
        goal_dict = np.load('/tmp/goals' + str(num_goals) + '.npy', allow_pickle=True).item()
        print("loaded data from saved file")
        return goal_dict
    cached_goal_keys = [
        'latent_desired_goal',
        'image_desired_goal',
        'state_desired_goal',
        'joint_desired_goal',
    ]
    goal_sizes = [
        env.observation_space.spaces['latent_desired_goal'].low.size,
        env.observation_space.spaces['image_desired_goal'].low.size,
        env.observation_space.spaces['state_desired_goal'].low.size,
        7
    ]
    observation_keys = [
        'latent_observation',
        'image_observation',
        'state_observation',
        'state_observation',
    ]
    goal_generation_dict = dict()
    for goal_key, goal_size, obs_key in zip(
            cached_goal_keys,
            goal_sizes,
            observation_keys,
    ):
        goal_generation_dict[goal_key] = [goal_size, obs_key]
    goal_dict = dict()
    policy = RandomPolicy(env.action_space)
    es = OUStrategy(action_space=env.action_space, theta=0)
    exploration_policy = PolicyWrappedWithExplorationStrategy(
        exploration_strategy=es,
        policy=policy,
    )
    for goal_key in goal_generation_dict:
        goal_size, obs_key = goal_generation_dict[goal_key]
        goal_dict[goal_key] = np.zeros((num_goals, goal_size))
    print('Generating Random Goals')
    for i in range(num_goals):
        if i % 50 == 0:
            print('Reset')
            env.reset_model()
            exploration_policy.reset()
        action = exploration_policy.get_action()[0] * action_scale
        obs, _, _, _ = env.step(
            action
        )
        print(i)
        for goal_key in goal_generation_dict:
            goal_size, obs_key = goal_generation_dict[goal_key]
            goal_dict[goal_key][i, :] = obs[obs_key]
    np.save('/tmp/goals' + str(num_goals) + '.npy', goal_dict)
    return goal_dict
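
A sketch of how the returned goal dictionary might be consumed (it assumes env is an ImageEnv-wrapped multiworld environment exposing the dict observation keys referenced above; the call values are illustrative):

# Hypothetical usage sketch; num_goals is illustrative.
goal_dict = generate_goal_data_set(env=env, num_goals=100,
                                   use_cached_dataset=False)
for key in ('latent_desired_goal', 'image_desired_goal',
            'state_desired_goal', 'joint_desired_goal'):
    print(key, goal_dict[key].shape)  # each array is (num_goals, goal_size)
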
def visualize_policy_error(qf, env, args):
    model = NumpyModelExtractor(qf, args.cheat, num_steps_left=args.tau)
    policy = RandomPolicy(env.action_space)
    actual_state = env.reset()

    predicted_states = []
    actual_states = []

    predicted_state = actual_state
    for _ in range(args.H):
        predicted_states.append(predicted_state.copy())
        actual_states.append(actual_state.copy())

        action, _ = policy.get_action(actual_state)
        predicted_state = model.next_state(predicted_state, action)
        actual_state = env.step(action)[0]

    predicted_states = np.array(predicted_states)
    actual_states = np.array(actual_states)
    times = np.arange(args.H)

    num_state_dims = env.observation_space.low.size
    dims = list(range(num_state_dims))
    norm = colors.Normalize(vmin=0, vmax=num_state_dims)
    mapper = cm.ScalarMappable(norm=norm, cmap=cm.hsv)
    for dim in dims:
        plt.plot(
            times,
            predicted_states[:, dim],
            '--',
            label='Predicted, Dim {}'.format(dim),
            color=mapper.to_rgba(dim),
        )
        plt.plot(
            times,
            actual_states[:, dim],
            '-',
            label='Actual, Dim {}'.format(dim),
            color=mapper.to_rgba(dim),
        )
    plt.xlabel("Time Steps")
    plt.ylabel("Observation Value")
    plt.legend(loc='best')
    plt.show()
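
visualize_policy_error only reads args.cheat, args.tau, and args.H; a minimal sketch of the expected argument container (field values are illustrative, and qf and env would come from a loaded snapshot):

# Hypothetical argument container; values are illustrative.
import argparse

args = argparse.Namespace(cheat=False, tau=0, H=50)
visualize_policy_error(qf, env, args)
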
Example #4
    def pretrain(self):
        if (self.num_paths_for_normalization == 0 or
            (self.obs_normalizer is None and self.action_normalizer is None)):
            return

        pretrain_paths = []
        random_policy = RandomPolicy(self.env.action_space)
        while len(pretrain_paths) < self.num_paths_for_normalization:
            path = rollout(self.env, random_policy, self.max_path_length)
            pretrain_paths.append(path)
        ob_mean, ob_std, delta_mean, delta_std, ac_mean, ac_std = (
            compute_normalization(pretrain_paths))
        if self.obs_normalizer is not None:
            self.obs_normalizer.set_mean(ob_mean)
            self.obs_normalizer.set_std(ob_std)
        if self.delta_normalizer is not None:
            self.delta_normalizer.set_mean(delta_mean)
            self.delta_normalizer.set_std(delta_std)
        if self.action_normalizer is not None:
            self.action_normalizer.set_mean(ac_mean)
            self.action_normalizer.set_std(ac_std)
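
These examples only rely on RandomPolicy exposing get_action() (returning an action plus an empty info dict) and reset(); if railrl.policies.simple is unavailable, a minimal stand-in could look like this (a sketch, not the library implementation):

# Minimal stand-in sketch for railrl's RandomPolicy; not the library code.
class SimpleRandomPolicy:
    def __init__(self, action_space):
        self.action_space = action_space

    def get_action(self, *args, **kwargs):
        # The observation is ignored; sample uniformly from the action space.
        return self.action_space.sample(), {}

    def reset(self):
        pass
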
from railrl.state_distance.rollout_util import multitask_rollout

if __name__ == "__main__":

    parser = argparse.ArgumentParser()
    parser.add_argument('file', type=str,
                        help='path to the snapshot file')
    parser.add_argument('--pause', action='store_true')
    args = parser.parse_args()
    if args.pause:
        import ipdb; ipdb.set_trace()

    data = joblib.load(args.file)
    env = data['env']
    qf = data['qf']
    policy = data['policy']
    tdm_policy = data['trained_policy']
    random_policy = RandomPolicy(env.action_space)
    vf = data['vf']
    path = multitask_rollout(
        env,
        random_policy,
        init_tau=0,
        max_path_length=100,
        animated=True,
    )
    goal = env.sample_goal_for_rollout()

    import ipdb; ipdb.set_trace()
    agent_infos = path['agent_infos']
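
    The rollout helpers used in these examples return path dictionaries; across the snippets the keys accessed are 'observations', 'actions', 'next_observations', and 'agent_infos'. A quick debugging sketch, appended to the script above, to inspect what this particular path contains:

    # Inspect the keys of the rollout path produced above.
    for k in sorted(path.keys()):
        print(k, type(path[k]))
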
def generate_vae_dataset(
    N=10000,
    test_p=0.9,
    use_cached=True,
    imsize=84,
    show=False,
    dataset_path=None,
    action_space_sampling=False,
    init_camera=None,
    env_class=None,
    env_kwargs=None,
):
    filename = "/tmp/sawyer_xyz_pos_control_new_zoom_cam" + str(N) + '.npy'
    info = {}
    if dataset_path is not None:
        filename = local_path_from_s3_or_local_path(dataset_path)
        dataset = np.load(filename)
    elif use_cached and osp.isfile(filename):
        dataset = np.load(filename)
        print("loaded data from saved file", filename)
    else:
        now = time.time()
        if env_kwargs is None:
            env_kwargs = dict()
        env = env_class(**env_kwargs)
        env = ImageEnv(
            env,
            imsize,
            transpose=True,
            init_camera=init_camera,
            normalize=True,
        )
        dataset = np.zeros((N, imsize * imsize * 3), dtype=np.uint8)
        if action_space_sampling:
            action_space = Box(np.array([-.1, .5, 0]), np.array([.1, .7, .5]))
            for i in range(N):
                env.set_to_goal(env.sample_goal())
                img = env._get_flat_img()
                dataset[i, :] = unormalize_image(img)
                if show:
                    cv2.imshow('img', img.reshape(3, 84, 84).transpose())
                    cv2.waitKey(1)
                print(i)
            info['env'] = env
        else:
            policy = RandomPolicy(env.action_space)
            es = OUStrategy(action_space=env.action_space, theta=0)
            exploration_policy = PolicyWrappedWithExplorationStrategy(
                exploration_strategy=es,
                policy=policy,
            )
            for i in range(N):
                # Move the goal out of the image
                env.wrapped_env.set_goal(np.array([100, 100, 100]))
                if i % 50 == 0:
                    print('Reset')
                    env.reset()
                    exploration_policy.reset()
                for _ in range(1):
                    action = exploration_policy.get_action()[0] * 10
                    env.wrapped_env.step(action)
                img = env.step(env.action_space.sample())[0]
                dataset[i, :] = img
                if show:
                    cv2.imshow('img', img.reshape(3, 84, 84).transpose())
                    cv2.waitKey(1)
                print(i)

        print("done making training data", time.time() - now)
        np.save(filename, dataset)

    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
def generate_vae_dataset(
    N=10000,
    test_p=0.9,
    use_cached=True,
    imsize=84,
    show=False,
    dataset_path=None,
    policy_path=None,
    ratio_oracle_policy_data_to_random=1 / 2,
    action_space_sampling=False,
    env_class=None,
    env_kwargs=None,
    action_plus_random_sampling=False,
    init_camera=sawyer_door_env_camera,
):
    if policy_path is not None:
        filename = "/tmp/sawyer_door_push_open_oracle+random_policy_data_closer_zoom_action_limited" + str(
            N) + ".npy"
    elif action_space_sampling:
        filename = "/tmp/sawyer_door_push_open_zoomed_in_action_space_sampling" + str(
            N) + ".npy"
    else:
        filename = "/tmp/sawyer_door_push_open" + str(N) + ".npy"
    info = {}
    if dataset_path is not None:
        filename = local_path_from_s3_or_local_path(dataset_path)
        dataset = np.load(filename)
    elif use_cached and osp.isfile(filename):
        dataset = np.load(filename)
        print("loaded data from saved file", filename)
    elif action_space_sampling:
        env = SawyerDoorPushOpenEnv(**env_kwargs)
        env = ImageEnv(
            env,
            imsize,
            transpose=False,
            init_camera=sawyer_door_env_camera,
            normalize=False,
        )
        action_space = Box(np.array([-env.max_x_pos, .5, .06]),
                           np.array([env.max_x_pos, env.max_y_pos, .06]))
        dataset = np.zeros((N, imsize * imsize * 3))
        for i in range(N):
            env.set_to_goal_pos(action_space.sample())  #move arm to spot
            goal = env.sample_goal()
            env.set_to_goal(goal)
            img = env.get_image().flatten()
            dataset[i, :] = img
            if show:
                cv2.imshow('img', img.reshape(3, 84, 84).transpose())
                cv2.waitKey(1)
            print(i)
        info['env'] = env
    elif action_plus_random_sampling:
        env = env_class(**env_kwargs)
        env = ImageEnv(
            env,
            imsize,
            transpose=True,
            init_camera=init_camera,
            normalize=True,
        )
        action_space = Box(np.array([-env.max_x_pos, .5, .06]),
                           np.array([env.max_x_pos, .6, .06]))
        action_sampled_data = int(N / 2)
        dataset = np.zeros((N, imsize * imsize * 3))
        print('Action Space Sampling')
        for i in range(action_sampled_data):
            env.set_to_goal_pos(action_space.sample())  # move arm to spot
            goal = env.sample_goal()
            env.set_to_goal(goal)
            img = env._get_flat_img()
            dataset[i, :] = img
            if show:
                cv2.imshow('img', img.reshape(3, 84, 84).transpose())
                cv2.waitKey(1)
            print(i)
        env._wrapped_env.min_y_pos = .6
        policy = RandomPolicy(env.action_space)
        es = OUStrategy(action_space=env.action_space, theta=0)
        exploration_policy = PolicyWrappedWithExplorationStrategy(
            exploration_strategy=es,
            policy=policy,
        )
        print('Random Sampling')
        for i in range(action_sampled_data, N):
            if i % 20 == 0:
                env.reset()
                exploration_policy.reset()
            for _ in range(10):
                action = exploration_policy.get_action()[0]
                env.wrapped_env.step(action)
            img = env._get_flat_img()
            dataset[i, :] = img
            if show:
                cv2.imshow('img', img.reshape(3, 84, 84).transpose())
                cv2.waitKey(1)
            print(i)
        env._wrapped_env.min_y_pos = .5
        info['env'] = env
    else:
        now = time.time()
        env = SawyerDoorPushOpenEnv(max_angle=.5)
        env = ImageEnv(
            env,
            imsize,
            transpose=True,
            init_camera=sawyer_door_env_camera,
            normalize=True,
        )
        info['env'] = env
        policy = RandomPolicy(env.action_space)
        es = OUStrategy(action_space=env.action_space, theta=0)
        exploration_policy = PolicyWrappedWithExplorationStrategy(
            exploration_strategy=es,
            policy=policy,
        )
        dataset = np.zeros((N, imsize * imsize * 3))
        for i in range(N):
            if i % 100 == 0:
                env.reset()
                exploration_policy.reset()
            for _ in range(25):
                # env.wrapped_env.step(
                #     env.wrapped_env.action_space.sample()
                # )
                action = exploration_policy.get_action()[0]
                env.wrapped_env.step(action)
            goal = env.sample_goal_for_rollout()
            env.set_to_goal(goal)
            img = env.step(env.action_space.sample())[0]
            dataset[i, :] = img
            if show:
                cv2.imshow('img', img.reshape(3, 84, 84).transpose())
                cv2.waitKey(1)
            print(i)
        print("done making training data", filename, time.time() - now)
        np.save(filename, dataset)

    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
Example #8
def generate_vae_dataset(
        N=10000, test_p=0.9, use_cached=True, imsize=84, show=False,
        dataset_path=None, policy_path=None, action_space_sampling=False, env_class=SawyerPushAndPullDoorEnv, env_kwargs=None,
        action_plus_random_sampling=False, init_camera=sawyer_door_env_camera, ratio_action_sample_to_random=1 / 2, env_id=None,
):
    if policy_path is not None:
        filename = "/tmp/sawyer_door_push_and_pull_open_oracle+random_policy_data_closer_zoom_action_limited" + str(N) + ".npy"
    elif action_space_sampling:
        filename = "/tmp/sawyer_door_push_and_pull_open_zoomed_in_action_space_sampling" + str(N) + ".npy"
    else:
        filename = "/tmp/sawyer_door_push_and_pull_open" + str(N) + ".npy"
    info = {}
    if dataset_path is not None:
        filename = local_path_from_s3_or_local_path(dataset_path)
        dataset = np.load(filename)
    elif use_cached and osp.isfile(filename):
        dataset = np.load(filename)
        print("loaded data from saved file", filename)
    elif action_plus_random_sampling:
        if env_id is not None:
            import gym
            env = gym.make(env_id)
        else:
            env = env_class(**env_kwargs)
            env = ImageEnv(
                env, imsize,
                transpose=True,
                init_camera=init_camera,
                normalize=True,
            )
        action_sampled_data = int(N*ratio_action_sample_to_random)
        dataset = np.zeros((N, imsize * imsize * 3), dtype=np.uint8)
        print('Action Space Sampling')
        for i in range(action_sampled_data):
            goal = env.sample_goal()
            env.set_to_goal(goal)
            img = env._get_flat_img()
            dataset[i, :] = unormalize_image(img)
            if show:
                cv2.imshow('img', img.reshape(3, 84, 84).transpose())
                cv2.waitKey(1)
            print(i)
        policy = RandomPolicy(env.action_space)
        es = OUStrategy(action_space=env.action_space, theta=0)
        exploration_policy = PolicyWrappedWithExplorationStrategy(
            exploration_strategy=es,
            policy=policy,
        )
        print('Random Sampling')
        for i in range(action_sampled_data, N):
            if i % 20 == 0:
                env.reset()
                exploration_policy.reset()
            for _ in range(10):
                action = exploration_policy.get_action()[0]
                env.wrapped_env.step(
                    action
                )
            goal = env.sample_goal()
            env.set_to_goal_angle(goal['state_desired_goal'])
            img = env._get_flat_img()
            dataset[i, :] = unormalize_image(img)
            if show:
                cv2.imshow('img', img.reshape(3, 84, 84).transpose())
                cv2.waitKey(1)
            print(i)
        env._wrapped_env.min_y_pos = .5
        info['env'] = env
    else:
        raise NotImplementedError()
    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
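
This variant can build its environment from a registered gym id instead of env_class; a hedged invocation sketch (the id string is illustrative and must already be registered, e.g. by importing multiworld and calling its environment-registration helper):

# Hypothetical usage sketch; the env id must be registered with gym beforehand.
train_data, test_data, info = generate_vae_dataset(
    N=500,
    env_id='SawyerDoorHookResetFreeEnv-v1',  # assumption: a registered door env
    action_plus_random_sampling=True,
    use_cached=False,
)
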
def main():
    model_data = joblib.load(MODEL_PATH)
    model = model_data['model']
    tdm_data = joblib.load(TDM_PATH)
    env = tdm_data['env']
    qf = tdm_data['qf']
    variant_path = Path(TDM_PATH).parents[0] / 'variant.json'
    variant = json.load(variant_path.open())
    reward_scale = variant['sac_tdm_kwargs']['base_kwargs']['reward_scale']
    tdm = ImplicitModel(qf, None)
    random_policy = RandomPolicy(env.action_space)
    H = 10
    path = rollout(env, random_policy, max_path_length=H)

    model_distance_preds = []
    tdm_distance_preds = []
    for ob, action, next_ob in zip(
            path['observations'],
            path['actions'],
            path['next_observations'],
    ):
        obs = ob[None]
        actions = action[None]
        next_feature = env.convert_ob_to_goal(next_ob)
        model_next_ob_pred = ob + model.eval_np(obs, actions)[0]
        model_distance_pred = np.abs(
            env.convert_ob_to_goal(model_next_ob_pred) - next_feature)

        tdm_next_feature_pred = get_feasible_goal(env, tdm, ob, action)
        tdm_distance_pred = np.abs(tdm_next_feature_pred - next_feature)

        model_distance_preds.append(model_distance_pred)
        tdm_distance_preds.append(tdm_distance_pred)

    model_distances = np.array(model_distance_preds)
    tdm_distances = np.array(tdm_distance_preds)
    ts = np.arange(len(model_distance_preds))
    num_dim = model_distances[0].size
    ind = np.arange(num_dim)
    width = 0.35

    fig, ax = plt.subplots()
    means = model_distances.mean(axis=0)
    stds = model_distances.std(axis=0)
    rects1 = ax.bar(ind, means, width, color='r', yerr=stds)

    means = tdm_distances.mean(axis=0)
    stds = tdm_distances.std(axis=0)
    rects2 = ax.bar(ind + width, means, width, color='y', yerr=stds)
    ax.legend((rects1[0], rects2[0]), ('Model', 'TDM'))
    ax.set_xlabel("Dimension")
    ax.set_ylabel("Absolute Error")
    ax.set_xticks(ind + width / 2)
    ax.set_xticklabels(list(map(str, ind)))

    plt.show()

    plt.subplot(2, 1, 1)
    for i in range(num_dim):
        plt.plot(
            ts,
            model_distances[:, i],
            label=str(i),
        )
    plt.xlabel("Time")
    plt.ylabel("Absolute Error")
    plt.title("Model")
    plt.legend()

    plt.subplot(2, 1, 2)
    for i in range(num_dim):
        plt.plot(
            ts,
            tdm_distances[:, i],
            label=str(i),
        )
    plt.xlabel("Time")
    plt.ylabel("Absolute Error")
    plt.title("TDM")
    plt.legend()
    plt.show()

    goal = env.convert_ob_to_goal(path['observations'][H // 2].copy())
    path = rollout(env, random_policy, max_path_length=H)

    model_distance_preds = []
    tdm_distance_preds = []
    for ob, action, next_ob in zip(
            path['observations'],
            path['actions'],
            path['next_observations'],
    ):
        model_next_ob_pred = ob + model.eval_np(ob[None], action[None])[0]
        model_distance_pred = np.linalg.norm(
            env.convert_ob_to_goal(model_next_ob_pred) - goal)

        tdm_distance_pred = tdm.eval_np(
            ob[None],
            goal[None],
            np.zeros((1, 1)),
            action[None],
        )[0] / reward_scale

        model_distance_preds.append(model_distance_pred)
        tdm_distance_preds.append(tdm_distance_pred)

    # Stack the per-step goal-distance predictions before plotting them.
    model_distances = np.array(model_distance_preds)
    tdm_distances = np.array(tdm_distance_preds)

    fig, ax = plt.subplots()
    means = model_distances.mean(axis=0)
    stds = model_distances.std(axis=0)
    rects1 = ax.bar(ind, means, width, color='r', yerr=stds)

    means = tdm_distances.mean(axis=0)
    stds = tdm_distances.std(axis=0)
    rects2 = ax.bar(ind + width, means, width, color='y', yerr=stds)
    ax.legend((rects1[0], rects2[0]), ('Model', 'TDM'))
    ax.set_xlabel("Dimension")
    ax.set_ylabel("Error To Random Goal State")
    ax.set_xticks(ind + width / 2)
    ax.set_xticklabels(list(map(str, ind)))

    plt.show()
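
main() reads the module-level constants MODEL_PATH and TDM_PATH; a sketch of the expected setup (the paths are placeholders for real joblib snapshots, the first containing a 'model' entry and the second the 'env' and 'qf' entries of a TDM experiment with a variant.json alongside it):

# Hypothetical setup; paths are placeholders, not real files.
MODEL_PATH = '/path/to/model/params.pkl'   # snapshot with a 'model' entry
TDM_PATH = '/path/to/tdm/params.pkl'       # snapshot with 'env' and 'qf' entries

if __name__ == "__main__":
    main()
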
Example #10
def train_vae(variant):
    from railrl.misc.ml_util import PiecewiseLinearSchedule
    from railrl.torch.vae.conv_vae import ConvVAE
    from railrl.torch.vae.conv_vae_trainer import ConvVAETrainer
    from railrl.core import logger
    import railrl.torch.pytorch_util as ptu
    from multiworld.core.image_env import ImageEnv
    from railrl.envs.vae_wrappers import VAEWrappedEnv
    from railrl.misc.asset_loader import local_path_from_s3_or_local_path

    logger.remove_tabular_output('progress.csv', relative_to_snapshot_dir=True)
    logger.add_tabular_output('vae_progress.csv',
                              relative_to_snapshot_dir=True)

    env_id = variant['generate_vae_dataset_kwargs'].get('env_id', None)
    if env_id is not None:
        import gym
        env = gym.make(env_id)
    else:
        env_class = variant['generate_vae_dataset_kwargs']['env_class']
        env_kwargs = variant['generate_vae_dataset_kwargs']['env_kwargs']
        env = env_class(**env_kwargs)

    representation_size = variant["representation_size"]
    beta = variant["beta"]
    if 'beta_schedule_kwargs' in variant:
        beta_schedule = PiecewiseLinearSchedule(
            **variant['beta_schedule_kwargs'])
    else:
        beta_schedule = None

    # obtain training and testing data
    dataset_path = variant['generate_vae_dataset_kwargs'].get(
        'dataset_path', None)
    test_p = variant['generate_vae_dataset_kwargs'].get('test_p', 0.9)
    filename = local_path_from_s3_or_local_path(dataset_path)
    dataset = np.load(filename, allow_pickle=True).item()
    N = dataset['obs'].shape[0]
    n = int(N * test_p)
    train_data = {}
    test_data = {}
    for k in dataset.keys():
        train_data[k] = dataset[k][:n, :]
        test_data[k] = dataset[k][n:, :]

    # setup vae
    variant['vae_kwargs']['action_dim'] = train_data['actions'].shape[1]
    if variant.get('vae_type', None) == "VAE-state":
        from railrl.torch.vae.vae import VAE
        input_size = train_data['obs'].shape[1]
        variant['vae_kwargs']['input_size'] = input_size
        m = VAE(representation_size, **variant['vae_kwargs'])
    elif variant.get('vae_type', None) == "VAE2":
        from railrl.torch.vae.conv_vae2 import ConvVAE2
        variant['vae_kwargs']['imsize'] = variant['imsize']
        m = ConvVAE2(representation_size, **variant['vae_kwargs'])
    else:
        variant['vae_kwargs']['imsize'] = variant['imsize']
        m = ConvVAE(representation_size, **variant['vae_kwargs'])
    if ptu.gpu_enabled():
        m.cuda()

    # setup vae trainer
    if variant.get('vae_type', None) == "VAE-state":
        from railrl.torch.vae.vae_trainer import VAETrainer
        t = VAETrainer(train_data,
                       test_data,
                       m,
                       beta=beta,
                       beta_schedule=beta_schedule,
                       **variant['algo_kwargs'])
    else:
        t = ConvVAETrainer(train_data,
                           test_data,
                           m,
                           beta=beta,
                           beta_schedule=beta_schedule,
                           **variant['algo_kwargs'])

    # visualization
    vis_variant = variant.get('vis_kwargs', {})
    save_video = vis_variant.get('save_video', False)
    if isinstance(env, ImageEnv):
        image_env = env
    else:
        image_env = ImageEnv(
            env,
            variant['generate_vae_dataset_kwargs'].get('imsize'),
            init_camera=variant['generate_vae_dataset_kwargs'].get(
                'init_camera'),
            transpose=True,
            normalize=True,
        )
    render = variant.get('render', False)
    reward_params = variant.get("reward_params", dict())
    vae_env = VAEWrappedEnv(image_env,
                            m,
                            imsize=image_env.imsize,
                            decode_goals=render,
                            render_goals=render,
                            render_rollouts=render,
                            reward_params=reward_params,
                            **variant.get('vae_wrapped_env_kwargs', {}))
    vae_env.reset()
    vae_env.add_mode("video_env", 'video_env')
    vae_env.add_mode("video_vae", 'video_vae')
    if save_video:
        import railrl.samplers.rollout_functions as rf
        from railrl.policies.simple import RandomPolicy
        random_policy = RandomPolicy(vae_env.action_space)
        rollout_function = rf.create_rollout_function(
            rf.multitask_rollout,
            max_path_length=100,
            observation_key='latent_observation',
            desired_goal_key='latent_desired_goal',
            vis_list=vis_variant.get('vis_list', []),
            dont_terminate=True,
        )

        dump_video_kwargs = variant.get("dump_video_kwargs", dict())
        dump_video_kwargs['imsize'] = vae_env.imsize
        dump_video_kwargs['vis_list'] = [
            'image_observation',
            'reconstr_image_observation',
            'image_latent_histogram_2d',
            'image_latent_histogram_mu_2d',
            'image_plt',
            'image_rew',
            'image_rew_euclidean',
            'image_rew_mahalanobis',
            'image_rew_logp',
            'image_rew_kl',
            'image_rew_kl_rev',
        ]

    def visualization_post_processing(save_vis, save_video, epoch):
        vis_list = vis_variant.get('vis_list', [])

        if save_vis:
            if vae_env.vae_input_key_prefix == 'state':
                vae_env.dump_reconstructions(epoch,
                                             n_recon=vis_variant.get(
                                                 'n_recon', 16))
            vae_env.dump_samples(epoch,
                                 n_samples=vis_variant.get('n_samples', 64))
            if 'latent_representation' in vis_list:
                vae_env.dump_latent_plots(epoch)
            if any(elem in vis_list for elem in [
                    'latent_histogram', 'latent_histogram_mu',
                    'latent_histogram_2d', 'latent_histogram_mu_2d'
            ]):
                vae_env.compute_latent_histogram()
            if not save_video and ('latent_histogram' in vis_list):
                vae_env.dump_latent_histogram(epoch=epoch,
                                              noisy=True,
                                              use_true_prior=True)
            if not save_video and ('latent_histogram_mu' in vis_list):
                vae_env.dump_latent_histogram(epoch=epoch,
                                              noisy=False,
                                              use_true_prior=True)

        if save_video and save_vis:
            from railrl.envs.vae_wrappers import temporary_mode
            from railrl.misc.video_gen import dump_video
            from railrl.core import logger

            vae_env.compute_goal_encodings()

            logdir = logger.get_snapshot_dir()
            filename = osp.join(logdir,
                                'video_{epoch}.mp4'.format(epoch=epoch))
            variant['dump_video_kwargs']['epoch'] = epoch
            temporary_mode(vae_env,
                           mode='video_env',
                           func=dump_video,
                           args=(vae_env, random_policy, filename,
                                 rollout_function),
                           kwargs=variant['dump_video_kwargs'])
            if not vis_variant.get('save_video_env_only', True):
                filename = osp.join(
                    logdir, 'video_{epoch}_vae.mp4'.format(epoch=epoch))
                temporary_mode(vae_env,
                               mode='video_vae',
                               func=dump_video,
                               args=(vae_env, random_policy, filename,
                                     rollout_function),
                               kwargs=variant['dump_video_kwargs'])

    # train vae
    for epoch in range(variant['num_epochs']):
        #for epoch in range(2000):
        save_vis = (epoch % vis_variant['save_period'] == 0
                    or epoch == variant['num_epochs'] - 1)
        save_vae = (epoch % variant['snapshot_gap'] == 0
                    or epoch == variant['num_epochs'] - 1)

        t.train_epoch(epoch)
        '''if epoch % 500 == 0 or epoch == variant['num_epochs']-1:
           t.test_epoch(
                epoch,
                save_reconstruction=save_vis,
                save_interpolation=save_vis,
                save_vae=save_vae,
            )
        if epoch % 200 == 0 or epoch == variant['num_epochs']-1:
            visualization_post_processing(save_video, save_video, epoch)'''

        t.test_epoch(
            epoch,
            save_reconstruction=save_vis,
            save_interpolation=save_vis,
            save_vae=save_vae,
        )
        if epoch % 300 == 0 or epoch == variant['num_epochs'] - 1:
            visualization_post_processing(save_vis, save_video, epoch)

    logger.save_extra_data(m, 'vae.pkl', mode='pickle')
    logger.remove_tabular_output(
        'vae_progress.csv',
        relative_to_snapshot_dir=True,
    )
    logger.add_tabular_output(
        'progress.csv',
        relative_to_snapshot_dir=True,
    )
    print("finished --------------------!!!!!!!!!!!!!!!")

    return m
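
train_vae expects a nested variant dictionary; the sketch below lists the keys the function actually reads (a sketch with illustrative values, not a configuration from the source; the dataset file must be a pickled dict containing at least 'obs' and 'actions' arrays):

# Hypothetical variant sketch; values are illustrative, not from the source.
variant = dict(
    generate_vae_dataset_kwargs=dict(
        dataset_path='/tmp/vae_dataset.npy',  # pickled dict with 'obs', 'actions'
        test_p=0.9,
        imsize=84,
        init_camera=None,
        env_id='SawyerDoorHookResetFreeEnv-v1',  # assumption: a registered env id
    ),
    representation_size=16,
    beta=1.0,
    imsize=84,
    vae_kwargs=dict(),
    algo_kwargs=dict(),
    vis_kwargs=dict(save_period=25),
    num_epochs=100,
    snapshot_gap=25,
)
m = train_vae(variant)
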
def visualize_policy_error(model, env, horizon):
    policy = RandomPolicy(env.action_space)
    actual_state = env.reset()

    predicted_states = []
    actual_states = []

    predicted_state = actual_state
    for _ in range(horizon):
        predicted_states.append(predicted_state.copy())
        actual_states.append(actual_state.copy())

        action, _ = policy.get_action(actual_state)
        delta = get_np_prediction(model, predicted_state, action)
        predicted_state += delta
        actual_state = env.step(action)[0]

    predicted_states = np.array(predicted_states)
    actual_states = np.array(actual_states)
    times = np.arange(horizon)

    num_state_dims = env.observation_space.low.size
    dims = list(range(num_state_dims))
    norm = colors.Normalize(vmin=0, vmax=num_state_dims)
    mapper = cm.ScalarMappable(norm=norm, cmap=cm.hsv)

    # Plot the predicted and actual values
    plt.subplot(2, 1, 1)
    for dim in dims:
        plt.plot(
            times,
            predicted_states[:, dim],
            '--',
            label='Predicted, Dim {}'.format(dim),
            color=mapper.to_rgba(dim),
        )
        plt.plot(
            times,
            actual_states[:, dim],
            '-',
            label='Actual, Dim {}'.format(dim),
            color=mapper.to_rgba(dim),
        )
    plt.xlabel("Time Steps")
    plt.ylabel("Observation Value")
    plt.legend(loc='best')

    # Plot the predicted and actual value errors
    plt.subplot(2, 1, 2)
    for dim in dims:
        plt.plot(
            times,
            np.abs(predicted_states[:, dim] - actual_states[:, dim]),
            '-',
            label='Dim {}'.format(dim),
            color=mapper.to_rgba(dim),
        )
    plt.xlabel("Time Steps")
    plt.ylabel("|Predicted - Actual| - Absolute Error")
    plt.legend(loc='best')
    plt.show()

    nrows = min(5, num_state_dims)
    ncols = math.ceil(num_state_dims / nrows)
    fig = plt.figure()
    for dim in dims:
        ax = fig.add_subplot(nrows, ncols, dim+1)
        ax.plot(
            times,
            predicted_states[:, dim],
            '--',
            label='Predicted, Dim {}'.format(dim),
        )
        ax.plot(
            times,
            actual_states[:, dim],
            '-',
            label='Actual, Dim {}'.format(dim),
        )
        ax.set_ylabel("Observation Value")
        ax.set_xlabel("Time Steps")
        ax.set_title("Dim {}".format(dim))
        ax_error = ax.twinx()
        ax_error.plot(
            times,
            np.abs(predicted_states[:, dim] - actual_states[:, dim]),
            '.',
            label='Error, Dim {}'.format(dim),
            color='r',
        )
        ax_error.set_ylabel("Error", color='r')
        ax_error.tick_params('y', colors='r')
        ax.legend(loc='best')
    plt.show()
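
This variant assumes a helper get_np_prediction(model, state, action) that returns the model's predicted state delta; a minimal stand-in sketch, assuming the model exposes eval_np as in the earlier TDM example:

# Hypothetical stand-in for get_np_prediction; assumes the dynamics model
# exposes eval_np(obs_batch, action_batch) -> delta_batch.
def get_np_prediction(model, state, action):
    return model.eval_np(state[None], action[None])[0]
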
Example #12
def generate_vae_dataset(
    N=10000,
    test_p=0.9,
    use_cached=True,
    imsize=84,
    show=False,
    dataset_path=None,
    env_class=None,
    env_kwargs=None,
    init_camera=sawyer_door_env_camera,
):
    filename = "/tmp/sawyer_door_push_open_and_reach" + str(N) + ".npy"
    info = {}
    if dataset_path is not None:
        filename = local_path_from_s3_or_local_path(dataset_path)
        dataset = np.load(filename)
    elif use_cached and osp.isfile(filename):
        dataset = np.load(filename)
        print("loaded data from saved file", filename)
    else:
        env = env_class(**env_kwargs)
        env = ImageEnv(
            env,
            imsize,
            transpose=True,
            init_camera=init_camera,
            normalize=True,
        )
        oracle_sampled_data = int(N / 2)
        dataset = np.zeros((N, imsize * imsize * 3))
        print('Goal Space Sampling')
        for i in range(oracle_sampled_data):
            goal = env.sample_goal()
            env.set_to_goal(goal)
            img = env._get_flat_img()
            dataset[i, :] = img
            if show:
                cv2.imshow('img', img.reshape(3, 84, 84).transpose())
                cv2.waitKey(1)
            print(i)
        env._wrapped_env.min_y_pos = .6
        policy = RandomPolicy(env.action_space)
        es = OUStrategy(action_space=env.action_space, theta=0)
        exploration_policy = PolicyWrappedWithExplorationStrategy(
            exploration_strategy=es,
            policy=policy,
        )
        print('Random Sampling')
        for i in range(oracle_sampled_data, N):
            if i % 20 == 0:
                env.reset()
                exploration_policy.reset()
            for _ in range(10):
                action = exploration_policy.get_action()[0]
                env.wrapped_env.step(action)
            img = env._get_flat_img()
            dataset[i, :] = img
            if show:
                cv2.imshow('img', img.reshape(3, 84, 84).transpose())
                cv2.waitKey(1)
            print(i)
    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info