def preprocess_obs_dict(obs_dict):
    """Convert image observations to the internal replay-buffer
    representation (images stored as uint8 bytes), mutating *obs_dict*
    in place and returning it for convenience.
    """
    # Collect the keys first so the in-place updates stay explicit.
    image_keys = [
        key for key, value in obs_dict.items()
        if 'image' in key and value is not None
    ]
    for key in image_keys:
        obs_dict[key] = unormalize_image(obs_dict[key])
    return obs_dict
def generate_vae_data(variant):
    """Collect a VAE training set of flattened uint8 images from a gym env.

    Builds an ``ImageEnv``-wrapped environment from ``variant['env_id']`` and
    records ``N`` images (optionally taken at oracle goals via
    ``set_to_goal``), returning ``(train_dataset, test_dataset)`` as uint8
    arrays of shape ``(n, image_size * image_size * num_channels)``.

    :param variant: dict of settings; see the ``variant.get`` calls below for
        the recognized keys and their defaults.
    :return: ``(train_dataset, test_dataset)`` split at ``int(N * test_p)``.
    """
    env_id = variant.get('env_id', None)
    N = variant.get('N', 1000)
    test_p = variant.get('test_p', 0.9)
    image_size = variant.get('image_size', 84)
    num_channels = variant.get('num_channels', 3)
    init_camera = variant.get('init_camera', None)
    oracle_dataset_using_set_to_goal = variant.get(
        'oracle_dataset_using_set_to_goal', False)
    show = variant.get('show', False)
    # Imported for its side effects (device setup); kept from the original.
    import rlkit.torch.pytorch_util as ptu  # noqa: F401

    env = gym.make(env_id)
    env = ImageEnv(env,
                   image_size,
                   init_camera=init_camera,
                   transpose=True,
                   normalize=True,
                   non_presampled_goal_img_is_garbage=None)
    dataset = np.zeros((N, image_size * image_size * num_channels),
                       dtype=np.uint8)
    for i in range(N):
        if oracle_dataset_using_set_to_goal:
            goal = env.sample_goal()
            env.set_to_goal(goal)
        obs = env._get_obs()
        img = obs['image_observation']
        if show:
            # Use a separate variable for the display conversion: the
            # original code overwrote `img` with an HWC/PIL image here,
            # which corrupted the flat array stored into `dataset` below.
            disp = img.reshape(3, image_size, image_size).transpose()
            disp = disp[::-1, :, ::-1]
            disp = (disp * 255).astype(np.uint8)
            Image.fromarray(disp, 'RGB').show()
        # BUG FIX: the original wrote `dataset[i:] = ...`, broadcasting the
        # image over every remaining row on each iteration; write row i only.
        dataset[i, :] = unormalize_image(img)
    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset
def experiment(variant):
    """Train a ConvVAE with skew-fit style reweighting, driven by *variant*.

    Loads/generates train and test image data, trains for
    ``variant['num_epochs']`` epochs, and periodically dumps samples,
    reconstructions, and sampling histograms for debugging.
    """
    from railrl.core import logger
    import railrl.torch.pytorch_util as ptu
    beta = variant["beta"]
    representation_size = variant["representation_size"]
    train_data, test_data, info = variant['generate_vae_dataset_fn'](
        variant['generate_vae_dataset_kwargs'])
    # Uniform dataset is used only for diagnostics (loss under a uniform
    # image distribution), not for training.
    uniform_dataset = load_local_or_remote_file(
        variant['uniform_dataset_path']).item()
    uniform_dataset = unormalize_image(uniform_dataset['image_desired_goal'])
    logger.save_extra_data(info)
    logger.get_snapshot_dir()
    if 'beta_schedule_kwargs' in variant:
        # kwargs = variant['beta_schedule_kwargs']
        # kwargs['y_values'][2] = variant['beta']
        # kwargs['x_values'][1] = variant['flat_x']
        # kwargs['x_values'][2] = variant['ramp_x'] + variant['flat_x']
        # Force the schedule to end at the configured beta.
        variant['beta_schedule_kwargs']['y_values'][-1] = variant['beta']
        beta_schedule = PiecewiseLinearSchedule(
            **variant['beta_schedule_kwargs'])
    else:
        beta_schedule = None
    m = variant['vae'](representation_size,
                       decoder_output_activation=nn.Sigmoid(),
                       **variant['vae_kwargs'])
    m.to(ptu.device)
    t = ConvVAETrainer(train_data,
                       test_data,
                       m,
                       beta=beta,
                       beta_schedule=beta_schedule,
                       **variant['algo_kwargs'])
    save_period = variant['save_period']
    for epoch in range(variant['num_epochs']):
        should_save_imgs = (epoch % save_period == 0)
        t.train_epoch(epoch)
        t.log_loss_under_uniform(
            m, uniform_dataset,
            variant['algo_kwargs']['priority_function_kwargs'])
        t.test_epoch(epoch,
                     save_reconstruction=should_save_imgs,
                     save_scatterplot=should_save_imgs)
        if should_save_imgs:
            t.dump_samples(epoch)
            if variant['dump_skew_debug_plots']:
                t.dump_best_reconstruction(epoch)
                t.dump_worst_reconstruction(epoch)
                t.dump_sampling_histogram(epoch)
                t.dump_uniform_imgs_and_reconstructions(
                    dataset=uniform_dataset, epoch=epoch)
        # Recompute skew-fit sampling weights after every epoch.
        t.update_train_weights()
def generate_uniform_dataset_pick_and_place(env_class=None,
                                            env_kwargs=None,
                                            num_imgs=1000,
                                            use_cached_dataset=False,
                                            init_camera=None,
                                            imsize=48,
                                            save_file_prefix=None,
                                            env_id=None,
                                            tag='',
                                            dataset_path=None):
    """Return `num_imgs` uniformly presampled goal images (uint8, flattened).

    Loads from `dataset_path` when given, otherwise from a /tmp cache when
    `use_cached_dataset` is set, otherwise samples fresh goals from the env
    and caches the result to /tmp.
    """
    # Shortcut: an explicit dataset path bypasses everything else.
    if dataset_path is not None:
        return load_local_or_remote_file(dataset_path)

    import gym
    from gym.envs import registration  # trigger registration
    import multiworld.envs.pygame
    import multiworld.envs.mujoco

    # Prefer an explicit class+kwargs pair; otherwise fall back to the id.
    if env_class and env_kwargs:
        base_env = env_class(**env_kwargs)
    else:
        base_env = gym.make(env_id)
    env = ImageEnv(
        base_env,
        imsize,
        init_camera=init_camera,
        transpose=True,
        normalize=True,
    )
    env.non_presampled_goal_img_is_garbage = True

    if save_file_prefix is None and env_id is not None:
        save_file_prefix = env_id
    filename = "/tmp/{}_N{}_imsize{}uniform_images_{}.npy".format(
        save_file_prefix,
        str(num_imgs),
        env.imsize,
        tag,
    )
    if use_cached_dataset and osp.isfile(filename):
        cached = np.load(filename)
        print("Loaded data from {}".format(filename))
        return cached

    print('Sampling Uniform Dataset')
    goal_imgs = get_image_presampled_goals(env, num_imgs)['image_desired_goal']
    dataset = unormalize_image(goal_imgs)
    np.save(filename, dataset)
    print("Saving file to {}".format(filename))
    return dataset
def generate_vae_dataset(variant):
    """Generate (or load) a flattened uint8 image dataset for VAE training.

    Data source is chosen in priority order: an explicit ``dataset_path``,
    then a /tmp cache file, then fresh env rollouts (random policy, oracle
    ``set_to_goal`` sampling, OU-noise rollouts, or plain random steps,
    depending on the variant flags).

    :param variant: dict of settings; see the ``variant.get`` calls below.
    :return: ``(train_dataset, test_dataset, info)`` where the datasets are
        uint8 arrays of shape ``(n, imsize * imsize * num_channels)`` and
        ``info`` may contain the constructed ``env``.
    """
    env_class = variant.get('env_class', None)
    env_kwargs = variant.get('env_kwargs', None)
    env_id = variant.get('env_id', None)
    N = variant.get('N', 10000)
    test_p = variant.get('test_p', 0.9)
    use_cached = variant.get('use_cached', True)
    imsize = variant.get('imsize', 84)
    num_channels = variant.get('num_channels', 3)
    show = variant.get('show', False)
    init_camera = variant.get('init_camera', None)
    dataset_path = variant.get('dataset_path', None)
    oracle_dataset_using_set_to_goal = variant.get(
        'oracle_dataset_using_set_to_goal', False)
    random_rollout_data = variant.get('random_rollout_data', False)
    random_and_oracle_policy_data = variant.get(
        'random_and_oracle_policy_data', False)
    random_and_oracle_policy_data_split = variant.get(
        'random_and_oracle_policy_data_split', 0)
    policy_file = variant.get('policy_file', None)
    n_random_steps = variant.get('n_random_steps', 100)
    vae_dataset_specific_env_kwargs = variant.get(
        'vae_dataset_specific_env_kwargs', None)
    save_file_prefix = variant.get('save_file_prefix', None)
    non_presampled_goal_img_is_garbage = variant.get(
        'non_presampled_goal_img_is_garbage', None)
    tag = variant.get('tag', '')
    from multiworld.core.image_env import ImageEnv, unormalize_image
    import rlkit.torch.pytorch_util as ptu
    info = {}
    if dataset_path is not None:
        # Pre-made dataset: N is taken from the file, not the variant.
        dataset = load_local_or_remote_file(dataset_path)
        N = dataset.shape[0]
    else:
        if env_kwargs is None:
            env_kwargs = {}
        if save_file_prefix is None:
            save_file_prefix = env_id
        if save_file_prefix is None:
            save_file_prefix = env_class.__name__
        filename = "/tmp/{}_N{}_{}_imsize{}_random_oracle_split_{}{}.npy".format(
            save_file_prefix,
            str(N),
            init_camera.__name__ if init_camera else '',
            imsize,
            random_and_oracle_policy_data_split,
            tag,
        )
        if use_cached and osp.isfile(filename):
            dataset = np.load(filename)
            print("loaded data from saved file", filename)
        else:
            now = time.time()
            if env_id is not None:
                import gym
                import multiworld
                multiworld.register_all_envs()
                env = gym.make(env_id)
            else:
                # env_kwargs entries act as defaults that the VAE-specific
                # kwargs may override.
                if vae_dataset_specific_env_kwargs is None:
                    vae_dataset_specific_env_kwargs = {}
                for key, val in env_kwargs.items():
                    if key not in vae_dataset_specific_env_kwargs:
                        vae_dataset_specific_env_kwargs[key] = val
                env = env_class(**vae_dataset_specific_env_kwargs)
            if not isinstance(env, ImageEnv):
                env = ImageEnv(
                    env,
                    imsize,
                    init_camera=init_camera,
                    transpose=True,
                    normalize=True,
                    non_presampled_goal_img_is_garbage=
                    non_presampled_goal_img_is_garbage,
                )
            else:
                # Already an ImageEnv: respect its image size.
                imsize = env.imsize
                env.non_presampled_goal_img_is_garbage = non_presampled_goal_img_is_garbage
            env.reset()
            info['env'] = env
            if random_and_oracle_policy_data:
                policy_file = load_local_or_remote_file(policy_file)
                policy = policy_file['policy']
                policy.to(ptu.device)
            if random_rollout_data:
                from rlkit.exploration_strategies.ou_strategy import OUStrategy
                policy = OUStrategy(env.action_space)
            dataset = np.zeros((N, imsize * imsize * num_channels),
                               dtype=np.uint8)
            for i in range(N):
                if random_and_oracle_policy_data:
                    # First split of the data: purely random actions;
                    # remainder: loaded policy driven by state obs + goal.
                    num_random_steps = int(N *
                                           random_and_oracle_policy_data_split)
                    if i < num_random_steps:
                        env.reset()
                        for _ in range(n_random_steps):
                            obs = env.step(env.action_space.sample())[0]
                    else:
                        obs = env.reset()
                        policy.reset()
                        for _ in range(n_random_steps):
                            policy_obs = np.hstack((
                                obs['state_observation'],
                                obs['state_desired_goal'],
                            ))
                            action, _ = policy.get_action(policy_obs)
                            obs, _, _, _ = env.step(action)
                elif oracle_dataset_using_set_to_goal:
                    print(i)
                    goal = env.sample_goal()
                    env.set_to_goal(goal)
                    obs = env._get_obs()
                elif random_rollout_data:
                    # Start a fresh OU rollout every n_random_steps images.
                    if i % n_random_steps == 0:
                        g = dict(
                            state_desired_goal=env.sample_goal_for_rollout())
                        env.set_to_goal(g)
                        policy.reset()
                        # env.reset()
                    u = policy.get_action_from_raw_action(
                        env.action_space.sample())
                    obs = env.step(u)[0]
                else:
                    env.reset()
                    for _ in range(n_random_steps):
                        obs = env.step(env.action_space.sample())[0]
                img = obs['image_observation']
                dataset[i, :] = unormalize_image(img)
                if show:
                    img = img.reshape(3, imsize, imsize).transpose()
                    img = img[::-1, :, ::-1]
                    cv2.imshow('img', img)
                    cv2.waitKey(1)
                    # radius = input('waiting...')
            print("done making training data", filename, time.time() - now)
            np.save(filename, dataset)
    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
def generate_LSTM_vae_only_dataset(variant,
                                   segmented=False,
                                   segmentation_method='color'):
    """Generate (or load) a single-step trajectory dataset for LSTM-VAE
    pretraining, optionally with segmented images and synthetic occlusions.

    Images come from env-specific presampled goal files (door / pickup envs)
    or from per-env scripted generators; each image may then be segmented
    and randomly occluded. The result gains a trajectory axis of length 1.

    :param variant: dict of settings; see the ``variant.get`` calls below.
    :param segmented: whether to segment images (may be overridden by
        ``variant['segmentation_kwargs']['segment']``).
    :param segmentation_method: only 'unet' is implemented.
    :return: ``(train_dataset, test_dataset, info)`` where the datasets have
        shape ``(n, 1, imsize * imsize * num_channels)`` and
        ``info['obj_state']`` holds the matching object states.
    """
    from multiworld.core.image_env import ImageEnv, unormalize_image
    env_id = variant.get('env_id', None)
    N = variant.get('N', 500)
    test_p = variant.get('test_p', 0.9)
    imsize = variant.get('imsize', 48)
    num_channels = variant.get('num_channels', 3)
    init_camera = variant.get('init_camera', None)
    occlusion_prob = variant.get('occlusion_prob', 0)
    occlusion_level = variant.get('occlusion_level', 0.5)
    segmentation_kwargs = variant.get('segmentation_kwargs', {})
    # An explicit 'segment' flag in the kwargs overrides the argument.
    if segmentation_kwargs.get('segment') is not None:
        segmented = segmentation_kwargs.get('segment')
    assert env_id is not None, 'you must provide an env id!'

    # Object-state label depends on the env (door angle vs. puck position).
    obj = 'puck-pos'
    if env_id == 'SawyerDoorHookResetFreeEnv-v1':
        obj = 'door-angle'

    pjhome = os.environ['PJHOME']
    if segmented:
        if 'unet' in segmentation_method:
            seg_name = 'seg-unet'
        else:
            seg_name = 'seg-' + segmentation_method
    else:
        seg_name = 'no-seg'
    if env_id == 'SawyerDoorHookResetFreeEnv-v1':
        seg_name += '-2'
    data_file_path = osp.join(
        pjhome, 'data/local/pre-train-lstm',
        'vae-only-{}-{}-{}-{}-{}.npy'.format(env_id, seg_name, N,
                                             occlusion_prob, occlusion_level))
    obj_state_path = osp.join(
        pjhome, 'data/local/pre-train-lstm',
        'vae-only-{}-{}-{}-{}-{}-{}.npy'.format(env_id, seg_name, N,
                                                occlusion_prob,
                                                occlusion_level, obj))
    print(data_file_path)

    # Fast path: reuse a stored dataset if it is large enough.
    if osp.exists(data_file_path):
        all_data = np.load(data_file_path)
        if len(all_data) >= N:
            print("load stored data at: ", data_file_path)
            n = int(len(all_data) * test_p)
            train_dataset = all_data[:n]
            test_dataset = all_data[n:]
            obj_states = np.load(obj_state_path)
            info = {'obj_state': obj_states}
            return train_dataset, test_dataset, info

    if segmented:
        print(
            "generating lstm vae pretrain only dataset with segmented images using method: ",
            segmentation_method)
        if segmentation_method == 'unet':
            segment_func = segment_image_unet
        else:
            raise NotImplementedError
    else:
        print("generating lstm vae pretrain only dataset with original images")

    info = {}
    dataset = np.zeros((N, imsize * imsize * num_channels), dtype=np.uint8)
    imgs = []
    obj_states = None
    if env_id == 'SawyerDoorHookResetFreeEnv-v1':
        from rlkit.util.io import load_local_or_remote_file
        pjhome = os.environ['PJHOME']
        pre_sampled_goal_path = osp.join(
            pjhome, 'data/local/pre-train-vae/door_original_dataset.npy')
        goal_dict = np.load(pre_sampled_goal_path, allow_pickle=True).item()
        imgs = goal_dict['image_desired_goal']
        # Last state dimension is the door angle.
        door_angles = goal_dict['state_desired_goal'][:, -1]
        obj_states = door_angles[:, np.newaxis]
    elif env_id == 'SawyerPickupEnvYZEasy-v0':
        from rlkit.util.io import load_local_or_remote_file
        pjhome = os.environ['PJHOME']
        pre_sampled_goal_path = osp.join(
            pjhome, 'data/local/pre-train-vae/pickup-original-dataset.npy')
        goal_dict = load_local_or_remote_file(pre_sampled_goal_path).item()
        imgs = goal_dict['image_desired_goal']
        puck_pos = goal_dict['state_desired_goal'][:, 3:]
        obj_states = puck_pos
    else:
        import gym
        import multiworld
        multiworld.register_all_envs()
        env = gym.make(env_id)
        if not isinstance(env, ImageEnv):
            env = ImageEnv(
                env,
                imsize,
                init_camera=init_camera,
                transpose=True,
                normalize=True,
            )
        env.reset()
        info['env'] = env
        # BUG FIX: `np.float` was a deprecated alias removed in NumPy 1.24;
        # use the equivalent concrete dtype.
        puck_pos = np.zeros((N, 2), dtype=np.float64)
        for i in range(N):
            print("lstm vae pretrain only dataset generation, number: ", i)
            if env_id == 'SawyerPushHurdle-v0':
                obs, puck_p = _generate_sawyerhurdle_dataset(
                    env, return_puck_pos=True, segmented=segmented)
            elif env_id == 'SawyerPushHurdleMiddle-v0':
                obs, puck_p = _generate_sawyerhurdlemiddle_dataset(
                    env, return_puck_pos=True)
            elif env_id == 'SawyerPushNIPSEasy-v0':
                obs, puck_p = _generate_sawyerpushnipseasy_dataset(
                    env, return_puck_pos=True)
            elif env_id == 'SawyerPushHurdleResetFreeEnv-v0':
                obs, puck_p = _generate_sawyerhurldeblockresetfree_dataset(
                    env, return_puck_pos=True)
            else:
                raise NotImplementedError
            img = obs[
                'image_observation']  # NOTE: this is already a normalized image of dtype np.float64.
            imgs.append(img)
            puck_pos[i] = puck_p
        obj_states = puck_pos

    # Segment (and possibly occlude) or just store each image.
    for i in range(N):
        print("segmenting image ", i)
        img = imgs[i]
        if segmented:
            dataset[i, :] = segment_func(img,
                                         normalize=False,
                                         **segmentation_kwargs)
            p = np.random.rand(
            )  # manually drop some images, so as to make occlusions
            if p < occlusion_prob:
                # Zero out pixels above the occlusion threshold.
                mask = (np.random.uniform(low=0, high=1, size=(imsize, imsize))
                        > occlusion_level).astype(np.uint8)
                img = dataset[i].reshape(3, imsize, imsize).transpose()
                img[mask < 1] = 0
                dataset[i] = img.transpose().flatten()
        else:
            dataset[i, :] = unormalize_image(img)

    # add the trajectory dimension
    dataset = dataset[:, np.newaxis, :]  # batch_size x traj_len = 1 x imlen
    obj_states = obj_states[:, np.newaxis, :]
    info['obj_state'] = obj_states

    n = int(N * test_p)
    train_dataset = dataset[:n]
    test_dataset = dataset[n:]
    if N >= 500:
        # Only cache reasonably large datasets.
        print('save data to: ', data_file_path)
        all_data = np.concatenate([train_dataset, test_dataset], axis=0)
        np.save(data_file_path, all_data)
        np.save(obj_state_path, obj_states)
    return train_dataset, test_dataset, info
def generate_vae_dataset(
    N=10000,
    test_p=0.9,
    use_cached=True,
    imsize=84,
    show=False,
    dataset_path=None,
    policy_path=None,
    action_space_sampling=False,
    env_class=SawyerPushAndPullDoorEnv,
    env_kwargs=None,
    action_plus_random_sampling=False,
    init_camera=sawyer_door_env_camera,
    ratio_action_sample_to_random=1 / 2,
    env_id=None,
):
    """Generate (or load) a VAE dataset for the Sawyer push/pull-door env.

    Data source priority: explicit ``dataset_path``, then a /tmp cache,
    then (only if ``action_plus_random_sampling``) fresh sampling: the first
    ``ratio_action_sample_to_random`` fraction via oracle ``set_to_goal``,
    the rest via OU-noise exploration followed by ``set_to_goal_angle``.

    :return: ``(train_dataset, test_dataset, info)``; the datasets are uint8
        arrays of shape ``(n, imsize * imsize * 3)``.
    :raises NotImplementedError: when no cache exists and
        ``action_plus_random_sampling`` is False.
    """
    # Choose a cache filename describing how the data was collected.
    if policy_path is not None:
        filename = "/tmp/sawyer_door_push_and_pull_open_oracle+random_policy_data_closer_zoom_action_limited" + str(
            N) + ".npy"
    elif action_space_sampling:
        filename = "/tmp/sawyer_door_push_and_pull_open_zoomed_in_action_space_sampling" + str(
            N) + ".npy"
    else:
        filename = "/tmp/sawyer_door_push_and_pull_open" + str(N) + ".npy"
    info = {}
    if dataset_path is not None:
        filename = local_path_from_s3_or_local_path(dataset_path)
        dataset = np.load(filename)
    elif use_cached and osp.isfile(filename):
        dataset = np.load(filename)
        print("loaded data from saved file", filename)
    elif action_plus_random_sampling:
        if env_id is not None:
            import gym
            env = gym.make(env_id)
        else:
            env = env_class(**env_kwargs)
        # NOTE(review): env built from env_id is assumed to need the same
        # ImageEnv wrapping as the class-constructed one — confirm upstream.
        env = ImageEnv(
            env,
            imsize,
            transpose=True,
            init_camera=init_camera,
            normalize=True,
        )
        action_sampled_data = int(N * ratio_action_sample_to_random)
        dataset = np.zeros((N, imsize * imsize * 3), dtype=np.uint8)
        print('Action Space Sampling')
        # Phase 1: oracle goals via set_to_goal.
        for i in range(action_sampled_data):
            goal = env.sample_goal()
            env.set_to_goal(goal)
            img = env._get_flat_img()
            dataset[i, :] = unormalize_image(img)
            if show:
                cv2.imshow('img', img.reshape(3, 84, 84).transpose())
                cv2.waitKey(1)
            print(i)
        policy = RandomPolicy(env.action_space)
        es = OUStrategy(action_space=env.action_space, theta=0)
        exploration_policy = PolicyWrappedWithExplorationStrategy(
            exploration_strategy=es,
            policy=policy,
        )
        print('Random Sampling')
        # Phase 2: OU exploration, resetting every 20 images.
        for i in range(action_sampled_data, N):
            if i % 20 == 0:
                env.reset()
                exploration_policy.reset()
            for _ in range(10):
                action = exploration_policy.get_action()[0]
                env.wrapped_env.step(action)
            goal = env.sample_goal()
            env.set_to_goal_angle(goal['state_desired_goal'])
            img = env._get_flat_img()
            dataset[i, :] = unormalize_image(img)
            if show:
                cv2.imshow('img', img.reshape(3, 84, 84).transpose())
                cv2.waitKey(1)
            print(i)
        # NOTE(review): purpose of clamping min_y_pos after collection is
        # unclear from this file — presumably restores the env for reuse.
        env._wrapped_env.min_y_pos = .5
        info['env'] = env
    else:
        raise NotImplementedError()
    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
def generate_vae_dataset_from_params(
    env_class=None,
    env_kwargs=None,
    env_id=None,
    N=10000,
    test_p=0.9,
    use_cached=True,
    imsize=84,
    num_channels=1,
    show=False,
    init_camera=None,
    dataset_path=None,
    oracle_dataset=False,
    n_random_steps=100,
    vae_dataset_specific_env_kwargs=None,
    save_file_prefix=None,
):
    """Build a VAE dataset from presampled goal images (oracle only).

    Loads from ``dataset_path`` or a /tmp cache when available; otherwise
    presamples ``N`` goals on the env and stores their images as flattened
    uint8 rows. The dataset is shuffled before the train/test split.

    :return: ``(train_dataset, test_dataset, info)``.
    """
    from multiworld.core.image_env import ImageEnv, unormalize_image
    import time

    # Only the oracle (presampled-goal) path is implemented.
    assert oracle_dataset == True

    if env_kwargs is None:
        env_kwargs = {}
    if save_file_prefix is None:
        save_file_prefix = env_id
    if save_file_prefix is None:
        save_file_prefix = env_class.__name__
    filename = "/tmp/{}_N{}_{}_imsize{}_oracle{}.npy".format(
        save_file_prefix,
        str(N),
        init_camera.__name__ if init_camera else '',
        imsize,
        oracle_dataset,
    )
    info = {}
    if dataset_path is not None:
        filename = local_path_from_s3_or_local_path(dataset_path)
        dataset = np.load(filename)
        np.random.shuffle(dataset)
        N = dataset.shape[0]
    elif use_cached and osp.isfile(filename):
        dataset = np.load(filename)
        np.random.shuffle(dataset)
        print("loaded data from saved file", filename)
    else:
        now = time.time()
        if env_id is not None:
            import gym
            import multiworld
            multiworld.register_all_envs()
            env = gym.make(env_id)
        else:
            # env_kwargs entries act as defaults the VAE-specific kwargs
            # may override.
            if vae_dataset_specific_env_kwargs is None:
                vae_dataset_specific_env_kwargs = {}
            for key, val in env_kwargs.items():
                if key not in vae_dataset_specific_env_kwargs:
                    vae_dataset_specific_env_kwargs[key] = val
            env = env_class(**vae_dataset_specific_env_kwargs)
        if not isinstance(env, ImageEnv):
            env = ImageEnv(
                env,
                imsize,
                init_camera=init_camera,
                transpose=True,
                normalize=True,
            )
        setup_pickup_image_env(env, num_presampled_goals=N)
        env.reset()
        info['env'] = env
        dataset = np.zeros((N, imsize * imsize * num_channels),
                           dtype=np.uint8)
        for i in range(N):
            img = env._presampled_goals['image_desired_goal'][i]
            dataset[i, :] = unormalize_image(img)
            if show:
                img = img.reshape(3, imsize, imsize).transpose()
                img = img[::-1, :, ::-1]
                cv2.imshow('img', img)
                cv2.waitKey(1)
                time.sleep(.2)
                # radius = input('waiting...')
        print("done making training data", filename, time.time() - now)
        np.random.shuffle(dataset)
        np.save(filename, dataset)
    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
def generate_vae_dataset(
    N=10000,
    test_p=0.9,
    use_cached=True,
    imsize=84,
    show=False,
    dataset_path=None,
    action_space_sampling=False,
    init_camera=None,
    env_class=None,
    env_kwargs=None,
):
    """Generate (or load) a VAE dataset for a Sawyer XYZ position env.

    Data source priority: explicit ``dataset_path``, then a /tmp cache,
    then fresh sampling — either oracle goal images
    (``action_space_sampling``) or OU-noise exploration frames.

    :return: ``(train_dataset, test_dataset, info)``; datasets are uint8
        arrays of shape ``(n, imsize * imsize * 3)``.
    """
    filename = "/tmp/sawyer_xyz_pos_control_new_zoom_cam" + str(N) + '.npy'
    info = {}
    if dataset_path is not None:
        filename = local_path_from_s3_or_local_path(dataset_path)
        dataset = np.load(filename)
    elif use_cached and osp.isfile(filename):
        dataset = np.load(filename)
        print("loaded data from saved file", filename)
    else:
        now = time.time()
        if env_kwargs is None:  # fixed: was `== None`
            env_kwargs = dict()
        env = env_class(**env_kwargs)
        env = ImageEnv(
            env,
            imsize,
            transpose=True,
            init_camera=init_camera,
            normalize=True,
        )
        dataset = np.zeros((N, imsize * imsize * 3), dtype=np.uint8)
        if action_space_sampling:
            # NOTE(review): this Box is created but never used below —
            # presumably leftover from an earlier sampling scheme.
            action_space = Box(np.array([-.1, .5, 0]), np.array([.1, .7, .5]))
            for i in range(N):
                env.set_to_goal(env.sample_goal())
                img = env._get_flat_img()
                dataset[i, :] = unormalize_image(img)
                if show:
                    cv2.imshow('img', img.reshape(3, 84, 84).transpose())
                    cv2.waitKey(1)
                print(i)
            info['env'] = env
        else:
            policy = RandomPolicy(env.action_space)
            es = OUStrategy(action_space=env.action_space, theta=0)
            exploration_policy = PolicyWrappedWithExplorationStrategy(
                exploration_strategy=es,
                policy=policy,
            )
            for i in range(N):
                # Move the goal out of the image
                env.wrapped_env.set_goal(np.array([100, 100, 100]))
                if i % 50 == 0:
                    print('Reset')
                    env.reset()
                    exploration_policy.reset()
                for _ in range(1):
                    action = exploration_policy.get_action()[0] * 10
                    env.wrapped_env.step(action)
                img = env.step(env.action_space.sample())[0]
                # BUG FIX: the image is normalized (floats in [0, 1]); storing
                # it raw into a uint8 buffer truncated everything to 0/1.
                # Convert to uint8 bytes like every other collection branch.
                dataset[i, :] = unormalize_image(img)
                if show:
                    cv2.imshow('img', img.reshape(3, 84, 84).transpose())
                    cv2.waitKey(1)
                print(i)
            # Expose the env in both branches (consistent with the sibling
            # generators in this file).
            info['env'] = env
        print("done making training data", time.time() - now)
        np.save(filename, dataset)
    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
def generate_vae_dataset(variant):
    """Generate (or load) a VAE dataset, optionally trajectory-structured.

    Depending on the variant flags the result is a flat image dataset, a
    trajectory dataset (observations + actions + initial env image), a
    conditional-VAE dataset wrapped in infinite batch loaders, or an
    environment dataset. Data comes from an explicit ``dataset_path``
    (str / list / {'train','test'} dict), a /tmp cache, or fresh env
    rollouts.

    :param variant: dict of settings; see the ``variant.get`` calls below.
    :return: ``(train_dataset, test_dataset, info)``; the dataset types
        depend on the flags as described above.
    """
    print(variant)
    from tqdm import tqdm
    env_class = variant.get('env_class', None)
    env_kwargs = variant.get('env_kwargs', None)
    env_id = variant.get('env_id', None)
    N = variant.get('N', 10000)
    batch_size = variant.get('batch_size', 128)
    test_p = variant.get('test_p', 0.9)
    use_cached = variant.get('use_cached', True)
    imsize = variant.get('imsize', 84)
    num_channels = variant.get('num_channels', 3)
    show = variant.get('show', False)
    init_camera = variant.get('init_camera', None)
    dataset_path = variant.get('dataset_path', None)
    augment_data = variant.get('augment_data', False)
    data_filter_fn = variant.get('data_filter_fn', lambda x: x)
    delete_after_loading = variant.get('delete_after_loading', False)
    oracle_dataset_using_set_to_goal = variant.get(
        'oracle_dataset_using_set_to_goal', False)
    random_rollout_data = variant.get('random_rollout_data', False)
    random_rollout_data_set_to_goal = variant.get(
        'random_rollout_data_set_to_goal', True)
    random_and_oracle_policy_data = variant.get(
        'random_and_oracle_policy_data', False)
    random_and_oracle_policy_data_split = variant.get(
        'random_and_oracle_policy_data_split', 0)
    policy_file = variant.get('policy_file', None)
    n_random_steps = variant.get('n_random_steps', 100)
    vae_dataset_specific_env_kwargs = variant.get(
        'vae_dataset_specific_env_kwargs', None)
    save_file_prefix = variant.get('save_file_prefix', None)
    non_presampled_goal_img_is_garbage = variant.get(
        'non_presampled_goal_img_is_garbage', None)
    conditional_vae_dataset = variant.get('conditional_vae_dataset', False)
    use_env_labels = variant.get('use_env_labels', False)
    use_linear_dynamics = variant.get('use_linear_dynamics', False)
    enviorment_dataset = variant.get('enviorment_dataset', False)
    save_trajectories = variant.get('save_trajectories', False)
    # Trajectory storage is implied by either of these dataset kinds.
    save_trajectories = save_trajectories or use_linear_dynamics or conditional_vae_dataset
    tag = variant.get('tag', '')
    assert N % n_random_steps == 0, "Fix N/horizon or dataset generation will fail"
    from multiworld.core.image_env import ImageEnv, unormalize_image
    import rlkit.torch.pytorch_util as ptu
    from rlkit.util.io import load_local_or_remote_file
    from rlkit.data_management.dataset import (
        TrajectoryDataset, ImageObservationDataset,
        InitialObservationDataset, EnvironmentDataset,
        ConditionalDynamicsDataset, InitialObservationNumpyDataset,
        InfiniteBatchLoader, InitialObservationNumpyJitteringDataset)
    info = {}
    use_test_dataset = False
    if dataset_path is not None:
        # dataset_path may be a single file, a list of files to concatenate,
        # or a {'train': ..., 'test': ...} dict of either.
        if type(dataset_path) == str:
            dataset = load_local_or_remote_file(
                dataset_path, delete_after_loading=delete_after_loading)
            dataset = dataset.item()
            N = dataset['observations'].shape[0] * dataset[
                'observations'].shape[1]
            n_random_steps = dataset['observations'].shape[1]
        if isinstance(dataset_path, list):
            dataset = concatenate_datasets(dataset_path)
            N = dataset['observations'].shape[0] * dataset[
                'observations'].shape[1]
            n_random_steps = dataset['observations'].shape[1]
        if isinstance(dataset_path, dict):
            if type(dataset_path['train']) == str:
                dataset = load_local_or_remote_file(
                    dataset_path['train'],
                    delete_after_loading=delete_after_loading)
                dataset = dataset.item()
            elif isinstance(dataset_path['train'], list):
                dataset = concatenate_datasets(dataset_path['train'])
            if type(dataset_path['test']) == str:
                test_dataset = load_local_or_remote_file(
                    dataset_path['test'],
                    delete_after_loading=delete_after_loading)
                test_dataset = test_dataset.item()
            elif isinstance(dataset_path['test'], list):
                test_dataset = concatenate_datasets(dataset_path['test'])
            N = dataset['observations'].shape[0] * dataset[
                'observations'].shape[1]
            n_random_steps = dataset['observations'].shape[1]
            use_test_dataset = True
    else:
        if env_kwargs is None:
            env_kwargs = {}
        if save_file_prefix is None:
            save_file_prefix = env_id
        if save_file_prefix is None:
            save_file_prefix = env_class.__name__
        filename = "/tmp/{}_N{}_{}_imsize{}_random_oracle_split_{}{}.npy".format(
            save_file_prefix,
            str(N),
            init_camera.__name__
            if init_camera and hasattr(init_camera, '__name__') else '',
            imsize,
            random_and_oracle_policy_data_split,
            tag,
        )
        if use_cached and osp.isfile(filename):
            dataset = load_local_or_remote_file(
                filename, delete_after_loading=delete_after_loading)
            if conditional_vae_dataset:
                dataset = dataset.item()
            print("loaded data from saved file", filename)
        else:
            now = time.time()
            if env_id is not None:
                import gym
                import multiworld
                multiworld.register_all_envs()
                env = gym.make(env_id)
            else:
                # env_kwargs entries act as defaults the VAE-specific
                # kwargs may override.
                if vae_dataset_specific_env_kwargs is None:
                    vae_dataset_specific_env_kwargs = {}
                for key, val in env_kwargs.items():
                    if key not in vae_dataset_specific_env_kwargs:
                        vae_dataset_specific_env_kwargs[key] = val
                env = env_class(**vae_dataset_specific_env_kwargs)
            if not isinstance(env, ImageEnv):
                env = ImageEnv(
                    env,
                    imsize,
                    init_camera=init_camera,
                    transpose=True,
                    normalize=True,
                    non_presampled_goal_img_is_garbage=
                    non_presampled_goal_img_is_garbage,
                )
            else:
                imsize = env.imsize
                env.non_presampled_goal_img_is_garbage = non_presampled_goal_img_is_garbage
            env.reset()
            info['env'] = env
            if random_and_oracle_policy_data:
                policy_file = load_local_or_remote_file(policy_file)
                policy = policy_file['policy']
                policy.to(ptu.device)
            if random_rollout_data:
                from rlkit.exploration_strategies.ou_strategy import OUStrategy
                policy = OUStrategy(env.action_space)
            if save_trajectories:
                dataset = {
                    'observations':
                    np.zeros((N // n_random_steps, n_random_steps,
                              imsize * imsize * num_channels),
                             dtype=np.uint8),
                    'actions':
                    # BUG FIX: `np.float` was removed in NumPy 1.24; use the
                    # equivalent concrete dtype.
                    np.zeros((N // n_random_steps, n_random_steps,
                              env.action_space.shape[0]),
                             dtype=np.float64),
                    'env':
                    np.zeros(
                        (N // n_random_steps, imsize * imsize * num_channels),
                        dtype=np.uint8),
                }
            else:
                dataset = np.zeros((N, imsize * imsize * num_channels),
                                   dtype=np.uint8)
            labels = []
            for i in tqdm(range(N)):
                if random_and_oracle_policy_data:
                    num_random_steps = int(N *
                                           random_and_oracle_policy_data_split)
                    if i < num_random_steps:
                        env.reset()
                        for _ in range(n_random_steps):
                            obs = env.step(env.action_space.sample())[0]
                    else:
                        obs = env.reset()
                        policy.reset()
                        for _ in range(n_random_steps):
                            policy_obs = np.hstack((
                                obs['state_observation'],
                                obs['state_desired_goal'],
                            ))
                            action, _ = policy.get_action(policy_obs)
                            obs, _, _, _ = env.step(action)
                elif random_rollout_data:  #ADD DATA WHERE JUST PUCK MOVES
                    if i % n_random_steps == 0:
                        env.reset()
                        policy.reset()
                        # Initial frame of the trajectory, stored per-traj.
                        env_img = env._get_obs()['image_observation']
                        if random_rollout_data_set_to_goal:
                            env.set_to_goal(env.get_goal())
                    obs = env._get_obs()
                    u = policy.get_action_from_raw_action(
                        env.action_space.sample())
                    env.step(u)
                elif oracle_dataset_using_set_to_goal:
                    print(i)
                    goal = env.sample_goal()
                    env.set_to_goal(goal)
                    obs = env._get_obs()
                else:
                    env.reset()
                    for _ in range(n_random_steps):
                        obs = env.step(env.action_space.sample())[0]
                img = obs['image_observation']
                if use_env_labels:
                    labels.append(obs['label'])
                if save_trajectories:
                    dataset['observations'][
                        i // n_random_steps,
                        i % n_random_steps, :] = unormalize_image(img)
                    dataset['actions'][i // n_random_steps,
                                       i % n_random_steps, :] = u
                    dataset['env'][i // n_random_steps, :] = unormalize_image(
                        env_img)
                else:
                    dataset[i, :] = unormalize_image(img)
                if show:
                    img = img.reshape(3, imsize, imsize).transpose()
                    img = img[::-1, :, ::-1]
                    cv2.imshow('img', img)
                    cv2.waitKey(1)
                    # radius = input('waiting...')
            print("done making training data", filename, time.time() - now)
            np.save(filename, dataset)
            #np.save(filename[:-4] + 'labels.npy', np.array(labels))

    info['train_labels'] = []
    info['test_labels'] = []
    dataset = data_filter_fn(dataset)

    if use_linear_dynamics and conditional_vae_dataset:
        # (Removed a dead sequential-split construction here: the original
        # built train/test datasets from dataset[:n]/dataset[n:] and then
        # immediately overwrote them with the shuffled-index versions below.)
        num_trajectories = N // n_random_steps
        n = int(num_trajectories * test_p)
        indices = np.arange(num_trajectories)
        np.random.shuffle(indices)
        train_i, test_i = indices[:n], indices[n:]
        try:
            train_dataset = ConditionalDynamicsDataset({
                'observations':
                dataset['observations'][train_i, :, :],
                'actions':
                dataset['actions'][train_i, :, :],
                'env':
                dataset['env'][train_i, :]
            })
            test_dataset = ConditionalDynamicsDataset({
                'observations':
                dataset['observations'][test_i, :, :],
                'actions':
                dataset['actions'][test_i, :, :],
                'env':
                dataset['env'][test_i, :]
            })
        except:
            # NOTE(review): bare except kept from the original — it falls
            # back to a dataset without 'env' (e.g. when that key is absent),
            # but it would also mask unrelated errors.
            train_dataset = ConditionalDynamicsDataset({
                'observations':
                dataset['observations'][train_i, :, :],
                'actions':
                dataset['actions'][train_i, :, :],
            })
            test_dataset = ConditionalDynamicsDataset({
                'observations':
                dataset['observations'][test_i, :, :],
                'actions':
                dataset['actions'][test_i, :, :],
            })
    elif use_linear_dynamics:
        num_trajectories = N // n_random_steps
        n = int(num_trajectories * test_p)
        train_dataset = TrajectoryDataset({
            'observations':
            dataset['observations'][:n, :, :],
            'actions':
            dataset['actions'][:n, :, :]
        })
        test_dataset = TrajectoryDataset({
            'observations':
            dataset['observations'][n:, :, :],
            'actions':
            dataset['actions'][n:, :, :]
        })
    elif enviorment_dataset:
        # Split along the time axis rather than the trajectory axis.
        n = int(n_random_steps * test_p)
        train_dataset = EnvironmentDataset({
            'observations':
            dataset['observations'][:, :n, :],
        })
        test_dataset = EnvironmentDataset({
            'observations':
            dataset['observations'][:, n:, :],
        })
    elif conditional_vae_dataset:
        num_trajectories = N // n_random_steps
        n = int(num_trajectories * test_p)
        indices = np.arange(num_trajectories)
        np.random.shuffle(indices)
        train_i, test_i = indices[:n], indices[n:]
        if augment_data:
            dataset_class = InitialObservationNumpyJitteringDataset
        else:
            dataset_class = InitialObservationNumpyDataset
        # Default the conditioning image to each trajectory's first frame.
        if 'env' not in dataset:
            dataset['env'] = dataset['observations'][:, 0]
        if use_test_dataset and ('env' not in test_dataset):
            test_dataset['env'] = test_dataset['observations'][:, 0]
        if use_test_dataset:
            train_dataset = dataset_class({
                'observations': dataset['observations'],
                'env': dataset['env']
            })
            test_dataset = dataset_class({
                'observations': test_dataset['observations'],
                'env': test_dataset['env']
            })
        else:
            train_dataset = dataset_class({
                'observations':
                dataset['observations'][train_i, :, :],
                'env':
                dataset['env'][train_i, :]
            })
            test_dataset = dataset_class({
                'observations':
                dataset['observations'][test_i, :, :],
                'env':
                dataset['env'][test_i, :]
            })
        train_batch_loader_kwargs = variant.get(
            'train_batch_loader_kwargs',
            dict(
                batch_size=batch_size,
                num_workers=0,
            ))
        test_batch_loader_kwargs = variant.get(
            'test_batch_loader_kwargs',
            dict(
                batch_size=batch_size,
                num_workers=0,
            ))
        train_data_loader = data.DataLoader(train_dataset,
                                            shuffle=True,
                                            drop_last=True,
                                            **train_batch_loader_kwargs)
        test_data_loader = data.DataLoader(test_dataset,
                                           shuffle=True,
                                           drop_last=True,
                                           **test_batch_loader_kwargs)
        train_dataset = InfiniteBatchLoader(train_data_loader)
        test_dataset = InfiniteBatchLoader(test_data_loader)
    else:
        n = int(N * test_p)
        train_dataset = ImageObservationDataset(dataset[:n, :])
        test_dataset = ImageObservationDataset(dataset[n:, :])
    return train_dataset, test_dataset, info
def generate_vae_dataset(variant):
    """
    If not provided a pre-train vae dataset generation function, this function
    will be used to collect the dataset for training vae.

    Reads all settings from ``variant`` (a dict) and returns
    ``(train_dataset, test_dataset, info)`` where the datasets are uint8 image
    arrays of shape (n, imsize*imsize*num_channels) and ``info`` may contain
    the constructed env under key 'env'.

    Data source precedence:
      1. ``dataset_path``   — load a pre-saved dataset from $PJHOME.
      2. cached /tmp file   — if ``use_cached`` and the file exists.
      3. fresh collection   — roll out in the env using one of the
         mutually exclusive strategies selected by the variant flags.
    """
    import rlkit.torch.pytorch_util as ptu
    import gym
    import multiworld
    multiworld.register_all_envs()
    print("generating vae dataset with original images")
    # --- unpack variant settings (all optional, with defaults) ---
    env_class = variant.get('env_class', None)
    env_kwargs = variant.get('env_kwargs', None)
    env_id = variant.get('env_id', None)
    N = variant.get('N', 10000)  # number of images to collect
    test_p = variant.get('test_p', 0.9)  # train fraction (despite the name)
    use_cached = variant.get('use_cached', True)
    imsize = variant.get('imsize', 84)
    num_channels = variant.get('num_channels', 3)
    show = variant.get('show', False)  # NOTE(review): read but never used below
    init_camera = variant.get('init_camera', None)
    dataset_path = variant.get('dataset_path', None)
    oracle_dataset_using_set_to_goal = variant.get(
        'oracle_dataset_using_set_to_goal', False)
    random_rollout_data = variant.get('random_rollout_data', False)
    random_and_oracle_policy_data = variant.get(
        'random_and_oracle_policy_data', False)
    random_and_oracle_policy_data_split = variant.get(
        'random_and_oracle_policy_data_split', 0)
    policy_file = variant.get('policy_file', None)
    n_random_steps = variant.get('n_random_steps', 100)
    vae_dataset_specific_env_kwargs = variant.get(
        'vae_dataset_specific_env_kwargs', None)
    save_file_prefix = variant.get('save_file_prefix', None)
    non_presampled_goal_img_is_garbage = variant.get(
        'non_presampled_goal_img_is_garbage', None)
    tag = variant.get('tag', '')
    info = {}
    if dataset_path is not None:
        # Pre-saved dataset: load from $PJHOME; a dict payload stores the
        # images under 'image_desired_goal'.
        print('load vae training dataset from: ', dataset_path)
        pjhome = os.environ['PJHOME']
        dataset = np.load(osp.join(pjhome, dataset_path),
                          allow_pickle=True).item()
        if isinstance(dataset, dict):
            dataset = dataset['image_desired_goal']
        # Convert normalized float images to uint8 for storage.
        dataset = unormalize_image(dataset)
        N = dataset.shape[0]
    else:
        if env_kwargs is None:
            env_kwargs = {}
        # Cache-file prefix: prefer env_id, fall back to the env class name.
        if save_file_prefix is None:
            save_file_prefix = env_id
        if save_file_prefix is None:
            save_file_prefix = env_class.__name__
        filename = "/tmp/{}_N{}_{}_imsize{}_random_oracle_split_{}{}.npy".format(
            save_file_prefix,
            str(N),
            init_camera.__name__ if init_camera else '',
            imsize,
            random_and_oracle_policy_data_split,
            tag,
        )
        if use_cached and osp.isfile(filename):
            dataset = np.load(filename)
            print("loaded data from saved file", filename)
        else:
            now = time.time()
            # --- build the environment ---
            if env_id is not None:
                import gym
                import multiworld
                multiworld.register_all_envs()
                env = gym.make(env_id)
            else:
                # Merge env_kwargs into the dataset-specific kwargs
                # (dataset-specific values win on conflict).
                if vae_dataset_specific_env_kwargs is None:
                    vae_dataset_specific_env_kwargs = {}
                for key, val in env_kwargs.items():
                    if key not in vae_dataset_specific_env_kwargs:
                        vae_dataset_specific_env_kwargs[key] = val
                env = env_class(**vae_dataset_specific_env_kwargs)
            if not isinstance(env, ImageEnv):
                env = ImageEnv(
                    env,
                    imsize,
                    init_camera=init_camera,
                    transpose=True,
                    normalize=True,
                    non_presampled_goal_img_is_garbage=
                    non_presampled_goal_img_is_garbage,
                )
            else:
                # Already image-wrapped: adopt its imsize and just set the flag.
                imsize = env.imsize
                env.non_presampled_goal_img_is_garbage = non_presampled_goal_img_is_garbage
            env.reset()
            info['env'] = env
            # --- pick the data-collection policy ---
            if random_and_oracle_policy_data:
                # Load a trained policy from a local/remote snapshot.
                policy_file = load_local_or_remote_file(policy_file)
                policy = policy_file['policy']
                policy.to(ptu.device)
            if random_rollout_data:
                from rlkit.exploration_strategies.ou_strategy import OUStrategy
                policy = OUStrategy(env.action_space)
            dataset = np.zeros((N, imsize * imsize * num_channels),
                               dtype=np.uint8)
            for i in range(N):
                if random_and_oracle_policy_data:
                    # First split of samples: random actions; rest: trained
                    # policy conditioned on (state, goal).
                    num_random_steps = int(N *
                                           random_and_oracle_policy_data_split)
                    if i < num_random_steps:
                        env.reset()
                        for _ in range(n_random_steps):
                            obs = env.step(env.action_space.sample())[0]
                    else:
                        obs = env.reset()
                        policy.reset()
                        for _ in range(n_random_steps):
                            policy_obs = np.hstack((
                                obs['state_observation'],
                                obs['state_desired_goal'],
                            ))
                            action, _ = policy.get_action(policy_obs)
                            obs, _, _, _ = env.step(action)
                elif oracle_dataset_using_set_to_goal:
                    # Teleport the env to a sampled goal and snapshot it.
                    print(i)
                    goal = env.sample_goal()
                    env.set_to_goal(goal)
                    obs = env._get_obs()
                elif random_rollout_data:
                    # OU-noise rollouts; re-goal every n_random_steps samples.
                    if i % n_random_steps == 0:
                        g = dict(
                            state_desired_goal=env.sample_goal_for_rollout())
                        env.set_to_goal(g)
                        policy.reset()
                        # env.reset()
                    u = policy.get_action_from_raw_action(
                        env.action_space.sample())
                    obs = env.step(u)[0]
                else:
                    print("using totally random rollouts")
                    for _ in range(n_random_steps):
                        obs = env.step(env.action_space.sample())[0]
                img = obs[
                    'image_observation']  # NOTE yufei: this is already normalized image, of detype np.float64.
                dataset[i, :] = unormalize_image(img)
            np.save(filename, dataset)
    # Split into train/test by index (no shuffling).
    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
def generate_vae_dataset(
        N=10000,
        test_p=0.9,
        use_cached=True,
        imsize=84,
        show=False,
        dataset_path=None,
        policy_path=None,
        action_space_sampling=False,
        env_class=SawyerDoorEnv,
        env_kwargs=None,
        init_camera=sawyer_door_env_camera_v2,
):
    """
    Collect (or load) a VAE training dataset for the Sawyer door env.

    Returns ``(train_dataset, test_dataset, info)`` where the datasets are
    uint8 arrays of shape (n, imsize*imsize*3) and ``info`` may contain the
    constructed env under 'env'.

    Source precedence: explicit ``dataset_path`` > cached /tmp file (when
    ``use_cached``) > fresh collection with an OU-noise-wrapped random policy.
    ``policy_path`` and ``action_space_sampling`` only select which /tmp cache
    filename is used.
    """
    # Pick the cache filename; the flags only change which file is used.
    if policy_path is not None:
        filename = "/tmp/sawyer_door_pull_open_oracle+random_policy_data_closer_zoom_action_limited" + str(
            N) + ".npy"
    elif action_space_sampling:
        filename = "/tmp/sawyer_door_pull_open_zoomed_in_action_space_sampling" + str(
            N) + ".npy"
    else:
        filename = "/tmp/sawyer_door_pull_open" + str(N) + ".npy"
    info = {}
    if dataset_path is not None:
        filename = local_path_from_s3_or_local_path(dataset_path)
        dataset = np.load(filename)
    elif use_cached and osp.isfile(filename):
        dataset = np.load(filename)
        print("loaded data from saved file", filename)
    else:
        now = time.time()
        # BUG FIX: env_kwargs defaults to None; splatting None into
        # env_class(**env_kwargs) raises a TypeError. Use an empty dict.
        if env_kwargs is None:
            env_kwargs = {}
        env = env_class(**env_kwargs)
        env = ImageEnv(
            env,
            imsize,
            transpose=True,
            init_camera=init_camera,
            normalize=True,
        )
        info['env'] = env
        # Random policy with OU exploration noise (theta=0).
        policy = RandomPolicy(env.action_space)
        es = OUStrategy(action_space=env.action_space, theta=0)
        exploration_policy = PolicyWrappedWithExplorationStrategy(
            exploration_strategy=es,
            policy=policy,
        )
        env.wrapped_env.reset()
        dataset = np.zeros((N, imsize * imsize * 3), dtype=np.uint8)
        for i in range(N):
            # Re-seed the episode every 20 samples, then take 10 noisy steps
            # before snapshotting an image.
            if i % 20 == 0:
                env.reset_model()
                exploration_policy.reset()
            for _ in range(10):
                action = exploration_policy.get_action()[0]
                env.wrapped_env.step(action)
            # env.set_to_goal_angle(env.get_goal()['state_desired_goal'])
            img = env._get_flat_img()
            dataset[i, :] = unormalize_image(img)
            if show:
                cv2.imshow('img', img.reshape(3, 84, 84).transpose())
                cv2.waitKey(1)
            print(i)
        print("done making training data", filename, time.time() - now)
        np.save(filename, dataset)
    # Split into train/test by index (no shuffling).
    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
def generate_uniform_dataset_door(num_imgs=1000,
                                  use_cached_dataset=False,
                                  init_camera=None,
                                  imsize=48,
                                  policy_file=None,
                                  show=False,
                                  path_length=100,
                                  save_file_prefix=None,
                                  env_id=None,
                                  tag='',
                                  dataset_path=None):
    """
    Sample a dataset of door-env images by rolling out a saved policy.

    Returns a uint8 array of shape (num_imgs, 3 * imsize**2). If
    ``dataset_path`` is given, that dataset is loaded and returned directly;
    if ``use_cached_dataset`` and the /tmp cache file exists, it is returned
    instead of re-sampling.
    """
    if dataset_path is not None:
        dataset = load_local_or_remote_file(dataset_path)
        return dataset
    import gym
    from gym.envs import registration
    # trigger registration
    import multiworld.envs.pygame
    import multiworld.envs.mujoco
    env = gym.make(env_id)
    env = ImageEnv(
        env,
        imsize,
        init_camera=init_camera,
        transpose=True,
        normalize=True,
    )
    # Goal images are never used here, so garbage goal renders are fine.
    env.non_presampled_goal_img_is_garbage = True
    if save_file_prefix is None and env_id is not None:
        save_file_prefix = env_id
    filename = "/tmp/{}_N{}_imsize{}uniform_images_{}.npy".format(
        save_file_prefix,
        str(num_imgs),
        env.imsize,
        tag,
    )
    if use_cached_dataset and osp.isfile(filename):
        images = np.load(filename)
        print("Loaded data from {}".format(filename))
        return images
    # Load the rollout policy from a local/remote snapshot dict.
    policy_file = load_local_or_remote_file(policy_file)
    policy = policy_file['policy']
    policy.to(ptu.device)
    print('Sampling Uniform Dataset')
    dataset = np.zeros((num_imgs, 3 * env.imsize**2), dtype=np.uint8)
    for j in range(num_imgs):
        obs = env.reset()
        policy.reset()
        # Roll the policy for path_length steps, conditioning on
        # (state, goal); only the final observation's image is kept.
        for i in range(path_length):
            policy_obs = np.hstack((
                obs['state_observation'],
                obs['state_desired_goal'],
            ))
            action, _ = policy.get_action(policy_obs)
            obs, _, _, _ = env.step(action)
        img_f = obs['image_observation']
        if show:
            img = obs['image_observation']
            img = img.reshape(3, env.imsize, env.imsize).transpose()
            img = img[::-1, :, ::-1]
            cv2.imshow('img', img)
            cv2.waitKey(1)
        print(j)
        dataset[j, :] = unormalize_image(img_f)
        # Force a free reset between samples, then restore the env's
        # original reset_free setting.
        temp = env.reset_free
        env.reset_free = True
        env.reset()
        env.reset_free = temp
    np.save(filename, dataset)
    print("Saving file to {}".format(filename))
    return dataset
def generate_vae_dataset(variant):
    """
    Collect a transition dataset (obs, action, next_obs plus state copies)
    for VAE/dynamics training and save it to <snapshot_dir>/vae_dataset.npy.

    ``variant`` keys (all optional): env_class, env_kwargs, env_id, N,
    use_images, imsize, show, init_camera, oracle_dataset, n_random_steps,
    vae_dataset_specific_env_kwargs, non_presampled_goal_img_is_garbage.

    Saves a dict of numpy arrays; does not return a value.
    """
    import cv2
    env_class = variant.get('env_class', None)
    env_kwargs = variant.get('env_kwargs', None)
    env_id = variant.get('env_id', None)
    N = variant.get('N', 10000)
    use_images = variant.get('use_images', True)
    imsize = variant.get('imsize', 84)
    show = variant.get('show', False)
    init_camera = variant.get('init_camera', None)
    oracle_dataset = variant.get('oracle_dataset', False)
    # Default step count depends on collection mode: short perturbations
    # around oracle goals vs. long random rollouts.
    if 'n_random_steps' in variant:
        n_random_steps = variant['n_random_steps']
    else:
        if oracle_dataset:
            n_random_steps = 3
        else:
            n_random_steps = 100
    vae_dataset_specific_env_kwargs = variant.get(
        'vae_dataset_specific_env_kwargs', None)
    non_presampled_goal_img_is_garbage = variant.get(
        'non_presampled_goal_img_is_garbage', None)
    from multiworld.core.image_env import ImageEnv, unormalize_image
    info = {}
    from railrl.core import logger
    logdir = logger.get_snapshot_dir()
    filename = osp.join(logdir, "vae_dataset.npy")
    now = time.time()
    # --- build the environment ---
    if env_id is not None:
        import gym
        env = gym.make(env_id)
    else:
        # Merge env_kwargs into the dataset-specific kwargs
        # (dataset-specific values win on conflict).
        if vae_dataset_specific_env_kwargs is None:
            vae_dataset_specific_env_kwargs = {}
        for key, val in env_kwargs.items():
            if key not in vae_dataset_specific_env_kwargs:
                vae_dataset_specific_env_kwargs[key] = val
        env = env_class(**vae_dataset_specific_env_kwargs)
    if not isinstance(env, ImageEnv):
        env = ImageEnv(
            env,
            imsize,
            init_camera=init_camera,
            transpose=True,
            normalize=True,
            non_presampled_goal_img_is_garbage=
            non_presampled_goal_img_is_garbage,
        )
    else:
        imsize = env.imsize
        env.non_presampled_goal_img_is_garbage = non_presampled_goal_img_is_garbage
    env.reset()
    info['env'] = env
    # Observation storage: raw uint8 images or float32 states.
    if use_images:
        data_size = len(env.observation_space.spaces['image_observation'].low)
        dtype = np.uint8
    else:
        data_size = len(env.observation_space.spaces['state_observation'].low)
        dtype = np.float32
    state_size = len(env.observation_space.spaces['state_observation'].low)
    dataset = {
        'obs': np.zeros((N, data_size), dtype=dtype),
        'actions': np.zeros((N, len(env.action_space.low)), dtype=np.float32),
        'next_obs': np.zeros((N, data_size), dtype=dtype),
        'obs_state': np.zeros((N, state_size), dtype=np.float32),
        'next_obs_state': np.zeros((N, state_size), dtype=np.float32),
    }
    # BUG FIX: progress interval used float modulo (N / 50); use integer
    # division, clamped to >= 1 so small N does not divide by zero.
    print_every = max(1, N // 50)
    for i in range(N):
        if i % print_every == 0:
            print(i)
        if oracle_dataset:
            # Jitter around sampled goals; fully reset only every 100 samples.
            if i % 100 == 0:
                env.reset()
            goal = env.sample_goal()
            env.set_to_goal(goal)
            for _ in range(n_random_steps):
                env.step(env.action_space.sample())
        else:
            env.reset()
            for _ in range(n_random_steps):
                env.step(env.action_space.sample())
        obs = env._get_obs()
        if use_images:
            dataset['obs'][i, :] = unormalize_image(obs['image_observation'])
        else:
            dataset['obs'][i, :] = obs['state_observation']
        dataset['obs_state'][i, :] = obs['state_observation']
        # Record one random transition from the snapshotted state.
        action = env.action_space.sample()
        dataset['actions'][i, :] = action
        obs = env.step(action)[0]
        img = obs['image_observation']
        if use_images:
            dataset['next_obs'][i, :] = unormalize_image(img)
        else:
            dataset['next_obs'][i, :] = obs['state_observation']
        dataset['next_obs_state'][i, :] = obs['state_observation']
        # BUG FIX: removed leftover debug override `show = True  #todo del own`
        # which forced a cv2 window and a 1-second waitKey stall per sample.
        if show:
            img = img.reshape(3, imsize, imsize).transpose((1, 2, 0))
            img = img[::, :, ::-1]
            cv2.imshow('img', img)
            cv2.waitKey(1000)
    print("keys and shapes:")
    for k in dataset.keys():
        print(k, dataset[k].shape)
    print("done making training data", filename, time.time() - now)
    np.save(filename, dataset)
def generate_sawyerhurdle_dataset(variant,
                                  segmented=False,
                                  segmentation_method='unet'):
    """
    Collect (or load) a VAE dataset for the SawyerPushHurdle environments,
    optionally segmenting each image.

    Returns ``(train_dataset, test_dataset, info)``; ``info`` carries the
    per-sample puck positions under 'puck_pos' (and the env under 'env' when
    freshly generated). Datasets of size >= 2000 are persisted under
    $PJHOME/data/local/pre-train-vae for reuse.
    """
    from multiworld.core.image_env import ImageEnv, unormalize_image
    env_id = variant.get('env_id', None)
    N = variant.get('N', 10000)
    test_p = variant.get('test_p', 0.9)
    imsize = variant.get('imsize', 84)
    num_channels = variant.get('num_channels', 3)
    init_camera = variant.get('init_camera', None)
    segmentation_kwargs = variant.get('segmentation_kwargs', {})
    pjhome = os.environ['PJHOME']
    seg_name = 'seg-' + segmentation_method if segmented else 'no-seg'
    data_file_path = osp.join(pjhome, 'data/local/pre-train-vae',
                              '{}-{}-{}.npy'.format(env_id, seg_name, N))
    puck_pos_path = osp.join(
        pjhome, 'data/local/pre-train-vae',
        '{}-{}-{}-puck-pos.npy'.format(env_id, seg_name, N))
    # Reuse a stored dataset when it is at least as large as requested.
    if osp.exists(data_file_path):
        all_data = np.load(data_file_path)
        if len(all_data) >= N:
            print("load stored data at: ", data_file_path)
            n = int(len(all_data) * test_p)
            train_dataset = all_data[:n]
            test_dataset = all_data[n:]
            # NOTE(review): assumes the puck-pos file was saved alongside the
            # data file — verify both exist together.
            puck_pos = np.load(puck_pos_path)
            info = {'puck_pos': puck_pos}
            return train_dataset, test_dataset, info
    if segmented:
        print("generating vae dataset with segmented images using method: ",
              segmentation_method)
        if segmentation_method == 'unet':
            segment_func = segment_image_unet
        else:
            raise NotImplementedError
    else:
        print("generating vae dataset with original images")
    assert env_id is not None
    import gym
    import multiworld
    multiworld.register_all_envs()
    env = gym.make(env_id)
    if not isinstance(env, ImageEnv):
        env = ImageEnv(
            env,
            imsize,
            init_camera=init_camera,
            transpose=True,
            normalize=True,
        )
    info = {}
    env.reset()
    info['env'] = env
    dataset = np.zeros((N, imsize * imsize * num_channels), dtype=np.uint8)
    # BUG FIX: np.float is a deprecated alias (removed in NumPy 1.24);
    # use the explicit np.float64.
    puck_pos = np.zeros((N, 2), dtype=np.float64)
    for i in range(N):
        print("sawyer hurdle custom vae data set generation, number: ", i)
        # Per-env sampling helpers return (obs, puck_position).
        if env_id == 'SawyerPushHurdle-v0':
            obs, puck_p = _generate_sawyerhurdle_dataset(env,
                                                         return_puck_pos=True)
        elif env_id == 'SawyerPushHurdleMiddle-v0':
            obs, puck_p = _generate_sawyerhurdlemiddle_dataset(
                env, return_puck_pos=True)
        else:
            raise NotImplementedError
        img = obs[
            'image_observation']  # NOTE yufei: this is already normalized image, of detype np.float64.
        if segmented:
            dataset[i, :] = segment_func(img,
                                         normalize=False,
                                         **segmentation_kwargs)
        else:
            dataset[i, :] = unormalize_image(img)
        puck_pos[i] = puck_p
    # Split into train/test by index (no shuffling).
    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    info['puck_pos'] = puck_pos
    # Persist large datasets for future runs.
    if N >= 2000:
        print('save data to: ', data_file_path)
        all_data = np.concatenate([train_dataset, test_dataset], axis=0)
        np.save(data_file_path, all_data)
        np.save(puck_pos_path, puck_pos)
    return train_dataset, test_dataset, info