def __init__(self, opt):
    """Initialize this dataset class.

    Parameters:
        opt (Option class) -- stores all the experiment flags; needs to be a
            subclass of BaseOptions
    """
    # NOTE(review): hard-coded metadata path — presumably cluster-specific; confirm.
    all_robonet = load_metadata(
        "/data/vision/billf/scratch/yilundu/robonet/hdf5")
    sess = tf.InteractiveSession()
    # keep only trajectories with a 4-dimensional action space
    database = all_robonet[all_robonet['adim'] == 4]
    self.database = database
    loader_hparams = {
        'img_size': [1024, 1024],
        'load_T': 2,
        'target_adim': 4,
        'action_mismatch': 1,
    }
    data = RoboNetDataset(batch_size=opt.batch_size,
                          dataset_files_or_metadata=database,
                          hparams=loader_hparams)
    self.data = data
    self.images = data['images']
    self.sess = sess
    # this object serves as its own dataloader
    self.dataloader = self
    self.full_robonet = opt.full_robonet
def make_dataloaders(self, config):
    """Build input targets that mix data from an old domain and a new domain.

    Reads new-domain metadata from config['data_directory'] and old-domain
    metadata from config['batchmix_basedata'], keeps only old-domain sources
    whose robot is in self._hparams.robot_set, then replicates the single
    new-domain source so both domains contribute equal amounts of data.
    """
    DatasetClass = get_dataset_class(self.dataset_hparams.pop('dataset'))
    # data from the new domain
    new_domain_metadata = self._filter_metadata(
        load_metadata(config['data_directory']))
    # data from the old domain
    old_domain_metadata = self._filter_metadata(
        load_metadata(config['batchmix_basedata']))
    old_metadata_list = []
    for candidate in old_domain_metadata:
        robots = candidate['robot'].frame.unique().tolist()
        if robots[0] in self._hparams.robot_set:
            print('using robot', robots)
            old_metadata_list.append(candidate)
    assert len(new_domain_metadata) == 1
    # make sure that we're using the same amount of data from old and new
    metadata_list = (new_domain_metadata * len(old_metadata_list)
                     + old_metadata_list)
    return self._get_input_targets(DatasetClass, metadata_list,
                                   self.dataset_hparams)
def collect_same_viewpoint(robot, directory):
    """Group hdf5 trajectory files for *robot* by camera viewpoint.

    Parameters:
        robot (str)     -- substring identifying the robot in file names
        directory (str) -- path to the folder containing the hdf5 files

    Returns:
        dict mapping viewpoint -> list of file paths recorded from that
        viewpoint.
    """
    hparams = tf.contrib.training.HParams(**default_loader_hparams())
    meta_data = load_metadata(directory)
    exp_same_view = {}
    for f in os.listdir(directory):
        if robot in f:
            # os.path.join is robust to a missing trailing '/' on directory
            # (the previous `directory + f` produced a broken path then)
            path = os.path.join(directory, f)
            print(path)
            _, _, _, _, _, viewpoint = load_data_customized(
                path, meta_data.get_file_metadata(path), hparams)
            # group paths that share the same viewpoint
            exp_same_view.setdefault(viewpoint, []).append(path)
    return exp_same_view
description="tests hdf5 data loader without tensorflow dataset wrapper" ) parser.add_argument('file', type=str, help="path to hdf5 you want to load") parser.add_argument('--load_annotations', action='store_true', help="loads annotations if supplied") parser.add_argument( '--load_steps', type=int, default=0, help="loads <load_steps> steps from the dataset instead of everything") args = parser.parse_args() assert 'hdf5' in args.file data_folder = '/'.join(args.file.split('/')[:-1]) meta_data = datasets.load_metadata(data_folder) hparams = tf.contrib.training.HParams(**default_loader_hparams()) hparams.load_T = args.load_steps if args.load_annotations: hparams.load_annotations = True print(meta_data[meta_data['contains_annotation'] == True]) meta_data = meta_data[meta_data['contains_annotation'] == True] imgs, actions, states, annot = load_data( (args.file, meta_data.get_file_metadata(args.file)), hparams) else: imgs, actions, states = load_data( (args.file, meta_data.get_file_metadata(args.file)), hparams) print('actions', actions.shape) print('states', states.shape)
import tensorflow as tf

sess = tf.Session()

# loader hyper-parameters: single sub-batch, 30-step clips, one random
# camera shared across the sub-batch
hparams = {
    'RNG': 0,
    'ret_fnames': True,
    'load_T': 30,
    'load_random_cam': True,
    'sub_batch_size': 1,
    'action_mismatch': 3,
    'state_mismatch': 3,
    'splits': [0.8, 0.1, 0.1],
    'same_cam_across_sub_batch': True
}

# restrict RoboNet to sawyer trajectories with 4-dim actions
all_robonet = load_metadata('/iris/u/surajn/data/robonet/hdf5')
database = all_robonet[all_robonet['robot'] == 'sawyer']
database = database[database['adim'] == 4]
data = RoboNetDataset(batch_size=1,
                      dataset_files_or_metadata=database,
                      hparams=hparams)


def _resized_clip(images):
    # Fold time into the batch axis, resize each 48x64 frame to 64x64,
    # restore the time axis, and rescale [0, 1] -> [0, 255].
    flat = tf.reshape(images, [1 * 30, 48, 64, 3])
    resized = tf.image.resize(flat, size=(64, 64))
    return tf.reshape(resized, [1, 30, 64, 64, 3]) * 255.0


train_images = _resized_clip(data['images', 'train'])
train_actions = data['actions', 'train']
test_images = _resized_clip(data['images', 'test'])
test_actions = data['actions', 'test']
# DATA_URL = ("/cvgl2/u/surajn/data/sv2p_train_data.hdf5")
"color_augmentation": 0.3, # std of color augmentation (set to 0 for no augmentations) "RNG": 0, "ret_fnames": True, "load_T": args.load_steps, "sub_batch_size": 8, "action_mismatch": 3, "state_mismatch": 3, "splits": [0.8, 0.1, 0.1], "same_cam_across_sub_batch": True, } if args.robots: from robonet.datasets import load_metadata meta_data = load_metadata(args.path) hparams["same_cam_across_sub_batch"] = True ds = RoboNetDataset( args.batch_size, [meta_data[meta_data["robot"] == r] for r in args.robots], hparams=hparams, ) else: ds = RoboNetDataset(args.batch_size, args.path, hparams=hparams) # TODO: Rewrite or deprecate for PyTorch # if args.time_test: # _timing_test(args.time_test, ds) # exit(0) # Uncomment if you want to test the the multiprocess DataLoader
def _init_sources(self):
    """Resolve each batch-config entry into filtered metadata sources.

    Returns:
        (sources, source_probs) where sources is a list of metadata objects
        (one per data source) and source_probs is either a matching list of
        sampling probabilities or None when no entry configured one.
    """
    # cache of directory -> metadata so each directory is loaded only once
    loaded_metadata = {}
    sources, source_probs = [], []
    for source in self._batch_config:
        source_hparams = self._default_source_hparams()
        source_hparams.update(source)
        dir_path = os.path.realpath(
            os.path.expanduser(source_hparams['data_directory']))
        # reuse cached metadata for this directory if already loaded
        meta_data = loaded_metadata[dir_path] = loaded_metadata.get(
            dir_path, load_metadata(dir_path))
        # every key beyond the defaults acts as a metadata filter
        for k, v in source_hparams.items():
            if k not in self._default_source_hparams():
                if k == 'object_classes':
                    meta_data = meta_data.select_objects(v)
                elif isinstance(v, (list, tuple)):
                    # keep rows whose attribute k matches any listed value
                    meta_data = meta_data[meta_data[k].frame.isin(v)]
                else:
                    meta_data = meta_data[meta_data[k] == v]
                assert len(meta_data), "filters created empty data source!"
        if source_hparams['balance_by_attribute']:
            # repeatedly split the source on each balance attribute so every
            # unique attribute value becomes its own sub-source
            meta_data = [meta_data]
            for k in source_hparams['balance_by_attribute']:
                new_data = []
                for m in meta_data:
                    unique_elems = m[k].frame.unique().tolist()
                    new_data.extend([m[m[k] == u] for u in unique_elems])
                meta_data = new_data
            if source_hparams['source_prob']:
                # divide the configured probability evenly over sub-sources
                new_prob = source_hparams['source_prob'] / float(
                    len(meta_data))
                source_hparams['source_prob'] = [
                    new_prob for _ in range(len(meta_data))
                ]
            else:
                source_hparams['source_prob'] = [
                    None for _ in range(len(meta_data))
                ]
            sources.extend(meta_data)
            source_probs.extend(source_hparams['source_prob'])
        else:
            source_probs.append(source_hparams['source_prob'])
            sources.append(meta_data)
    if any([s is not None for s in source_probs]):
        # entries left as None share the remaining probability mass equally
        set_probs = [s for s in source_probs if s is not None]
        assert all([
            0 <= s <= 1 for s in set_probs
        ]) and sum(set_probs) <= 1, "invalid probability distribution!"
        if len(set_probs) != len(source_probs):
            remainder_prob = (1.0 - sum(set_probs)) / (len(source_probs) -
                                                       len(set_probs))
            for i in range(len(source_probs)):
                if source_probs[i] is None:
                    source_probs[i] = remainder_prob
    else:
        # no entry requested a probability -> uniform sampling downstream
        source_probs = None
    return sources, source_probs
import tensorflow as tf
from robonet.datasets.robonet_dataset import RoboNetDataset
from robonet.datasets import load_metadata

if __name__ == "__main__":
    # Smoke test: pull one batch of RoboNet images through the TF pipeline.
    sess = tf.InteractiveSession()
    all_robonet = load_metadata(
        "/data/vision/billf/scratch/yilundu/robonet/hdf5")
    # keep only trajectories with a 4-dimensional action space
    database = all_robonet[all_robonet['adim'] == 4]
    data = RoboNetDataset(batch_size=16,
                          dataset_files_or_metadata=database,
                          hparams={
                              'img_size': [1024, 1024],
                              'load_T': 2,
                              'target_adim': 4,
                              'action_mismatch': 1
                          })
    images = data['images']
    real_image = sess.run(images)
    # Report the fetched batch instead of dropping into a debugger.
    # (Removed leftover debug residue: pdb.set_trace() followed by
    # `assert False`, which made this script always fail.)
    print("loaded image batch with shape", real_image.shape)
def get_data(dataset,
             mode,
             dataset_dir,
             batch_size=32,
             sequence_length_train=12,
             sequence_length_test=12,
             shuffle=True,
             initializable=False):
    """Build input tensors for one of the supported video datasets.

    Parameters:
        dataset (str)       -- one of 'bair', 'google', 'robonet'
        mode (str)          -- one of 'train', 'val', 'test'
        dataset_dir (str)   -- root directory for bair/google readers
        batch_size (int)    -- examples per batch
        sequence_length_train/test (int) -- clip lengths for the readers
        shuffle (bool)      -- whether the reader shuffles examples
        initializable (bool) -- bair reader iterator flavor

    Returns:
        (frames, actions, states, steps, iterator); iterator is None for
        'robonet', which has no tf iterator.
    """
    assert dataset in ['bair', 'google', 'robonet']
    assert mode in ['train', 'val', 'test']

    iterator = None  # robonet path has no iterator; avoids NameError on return

    if dataset == 'bair':
        d = BairDataReader(dataset_dir=dataset_dir,
                           batch_size=batch_size,
                           use_state=1,
                           sequence_length_train=sequence_length_train,
                           sequence_length_test=sequence_length_test,
                           shuffle=shuffle,
                           batch_repeat=1,
                           initializable=initializable)
    elif dataset == 'google':
        d = GooglePushDataReader(
            dataset_dir=dataset_dir,  # '/media/Data/datasets/google_push/push/',
            batch_size=batch_size,
            sequence_length_train=sequence_length_train,
            sequence_length_test=sequence_length_test,
            shuffle=shuffle,
            train_dir_name='push_train',
            test_dir_name='push_train',
            batch_repeat=1)
    elif dataset == 'robonet':
        # load_metadata takes a single directory path; the previous call
        # passed ('~/', 'RoboNet/hdf5/train') as two positional arguments
        train_database = load_metadata(
            os.path.join(os.path.expanduser('~/'), 'RoboNet/hdf5/train'))
        val_database = load_metadata(
            os.path.join(os.path.expanduser('~/'), 'RoboNet/hdf5/val2'))
        train_database = train_database[train_database['robot'] == 'fetch']
        d_train = RoboNetDataset(batch_size=batch_size,
                                 dataset_files_or_metadata=train_database,
                                 hparams={
                                     'img_size': [64, 64],
                                     'target_adim': 2,
                                     'target_sdim': 3
                                 })
        d_val = RoboNetDataset(batch_size=batch_size,
                               dataset_files_or_metadata=val_database,
                               hparams={
                                   'img_size': [64, 64],
                                   'target_adim': 2,
                                   'target_sdim': 3
                               })

    if dataset != 'robonet':
        # Hard-coded bair tfrecord shards. Previously assigned
        # unconditionally, which raised NameError for 'robonet'
        # (d is never bound on that path).
        d.train_filenames = [
            '/media/Data/datasets/bair/softmotion30_44k/train/traj_10174_to_10429.tfrecords',
            '/media/Data/datasets/bair/softmotion30_44k/train/traj_1024_to_1279.tfrecords',
            '/media/Data/datasets/bair/softmotion30_44k/train/traj_10430_to_10685.tfrecords',
            '/media/Data/datasets/bair/softmotion30_44k/train/traj_10686_to_10941.tfrecords',
            '/media/Data/datasets/bair/softmotion30_44k/train/traj_10942_to_11197.tfrecords',
            '/media/Data/datasets/bair/softmotion30_44k/train/traj_11198_to_11453.tfrecords',
            '/media/Data/datasets/bair/softmotion30_44k/train/traj_11454_to_11709.tfrecords',
            '/media/Data/datasets/bair/softmotion30_44k/train/traj_11710_to_11965.tfrecords',
            '/media/Data/datasets/bair/softmotion30_44k/train/traj_11966_to_12221.tfrecords',
            '/media/Data/datasets/bair/softmotion30_44k/train/traj_12222_to_12477.tfrecords',
            '/media/Data/datasets/bair/softmotion30_44k/train/traj_12478_to_12733.tfrecords',
            '/media/Data/datasets/bair/softmotion30_44k/train/traj_12734_to_12989.tfrecords',
            '/media/Data/datasets/bair/softmotion30_44k/train/traj_1280_to_1535.tfrecords',
            '/media/Data/datasets/bair/softmotion30_44k/train/traj_12990_to_13245.tfrecords',
            '/media/Data/datasets/bair/softmotion30_44k/train/traj_13341_to_13596.tfrecords',
            '/media/Data/datasets/bair/softmotion30_44k/train/traj_13597_to_13852.tfrecords',
            '/media/Data/datasets/bair/softmotion30_44k/train/traj_13853_to_14108.tfrecords',
            '/media/Data/datasets/bair/softmotion30_44k/train/traj_14109_to_14364.tfrecords'
        ]
        d.val_filenames = [
            '/media/Data/datasets/bair/softmotion30_44k/train/traj_5983_to_6238.tfrecords',
            '/media/Data/datasets/bair/softmotion30_44k/train/traj_6239_to_6494.tfrecords',
            '/media/Data/datasets/bair/softmotion30_44k/train/traj_6495_to_6750.tfrecords'
        ]

    if dataset == 'robonet':
        frames = tf.squeeze(
            d_train['images'])  # images, states, and actions are from paired
        actions = d_train['actions']
        states = d_train['states']
        val_frames = tf.squeeze(d_val['images'])
        val_actions = d_val['actions']
        val_states = d_val['states']
        steps = 545
        val_steps = 545
    else:
        steps = d.num_examples_per_epoch(mode) // d.batch_size
        iterator = d.build_tf_iterator(mode=mode)
        input_get_next_op = iterator.get_next()
        frames = input_get_next_op['images']
        actions = input_get_next_op['actions'][:, :, :4]
        states = input_get_next_op['states'][:, :, :3]

    return frames, actions, states, steps, iterator
return exp_same_view if __name__ == "__main__": parser = argparse.ArgumentParser( description= "Collect data corresponding to specific robot, and organize them by viewpoints" ) parser.add_argument("directory", type=str, help="path to dataset folder") parser.add_argument("robot", type=str, help="robot") args = parser.parse_args() hparams = tf.contrib.training.HParams(**default_loader_hparams()) hparams.img_size = [240, 320] meta_data = load_metadata(args.directory) exp_same_view = collect_same_viewpoint(args.robot, args.directory) print(len(exp_same_view)) os.makedirs(args.robot, exist_ok=True) for vp in exp_same_view: target_folder = args.robot + "/" + vp os.makedirs(target_folder, exist_ok=True) visuals = min(NUM_VISUAL_PER_VIEW, len(exp_same_view[vp])) for i in range(visuals): f = exp_same_view[vp][i] exp_name = f.split("/")[-1][:-5] imgs, states, qposes, ws_min, ws_max, viewpoint = load_data_customized( f, meta_data.get_file_metadata(f), hparams) print("saving experiment:", exp_name)
from robonet.datasets.robonet_dataset import RoboNetDataset


def load_hdf5(path):
    """Open a RoboNet hdf5 trajectory and return its five camera streams.

    The returned dict maps 'cam{i}_video' to the (still file-backed) frame
    datasets; the h5py file handle stays open while they are in use.
    """
    hf = h5py.File(path, "r")
    return {
        "cam%d_video" % cam: hf["env"]["cam%d_video" % cam]["frames"]
        for cam in range(5)
    }


if __name__ == "__main__":
    data_dir = 'hdf5/'  # path to folder you unzipped in step (1)
    all_robonet = load_metadata(data_dir)
    # pythonic filtering supported
    sawyer_data = all_robonet[all_robonet['robot'] == 'sawyer']
    # gets shuffled list of sawyer files
    sawyer_files = sawyer_data.get_shuffled_files()
    sawyer_data.get_file_metadata(sawyer_files[0])
    f1 = h5py.File(sawyer_files[0], 'r')
    # f1.keys():
    # <KeysViewHDF5 ['env', 'file_version', 'metadata', 'misc', 'policy']>
    # f1['env'].keys():
    # <KeysViewHDF5 ['cam0_video', 'cam1_video', 'cam2_video', 'cam3_video', 'cam4_video',
    #   'finger_sensors', 'high_bound', 'low_bound', 'qpos', 'qvel', 'state']>
    # f1['env']['qpos']:
    # <HDF5 dataset "qpos": shape (31, 7), type "<f8">
    demo = load_hdf5(sawyer_files[0])
parser.add_argument( "--load_steps", type=int, default=0, help="loads <load_steps> steps from the dataset instead of everything", ) args = parser.parse_args() hparams = tf.contrib.training.HParams(**default_loader_hparams()) hparams.load_T = args.load_steps hparams.img_size = [240, 320] assert "hdf5" in args.file exp_name = args.file.split("/")[-1][:-5] data_folder = "/".join(args.file.split("/")[:-1]) meta_data = load_metadata(data_folder) imgs, states, qposes, ws_min, ws_max, viewpoint = customized( args.file, meta_data.get_file_metadata(args.file), hparams) print("states", states.shape) print("images", imgs.shape) print("qposes", qposes.shape) print("saving experiment:", exp_name) np.save("images/states_" + exp_name, states) np.save("images/qposes_" + exp_name, qposes) writer = imageio.get_writer("images/" + exp_name + ".gif") for t in range(imgs.shape[0]): imageio.imwrite("images/" + exp_name + "_" + str(t) + ".png", imgs[t, 0])