def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--folder_in')
    parser.add_argument('--folder_out')
    parser.add_argument('--num_data', type=int, default=60000)
    parser.add_argument('--num_parallel', type=int, default=32)
    args = parser.parse_args()
    if not os.path.exists(args.folder_out):
        os.makedirs(args.folder_out)
    path_in = os.path.join(args.folder_in, 'multi_dsprites',
                           'multi_dsprites_colored_on_colored.tfrecords')
    dataset = multi_dsprites.dataset(path_in, 'colored_on_colored',
                                     map_parallel_calls=args.num_parallel)
    next_batch = get_next_batch(dataset, args.num_data)
    path_out = os.path.join(args.folder_out, 'dsprites')
    with tf.Session() as sess:
        data = sess.run(next_batch)
    create_dataset(data, path_out, args.num_parallel)
    return
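# The scripts in this section call a get_next_batch helper that is not shown
# here. The following is a minimal sketch of what it might look like, assuming
# it simply takes the first `num_data` records of the tf.data pipeline and
# packs them into a single batch so that one sess.run call materializes the
# whole subset; the real helper may shuffle or drop fields.
def get_next_batch(dataset, num_data):
    dataset = dataset.take(num_data).batch(num_data)
    iterator = dataset.make_one_shot_iterator()
    # Returns a dict of tensors, one entry per feature in the tfrecords.
    return iterator.get_next()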
def test_get_next_batch():
    """Compare the time cost of generating a batch with and without threads."""
    for i in range(CAPTCHA_COUNT):
        start_time = time.time()
        batch_x, batch_y = get_next_batch()
        end_time = time.time()
        start_time2 = time.time()
        thread_batch_x, thread_batch_y = get_next_batch_thread()
        end_time2 = time.time()
        interval_time = end_time - start_time
        interval_time2 = end_time2 - start_time2
        # Check that both versions produce identical data.
        assert all([all(r) for r in thread_batch_x == batch_x])
        assert all([all(r) for r in thread_batch_y == batch_y])
        # Report whether the threaded version is faster (printed, not asserted).
        print_info("interval time: {}, time2: {}".format(
            interval_time, interval_time2))
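# get_next_batch_thread is not defined in this section. One plausible shape is
# sketched below, assuming a per-index sample generator: the name _make_sample,
# the image shape, and the label size are placeholders, not the project's
# actual values. Each sample is derived deterministically from its index so the
# equality asserts in test_get_next_batch above can hold.
from concurrent.futures import ThreadPoolExecutor
import numpy as np

def _make_sample(i, image_shape=(60, 160), num_classes=40):
    # Deterministic stand-in for the real captcha generator (hypothetical).
    rng = np.random.RandomState(i)
    image = rng.randint(0, 256, size=image_shape).astype(np.float32)
    label = np.zeros(num_classes, dtype=np.float32)
    label[rng.randint(num_classes)] = 1.0
    return image, label

def get_next_batch_thread(batch_size=64, num_workers=8):
    def make_example(i):
        image, label = _make_sample(i)
        return image.flatten() / 255.0, label

    # Generate the batch in parallel; pool.map keeps the index order.
    with ThreadPoolExecutor(max_workers=num_workers) as pool:
        pairs = list(pool.map(make_example, range(batch_size)))
    batch_x = np.stack([x for x, _ in pairs])
    batch_y = np.stack([y for _, y in pairs])
    return batch_x, batch_y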
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--folder_in')
    parser.add_argument('--folder_out')
    parser.add_argument('--num_data', type=int, default=70000)
    parser.add_argument('--num_parallel', type=int, default=32)
    parser.add_argument('--split_objects', type=int, default=6)
    args = parser.parse_args()
    if not os.path.exists(args.folder_out):
        os.makedirs(args.folder_out)
    path_in = os.path.join(args.folder_in, 'clevr_with_masks',
                           'clevr_with_masks_train.tfrecords')
    dataset = clevr_with_masks.dataset(path_in, map_parallel_calls=args.num_parallel)
    next_batch = get_next_batch(dataset, args.num_data)
    path_out_all = {
        'train': os.path.join(args.folder_out, 'clevr_train'),
        'extra': os.path.join(args.folder_out, 'clevr_extra'),
    }
    with tf.Session() as sess:
        data_all = sess.run(next_batch)
    data_all['image'] = np.stack(
        [crop_and_resize(val) for val in data_all['image']])
    data_all['mask'] = np.stack([
        np.stack([crop_and_resize(sub_val) for sub_val in val])
        for val in data_all['mask']
    ])
    num_objects = np.sum(data_all['visibility'][:, 1:], axis=1)
    sel_train = num_objects <= args.split_objects
    sel_extra = np.bitwise_not(sel_train)
    data_all = {
        'train': {key: val[sel_train] for key, val in data_all.items()},
        'extra': {key: val[sel_extra] for key, val in data_all.items()},
    }
    for phase, data in data_all.items():
        path_out = path_out_all[phase]
        create_dataset(data, path_out, args.num_parallel)
    return
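# crop_and_resize is also undefined in this section. A minimal sketch follows,
# assuming the usual CLEVR preprocessing of a fixed center crop followed by a
# resize; the crop window (192x192) and target size (128x128) are assumptions,
# not values taken from the repository.
import numpy as np
from PIL import Image

def crop_and_resize(image, crop=(29, 221, 64, 256), size=(128, 128)):
    top, bottom, left, right = crop
    cropped = image[top:bottom, left:right]
    # Nearest-neighbour keeps segmentation masks hard-edged and binary.
    resized = np.array(
        Image.fromarray(np.squeeze(cropped)).resize(size, Image.NEAREST))
    # Restore the trailing channel axis that np.squeeze removes from masks.
    return resized if resized.ndim == image.ndim else resized[..., None]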
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--folder_in')
    parser.add_argument('--folder_out')
    parser.add_argument('--num_data', type=int, default=60000)
    parser.add_argument('--num_parallel', type=int, default=32)
    args = parser.parse_args()
    if not os.path.exists(args.folder_out):
        os.makedirs(args.folder_out)
    for split in ['train', 'empty_room', 'six_objects', 'identical_color']:
        if split == 'train':
            file_in = 'objects_room_{}.tfrecords'.format(split)
        else:
            file_in = 'objects_room_test_{}.tfrecords'.format(split)
        path_in = os.path.join(args.folder_in, 'objects_room', file_in)
        dataset = objects_room.dataset(path_in, split,
                                       map_parallel_calls=args.num_parallel)
        next_batch = get_next_batch(dataset, args.num_data)
        path_out = os.path.join(args.folder_out, 'room_{}'.format(split))
        with tf.Session() as sess:
            data = sess.run(next_batch)
        create_dataset(data, path_out, args.num_parallel)
    return
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--folder_in')
    parser.add_argument('--folder_out')
    parser.add_argument('--num_data', type=int, default=60000)
    parser.add_argument('--num_parallel', type=int, default=32)
    args = parser.parse_args()
    if not os.path.exists(args.folder_out):
        os.makedirs(args.folder_out)
    path_in = os.path.join(args.folder_in, 'clevr_with_masks',
                           'clevr_with_masks_train.tfrecords')
    dataset = clevr_with_masks.dataset(path_in, map_parallel_calls=args.num_parallel)
    next_batch = get_next_batch(dataset, args.num_data)
    path_out = os.path.join(args.folder_out, 'clevr')
    with tf.Session() as sess:
        data = sess.run(next_batch)
    data['image'] = np.stack([crop_and_resize(val) for val in data['image']])
    data['mask'] = np.stack([
        np.stack([crop_and_resize(sub_val) for sub_val in val])
        for val in data['mask']
    ])
    create_dataset(data, path_out, args.num_parallel)
    return
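# Every script above ends with create_dataset(data, path_out, num_parallel),
# which is not part of this section. A minimal sketch under two assumptions:
# the output format is a single HDF5 file per split, and num_parallel only
# matters for an (omitted) per-image compression step, so it is unused here.
import h5py

def create_dataset(data, path_out, num_parallel):
    with h5py.File('{}.h5'.format(path_out), 'w') as f:
        for key, val in data.items():
            f.create_dataset(key, data=val, compression='gzip')
# The resulting file can be read back with h5py.File(path_out + '.h5', 'r')
# to verify that the 'image', 'mask', and other arrays were written as expected.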