# Common imports assumed by the scripts collected below (each main() originally
# lived in its own file of the PointCNN data-preparation tree).
import argparse
import importlib
import math
import os
import pickle
import sys
from datetime import datetime
from pathlib import Path

import h5py
import numpy as np
import scipy.io as scio
import tensorflow as tf
from tqdm import tqdm

import data_utils


def export_ply_blocks(batched_data, data_num, filepath_pred, labels, setting):
    # Save the per-point labels of each block as PLY files next to the prediction file.
    folder = os.path.join(os.path.dirname(filepath_pred), 'PLY')
    filename = os.path.splitext(os.path.basename(filepath_pred))[0]
    filepath_label_ply = os.path.join(folder, filename)
    data_utils.save_ply_property_batch(batched_data[:, :, 0:3], labels[...],
                                       filepath_label_ply, data_num[...],
                                       setting.num_class)
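# Minimal usage sketch for export_ply_blocks (the file name below is
# hypothetical; `setting` is assumed to expose a num_class attribute, as the
# PointCNN settings modules do):
#
#   batched_data: (B, N, C) float array whose first 3 channels are xyz
#   data_num:     (B,) int array of valid point counts per sample
#   labels:       (B, N) int array of per-point labels
#
#   export_ply_blocks(batched_data, data_num, 'out/scene_pred.h5', labels, setting)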
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--folder', '-f', help='Path to data folder')
    parser.add_argument('--save_ply', '-s', help='Convert .pts to .ply', action='store_true')
    args = parser.parse_args()
    print(args)

    root = args.folder if args.folder else '../../data/shapenet_partseg/'
    folders = [(root + 'train_data', root + 'train_label'),
               (root + 'val_data', root + 'val_label'),
               (root + 'test_data', root + 'test_label')]
    category_label_seg_max_dict = dict()
    max_point_num = 0
    label_seg_min = sys.maxsize
    for data_folder, label_folder in folders:
        if not os.path.exists(data_folder):
            continue
        for category in sorted(os.listdir(data_folder)):
            if category not in category_label_seg_max_dict:
                category_label_seg_max_dict[category] = 0
            data_category_folder = os.path.join(data_folder, category)
            category_label_seg_max = 0
            for filename in sorted(os.listdir(data_category_folder)):
                data_filepath = os.path.join(data_category_folder, filename)
                coordinates = [xyz for xyz in open(data_filepath, 'r') if len(xyz.split(' ')) == 3]
                max_point_num = max(max_point_num, len(coordinates))
                if label_folder is not None:
                    label_filepath = os.path.join(label_folder, category, filename[0:-3] + 'seg')
                    label_seg_this = np.loadtxt(label_filepath).astype(np.int32)
                    assert len(coordinates) == len(label_seg_this)
                    category_label_seg_max = max(category_label_seg_max, max(label_seg_this))
                    label_seg_min = min(label_seg_min, min(label_seg_this))
            category_label_seg_max_dict[category] = max(category_label_seg_max_dict[category],
                                                        category_label_seg_max)

    category_label_seg_max_list = [(key, category_label_seg_max_dict[key])
                                   for key in sorted(category_label_seg_max_dict.keys())]
    category_label = dict()
    offset = 0
    category_offset = dict()
    label_seg_max = max([category_label_seg_max
                         for _, category_label_seg_max in category_label_seg_max_list])
    with open(os.path.join(root, 'categories.txt'), 'w') as file_categories:
        for idx, (category, category_label_seg_max) in enumerate(category_label_seg_max_list):
            file_categories.write('%s %d\n' % (category, category_label_seg_max - label_seg_min + 1))
            category_label[category] = idx
            category_offset[category] = offset
            offset = offset + category_label_seg_max - label_seg_min + 1
    print('part_num:', offset)
    print('max_point_num:', max_point_num)
    print(category_label_seg_max_list)

    batch_size = 2048
    data = np.zeros((batch_size, max_point_num, 3))
    data_num = np.zeros((batch_size), dtype=np.int32)
    label = np.zeros((batch_size), dtype=np.int32)
    label_seg = np.zeros((batch_size, max_point_num), dtype=np.int32)
    for data_folder, label_folder in folders:
        if not os.path.exists(data_folder):
            continue
        data_folder_ply = data_folder + '_ply'
        file_num = 0
        for category in sorted(os.listdir(data_folder)):
            data_category_folder = os.path.join(data_folder, category)
            file_num = file_num + len(os.listdir(data_category_folder))
        idx_h5 = 0
        idx = 0
        save_path = '%s/%s' % (os.path.dirname(data_folder), os.path.basename(data_folder)[0:-5])
        filename_txt = '%s_files.txt' % save_path
        ply_filepath_list = []
        with open(filename_txt, 'w') as filelist:
            for category in sorted(os.listdir(data_folder)):
                data_category_folder = os.path.join(data_folder, category)
                for filename in sorted(os.listdir(data_category_folder)):
                    data_filepath = os.path.join(data_category_folder, filename)
                    coordinates = [[float(value) for value in xyz.split(' ')]
                                   for xyz in open(data_filepath, 'r') if len(xyz.split(' ')) == 3]
                    idx_in_batch = idx % batch_size
                    data[idx_in_batch, 0:len(coordinates), ...] = np.array(coordinates)
                    data_num[idx_in_batch] = len(coordinates)
                    label[idx_in_batch] = category_label[category]
                    if label_folder is not None:
                        label_filepath = os.path.join(label_folder, category, filename[0:-3] + 'seg')
                        label_seg_this = np.loadtxt(label_filepath).astype(np.int32) - label_seg_min
                        assert len(coordinates) == label_seg_this.shape[0]
                        label_seg[idx_in_batch, 0:len(coordinates)] = \
                            label_seg_this + category_offset[category]
                    data_ply_filepath = os.path.join(data_folder_ply, category, filename[:-3] + 'ply')
                    ply_filepath_list.append(data_ply_filepath)
                    if ((idx + 1) % batch_size == 0) or idx == file_num - 1:
                        item_num = idx_in_batch + 1
                        filename_h5 = '%s_%d.h5' % (save_path, idx_h5)
                        print('{}-Saving {}...'.format(datetime.now(), filename_h5))
                        filelist.write('./%s_%d.h5\n' % (os.path.basename(data_folder)[0:-5], idx_h5))
                        file = h5py.File(filename_h5, 'w')
                        file.create_dataset('data', data=data[0:item_num, ...])
                        file.create_dataset('data_num', data=data_num[0:item_num, ...])
                        file.create_dataset('label', data=label[0:item_num, ...])
                        file.create_dataset('label_seg', data=label_seg[0:item_num, ...])
                        file.close()
                        if args.save_ply:
                            data_utils.save_ply_property_batch(data[0:item_num, ...],
                                                               label_seg[0:item_num, ...],
                                                               ply_filepath_list,
                                                               data_num[0:item_num, ...],
                                                               label_seg_max - label_seg_min)
                            ply_filepath_list = []
                        idx_h5 = idx_h5 + 1
                    idx = idx + 1
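# A minimal sketch (not part of the original script) for reading back one of
# the .h5 files written above; any produced file works, e.g. a hypothetical
# 'train_0.h5'.
def _inspect_h5(filename_h5):
    with h5py.File(filename_h5, 'r') as f:
        for key in ('data', 'data_num', 'label', 'label_seg'):
            print(key, f[key].shape, f[key].dtype)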
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--folder', '-f', help='Path to data folder')
    parser.add_argument('--max_point_num', '-m', help='Max point number of each sample',
                        type=int, default=8192)
    parser.add_argument('--block_size', '-b', help='Block size', type=float, default=1.5)
    parser.add_argument('--grid_size', '-g', help='Grid size', type=float, default=0.03)
    parser.add_argument('--save_ply', '-s', help='Convert .pts to .ply', action='store_true')
    args = parser.parse_args()
    print(args)

    # DATA_DIR is assumed to be defined at module level elsewhere in this script.
    root = args.folder if args.folder else os.path.join(DATA_DIR, "S3DIS", "prepare_label_rgb")
    max_point_num = args.max_point_num

    batch_size = 2048
    data = np.zeros((batch_size, max_point_num, 6))
    data_num = np.zeros((batch_size), dtype=np.int32)
    label = np.zeros((batch_size), dtype=np.int32)
    label_seg = np.zeros((batch_size, max_point_num), dtype=np.int32)
    indices_split_to_full = np.zeros((batch_size, max_point_num), dtype=np.int32)

    for area_idx in range(1, 7):
        folder = os.path.join(root, 'Area_%d' % area_idx)
        datasets = [dataset for dataset in os.listdir(folder)]
        for dataset_idx, dataset in enumerate(datasets):
            filename_data = os.path.join(folder, dataset, 'xyzrgb.npy')
            print('{}-Loading {}...'.format(datetime.now(), filename_data))
            xyzrgb = np.load(filename_data)
            filename_labels = os.path.join(folder, dataset, 'label.npy')
            print('{}-Loading {}...'.format(datetime.now(), filename_labels))
            labels = np.load(filename_labels).astype(int).flatten()
            xyz, rgb = np.split(xyzrgb, [3], axis=-1)
            xyz_min = np.amin(xyz, axis=0, keepdims=True)
            xyz_max = np.amax(xyz, axis=0, keepdims=True)
            xyz_center = (xyz_min + xyz_max) / 2
            xyz_center[0][-1] = xyz_min[0][-1]
            xyz = xyz - xyz_center  # align to room bottom center
            rgb = rgb / 255 - 0.5

            offsets = [('zero', 0.0), ('half', args.block_size / 2)]
            for offset_name, offset in offsets:
                idx_h5 = 0
                idx = 0
                print('{}-Computing block id of {} points...'.format(datetime.now(), xyzrgb.shape[0]))
                xyz_min = np.amin(xyz, axis=0, keepdims=True) - offset
                xyz_max = np.amax(xyz, axis=0, keepdims=True)
                block_size = (args.block_size, args.block_size, 2 * (xyz_max[0, -1] - xyz_min[0, -1]))
                xyz_blocks = np.floor((xyz - xyz_min) / block_size).astype(int)

                print('{}-Collecting points belonging to each block...'.format(datetime.now()))
                blocks, point_block_indices, block_point_counts = np.unique(
                    xyz_blocks, return_inverse=True, return_counts=True, axis=0)
                block_point_indices = np.split(np.argsort(point_block_indices),
                                               np.cumsum(block_point_counts[:-1]))
                print('{}-{} is split into {} blocks.'.format(datetime.now(), dataset, blocks.shape[0]))

                block_to_block_idx_map = dict()
                for block_idx in range(blocks.shape[0]):
                    block = (blocks[block_idx][0], blocks[block_idx][1])
                    block_to_block_idx_map[(block[0], block[1])] = block_idx

                # merge small blocks into one of their big neighbors
                block_point_count_threshold = max_point_num / 10
                nbr_block_offsets = [(0, 1), (1, 0), (0, -1), (-1, 0),
                                     (-1, 1), (1, 1), (1, -1), (-1, -1)]
                block_merge_count = 0
                for block_idx in range(blocks.shape[0]):
                    if block_point_counts[block_idx] >= block_point_count_threshold:
                        continue
                    block = (blocks[block_idx][0], blocks[block_idx][1])
                    for x, y in nbr_block_offsets:
                        nbr_block = (block[0] + x, block[1] + y)
                        if nbr_block not in block_to_block_idx_map:
                            continue
                        nbr_block_idx = block_to_block_idx_map[nbr_block]
                        if block_point_counts[nbr_block_idx] < block_point_count_threshold:
                            continue
                        block_point_indices[nbr_block_idx] = np.concatenate(
                            [block_point_indices[nbr_block_idx], block_point_indices[block_idx]],
                            axis=-1)
                        block_point_indices[block_idx] = np.array([], dtype=int)
                        block_merge_count = block_merge_count + 1
                        break
                print('{}-{} of {} blocks are merged.'.format(datetime.now(),
                                                              block_merge_count, blocks.shape[0]))

                idx_last_non_empty_block = 0
                for block_idx in reversed(range(blocks.shape[0])):
                    if block_point_indices[block_idx].shape[0] != 0:
                        idx_last_non_empty_block = block_idx
                        break

                # uniformly sample each block
                for block_idx in range(idx_last_non_empty_block + 1):
                    point_indices = block_point_indices[block_idx]
                    if point_indices.shape[0] == 0:
                        continue
                    block_points = xyz[point_indices]
                    block_min = np.amin(block_points, axis=0, keepdims=True)
                    xyz_grids = np.floor((block_points - block_min) / args.grid_size).astype(int)
                    grids, point_grid_indices, grid_point_counts = np.unique(
                        xyz_grids, return_inverse=True, return_counts=True, axis=0)
                    grid_point_indices = np.split(np.argsort(point_grid_indices),
                                                  np.cumsum(grid_point_counts[:-1]))
                    grid_point_count_avg = int(np.average(grid_point_counts))
                    point_indices_repeated = []
                    for grid_idx in range(grids.shape[0]):
                        point_indices_in_block = grid_point_indices[grid_idx]
                        repeat_num = math.ceil(grid_point_count_avg / point_indices_in_block.shape[0])
                        if repeat_num > 1:
                            point_indices_in_block = np.repeat(point_indices_in_block, repeat_num)
                            np.random.shuffle(point_indices_in_block)
                            point_indices_in_block = point_indices_in_block[:grid_point_count_avg]
                        point_indices_repeated.extend(list(point_indices[point_indices_in_block]))
                    block_point_indices[block_idx] = np.array(point_indices_repeated)
                    block_point_counts[block_idx] = len(point_indices_repeated)

                for block_idx in range(idx_last_non_empty_block + 1):
                    point_indices = block_point_indices[block_idx]
                    if point_indices.shape[0] == 0:
                        continue
                    block_point_num = point_indices.shape[0]
                    block_split_num = int(math.ceil(block_point_num * 1.0 / max_point_num))
                    point_num_avg = int(math.ceil(block_point_num * 1.0 / block_split_num))
                    point_nums = [point_num_avg] * block_split_num
                    point_nums[-1] = block_point_num - (point_num_avg * (block_split_num - 1))
                    starts = [0] + list(np.cumsum(point_nums))
                    np.random.shuffle(point_indices)
                    block_points = xyz[point_indices]
                    block_rgb = rgb[point_indices]
                    block_labels = labels[point_indices]
                    x, y, z = np.split(block_points, (1, 2), axis=-1)
                    block_xzyrgb = np.concatenate([x, z, y, block_rgb], axis=-1)
                    for block_split_idx in range(block_split_num):
                        start = starts[block_split_idx]
                        point_num = point_nums[block_split_idx]
                        end = start + point_num
                        idx_in_batch = idx % batch_size
                        data[idx_in_batch, 0:point_num, ...] = block_xzyrgb[start:end, :]
                        data_num[idx_in_batch] = point_num
                        label[idx_in_batch] = dataset_idx  # won't be used...
                        label_seg[idx_in_batch, 0:point_num] = block_labels[start:end]
                        indices_split_to_full[idx_in_batch, 0:point_num] = point_indices[start:end]
                        if ((idx + 1) % batch_size == 0) or \
                                (block_idx == idx_last_non_empty_block
                                 and block_split_idx == block_split_num - 1):
                            item_num = idx_in_batch + 1
                            filename_h5 = os.path.join(folder, dataset,
                                                       '%s_%d.h5' % (offset_name, idx_h5))
                            print('{}-Saving {}...'.format(datetime.now(), filename_h5))
                            file = h5py.File(filename_h5, 'w')
                            file.create_dataset('data', data=data[0:item_num, ...])
                            file.create_dataset('data_num', data=data_num[0:item_num, ...])
                            file.create_dataset('label', data=label[0:item_num, ...])
                            file.create_dataset('label_seg', data=label_seg[0:item_num, ...])
                            file.create_dataset('indices_split_to_full',
                                                data=indices_split_to_full[0:item_num, ...])
                            file.close()
                            if args.save_ply:
                                print('{}-Saving ply of {}...'.format(datetime.now(), filename_h5))
                                filepath_label_ply = os.path.join(
                                    folder, dataset, 'ply_label',
                                    'label_%s_%d' % (offset_name, idx_h5))
                                data_utils.save_ply_property_batch(
                                    data[0:item_num, :, 0:3], label_seg[0:item_num, ...],
                                    filepath_label_ply, data_num[0:item_num, ...], 14)
                                filepath_rgb_ply = os.path.join(
                                    folder, dataset, 'ply_rgb',
                                    'rgb_%s_%d' % (offset_name, idx_h5))
                                data_utils.save_ply_color_batch(
                                    data[0:item_num, :, 0:3],
                                    (data[0:item_num, :, 3:] + 0.5) * 255,
                                    filepath_rgb_ply, data_num[0:item_num, ...])
                            idx_h5 = idx_h5 + 1
                        idx = idx + 1
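# A minimal sketch (not part of the pipeline) of how the 'indices_split_to_full'
# dataset written above can be used: predictions made on the subsampled blocks
# are scattered back onto the original full point cloud. It assumes the 1-D
# index layout of this S3DIS script and a *_pred.h5 file with the datasets
# written by the test script near the end of this file.
def _merge_predictions_to_full(filename_pred_h5, full_point_num):
    full_labels = np.full(full_point_num, -1, dtype=np.int32)
    with h5py.File(filename_pred_h5, 'r') as f:
        label_seg = f['label_seg'][...]
        data_num = f['data_num'][...]
        indices = f['indices_split_to_full'][...]
    for i in range(label_seg.shape[0]):
        n = data_num[i]
        full_labels[indices[i, :n]] = label_seg[i, :n]
    return full_labels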
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--folder', '-f', help='Path to data folder')
    parser.add_argument('--max_point_num', '-m', help='Max point number of each sample',
                        type=int, default=8192)
    parser.add_argument('--block_size', '-b', help='Block size', type=float, default=1.5)
    parser.add_argument('--grid_size', '-g', help='Grid size', type=float, default=0.03)
    parser.add_argument('--save_ply', '-s', help='Convert .pts to .ply', action='store_true')
    args = parser.parse_args()
    print(args)

    root = args.folder if args.folder else '../../data/scannet/seg'
    max_point_num = args.max_point_num

    batch_size = 2048
    data = np.zeros((batch_size, max_point_num, 3))
    data_num = np.zeros((batch_size), dtype=np.int32)
    label = np.zeros((batch_size), dtype=np.int32)
    label_seg = np.zeros((batch_size, max_point_num), dtype=np.int32)
    indices_split_to_full = np.zeros((batch_size, max_point_num, 2), dtype=np.int32)

    filenames = list(Path(root).glob('*seg*.hdf5'))
    t = tqdm(filenames)
    for filename in t:
        t.set_description(filename.stem)
        # print('{}-Loading {}...'.format(datetime.now(), filename))
        with h5py.File(filename, 'r') as h5f:
            points = np.array(h5f['data'])
            xyz = points[:, :, [9, 11, 10]].reshape(-1, 3)
            labels = np.array(h5f['label']).ravel()
        offsets = [('zero', 0.0), ('half', args.block_size / 2)]
        for offset_name, offset in offsets:
            idx_h5 = 0
            idx = 0
            # align to room bottom center
            xyz_min = np.amin(xyz, axis=0, keepdims=True)
            xyz_max = np.amax(xyz, axis=0, keepdims=True)
            xyz_center = (xyz_min + xyz_max) / 2
            xyz_center[0][-1] = xyz_min[0][-1]
            xyz = xyz - xyz_center

            # print('{}-Computing block id of {} points...'.format(datetime.now(), xyz.shape[0]))
            xyz_min = np.amin(xyz, axis=0, keepdims=True) - offset
            xyz_max = np.amax(xyz, axis=0, keepdims=True)
            block_size = (args.block_size, args.block_size, 2 * (xyz_max[0, -1] - xyz_min[0, -1]))
            xyz_blocks = np.floor((xyz - xyz_min) / block_size).astype(int)

            # print('{}-Collecting points belonging to each block...'.format(datetime.now()))
            blocks, point_block_indices, block_point_counts = np.unique(
                xyz_blocks, return_inverse=True, return_counts=True, axis=0)
            block_point_indices = np.split(np.argsort(point_block_indices),
                                           np.cumsum(block_point_counts[:-1]))
            # print('{}-{} is split into {} blocks.'.format(datetime.now(),
            #                                               os.path.basename(filename), blocks.shape[0]))

            block_to_block_idx_map = dict()
            for block_idx in range(blocks.shape[0]):
                block = (blocks[block_idx][0], blocks[block_idx][1])
                block_to_block_idx_map[(block[0], block[1])] = block_idx

            # merge small blocks into one of their big neighbors
            block_point_count_threshold = max_point_num / 10
            nbr_block_offsets = [(0, 1), (1, 0), (0, -1), (-1, 0), (-1, 1), (1, 1), (1, -1), (-1, -1)]
            block_merge_count = 0
            for block_idx in range(blocks.shape[0]):
                if block_point_counts[block_idx] >= block_point_count_threshold:
                    continue
                block = (blocks[block_idx][0], blocks[block_idx][1])
                for x, y in nbr_block_offsets:
                    nbr_block = (block[0] + x, block[1] + y)
                    if nbr_block not in block_to_block_idx_map:
                        continue
                    nbr_block_idx = block_to_block_idx_map[nbr_block]
                    if block_point_counts[nbr_block_idx] < block_point_count_threshold:
                        continue
                    block_point_indices[nbr_block_idx] = np.concatenate(
                        [block_point_indices[nbr_block_idx], block_point_indices[block_idx]], axis=-1)
                    block_point_indices[block_idx] = np.array([], dtype=int)
                    block_merge_count = block_merge_count + 1
                    break
            # print('{}-{} of {} blocks got merged.'.format(datetime.now(),
            #                                               block_merge_count, blocks.shape[0]))

            idx_last_non_empty_block = 0
            for block_idx in reversed(range(blocks.shape[0])):
                if block_point_indices[block_idx].shape[0] != 0:
                    idx_last_non_empty_block = block_idx
                    break

            # uniformly sample each block
            for block_idx in range(idx_last_non_empty_block + 1):
                point_indices = block_point_indices[block_idx]
                if point_indices.shape[0] == 0:
                    continue
                block_points = xyz[point_indices]
                block_min = np.amin(block_points, axis=0, keepdims=True)
                xyz_grids = np.floor((block_points - block_min) / args.grid_size).astype(int)
                grids, point_grid_indices, grid_point_counts = np.unique(
                    xyz_grids, return_inverse=True, return_counts=True, axis=0)
                grid_point_indices = np.split(np.argsort(point_grid_indices),
                                              np.cumsum(grid_point_counts[:-1]))
                grid_point_count_avg = int(np.average(grid_point_counts))
                point_indices_repeated = []
                for grid_idx in range(grids.shape[0]):
                    point_indices_in_block = grid_point_indices[grid_idx]
                    repeat_num = math.ceil(grid_point_count_avg / point_indices_in_block.shape[0])
                    if repeat_num > 1:
                        point_indices_in_block = np.repeat(point_indices_in_block, repeat_num)
                        np.random.shuffle(point_indices_in_block)
                        point_indices_in_block = point_indices_in_block[:grid_point_count_avg]
                    point_indices_repeated.extend(list(point_indices[point_indices_in_block]))
                block_point_indices[block_idx] = np.array(point_indices_repeated)
                block_point_counts[block_idx] = len(point_indices_repeated)

            for block_idx in range(idx_last_non_empty_block + 1):
                point_indices = block_point_indices[block_idx]
                if point_indices.shape[0] == 0:
                    continue
                block_point_num = point_indices.shape[0]
                block_split_num = int(math.ceil(block_point_num / max_point_num))
                point_num_avg = math.ceil(block_point_num / block_split_num)
                point_nums = [point_num_avg] * block_split_num
                point_nums[-1] = block_point_num - (point_num_avg * (block_split_num - 1))
                # starts = [0] + list(np.cumsum(point_nums))
                np.random.shuffle(point_indices)
                block_points = xyz[point_indices]
                block_labels = labels[point_indices]
                x, y, z = np.split(block_points, (1, 2), axis=-1)
                block_xzy = np.concatenate([x, z, y], axis=-1)
                # for block_split_idx in range(block_split_num):  # only the first split is kept
                block_split_idx = 0
                start = 0
                point_num = point_nums[block_split_idx]
                end = start + point_num
                idx_in_batch = idx % batch_size
                data[idx_in_batch, 0:point_num, ...] = block_xzy[start:end, :]
                data_num[idx_in_batch] = point_num
                label[idx_in_batch] = 0  # won't be used...
                label_seg[idx_in_batch, 0:point_num] = block_labels[start:end]
                ind_in_room = point_indices[start:end]
                indices_split_to_full[idx_in_batch, 0:point_num] = np.stack(
                    [np.zeros_like(ind_in_room), ind_in_room], -1)
                if (((idx + 1) % batch_size == 0)
                        or (block_idx == idx_last_non_empty_block)):
                    # and block_split_idx == block_split_num - 1):
                    item_num = idx_in_batch + 1
                    filename_h5 = os.path.join(root, '..', 'subsampled_8192',
                                               '%s_%s_%d.h5' % (filename.stem, offset_name, idx_h5))
                    os.makedirs(os.path.dirname(filename_h5), exist_ok=True)
                    # print('{}-Saving {}...'.format(datetime.now(), filename_h5))
                    file = h5py.File(filename_h5, 'w')
                    file.create_dataset('data', data=data[0:item_num, ...])
                    file.create_dataset('data_num', data=data_num[0:item_num, ...])
                    file.create_dataset('label', data=label[0:item_num, ...])
                    file.create_dataset('label_seg', data=label_seg[0:item_num, ...])
                    file.create_dataset('indices_split_to_full',
                                        data=indices_split_to_full[0:item_num, ...])
                    file.close()
                    if args.save_ply:
                        tqdm.write('{}-Saving ply of {}...'.format(datetime.now(), filename_h5))
                        filepath_label_ply = os.path.join(root, '..', 'subsampled_8192', 'ply_label',
                                                          'label_%s_%d' % (offset_name, idx_h5))
                        data_utils.save_ply_property_batch(
                            data[0:item_num, :, 0:3], label_seg[0:item_num, ...],
                            filepath_label_ply, data_num[0:item_num, ...], 22)
                    idx_h5 = idx_h5 + 1
                idx = idx + 1
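# A minimal, self-contained sketch (not part of the original scripts) of the
# grouping idiom that all of the block-partitioning code above relies on:
# np.unique over rows with return_inverse/return_counts, followed by
# argsort + cumsum + split, yields one array of point indices per block.
def _group_points_by_block_demo():
    xyz_blocks = np.array([[0, 0, 0], [1, 0, 0], [0, 0, 0], [1, 0, 0], [0, 0, 0]])
    blocks, point_block_indices, block_point_counts = np.unique(
        xyz_blocks, return_inverse=True, return_counts=True, axis=0)
    # blocks -> [[0, 0, 0], [1, 0, 0]]; block_point_counts -> [3, 2]
    block_point_indices = np.split(np.argsort(point_block_indices),
                                   np.cumsum(block_point_counts[:-1]))
    # points {0, 2, 4} land in block 0 and {1, 3} in block 1
    # (the order inside each group is not guaranteed by argsort)
    return blocks, block_point_indices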
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--folder', '-f', help='Path to data folder')
    parser.add_argument('--max_point_num', '-m', help='Max point number of each sample',
                        type=int, default=8192)
    parser.add_argument('--block_size', '-b', help='Block size', type=float, default=1.5)
    parser.add_argument('--grid_size', '-g', help='Grid size', type=float, default=0.03)
    parser.add_argument('--save_ply', '-s', help='Convert .pts to .ply', action='store_true')
    args = parser.parse_args()
    print(args)

    root = args.folder if args.folder else '../../data/scannet/seg'
    max_point_num = args.max_point_num

    batch_size = 2048
    data = np.zeros((batch_size, max_point_num, 6))
    data_num = np.zeros((batch_size), dtype=np.int32)
    label = np.zeros((batch_size), dtype=np.int32)
    label_seg = np.zeros((batch_size, max_point_num), dtype=np.int32)
    indices_split_to_full = np.zeros((batch_size, max_point_num, 2), dtype=np.int32)

    datasets = ['train', 'val']
    for dataset_idx, dataset in enumerate(datasets):
        filename = os.path.abspath(os.path.join(root, 'scannet_%s.pickle' % dataset))
        print('{}-Loading {}...'.format(datetime.now(), filename))
        file_pickle = open(filename, 'rb')
        xyz_all = pickle.load(file_pickle, encoding='latin1')
        labels_all = pickle.load(file_pickle, encoding='latin1')
        file_pickle.close()

        offsets = [('zero', 0.0), ('half', args.block_size / 2)]
        for offset_name, offset in offsets:
            idx_h5 = 0
            idx = 0
            for room_idx, pts in enumerate(xyz_all):
                # align to room bottom center
                xyz = pts[:, 0:3]
                feature = pts[:, 3:]
                xyz_min = np.amin(xyz, axis=0, keepdims=True)
                xyz_max = np.amax(xyz, axis=0, keepdims=True)
                xyz_center = (xyz_min + xyz_max) / 2
                xyz_center[0][-1] = xyz_min[0][-1]
                xyz = xyz - xyz_center
                labels = labels_all[room_idx]

                print('{}-Computing block id of {} points...'.format(datetime.now(), xyz.shape[0]))
                xyz_min = np.amin(xyz, axis=0, keepdims=True) - offset
                xyz_max = np.amax(xyz, axis=0, keepdims=True)
                block_size = (args.block_size, args.block_size, 2 * (xyz_max[0, -1] - xyz_min[0, -1]))
                xyz_blocks = np.floor((xyz - xyz_min) / block_size).astype(int)

                print('{}-Collecting points belonging to each block...'.format(datetime.now()))
                blocks, point_block_indices, block_point_counts = np.unique(
                    xyz_blocks, return_inverse=True, return_counts=True, axis=0)
                # one array of point indices per block
                block_point_indices = np.split(np.argsort(point_block_indices),
                                               np.cumsum(block_point_counts[:-1]))
                print('{}-{} is split into {} blocks.'.format(datetime.now(), dataset, blocks.shape[0]))

                block_to_block_idx_map = dict()
                for block_idx in range(blocks.shape[0]):
                    block = (blocks[block_idx][0], blocks[block_idx][1])
                    block_to_block_idx_map[(block[0], block[1])] = block_idx

                # merge small blocks into one of their big neighbors
                block_point_count_threshold = max_point_num / 10
                nbr_block_offsets = [(0, 1), (1, 0), (0, -1), (-1, 0),
                                     (-1, 1), (1, 1), (1, -1), (-1, -1)]
                block_merge_count = 0
                for block_idx in range(blocks.shape[0]):
                    if block_point_counts[block_idx] >= block_point_count_threshold:
                        continue
                    block = (blocks[block_idx][0], blocks[block_idx][1])
                    for x, y in nbr_block_offsets:
                        nbr_block = (block[0] + x, block[1] + y)
                        if nbr_block not in block_to_block_idx_map:
                            continue
                        nbr_block_idx = block_to_block_idx_map[nbr_block]
                        if block_point_counts[nbr_block_idx] < block_point_count_threshold:
                            continue
                        block_point_indices[nbr_block_idx] = np.concatenate(
                            [block_point_indices[nbr_block_idx], block_point_indices[block_idx]],
                            axis=-1)
                        block_point_indices[block_idx] = np.array([], dtype=int)
                        block_merge_count = block_merge_count + 1
                        break
                print('{}-{} of {} blocks are merged.'.format(datetime.now(),
                                                              block_merge_count, blocks.shape[0]))

                idx_last_non_empty_block = 0
                for block_idx in reversed(range(blocks.shape[0])):
                    if block_point_indices[block_idx].shape[0] != 0:
                        idx_last_non_empty_block = block_idx
                        break

                # uniformly sample each block: subdivide it into grid cells and take
                # the average cell population; cells below the average are repeated
                # and shuffled up to the average count, denser cells keep all points
                for block_idx in range(idx_last_non_empty_block + 1):
                    point_indices = block_point_indices[block_idx]
                    if point_indices.shape[0] == 0:
                        continue
                    block_points = xyz[point_indices]
                    block_points_feature = feature[point_indices]
                    block_min = np.amin(block_points, axis=0, keepdims=True)
                    xyz_grids = np.floor((block_points - block_min) / args.grid_size).astype(int)
                    grids, point_grid_indices, grid_point_counts = np.unique(
                        xyz_grids, return_inverse=True, return_counts=True, axis=0)
                    grid_point_indices = np.split(np.argsort(point_grid_indices),
                                                  np.cumsum(grid_point_counts[:-1]))
                    grid_point_count_avg = int(np.average(grid_point_counts))
                    point_indices_repeated = []
                    for grid_idx in range(grids.shape[0]):
                        point_indices_in_block = grid_point_indices[grid_idx]
                        repeat_num = math.ceil(grid_point_count_avg / point_indices_in_block.shape[0])
                        if repeat_num > 1:
                            point_indices_in_block = np.repeat(point_indices_in_block, repeat_num)
                            np.random.shuffle(point_indices_in_block)
                            point_indices_in_block = point_indices_in_block[:grid_point_count_avg]
                        point_indices_repeated.extend(list(point_indices[point_indices_in_block]))
                    block_point_indices[block_idx] = np.array(point_indices_repeated)
                    block_point_counts[block_idx] = len(point_indices_repeated)

                for block_idx in range(idx_last_non_empty_block + 1):
                    point_indices = block_point_indices[block_idx]
                    if point_indices.shape[0] == 0:
                        continue
                    # split the block into chunks of at most max_point_num points
                    block_point_num = point_indices.shape[0]
                    block_split_num = int(math.ceil(block_point_num * 1.0 / max_point_num))
                    point_num_avg = int(math.ceil(block_point_num * 1.0 / block_split_num))
                    point_nums = [point_num_avg] * block_split_num
                    point_nums[-1] = block_point_num - (point_num_avg * (block_split_num - 1))
                    starts = [0] + list(np.cumsum(point_nums))
                    np.random.shuffle(point_indices)
                    block_points = xyz[point_indices]
                    block_points_feature = feature[point_indices]
                    block_labels = labels[point_indices]
                    x, y, z = np.split(block_points, (1, 2), axis=-1)
                    r, g, b = np.split(block_points_feature, (1, 2), axis=-1)
                    block_xzy_feature = np.concatenate([x, z, y, r, g, b], axis=-1)
                    for block_split_idx in range(block_split_num):
                        start = starts[block_split_idx]
                        point_num = point_nums[block_split_idx]
                        end = start + point_num
                        idx_in_batch = idx % batch_size
                        data[idx_in_batch, 0:point_num, ...] = block_xzy_feature[start:end, :]
                        data_num[idx_in_batch] = point_num
                        label[idx_in_batch] = dataset_idx  # won't be used...
                        label_seg[idx_in_batch, 0:point_num] = block_labels[start:end]
                        ind_in_room = point_indices[start:end]
                        indices_split_to_full[idx_in_batch, 0:point_num] = np.stack(
                            [np.zeros_like(ind_in_room) + room_idx, ind_in_room], -1)
                        if ((idx + 1) % batch_size == 0) \
                                or (room_idx == len(xyz_all) - 1
                                    and block_idx == idx_last_non_empty_block
                                    and block_split_idx == block_split_num - 1):
                            item_num = idx_in_batch + 1
                            filename_h5 = os.path.join(root, dataset,
                                                       '%s_%d.h5' % (offset_name, idx_h5))
                            os.makedirs(os.path.dirname(filename_h5), exist_ok=True)
                            print('{}-Saving {}...'.format(datetime.now(), filename_h5))
                            file = h5py.File(filename_h5, 'w')
                            file.create_dataset('data', data=data[0:item_num, ...])
                            file.create_dataset('data_num', data=data_num[0:item_num, ...])
                            file.create_dataset('label', data=label[0:item_num, ...])
                            file.create_dataset('label_seg', data=label_seg[0:item_num, ...])
                            file.create_dataset('indices_split_to_full',
                                                data=indices_split_to_full[0:item_num, ...])
                            file.close()
                            if args.save_ply:
                                print('{}-Saving ply of {}...'.format(datetime.now(), filename_h5))
                                filepath_label_ply = os.path.join(
                                    root, dataset, 'ply_label',
                                    'label_%s_%d' % (offset_name, idx_h5))
                                data_utils.save_ply_property_batch(
                                    data[0:item_num, :, 0:3], label_seg[0:item_num, ...],
                                    filepath_label_ply, data_num[0:item_num, ...], 22)
                            idx_h5 = idx_h5 + 1
                        idx = idx + 1
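# A minimal, self-contained sketch (not part of the original scripts) of the
# per-grid-cell resampling used in all the "uniformly sample each block" loops
# above: sparse cells are repeated and shuffled up to the average cell
# population, denser cells keep all of their points. Toy indices only.
def _grid_resample_demo():
    grid_point_indices = [np.array([0, 1]), np.array([2, 3, 4, 5, 6, 7])]
    grid_point_count_avg = 4  # average points per cell in this toy example
    resampled = []
    for point_indices_in_cell in grid_point_indices:
        repeat_num = math.ceil(grid_point_count_avg / point_indices_in_cell.shape[0])
        if repeat_num > 1:
            point_indices_in_cell = np.repeat(point_indices_in_cell, repeat_num)
            np.random.shuffle(point_indices_in_cell)
            point_indices_in_cell = point_indices_in_cell[:grid_point_count_avg]
        resampled.extend(list(point_indices_in_cell))
    # the first cell contributes 4 indices drawn from {0, 1}; the second keeps all 6
    return resampled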
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--folder', '-f', help='Path to data folder')
    parser.add_argument('--save_ply', '-s', help='Convert .pts to .ply', action='store_true')
    args = parser.parse_args()
    print(args)

    root = args.folder if args.folder else '../test/hinterstoisser_rgbseg'
    folders = [(os.path.join(root, 'train_data'), os.path.join(root, 'train_label')),
               (os.path.join(root, 'val_data'), os.path.join(root, 'val_label')),
               (os.path.join(root, 'test_data'), os.path.join(root, 'test_label'))]
    category_label_seg_max_dict = dict()
    max_point_num = 0
    label_seg_min = sys.maxsize
    for data_folder, label_folder in folders:
        if not os.path.exists(data_folder):
            continue
        for category in sorted(os.listdir(data_folder)):
            if category not in category_label_seg_max_dict:
                category_label_seg_max_dict[category] = 0
            data_category_folder = os.path.join(data_folder, category)
            category_label_seg_max = 0
            for filename in sorted(os.listdir(data_category_folder)):
                data_filepath = os.path.join(data_category_folder, filename)
                # TODO: adjust the expected channel count here
                coordinates = [xyz for xyz in open(data_filepath, 'r') if len(xyz.split(' ')) == 6]
                max_point_num = max(max_point_num, len(coordinates))
                if label_folder is not None:
                    label_filepath = os.path.join(label_folder, category, filename[0:-3] + 'seg')
                    print(label_filepath)
                    label_seg_this = np.loadtxt(label_filepath).astype(np.int32)
                    assert len(coordinates) == len(label_seg_this)
                    category_label_seg_max = max(category_label_seg_max, max(label_seg_this))
                    label_seg_min = min(label_seg_min, min(label_seg_this))
            category_label_seg_max_dict[category] = max(category_label_seg_max_dict[category],
                                                        category_label_seg_max)

    category_label_seg_max_list = [(key, category_label_seg_max_dict[key])
                                   for key in sorted(category_label_seg_max_dict.keys())]
    category_label = dict()
    offset = 0
    category_offset = dict()
    label_seg_max = max([category_label_seg_max
                         for _, category_label_seg_max in category_label_seg_max_list])
    with open(os.path.join(root, 'categories.txt'), 'w') as file_categories:
        for idx, (category, category_label_seg_max) in enumerate(category_label_seg_max_list):
            file_categories.write('%s %d\n' % (category, category_label_seg_max - label_seg_min + 1))
            category_label[category] = idx
            category_offset[category] = offset
            offset = offset + category_label_seg_max - label_seg_min + 1
    print('part_num:', offset)
    print('max_point_num:', max_point_num)
    print(category_label_seg_max_list)

    batch_size = 2048
    # TODO: adjust the channel count; 3 channels take roughly 8 GB of memory here,
    # 6 channels roughly 15 GB
    # buffer of batch_size x point-cloud size (307200) x channels (one batch covers 2048 images?)
    data = np.zeros((batch_size, max_point_num, 6))
    data_num = np.zeros((batch_size), dtype=np.int32)  # per-sample point counts
    label = np.zeros((batch_size), dtype=np.int32)  # per-sample category labels
    # per-point labels of each sample (i.e., a label for every pixel of the 2048 images)
    label_seg = np.zeros((batch_size, max_point_num), dtype=np.int32)
    for data_folder, label_folder in folders:  # data and label folders
        if not os.path.exists(data_folder):
            continue
        data_folder_ply = data_folder + '_ply'
        file_num = 0  # total number of data files
        for category in sorted(os.listdir(data_folder)):  # category sub-folders such as "01", "02"
            data_category_folder = os.path.join(data_folder, category)
            file_num = file_num + len(os.listdir(data_category_folder))
        idx_h5 = 0
        idx = 0
        save_path = '%s/%s' % (os.path.dirname(data_folder), os.path.basename(data_folder)[0:-5])
        filename_txt = '%s_files.txt' % save_path  # list of generated .h5 paths, e.g. train_files.txt
        ply_filepath_list = []
        with open(filename_txt, 'w') as filelist:  # open the .h5 file list, e.g. train_files.txt
            for category in sorted(os.listdir(data_folder)):  # category sub-folders such as "01", "02"
                data_category_folder = os.path.join(data_folder, category)
                for filename in sorted(os.listdir(data_category_folder)):  # every file in the category folder
                    data_filepath = os.path.join(data_category_folder, filename)
                    # read the file: each line of six values becomes one float row
                    coordinates = [[float(value) for value in xyz.split(' ')]
                                   for xyz in open(data_filepath, 'r') if len(xyz.split(' ')) == 6]
                    idx_in_batch = idx % batch_size
                    data[idx_in_batch, 0:len(coordinates), ...] = np.array(coordinates)
                    data_num[idx_in_batch] = len(coordinates)  # point count of this sample
                    label[idx_in_batch] = category_label[category]  # category label of this sample
                    if label_folder is not None:
                        label_filepath = os.path.join(label_folder, category,
                                                      filename[0:-3] + 'seg')  # label file
                        # all per-point labels in the label file
                        label_seg_this = np.loadtxt(label_filepath).astype(np.int32) - label_seg_min
                        assert len(coordinates) == label_seg_this.shape[0]
                        # per-point labels of this sample, shifted into the global part-label space
                        label_seg[idx_in_batch, 0:len(coordinates)] = \
                            label_seg_this + category_offset[category]
                    data_ply_filepath = os.path.join(data_folder_ply, category, filename[:-3] + 'ply')
                    ply_filepath_list.append(data_ply_filepath)
                    if ((idx + 1) % batch_size == 0) or idx == file_num - 1:  # batch full, or last file
                        item_num = idx_in_batch + 1
                        filename_h5 = '%s_%d.h5' % (save_path, idx_h5)  # output path, e.g. train_0.h5
                        print('{}-Saving {}...'.format(datetime.now(), filename_h5))
                        # record the .h5 path, e.g. in train_files.txt
                        filelist.write('./%s_%d.h5\n' % (os.path.basename(data_folder)[0:-5], idx_h5))
                        file = h5py.File(filename_h5, 'w')  # open the .h5 file for writing
                        file.create_dataset('data', data=data[0:item_num, ...])
                        file.create_dataset('data_num', data=data_num[0:item_num, ...])
                        file.create_dataset('label', data=label[0:item_num, ...])
                        file.create_dataset('label_seg', data=label_seg[0:item_num, ...])
                        file.close()
                        if args.save_ply:
                            data_utils.save_ply_property_batch(data[0:item_num, ...],
                                                               label_seg[0:item_num, ...],
                                                               ply_filepath_list,
                                                               data_num[0:item_num, ...],
                                                               label_seg_max - label_seg_min)
                            ply_filepath_list = []
                        idx_h5 = idx_h5 + 1
                    idx = idx + 1
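# A minimal sketch (toy category names and part counts, not from the real
# datasets) of the global part-label layout built by the two scripts above:
# each category's labels, after subtracting label_seg_min, are shifted by a
# running offset so that all parts share one contiguous id space.
def _part_label_offset_demo():
    category_parts = [('airplane', 4), ('bag', 2), ('cap', 2)]
    category_offset, offset = {}, 0
    for name, part_num in category_parts:
        category_offset[name] = offset
        offset += part_num
    # category_offset -> {'airplane': 0, 'bag': 4, 'cap': 6}; total part_num -> 8
    return category_offset, offset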
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--folder', '-f', help='Path to data folder')
    parser.add_argument('--max_point_num', '-m', help='Max point number of each sample',
                        type=int, default=8192)
    parser.add_argument('--block_size', '-b', help='Block size', type=float, default=5.0)
    parser.add_argument('--grid_size', '-g', help='Grid size', type=float, default=0.1)
    parser.add_argument('--save_ply', '-s', help='Convert .pts to .ply', action='store_true')
    args = parser.parse_args()
    print(args)

    root = args.folder if args.folder else '../../data/semantic3d'
    max_point_num = args.max_point_num

    batch_size = 2048
    data = np.zeros((batch_size, max_point_num, 7))
    data_num = np.zeros((batch_size), dtype=np.int32)
    label = np.zeros((batch_size), dtype=np.int32)
    label_seg = np.zeros((batch_size, max_point_num), dtype=np.int32)
    indices_split_to_full = np.zeros((batch_size, max_point_num), dtype=np.int32)
    if args.save_ply:
        data_center = np.zeros((batch_size, max_point_num, 3))

    folders = [os.path.join(root, folder) for folder in ['train', 'val', 'test']]
    for folder in folders:
        datasets = [filename[:-4] for filename in os.listdir(folder) if filename.endswith('.txt')]
        for dataset_idx, dataset in enumerate(datasets):
            filename_txt = os.path.join(folder, dataset + '.txt')
            print('{}-Loading {}...'.format(datetime.now(), filename_txt))
            xyzirgb = np.loadtxt(filename_txt)
            filename_labels = os.path.join(folder, dataset + '.labels')
            has_labels = os.path.exists(filename_labels)
            if has_labels:
                print('{}-Loading {}...'.format(datetime.now(), filename_labels))
                labels = np.loadtxt(filename_labels, dtype=int)
                indices = (labels != 0)
                labels = labels[indices] - 1  # since labels == 0 have been removed
                xyzirgb = xyzirgb[indices, :]
            else:
                labels = np.zeros((xyzirgb.shape[0]))
            xyz, i, rgb = np.split(xyzirgb, (3, 4), axis=-1)
            i = i / 2000 + 0.5
            rgb = rgb / 255 - 0.5

            offsets = [('zero', 0.0), ('half', args.block_size / 2)]
            for offset_name, offset in offsets:
                idx_h5 = 0
                idx = 0
                print('{}-Computing block id of {} points...'.format(datetime.now(), xyzirgb.shape[0]))
                xyz_min = np.amin(xyz, axis=0, keepdims=True) - offset
                xyz_max = np.amax(xyz, axis=0, keepdims=True)
                block_size = (args.block_size, args.block_size, 2 * (xyz_max[0, -1] - xyz_min[0, -1]))
                xyz_blocks = np.floor((xyz - xyz_min) / block_size).astype(int)

                print('{}-Collecting points belonging to each block...'.format(datetime.now()))
                blocks, point_block_indices, block_point_counts = np.unique(
                    xyz_blocks, return_inverse=True, return_counts=True, axis=0)
                block_point_indices = np.split(np.argsort(point_block_indices),
                                               np.cumsum(block_point_counts[:-1]))
                print('{}-{} is split into {} blocks.'.format(datetime.now(), dataset, blocks.shape[0]))

                block_to_block_idx_map = dict()
                for block_idx in range(blocks.shape[0]):
                    block = (blocks[block_idx][0], blocks[block_idx][1])
                    block_to_block_idx_map[(block[0], block[1])] = block_idx

                # merge small blocks into one of their big neighbors
                block_point_count_threshold = max_point_num / 10
                nbr_block_offsets = [(0, 1), (1, 0), (0, -1), (-1, 0),
                                     (-1, 1), (1, 1), (1, -1), (-1, -1)]
                block_merge_count = 0
                for block_idx in range(blocks.shape[0]):
                    if block_point_counts[block_idx] >= block_point_count_threshold:
                        continue
                    block = (blocks[block_idx][0], blocks[block_idx][1])
                    for x, y in nbr_block_offsets:
                        nbr_block = (block[0] + x, block[1] + y)
                        if nbr_block not in block_to_block_idx_map:
                            continue
                        nbr_block_idx = block_to_block_idx_map[nbr_block]
                        if block_point_counts[nbr_block_idx] < block_point_count_threshold:
                            continue
                        block_point_indices[nbr_block_idx] = np.concatenate(
                            [block_point_indices[nbr_block_idx], block_point_indices[block_idx]],
                            axis=-1)
                        block_point_indices[block_idx] = np.array([], dtype=int)
                        block_merge_count = block_merge_count + 1
                        break
                print('{}-{} of {} blocks are merged.'.format(datetime.now(),
                                                              block_merge_count, blocks.shape[0]))

                idx_last_non_empty_block = 0
                for block_idx in reversed(range(blocks.shape[0])):
                    if block_point_indices[block_idx].shape[0] != 0:
                        idx_last_non_empty_block = block_idx
                        break

                # uniformly sample each block
                for block_idx in range(idx_last_non_empty_block + 1):
                    point_indices = block_point_indices[block_idx]
                    if point_indices.shape[0] == 0:
                        continue
                    block_points = xyz[point_indices]
                    block_min = np.amin(block_points, axis=0, keepdims=True)
                    xyz_grids = np.floor((block_points - block_min) / args.grid_size).astype(int)
                    grids, point_grid_indices, grid_point_counts = np.unique(
                        xyz_grids, return_inverse=True, return_counts=True, axis=0)
                    grid_point_indices = np.split(np.argsort(point_grid_indices),
                                                  np.cumsum(grid_point_counts[:-1]))
                    grid_point_count_avg = int(np.average(grid_point_counts))
                    point_indices_repeated = []
                    for grid_idx in range(grids.shape[0]):
                        point_indices_in_block = grid_point_indices[grid_idx]
                        repeat_num = math.ceil(grid_point_count_avg / point_indices_in_block.shape[0])
                        if repeat_num > 1:
                            point_indices_in_block = np.repeat(point_indices_in_block, repeat_num)
                            np.random.shuffle(point_indices_in_block)
                            point_indices_in_block = point_indices_in_block[:grid_point_count_avg]
                        point_indices_repeated.extend(list(point_indices[point_indices_in_block]))
                    block_point_indices[block_idx] = np.array(point_indices_repeated)
                    block_point_counts[block_idx] = len(point_indices_repeated)

                for block_idx in range(idx_last_non_empty_block + 1):
                    point_indices = block_point_indices[block_idx]
                    if point_indices.shape[0] == 0:
                        continue
                    block_point_num = point_indices.shape[0]
                    block_split_num = int(math.ceil(block_point_num * 1.0 / max_point_num))
                    point_num_avg = int(math.ceil(block_point_num * 1.0 / block_split_num))
                    point_nums = [point_num_avg] * block_split_num
                    point_nums[-1] = block_point_num - (point_num_avg * (block_split_num - 1))
                    starts = [0] + list(np.cumsum(point_nums))
                    np.random.shuffle(point_indices)
                    block_points = xyz[point_indices]
                    block_min = np.amin(block_points, axis=0, keepdims=True)
                    block_max = np.amax(block_points, axis=0, keepdims=True)
                    block_center = (block_min + block_max) / 2
                    block_center[0][-1] = block_min[0][-1]
                    block_points = block_points - block_center  # align to block bottom center
                    x, y, z = np.split(block_points, (1, 2), axis=-1)
                    block_xzyrgbi = np.concatenate([x, z, y, rgb[point_indices], i[point_indices]],
                                                   axis=-1)
                    block_labels = labels[point_indices]
                    for block_split_idx in range(block_split_num):
                        start = starts[block_split_idx]
                        point_num = point_nums[block_split_idx]
                        end = start + point_num
                        idx_in_batch = idx % batch_size
                        data[idx_in_batch, 0:point_num, ...] = block_xzyrgbi[start:end, :]
                        data_num[idx_in_batch] = point_num
                        label[idx_in_batch] = dataset_idx  # won't be used...
                        label_seg[idx_in_batch, 0:point_num] = block_labels[start:end]
                        indices_split_to_full[idx_in_batch, 0:point_num] = point_indices[start:end]
                        if args.save_ply:
                            block_center_xzy = np.array([[block_center[0][0], block_center[0][2],
                                                          block_center[0][1]]])
                            data_center[idx_in_batch, 0:point_num, ...] = block_center_xzy
                        if ((idx + 1) % batch_size == 0) or \
                                (block_idx == idx_last_non_empty_block
                                 and block_split_idx == block_split_num - 1):
                            item_num = idx_in_batch + 1
                            filename_h5 = os.path.join(folder,
                                                       dataset + '_%s_%d.h5' % (offset_name, idx_h5))
                            print('{}-Saving {}...'.format(datetime.now(), filename_h5))
                            file = h5py.File(filename_h5, 'w')
                            file.create_dataset('data', data=data[0:item_num, ...])
                            file.create_dataset('data_num', data=data_num[0:item_num, ...])
                            file.create_dataset('label', data=label[0:item_num, ...])
                            file.create_dataset('label_seg', data=label_seg[0:item_num, ...])
                            file.create_dataset('indices_split_to_full',
                                                data=indices_split_to_full[0:item_num, ...])
                            file.close()
                            if args.save_ply:
                                print('{}-Saving ply of {}...'.format(datetime.now(), filename_h5))
                                filepath_label_ply = os.path.join(
                                    folder, 'ply_label',
                                    dataset + '_label_%s_%d' % (offset_name, idx_h5))
                                data_utils.save_ply_property_batch(
                                    data[0:item_num, :, 0:3] + data_center[0:item_num, ...],
                                    label_seg[0:item_num, ...], filepath_label_ply,
                                    data_num[0:item_num, ...], 8)
                                filepath_i_ply = os.path.join(
                                    folder, 'ply_intensity',
                                    dataset + '_i_%s_%d' % (offset_name, idx_h5))
                                data_utils.save_ply_property_batch(
                                    data[0:item_num, :, 0:3] + data_center[0:item_num, ...],
                                    data[0:item_num, :, 6], filepath_i_ply,
                                    data_num[0:item_num, ...], 1.0)
                                filepath_rgb_ply = os.path.join(
                                    folder, 'ply_rgb',
                                    dataset + '_rgb_%s_%d' % (offset_name, idx_h5))
                                data_utils.save_ply_color_batch(
                                    data[0:item_num, :, 0:3] + data_center[0:item_num, ...],
                                    (data[0:item_num, :, 3:6] + 0.5) * 255,
                                    filepath_rgb_ply, data_num[0:item_num, ...])
                                filepath_label_aligned_ply = os.path.join(
                                    folder, 'ply_label_aligned',
                                    dataset + '_label_%s_%d' % (offset_name, idx_h5))
                                data_utils.save_ply_property_batch(
                                    data[0:item_num, :, 0:3], label_seg[0:item_num, ...],
                                    filepath_label_aligned_ply, data_num[0:item_num, ...], 8)
                                filepath_i_aligned_ply = os.path.join(
                                    folder, 'ply_intensity_aligned',
                                    dataset + '_i_%s_%d' % (offset_name, idx_h5))
                                data_utils.save_ply_property_batch(
                                    data[0:item_num, :, 0:3], data[0:item_num, :, 6],
                                    filepath_i_aligned_ply, data_num[0:item_num, ...], 1.0)
                                filepath_rgb_aligned_ply = os.path.join(
                                    folder, 'ply_rgb_aligned',
                                    dataset + '_rgb_%s_%d' % (offset_name, idx_h5))
                                data_utils.save_ply_color_batch(
                                    data[0:item_num, :, 0:3],
                                    (data[0:item_num, :, 3:6] + 0.5) * 255,
                                    filepath_rgb_aligned_ply, data_num[0:item_num, ...])
                            idx_h5 = idx_h5 + 1
                        idx = idx + 1
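# A minimal sketch (toy numbers) of the block-splitting arithmetic shared by the
# scripts above: a block with more than max_point_num points is cut into nearly
# equal chunks, with the last chunk absorbing the remainder.
def _block_split_demo(block_point_num=20000, max_point_num=8192):
    block_split_num = int(math.ceil(block_point_num * 1.0 / max_point_num))
    point_num_avg = int(math.ceil(block_point_num * 1.0 / block_split_num))
    point_nums = [point_num_avg] * block_split_num
    point_nums[-1] = block_point_num - point_num_avg * (block_split_num - 1)
    # e.g. 20000 points -> 3 chunks of [6667, 6667, 6666]
    return point_nums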
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--folder', '-f', help='Path to data folder')
    parser.add_argument('--max_point_num', '-m', help='Max point number of each sample',
                        type=int, default=8192)
    parser.add_argument('--block_size', '-b', help='Block size', type=float, default=1.5)
    parser.add_argument('--grid_size', '-g', help='Grid size', type=float, default=0.03)
    parser.add_argument('--save_ply', '-s', help='Convert .pts to .ply', action='store_true')
    args = parser.parse_args()
    print(args)

    root = args.folder if args.folder else '/home/elvin/PointCNN/data/mydata'
    max_point_num = args.max_point_num

    batch_size = 2048
    data = np.zeros((batch_size, max_point_num, 3))  # xyz buffer
    data_num = np.zeros((batch_size), dtype=np.int32)  # number of points in each sample
    #label = np.zeros((batch_size), dtype=np.int32)
    #label_seg = np.zeros((batch_size, max_point_num), dtype=np.int32)
    indices_split_to_full = np.zeros((batch_size, max_point_num), dtype=np.int32)

    # each dataset file is one point cloud
    datasets = os.listdir(root)
    for dataset_idx, dataset in enumerate(datasets):
        #dataset_marker = os.path.join(folder, dataset, ".dataset")
        #if os.path.exists(dataset_marker):
        #    print('{}-{}/{} already processed, skipping'.format(datetime.now(), folder, dataset))
        #    continue
        if dataset.endswith('.npy'):
            # continue
            filename_data = os.path.join(root, dataset)  #, 'xyzrgb.npy')
            print('{}-Loading {}...'.format(datetime.now(), filename_data))
            xyz = np.load(filename_data)
            dataset = dataset.split('.')[0]
            xyz_min = np.amin(xyz, axis=0, keepdims=True)
            xyz_max = np.amax(xyz, axis=0, keepdims=True)
            xyz_center = (xyz_min + xyz_max) / 2
            xyz_center[0][-1] = xyz_min[0][-1]
            xyz = xyz - xyz_center  # align to room bottom center
            #rgb = rgb / 255 - 0.5

            offsets = [('zero', 0.0), ('half', args.block_size / 2)]
            for offset_name, offset in offsets:
                idx_h5 = 0
                idx = 0
                print('{}-Computing block id of {} points...'.format(datetime.now(), xyz.shape[0]))
                xyz_min = np.amin(xyz, axis=0, keepdims=True) - offset
                xyz_max = np.amax(xyz, axis=0, keepdims=True)
                block_size = (args.block_size, args.block_size, 2 * (xyz_max[0, -1] - xyz_min[0, -1]))
                xyz_blocks = np.floor((xyz - xyz_min) / block_size).astype(int)

                print('{}-Collecting points belonging to each block...'.format(datetime.now()))
                # point_block_indices maps each point to the index of its block in `blocks`
                blocks, point_block_indices, block_point_counts = np.unique(
                    xyz_blocks, return_inverse=True, return_counts=True, axis=0)
                # how many blocks this cloud is split into, and which points each holds
                block_point_indices = np.split(np.argsort(point_block_indices),
                                               np.cumsum(block_point_counts[:-1]))
                print('{}-{}.npy is split into {} blocks.'.format(datetime.now(),
                                                                  dataset, blocks.shape[0]))

                block_to_block_idx_map = dict()
                for block_idx in range(blocks.shape[0]):
                    block = (blocks[block_idx][0], blocks[block_idx][1])
                    block_to_block_idx_map[(block[0], block[1])] = block_idx

                # merge small blocks into one of their big neighbors
                block_point_count_threshold = max_point_num / 10
                nbr_block_offsets = [(0, 1), (1, 0), (0, -1), (-1, 0),
                                     (-1, 1), (1, 1), (1, -1), (-1, -1)]
                block_merge_count = 0
                for block_idx in range(blocks.shape[0]):
                    if block_point_counts[block_idx] >= block_point_count_threshold:
                        continue
                    block = (blocks[block_idx][0], blocks[block_idx][1])
                    for x, y in nbr_block_offsets:
                        nbr_block = (block[0] + x, block[1] + y)
                        if nbr_block not in block_to_block_idx_map:
                            continue
                        nbr_block_idx = block_to_block_idx_map[nbr_block]
                        if block_point_counts[nbr_block_idx] < block_point_count_threshold:
                            continue
                        block_point_indices[nbr_block_idx] = np.concatenate(
                            [block_point_indices[nbr_block_idx], block_point_indices[block_idx]],
                            axis=-1)
                        block_point_indices[block_idx] = np.array([], dtype=int)
                        block_merge_count = block_merge_count + 1
                        break
                print('{}-{} of {} blocks are merged.'.format(datetime.now(),
                                                              block_merge_count, blocks.shape[0]))

                idx_last_non_empty_block = 0
                for block_idx in reversed(range(blocks.shape[0])):
                    if block_point_indices[block_idx].shape[0] != 0:
                        idx_last_non_empty_block = block_idx
                        break

                # uniformly sample each block
                for block_idx in range(idx_last_non_empty_block + 1):
                    point_indices = block_point_indices[block_idx]
                    if point_indices.shape[0] == 0:
                        continue
                    block_points = xyz[point_indices]
                    block_min = np.amin(block_points, axis=0, keepdims=True)
                    xyz_grids = np.floor((block_points - block_min) / args.grid_size).astype(int)
                    grids, point_grid_indices, grid_point_counts = np.unique(
                        xyz_grids, return_inverse=True, return_counts=True, axis=0)
                    grid_point_indices = np.split(np.argsort(point_grid_indices),
                                                  np.cumsum(grid_point_counts[:-1]))
                    grid_point_count_avg = int(np.average(grid_point_counts))
                    point_indices_repeated = []
                    for grid_idx in range(grids.shape[0]):
                        point_indices_in_block = grid_point_indices[grid_idx]
                        repeat_num = math.ceil(grid_point_count_avg / point_indices_in_block.shape[0])
                        if repeat_num > 1:
                            point_indices_in_block = np.repeat(point_indices_in_block, repeat_num)
                            np.random.shuffle(point_indices_in_block)
                            point_indices_in_block = point_indices_in_block[:grid_point_count_avg]
                        point_indices_repeated.extend(list(point_indices[point_indices_in_block]))
                    block_point_indices[block_idx] = np.array(point_indices_repeated)
                    block_point_counts[block_idx] = len(point_indices_repeated)

                for block_idx in range(idx_last_non_empty_block + 1):
                    point_indices = block_point_indices[block_idx]
                    if point_indices.shape[0] == 0:
                        continue
                    block_point_num = point_indices.shape[0]
                    block_split_num = int(math.ceil(block_point_num * 1.0 / max_point_num))
                    point_num_avg = int(math.ceil(block_point_num * 1.0 / block_split_num))
                    point_nums = [point_num_avg] * block_split_num
                    point_nums[-1] = block_point_num - (point_num_avg * (block_split_num - 1))
                    starts = [0] + list(np.cumsum(point_nums))
                    np.random.shuffle(point_indices)
                    block_points = xyz[point_indices]
                    #block_rgb = rgb[point_indices]
                    #block_labels = labels[point_indices]
                    #x, y, z = np.split(block_points, (1, 2), axis=-1)
                    #block_xzyrgb = np.concatenate([x, z, y, block_rgb], axis=-1)
                    for block_split_idx in range(block_split_num):
                        start = starts[block_split_idx]
                        point_num = point_nums[block_split_idx]
                        end = start + point_num
                        idx_in_batch = idx % batch_size
                        data[idx_in_batch, 0:point_num, ...] = block_points[start:end, :]
                        data_num[idx_in_batch] = point_num
                        #label[idx_in_batch] = dataset_idx  # won't be used...
                        #label_seg[idx_in_batch, 0:point_num] = block_labels[start:end]
                        indices_split_to_full[idx_in_batch, 0:point_num] = point_indices[start:end]
                        if ((idx + 1) % batch_size == 0) or \
                                (block_idx == idx_last_non_empty_block
                                 and block_split_idx == block_split_num - 1):
                            item_num = idx_in_batch + 1
                            filename_h5 = os.path.join(root,
                                                       dataset + '_%s_%d.h5' % (offset_name, idx_h5))
                            print('{}-Saving {}...'.format(datetime.now(), filename_h5))
                            file = h5py.File(filename_h5, 'w')
                            file.create_dataset('data', data=data[0:item_num, ...])
                            file.create_dataset('data_num', data=data_num[0:item_num, ...])
                            #file.create_dataset('label', data=label[0:item_num, ...])
                            #file.create_dataset('label_seg', data=label_seg[0:item_num, ...])
                            file.create_dataset('indices_split_to_full',
                                                data=indices_split_to_full[0:item_num, ...])
                            file.close()
                            if args.save_ply:
                                print('{}-Saving ply of {}...'.format(datetime.now(), filename_h5))
                                # The label/rgb exports inherited from the template are left
                                # disabled: this script keeps xyz only, and the label_seg/rgb
                                # buffers are commented out above.
                                #filepath_label_ply = os.path.join(root, dataset + '_ply_label',
                                #                                  'label_%s_%d' % (offset_name, idx_h5))
                                #data_utils.save_ply_property_batch(
                                #    data[0:item_num, :, 0:3], label_seg[0:item_num, ...],
                                #    filepath_label_ply, data_num[0:item_num, ...], 14)
                                #filepath_rgb_ply = os.path.join(root, dataset + '_ply_rgb',
                                #                                'rgb_%s_%d' % (offset_name, idx_h5))
                                #data_utils.save_ply_color_batch(
                                #    data[0:item_num, :, 0:3], (data[0:item_num, :, 3:] + 0.5) * 255,
                                #    filepath_rgb_ply, data_num[0:item_num, ...])
                            idx_h5 = idx_h5 + 1
                        idx = idx + 1
            # Marker indicating we've processed this dataset
            #open(dataset_marker, "w").close()

    filename_h5s = ['./%s\n' % filename for filename in os.listdir(root) if filename.endswith('.h5')]
    filelist_txt = os.path.join(root, 'my_test_data.txt')
    print('{}-Saving {}...'.format(datetime.now(), filelist_txt))
    with open(filelist_txt, 'w') as filelist:
        for filename_h5 in filename_h5s:
            filelist.write(filename_h5)
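# Note on the ('zero', 0.0) / ('half', block_size / 2) passes used by every
# block-based script above: the whole partitioning pipeline is run twice, the
# second time with the block grid shifted by half a block, so each point is
# written into two different partitions; a point near a block border in one
# pass lies nearer a block interior in the other (presumably to soften
# block-boundary artifacts at test time).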
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--filelist', '-t', help='Path to input .h5 filelist (.txt)', required=True)
    parser.add_argument('--load_ckpt', '-l', help='Path to a check point file for load', required=True)
    parser.add_argument('--max_point_num', '-p', help='Max point number of each sample',
                        type=int, default=8192)
    parser.add_argument('--repeat_num', '-r', help='Repeat number', type=int, default=1)
    parser.add_argument('--model', '-m', help='Model to use', required=True)
    parser.add_argument('--setting', '-x', help='Setting to use', required=True)
    parser.add_argument('--save_ply', '-s', help='Save results as ply', action='store_true')
    args = parser.parse_args()
    print(args)

    model = importlib.import_module(args.model)
    setting_path = os.path.join(os.path.dirname(__file__), args.model)
    sys.path.append(setting_path)
    setting = importlib.import_module(args.setting)

    sample_num = setting.sample_num
    max_point_num = args.max_point_num
    batch_size = args.repeat_num * math.ceil(max_point_num / sample_num)

    ######################################################################
    # Placeholders
    indices = tf.placeholder(tf.int32, shape=(batch_size, None, 2), name="indices")
    is_training = tf.placeholder(tf.bool, name='is_training')
    pts_fts = tf.placeholder(tf.float32, shape=(batch_size, max_point_num, setting.data_dim),
                             name='points')
    ######################################################################

    ######################################################################
    pts_fts_sampled = tf.gather_nd(pts_fts, indices=indices, name='pts_fts_sampled')
    if setting.data_dim > 3:
        points_sampled, features_sampled = tf.split(pts_fts_sampled,
                                                    [3, setting.data_dim - 3],
                                                    axis=-1,
                                                    name='split_points_features')
        if not setting.use_extra_features:
            features_sampled = None
    else:
        points_sampled = pts_fts_sampled
        features_sampled = None

    net = model.Net(points_sampled, features_sampled, is_training, setting)
    seg_probs_op = tf.nn.softmax(net.logits, name='seg_probs')

    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

    # for restore model
    saver = tf.train.Saver()

    parameter_num = np.sum([np.prod(v.shape.as_list()) for v in tf.trainable_variables()])
    print('{}-Parameter number: {:d}.'.format(datetime.now(), parameter_num))

    with tf.Session() as sess:
        # Load the model
        saver.restore(sess, args.load_ckpt)
        print('{}-Checkpoint loaded from {}!'.format(datetime.now(), args.load_ckpt))

        indices_batch_indices = np.tile(np.reshape(np.arange(batch_size), (batch_size, 1, 1)),
                                        (1, sample_num, 1))

        folder = os.path.dirname(args.filelist)
        filenames = [os.path.join(folder, line.strip()) for line in open(args.filelist)]
        for filename in filenames:
            print('{}-Reading {}...'.format(datetime.now(), filename))
            data_h5 = h5py.File(filename, 'r')  # filenames already include the folder
            data = data_h5['data'][...].astype(np.float32)
            data_num = data_h5['data_num'][...].astype(np.int32)
            batch_num = data.shape[0]

            labels_pred = np.full((batch_num, max_point_num), -1, dtype=np.int32)
            confidences_pred = np.zeros((batch_num, max_point_num), dtype=np.float32)

            print('{}-{:d} testing batches.'.format(datetime.now(), batch_num))
            for batch_idx in range(batch_num):
                if batch_idx % 10 == 0:
                    print('{}-Processing {} of {} batches.'.format(datetime.now(),
                                                                   batch_idx, batch_num))
                points_batch = data[[batch_idx] * batch_size, ...]
                point_num = data_num[batch_idx]

                tile_num = math.ceil((sample_num * batch_size) / point_num)
                indices_shuffle = np.tile(np.arange(point_num), tile_num)[0:sample_num * batch_size]
                np.random.shuffle(indices_shuffle)
                indices_batch_shuffle = np.reshape(indices_shuffle, (batch_size, sample_num, 1))
                indices_batch = np.concatenate((indices_batch_indices, indices_batch_shuffle), axis=2)

                _, seg_probs = sess.run([update_ops, seg_probs_op],
                                        feed_dict={
                                            pts_fts: points_batch,
                                            indices: indices_batch,
                                            is_training: False,
                                        })
                probs_2d = np.reshape(seg_probs, (sample_num * batch_size, -1))

                # keep, for every point, the prediction with the highest confidence
                predictions = [(-1, 0.0)] * point_num
                for idx in range(sample_num * batch_size):
                    point_idx = indices_shuffle[idx]
                    probs = probs_2d[idx, :]
                    confidence = np.amax(probs)
                    label = np.argmax(probs)
                    if confidence > predictions[point_idx][1]:
                        predictions[point_idx] = [label, confidence]
                labels_pred[batch_idx, 0:point_num] = np.array([label for label, _ in predictions])
                confidences_pred[batch_idx, 0:point_num] = np.array(
                    [confidence for _, confidence in predictions])

            filename_pred = filename[:-3] + '_pred.h5'
            print('{}-Saving {}...'.format(datetime.now(), filename_pred))
            file = h5py.File(filename_pred, 'w')
            file.create_dataset('data_num', data=data_num)
            file.create_dataset('label_seg', data=labels_pred)
            file.create_dataset('confidence', data=confidences_pred)
            has_indices = 'indices_split_to_full' in data_h5
            if has_indices:
                file.create_dataset('indices_split_to_full',
                                    data=data_h5['indices_split_to_full'][...])
            file.close()

            if args.save_ply:
                print('{}-Saving ply of {}...'.format(datetime.now(), filename_pred))
                filepath_label_ply = os.path.join(filename_pred[:-3] + 'ply_label')
                data_utils.save_ply_property_batch(data[:, :, 0:3], labels_pred[...],
                                                   filepath_label_ply, data_num[...],
                                                   setting.num_class)
    ######################################################################
    print('{}-Done!'.format(datetime.now()))
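# A minimal, self-contained sketch (toy numbers, not from a real run) of the
# per-point voting used in the test script above: when a point is sampled
# several times, the label with the highest softmax confidence wins.
def _vote_demo():
    indices_shuffle = np.array([0, 1, 0])  # point 0 is sampled twice
    labels = np.array([2, 5, 3])
    confidences = np.array([0.4, 0.9, 0.7])
    predictions = [(-1, 0.0)] * 2
    for idx in range(len(indices_shuffle)):
        point_idx = indices_shuffle[idx]
        if confidences[idx] > predictions[point_idx][1]:
            predictions[point_idx] = (labels[idx], confidences[idx])
    # predictions -> [(3, 0.7), (5, 0.9)]
    return predictions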
def main(): parser = argparse.ArgumentParser() parser.add_argument('--folder', '-f', help='Path to data folder') parser.add_argument('--max_point_num', '-m', help='Max point number of each sample', type=int, default=8192) parser.add_argument('--block_size', '-b', help='Block size', type=float, default=0.3) parser.add_argument('--grid_size', '-g', help='Grid size', type=float, default=0.03) parser.add_argument('--save_ply', '-s', help='Convert .pts to .ply', action='store_true') args = parser.parse_args() print(args) root = args.folder if args.folder else '../../data/S3DIS/prepare_label_rgb' dir_root = '../../data/S3DIS/dir_all_d/' max_point_num = args.max_point_num batch_size = 2048 data = np.zeros((batch_size, max_point_num, 6)) direction = np.zeros((batch_size, max_point_num, 6)) data_num = np.zeros((batch_size), dtype=np.int32) label = np.zeros((batch_size), dtype=np.int32) label_seg = np.zeros((batch_size, max_point_num), dtype=np.int32) indices_split_to_full = np.zeros((batch_size, max_point_num), dtype=np.int32) data_extra = np.zeros((batch_size, 4096, 6)) direction_extra = np.zeros((batch_size, 4096, 6)) data_num_extra = np.zeros((batch_size), dtype=np.int32) label_seg_extra = np.zeros((batch_size, 4096), dtype=np.int32) mergenum = 0 for area_idx in range(1, 7): folder = os.path.join(root, 'Area_%d' % area_idx) datasets = [dataset for dataset in os.listdir(folder)] for dataset_idx, dataset in enumerate(datasets): dataset_marker = os.path.join(folder, dataset, ".dataset") if os.path.exists(dataset_marker): print('{}-{}/{} already processed, skipping'.format( datetime.now(), folder, dataset)) continue filename_data = os.path.join(folder, dataset, 'xyzrgb_label.mat') print('{}-Loading {}...'.format(datetime.now(), filename_data)) xyz_label = scio.loadmat(filename_data) xyzrgb = xyz_label['xyzrgb'] labels = xyz_label['label'].astype(int).flatten() filename_direction = os.path.join(dir_root, 'Area_%d' % area_idx, dataset, 'vec.mat') print('{}-Loading {}...'.format(datetime.now(), filename_direction)) vec = scio.loadmat(filename_direction) vec = vec['vec'] xyz, rgb = np.split(xyzrgb, [3], axis=-1) xyz_min = np.amin(xyz, axis=0, keepdims=True) xyz_max = np.amax(xyz, axis=0, keepdims=True) xyz_center = (xyz_min + xyz_max) / 2 xyz_center[0][-1] = xyz_min[0][-1] xyz = xyz - xyz_center # align to room bottom center rgb = rgb / 255 - 0.5 offsets = [('zero', 0.0), ('half', 1.2 / 2)] for offset_name, offset in offsets: idx_h5 = 0 idx = 0 print('{}-Computing block id of {} points...'.format( datetime.now(), xyzrgb.shape[0])) xyz_min = np.amin(xyz, axis=0, keepdims=True) - offset xyz_max = np.amax(xyz, axis=0, keepdims=True) block_size = (args.block_size, args.block_size, 2 * (xyz_max[0, -1] - xyz_min[0, -1])) xyz_blocks = np.floor( (xyz - xyz_min) / block_size).astype(np.int) print('{}-Collecting points belong to each block...'.format( datetime.now(), xyzrgb.shape[0])) blocks, point_block_indices, block_point_counts = np.unique( xyz_blocks, return_inverse=True, return_counts=True, axis=0) block_point_indices = np.split( np.argsort(point_block_indices), np.cumsum(block_point_counts[:-1])) print('{}-{} is split into {} blocks.'.format( datetime.now(), dataset, blocks.shape[0])) blockmax = np.amax(blocks, axis=0) xblocks = int(blockmax[0] / 5) + 1 yblocks = int(blockmax[1] / 5) + 1 blocks_base = [[] for _ in range(xblocks * yblocks)] blocks_extra = [[] for _ in range(xblocks * yblocks)] for xi in range(blockmax[0] + 1): for yj in range(blockmax[1] + 1): if sum( abs([xi, yj, 0] - 
                # cells with no points are missing from `blocks`; insert empty
                # placeholders so the (xi, yj) grid is dense and directly indexable
                for xi in range(blockmax[0] + 1):
                    for yj in range(blockmax[1] + 1):
                        if np.sum(np.abs([xi, yj, 0] - blocks[min((blockmax[1] + 1) * xi + yj,
                                                                  blocks.shape[0] - 1)])) != 0:
                            blocks = np.insert(blocks, (blockmax[1] + 1) * xi + yj, [xi, yj, 0], 0)
                            block_point_indices.insert((blockmax[1] + 1) * xi + yj, [])

                # assign every fine cell to its super-block; cells on the first/last
                # row or column of a super-block also feed the neighboring
                # super-block's "extra" (context) point set
                for xi in range(blockmax[0] + 1):
                    for yj in range(blockmax[1] + 1):
                        subid = (blockmax[1] + 1) * xi + yj
                        bid = yblocks * int(xi / 5) + int(yj / 5)
                        blocks_base[bid].append(block_point_indices[subid])
                        if ((xi % 5) == 0) & (xi > 0):
                            eid = yblocks * int((xi - 1) / 5) + int(yj / 5)
                            blocks_extra[eid].append(block_point_indices[subid])
                        elif ((xi % 5) == 4) & (xi < blocks[-1][0]):
                            eid = yblocks * int((xi + 1) / 5) + int(yj / 5)
                            blocks_extra[eid].append(block_point_indices[subid])
                        if ((yj % 5) == 0) & (yj > 0):
                            eid = yblocks * int(xi / 5) + int((yj - 1) / 5)
                            blocks_extra[eid].append(block_point_indices[subid])
                        elif ((yj % 5) == 4) & (yj < blocks[-1][1]):
                            eid = yblocks * int(xi / 5) + int((yj + 1) / 5)
                            blocks_extra[eid].append(block_point_indices[subid])
                blocks_base = [np.concatenate(ind) for ind in blocks_base]
                # guard against super-blocks that received no extra cells at all
                blocks_extra = [np.concatenate(ind) if len(ind) > 0 else np.array([], dtype=np.int64)
                                for ind in blocks_extra]

                block_to_block_idx_map = dict()
                for block_idx in range(xblocks * yblocks):
                    block = (block_idx // yblocks, block_idx % yblocks)
                    block_to_block_idx_map[(block[0], block[1])] = block_idx

                # merge small blocks into one of their big neighbors
                block_point_count_threshold = max_point_num / 10
                nbr_block_offsets = [(0, 1), (1, 0), (0, -1), (-1, 0),
                                     (-1, 1), (1, 1), (1, -1), (-1, -1)]
                block_merge_count = 0
                for block_idx in range(xblocks * yblocks):
                    if len(blocks_base[block_idx]) >= block_point_count_threshold:
                        continue
                    # coordinates of this super-block in the coarse grid; the map above
                    # is keyed this way (indexing `blocks` here would read the fine grid)
                    block = (block_idx // yblocks, block_idx % yblocks)
                    for x, y in nbr_block_offsets:
                        nbr_block = (block[0] + x, block[1] + y)
                        if nbr_block not in block_to_block_idx_map:
                            continue
                        nbr_block_idx = block_to_block_idx_map[nbr_block]
                        if len(blocks_base[nbr_block_idx]) < block_point_count_threshold:
                            continue
                        blocks_base[nbr_block_idx] = np.concatenate(
                            [blocks_base[nbr_block_idx], blocks_base[block_idx]], axis=-1)
                        blocks_base[block_idx] = np.array([], dtype=np.int64)
                        blocks_extra[nbr_block_idx] = np.concatenate(
                            [blocks_extra[nbr_block_idx], blocks_extra[block_idx]], axis=-1)
                        blocks_extra[block_idx] = np.array([], dtype=np.int64)
                        # a merged neighbor must not keep base points in its extra set
                        blocks_extra[nbr_block_idx] = np.array(list(
                            set(blocks_extra[nbr_block_idx]).difference(set(blocks_base[nbr_block_idx]))))
                        block_merge_count = block_merge_count + 1
                        break
                print('{}-{} of {} blocks are merged.'.format(
                    datetime.now(), block_merge_count, xblocks * yblocks))

                idx_last_non_empty_block = 0
                for block_idx in reversed(range(xblocks * yblocks)):
                    if blocks_base[block_idx].shape[0] != 0:
                        idx_last_non_empty_block = block_idx
                        break

                blocks_base = [ind.astype(np.int64) for ind in blocks_base]
                blocks_extra = [ind.astype(np.int64) for ind in blocks_extra]
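                # Toy example of the super-block indexing above: with blockmax = [7, 9]
                # the fine grid is 8 x 10 cells, so xblocks = 2 and yblocks = 2. Fine
                # cell (6, 3) lands in bid = 2 * (6 // 5) + (3 // 5) = 2; cell (5, 3)
                # (5 % 5 == 0, 5 > 0) additionally feeds the extra set of super-block
                # eid = 2 * ((5 - 1) // 5) + (3 // 5) = 0.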
                for block_idx in range(idx_last_non_empty_block + 1):
                    point_indices = blocks_base[block_idx]
                    point_indices_extra = blocks_extra[block_idx]
                    if point_indices.shape[0] == 0:
                        continue

                    # split over-full super-blocks into roughly equal chunks of at
                    # most max_point_num points
                    block_point_num = point_indices.shape[0]
                    block_split_num = int(math.ceil(block_point_num * 1.0 / max_point_num))
                    point_num_avg = int(math.ceil(block_point_num * 1.0 / block_split_num))
                    point_nums = [point_num_avg] * block_split_num
                    point_nums[-1] = block_point_num - (point_num_avg * (block_split_num - 1))
                    starts = [0] + list(np.cumsum(point_nums))

                    np.random.shuffle(point_indices)
                    block_points = xyz[point_indices]
                    block_rgb = rgb[point_indices]
                    block_dir = vec[point_indices]
                    block_labels = labels[point_indices]
                    block_xyzrgb = np.concatenate([block_points, block_rgb], axis=-1)

                    np.random.shuffle(point_indices_extra)
                    block_points_extra = xyz[point_indices_extra]
                    block_rgb_extra = rgb[point_indices_extra]
                    block_dir_extra = vec[point_indices_extra]
                    block_labels_extra = labels[point_indices_extra]
                    block_xyzrgb_extra = np.concatenate([block_points_extra, block_rgb_extra], axis=-1)

                    for block_split_idx in range(block_split_num):
                        start = starts[block_split_idx]
                        point_num = point_nums[block_split_idx]
                        end = start + point_num
                        idx_in_batch = idx % batch_size
                        data[idx_in_batch, 0:point_num, ...] = block_xyzrgb[start:end, :]
                        direction[idx_in_batch, 0:point_num, ...] = block_dir[start:end, :]
                        data_num[idx_in_batch] = point_num
                        label[idx_in_batch] = dataset_idx  # won't be used...
                        label_seg[idx_in_batch, 0:point_num] = block_labels[start:end]
                        indices_split_to_full[idx_in_batch, 0:point_num] = point_indices[start:end]

                        # sample half as many context points from the surrounding ring;
                        # sample with replacement only when the ring is too small
                        point_num_extra = int(point_num * 2.0 / 4)
                        indice = np.random.choice(len(point_indices_extra), point_num_extra,
                                                  replace=len(point_indices_extra) < point_num_extra)
                        data_extra[idx_in_batch, 0:point_num_extra, ...] = block_xyzrgb_extra[indice, :]
                        direction_extra[idx_in_batch, 0:point_num_extra, ...] = block_dir_extra[indice, :]
                        data_num_extra[idx_in_batch] = point_num_extra
                        label_seg_extra[idx_in_batch, 0:point_num_extra] = block_labels_extra[indice]

                        if ((idx + 1) % batch_size == 0) or \
                                (block_idx == idx_last_non_empty_block and
                                 block_split_idx == block_split_num - 1):
                            item_num = idx_in_batch + 1
                            filename_h5 = os.path.join(folder, dataset, '%s_%d.h5' % (offset_name, idx_h5))
                            print('{}-Saving {}...'.format(datetime.now(), filename_h5))

                            file = h5py.File(filename_h5, 'w')
                            file.create_dataset('data', data=data[0:item_num, ...])
                            file.create_dataset('data_num', data=data_num[0:item_num, ...])
                            file.create_dataset('direction', data=direction[0:item_num, ...])
                            file.create_dataset('label', data=label[0:item_num, ...])
                            file.create_dataset('label_seg', data=label_seg[0:item_num, ...])
                            file.create_dataset('indices_split_to_full',
                                                data=indices_split_to_full[0:item_num, ...])
                            file.create_dataset('data_extra', data=data_extra[0:item_num, ...])
                            file.create_dataset('data_num_extra', data=data_num_extra[0:item_num, ...])
                            file.create_dataset('direction_extra', data=direction_extra[0:item_num, ...])
                            file.create_dataset('label_seg_extra', data=label_seg_extra[0:item_num, ...])
                            file.close()

                            if args.save_ply:
                                print('{}-Saving ply of {}...'.format(datetime.now(), filename_h5))
                                filepath_label_ply = os.path.join(folder, dataset, 'ply_label',
                                                                  'label_%s_%d' % (offset_name, idx_h5))
                                data_utils.save_ply_property_batch(data[0:item_num, :, 0:3],
                                                                   label_seg[0:item_num, ...],
                                                                   filepath_label_ply,
                                                                   data_num[0:item_num, ...], 14)
                                filepath_rgb_ply = os.path.join(folder, dataset, 'ply_rgb',
                                                                'rgb_%s_%d' % (offset_name, idx_h5))
                                data_utils.save_ply_color_batch(data[0:item_num, :, 0:3],
                                                                (data[0:item_num, :, 3:] + 0.5) * 255,
                                                                filepath_rgb_ply,
                                                                data_num[0:item_num, ...])
                            idx_h5 = idx_h5 + 1
                        idx = idx + 1

            # marker indicating this dataset has been fully processed
            open(dataset_marker, 'w').close()
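# A quick sanity check for one of the generated files (illustrative sketch; the
# path below is a placeholder, but the dataset names match the create_dataset
# calls above):
#
#     import h5py
#     with h5py.File('zero_0.h5', 'r') as f:
#         print({key: f[key].shape for key in f.keys()})
#     # 'data' is (item_num, max_point_num, 6) xyz+rgb, 'data_num' gives the
#     # valid point count per sample, 'label_seg' the per-point labels, and
#     # 'indices_split_to_full' maps each point back into the original room cloud.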