def main():
    """Cut each S3DIS room into blocks and store them as batched HDF5 files.

    For every ``Area_1`` .. ``Area_6`` folder under the data root, each room
    folder's ``xyzrgb.npy`` and ``label.npy`` are loaded and the room is
    shifted so that its bottom center becomes the origin.  The points are
    then binned into ``block_size`` x ``block_size`` columns twice (grid
    offsets ``zero`` and ``half``).  Sparse blocks are merged into a dense
    neighbor, every block is resampled per ``grid_size`` cell, split into
    samples of at most ``max_point_num`` points, and flushed to
    ``<offset>_<n>.h5`` inside the room folder in batches of up to 2048
    samples.  With ``--save_ply`` each batch is also handed to the
    ``data_utils`` PLY writers.

    Note: the batch buffers are reused between flushes without being cleared,
    so only the first ``data_num[k]`` points of sample ``k`` are meaningful.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--folder', '-f', help='Path to data folder')
    parser.add_argument('--max_point_num', '-m', help='Max point number of each sample',
                        type=int, default=8192)
    parser.add_argument('--block_size', '-b', help='Block size', type=float, default=1.5)
    parser.add_argument('--grid_size', '-g', help='Grid size', type=float, default=0.03)
    parser.add_argument('--save_ply', '-s', help='Convert .pts to .ply', action='store_true')
    args = parser.parse_args()
    print(args)

    root = args.folder if args.folder else os.path.join(DATA_DIR, "S3DIS", "prepare_label_rgb")
    max_point_num = args.max_point_num

    # Reusable batch buffers; a flush writes only the first `item_num` rows.
    batch_size = 2048
    data = np.zeros((batch_size, max_point_num, 6))
    data_num = np.zeros((batch_size), dtype=np.int32)
    label = np.zeros((batch_size), dtype=np.int32)
    label_seg = np.zeros((batch_size, max_point_num), dtype=np.int32)
    indices_split_to_full = np.zeros((batch_size, max_point_num), dtype=np.int32)

    for area_idx in range(1, 7):
        folder = os.path.join(root, 'Area_%d' % area_idx)
        datasets = os.listdir(folder)
        for dataset_idx, dataset in enumerate(datasets):
            filename_data = os.path.join(folder, dataset, 'xyzrgb.npy')
            print('{}-Loading {}...'.format(datetime.now(), filename_data))
            xyzrgb = np.load(filename_data)

            filename_labels = os.path.join(folder, dataset, 'label.npy')
            print('{}-Loading {}...'.format(datetime.now(), filename_labels))
            labels = np.load(filename_labels).astype(int).flatten()

            xyz, rgb = np.split(xyzrgb, [3], axis=-1)
            xyz_min = np.amin(xyz, axis=0, keepdims=True)
            xyz_max = np.amax(xyz, axis=0, keepdims=True)
            xyz_center = (xyz_min + xyz_max) / 2
            xyz_center[0][-1] = xyz_min[0][-1]
            xyz = xyz - xyz_center  # align to room bottom center
            rgb = rgb / 255 - 0.5

            offsets = [('zero', 0.0), ('half', args.block_size / 2)]
            for offset_name, offset in offsets:
                idx_h5 = 0
                idx = 0

                print('{}-Computing block id of {} points...'.format(
                    datetime.now(), xyzrgb.shape[0]))
                xyz_min = np.amin(xyz, axis=0, keepdims=True) - offset
                xyz_max = np.amax(xyz, axis=0, keepdims=True)
                # The z extent is doubled so that every point falls into z block 0.
                block_size = (args.block_size, args.block_size,
                              2 * (xyz_max[0, -1] - xyz_min[0, -1]))
                # `np.int` was removed in NumPy 1.24; it was an alias of the builtin.
                xyz_blocks = np.floor((xyz - xyz_min) / block_size).astype(int)

                print('{}-Collecting points belong to each block...'.format(datetime.now()))
                blocks, point_block_indices, block_point_counts = np.unique(
                    xyz_blocks, return_inverse=True, return_counts=True, axis=0)
                # Flatten the inverse: NumPy 2.0.0 returns it 2-D when `axis` is given.
                block_point_indices = np.split(
                    np.argsort(point_block_indices.reshape(-1)),
                    np.cumsum(block_point_counts[:-1]))
                print('{}-{} is split into {} blocks.'.format(
                    datetime.now(), dataset, blocks.shape[0]))

                block_to_block_idx_map = dict()
                for block_idx in range(blocks.shape[0]):
                    block = (blocks[block_idx][0], blocks[block_idx][1])
                    block_to_block_idx_map[(block[0], block[1])] = block_idx

                # merge small blocks into one of their big neighbors
                block_point_count_threshold = max_point_num / 10
                nbr_block_offsets = [(0, 1), (1, 0), (0, -1), (-1, 0),
                                     (-1, 1), (1, 1), (1, -1), (-1, -1)]
                block_merge_count = 0
                for block_idx in range(blocks.shape[0]):
                    if block_point_counts[block_idx] >= block_point_count_threshold:
                        continue

                    block = (blocks[block_idx][0], blocks[block_idx][1])
                    for x, y in nbr_block_offsets:
                        nbr_block = (block[0] + x, block[1] + y)
                        if nbr_block not in block_to_block_idx_map:
                            continue

                        nbr_block_idx = block_to_block_idx_map[nbr_block]
                        if block_point_counts[nbr_block_idx] < block_point_count_threshold:
                            continue

                        block_point_indices[nbr_block_idx] = np.concatenate(
                            [block_point_indices[nbr_block_idx],
                             block_point_indices[block_idx]], axis=-1)
                        block_point_indices[block_idx] = np.array([], dtype=int)
                        block_merge_count = block_merge_count + 1
                        break
                print('{}-{} of {} blocks are merged.'.format(
                    datetime.now(), block_merge_count, blocks.shape[0]))

                # The last non-empty block triggers the final (partial) flush.
                idx_last_non_empty_block = 0
                for block_idx in reversed(range(blocks.shape[0])):
                    if block_point_indices[block_idx].shape[0] != 0:
                        idx_last_non_empty_block = block_idx
                        break

                # uniformly sample each block
                for block_idx in range(idx_last_non_empty_block + 1):
                    point_indices = block_point_indices[block_idx]
                    if point_indices.shape[0] == 0:
                        continue
                    block_points = xyz[point_indices]
                    block_min = np.amin(block_points, axis=0, keepdims=True)
                    xyz_grids = np.floor(
                        (block_points - block_min) / args.grid_size).astype(int)
                    grids, point_grid_indices, grid_point_counts = np.unique(
                        xyz_grids, return_inverse=True, return_counts=True, axis=0)
                    grid_point_indices = np.split(
                        np.argsort(point_grid_indices.reshape(-1)),
                        np.cumsum(grid_point_counts[:-1]))
                    grid_point_count_avg = int(np.average(grid_point_counts))
                    point_indices_repeated = []
                    for grid_idx in range(grids.shape[0]):
                        point_indices_in_block = grid_point_indices[grid_idx]
                        # Under-populated cells are repeated, over-populated ones
                        # truncated, so every cell contributes the average count.
                        repeat_num = math.ceil(
                            grid_point_count_avg / point_indices_in_block.shape[0])
                        if repeat_num > 1:
                            point_indices_in_block = np.repeat(
                                point_indices_in_block, repeat_num)
                            np.random.shuffle(point_indices_in_block)
                            point_indices_in_block = \
                                point_indices_in_block[:grid_point_count_avg]
                        point_indices_repeated.extend(
                            list(point_indices[point_indices_in_block]))
                    block_point_indices[block_idx] = np.array(point_indices_repeated)
                    block_point_counts[block_idx] = len(point_indices_repeated)

                for block_idx in range(idx_last_non_empty_block + 1):
                    point_indices = block_point_indices[block_idx]
                    if point_indices.shape[0] == 0:
                        continue

                    # Split the block into near-equal samples of <= max_point_num points.
                    block_point_num = point_indices.shape[0]
                    block_split_num = int(math.ceil(block_point_num * 1.0 / max_point_num))
                    point_num_avg = int(math.ceil(block_point_num * 1.0 / block_split_num))
                    point_nums = [point_num_avg] * block_split_num
                    point_nums[-1] = block_point_num - (point_num_avg * (block_split_num - 1))
                    starts = [0] + list(np.cumsum(point_nums))

                    np.random.shuffle(point_indices)
                    block_points = xyz[point_indices]
                    block_rgb = rgb[point_indices]
                    block_labels = labels[point_indices]
                    # Stored channel order is x, z, y, r, g, b.
                    x, y, z = np.split(block_points, (1, 2), axis=-1)
                    block_xzyrgb = np.concatenate([x, z, y, block_rgb], axis=-1)

                    for block_split_idx in range(block_split_num):
                        start = starts[block_split_idx]
                        point_num = point_nums[block_split_idx]
                        end = start + point_num
                        idx_in_batch = idx % batch_size
                        data[idx_in_batch, 0:point_num, ...] = block_xzyrgb[start:end, :]
                        data_num[idx_in_batch] = point_num
                        label[idx_in_batch] = dataset_idx  # won't be used...
                        label_seg[idx_in_batch, 0:point_num] = block_labels[start:end]
                        indices_split_to_full[idx_in_batch, 0:point_num] = \
                            point_indices[start:end]

                        # Flush when the batch is full or this is the very last sample.
                        if ((idx + 1) % batch_size == 0) or \
                                (block_idx == idx_last_non_empty_block
                                 and block_split_idx == block_split_num - 1):
                            item_num = idx_in_batch + 1
                            filename_h5 = os.path.join(
                                folder, dataset, '%s_%d.h5' % (offset_name, idx_h5))
                            print('{}-Saving {}...'.format(datetime.now(), filename_h5))

                            # Context manager closes the file even if a write fails.
                            with h5py.File(filename_h5, 'w') as h5_file:
                                h5_file.create_dataset('data', data=data[0:item_num, ...])
                                h5_file.create_dataset(
                                    'data_num', data=data_num[0:item_num, ...])
                                h5_file.create_dataset('label', data=label[0:item_num, ...])
                                h5_file.create_dataset(
                                    'label_seg', data=label_seg[0:item_num, ...])
                                h5_file.create_dataset(
                                    'indices_split_to_full',
                                    data=indices_split_to_full[0:item_num, ...])

                            if args.save_ply:
                                print('{}-Saving ply of {}...'.format(
                                    datetime.now(), filename_h5))
                                filepath_label_ply = os.path.join(
                                    folder, dataset, 'ply_label',
                                    'label_%s_%d' % (offset_name, idx_h5))
                                data_utils.save_ply_property_batch(
                                    data[0:item_num, :, 0:3],
                                    label_seg[0:item_num, ...],
                                    filepath_label_ply,
                                    data_num[0:item_num, ...], 14)

                                filepath_rgb_ply = os.path.join(
                                    folder, dataset, 'ply_rgb',
                                    'rgb_%s_%d' % (offset_name, idx_h5))
                                data_utils.save_ply_color_batch(
                                    data[0:item_num, :, 0:3],
                                    (data[0:item_num, :, 3:] + 0.5) * 255,
                                    filepath_rgb_ply,
                                    data_num[0:item_num, ...])

                            idx_h5 = idx_h5 + 1
                        idx = idx + 1
def main():
    """Cut every xyz-only ``.npy`` point cloud in a folder into HDF5 batches.

    Each ``*.npy`` file directly under the data root is loaded as one point
    cloud and shifted so that its bottom center becomes the origin.  The
    points are binned into ``block_size`` x ``block_size`` columns twice
    (grid offsets ``zero`` and ``half``).  Sparse blocks are merged into a
    dense neighbor, every block is resampled per ``grid_size`` cell, split
    into samples of at most ``max_point_num`` points, and flushed to
    ``<name>_<offset>_<n>.h5`` in the data root in batches of up to 2048
    samples.  Finally ``my_test_data.txt`` is written, listing every ``.h5``
    file found in the data root.

    This variant stores no labels and no colors, so ``--save_ply`` cannot
    produce anything; the flag is reported and ignored.

    Note: the batch buffers are reused between flushes without being cleared,
    so only the first ``data_num[k]`` points of sample ``k`` are meaningful.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--folder', '-f', help='Path to data folder')
    parser.add_argument('--max_point_num', '-m', help='Max point number of each sample',
                        type=int, default=8192)
    parser.add_argument('--block_size', '-b', help='Block size', type=float, default=1.5)
    parser.add_argument('--grid_size', '-g', help='Grid size', type=float, default=0.03)
    parser.add_argument('--save_ply', '-s', help='Convert .pts to .ply', action='store_true')
    args = parser.parse_args()
    print(args)

    if args.save_ply:
        # The PLY branch of this variant referenced the undefined names `folder`
        # and `label_seg` and sliced color channels the xyz-only buffer lacks,
        # so it always crashed with a NameError after the first batch.
        print('{}-Warning: --save_ply is ignored, xyz-only data has no labels '
              'or colors to export.'.format(datetime.now()))

    root = args.folder if args.folder else '/home/elvin/PointCNN/data/mydata'
    max_point_num = args.max_point_num

    batch_size = 2048
    # Holds the xyz coordinates of each sample.
    data = np.zeros((batch_size, max_point_num, 3))
    # Holds the number of valid points in each sample.
    data_num = np.zeros((batch_size), dtype=np.int32)
    indices_split_to_full = np.zeros((batch_size, max_point_num), dtype=np.int32)

    # Each dataset corresponds to one point cloud.
    for dataset in os.listdir(root):
        if not dataset.endswith('.npy'):
            continue

        filename_data = os.path.join(root, dataset)
        print('{}-Loading {}...'.format(datetime.now(), filename_data))
        xyz = np.load(filename_data)
        dataset = dataset.split('.')[0]

        xyz_min = np.amin(xyz, axis=0, keepdims=True)
        xyz_max = np.amax(xyz, axis=0, keepdims=True)
        xyz_center = (xyz_min + xyz_max) / 2
        xyz_center[0][-1] = xyz_min[0][-1]
        xyz = xyz - xyz_center  # align to room bottom center

        offsets = [('zero', 0.0), ('half', args.block_size / 2)]
        for offset_name, offset in offsets:
            idx_h5 = 0
            idx = 0

            print('{}-Computing block id of {} points...'.format(
                datetime.now(), xyz.shape[0]))
            xyz_min = np.amin(xyz, axis=0, keepdims=True) - offset
            xyz_max = np.amax(xyz, axis=0, keepdims=True)
            # The z extent is doubled so that every point falls into z block 0.
            block_size = (args.block_size, args.block_size,
                          2 * (xyz_max[0, -1] - xyz_min[0, -1]))
            # `np.int` was removed in NumPy 1.24; it was an alias of the builtin.
            xyz_blocks = np.floor((xyz - xyz_min) / block_size).astype(int)

            print('{}-Collecting points belong to each block...'.format(datetime.now()))
            # point_block_indices maps every point to the row of its block in `blocks`.
            blocks, point_block_indices, block_point_counts = np.unique(
                xyz_blocks, return_inverse=True, return_counts=True, axis=0)
            # One index array per block of this point cloud.
            # Flatten the inverse: NumPy 2.0.0 returns it 2-D when `axis` is given.
            block_point_indices = np.split(
                np.argsort(point_block_indices.reshape(-1)),
                np.cumsum(block_point_counts[:-1]))
            print('{}-{}.npy is split into {} blocks.'.format(
                datetime.now(), dataset, blocks.shape[0]))

            block_to_block_idx_map = dict()
            for block_idx in range(blocks.shape[0]):
                block = (blocks[block_idx][0], blocks[block_idx][1])
                block_to_block_idx_map[(block[0], block[1])] = block_idx

            # merge small blocks into one of their big neighbors
            block_point_count_threshold = max_point_num / 10
            nbr_block_offsets = [(0, 1), (1, 0), (0, -1), (-1, 0),
                                 (-1, 1), (1, 1), (1, -1), (-1, -1)]
            block_merge_count = 0
            for block_idx in range(blocks.shape[0]):
                if block_point_counts[block_idx] >= block_point_count_threshold:
                    continue

                block = (blocks[block_idx][0], blocks[block_idx][1])
                for x, y in nbr_block_offsets:
                    nbr_block = (block[0] + x, block[1] + y)
                    if nbr_block not in block_to_block_idx_map:
                        continue

                    nbr_block_idx = block_to_block_idx_map[nbr_block]
                    if block_point_counts[nbr_block_idx] < block_point_count_threshold:
                        continue

                    block_point_indices[nbr_block_idx] = np.concatenate(
                        [block_point_indices[nbr_block_idx],
                         block_point_indices[block_idx]], axis=-1)
                    block_point_indices[block_idx] = np.array([], dtype=int)
                    block_merge_count = block_merge_count + 1
                    break
            print('{}-{} of {} blocks are merged.'.format(
                datetime.now(), block_merge_count, blocks.shape[0]))

            # The last non-empty block triggers the final (partial) flush.
            idx_last_non_empty_block = 0
            for block_idx in reversed(range(blocks.shape[0])):
                if block_point_indices[block_idx].shape[0] != 0:
                    idx_last_non_empty_block = block_idx
                    break

            # uniformly sample each block
            for block_idx in range(idx_last_non_empty_block + 1):
                point_indices = block_point_indices[block_idx]
                if point_indices.shape[0] == 0:
                    continue
                block_points = xyz[point_indices]
                block_min = np.amin(block_points, axis=0, keepdims=True)
                xyz_grids = np.floor(
                    (block_points - block_min) / args.grid_size).astype(int)
                grids, point_grid_indices, grid_point_counts = np.unique(
                    xyz_grids, return_inverse=True, return_counts=True, axis=0)
                grid_point_indices = np.split(
                    np.argsort(point_grid_indices.reshape(-1)),
                    np.cumsum(grid_point_counts[:-1]))
                grid_point_count_avg = int(np.average(grid_point_counts))
                point_indices_repeated = []
                for grid_idx in range(grids.shape[0]):
                    point_indices_in_block = grid_point_indices[grid_idx]
                    # Under-populated cells are repeated, over-populated ones
                    # truncated, so every cell contributes the average count.
                    repeat_num = math.ceil(
                        grid_point_count_avg / point_indices_in_block.shape[0])
                    if repeat_num > 1:
                        point_indices_in_block = np.repeat(
                            point_indices_in_block, repeat_num)
                        np.random.shuffle(point_indices_in_block)
                        point_indices_in_block = \
                            point_indices_in_block[:grid_point_count_avg]
                    point_indices_repeated.extend(
                        list(point_indices[point_indices_in_block]))
                block_point_indices[block_idx] = np.array(point_indices_repeated)
                block_point_counts[block_idx] = len(point_indices_repeated)

            for block_idx in range(idx_last_non_empty_block + 1):
                point_indices = block_point_indices[block_idx]
                if point_indices.shape[0] == 0:
                    continue

                # Split the block into near-equal samples of <= max_point_num points.
                block_point_num = point_indices.shape[0]
                block_split_num = int(math.ceil(block_point_num * 1.0 / max_point_num))
                point_num_avg = int(math.ceil(block_point_num * 1.0 / block_split_num))
                point_nums = [point_num_avg] * block_split_num
                point_nums[-1] = block_point_num - (point_num_avg * (block_split_num - 1))
                starts = [0] + list(np.cumsum(point_nums))

                np.random.shuffle(point_indices)
                block_points = xyz[point_indices]

                for block_split_idx in range(block_split_num):
                    start = starts[block_split_idx]
                    point_num = point_nums[block_split_idx]
                    end = start + point_num
                    idx_in_batch = idx % batch_size
                    data[idx_in_batch, 0:point_num, ...] = block_points[start:end, :]
                    data_num[idx_in_batch] = point_num
                    indices_split_to_full[idx_in_batch, 0:point_num] = \
                        point_indices[start:end]

                    # Flush when the batch is full or this is the very last sample.
                    if ((idx + 1) % batch_size == 0) or \
                            (block_idx == idx_last_non_empty_block
                             and block_split_idx == block_split_num - 1):
                        item_num = idx_in_batch + 1
                        filename_h5 = os.path.join(
                            root, dataset + '_%s_%d.h5' % (offset_name, idx_h5))
                        print('{}-Saving {}...'.format(datetime.now(), filename_h5))

                        # Context manager closes the file even if a write fails.
                        with h5py.File(filename_h5, 'w') as h5_file:
                            h5_file.create_dataset('data', data=data[0:item_num, ...])
                            h5_file.create_dataset(
                                'data_num', data=data_num[0:item_num, ...])
                            h5_file.create_dataset(
                                'indices_split_to_full',
                                data=indices_split_to_full[0:item_num, ...])

                        idx_h5 = idx_h5 + 1
                    idx = idx + 1

    # List every .h5 file now present in the data root, one './name' per line.
    filename_h5s = ['./%s\n' % (filename)
                    for filename in os.listdir(root) if filename.endswith('.h5')]
    filelist_txt = os.path.join(root, 'my_test_data.txt')
    print('{}-Saving {}...'.format(datetime.now(), filelist_txt))
    with open(filelist_txt, 'w') as filelist:
        filelist.writelines(filename_h5s)
def main():
    """Cut each Semantic3D scan into blocks and store them as batched HDF5 files.

    For every ``<name>.txt`` scan in the ``train``, ``val`` and ``test``
    folders under the data root, the x/y/z/intensity/r/g/b rows are loaded.
    When ``<name>.labels`` exists, points labeled 0 are dropped and the
    remaining labels are shifted down by one; otherwise all labels are zero.
    The points are binned into ``block_size`` x ``block_size`` columns twice
    (grid offsets ``zero`` and ``half``).  Sparse blocks are merged into a
    dense neighbor, every block is resampled per ``grid_size`` cell, moved to
    its own bottom center, split into samples of at most ``max_point_num``
    points, and flushed to ``<name>_<offset>_<n>.h5`` in batches of up to
    2048 samples.  With ``--save_ply`` each batch is also handed to the
    ``data_utils`` PLY writers, both in scan coordinates and block-aligned.

    Note: the batch buffers are reused between flushes without being cleared,
    so only the first ``data_num[k]`` points of sample ``k`` are meaningful.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--folder', '-f', help='Path to data folder')
    parser.add_argument('--max_point_num', '-m', help='Max point number of each sample',
                        type=int, default=8192)
    parser.add_argument('--block_size', '-b', help='Block size', type=float, default=5.0)
    parser.add_argument('--grid_size', '-g', help='Grid size', type=float, default=0.1)
    parser.add_argument('--save_ply', '-s', help='Convert .pts to .ply', action='store_true')
    args = parser.parse_args()
    print(args)

    root = args.folder if args.folder else '../../data/semantic3d'
    max_point_num = args.max_point_num

    # Reusable batch buffers; a flush writes only the first `item_num` rows.
    batch_size = 2048
    data = np.zeros((batch_size, max_point_num, 7))
    data_num = np.zeros((batch_size), dtype=np.int32)
    label = np.zeros((batch_size), dtype=np.int32)
    label_seg = np.zeros((batch_size, max_point_num), dtype=np.int32)
    indices_split_to_full = np.zeros((batch_size, max_point_num), dtype=np.int32)
    if args.save_ply:
        # Per-point block centers, needed to undo the block alignment for PLY export.
        data_center = np.zeros((batch_size, max_point_num, 3))

    folders = [os.path.join(root, folder) for folder in ['train', 'val', 'test']]
    for folder in folders:
        datasets = [filename[:-4] for filename in os.listdir(folder)
                    if filename.endswith('.txt')]
        for dataset_idx, dataset in enumerate(datasets):
            filename_txt = os.path.join(folder, dataset + '.txt')
            print('{}-Loading {}...'.format(datetime.now(), filename_txt))
            xyzirgb = np.loadtxt(filename_txt)

            filename_labels = os.path.join(folder, dataset + '.labels')
            has_labels = os.path.exists(filename_labels)
            if has_labels:
                print('{}-Loading {}...'.format(datetime.now(), filename_labels))
                # `np.int` was removed in NumPy 1.24; it was an alias of the builtin.
                labels = np.loadtxt(filename_labels, dtype=int)
                indices = (labels != 0)
                labels = labels[indices] - 1  # since labels == 0 have been removed
                xyzirgb = xyzirgb[indices, :]
            else:
                labels = np.zeros((xyzirgb.shape[0]))

            xyz, i, rgb = np.split(xyzirgb, (3, 4), axis=-1)
            i = i / 2000 + 0.5
            rgb = rgb / 255 - 0.5

            offsets = [('zero', 0.0), ('half', args.block_size / 2)]
            for offset_name, offset in offsets:
                idx_h5 = 0
                idx = 0

                print('{}-Computing block id of {} points...'.format(
                    datetime.now(), xyzirgb.shape[0]))
                xyz_min = np.amin(xyz, axis=0, keepdims=True) - offset
                xyz_max = np.amax(xyz, axis=0, keepdims=True)
                # The z extent is doubled so that every point falls into z block 0.
                block_size = (args.block_size, args.block_size,
                              2 * (xyz_max[0, -1] - xyz_min[0, -1]))
                xyz_blocks = np.floor((xyz - xyz_min) / block_size).astype(int)

                print('{}-Collecting points belong to each block...'.format(datetime.now()))
                blocks, point_block_indices, block_point_counts = np.unique(
                    xyz_blocks, return_inverse=True, return_counts=True, axis=0)
                # Flatten the inverse: NumPy 2.0.0 returns it 2-D when `axis` is given.
                block_point_indices = np.split(
                    np.argsort(point_block_indices.reshape(-1)),
                    np.cumsum(block_point_counts[:-1]))
                print('{}-{} is split into {} blocks.'.format(
                    datetime.now(), dataset, blocks.shape[0]))

                block_to_block_idx_map = dict()
                for block_idx in range(blocks.shape[0]):
                    block = (blocks[block_idx][0], blocks[block_idx][1])
                    block_to_block_idx_map[(block[0], block[1])] = block_idx

                # merge small blocks into one of their big neighbors
                block_point_count_threshold = max_point_num / 10
                nbr_block_offsets = [(0, 1), (1, 0), (0, -1), (-1, 0),
                                     (-1, 1), (1, 1), (1, -1), (-1, -1)]
                block_merge_count = 0
                for block_idx in range(blocks.shape[0]):
                    if block_point_counts[block_idx] >= block_point_count_threshold:
                        continue

                    block = (blocks[block_idx][0], blocks[block_idx][1])
                    for x, y in nbr_block_offsets:
                        nbr_block = (block[0] + x, block[1] + y)
                        if nbr_block not in block_to_block_idx_map:
                            continue

                        nbr_block_idx = block_to_block_idx_map[nbr_block]
                        if block_point_counts[nbr_block_idx] < block_point_count_threshold:
                            continue

                        block_point_indices[nbr_block_idx] = np.concatenate(
                            [block_point_indices[nbr_block_idx],
                             block_point_indices[block_idx]], axis=-1)
                        block_point_indices[block_idx] = np.array([], dtype=int)
                        block_merge_count = block_merge_count + 1
                        break
                print('{}-{} of {} blocks are merged.'.format(
                    datetime.now(), block_merge_count, blocks.shape[0]))

                # The last non-empty block triggers the final (partial) flush.
                idx_last_non_empty_block = 0
                for block_idx in reversed(range(blocks.shape[0])):
                    if block_point_indices[block_idx].shape[0] != 0:
                        idx_last_non_empty_block = block_idx
                        break

                # uniformly sample each block
                for block_idx in range(idx_last_non_empty_block + 1):
                    point_indices = block_point_indices[block_idx]
                    if point_indices.shape[0] == 0:
                        continue
                    block_points = xyz[point_indices]
                    block_min = np.amin(block_points, axis=0, keepdims=True)
                    xyz_grids = np.floor(
                        (block_points - block_min) / args.grid_size).astype(int)
                    grids, point_grid_indices, grid_point_counts = np.unique(
                        xyz_grids, return_inverse=True, return_counts=True, axis=0)
                    grid_point_indices = np.split(
                        np.argsort(point_grid_indices.reshape(-1)),
                        np.cumsum(grid_point_counts[:-1]))
                    grid_point_count_avg = int(np.average(grid_point_counts))
                    point_indices_repeated = []
                    for grid_idx in range(grids.shape[0]):
                        point_indices_in_block = grid_point_indices[grid_idx]
                        # Under-populated cells are repeated, over-populated ones
                        # truncated, so every cell contributes the average count.
                        repeat_num = math.ceil(
                            grid_point_count_avg / point_indices_in_block.shape[0])
                        if repeat_num > 1:
                            point_indices_in_block = np.repeat(
                                point_indices_in_block, repeat_num)
                            np.random.shuffle(point_indices_in_block)
                            point_indices_in_block = \
                                point_indices_in_block[:grid_point_count_avg]
                        point_indices_repeated.extend(
                            list(point_indices[point_indices_in_block]))
                    block_point_indices[block_idx] = np.array(point_indices_repeated)
                    block_point_counts[block_idx] = len(point_indices_repeated)

                for block_idx in range(idx_last_non_empty_block + 1):
                    point_indices = block_point_indices[block_idx]
                    if point_indices.shape[0] == 0:
                        continue

                    # Split the block into near-equal samples of <= max_point_num points.
                    block_point_num = point_indices.shape[0]
                    block_split_num = int(math.ceil(block_point_num * 1.0 / max_point_num))
                    point_num_avg = int(math.ceil(block_point_num * 1.0 / block_split_num))
                    point_nums = [point_num_avg] * block_split_num
                    point_nums[-1] = block_point_num - (point_num_avg * (block_split_num - 1))
                    starts = [0] + list(np.cumsum(point_nums))

                    np.random.shuffle(point_indices)
                    block_points = xyz[point_indices]
                    block_min = np.amin(block_points, axis=0, keepdims=True)
                    block_max = np.amax(block_points, axis=0, keepdims=True)
                    block_center = (block_min + block_max) / 2
                    block_center[0][-1] = block_min[0][-1]
                    block_points = block_points - block_center  # align to block bottom center
                    # Stored channel order is x, z, y, r, g, b, intensity.
                    x, y, z = np.split(block_points, (1, 2), axis=-1)
                    block_xzyrgbi = np.concatenate(
                        [x, z, y, rgb[point_indices], i[point_indices]], axis=-1)
                    block_labels = labels[point_indices]

                    for block_split_idx in range(block_split_num):
                        start = starts[block_split_idx]
                        point_num = point_nums[block_split_idx]
                        end = start + point_num
                        idx_in_batch = idx % batch_size
                        data[idx_in_batch, 0:point_num, ...] = block_xzyrgbi[start:end, :]
                        data_num[idx_in_batch] = point_num
                        label[idx_in_batch] = dataset_idx  # won't be used...
                        label_seg[idx_in_batch, 0:point_num] = block_labels[start:end]
                        indices_split_to_full[idx_in_batch, 0:point_num] = \
                            point_indices[start:end]
                        if args.save_ply:
                            block_center_xzy = np.array(
                                [[block_center[0][0], block_center[0][2], block_center[0][1]]])
                            data_center[idx_in_batch, 0:point_num, ...] = block_center_xzy

                        # Flush when the batch is full or this is the very last sample.
                        if ((idx + 1) % batch_size == 0) or \
                                (block_idx == idx_last_non_empty_block
                                 and block_split_idx == block_split_num - 1):
                            item_num = idx_in_batch + 1
                            filename_h5 = os.path.join(
                                folder, dataset + '_%s_%d.h5' % (offset_name, idx_h5))
                            print('{}-Saving {}...'.format(datetime.now(), filename_h5))

                            # Context manager closes the file even if a write fails.
                            with h5py.File(filename_h5, 'w') as h5_file:
                                h5_file.create_dataset('data', data=data[0:item_num, ...])
                                h5_file.create_dataset(
                                    'data_num', data=data_num[0:item_num, ...])
                                h5_file.create_dataset('label', data=label[0:item_num, ...])
                                h5_file.create_dataset(
                                    'label_seg', data=label_seg[0:item_num, ...])
                                h5_file.create_dataset(
                                    'indices_split_to_full',
                                    data=indices_split_to_full[0:item_num, ...])

                            if args.save_ply:
                                print('{}-Saving ply of {}...'.format(
                                    datetime.now(), filename_h5))
                                xyz_aligned = data[0:item_num, :, 0:3]
                                xyz_restored = xyz_aligned + data_center[0:item_num, ...]
                                intensity = data[0:item_num, :, 6]
                                colors = (data[0:item_num, :, 3:6] + 0.5) * 255
                                seg = label_seg[0:item_num, ...]
                                counts = data_num[0:item_num, ...]

                                filepath_label_ply = os.path.join(
                                    folder, 'ply_label',
                                    dataset + '_label_%s_%d' % (offset_name, idx_h5))
                                data_utils.save_ply_property_batch(
                                    xyz_restored, seg, filepath_label_ply, counts, 8)

                                filepath_i_ply = os.path.join(
                                    folder, 'ply_intensity',
                                    dataset + '_i_%s_%d' % (offset_name, idx_h5))
                                data_utils.save_ply_property_batch(
                                    xyz_restored, intensity, filepath_i_ply, counts, 1.0)

                                filepath_rgb_ply = os.path.join(
                                    folder, 'ply_rgb',
                                    dataset + '_rgb_%s_%d' % (offset_name, idx_h5))
                                data_utils.save_ply_color_batch(
                                    xyz_restored, colors, filepath_rgb_ply, counts)

                                filepath_label_aligned_ply = os.path.join(
                                    folder, 'ply_label_aligned',
                                    dataset + '_label_%s_%d' % (offset_name, idx_h5))
                                data_utils.save_ply_property_batch(
                                    xyz_aligned, seg, filepath_label_aligned_ply, counts, 8)

                                filepath_i_aligned_ply = os.path.join(
                                    folder, 'ply_intensity_aligned',
                                    dataset + '_i_%s_%d' % (offset_name, idx_h5))
                                data_utils.save_ply_property_batch(
                                    xyz_aligned, intensity, filepath_i_aligned_ply, counts, 1.0)

                                filepath_rgb_aligned_ply = os.path.join(
                                    folder, 'ply_rgb_aligned',
                                    dataset + '_rgb_%s_%d' % (offset_name, idx_h5))
                                data_utils.save_ply_color_batch(
                                    xyz_aligned, colors, filepath_rgb_aligned_ply, counts)

                            idx_h5 = idx_h5 + 1
                        idx = idx + 1
def main():
    """Cut each S3DIS room into blocks with direction vectors and context points.

    For every room folder under ``Area_1`` .. ``Area_6`` that has no
    ``.dataset`` marker file, ``xyzrgb_label.mat`` (keys ``xyzrgb`` and
    ``label``) and the matching ``vec.mat`` (key ``vec``) are loaded and the
    room is shifted so that its bottom center becomes the origin.  The points
    are binned into a fine ``block_size`` grid, and every 5 x 5 group of fine
    cells forms one coarse block.  Fine cells on the border of a neighboring
    coarse block are collected as that block's "extra" points.  Sparse coarse
    blocks are merged into a dense neighbor, each block is split into samples
    of at most ``max_point_num`` points, and for every sample half as many
    extra points are drawn at random.  Samples are flushed to
    ``<offset>_<n>.h5`` inside the room folder in batches of up to 2048, and
    the marker file is created once the room is finished.

    Note: the batch buffers are reused between flushes without being cleared,
    so only the first ``data_num[k]`` / ``data_num_extra[k]`` points of
    sample ``k`` are meaningful.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--folder', '-f', help='Path to data folder')
    parser.add_argument('--max_point_num', '-m', help='Max point number of each sample',
                        type=int, default=8192)
    parser.add_argument('--block_size', '-b', help='Block size', type=float, default=0.3)
    parser.add_argument('--grid_size', '-g', help='Grid size', type=float, default=0.03)
    parser.add_argument('--save_ply', '-s', help='Convert .pts to .ply', action='store_true')
    args = parser.parse_args()
    print(args)

    root = args.folder if args.folder else '../../data/S3DIS/prepare_label_rgb'
    dir_root = '../../data/S3DIS/dir_all_d/'
    max_point_num = args.max_point_num

    # Reusable batch buffers; a flush writes only the first `item_num` rows.
    batch_size = 2048
    data = np.zeros((batch_size, max_point_num, 6))
    direction = np.zeros((batch_size, max_point_num, 6))
    data_num = np.zeros((batch_size), dtype=np.int32)
    label = np.zeros((batch_size), dtype=np.int32)
    label_seg = np.zeros((batch_size, max_point_num), dtype=np.int32)
    indices_split_to_full = np.zeros((batch_size, max_point_num), dtype=np.int32)

    # A sample holds at most max_point_num points and draws half as many extra
    # points.  4096 is kept as the minimum so output shapes are unchanged for
    # the default settings; larger max_point_num no longer overflows the buffer.
    extra_capacity = max(4096, max_point_num // 2)
    data_extra = np.zeros((batch_size, extra_capacity, 6))
    direction_extra = np.zeros((batch_size, extra_capacity, 6))
    data_num_extra = np.zeros((batch_size), dtype=np.int32)
    label_seg_extra = np.zeros((batch_size, extra_capacity), dtype=np.int32)
    mergenum = 0

    for area_idx in range(1, 7):
        folder = os.path.join(root, 'Area_%d' % area_idx)
        datasets = os.listdir(folder)
        for dataset_idx, dataset in enumerate(datasets):
            dataset_marker = os.path.join(folder, dataset, ".dataset")
            if os.path.exists(dataset_marker):
                print('{}-{}/{} already processed, skipping'.format(
                    datetime.now(), folder, dataset))
                continue

            filename_data = os.path.join(folder, dataset, 'xyzrgb_label.mat')
            print('{}-Loading {}...'.format(datetime.now(), filename_data))
            xyz_label = scio.loadmat(filename_data)
            xyzrgb = xyz_label['xyzrgb']
            labels = xyz_label['label'].astype(int).flatten()

            filename_direction = os.path.join(
                dir_root, 'Area_%d' % area_idx, dataset, 'vec.mat')
            print('{}-Loading {}...'.format(datetime.now(), filename_direction))
            vec = scio.loadmat(filename_direction)
            vec = vec['vec']

            xyz, rgb = np.split(xyzrgb, [3], axis=-1)
            xyz_min = np.amin(xyz, axis=0, keepdims=True)
            xyz_max = np.amax(xyz, axis=0, keepdims=True)
            xyz_center = (xyz_min + xyz_max) / 2
            xyz_center[0][-1] = xyz_min[0][-1]
            xyz = xyz - xyz_center  # align to room bottom center
            rgb = rgb / 255 - 0.5

            offsets = [('zero', 0.0), ('half', 1.2 / 2)]
            for offset_name, offset in offsets:
                idx_h5 = 0
                idx = 0

                print('{}-Computing block id of {} points...'.format(
                    datetime.now(), xyzrgb.shape[0]))
                xyz_min = np.amin(xyz, axis=0, keepdims=True) - offset
                xyz_max = np.amax(xyz, axis=0, keepdims=True)
                # The z extent is doubled so that every point falls into z block 0.
                block_size = (args.block_size, args.block_size,
                              2 * (xyz_max[0, -1] - xyz_min[0, -1]))
                # `np.int` was removed in NumPy 1.24; it was an alias of the builtin.
                xyz_blocks = np.floor((xyz - xyz_min) / block_size).astype(int)

                print('{}-Collecting points belong to each block...'.format(datetime.now()))
                blocks, point_block_indices, block_point_counts = np.unique(
                    xyz_blocks, return_inverse=True, return_counts=True, axis=0)
                # Flatten the inverse: NumPy 2.0.0 returns it 2-D when `axis` is given.
                block_point_indices = np.split(
                    np.argsort(point_block_indices.reshape(-1)),
                    np.cumsum(block_point_counts[:-1]))
                print('{}-{} is split into {} blocks.'.format(
                    datetime.now(), dataset, blocks.shape[0]))

                blockmax = np.amax(blocks, axis=0)
                x_max, y_max = int(blockmax[0]), int(blockmax[1])
                row_len = y_max + 1
                xblocks = x_max // 5 + 1
                yblocks = y_max // 5 + 1
                blocks_base = [[] for _ in range(xblocks * yblocks)]
                blocks_extra = [[] for _ in range(xblocks * yblocks)]

                # Pad the sorted fine-cell list to a full row-major grid, so
                # that cell (xi, yj) sits at index row_len * xi + yj.  Missing
                # cells get an empty index list.
                for xi in range(x_max + 1):
                    for yj in range(row_len):
                        slot = row_len * xi + yj
                        probe = blocks[min(slot, blocks.shape[0] - 1)]
                        if np.any(probe != (xi, yj, 0)):
                            blocks = np.insert(blocks, slot, [xi, yj, 0], 0)
                            block_point_indices.insert(slot, [])

                # Assign each fine cell to its coarse block.  A cell in the
                # first or last row/column of a 5 x 5 group is also an extra
                # cell of the coarse block across that border.  After padding,
                # the last grid cell is (x_max, y_max).
                for xi in range(x_max + 1):
                    for yj in range(row_len):
                        subid = row_len * xi + yj
                        cell_indices = block_point_indices[subid]
                        bid = yblocks * (xi // 5) + (yj // 5)
                        blocks_base[bid].append(cell_indices)
                        if (xi % 5) == 0 and xi > 0:
                            eid = yblocks * ((xi - 1) // 5) + (yj // 5)
                            blocks_extra[eid].append(cell_indices)
                        elif (xi % 5) == 4 and xi < x_max:
                            eid = yblocks * ((xi + 1) // 5) + (yj // 5)
                            blocks_extra[eid].append(cell_indices)
                        if (yj % 5) == 0 and yj > 0:
                            eid = yblocks * (xi // 5) + ((yj - 1) // 5)
                            blocks_extra[eid].append(cell_indices)
                        elif (yj % 5) == 4 and yj < y_max:
                            eid = yblocks * (xi // 5) + ((yj + 1) // 5)
                            blocks_extra[eid].append(cell_indices)

                # np.concatenate rejects an empty sequence; a coarse block with
                # no border neighbors (single-block room) has no extra cells.
                blocks_base = [np.concatenate(ind) if ind else np.array([], dtype=int)
                               for ind in blocks_base]
                blocks_extra = [np.concatenate(ind) if ind else np.array([], dtype=int)
                                for ind in blocks_extra]

                block_to_block_idx_map = dict()
                for block_idx in range(xblocks * yblocks):
                    block = (block_idx // yblocks, block_idx % yblocks)
                    block_to_block_idx_map[(block[0], block[1])] = block_idx

                # merge small blocks into one of their big neighbors
                block_point_count_threshold = max_point_num / 10
                nbr_block_offsets = [(0, 1), (1, 0), (0, -1), (-1, 0),
                                     (-1, 1), (1, 1), (1, -1), (-1, -1)]
                block_merge_count = 0
                for block_idx in range(xblocks * yblocks):
                    if len(blocks_base[block_idx]) >= block_point_count_threshold:
                        continue

                    # Coarse coordinates of this block.  The map above is keyed
                    # by coarse coordinates, so reading them from the fine-cell
                    # array `blocks` picked unrelated neighbors.
                    block = (block_idx // yblocks, block_idx % yblocks)
                    for x, y in nbr_block_offsets:
                        nbr_block = (block[0] + x, block[1] + y)
                        if nbr_block not in block_to_block_idx_map:
                            continue

                        nbr_block_idx = block_to_block_idx_map[nbr_block]
                        if len(blocks_base[nbr_block_idx]) < block_point_count_threshold:
                            continue

                        blocks_base[nbr_block_idx] = np.concatenate(
                            [blocks_base[nbr_block_idx], blocks_base[block_idx]], axis=-1)
                        blocks_base[block_idx] = np.array([], dtype=int)
                        blocks_extra[nbr_block_idx] = np.concatenate(
                            [blocks_extra[nbr_block_idx], blocks_extra[block_idx]], axis=-1)
                        blocks_extra[block_idx] = np.array([], dtype=int)
                        # Points now inside the merged block are no longer "extra".
                        blocks_extra[nbr_block_idx] = np.array(
                            list(set(blocks_extra[nbr_block_idx]).difference(
                                set(blocks_base[nbr_block_idx]))))
                        block_merge_count = block_merge_count + 1
                        break
                print('{}-{} of {} blocks are merged.'.format(
                    datetime.now(), block_merge_count, blocks.shape[0]))

                # The last non-empty block triggers the final (partial) flush.
                idx_last_non_empty_block = 0
                for block_idx in reversed(range(xblocks * yblocks)):
                    if blocks_base[block_idx].shape[0] != 0:
                        idx_last_non_empty_block = block_idx
                        break

                # Concatenating with empty lists can yield float arrays; indices
                # must be integers before they are used for fancy indexing.
                blocks_base = [ind.astype(int) for ind in blocks_base]
                blocks_extra = [ind.astype(int) for ind in blocks_extra]

                for block_idx in range(idx_last_non_empty_block + 1):
                    point_indices = blocks_base[block_idx]
                    point_indices_extra = blocks_extra[block_idx]
                    if point_indices.shape[0] == 0:
                        continue

                    # Split the block into near-equal samples of <= max_point_num points.
                    block_point_num = point_indices.shape[0]
                    block_split_num = int(math.ceil(block_point_num * 1.0 / max_point_num))
                    point_num_avg = int(math.ceil(block_point_num * 1.0 / block_split_num))
                    point_nums = [point_num_avg] * block_split_num
                    point_nums[-1] = block_point_num - (point_num_avg * (block_split_num - 1))
                    starts = [0] + list(np.cumsum(point_nums))

                    np.random.shuffle(point_indices)
                    block_points = xyz[point_indices]
                    block_rgb = rgb[point_indices]
                    block_dir = vec[point_indices]
                    block_labels = labels[point_indices]
                    block_xzyrgb = np.concatenate([block_points, block_rgb], axis=-1)

                    np.random.shuffle(point_indices_extra)
                    block_points_extra = xyz[point_indices_extra]
                    block_rgb_extra = rgb[point_indices_extra]
                    block_dir_extra = vec[point_indices_extra]
                    block_labels_extra = labels[point_indices_extra]
                    block_xzyrgb_extra = np.concatenate(
                        [block_points_extra, block_rgb_extra], axis=-1)
                    extra_available = len(point_indices_extra)

                    for block_split_idx in range(block_split_num):
                        start = starts[block_split_idx]
                        point_num = point_nums[block_split_idx]
                        end = start + point_num
                        idx_in_batch = idx % batch_size
                        data[idx_in_batch, 0:point_num, ...] = block_xzyrgb[start:end, :]
                        direction[idx_in_batch, 0:point_num, ...] = block_dir[start:end, :]
                        data_num[idx_in_batch] = point_num
                        label[idx_in_batch] = dataset_idx  # won't be used...
                        label_seg[idx_in_batch, 0:point_num] = block_labels[start:end]
                        indices_split_to_full[idx_in_batch, 0:point_num] = \
                            point_indices[start:end]

                        if extra_available == 0:
                            # np.random.choice cannot draw from an empty
                            # population; record that there are no extra points.
                            data_num_extra[idx_in_batch] = 0
                        else:
                            point_num_extra = int(point_num * 2.0 / 4)
                            # Sample with replacement only when there are too few.
                            indice = np.random.choice(
                                extra_available, point_num_extra,
                                replace=extra_available < point_num_extra)
                            data_extra[idx_in_batch, 0:point_num_extra, ...] = \
                                block_xzyrgb_extra[indice, :]
                            direction_extra[idx_in_batch, 0:point_num_extra, ...] = \
                                block_dir_extra[indice, :]
                            data_num_extra[idx_in_batch] = point_num_extra
                            label_seg_extra[idx_in_batch, 0:point_num_extra] = \
                                block_labels_extra[indice]

                        # Flush when the batch is full or this is the very last sample.
                        if ((idx + 1) % batch_size == 0) or \
                                (block_idx == idx_last_non_empty_block
                                 and block_split_idx == block_split_num - 1):
                            item_num = idx_in_batch + 1
                            filename_h5 = os.path.join(
                                folder, dataset, '%s_%d.h5' % (offset_name, idx_h5))
                            print('{}-Saving {}...'.format(datetime.now(), filename_h5))

                            # Context manager closes the file even if a write fails.
                            with h5py.File(filename_h5, 'w') as h5_file:
                                h5_file.create_dataset('data', data=data[0:item_num, ...])
                                h5_file.create_dataset(
                                    'data_num', data=data_num[0:item_num, ...])
                                h5_file.create_dataset(
                                    'direction', data=direction[0:item_num, ...])
                                h5_file.create_dataset('label', data=label[0:item_num, ...])
                                h5_file.create_dataset(
                                    'label_seg', data=label_seg[0:item_num, ...])
                                h5_file.create_dataset(
                                    'indices_split_to_full',
                                    data=indices_split_to_full[0:item_num, ...])
                                h5_file.create_dataset(
                                    'data_extra', data=data_extra[0:item_num, ...])
                                h5_file.create_dataset(
                                    'data_num_extra', data=data_num_extra[0:item_num, ...])
                                h5_file.create_dataset(
                                    'direction_extra', data=direction_extra[0:item_num, ...])
                                h5_file.create_dataset(
                                    'label_seg_extra', data=label_seg_extra[0:item_num, ...])

                            if args.save_ply:
                                print('{}-Saving ply of {}...'.format(
                                    datetime.now(), filename_h5))
                                filepath_label_ply = os.path.join(
                                    folder, dataset, 'ply_label',
                                    'label_%s_%d' % (offset_name, idx_h5))
                                data_utils.save_ply_property_batch(
                                    data[0:item_num, :, 0:3],
                                    label_seg[0:item_num, ...],
                                    filepath_label_ply,
                                    data_num[0:item_num, ...], 14)

                                filepath_rgb_ply = os.path.join(
                                    folder, dataset, 'ply_rgb',
                                    'rgb_%s_%d' % (offset_name, idx_h5))
                                data_utils.save_ply_color_batch(
                                    data[0:item_num, :, 0:3],
                                    (data[0:item_num, :, 3:] + 0.5) * 255,
                                    filepath_rgb_ply,
                                    data_num[0:item_num, ...])

                            idx_h5 = idx_h5 + 1
                        idx = idx + 1

            # Marker indicating we've processed this dataset
            open(dataset_marker, "w").close()

    print(mergenum)