Example #1
def export_ply_blocks(batched_data, data_num, filepath_pred, labels, setting):
    folder = os.path.join(os.path.dirname(filepath_pred), 'PLY')
    filename = os.path.splitext(os.path.basename(filepath_pred))[0]
    filepath_label_ply = os.path.join(folder, filename)
    data_utils.save_ply_property_batch(batched_data[:, :, 0:3], labels,
                                       filepath_label_ply, data_num,
                                       setting.num_class)
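
A minimal invocation sketch for export_ply_blocks above; the shapes, the 'out/pred.h5' path, and the DummySetting stand-in are hypothetical, and PointCNN's data_utils must be importable:

import numpy as np

class DummySetting:          # stand-in for the real setting object
    num_class = 14           # assumed class count

batched_data = np.random.rand(4, 1024, 6)                # (cloud, point, xyz + features)
labels = np.random.randint(0, 14, (4, 1024)).astype(np.int32)
data_num = np.full(4, 1024, dtype=np.int32)              # valid points per cloud

# Writes one PLY per cloud under out/PLY/ (path layout built by the function above).
export_ply_blocks(batched_data, data_num, 'out/pred.h5', labels, DummySetting())
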
Example #2
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--folder', '-f', help='Path to data folder')
    parser.add_argument('--save_ply', '-s', help='Convert .pts to .ply', action='store_true')
    args = parser.parse_args()
    print(args)

    root = args.folder if args.folder else '../../data/shapenet_partseg/'
    folders = [(os.path.join(root, 'train_data'), os.path.join(root, 'train_label')),
               (os.path.join(root, 'val_data'), os.path.join(root, 'val_label')),
               (os.path.join(root, 'test_data'), os.path.join(root, 'test_label'))]
    category_label_seg_max_dict = dict()
    max_point_num = 0
    label_seg_min = sys.maxsize
    for data_folder, label_folder in folders:
        if not os.path.exists(data_folder):
            continue
        for category in sorted(os.listdir(data_folder)):
            if category not in category_label_seg_max_dict:
                category_label_seg_max_dict[category] = 0
            data_category_folder = os.path.join(data_folder, category)
            category_label_seg_max = 0
            for filename in sorted(os.listdir(data_category_folder)):
                data_filepath = os.path.join(data_category_folder, filename)
                coordinates = [xyz for xyz in open(data_filepath, 'r') if len(xyz.split(' ')) == 3]
                max_point_num = max(max_point_num, len(coordinates))

                if label_folder is not None:
                    label_filepath = os.path.join(label_folder, category, filename[0:-3] + 'seg')
                    label_seg_this = np.loadtxt(label_filepath).astype(np.int32)
                    assert (len(coordinates) == len(label_seg_this))
                    category_label_seg_max = max(category_label_seg_max, max(label_seg_this))
                    label_seg_min = min(label_seg_min, min(label_seg_this))
            category_label_seg_max_dict[category] = max(category_label_seg_max_dict[category], category_label_seg_max)
    category_label_seg_max_list = [(key, category_label_seg_max_dict[key]) for key in
                                   sorted(category_label_seg_max_dict.keys())]

    category_label = dict()
    offset = 0
    category_offset = dict()
    label_seg_max = max([category_label_seg_max for _, category_label_seg_max in category_label_seg_max_list])
    with open(os.path.join(root, 'categories.txt'), 'w') as file_categories:
        for idx, (category, category_label_seg_max) in enumerate(category_label_seg_max_list):
            file_categories.write('%s %d\n' % (category, category_label_seg_max - label_seg_min + 1))
            category_label[category] = idx
            category_offset[category] = offset
            offset = offset + category_label_seg_max - label_seg_min + 1

    print('part_num:', offset)
    print('max_point_num:', max_point_num)
    print(category_label_seg_max_list)

    batch_size = 2048
    data = np.zeros((batch_size, max_point_num, 3))
    data_num = np.zeros((batch_size), dtype=np.int32)
    label = np.zeros((batch_size), dtype=np.int32)
    label_seg = np.zeros((batch_size, max_point_num), dtype=np.int32)
    for data_folder, label_folder in folders:
        if not os.path.exists(data_folder):
            continue
        data_folder_ply = data_folder + '_ply'
        file_num = 0
        for category in sorted(os.listdir(data_folder)):
            data_category_folder = os.path.join(data_folder, category)
            file_num = file_num + len(os.listdir(data_category_folder))
        idx_h5 = 0
        idx = 0

        save_path = '%s/%s' % (os.path.dirname(data_folder), os.path.basename(data_folder)[0:-5])
        filename_txt = '%s_files.txt' % (save_path)
        ply_filepath_list = []
        with open(filename_txt, 'w') as filelist:
            for category in sorted(os.listdir(data_folder)):
                data_category_folder = os.path.join(data_folder, category)
                for filename in sorted(os.listdir(data_category_folder)):
                    data_filepath = os.path.join(data_category_folder, filename)
                    coordinates = [[float(value) for value in xyz.split(' ')]
                                   for xyz in open(data_filepath, 'r') if len(xyz.split(' ')) == 3]
                    idx_in_batch = idx % batch_size
                    data[idx_in_batch, 0:len(coordinates), ...] = np.array(coordinates)
                    data_num[idx_in_batch] = len(coordinates)
                    label[idx_in_batch] = category_label[category]

                    if label_folder is not None:
                        label_filepath = os.path.join(label_folder, category, filename[0:-3] + 'seg')
                        label_seg_this = np.loadtxt(label_filepath).astype(np.int32) - label_seg_min
                        assert (len(coordinates) == label_seg_this.shape[0])
                        label_seg[idx_in_batch, 0:len(coordinates)] = label_seg_this + category_offset[category]

                    data_ply_filepath = os.path.join(data_folder_ply, category, filename[:-3] + 'ply')
                    ply_filepath_list.append(data_ply_filepath)

                    if ((idx + 1) % batch_size == 0) or idx == file_num - 1:
                        item_num = idx_in_batch + 1
                        filename_h5 = '%s_%d.h5' % (save_path, idx_h5)
                        print('{}-Saving {}...'.format(datetime.now(), filename_h5))
                        filelist.write('./%s_%d.h5\n' % (os.path.basename(data_folder)[0:-5], idx_h5))

                        file = h5py.File(filename_h5, 'w')
                        file.create_dataset('data', data=data[0:item_num, ...])
                        file.create_dataset('data_num', data=data_num[0:item_num, ...])
                        file.create_dataset('label', data=label[0:item_num, ...])
                        file.create_dataset('label_seg', data=label_seg[0:item_num, ...])
                        file.close()

                        if args.save_ply:
                            data_utils.save_ply_property_batch(data[0:item_num, ...], label_seg[0:item_num, ...],
                                                               ply_filepath_list, data_num[0:item_num, ...],
                                                               label_seg_max - label_seg_min)
                        ply_filepath_list = []
                        idx_h5 = idx_h5 + 1
                    idx = idx + 1
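
The main loop above follows a preallocate-and-flush pattern: samples accumulate in fixed-size NumPy buffers and are written to a fresh .h5 file whenever the buffer fills or the input runs out. A self-contained sketch of just that pattern, with illustrative sizes and file names:

import h5py
import numpy as np

batch_size, max_point_num = 4, 16
data = np.zeros((batch_size, max_point_num, 3))
data_num = np.zeros(batch_size, dtype=np.int32)

# variable-length point clouds standing in for the .pts files
samples = [np.random.rand(np.random.randint(5, max_point_num), 3)
           for _ in range(10)]

idx_h5 = 0
for idx, points in enumerate(samples):
    idx_in_batch = idx % batch_size
    data[idx_in_batch, :len(points)] = points   # zero-padded up to max_point_num
    data_num[idx_in_batch] = len(points)        # true length, for unpadding later

    if (idx + 1) % batch_size == 0 or idx == len(samples) - 1:
        item_num = idx_in_batch + 1             # only the filled rows are written
        with h5py.File('demo_%d.h5' % idx_h5, 'w') as f:
            f.create_dataset('data', data=data[:item_num])
            f.create_dataset('data_num', data=data_num[:item_num])
        idx_h5 += 1
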
Example #3
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--folder', '-f', help='Path to data folder')
    parser.add_argument('--max_point_num',
                        '-m',
                        help='Max point number of each sample',
                        type=int,
                        default=8192)
    parser.add_argument('--block_size',
                        '-b',
                        help='Block size',
                        type=float,
                        default=1.5)
    parser.add_argument('--grid_size',
                        '-g',
                        help='Grid size',
                        type=float,
                        default=0.03)
    parser.add_argument('--save_ply',
                        '-s',
                        help='Convert .pts to .ply',
                        action='store_true')

    args = parser.parse_args()
    print(args)

    root = args.folder if args.folder else os.path.join(
        DATA_DIR, "S3DIS", "prepare_label_rgb")
    max_point_num = args.max_point_num

    batch_size = 2048
    data = np.zeros((batch_size, max_point_num, 6))
    data_num = np.zeros((batch_size), dtype=np.int32)
    label = np.zeros((batch_size), dtype=np.int32)
    label_seg = np.zeros((batch_size, max_point_num), dtype=np.int32)
    indices_split_to_full = np.zeros((batch_size, max_point_num),
                                     dtype=np.int32)

    for area_idx in range(1, 7):
        folder = os.path.join(root, 'Area_%d' % area_idx)
        datasets = os.listdir(folder)
        for dataset_idx, dataset in enumerate(datasets):
            filename_data = os.path.join(folder, dataset, 'xyzrgb.npy')
            print('{}-Loading {}...'.format(datetime.now(), filename_data))
            xyzrgb = np.load(filename_data)

            filename_labels = os.path.join(folder, dataset, 'label.npy')
            print('{}-Loading {}...'.format(datetime.now(), filename_labels))
            labels = np.load(filename_labels).astype(int).flatten()

            xyz, rgb = np.split(xyzrgb, [3], axis=-1)
            xyz_min = np.amin(xyz, axis=0, keepdims=True)
            xyz_max = np.amax(xyz, axis=0, keepdims=True)
            xyz_center = (xyz_min + xyz_max) / 2
            xyz_center[0][-1] = xyz_min[0][-1]
            xyz = xyz - xyz_center  # align to room bottom center
            rgb = rgb / 255 - 0.5

            offsets = [('zero', 0.0), ('half', args.block_size / 2)]
            for offset_name, offset in offsets:
                idx_h5 = 0
                idx = 0

                print('{}-Computing block id of {} points...'.format(
                    datetime.now(), xyzrgb.shape[0]))
                xyz_min = np.amin(xyz, axis=0, keepdims=True) - offset
                xyz_max = np.amax(xyz, axis=0, keepdims=True)
                block_size = (args.block_size, args.block_size,
                              2 * (xyz_max[0, -1] - xyz_min[0, -1]))
                xyz_blocks = np.floor(
                    (xyz - xyz_min) / block_size).astype(int)

                print('{}-Collecting points belonging to each block...'.format(
                    datetime.now()))
                blocks, point_block_indices, block_point_counts = np.unique(
                    xyz_blocks,
                    return_inverse=True,
                    return_counts=True,
                    axis=0)
                block_point_indices = np.split(
                    np.argsort(point_block_indices),
                    np.cumsum(block_point_counts[:-1]))
                print('{}-{} is split into {} blocks.'.format(
                    datetime.now(), dataset, blocks.shape[0]))

                block_to_block_idx_map = dict()
                for block_idx in range(blocks.shape[0]):
                    block = (blocks[block_idx][0], blocks[block_idx][1])
                    block_to_block_idx_map[(block[0], block[1])] = block_idx

                # merge small blocks into one of their big neighbors
                block_point_count_threshold = max_point_num / 10
                nbr_block_offsets = [(0, 1), (1, 0), (0, -1), (-1, 0), (-1, 1),
                                     (1, 1), (1, -1), (-1, -1)]
                block_merge_count = 0
                for block_idx in range(blocks.shape[0]):
                    if block_point_counts[
                            block_idx] >= block_point_count_threshold:
                        continue

                    block = (blocks[block_idx][0], blocks[block_idx][1])
                    for x, y in nbr_block_offsets:
                        nbr_block = (block[0] + x, block[1] + y)
                        if nbr_block not in block_to_block_idx_map:
                            continue

                        nbr_block_idx = block_to_block_idx_map[nbr_block]
                        if block_point_counts[
                                nbr_block_idx] < block_point_count_threshold:
                            continue

                        block_point_indices[nbr_block_idx] = np.concatenate(
                            [
                                block_point_indices[nbr_block_idx],
                                block_point_indices[block_idx]
                            ],
                            axis=-1)
                        block_point_indices[block_idx] = np.array([], dtype=int)
                        block_merge_count = block_merge_count + 1
                        break
                print('{}-{} of {} blocks are merged.'.format(
                    datetime.now(), block_merge_count, blocks.shape[0]))

                idx_last_non_empty_block = 0
                for block_idx in reversed(range(blocks.shape[0])):
                    if block_point_indices[block_idx].shape[0] != 0:
                        idx_last_non_empty_block = block_idx
                        break

                # uniformly sample each block
                for block_idx in range(idx_last_non_empty_block + 1):
                    point_indices = block_point_indices[block_idx]
                    if point_indices.shape[0] == 0:
                        continue
                    block_points = xyz[point_indices]
                    block_min = np.amin(block_points, axis=0, keepdims=True)
                    xyz_grids = np.floor((block_points - block_min) /
                                         args.grid_size).astype(int)
                    grids, point_grid_indices, grid_point_counts = np.unique(
                        xyz_grids,
                        return_inverse=True,
                        return_counts=True,
                        axis=0)
                    grid_point_indices = np.split(
                        np.argsort(point_grid_indices),
                        np.cumsum(grid_point_counts[:-1]))
                    grid_point_count_avg = int(np.average(grid_point_counts))
                    point_indices_repeated = []
                    for grid_idx in range(grids.shape[0]):
                        point_indices_in_block = grid_point_indices[grid_idx]
                        repeat_num = math.ceil(grid_point_count_avg /
                                               point_indices_in_block.shape[0])
                        if repeat_num > 1:
                            point_indices_in_block = np.repeat(
                                point_indices_in_block, repeat_num)
                            np.random.shuffle(point_indices_in_block)
                            point_indices_in_block = \
                                point_indices_in_block[:grid_point_count_avg]
                        point_indices_repeated.extend(
                            list(point_indices[point_indices_in_block]))
                    block_point_indices[block_idx] = np.array(
                        point_indices_repeated)
                    block_point_counts[block_idx] = len(point_indices_repeated)

                for block_idx in range(idx_last_non_empty_block + 1):
                    point_indices = block_point_indices[block_idx]
                    if point_indices.shape[0] == 0:
                        continue

                    block_point_num = point_indices.shape[0]
                    block_split_num = int(
                        math.ceil(block_point_num * 1.0 / max_point_num))
                    point_num_avg = int(
                        math.ceil(block_point_num * 1.0 / block_split_num))
                    point_nums = [point_num_avg] * block_split_num
                    point_nums[-1] = block_point_num - (point_num_avg *
                                                        (block_split_num - 1))
                    starts = [0] + list(np.cumsum(point_nums))

                    np.random.shuffle(point_indices)  # shuffle so each split samples the block uniformly
                    block_points = xyz[point_indices]
                    block_rgb = rgb[point_indices]
                    block_labels = labels[point_indices]
                    # reorder axes to x, z, y before storing
                    x, y, z = np.split(block_points, (1, 2), axis=-1)
                    block_xzyrgb = np.concatenate([x, z, y, block_rgb], axis=-1)

                    for block_split_idx in range(block_split_num):
                        start = starts[block_split_idx]
                        point_num = point_nums[block_split_idx]
                        end = start + point_num
                        idx_in_batch = idx % batch_size
                        data[idx_in_batch, 0:point_num,
                             ...] = block_xzyrgb[start:end, :]
                        data_num[idx_in_batch] = point_num
                        label[idx_in_batch] = dataset_idx  # won't be used...
                        label_seg[idx_in_batch,
                                  0:point_num] = block_labels[start:end]
                        indices_split_to_full[
                            idx_in_batch,
                            0:point_num] = point_indices[start:end]

                        if ((idx + 1) % batch_size == 0) or \
                                (block_idx == idx_last_non_empty_block and block_split_idx == block_split_num - 1):
                            item_num = idx_in_batch + 1
                            filename_h5 = os.path.join(
                                folder, dataset,
                                '%s_%d.h5' % (offset_name, idx_h5))
                            print('{}-Saving {}...'.format(
                                datetime.now(), filename_h5))

                            file = h5py.File(filename_h5, 'w')
                            file.create_dataset('data',
                                                data=data[0:item_num, ...])
                            file.create_dataset('data_num',
                                                data=data_num[0:item_num, ...])
                            file.create_dataset('label',
                                                data=label[0:item_num, ...])
                            file.create_dataset('label_seg',
                                                data=label_seg[0:item_num,
                                                               ...])
                            file.create_dataset(
                                'indices_split_to_full',
                                data=indices_split_to_full[0:item_num, ...])
                            file.close()

                            if args.save_ply:
                                print('{}-Saving ply of {}...'.format(
                                    datetime.now(), filename_h5))
                                filepath_label_ply = os.path.join(
                                    folder, dataset, 'ply_label',
                                    'label_%s_%d' % (offset_name, idx_h5))
                                data_utils.save_ply_property_batch(
                                    data[0:item_num, :, 0:3],
                                    label_seg[0:item_num, ...],
                                    filepath_label_ply,
                                    data_num[0:item_num, ...], 14)

                                filepath_rgb_ply = os.path.join(
                                    folder, dataset, 'ply_rgb',
                                    'rgb_%s_%d' % (offset_name, idx_h5))
                                data_utils.save_ply_color_batch(
                                    data[0:item_num, :, 0:3],
                                    (data[0:item_num, :, 3:] + 0.5) * 255,
                                    filepath_rgb_ply,
                                    data_num[0:item_num, ...])

                            idx_h5 = idx_h5 + 1
                        idx = idx + 1
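
This script and the ones below all bucket points by block with the same np.unique idiom: return_inverse maps every point to its block row, and argsort of that mapping, split at the cumulative counts, yields one index array per block. A self-contained illustration:

import numpy as np

xyz_blocks = np.array([[0, 0], [1, 0], [0, 0], [1, 1], [1, 0]])  # block id per point

blocks, point_block_indices, block_point_counts = np.unique(
    xyz_blocks, return_inverse=True, return_counts=True, axis=0)
# sorting the inverse mapping groups same-block points; cumsum marks group boundaries
block_point_indices = np.split(np.argsort(point_block_indices),
                               np.cumsum(block_point_counts[:-1]))
for block, indices in zip(blocks, block_point_indices):
    print(block, indices)   # e.g. block [0 0] holds points 0 and 2
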
Example #4
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--folder', '-f', help='Path to data folder')
    parser.add_argument('--max_point_num',
                        '-m',
                        help='Max point number of each sample',
                        type=int,
                        default=8192)
    parser.add_argument('--block_size',
                        '-b',
                        help='Block size',
                        type=float,
                        default=1.5)
    parser.add_argument('--grid_size',
                        '-g',
                        help='Grid size',
                        type=float,
                        default=0.03)
    parser.add_argument('--save_ply',
                        '-s',
                        help='Convert .pts to .ply',
                        action='store_true')

    args = parser.parse_args()
    print(args)

    root = args.folder if args.folder else '../../data/scannet/seg'
    max_point_num = args.max_point_num

    batch_size = 2048
    data = np.zeros((batch_size, max_point_num, 3))
    data_num = np.zeros((batch_size), dtype=np.int32)
    label = np.zeros((batch_size), dtype=np.int32)
    label_seg = np.zeros((batch_size, max_point_num), dtype=np.int32)
    indices_split_to_full = np.zeros((batch_size, max_point_num, 2),
                                     dtype=np.int32)

    filenames = list(Path(root).glob('*seg*.hdf5'))

    t = tqdm(filenames)
    for filename in t:
        t.set_description(filename.stem)
        # print('{}-Loading {}...'.format(datetime.now(), filename))

        with h5py.File(filename, 'r') as h5f:
            points = np.array(h5f['data'])
            xyz = points[:, :, [9, 11, 10]].reshape(-1, 3)  # take columns 9, 11, 10 as x, z, y
            labels = np.array(h5f['label']).ravel()

        offsets = [('zero', 0.0), ('half', args.block_size / 2)]
        for offset_name, offset in offsets:
            idx_h5 = 0
            idx = 0

            # align to room bottom center
            xyz_min = np.amin(xyz, axis=0, keepdims=True)
            xyz_max = np.amax(xyz, axis=0, keepdims=True)
            xyz_center = (xyz_min + xyz_max) / 2
            xyz_center[0][-1] = xyz_min[0][-1]
            xyz = xyz - xyz_center

            # print('{}-Computing block id of {} points...'.format(datetime.now(), xyz.shape[0]))
            xyz_min = np.amin(xyz, axis=0, keepdims=True) - offset
            xyz_max = np.amax(xyz, axis=0, keepdims=True)
            block_size = (args.block_size, args.block_size,
                          2 * (xyz_max[0, -1] - xyz_min[0, -1]))
            xyz_blocks = np.floor((xyz - xyz_min) / block_size).astype(int)

            # print('{}-Collecting points belong to each block...'.format(datetime.now(), xyz.shape[0]))
            blocks, point_block_indices, block_point_counts = np.unique(
                xyz_blocks, return_inverse=True, return_counts=True, axis=0)
            block_point_indices = np.split(np.argsort(point_block_indices),
                                           np.cumsum(block_point_counts[:-1]))
            # print('{}-{} is split into {} blocks.'.format(datetime.now(), os.path.basename(filename), blocks.shape[0]))

            block_to_block_idx_map = dict()
            for block_idx in range(blocks.shape[0]):
                block = (blocks[block_idx][0], blocks[block_idx][1])
                block_to_block_idx_map[(block[0], block[1])] = block_idx

            # merge small blocks into one of their big neighbors
            block_point_count_threshold = max_point_num / 10
            nbr_block_offsets = [(0, 1), (1, 0), (0, -1), (-1, 0), (-1, 1),
                                 (1, 1), (1, -1), (-1, -1)]
            block_merge_count = 0
            for block_idx in range(blocks.shape[0]):
                if block_point_counts[block_idx] >= block_point_count_threshold:
                    continue

                block = (blocks[block_idx][0], blocks[block_idx][1])
                for x, y in nbr_block_offsets:
                    nbr_block = (block[0] + x, block[1] + y)
                    if nbr_block not in block_to_block_idx_map:
                        continue

                    nbr_block_idx = block_to_block_idx_map[nbr_block]
                    if block_point_counts[
                            nbr_block_idx] < block_point_count_threshold:
                        continue

                    block_point_indices[nbr_block_idx] = np.concatenate(
                        [
                            block_point_indices[nbr_block_idx],
                            block_point_indices[block_idx]
                        ],
                        axis=-1)
                    block_point_indices[block_idx] = np.array([], dtype=int)
                    block_merge_count = block_merge_count + 1
                    break
            # print('{}-{} of {} blocks got merged.'.format(datetime.now(), block_merge_count, blocks.shape[0]))

            idx_last_non_empty_block = 0
            for block_idx in reversed(range(blocks.shape[0])):
                if block_point_indices[block_idx].shape[0] != 0:
                    idx_last_non_empty_block = block_idx
                    break

            # uniformly sample each block
            for block_idx in range(idx_last_non_empty_block + 1):
                point_indices = block_point_indices[block_idx]
                if point_indices.shape[0] == 0:
                    continue
                block_points = xyz[point_indices]
                block_min = np.amin(block_points, axis=0, keepdims=True)
                xyz_grids = np.floor(
                    (block_points - block_min) / args.grid_size).astype(int)
                grids, point_grid_indices, grid_point_counts = np.unique(
                    xyz_grids, return_inverse=True, return_counts=True, axis=0)
                grid_point_indices = np.split(
                    np.argsort(point_grid_indices),
                    np.cumsum(grid_point_counts[:-1]))
                grid_point_count_avg = int(np.average(grid_point_counts))
                point_indices_repeated = []
                for grid_idx in range(grids.shape[0]):
                    point_indices_in_block = grid_point_indices[grid_idx]
                    repeat_num = math.ceil(grid_point_count_avg /
                                           point_indices_in_block.shape[0])
                    if repeat_num > 1:
                        point_indices_in_block = np.repeat(
                            point_indices_in_block, repeat_num)
                        np.random.shuffle(point_indices_in_block)
                        point_indices_in_block = \
                            point_indices_in_block[:grid_point_count_avg]
                    point_indices_repeated.extend(
                        list(point_indices[point_indices_in_block]))
                block_point_indices[block_idx] = np.array(
                    point_indices_repeated)
                block_point_counts[block_idx] = len(point_indices_repeated)

            for block_idx in range(idx_last_non_empty_block + 1):
                point_indices = block_point_indices[block_idx]
                if point_indices.shape[0] == 0:
                    continue

                block_point_num = point_indices.shape[0]
                block_split_num = int(
                    math.ceil(block_point_num / max_point_num))
                point_num_avg = math.ceil(block_point_num / block_split_num)
                point_nums = [point_num_avg] * block_split_num
                point_nums[-1] = block_point_num - (point_num_avg *
                                                    (block_split_num - 1))
                # starts = [0] + list(np.cumsum(point_nums))

                np.random.shuffle(point_indices)
                block_points = xyz[point_indices]
                block_labels = labels[point_indices]
                x, y, z = np.split(block_points, (1, 2), axis=-1)
                block_xzy = np.concatenate([x, z, y], axis=-1)

                # only the first split of each block is kept (the full split loop is disabled)
                block_split_idx = 0
                start = 0
                point_num = point_nums[block_split_idx]
                end = start + point_num
                idx_in_batch = idx % batch_size
                data[idx_in_batch, 0:point_num, ...] = block_xzy[start:end, :]
                data_num[idx_in_batch] = point_num
                label[idx_in_batch] = 0  # won't be used...
                label_seg[idx_in_batch, 0:point_num] = block_labels[start:end]

                ind_in_room = point_indices[start:end]
                indices_split_to_full[idx_in_batch, 0:point_num] = np.stack(
                    [np.zeros_like(ind_in_room), ind_in_room], -1)

                if ((idx + 1) % batch_size == 0) \
                        or (block_idx == idx_last_non_empty_block):
                    # (the block_split_idx check was dropped along with the disabled split loop)
                    item_num = idx_in_batch + 1
                    filename_h5 = os.path.join(
                        root, '..', 'subsampled_8192',
                        '%s_%s_%d.h5' % (filename.stem, offset_name, idx_h5))
                    os.makedirs(os.path.dirname(filename_h5), exist_ok=True)
                    # print('{}-Saving {}...'.format(datetime.now(), filename_h5))

                    file = h5py.File(filename_h5, 'w')
                    file.create_dataset('data', data=data[0:item_num, ...])
                    file.create_dataset('data_num',
                                        data=data_num[0:item_num, ...])
                    file.create_dataset('label', data=label[0:item_num, ...])
                    file.create_dataset('label_seg',
                                        data=label_seg[0:item_num, ...])
                    file.create_dataset('indices_split_to_full',
                                        data=indices_split_to_full[0:item_num,
                                                                   ...])
                    file.close()

                    if args.save_ply:
                        tqdm.write('{}-Saving ply of {}...'.format(
                            datetime.now(), filename_h5))
                        filepath_label_ply = os.path.join(
                            root, '..', 'subsampled_8192', 'ply_label',
                            'label_%s_%d' % (offset_name, idx_h5))
                        data_utils.save_ply_property_batch(
                            data[0:item_num, :, 0:3], label_seg[0:item_num,
                                                                ...],
                            filepath_label_ply, data_num[0:item_num, ...], 22)

                    idx_h5 = idx_h5 + 1
                idx = idx + 1
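
The "uniformly sample each block" step balances grid cells toward the average occupancy: under-filled cells are repeated and shuffled, then truncated to the average count, while cells at or above the average keep every point. A standalone sketch of that balancing with made-up cells:

import math
import numpy as np

grid_point_indices = [np.array([0, 1]),           # sparse cell: 2 points
                      np.array([2, 3, 4, 5, 6])]  # dense cell: 5 points
grid_point_count_avg = 3                          # int(np.average([2, 5]))

balanced = []
for cell in grid_point_indices:
    repeat_num = math.ceil(grid_point_count_avg / cell.shape[0])
    if repeat_num > 1:                  # sparse: repeat, shuffle, cut to the average
        cell = np.repeat(cell, repeat_num)
        np.random.shuffle(cell)
        cell = cell[:grid_point_count_avg]
    balanced.extend(cell.tolist())      # dense cells pass through unchanged
print(balanced)   # 3 indices drawn from the sparse cell, then 2..6
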
Example #5
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--folder', '-f', help='Path to data folder')
    parser.add_argument('--max_point_num',
                        '-m',
                        help='Max point number of each sample',
                        type=int,
                        default=8192)
    parser.add_argument('--block_size',
                        '-b',
                        help='Block size',
                        type=float,
                        default=1.5)
    parser.add_argument('--grid_size',
                        '-g',
                        help='Grid size',
                        type=float,
                        default=0.03)
    parser.add_argument('--save_ply',
                        '-s',
                        help='Convert .pts to .ply',
                        action='store_true')

    args = parser.parse_args()
    print(args)

    root = args.folder if args.folder else '../../data/scannet/seg'
    max_point_num = args.max_point_num

    batch_size = 2048
    data = np.zeros((batch_size, max_point_num, 6))
    data_num = np.zeros((batch_size), dtype=np.int32)
    label = np.zeros((batch_size), dtype=np.int32)
    label_seg = np.zeros((batch_size, max_point_num), dtype=np.int32)
    indices_split_to_full = np.zeros((batch_size, max_point_num, 2),
                                     dtype=np.int32)

    datasets = ['train', 'val']
    for dataset_idx, dataset in enumerate(datasets):
        filename = os.path.abspath(
            os.path.join(root, 'scannet_%s.pickle' % dataset))

        print('{}-Loading {}...'.format(datetime.now(), filename))
        with open(filename, 'rb') as file_pickle:
            xyz_all = pickle.load(file_pickle, encoding='latin1')
            labels_all = pickle.load(file_pickle, encoding='latin1')

        offsets = [('zero', 0.0), ('half', args.block_size / 2)]
        for offset_name, offset in offsets:
            idx_h5 = 0
            idx = 0

            for room_idx, pts in enumerate(xyz_all):
                # align to room bottom center
                xyz = pts[:, 0:3]
                feature = pts[:, 3:]

                xyz_min = np.amin(xyz, axis=0, keepdims=True)
                xyz_max = np.amax(xyz, axis=0, keepdims=True)
                xyz_center = (xyz_min + xyz_max) / 2
                xyz_center[0][-1] = xyz_min[0][-1]
                xyz = xyz - xyz_center

                labels = labels_all[room_idx]
                print('{}-Computing block id of {} points...'.format(
                    datetime.now(), xyz.shape[0]))
                xyz_min = np.amin(xyz, axis=0, keepdims=True) - offset
                xyz_max = np.amax(xyz, axis=0, keepdims=True)
                block_size = (args.block_size, args.block_size,
                              2 * (xyz_max[0, -1] - xyz_min[0, -1]))
                xyz_blocks = np.floor(
                    (xyz - xyz_min) / block_size).astype(int)

                print('{}-Collecting points belonging to each block...'.format(
                    datetime.now()))
                blocks, point_block_indices, block_point_counts = np.unique(
                    xyz_blocks,
                    return_inverse=True,
                    return_counts=True,
                    axis=0)
                block_point_indices = np.split(
                    np.argsort(point_block_indices),
                    np.cumsum(block_point_counts[:-1]))  # per-block lists of point indices
                print('{}-{} is split into {} blocks.'.format(
                    datetime.now(), dataset, blocks.shape[0]))

                block_to_block_idx_map = dict()
                for block_idx in range(blocks.shape[0]):
                    block = (blocks[block_idx][0], blocks[block_idx][1])
                    block_to_block_idx_map[(block[0], block[1])] = block_idx

                # merge small blocks into one of their big neighbors
                block_point_count_threshold = max_point_num / 10
                nbr_block_offsets = [(0, 1), (1, 0), (0, -1), (-1, 0), (-1, 1),
                                     (1, 1), (1, -1), (-1, -1)]
                block_merge_count = 0
                for block_idx in range(blocks.shape[0]):
                    if block_point_counts[
                            block_idx] >= block_point_count_threshold:
                        continue

                    block = (blocks[block_idx][0], blocks[block_idx][1])
                    for x, y in nbr_block_offsets:
                        nbr_block = (block[0] + x, block[1] + y)
                        if nbr_block not in block_to_block_idx_map:
                            continue

                        nbr_block_idx = block_to_block_idx_map[nbr_block]
                        if block_point_counts[
                                nbr_block_idx] < block_point_count_threshold:
                            continue

                        block_point_indices[nbr_block_idx] = np.concatenate(
                            [
                                block_point_indices[nbr_block_idx],
                                block_point_indices[block_idx]
                            ],
                            axis=-1)
                        block_point_indices[block_idx] = np.array([], dtype=int)
                        block_merge_count = block_merge_count + 1
                        break
                print('{}-{} of {} blocks are merged.'.format(
                    datetime.now(), block_merge_count, blocks.shape[0]))

                idx_last_non_empty_block = 0
                for block_idx in reversed(range(blocks.shape[0])):
                    if block_point_indices[block_idx].shape[0] != 0:
                        idx_last_non_empty_block = block_idx
                        break

                # uniformly sample each block
                # subdivide each block into grid cells and take the average cell occupancy;
                # cells below the average are repeated and shuffled up to the average count,
                # cells at or above the average keep all of their points

                for block_idx in range(idx_last_non_empty_block + 1):
                    point_indices = block_point_indices[block_idx]
                    if point_indices.shape[0] == 0:
                        continue
                    block_points = xyz[point_indices]
                    block_points_feature = feature[point_indices]

                    block_min = np.amin(block_points, axis=0, keepdims=True)
                    xyz_grids = np.floor((block_points - block_min) /
                                         args.grid_size).astype(int)
                    grids, point_grid_indices, grid_point_counts = np.unique(
                        xyz_grids,
                        return_inverse=True,
                        return_counts=True,
                        axis=0)
                    grid_point_indices = np.split(
                        np.argsort(point_grid_indices),
                        np.cumsum(grid_point_counts[:-1]))
                    grid_point_count_avg = int(np.average(grid_point_counts))
                    point_indices_repeated = []
                    for grid_idx in range(grids.shape[0]):
                        point_indices_in_block = grid_point_indices[grid_idx]
                        repeat_num = math.ceil(grid_point_count_avg /
                                               point_indices_in_block.shape[0])
                        if repeat_num > 1:
                            point_indices_in_block = np.repeat(
                                point_indices_in_block, repeat_num)
                            np.random.shuffle(point_indices_in_block)
                            point_indices_in_block = \
                                point_indices_in_block[:grid_point_count_avg]
                        point_indices_repeated.extend(
                            list(point_indices[point_indices_in_block]))
                    block_point_indices[block_idx] = np.array(
                        point_indices_repeated)
                    block_point_counts[block_idx] = len(point_indices_repeated)

                for block_idx in range(idx_last_non_empty_block + 1):
                    point_indices = block_point_indices[block_idx]
                    if point_indices.shape[0] == 0:
                        continue
                    # split the block into chunks of at most max_point_num points
                    block_point_num = point_indices.shape[0]
                    block_split_num = int(
                        math.ceil(block_point_num * 1.0 / max_point_num))
                    point_num_avg = int(
                        math.ceil(block_point_num * 1.0 / block_split_num))
                    point_nums = [point_num_avg] * block_split_num
                    point_nums[-1] = block_point_num - (point_num_avg *
                                                        (block_split_num - 1))
                    starts = [0] + list(np.cumsum(point_nums))

                    np.random.shuffle(point_indices)

                    block_points = xyz[point_indices]
                    block_points_feature = feature[point_indices]

                    block_labels = labels[point_indices]
                    x, y, z = np.split(block_points, (1, 2), axis=-1)
                    r, g, b = np.split(block_points_feature, (1, 2), axis=-1)
                    block_xzy_feature = np.concatenate([x, z, y, r, g, b],
                                                       axis=-1)

                    for block_split_idx in range(block_split_num):
                        start = starts[block_split_idx]
                        point_num = point_nums[block_split_idx]
                        end = start + point_num
                        idx_in_batch = idx % batch_size
                        data[idx_in_batch, 0:point_num,
                             ...] = block_xzy_feature[start:end, :]
                        data_num[idx_in_batch] = point_num
                        label[idx_in_batch] = dataset_idx  # won't be used...
                        label_seg[idx_in_batch,
                                  0:point_num] = block_labels[start:end]

                        ind_in_room = point_indices[start:end]
                        indices_split_to_full[idx_in_batch,
                                              0:point_num] = np.stack([
                                                  np.zeros_like(ind_in_room) +
                                                  room_idx, ind_in_room
                                              ], -1)


                        if ((idx + 1) % batch_size == 0) \
                                or (room_idx == len(xyz_all) - 1
                                    and block_idx == idx_last_non_empty_block
                                    and block_split_idx == block_split_num - 1):
                            item_num = idx_in_batch + 1
                            filename_h5 = os.path.join(
                                root, dataset,
                                '%s_%d.h5' % (offset_name, idx_h5))
                            os.makedirs(os.path.dirname(filename_h5),
                                        exist_ok=True)
                            print('{}-Saving {}...'.format(
                                datetime.now(), filename_h5))

                            file = h5py.File(filename_h5, 'w')
                            file.create_dataset('data',
                                                data=data[0:item_num, ...])
                            file.create_dataset('data_num',
                                                data=data_num[0:item_num, ...])
                            file.create_dataset('label',
                                                data=label[0:item_num, ...])
                            file.create_dataset('label_seg',
                                                data=label_seg[0:item_num,
                                                               ...])
                            file.create_dataset(
                                'indices_split_to_full',
                                data=indices_split_to_full[0:item_num, ...])
                            file.close()

                            if args.save_ply:
                                print('{}-Saving ply of {}...'.format(
                                    datetime.now(), filename_h5))
                                filepath_label_ply = os.path.join(
                                    root, dataset, 'ply_label',
                                    'label_%s_%d' % (offset_name, idx_h5))
                                data_utils.save_ply_property_batch(
                                    data[0:item_num, :,
                                         0:3], label_seg[0:item_num, ...],
                                    filepath_label_ply, data_num[0:item_num,
                                                                 ...], 22)

                            idx_h5 = idx_h5 + 1
                        idx = idx + 1
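
Oversized blocks are cut into near-equal splits: the split count is ceil(n / max_point_num), each split gets the average size, and the last split absorbs the remainder. Worked through with illustrative numbers:

import math
import numpy as np

block_point_num, max_point_num = 20000, 8192
block_split_num = math.ceil(block_point_num / max_point_num)   # ceil(2.44) = 3
point_num_avg = math.ceil(block_point_num / block_split_num)   # ceil(6666.7) = 6667
point_nums = [point_num_avg] * block_split_num
point_nums[-1] = block_point_num - point_num_avg * (block_split_num - 1)
starts = [0] + np.cumsum(point_nums).tolist()
print(point_nums, starts)   # [6667, 6667, 6666] [0, 6667, 13334, 20000]
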
Example #6
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--folder', '-f', help='Path to data folder')
    parser.add_argument('--save_ply',
                        '-s',
                        help='Convert .pts to .ply',
                        action='store_true')
    args = parser.parse_args()
    print(args)

    root = args.folder if args.folder else '../test/hinterstoisser_rgbseg'
    folders = [(os.path.join(root,
                             'train_data'), os.path.join(root, 'train_label')),
               (os.path.join(root,
                             'val_data'), os.path.join(root, 'val_label')),
               (os.path.join(root,
                             'test_data'), os.path.join(root, 'test_label'))]
    category_label_seg_max_dict = dict()
    max_point_num = 0
    label_seg_min = sys.maxsize
    for data_folder, label_folder in folders:
        if not os.path.exists(data_folder):
            continue
        for category in sorted(os.listdir(data_folder)):
            if category not in category_label_seg_max_dict:
                category_label_seg_max_dict[category] = 0
            data_category_folder = os.path.join(data_folder, category)
            category_label_seg_max = 0
            for filename in sorted(os.listdir(data_category_folder)):
                data_filepath = os.path.join(data_category_folder, filename)
                coordinates = [
                    xyz for xyz in open(data_filepath, 'r')
                    if len(xyz.split(' ')) == 6
                ]  # TODO: adjust to the channel count of the data
                max_point_num = max(max_point_num, len(coordinates))

                if label_folder is not None:
                    label_filepath = os.path.join(label_folder, category,
                                                  filename[0:-3] + 'seg')
                    print("{}", label_filepath)
                    label_seg_this = np.loadtxt(label_filepath).astype(
                        np.int32)
                    assert (len(coordinates) == len(label_seg_this))
                    category_label_seg_max = max(category_label_seg_max,
                                                 max(label_seg_this))
                    label_seg_min = min(label_seg_min, min(label_seg_this))
            category_label_seg_max_dict[category] = max(
                category_label_seg_max_dict[category], category_label_seg_max)
    category_label_seg_max_list = [
        (key, category_label_seg_max_dict[key])
        for key in sorted(category_label_seg_max_dict.keys())
    ]

    category_label = dict()
    offset = 0
    category_offset = dict()
    label_seg_max = max([
        category_label_seg_max
        for _, category_label_seg_max in category_label_seg_max_list
    ])
    with open(os.path.join(root, 'categories.txt'), 'w') as file_categories:
        for idx, (category, category_label_seg_max
                  ) in enumerate(category_label_seg_max_list):
            file_categories.write(
                '%s %d\n' %
                (category, category_label_seg_max - label_seg_min + 1))
            category_label[category] = idx
            category_offset[category] = offset
            offset = offset + category_label_seg_max - label_seg_min + 1

    print('part_num:', offset)
    print('max_point_num:', max_point_num)
    print(category_label_seg_max_list)

    batch_size = 2048
    # TODO: adjust the channel count; 3 channels take about 8 GB here, 6 channels about 15 GB
    data = np.zeros((batch_size, max_point_num,
                     6))  # batch_size x cloud size (e.g. 307200) x channels, i.e. 2048 images at a time
    data_num = np.zeros((batch_size), dtype=np.int32)  # one point count per cloud
    label = np.zeros((batch_size), dtype=np.int32)  # one label per cloud
    label_seg = np.zeros(
        (batch_size, max_point_num),
        dtype=np.int32)  # per-point labels (one per pixel of each of the 2048 images)
    for data_folder, label_folder in folders:  # data and label folders
        if not os.path.exists(data_folder):
            continue
        data_folder_ply = data_folder + '_ply'
        file_num = 0  # total number of data files
        for category in sorted(
                os.listdir(data_folder)):  # category folders "01", "02", ... under data
            data_category_folder = os.path.join(data_folder, category)
            file_num = file_num + len(os.listdir(data_category_folder))
        idx_h5 = 0
        idx = 0

        save_path = '%s/%s' % (os.path.dirname(data_folder),
                               os.path.basename(data_folder)[0:-5])
        filename_txt = '%s_files.txt' % save_path  # list of h5 paths, e.g. train_files.txt
        ply_filepath_list = []
        with open(filename_txt, 'w') as filelist:  # e.g. train_files.txt
            for category in sorted(
                    os.listdir(data_folder)):  # category folders "01", "02", ...
                data_category_folder = os.path.join(data_folder, category)
                for filename in sorted(
                        os.listdir(data_category_folder)):  # every file in the category folder
                    data_filepath = os.path.join(data_category_folder,
                                                 filename)
                    coordinates = [[float(value) for value in xyz.split(' ')]
                                   for xyz in open(data_filepath, 'r')
                                   if len(xyz.split(' ')) == 6
                                   ]  # read the file: each line becomes one float tuple
                    idx_in_batch = idx % batch_size
                    data[idx_in_batch, 0:len(coordinates),
                         ...] = np.array(coordinates)
                    data_num[idx_in_batch] = len(coordinates)  # size of this point cloud
                    label[idx_in_batch] = category_label[category]  # category label of this cloud

                    if label_folder is not None:
                        label_filepath = os.path.join(
                            label_folder, category,
                            filename[0:-3] + 'seg')  # label file
                        label_seg_this = np.loadtxt(label_filepath).astype(
                            np.int32) - label_seg_min  # all labels in the file
                        assert (len(coordinates) == label_seg_this.shape[0])
                        label_seg[idx_in_batch, 0:len(coordinates)] = \
                            label_seg_this + category_offset[category]  # per-point labels of this cloud

                    data_ply_filepath = os.path.join(data_folder_ply, category,
                                                     filename[:-3] + 'ply')
                    ply_filepath_list.append(data_ply_filepath)

                    if ((idx + 1) % batch_size == 0) or \
                            idx == file_num - 1:  # batch is full or last file reached
                        item_num = idx_in_batch + 1
                        filename_h5 = '%s_%d.h5' % (save_path, idx_h5)  # output path, e.g. train_0.h5
                        print('{}-Saving {}...'.format(datetime.now(),
                                                       filename_h5))
                        filelist.write(
                            './%s_%d.h5\n' %
                            (os.path.basename(data_folder)[0:-5],
                             idx_h5))  # record the h5 path in train_files.txt etc.

                        file = h5py.File(filename_h5, 'w')  # open the h5 file for writing
                        file.create_dataset('data', data=data[0:item_num, ...])
                        file.create_dataset('data_num',
                                            data=data_num[0:item_num, ...])
                        file.create_dataset('label',
                                            data=label[0:item_num, ...])
                        file.create_dataset('label_seg',
                                            data=label_seg[0:item_num, ...])
                        file.close()

                        if args.save_ply:
                            data_utils.save_ply_property_batch(
                                data[0:item_num, ...], label_seg[0:item_num,
                                                                 ...],
                                ply_filepath_list, data_num[0:item_num, ...],
                                label_seg_max - label_seg_min)
                        ply_filepath_list = []
                        idx_h5 = idx_h5 + 1
                    idx = idx + 1
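
The small-block merge shared by these scripts scans the eight 2-D neighbors of each under-filled block and hands its points to the first neighbor that is large enough; isolated small blocks stay as they are. A compact sketch of the lookup with made-up counts:

import numpy as np

blocks = np.array([[0, 0], [0, 1], [5, 5]])    # 2-D block coordinates
block_point_counts = np.array([900, 120, 50])  # [5, 5] has no big neighbor
threshold = 8192 / 10                          # max_point_num / 10
block_to_idx = {tuple(b): i for i, b in enumerate(blocks)}
nbr_offsets = [(0, 1), (1, 0), (0, -1), (-1, 0),
               (-1, 1), (1, 1), (1, -1), (-1, -1)]

for i, block in enumerate(blocks):
    if block_point_counts[i] >= threshold:
        continue
    for dx, dy in nbr_offsets:
        nbr = (block[0] + dx, block[1] + dy)
        if nbr in block_to_idx and block_point_counts[block_to_idx[nbr]] >= threshold:
            print('merge block %d into block %d' % (i, block_to_idx[nbr]))
            break   # first big neighbor wins; [5, 5] finds none and stays small
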
Example #7
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--folder', '-f', help='Path to data folder')
    parser.add_argument('--max_point_num', '-m', help='Max point number of each sample', type=int, default=8192)
    parser.add_argument('--block_size', '-b', help='Block size', type=float, default=5.0)
    parser.add_argument('--grid_size', '-g', help='Grid size', type=float, default=0.1)
    parser.add_argument('--save_ply', '-s', help='Convert .pts to .ply', action='store_true')

    args = parser.parse_args()
    print(args)

    root = args.folder if args.folder else '../../data/semantic3d'
    max_point_num = args.max_point_num

    batch_size = 2048
    data = np.zeros((batch_size, max_point_num, 7))
    data_num = np.zeros((batch_size), dtype=np.int32)
    label = np.zeros((batch_size), dtype=np.int32)
    label_seg = np.zeros((batch_size, max_point_num), dtype=np.int32)
    indices_split_to_full = np.zeros((batch_size, max_point_num), dtype=np.int32)

    if args.save_ply:
        data_center = np.zeros((batch_size, max_point_num, 3))

    folders = [os.path.join(root, folder) for folder in ['train', 'val', 'test']]
    for folder in folders:
        datasets = [filename[:-4] for filename in os.listdir(folder) if filename.endswith('.txt')]
        for dataset_idx, dataset in enumerate(datasets):
            filename_txt = os.path.join(folder, dataset + '.txt')
            print('{}-Loading {}...'.format(datetime.now(), filename_txt))
            xyzirgb = np.loadtxt(filename_txt)
            filename_labels = os.path.join(folder, dataset + '.labels')
            has_labels = os.path.exists(filename_labels)
            if has_labels:
                print('{}-Loading {}...'.format(datetime.now(), filename_labels))
                labels = np.loadtxt(filename_labels, dtype=int)
                indices = (labels != 0)
                labels = labels[indices] - 1  # shift down by one since unlabeled (0) points are removed
                xyzirgb = xyzirgb[indices, :]
            else:
                labels = np.zeros((xyzirgb.shape[0]))

            xyz, i, rgb = np.split(xyzirgb, (3, 4), axis=-1)
            i = i / 2000 + 0.5
            rgb = rgb / 255 - 0.5

            offsets = [('zero', 0.0), ('half', args.block_size / 2)]
            for offset_name, offset in offsets:
                idx_h5 = 0
                idx = 0

                print('{}-Computing block id of {} points...'.format(datetime.now(), xyzirgb.shape[0]))
                xyz_min = np.amin(xyz, axis=0, keepdims=True) - offset
                xyz_max = np.amax(xyz, axis=0, keepdims=True)
                block_size = (args.block_size, args.block_size, 2 * (xyz_max[0, -1] - xyz_min[0, -1]))
                xyz_blocks = np.floor((xyz - xyz_min) / block_size).astype(int)

                print('{}-Collecting points belonging to each block...'.format(datetime.now()))
                blocks, point_block_indices, block_point_counts = np.unique(xyz_blocks, return_inverse=True,
                                                                            return_counts=True, axis=0)
                block_point_indices = np.split(np.argsort(point_block_indices), np.cumsum(block_point_counts[:-1]))
                print('{}-{} is split into {} blocks.'.format(datetime.now(), dataset, blocks.shape[0]))

                block_to_block_idx_map = dict()
                for block_idx in range(blocks.shape[0]):
                    block = (blocks[block_idx][0], blocks[block_idx][1])
                    block_to_block_idx_map[(block[0], block[1])] = block_idx

                # merge small blocks into one of their big neighbors
                block_point_count_threshold = max_point_num / 10
                nbr_block_offsets = [(0, 1), (1, 0), (0, -1), (-1, 0), (-1, 1), (1, 1), (1, -1), (-1, -1)]
                block_merge_count = 0
                for block_idx in range(blocks.shape[0]):
                    if block_point_counts[block_idx] >= block_point_count_threshold:
                        continue

                    block = (blocks[block_idx][0], blocks[block_idx][1])
                    for x, y in nbr_block_offsets:
                        nbr_block = (block[0] + x, block[1] + y)
                        if nbr_block not in block_to_block_idx_map:
                            continue

                        nbr_block_idx = block_to_block_idx_map[nbr_block]
                        if block_point_counts[nbr_block_idx] < block_point_count_threshold:
                            continue

                        block_point_indices[nbr_block_idx] = np.concatenate(
                            [block_point_indices[nbr_block_idx], block_point_indices[block_idx]], axis=-1)
                        block_point_indices[block_idx] = np.array([], dtype=np.int64)
                        block_merge_count = block_merge_count + 1
                        break
                print('{}-{} of {} blocks are merged.'.format(datetime.now(), block_merge_count, blocks.shape[0]))

                idx_last_non_empty_block = 0
                for block_idx in reversed(range(blocks.shape[0])):
                    if block_point_indices[block_idx].shape[0] != 0:
                        idx_last_non_empty_block = block_idx
                        break

                # uniformly sample each block
                for block_idx in range(idx_last_non_empty_block + 1):
                    point_indices = block_point_indices[block_idx]
                    if point_indices.shape[0] == 0:
                        continue
                    block_points = xyz[point_indices]
                    block_min = np.amin(block_points, axis=0, keepdims=True)
                    xyz_grids = np.floor((block_points - block_min) / args.grid_size).astype(np.int64)
                    grids, point_grid_indices, grid_point_counts = np.unique(xyz_grids, return_inverse=True,
                                                                             return_counts=True, axis=0)
                    grid_point_indices = np.split(np.argsort(point_grid_indices), np.cumsum(grid_point_counts[:-1]))
                    grid_point_count_avg = int(np.average(grid_point_counts))
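                    # sparse grid cells are densified by repeating their points
                    # up to the average per-cell count, so each block ends up
                    # sampled roughly uniformly in space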
                    point_indices_repeated = []
                    for grid_idx in range(grids.shape[0]):
                        point_indices_in_block = grid_point_indices[grid_idx]
                        repeat_num = math.ceil(grid_point_count_avg / point_indices_in_block.shape[0])
                        if repeat_num > 1:
                            point_indices_in_block = np.repeat(point_indices_in_block, repeat_num)
                            np.random.shuffle(point_indices_in_block)
                            point_indices_in_block = point_indices_in_block[:grid_point_count_avg]
                        point_indices_repeated.extend(list(point_indices[point_indices_in_block]))
                    block_point_indices[block_idx] = np.array(point_indices_repeated)
                    block_point_counts[block_idx] = len(point_indices_repeated)

                for block_idx in range(idx_last_non_empty_block + 1):
                    point_indices = block_point_indices[block_idx]
                    if point_indices.shape[0] == 0:
                        continue

                    block_point_num = point_indices.shape[0]
                    block_split_num = int(math.ceil(block_point_num * 1.0 / max_point_num))
                    point_num_avg = int(math.ceil(block_point_num * 1.0 / block_split_num))
                    point_nums = [point_num_avg] * block_split_num
                    point_nums[-1] = block_point_num - (point_num_avg * (block_split_num - 1))
                    starts = [0] + list(np.cumsum(point_nums))
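                    # starts[i]:starts[i] + point_nums[i] delimits split i of
                    # this block; only the last split may be smaller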

                    np.random.shuffle(point_indices)
                    block_points = xyz[point_indices]
                    block_min = np.amin(block_points, axis=0, keepdims=True)
                    block_max = np.amax(block_points, axis=0, keepdims=True)
                    block_center = (block_min + block_max) / 2
                    block_center[0][-1] = block_min[0][-1]
                    block_points = block_points - block_center  # align to block bottom center
                    x, y, z = np.split(block_points, (1, 2), axis=-1)
                    block_xzyrgbi = np.concatenate([x, z, y, rgb[point_indices], i[point_indices]], axis=-1)
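                    # note the axis swap: columns are stored as (x, z, y), so
                    # the second column is the height axis, presumably to match
                    # the downstream network's convention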
                    block_labels = labels[point_indices]

                    for block_split_idx in range(block_split_num):
                        start = starts[block_split_idx]
                        point_num = point_nums[block_split_idx]
                        end = start + point_num
                        idx_in_batch = idx % batch_size
                        data[idx_in_batch, 0:point_num, ...] = block_xzyrgbi[start:end, :]
                        data_num[idx_in_batch] = point_num
                        label[idx_in_batch] = dataset_idx  # won't be used...
                        label_seg[idx_in_batch, 0:point_num] = block_labels[start:end]
                        indices_split_to_full[idx_in_batch, 0:point_num] = point_indices[start:end]
                        if args.save_ply:
                            block_center_xzy = np.array([[block_center[0][0], block_center[0][2], block_center[0][1]]])
                            data_center[idx_in_batch, 0:point_num, ...] = block_center_xzy

                        if ((idx + 1) % batch_size == 0) or \
                                (block_idx == idx_last_non_empty_block and block_split_idx == block_split_num - 1):
                            item_num = idx_in_batch + 1
                            filename_h5 = os.path.join(folder, dataset + '_%s_%d.h5' % (offset_name, idx_h5))
                            print('{}-Saving {}...'.format(datetime.now(), filename_h5))

                            file = h5py.File(filename_h5, 'w')
                            file.create_dataset('data', data=data[0:item_num, ...])
                            file.create_dataset('data_num', data=data_num[0:item_num, ...])
                            file.create_dataset('label', data=label[0:item_num, ...])
                            file.create_dataset('label_seg', data=label_seg[0:item_num, ...])
                            file.create_dataset('indices_split_to_full', data=indices_split_to_full[0:item_num, ...])
                            file.close()

                            if args.save_ply:
                                print('{}-Saving ply of {}...'.format(datetime.now(), filename_h5))
                                filepath_label_ply = os.path.join(folder, 'ply_label',
                                                                  dataset + '_label_%s_%d' % (offset_name, idx_h5))
                                data_utils.save_ply_property_batch(
                                    data[0:item_num, :, 0:3] + data_center[0:item_num, ...],
                                    label_seg[0:item_num, ...],
                                    filepath_label_ply, data_num[0:item_num, ...], 8)

                                filepath_i_ply = os.path.join(folder, 'ply_intensity',
                                                              dataset + '_i_%s_%d' % (offset_name, idx_h5))
                                data_utils.save_ply_property_batch(
                                    data[0:item_num, :, 0:3] + data_center[0:item_num, ...],
                                    data[0:item_num, :, 6],
                                    filepath_i_ply, data_num[0:item_num, ...], 1.0)

                                filepath_rgb_ply = os.path.join(folder, 'ply_rgb',
                                                                dataset + '_rgb_%s_%d' % (offset_name, idx_h5))
                                data_utils.save_ply_color_batch(data[0:item_num, :, 0:3] + data_center[0:item_num, ...],
                                                                (data[0:item_num, :, 3:6] + 0.5) * 255,
                                                                filepath_rgb_ply, data_num[0:item_num, ...])

                                filepath_label_aligned_ply = os.path.join(folder, 'ply_label_aligned',
                                                                          dataset + '_label_%s_%d' % (
                                                                              offset_name, idx_h5))
                                data_utils.save_ply_property_batch(data[0:item_num, :, 0:3],
                                                                   label_seg[0:item_num, ...],
                                                                   filepath_label_aligned_ply,
                                                                   data_num[0:item_num, ...], 8)

                                filepath_i_aligned_ply = os.path.join(folder, 'ply_intensity_aligned',
                                                                      dataset + '_i_%s_%d' % (offset_name, idx_h5))
                                data_utils.save_ply_property_batch(data[0:item_num, :, 0:3],
                                                                   data[0:item_num, :, 6],
                                                                   filepath_i_aligned_ply, data_num[0:item_num, ...],
                                                                   1.0)

                                filepath_rgb_aligned_ply = os.path.join(folder, 'ply_rgb_aligned',
                                                                        dataset + '_rgb_%s_%d' % (offset_name, idx_h5))
                                data_utils.save_ply_color_batch(data[0:item_num, :, 0:3],
                                                                (data[0:item_num, :, 3:6] + 0.5) * 255,
                                                                filepath_rgb_aligned_ply, data_num[0:item_num, ...])
                            idx_h5 = idx_h5 + 1
                        idx = idx + 1
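
A note on the grouping idiom used throughout these examples: np.unique with
return_inverse and return_counts, followed by argsort and split, buckets row
indices by their unique key. A minimal, self-contained sketch with toy data
(all values here are made up for illustration):

import numpy as np

ids = np.array([[0, 0], [1, 0], [0, 0], [1, 1], [1, 0]])
blocks, inverse, counts = np.unique(ids, return_inverse=True,
                                    return_counts=True, axis=0)
# groups[k] holds the indices of all rows of ids equal to blocks[k];
# the order of indices within a group is not guaranteed
groups = np.split(np.argsort(inverse), np.cumsum(counts[:-1]))
print(blocks)   # [[0 0] [1 0] [1 1]]
print(groups)   # e.g. [array([0, 2]), array([1, 4]), array([3])]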
Exemple #8
0
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--folder', '-f', help='Path to data folder')
    parser.add_argument('--max_point_num',
                        '-m',
                        help='Max point number of each sample',
                        type=int,
                        default=8192)
    parser.add_argument('--block_size',
                        '-b',
                        help='Block size',
                        type=float,
                        default=1.5)
    parser.add_argument('--grid_size',
                        '-g',
                        help='Grid size',
                        type=float,
                        default=0.03)
    parser.add_argument('--save_ply',
                        '-s',
                        help='Convert .pts to .ply',
                        action='store_true')

    args = parser.parse_args()
    print(args)

    root = args.folder if args.folder else '/home/elvin/PointCNN/data/mydata'
    max_point_num = args.max_point_num

    batch_size = 2048
    # stores xyz coordinates
    data = np.zeros((batch_size, max_point_num, 3))
    # stores the number of points in each point cloud
    data_num = np.zeros((batch_size), dtype=np.int32)
    #label = np.zeros((batch_size), dtype=np.int32)
    #label_seg = np.zeros((batch_size, max_point_num), dtype=np.int32)
    indices_split_to_full = np.zeros((batch_size, max_point_num),
                                     dtype=np.int32)

    # each dataset corresponds to one point cloud
    datasets = os.listdir(root)
    for dataset_idx, dataset in enumerate(datasets):
        #dataset_marker = os.path.join(folder, dataset, ".dataset")
        #if os.path.exists(dataset_marker):
        #    print('{}-{}/{} already processed, skipping'.format(datetime.now(), folder, dataset))
        #    continue
        if dataset.endswith('.npy'):
            #    continue
            filename_data = os.path.join(root, dataset)  #, 'xyzrgb.npy')
            print('{}-Loading {}...'.format(datetime.now(), filename_data))
            xyz = np.load(filename_data)
            dataset = dataset.split('.')[0]

            xyz_min = np.amin(xyz, axis=0, keepdims=True)
            xyz_max = np.amax(xyz, axis=0, keepdims=True)
            xyz_center = (xyz_min + xyz_max) / 2
            xyz_center[0][-1] = xyz_min[0][-1]
            xyz = xyz - xyz_center  # align to room bottom center
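            # x and y are centered on the room, while z is shifted so the
            # floor sits at z == 0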
            #rgb = rgb / 255 - 0.5

            offsets = [('zero', 0.0), ('half', args.block_size / 2)]
            for offset_name, offset in offsets:
                idx_h5 = 0
                idx = 0

                print('{}-Computing block id of {} points...'.format(
                    datetime.now(), xyz.shape[0]))
                xyz_min = np.amin(xyz, axis=0, keepdims=True) - offset
                xyz_max = np.amax(xyz, axis=0, keepdims=True)
                block_size = (args.block_size, args.block_size,
                              2 * (xyz_max[0, -1] - xyz_min[0, -1]))
                xyz_blocks = np.floor(
                    (xyz - xyz_min) / block_size).astype(np.int64)

                print('{}-Collecting points belonging to each block...'.format(
                    datetime.now()))
                # point_block_indices maps each original point to its row in the unique block array
                blocks, point_block_indices, block_point_counts = np.unique(
                    xyz_blocks,
                    return_inverse=True,
                    return_counts=True,
                    axis=0)
                # number of blocks this room's data is split into
                block_point_indices = np.split(
                    np.argsort(point_block_indices),
                    np.cumsum(block_point_counts[:-1]))
                print('{}-{}.npy is split into {} blocks.'.format(
                    datetime.now(), dataset, blocks.shape[0]))

                block_to_block_idx_map = dict()
                for block_idx in range(blocks.shape[0]):
                    block = (blocks[block_idx][0], blocks[block_idx][1])
                    block_to_block_idx_map[(block[0], block[1])] = block_idx

                # merge small blocks into one of their big neighbors
                block_point_count_threshold = max_point_num / 10
                nbr_block_offsets = [(0, 1), (1, 0), (0, -1), (-1, 0), (-1, 1),
                                     (1, 1), (1, -1), (-1, -1)]
                block_merge_count = 0
                for block_idx in range(blocks.shape[0]):
                    if block_point_counts[
                            block_idx] >= block_point_count_threshold:
                        continue

                    block = (blocks[block_idx][0], blocks[block_idx][1])
                    for x, y in nbr_block_offsets:
                        nbr_block = (block[0] + x, block[1] + y)
                        if nbr_block not in block_to_block_idx_map:
                            continue

                        nbr_block_idx = block_to_block_idx_map[nbr_block]
                        if block_point_counts[
                                nbr_block_idx] < block_point_count_threshold:
                            continue

                        block_point_indices[nbr_block_idx] = np.concatenate(
                            [
                                block_point_indices[nbr_block_idx],
                                block_point_indices[block_idx]
                            ],
                            axis=-1)
                        block_point_indices[block_idx] = np.array(
                            [], dtype=np.int64)
                        block_merge_count = block_merge_count + 1
                        break
                print('{}-{} of {} blocks are merged.'.format(
                    datetime.now(), block_merge_count, blocks.shape[0]))

                idx_last_non_empty_block = 0
                for block_idx in reversed(range(blocks.shape[0])):
                    if block_point_indices[block_idx].shape[0] != 0:
                        idx_last_non_empty_block = block_idx
                        break

                # uniformly sample each block
                for block_idx in range(idx_last_non_empty_block + 1):
                    point_indices = block_point_indices[block_idx]
                    if point_indices.shape[0] == 0:
                        continue
                    block_points = xyz[point_indices]
                    block_min = np.amin(block_points, axis=0, keepdims=True)
                    xyz_grids = np.floor((block_points - block_min) /
                                         args.grid_size).astype(np.int64)
                    grids, point_grid_indices, grid_point_counts = np.unique(
                        xyz_grids,
                        return_inverse=True,
                        return_counts=True,
                        axis=0)
                    grid_point_indices = np.split(
                        np.argsort(point_grid_indices),
                        np.cumsum(grid_point_counts[:-1]))
                    grid_point_count_avg = int(np.average(grid_point_counts))
                    point_indices_repeated = []
                    for grid_idx in range(grids.shape[0]):
                        point_indices_in_block = grid_point_indices[grid_idx]
                        repeat_num = math.ceil(grid_point_count_avg /
                                               point_indices_in_block.shape[0])
                        if repeat_num > 1:
                            point_indices_in_block = np.repeat(
                                point_indices_in_block, repeat_num)
                            np.random.shuffle(point_indices_in_block)
                            point_indices_in_block = \
                                point_indices_in_block[:grid_point_count_avg]
                        point_indices_repeated.extend(
                            list(point_indices[point_indices_in_block]))
                    block_point_indices[block_idx] = np.array(
                        point_indices_repeated)
                    block_point_counts[block_idx] = len(point_indices_repeated)

                for block_idx in range(idx_last_non_empty_block + 1):
                    point_indices = block_point_indices[block_idx]
                    if point_indices.shape[0] == 0:
                        continue

                    block_point_num = point_indices.shape[0]
                    block_split_num = int(
                        math.ceil(block_point_num * 1.0 / max_point_num))
                    point_num_avg = int(
                        math.ceil(block_point_num * 1.0 / block_split_num))
                    point_nums = [point_num_avg] * block_split_num
                    point_nums[-1] = block_point_num - (point_num_avg *
                                                        (block_split_num - 1))
                    starts = [0] + list(np.cumsum(point_nums))

                    np.random.shuffle(point_indices)
                    block_points = xyz[point_indices]
                    #block_rgb = rgb[point_indices]
                    #block_labels = labels[point_indices]
                    #x, y, z = np.split(block_points, (1, 2), axis=-1)
                    #block_xzyrgb = np.concatenate([x, z, y, block_rgb], axis=-1)

                    for block_split_idx in range(block_split_num):
                        start = starts[block_split_idx]
                        point_num = point_nums[block_split_idx]
                        end = start + point_num
                        idx_in_batch = idx % batch_size
                        data[idx_in_batch, 0:point_num,
                             ...] = block_points[start:end, :]
                        data_num[idx_in_batch] = point_num
                        #label[idx_in_batch] = dataset_idx  # won't be used...
                        #label_seg[idx_in_batch, 0:point_num] = block_labels[start:end]
                        indices_split_to_full[
                            idx_in_batch,
                            0:point_num] = point_indices[start:end]

                        if ((idx + 1) % batch_size == 0) or \
                                (block_idx == idx_last_non_empty_block and block_split_idx == block_split_num - 1):
                            item_num = idx_in_batch + 1
                            filename_h5 = os.path.join(
                                root,
                                dataset + '_%s_%d.h5' % (offset_name, idx_h5))
                            print('{}-Saving {}...'.format(
                                datetime.now(), filename_h5))

                            file = h5py.File(filename_h5, 'w')
                            file.create_dataset('data',
                                                data=data[0:item_num, ...])
                            file.create_dataset('data_num',
                                                data=data_num[0:item_num, ...])
                            #file.create_dataset('label', data=label[0:item_num, ...])
                            #file.create_dataset('label_seg', data=label_seg[0:item_num, ...])
                            file.create_dataset(
                                'indices_split_to_full',
                                data=indices_split_to_full[0:item_num, ...])
                            file.close()

                            if args.save_ply:
                                # label_seg and rgb are never populated in this
                                # xyz-only pipeline (see the commented-out
                                # buffers above), so the label/rgb ply exports
                                # from the original script would raise
                                # NameErrors and are left disabled here.
                                print('{}-No ply saved for {} (xyz only).'
                                      .format(datetime.now(), filename_h5))

                            idx_h5 = idx_h5 + 1
                        idx = idx + 1

            # Marker indicating we've processed this dataset
            #open(dataset_marker, "w").close()

    filename_h5s = [
        './%s\n' % (filename) for filename in os.listdir(root)
        if filename.endswith('.h5')
    ]
    filelist_txt = os.path.join(root, 'my_test_data.txt')
    print('{}-Saving {}...'.format(datetime.now(), filelist_txt))
    with open(filelist_txt, 'w') as filelist:
        for filename_h5 in filename_h5s:
            filelist.write(filename_h5)
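
The filelist written above is the input expected by the inference script that
follows (its --filelist argument). A minimal sketch of how such a list can be
consumed, assuming the .h5 paths in it are relative to the list's directory:

import os
import h5py

filelist = 'my_test_data.txt'  # hypothetical path to a generated filelist
folder = os.path.dirname(filelist)
for line in open(filelist):
    path = os.path.join(folder, line.strip())
    with h5py.File(path, 'r') as f:
        print(path, f['data'].shape, int(f['data_num'][0]))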
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--filelist',
                        '-t',
                        help='Path to input .h5 filelist (.txt)',
                        required=True)
    parser.add_argument('--load_ckpt',
                        '-l',
                        help='Path to a check point file for load',
                        required=True)
    parser.add_argument('--max_point_num',
                        '-p',
                        help='Max point number of each sample',
                        type=int,
                        default=8192)
    parser.add_argument('--repeat_num',
                        '-r',
                        help='Repeat number',
                        type=int,
                        default=1)
    parser.add_argument('--model', '-m', help='Model to use', required=True)
    parser.add_argument('--setting',
                        '-x',
                        help='Setting to use',
                        required=True)
    parser.add_argument('--save_ply',
                        '-s',
                        help='Save results as ply',
                        action='store_true')
    args = parser.parse_args()
    print(args)

    model = importlib.import_module(args.model)
    setting_path = os.path.join(os.path.dirname(__file__), args.model)
    sys.path.append(setting_path)
    setting = importlib.import_module(args.setting)

    sample_num = setting.sample_num
    max_point_num = args.max_point_num
    batch_size = args.repeat_num * math.ceil(max_point_num / sample_num)
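    # each repeat covers every point at least once: sample_num points per
    # batch row, ceil(max_point_num / sample_num) rows per repeat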

    ######################################################################
    # Placeholders
    indices = tf.placeholder(tf.int32,
                             shape=(batch_size, None, 2),
                             name="indices")
    is_training = tf.placeholder(tf.bool, name='is_training')
    pts_fts = tf.placeholder(tf.float32,
                             shape=(batch_size, max_point_num,
                                    setting.data_dim),
                             name='points')
    ######################################################################

    ######################################################################
    pts_fts_sampled = tf.gather_nd(pts_fts,
                                   indices=indices,
                                   name='pts_fts_sampled')
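    # each entry of indices is a (batch index, point index) pair, so gather_nd
    # selects sample_num points from each cloud in the batch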
    if setting.data_dim > 3:
        points_sampled, features_sampled = tf.split(
            pts_fts_sampled, [3, setting.data_dim - 3],
            axis=-1,
            name='split_points_features')
        if not setting.use_extra_features:
            features_sampled = None
    else:
        points_sampled = pts_fts_sampled
        features_sampled = None

    net = model.Net(points_sampled, features_sampled, is_training, setting)
    seg_probs_op = tf.nn.softmax(net.logits, name='seg_probs')

    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

    # for restore model
    saver = tf.train.Saver()

    parameter_num = np.sum(
        [np.prod(v.shape.as_list()) for v in tf.trainable_variables()])
    print('{}-Parameter number: {:d}.'.format(datetime.now(), parameter_num))

    with tf.Session() as sess:
        # Load the model
        saver.restore(sess, args.load_ckpt)
        print('{}-Checkpoint loaded from {}!'.format(datetime.now(),
                                                     args.load_ckpt))

        indices_batch_indices = np.tile(
            np.reshape(np.arange(batch_size), (batch_size, 1, 1)),
            (1, sample_num, 1))

        folder = os.path.dirname(args.filelist)
        filenames = [
            os.path.join(folder, line.strip()) for line in open(args.filelist)
        ]
        for filename in filenames:
            print('{}-Reading {}...'.format(datetime.now(), filename))
            data_h5 = h5py.File(filename, 'r')  # filename already includes folder
            data = data_h5['data'][...].astype(np.float32)
            data_num = data_h5['data_num'][...].astype(np.int32)
            batch_num = data.shape[0]

            labels_pred = np.full((batch_num, max_point_num),
                                  -1,
                                  dtype=np.int32)
            confidences_pred = np.zeros((batch_num, max_point_num),
                                        dtype=np.float32)

            print('{}-{:d} testing batches.'.format(datetime.now(), batch_num))
            for batch_idx in range(batch_num):
                if batch_idx % 10 == 0:
                    print('{}-Processing {} of {} batches.'.format(
                        datetime.now(), batch_idx, batch_num))
                points_batch = data[[batch_idx] * batch_size, ...]
                point_num = data_num[batch_idx]

                tile_num = math.ceil((sample_num * batch_size) / point_num)
                indices_shuffle = np.tile(np.arange(point_num),
                                          tile_num)[0:sample_num * batch_size]
                np.random.shuffle(indices_shuffle)
                indices_batch_shuffle = np.reshape(indices_shuffle,
                                                   (batch_size, sample_num, 1))
                indices_batch = np.concatenate(
                    (indices_batch_indices, indices_batch_shuffle), axis=2)
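                # indices_batch has shape (batch_size, sample_num, 2): the
                # shuffled point ids are paired with their batch-row index for
                # tf.gather_nd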

                _, seg_probs = sess.run(
                    [update_ops, seg_probs_op],
                    feed_dict={
                        pts_fts: points_batch,
                        indices: indices_batch,
                        is_training: False,
                    })
                probs_2d = np.reshape(seg_probs, (sample_num * batch_size, -1))

                predictions = [(-1, 0.0)] * point_num
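                # max-confidence voting: across all repeated samplings, each
                # original point keeps the label with the highest softmax
                # confidence seen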
                for idx in range(sample_num * batch_size):
                    point_idx = indices_shuffle[idx]
                    probs = probs_2d[idx, :]
                    confidence = np.amax(probs)
                    label = np.argmax(probs)
                    if confidence > predictions[point_idx][1]:
                        predictions[point_idx] = [label, confidence]
                labels_pred[batch_idx, 0:point_num] = np.array(
                    [label for label, _ in predictions])
                confidences_pred[batch_idx, 0:point_num] = np.array(
                    [confidence for _, confidence in predictions])

            filename_pred = filename[:-3] + '_pred.h5'
            print('{}-Saving {}...'.format(datetime.now(), filename_pred))
            file = h5py.File(filename_pred, 'w')
            file.create_dataset('data_num', data=data_num)
            file.create_dataset('label_seg', data=labels_pred)
            file.create_dataset('confidence', data=confidences_pred)
            has_indices = 'indices_split_to_full' in data_h5
            if has_indices:
                file.create_dataset('indices_split_to_full',
                                    data=data_h5['indices_split_to_full'][...])
            file.close()

            if args.save_ply:
                print('{}-Saving ply of {}...'.format(datetime.now(),
                                                      filename_pred))
                filepath_label_ply = filename_pred[:-3] + 'ply_label'
                data_utils.save_ply_property_batch(data[:, :,
                                                        0:3], labels_pred[...],
                                                   filepath_label_ply,
                                                   data_num[...],
                                                   setting.num_class)
            ######################################################################
        print('{}-Done!'.format(datetime.now()))
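
The voting step above can be isolated. A toy sketch of the same aggregation,
with made-up probabilities for three repeated samplings of a single point:

import numpy as np

probs = np.array([[0.2, 0.8], [0.6, 0.4], [0.1, 0.9]])  # toy values
best_label, best_conf = -1, 0.0
for p in probs:
    if p.max() > best_conf:
        best_label, best_conf = int(p.argmax()), float(p.max())
print(best_label, best_conf)  # 1 0.9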
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--folder', '-f', help='Path to data folder')
    parser.add_argument('--max_point_num',
                        '-m',
                        help='Max point number of each sample',
                        type=int,
                        default=8192)
    parser.add_argument('--block_size',
                        '-b',
                        help='Block size',
                        type=float,
                        default=0.3)
    parser.add_argument('--grid_size',
                        '-g',
                        help='Grid size',
                        type=float,
                        default=0.03)
    parser.add_argument('--save_ply',
                        '-s',
                        help='Convert .pts to .ply',
                        action='store_true')

    args = parser.parse_args()
    print(args)

    root = args.folder if args.folder else '../../data/S3DIS/prepare_label_rgb'
    dir_root = '../../data/S3DIS/dir_all_d/'
    max_point_num = args.max_point_num

    batch_size = 2048
    data = np.zeros((batch_size, max_point_num, 6))
    direction = np.zeros((batch_size, max_point_num, 6))
    data_num = np.zeros((batch_size), dtype=np.int32)
    label = np.zeros((batch_size), dtype=np.int32)
    label_seg = np.zeros((batch_size, max_point_num), dtype=np.int32)
    indices_split_to_full = np.zeros((batch_size, max_point_num),
                                     dtype=np.int32)

    data_extra = np.zeros((batch_size, 4096, 6))
    direction_extra = np.zeros((batch_size, 4096, 6))
    data_num_extra = np.zeros((batch_size), dtype=np.int32)
    label_seg_extra = np.zeros((batch_size, 4096), dtype=np.int32)

    mergenum = 0

    for area_idx in range(1, 7):
        folder = os.path.join(root, 'Area_%d' % area_idx)
        datasets = [dataset for dataset in os.listdir(folder)]
        for dataset_idx, dataset in enumerate(datasets):
            dataset_marker = os.path.join(folder, dataset, ".dataset")
            if os.path.exists(dataset_marker):
                print('{}-{}/{} already processed, skipping'.format(
                    datetime.now(), folder, dataset))
                continue
            filename_data = os.path.join(folder, dataset, 'xyzrgb_label.mat')
            print('{}-Loading {}...'.format(datetime.now(), filename_data))
            xyz_label = scio.loadmat(filename_data)
            xyzrgb = xyz_label['xyzrgb']
            labels = xyz_label['label'].astype(int).flatten()

            filename_direction = os.path.join(dir_root, 'Area_%d' % area_idx,
                                              dataset, 'vec.mat')
            print('{}-Loading {}...'.format(datetime.now(),
                                            filename_direction))
            vec = scio.loadmat(filename_direction)
            vec = vec['vec']

            xyz, rgb = np.split(xyzrgb, [3], axis=-1)
            xyz_min = np.amin(xyz, axis=0, keepdims=True)
            xyz_max = np.amax(xyz, axis=0, keepdims=True)
            xyz_center = (xyz_min + xyz_max) / 2
            xyz_center[0][-1] = xyz_min[0][-1]
            xyz = xyz - xyz_center  # align to room bottom center
            rgb = rgb / 255 - 0.5

            offsets = [('zero', 0.0), ('half', 1.2 / 2)]
            for offset_name, offset in offsets:
                idx_h5 = 0
                idx = 0

                print('{}-Computing block id of {} points...'.format(
                    datetime.now(), xyzrgb.shape[0]))
                xyz_min = np.amin(xyz, axis=0, keepdims=True) - offset
                xyz_max = np.amax(xyz, axis=0, keepdims=True)
                block_size = (args.block_size, args.block_size,
                              2 * (xyz_max[0, -1] - xyz_min[0, -1]))
                xyz_blocks = np.floor(
                    (xyz - xyz_min) / block_size).astype(np.int64)

                print('{}-Collecting points belonging to each block...'.format(
                    datetime.now()))
                blocks, point_block_indices, block_point_counts = np.unique(
                    xyz_blocks,
                    return_inverse=True,
                    return_counts=True,
                    axis=0)
                block_point_indices = np.split(
                    np.argsort(point_block_indices),
                    np.cumsum(block_point_counts[:-1]))
                print('{}-{} is split into {} blocks.'.format(
                    datetime.now(), dataset, blocks.shape[0]))

                blockmax = np.amax(blocks, axis=0)
                xblocks = int(blockmax[0] / 5) + 1
                yblocks = int(blockmax[1] / 5) + 1
                blocks_base = [[] for _ in range(xblocks * yblocks)]
                blocks_extra = [[] for _ in range(xblocks * yblocks)]
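                # base super-blocks are 5x5 groups of block_size cells;
                # blocks_extra will collect the one-cell ring just outside each
                # group, used as surrounding context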
                for xi in range(blockmax[0] + 1):
                    for yj in range(blockmax[1] + 1):
                        # insert an empty placeholder when cell (xi, yj) is
                        # missing from the sorted block list
                        expected = min((blockmax[1] + 1) * xi + yj,
                                       blocks.shape[0] - 1)
                        if np.any(blocks[expected] != [xi, yj, 0]):
                            #print([xi, yj, 0])
                            blocks = np.insert(blocks,
                                               (blockmax[1] + 1) * xi + yj,
                                               [xi, yj, 0], 0)
                            block_point_indices.insert(
                                (blockmax[1] + 1) * xi + yj, [])
                            #print(len(block_point_indices))
                for xi in range(blockmax[0] + 1):
                    for yj in range(blockmax[1] + 1):
                        subid = (blockmax[1] + 1) * xi + yj
                        bid = yblocks * int(xi / 5) + int(yj / 5)
                        blocks_base[bid].append(block_point_indices[subid])
                        if ((xi % 5) == 0) & (xi > 0):
                            eid = yblocks * int((xi - 1) / 5) + int(yj / 5)
                            blocks_extra[eid].append(
                                block_point_indices[subid])
                        elif ((xi % 5) == 4) & (xi < blocks[-1][0]):
                            eid = yblocks * int((xi + 1) / 5) + int(yj / 5)
                            blocks_extra[eid].append(
                                block_point_indices[subid])

                        if ((yj % 5) == 0) & (yj > 0):
                            eid = yblocks * int(xi / 5) + int((yj - 1) / 5)
                            blocks_extra[eid].append(
                                block_point_indices[subid])
                        elif ((yj % 5) == 4) & (yj < blocks[-1][1]):
                            eid = yblocks * int(xi / 5) + int((yj + 1) / 5)
                            blocks_extra[eid].append(
                                block_point_indices[subid])
                blocks_base = [np.concatenate(ind) for ind in blocks_base]
                blocks_extra = [np.concatenate(ind) for ind in blocks_extra]
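                # each super-block now owns a flat array of point indices;
                # the 'extra' arrays hold the context ring around it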

                block_to_block_idx_map = dict()
                for block_idx in range(xblocks * yblocks):
                    block = (block_idx // yblocks, block_idx % yblocks)
                    block_to_block_idx_map[(block[0], block[1])] = block_idx

                # merge small blocks into one of their big neighbors
                block_point_count_threshold = max_point_num / 10
                nbr_block_offsets = [(0, 1), (1, 0), (0, -1), (-1, 0), (-1, 1),
                                     (1, 1), (1, -1), (-1, -1)]
                block_merge_count = 0
                for block_idx in range(xblocks * yblocks):
                    if len(blocks_base[block_idx]
                           ) >= block_point_count_threshold:
                        continue

                    # block_idx indexes the super-block grid here, so derive
                    # its (x, y) the same way the map above was built
                    block = (block_idx // yblocks, block_idx % yblocks)
                    for x, y in nbr_block_offsets:
                        nbr_block = (block[0] + x, block[1] + y)
                        if nbr_block not in block_to_block_idx_map:
                            continue

                        nbr_block_idx = block_to_block_idx_map[nbr_block]
                        if len(blocks_base[nbr_block_idx]
                               ) < block_point_count_threshold:
                            continue

                        blocks_base[nbr_block_idx] = np.concatenate([
                            blocks_base[nbr_block_idx], blocks_base[block_idx]
                        ],
                                                                    axis=-1)
                        blocks_base[block_idx] = np.array([], dtype=np.int64)
                        blocks_extra[nbr_block_idx] = np.concatenate([
                            blocks_extra[nbr_block_idx],
                            blocks_extra[block_idx]
                        ],
                                                                     axis=-1)
                        blocks_extra[block_idx] = np.array([], dtype=np.int64)

                        blocks_extra[nbr_block_idx] = np.array(
                            list(
                                set(blocks_extra[nbr_block_idx]).difference(
                                    set(blocks_base[nbr_block_idx]))))
                        block_merge_count = block_merge_count + 1
                        break
                print('{}-{} of {} blocks are merged.'.format(
                    datetime.now(), block_merge_count, blocks.shape[0]))

                idx_last_non_empty_block = 0
                for block_idx in reversed(range(xblocks * yblocks)):
                    if blocks_base[block_idx].shape[0] != 0:
                        idx_last_non_empty_block = block_idx
                        break

                blocks_base = [ind.astype(np.int64) for ind in blocks_base]
                blocks_extra = [ind.astype(np.int64) for ind in blocks_extra]

                for block_idx in range(idx_last_non_empty_block + 1):
                    point_indices = blocks_base[block_idx]
                    point_indices_extra = blocks_extra[block_idx]
                    if point_indices.shape[0] == 0:
                        continue

                    block_point_num = point_indices.shape[0]
                    block_split_num = int(
                        math.ceil(block_point_num * 1.0 / max_point_num))
                    point_num_avg = int(
                        math.ceil(block_point_num * 1.0 / block_split_num))
                    point_nums = [point_num_avg] * block_split_num
                    point_nums[-1] = block_point_num - (point_num_avg *
                                                        (block_split_num - 1))
                    starts = [0] + list(np.cumsum(point_nums))

                    np.random.shuffle(point_indices)
                    block_points = xyz[point_indices]
                    block_rgb = rgb[point_indices]
                    block_dir = vec[point_indices]
                    block_labels = labels[point_indices]
                    block_xzyrgb = np.concatenate([block_points, block_rgb],
                                                  axis=-1)

                    np.random.shuffle(point_indices_extra)
                    block_points_extra = xyz[point_indices_extra]
                    block_rgb_extra = rgb[point_indices_extra]
                    block_dir_extra = vec[point_indices_extra]
                    block_labels_extra = labels[point_indices_extra]
                    block_xzyrgb_extra = np.concatenate(
                        [block_points_extra, block_rgb_extra], axis=-1)

                    for block_split_idx in range(block_split_num):
                        start = starts[block_split_idx]
                        point_num = point_nums[block_split_idx]
                        end = start + point_num
                        idx_in_batch = idx % batch_size
                        data[idx_in_batch, 0:point_num,
                             ...] = block_xzyrgb[start:end, :]
                        direction[idx_in_batch, 0:point_num,
                                  ...] = block_dir[start:end, :]
                        data_num[idx_in_batch] = point_num
                        label[idx_in_batch] = dataset_idx  # won't be used...
                        label_seg[idx_in_batch,
                                  0:point_num] = block_labels[start:end]
                        indices_split_to_full[
                            idx_in_batch,
                            0:point_num] = point_indices[start:end]

                        point_num_extra = int(point_num * 2.0 / 4)
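                        # the context set is half the size of the base split,
                        # sampled with replacement when the ring has too few
                        # points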
                        indice = np.random.choice(
                            len(point_indices_extra),
                            point_num_extra,
                            replace=len(point_indices_extra) < point_num_extra)
                        data_extra[idx_in_batch, 0:point_num_extra,
                                   ...] = block_xzyrgb_extra[indice, :]
                        direction_extra[idx_in_batch, 0:point_num_extra,
                                        ...] = block_dir_extra[indice, :]
                        data_num_extra[idx_in_batch] = point_num_extra
                        label_seg_extra[
                            idx_in_batch,
                            0:point_num_extra] = block_labels_extra[indice]

                        if ((idx + 1) % batch_size == 0) or \
                                (block_idx == idx_last_non_empty_block and block_split_idx == block_split_num - 1):
                            item_num = idx_in_batch + 1
                            filename_h5 = os.path.join(
                                folder, dataset,
                                '%s_%d.h5' % (offset_name, idx_h5))
                            print('{}-Saving {}...'.format(
                                datetime.now(), filename_h5))

                            file = h5py.File(filename_h5, 'w')
                            file.create_dataset('data',
                                                data=data[0:item_num, ...])
                            file.create_dataset('data_num',
                                                data=data_num[0:item_num, ...])
                            file.create_dataset('direction',
                                                data=direction[0:item_num,
                                                               ...])
                            file.create_dataset('label',
                                                data=label[0:item_num, ...])
                            file.create_dataset('label_seg',
                                                data=label_seg[0:item_num,
                                                               ...])
                            file.create_dataset(
                                'indices_split_to_full',
                                data=indices_split_to_full[0:item_num, ...])
                            file.create_dataset('data_extra',
                                                data=data_extra[0:item_num,
                                                                ...])
                            file.create_dataset('data_num_extra',
                                                data=data_num_extra[0:item_num,
                                                                    ...])
                            file.create_dataset(
                                'direction_extra',
                                data=direction_extra[0:item_num, ...])
                            file.create_dataset(
                                'label_seg_extra',
                                data=label_seg_extra[0:item_num, ...])
                            file.close()

                            if args.save_ply:
                                print('{}-Saving ply of {}...'.format(
                                    datetime.now(), filename_h5))
                                filepath_label_ply = os.path.join(
                                    folder, dataset, 'ply_label',
                                    'label_%s_%d' % (offset_name, idx_h5))
                                data_utils.save_ply_property_batch(
                                    data[0:item_num, :,
                                         0:3], label_seg[0:item_num, ...],
                                    filepath_label_ply, data_num[0:item_num,
                                                                 ...], 14)

                                filepath_rgb_ply = os.path.join(
                                    folder, dataset, 'ply_rgb',
                                    'rgb_%s_%d' % (offset_name, idx_h5))
                                data_utils.save_ply_color_batch(
                                    data[0:item_num, :, 0:3],
                                    (data[0:item_num, :, 3:] + 0.5) * 255,
                                    filepath_rgb_ply, data_num[0:item_num,
                                                               ...])

                            idx_h5 = idx_h5 + 1
                        idx = idx + 1

            # Marker indicating we've processed this dataset
            open(dataset_marker, "w").close()
        print(mergenum)