Example #1
0
 def tilename_to_blob_path(self, tilename, relative=False):
     """
     Return the path that the pointer for the given tile should be written to.

     tilename - the tile's name. A whole path is also accepted, since the value
         is first passed through tilename_from_path.
     relative - if true, the "tile/<prefix>/<name>" path is returned as-is;
         otherwise it is passed through ensure_full_path before being returned.
     """
     # The caller may have handed us a whole path rather than a bare name.
     name = self.tilename_from_path(tilename)
     # Tiles are spread over subdirectories named after the first two
     # characters of the hexhash of the tile's name.
     shard = hexhash(name)[:2]
     pointer_path = f"tile/{shard}/{name}"
     if relative:
         return pointer_path
     return self.ensure_full_path(pointer_path)
Example #2
0
 def __init__(self, crs_spec, geometry_spec, match_all=False):
     """
     Build the filter from a CRS specification and a geometry specification.

     crs_spec - CRS specification, parsed using make_crs.
     geometry_spec - geometry specification, parsed using geometry_from_string.
     match_all - if true, crs_spec and geometry_spec are ignored: the parent
         is initialised with no CRS or geometry and self.hexhash is None.
     """
     if match_all:
         super().__init__(None, None, match_all=True)
         self.hexhash = None
         return

     # Handed to both parsers as their context argument
     # (presumably used to label any parse errors - TODO confirm).
     parse_context = "spatial filter"
     parsed_geometry = geometry_from_string(geometry_spec, context=parse_context)
     parsed_crs = make_crs(crs_spec, context=parse_context)
     super().__init__(parsed_crs, parsed_geometry.to_ogr())
     # The hash is derived from the stripped CRS spec text and the geometry's WKB.
     self.hexhash = hexhash(crs_spec.strip(), parsed_geometry.to_wkb())
Example #3
0
 def hexhash(self):
     """
     Return the hexhash of this object's dumps() output.

     Like __hash__ but with platform-independent, 160-bit hex strings.
     """
     serialised = self.dumps()
     return hexhash(serialised)
Example #4
0
 def hexhash(self):
     """
     Return the hexhash of the stripped CRS spec and the geometry's WKB,
     or None if this filter is a match-all filter.
     """
     if not self.match_all:
         return hexhash(self.crs_spec.strip(), self.geometry.to_wkb())
     return None
Example #5
0
def point_cloud_import(ctx, convert_to_copc, ds_path, sources):
    """
    Experimental command for importing point cloud datasets. Work-in-progress.
    Will eventually be merged with the main `import` command.

    SOURCES should be one or more LAZ or LAS files (or wildcards that match multiple LAZ or LAS files).
    """
    # NOTE(review): the docstring above is deliberately left untouched - it is
    # presumably used as the command's help text (the decorators are outside
    # this block - confirm).
    #
    # Parameters:
    #   ctx - context object; the repository is read from ctx.obj.repo.
    #   convert_to_copc - if true, LAZ and LAS tiles are converted to COPC 1.0
    #       during import. LAS sources are rejected unless this is set.
    #   ds_path - path of the new dataset within the repository.
    #   sources - paths of the tiles to import.
    #
    # Raises:
    #   NotFound - if a source is not a file.
    #   InvalidOperation - if a source cannot be read, or the sources are LAS
    #       and convert_to_copc is not set.
    #   The error built by _non_homogenous_error - if the sources disagree on
    #       filetype, version, COPC version, point data record format / length,
    #       or CRS.
    #
    # Outline:
    #   1. Check every source exists.
    #   2. Read each source's header (no points), check that all sources are
    #      homogenous, and record per-source counts and envelopes.
    #   3. Decide on the stored format and the function used to import a tile.
    #   4. Stream a commit through git fast-import: one LFS pointer file per
    #      tile, plus the schema and CRS meta items.
    #   5. Update the working copy.
    import pdal

    repo = ctx.obj.repo

    # TODO - improve path validation to make sure datasets of any type don't collide with each other
    # or with attachments.
    validate_dataset_paths([ds_path])

    # Fail before doing any real work if any of the sources is missing.
    for source in sources:
        if not (Path() / source).is_file():
            raise NotFound(f"No data found at {source}",
                           exit_code=NO_IMPORT_SOURCE)

    # One set per property that must be identical across all of the sources.
    # As soon as any of them holds more than one value, the import is aborted.
    compressed_set = ListBasedSet()
    version_set = ListBasedSet()
    copc_version_set = ListBasedSet()
    pdrf_set = ListBasedSet()
    pdr_length_set = ListBasedSet()
    crs_set = ListBasedSet()
    # These three are populated while processing the first source only.
    transform = None
    schema = None
    crs_name = None

    # Maps each source to its point count and its native / CRS84 envelopes.
    per_source_info = {}

    for source in sources:
        # No newline, trailing carriage return: each progress message
        # overwrites the previous one on the same terminal line.
        click.echo(f"Checking {source}...          \r", nl=False)
        config = [{
            "type": "readers.las",
            "filename": source,
            "count": 0,  # Don't read any individual points.
        }]
        if schema is None:
            # The schema is only extracted from the first source.
            config.append({"type": "filters.info"})

        pipeline = pdal.Pipeline(json.dumps(config))
        try:
            pipeline.execute()
        except RuntimeError as e:
            # Chain the original error so that the underlying failure
            # is recorded as the cause of the InvalidOperation.
            raise InvalidOperation(f"Error reading {source}",
                                   exit_code=INVALID_FILE_FORMAT) from e

        metadata = _unwrap_metadata(pipeline.metadata)

        info = metadata["readers.las"]

        compressed_set.add(info["compressed"])
        if len(compressed_set) > 1:
            raise _non_homogenous_error("filetype", "LAS vs LAZ")

        version = f"{info['major_version']}.{info['minor_version']}"
        version_set.add(version)
        if len(version_set) > 1:
            raise _non_homogenous_error("version", version_set)

        copc_version_set.add(get_copc_version(info))
        if len(copc_version_set) > 1:
            raise _non_homogenous_error("COPC version", copc_version_set)

        pdrf_set.add(info["dataformat_id"])
        if len(pdrf_set) > 1:
            raise _non_homogenous_error("Point Data Record Format", pdrf_set)

        pdr_length_set.add(info["point_length"])
        if len(pdr_length_set) > 1:
            raise _non_homogenous_error("Point Data Record Length",
                                        pdr_length_set)

        crs_set.add(info["srs"]["wkt"])
        if len(crs_set) > 1:
            raise _non_homogenous_error(
                "CRS",
                "\n vs \n".join((format_wkt_for_output(wkt, sys.stderr)
                                 for wkt in crs_set)),
            )

        # crs_set holds exactly one CRS at this point, so a transform built
        # for the first source can be reused for every later source.
        if transform is None:
            transform = _make_transform_to_crs84(crs_set.only())

        native_envelope = get_native_envelope(info)
        crs84_envelope = _transform_3d_envelope(transform, native_envelope)
        per_source_info[source] = {
            "count": info["count"],
            "native_envelope": native_envelope,
            "crs84_envelope": crs84_envelope,
        }

        if schema is None:
            crs_name = get_identifier_str(crs_set.only())
            schema = metadata["filters.info"]["schema"]
            schema["CRS"] = crs_name

    # Move on from the progress line that was being overwritten above.
    click.echo()

    version = version_set.only()
    copc_version = copc_version_set.only()
    is_laz = compressed_set.only() is True
    is_copc = is_laz and copc_version != NOT_COPC

    # Choose how each tile is imported (copied as-is or converted to COPC)
    # and the format string that is recorded in each tile's pointer file.
    if is_copc:
        # Keep native format.
        import_func = get_hash_and_size_of_file_while_copying
        kart_format = f"pc:v1/copc-{copc_version}.0"
    elif is_laz:
        # Optionally Convert to COPC 1.0 if requested
        import_func = (_convert_tile_to_copc_lfs_blob if convert_to_copc else
                       get_hash_and_size_of_file_while_copying)
        kart_format = "pc:v1/copc-1.0" if convert_to_copc else f"pc:v1/laz-{version}"
    else:  # LAS
        if not convert_to_copc:
            raise InvalidOperation(
                "LAS datasets are not supported - dataset must be converted to LAZ / COPC",
                exit_code=INVALID_FILE_FORMAT,
            )
        import_func = _convert_tile_to_copc_lfs_blob
        kart_format = "pc:v1/copc-1.0"

    # Extension given to every imported tile's name, regardless of the
    # extension of the source file.
    import_ext = ".copc.laz" if "copc" in kart_format else ".laz"

    # Set up LFS hooks.
    # TODO: This could eventually be moved to `kart init`.
    if not (repo.gitdir_path / "hooks" / "pre-push").is_file():
        subprocess.check_call(
            ["git", "-C",
             str(repo.gitdir_path), "lfs", "install", "hooks"])

    # We still need to write .kart.repostructure.version unfortunately, even though it's only relevant to tabular datasets.
    assert repo.table_dataset_version in SUPPORTED_VERSIONS
    # The extra blobs are only written when the repository has no head commit.
    extra_blobs = (extra_blobs_for_version(repo.table_dataset_version)
                   if not repo.head_commit else [])

    header = generate_header(
        repo,
        None,
        f"Importing {len(sources)} LAZ tiles as {ds_path}",
        repo.head_branch,
        repo.head_commit,
    )

    ds_inner_path = f"{ds_path}/.point-cloud-dataset.v1"

    # Tiles are first written to a temporary location, since the final
    # location depends on the hash returned once the tile has been written.
    lfs_tmp_path = repo.gitdir_path / "lfs" / "objects" / "tmp"
    lfs_tmp_path.mkdir(parents=True, exist_ok=True)

    with git_fast_import(repo,
                         *FastImportSettings().as_args(), "--quiet") as proc:
        proc.stdin.write(header.encode("utf8"))

        # The results are iterated over but not needed - the loop only
        # exists to consume what write_blobs_to_stream yields.
        for _ in write_blobs_to_stream(proc.stdin, extra_blobs):
            pass

        for source in sources:
            click.echo(f"Importing {source}...")

            tmp_object_path = lfs_tmp_path / str(uuid.uuid4())
            oid, size = import_func(source, tmp_object_path)
            actual_object_path = get_local_path_from_lfs_hash(repo, oid)
            actual_object_path.parent.mkdir(parents=True, exist_ok=True)
            # replace() rather than rename(): if the object is already present
            # (eg the same tile was imported before, or two sources have
            # identical contents), rename() raises FileExistsError on Windows.
            # replace() overwrites the existing file on every platform.
            tmp_object_path.replace(actual_object_path)

            # TODO - is this the right prefix and name?
            tilename = os.path.splitext(
                os.path.basename(source))[0] + import_ext
            tile_prefix = hexhash(tilename)[0:2]
            blob_path = f"{ds_inner_path}/tile/{tile_prefix}/{tilename}"
            info = per_source_info[source]
            # Contents of the pointer file that is committed in place of the tile.
            pointer_dict = {
                "version": "https://git-lfs.github.com/spec/v1",
                # TODO - available.<URL-IDX> <URL>
                "kart.extent.crs84": _format_array(info["crs84_envelope"]),
                "kart.extent.native": _format_array(info["native_envelope"]),
                "kart.format": kart_format,
                "kart.pc.count": info["count"],
                "oid": f"sha256:{oid}",
                "size": size,
            }
            write_blob_to_stream(proc.stdin, blob_path,
                                 dict_to_pointer_file_bytes(pointer_dict))

        # Dataset-level meta items: the schema and the CRS definition.
        write_blob_to_stream(proc.stdin, f"{ds_inner_path}/meta/schema.json",
                             json_pack(schema))
        write_blob_to_stream(
            proc.stdin,
            f"{ds_inner_path}/meta/crs/{crs_name}.wkt",
            ensure_bytes(normalise_wkt(crs_set.only())),
        )

    click.echo("Updating working copy...")
    reset_wc_if_needed(repo)

    # TODO - fix up reset code - there should be a single function you can call that updates all working copies.
    tabular_wc = repo.get_working_copy(allow_uncreated=True)
    if tabular_wc is not None:
        tabular_wc.reset(repo.head_commit)