Example #1
    def load_repo_config(cls, repo):
        """
        Reads the spatial filter, if any, from the repo config.
        Returns a dict that always contains "geometry" and "crs" keys, plus "reference"
        and "objectId" keys when the filter is stored as a blob pointed to by a reference.
        Returns None if no spatial filter is configured.
        """
        from kart.repo import KartConfigKeys

        geometry_spec = repo.get_config_str(
            KartConfigKeys.KART_SPATIALFILTER_GEOMETRY)
        crs_spec = repo.get_config_str(KartConfigKeys.KART_SPATIALFILTER_CRS)
        if geometry_spec:
            if not crs_spec:
                raise NotFound(
                    "Spatial filter CRS is missing from config",
                    exit_code=NO_SPATIAL_FILTER,
                )
            return {"geometry": geometry_spec, "crs": crs_spec}

        ref_spec = repo.get_config_str(
            KartConfigKeys.KART_SPATIALFILTER_REFERENCE)
        oid_spec = repo.get_config_str(
            KartConfigKeys.KART_SPATIALFILTER_OBJECTID)
        if ref_spec:
            if not oid_spec:
                raise NotFound(
                    "Spatial filter object ID is missing from config",
                    exit_code=NO_SPATIAL_FILTER,
                )

            if ref_spec not in repo.references:
                click.echo(
                    f"The current spatial filter has been deleted from {ref_spec} - to unapply this filter, run: "
                    f"kart checkout --spatial-filter=",
                    err=True,
                )
            elif str(repo.references[ref_spec].resolve().target) != oid_spec:
                # TODO -  Improve handling of changed spatial filter - maybe reapply it automatically if WC is clean.
                click.echo(
                    f"The spatial filter at {ref_spec} has changed since it was applied - to apply the new filter, "
                    f"run: kart checkout --spatial-filter={ref_spec}",
                    err=True,
                )

            contents = repo[oid_spec].data.decode("utf-8")
            crs_spec, geometry_spec = ReferenceSpatialFilterSpec.split_file(
                contents)
            return {
                "reference": ref_spec,
                "objectId": oid_spec,
                "geometry": geometry_spec,
                "crs": crs_spec,
            }

        return None
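
A minimal sketch of how a caller might consume the dict returned by load_repo_config above. The helper name describe_spatial_filter and its messages are illustrative only, not part of Kart:

    def describe_spatial_filter(config):
        # config is the return value of load_repo_config: None, or a dict that always
        # carries "geometry" and "crs", plus "reference"/"objectId" when the filter
        # is stored as a blob pointed to by a reference.
        if config is None:
            return "No spatial filter is configured."
        if "reference" in config:
            return f"Spatial filter stored at {config['reference']} ({config['objectId']})"
        return f"Spatial filter defined inline with CRS {config['crs']}"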
Example #2
    def _resolve_ref_oid_blob(self, repo):
        """
        Returns a tuple (ref, oid, blob), where ref, oid and blob are the reference, object ID and blob
        indicated by self.ref_or_oid (but ref will be None if self.ref_or_oid is an object ID).
        """

        ref = None
        oid = None
        obj = None
        try:
            oid = self.ref_or_oid
            obj = repo[oid]
        except (KeyError, ValueError):
            pass

        if obj is None:
            ref = self.ref_or_oid
            if not ref.startswith("refs/"):
                ref = f"refs/filters/{ref}"

            if ref in repo.references:
                oid = str(repo.references[ref].resolve().target)
            try:
                obj = repo[oid]
            except (KeyError, ValueError):
                pass

        if obj is None or obj.type_str != "blob":
            # Filter out None (and sort for a stable message) so this still works
            # when only an object ID was supplied and ref was never set.
            ref_desc = " or ".join(sorted({r for r in (oid, ref) if r}))
            raise NotFound(
                f"No spatial filter object was found in the repository at {ref_desc}",
                exit_code=NO_SPATIAL_FILTER,
            )

        return ref, oid, obj
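
The ref-name normalisation in _resolve_ref_oid_blob can be shown in isolation. This standalone sketch (normalise_filter_ref is a hypothetical name) mirrors the prefixing rule used above:

    def normalise_filter_ref(ref_or_oid):
        # Bare names are assumed to live under refs/filters/; fully-qualified refs pass through.
        if ref_or_oid.startswith("refs/"):
            return ref_or_oid
        return f"refs/filters/{ref_or_oid}"

    assert normalise_filter_ref("myfilter") == "refs/filters/myfilter"
    assert normalise_filter_ref("refs/heads/main") == "refs/heads/main"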
Example #3
    def validate_table(self, table):
        """
        Find the db-schema and the table, given a table name that the user supplied.
        The table-name might be in the format "DBSCHEMA.TABLE" or it might just be the table name.
        OGR can find the table even if the db_schema is not specified, at least in certain circumstances,
        so we try to do that too.
        """

        all_tables = self.get_tables().keys()
        if table in all_tables:
            if (self.db_schema is None and "." in table
                    and self.db_type is not DbType.GPKG):
                db_schema, table = table.split(".", maxsplit=1)
                return db_schema, table
            else:
                return self.db_schema, table

        if self.db_schema is None and self.db_type is not DbType.GPKG:
            with self.engine.connect() as conn:
                db_schemas = self.db_class.db_schema_searchpath(conn)
            for db_schema in db_schemas:
                if f"{db_schema}.{table}" in all_tables:
                    return db_schema, table

        raise NotFound(
            f"Table '{table}' not found",
            exit_code=NO_TABLE,
        )
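
The lookup order in validate_table can be sketched without a database: try the name exactly as supplied, then try each "schema.table" combination from the schema search path. A simplified sketch with made-up table and schema names (it returns the qualified name rather than a tuple):

    def find_table(table, all_tables, search_path):
        # Direct hit: the user already supplied the name exactly as listed.
        if table in all_tables:
            return table
        # Otherwise try each schema in the search path, mirroring the fallback loop above.
        for db_schema in search_path:
            qualified = f"{db_schema}.{table}"
            if qualified in all_tables:
                return qualified
        return None

    assert find_table("roads", {"public.roads", "topo.contours"}, ["public"]) == "public.roads"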
Example #4
    def get_sqlserver_driver(cls):
        """Return the name of the SQL Server driver."""
        drivers = cls.get_odbc_drivers()
        mssql_drivers = [
            d for d in drivers
            if re.search("SQL Server", d, flags=re.IGNORECASE)
        ]
        if not mssql_drivers:
            raise NotFound(
                f"ODBC Driver for SQL Server is required but was not found.\nSee {cls.INSTALL_DOC_URL}",
                exit_code=NO_DRIVER,
            )
        return sorted(mssql_drivers)[-1]  # Latest driver
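
The filter-and-sort selection is easy to see with some hypothetical driver names (the list below is made up; real names depend on what is installed):

    import re

    drivers = ["SQLite3 ODBC Driver", "ODBC Driver 17 for SQL Server", "ODBC Driver 18 for SQL Server"]
    mssql_drivers = [d for d in drivers if re.search("SQL Server", d, flags=re.IGNORECASE)]
    # sorted(...)[-1] picks the lexicographically last match, here the newest versioned driver.
    assert sorted(mssql_drivers)[-1] == "ODBC Driver 18 for SQL Server"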
Example #5
    def get_odbc_drivers(cls):
        """Returns a list of names of all ODBC drivers."""
        try:
            import pyodbc
        except ImportError as e:
            # This likely means unixODBC isn't installed. Since the MSSQL drivers
            # on macOS/Linux depend on unixODBC, installing them also installs it.
            L.debug("pyodbc import error: %s", e)
            raise NotFound(
                f"ODBC support for SQL Server is required but was not found.\nSee {cls.INSTALL_DOC_URL}",
                exit_code=NO_DRIVER,
            )

        return pyodbc.drivers()
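
For context, the driver name selected from pyodbc.drivers() is the kind of value that ends up in an ODBC connection string. A generic pyodbc illustration with placeholder server, database and credentials, not how Kart itself connects:

    import pyodbc

    driver = "ODBC Driver 18 for SQL Server"  # e.g. the value returned by get_sqlserver_driver()
    conn_str = f"DRIVER={{{driver}}};SERVER=example-host;DATABASE=example_db;UID=user;PWD=secret"
    # pyodbc.connect accepts a semicolon-delimited ODBC connection string like this one.
    # conn = pyodbc.connect(conn_str)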
Example #6
    def prompt_for_table(self, prompt):
        table_list = list(self.get_tables().keys())

        if len(table_list) == 1:
            return table_list[0]
        else:
            self.print_table_list()
            if get_input_mode() == InputMode.NO_INPUT:
                raise NotFound("No table specified", exit_code=NO_TABLE)
            t_choices = click.Choice(choices=table_list)
            # len(table_list) == 1 was already handled above, so there is no sensible default.
            return click.prompt(
                f"\n{prompt}",
                type=t_choices,
                show_choices=False,
                default=None,
            )
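
A minimal standalone sketch of the click pattern used above (the table names are made up). click.Choice re-prompts until the input matches one of the choices, and show_choices=False keeps the long table list out of the prompt line, since print_table_list has already displayed it:

    import click

    @click.command()
    def pick():
        tables = ["public.roads", "public.parcels", "topo.contours"]
        choice = click.prompt(
            "Select a table to import",
            type=click.Choice(tables),
            show_choices=False,
        )
        click.echo(f"Importing {choice}")

    if __name__ == "__main__":
        pick()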
Example #7
    def open(cls, spec, table=None):
        db_type = DbType.from_spec(spec)
        if db_type is None:
            raise cls._bad_import_source_spec(spec)

        if db_type.clearly_doesnt_exist(spec):
            raise NotFound(f"Couldn't find '{spec}'",
                           exit_code=NO_IMPORT_SOURCE)

        path_length = db_type.path_length(spec)
        longest_allowed_path_length = (db_type.path_length_for_table
                                       if not table else
                                       db_type.path_length_for_table_container)
        shortest_allowed_path_length = max(
            db_type.path_length_for_table_container - 1, 0)

        if not (shortest_allowed_path_length <= path_length <=
                longest_allowed_path_length):
            raise cls._bad_import_source_spec(spec)

        connect_url = spec
        db_schema = None

        # Handle the case where specification already points to a single table.
        if path_length == db_type.path_length_for_table:
            connect_url, table = separate_last_path_part(connect_url)
            path_length -= 1

        # Handle the case where specification points to a database schema (or similar).
        if path_length > shortest_allowed_path_length:
            connect_url, db_schema = separate_last_path_part(connect_url)

        engine = db_type.class_.create_engine(connect_url)
        return SqlAlchemyTableImportSource(spec,
                                           db_type=db_type,
                                           engine=engine,
                                           db_schema=db_schema,
                                           table=table)
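
separate_last_path_part is used above to peel the table, and then the schema, off the end of the connection URL. A plausible pure-Python sketch of that behaviour; this is a guess at what the real helper does with URL-style paths, not its actual implementation:

    def separate_last_path_part(url):
        # Split "scheme://host/db/schema/table" into ("scheme://host/db/schema", "table").
        head, _, tail = url.rpartition("/")
        return head, tail

    assert separate_last_path_part("postgresql://host/db/public/roads") == (
        "postgresql://host/db/public", "roads")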
Example #8
def point_cloud_import(ctx, convert_to_copc, ds_path, sources):
    """
    Experimental command for importing point cloud datasets. Work-in-progress.
    Will eventually be merged with the main `import` command.

    SOURCES should be one or more LAZ or LAS files (or wildcards that match multiple LAZ or LAS files).
    """
    import pdal

    repo = ctx.obj.repo

    # TODO - improve path validation to make sure datasets of any type don't collide with each other
    # or with attachments.
    validate_dataset_paths([ds_path])

    for source in sources:
        if not (Path() / source).is_file():
            raise NotFound(f"No data found at {source}",
                           exit_code=NO_IMPORT_SOURCE)

    compressed_set = ListBasedSet()
    version_set = ListBasedSet()
    copc_version_set = ListBasedSet()
    pdrf_set = ListBasedSet()
    pdr_length_set = ListBasedSet()
    crs_set = ListBasedSet()
    transform = None
    schema = None
    crs_name = None

    per_source_info = {}

    for source in sources:
        click.echo(f"Checking {source}...          \r", nl=False)
        config = [{
            "type": "readers.las",
            "filename": source,
            "count": 0,  # Don't read any individual points.
        }]
        if schema is None:
            config.append({"type": "filters.info"})

        pipeline = pdal.Pipeline(json.dumps(config))
        try:
            pipeline.execute()
        except RuntimeError:
            raise InvalidOperation(f"Error reading {source}",
                                   exit_code=INVALID_FILE_FORMAT)

        metadata = _unwrap_metadata(pipeline.metadata)

        info = metadata["readers.las"]

        compressed_set.add(info["compressed"])
        if len(compressed_set) > 1:
            raise _non_homogenous_error("filetype", "LAS vs LAZ")

        version = f"{info['major_version']}.{info['minor_version']}"
        version_set.add(version)
        if len(version_set) > 1:
            raise _non_homogenous_error("version", version_set)

        copc_version_set.add(get_copc_version(info))
        if len(copc_version_set) > 1:
            raise _non_homogenous_error("COPC version", copc_version_set)

        pdrf_set.add(info["dataformat_id"])
        if len(pdrf_set) > 1:
            raise _non_homogenous_error("Point Data Record Format", pdrf_set)

        pdr_length_set.add(info["point_length"])
        if len(pdr_length_set) > 1:
            raise _non_homogenous_error("Point Data Record Length",
                                        pdr_length_set)

        crs_set.add(info["srs"]["wkt"])
        if len(crs_set) > 1:
            raise _non_homogenous_error(
                "CRS",
                "\n vs \n".join((format_wkt_for_output(wkt, sys.stderr)
                                 for wkt in crs_set)),
            )

        if transform is None:
            transform = _make_transform_to_crs84(crs_set.only())

        native_envelope = get_native_envelope(info)
        crs84_envelope = _transform_3d_envelope(transform, native_envelope)
        per_source_info[source] = {
            "count": info["count"],
            "native_envelope": native_envelope,
            "crs84_envelope": crs84_envelope,
        }

        if schema is None:
            crs_name = get_identifier_str(crs_set.only())
            schema = metadata["filters.info"]["schema"]
            schema["CRS"] = crs_name

    click.echo()

    version = version_set.only()
    copc_version = copc_version_set.only()
    is_laz = compressed_set.only() is True
    is_copc = is_laz and copc_version != NOT_COPC

    if is_copc:
        # Keep native format.
        import_func = get_hash_and_size_of_file_while_copying
        kart_format = f"pc:v1/copc-{copc_version}.0"
    elif is_laz:
        # Optionally convert to COPC 1.0, if requested.
        import_func = (_convert_tile_to_copc_lfs_blob if convert_to_copc else
                       get_hash_and_size_of_file_while_copying)
        kart_format = "pc:v1/copc-1.0" if convert_to_copc else f"pc:v1/laz-{version}"
    else:  # LAS
        if not convert_to_copc:
            raise InvalidOperation(
                "LAS datasets are not supported - dataset must be converted to LAZ / COPC",
                exit_code=INVALID_FILE_FORMAT,
            )
        import_func = _convert_tile_to_copc_lfs_blob
        kart_format = "pc:v1/copc-1.0"

    import_ext = ".copc.laz" if "copc" in kart_format else ".laz"

    # Set up LFS hooks.
    # TODO: This could eventually be moved to `kart init`.
    if not (repo.gitdir_path / "hooks" / "pre-push").is_file():
        subprocess.check_call(
            ["git", "-C",
             str(repo.gitdir_path), "lfs", "install", "hooks"])

    # Unfortunately we still need to write .kart.repostructure.version, even though it's only relevant to tabular datasets.
    assert repo.table_dataset_version in SUPPORTED_VERSIONS
    extra_blobs = (extra_blobs_for_version(repo.table_dataset_version)
                   if not repo.head_commit else [])

    header = generate_header(
        repo,
        None,
        f"Importing {len(sources)} LAZ tiles as {ds_path}",
        repo.head_branch,
        repo.head_commit,
    )

    ds_inner_path = f"{ds_path}/.point-cloud-dataset.v1"

    lfs_tmp_path = repo.gitdir_path / "lfs" / "objects" / "tmp"
    lfs_tmp_path.mkdir(parents=True, exist_ok=True)

    with git_fast_import(repo,
                         *FastImportSettings().as_args(), "--quiet") as proc:
        proc.stdin.write(header.encode("utf8"))

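        # Drain the generator so any extra blobs are actually written to the fast-import stream.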
        for i, blob_path in write_blobs_to_stream(proc.stdin, extra_blobs):
            pass

        for source in sources:
            click.echo(f"Importing {source}...")

            tmp_object_path = lfs_tmp_path / str(uuid.uuid4())
            oid, size = import_func(source, tmp_object_path)
            actual_object_path = get_local_path_from_lfs_hash(repo, oid)
            actual_object_path.parents[0].mkdir(parents=True, exist_ok=True)
            tmp_object_path.rename(actual_object_path)

            # TODO - is this the right prefix and name?
            tilename = os.path.splitext(
                os.path.basename(source))[0] + import_ext
            tile_prefix = hexhash(tilename)[0:2]
            blob_path = f"{ds_inner_path}/tile/{tile_prefix}/{tilename}"
            info = per_source_info[source]
            pointer_dict = {
                "version": "https://git-lfs.github.com/spec/v1",
                # TODO - available.<URL-IDX> <URL>
                "kart.extent.crs84": _format_array(info["crs84_envelope"]),
                "kart.extent.native": _format_array(info["native_envelope"]),
                "kart.format": kart_format,
                "kart.pc.count": info["count"],
                "oid": f"sha256:{oid}",
                "size": size,
            }
            write_blob_to_stream(proc.stdin, blob_path,
                                 dict_to_pointer_file_bytes(pointer_dict))

        write_blob_to_stream(proc.stdin, f"{ds_inner_path}/meta/schema.json",
                             json_pack(schema))
        write_blob_to_stream(
            proc.stdin,
            f"{ds_inner_path}/meta/crs/{crs_name}.wkt",
            ensure_bytes(normalise_wkt(crs_set.only())),
        )

    click.echo("Updating working copy...")
    reset_wc_if_needed(repo)

    # TODO - fix up reset code - there should be a single function you can call that updates all working copies.
    tabular_wc = repo.get_working_copy(allow_uncreated=True)
    if tabular_wc is not None:
        tabular_wc.reset(repo.head_commit)
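
dict_to_pointer_file_bytes is not shown here, but Git LFS pointer files are plain "key value" lines, with the version line first and the remaining keys in sorted order (https://git-lfs.github.com/spec/v1). A rough sketch of that serialisation, assuming Kart's helper follows the same layout for its extra kart.* keys:

    def dict_to_pointer_file_bytes_sketch(pointer_dict):
        # "version" must come first; the LFS spec orders the remaining keys alphabetically.
        version = pointer_dict["version"]
        rest = sorted((k, v) for k, v in pointer_dict.items() if k != "version")
        lines = [f"version {version}"] + [f"{k} {v}" for k, v in rest]
        return ("\n".join(lines) + "\n").encode("utf8")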