def load_repo_config(cls, repo):
    from kart.repo import KartConfigKeys

    geometry_spec = repo.get_config_str(KartConfigKeys.KART_SPATIALFILTER_GEOMETRY)
    crs_spec = repo.get_config_str(KartConfigKeys.KART_SPATIALFILTER_CRS)
    if geometry_spec:
        if not crs_spec:
            raise NotFound(
                "Spatial filter CRS is missing from config",
                exit_code=NO_SPATIAL_FILTER,
            )
        return {"geometry": geometry_spec, "crs": crs_spec}

    ref_spec = repo.get_config_str(KartConfigKeys.KART_SPATIALFILTER_REFERENCE)
    oid_spec = repo.get_config_str(KartConfigKeys.KART_SPATIALFILTER_OBJECTID)
    if ref_spec:
        if not oid_spec:
            raise NotFound(
                "Spatial filter object ID is missing from config",
                exit_code=NO_SPATIAL_FILTER,
            )
        if ref_spec not in repo.references:
            click.echo(
                f"The current spatial filter has been deleted from {ref_spec} - to unapply this filter, run: "
                f"kart checkout --spatial-filter=",
                err=True,
            )
        elif str(repo.references[ref_spec].resolve().target) != oid_spec:
            # TODO - Improve handling of changed spatial filter - maybe reapply it automatically if WC is clean.
            click.echo(
                f"The spatial filter at {ref_spec} has changed since it was applied - to apply the new filter, "
                f"run: kart checkout --spatial-filter={ref_spec}",
                err=True,
            )
        contents = repo[oid_spec].data.decode("utf-8")
        crs_spec, geometry_spec = ReferenceSpatialFilterSpec.split_file(contents)
        return {
            "reference": ref_spec,
            "objectId": oid_spec,
            "geometry": geometry_spec,
            "crs": crs_spec,
        }

    return None
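
# Illustrative sketch (not part of Kart): roughly how the geometry/CRS lookup above could be
# done with plain pygit2 config access. The literal key names "kart.spatialfilter.geometry"
# and "kart.spatialfilter.crs" are assumptions here - the real values come from KartConfigKeys.
def _read_spatial_filter_config_sketch(repo_path):
    import pygit2

    config = pygit2.Repository(repo_path).config

    def get(key):
        # Missing keys are simply treated as "no filter configured".
        return config[key] if key in config else None

    geometry = get("kart.spatialfilter.geometry")
    crs = get("kart.spatialfilter.crs")
    if geometry and crs:
        return {"geometry": geometry, "crs": crs}
    return None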
def _resolve_ref_oid_blob(self, repo):
    """
    Returns a tuple (ref, oid, blob), where ref, oid and blob are the reference, object ID and blob
    indicated by self.ref_or_oid (but ref will be None if self.ref_or_oid is an object ID).
    """
    ref = None
    oid = None
    obj = None
    try:
        oid = self.ref_or_oid
        obj = repo[oid]
    except (KeyError, ValueError):
        pass

    if obj is None:
        ref = self.ref_or_oid
        if not ref.startswith("refs/"):
            ref = f"refs/filters/{ref}"
        if ref in repo.references:
            oid = str(repo.references[ref].resolve().target)
            try:
                obj = repo[oid]
            except (KeyError, ValueError):
                pass

    if obj is None or obj.type_str != "blob":
        # Filter out None so the error message still works when only an OID was supplied.
        ref_desc = " or ".join(set(s for s in (oid, ref) if s))
        raise NotFound(
            f"No spatial filter object was found in the repository at {ref_desc}",
            exit_code=NO_SPATIAL_FILTER,
        )
    return ref, oid, obj
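
# Minimal pygit2 sketch of the ref-or-OID fallback above, for reference. "refs/filters/" is the
# namespace the code above uses for spatial-filter refs; isinstance(obj, pygit2.Blob) is used here
# in place of the type_str check. Illustrative only, not Kart's implementation.
def _lookup_blob_sketch(repo, ref_or_oid):
    import pygit2

    try:
        obj = repo[ref_or_oid]  # Try it as a raw object ID first.
    except (KeyError, ValueError):
        ref_name = ref_or_oid if ref_or_oid.startswith("refs/") else f"refs/filters/{ref_or_oid}"
        if ref_name not in repo.references:
            return None
        obj = repo[repo.references[ref_name].resolve().target]
    return obj if isinstance(obj, pygit2.Blob) else None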
def validate_table(self, table):
    """
    Find the db-schema and the table, given a table name that the user supplied.
    The table-name might be in the format "DBSCHEMA.TABLE" or it might just be the table name.
    OGR can find the table even if the db_schema is not specified, at least in certain circumstances,
    so we try to do that too.
    """
    all_tables = self.get_tables().keys()
    if table in all_tables:
        if (
            self.db_schema is None
            and "." in table
            and self.db_type is not DbType.GPKG
        ):
            db_schema, table = table.split(".", maxsplit=1)
            return db_schema, table
        else:
            return self.db_schema, table

    if self.db_schema is None and self.db_type is not DbType.GPKG:
        with self.engine.connect() as conn:
            db_schemas = self.db_class.db_schema_searchpath(conn)
        for db_schema in db_schemas:
            if f"{db_schema}.{table}" in all_tables:
                return db_schema, table

    raise NotFound(
        f"Table '{table}' not found",
        exit_code=NO_TABLE,
    )
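
# A rough standalone sketch of the same schema-searchpath idea using SQLAlchemy's generic
# Inspector. The search order here (every schema the inspector reports, in its own order) is an
# assumption; the real code asks the driver-specific db_class for its actual search path.
def _find_table_schema_sketch(engine, table):
    from sqlalchemy import inspect

    insp = inspect(engine)
    for db_schema in insp.get_schema_names():
        if table in insp.get_table_names(schema=db_schema):
            return db_schema, table
    return None, table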
def get_sqlserver_driver(cls):
    """Return the name of the SQL Server driver."""
    drivers = cls.get_odbc_drivers()
    mssql_drivers = [
        d for d in drivers if re.search("SQL Server", d, flags=re.IGNORECASE)
    ]
    if not mssql_drivers:
        raise NotFound(
            f"ODBC Driver for SQL Server is required but was not found.\nSee {cls.INSTALL_DOC_URL}",
            exit_code=NO_DRIVER,
        )
    return sorted(mssql_drivers)[-1]  # Latest driver
def get_odbc_drivers(cls):
    """Returns a list of names of all ODBC drivers."""
    try:
        import pyodbc
    except ImportError as e:
        # This likely means unixODBC isn't installed. But since the MSSQL drivers
        # on macOS/Linux depend on it, it'll be installed along with them.
        L.debug("pyodbc import error: %s", e)
        raise NotFound(
            f"ODBC support for SQL Server is required but was not found.\nSee {cls.INSTALL_DOC_URL}",
            exit_code=NO_DRIVER,
        )
    return pyodbc.drivers()
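
# Standalone sketch of the driver-selection logic in the two methods above, runnable outside the
# class (requires pyodbc and, on macOS/Linux, unixODBC). Sorting lexically and taking the last
# entry approximates "latest", since Microsoft's driver names embed the major version
# (e.g. "ODBC Driver 17 for SQL Server" vs "ODBC Driver 18 for SQL Server").
def _pick_sqlserver_driver_sketch():
    import re
    import pyodbc

    candidates = [d for d in pyodbc.drivers() if re.search("SQL Server", d, re.IGNORECASE)]
    return sorted(candidates)[-1] if candidates else None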
def prompt_for_table(self, prompt):
    table_list = list(self.get_tables().keys())

    if len(table_list) == 1:
        return table_list[0]
    else:
        self.print_table_list()
        if get_input_mode() == InputMode.NO_INPUT:
            raise NotFound("No table specified", exit_code=NO_TABLE)
        t_choices = click.Choice(choices=table_list)
        t_default = table_list[0] if len(table_list) == 1 else None
        return click.prompt(
            f"\n{prompt}",
            type=t_choices,
            show_choices=False,
            default=t_default,
        )
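
# Hedged sketch of the prompting pattern above as a self-contained snippet: click.Choice restricts
# the accepted answers to the given options, and show_choices=False stops a long table list from
# being repeated inline in the prompt text (the caller prints the list separately).
def _prompt_for_one_sketch(prompt, options):
    import click

    return click.prompt(f"\n{prompt}", type=click.Choice(options), show_choices=False)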
def open(cls, spec, table=None):
    db_type = DbType.from_spec(spec)
    if db_type is None:
        raise cls._bad_import_source_spec(spec)

    if db_type.clearly_doesnt_exist(spec):
        raise NotFound(f"Couldn't find '{spec}'", exit_code=NO_IMPORT_SOURCE)

    path_length = db_type.path_length(spec)
    longest_allowed_path_length = (
        db_type.path_length_for_table
        if not table
        else db_type.path_length_for_table_container
    )
    shortest_allowed_path_length = max(db_type.path_length_for_table_container - 1, 0)
    if not (shortest_allowed_path_length <= path_length <= longest_allowed_path_length):
        raise cls._bad_import_source_spec(spec)

    connect_url = spec
    db_schema = None

    # Handle the case where the specification already points to a single table.
    if path_length == db_type.path_length_for_table:
        connect_url, table = separate_last_path_part(connect_url)
        path_length -= 1

    # Handle the case where the specification points to a database schema (or similar).
    if path_length > shortest_allowed_path_length:
        connect_url, db_schema = separate_last_path_part(connect_url)

    engine = db_type.class_.create_engine(connect_url)
    return SqlAlchemyTableImportSource(
        spec, db_type=db_type, engine=engine, db_schema=db_schema, table=table
    )
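
# Illustrative sketch of what separate_last_path_part plausibly does to a spec such as
# "postgresql://HOST/DBNAME/DBSCHEMA/TABLE": peel the final path segment off the URL, returning
# the shortened URL and the segment. Applied once it yields the table, applied again it yields the
# schema. This is a simplified stand-in for illustration, not Kart's actual helper.
def _separate_last_path_part_sketch(url):
    base, _, last = url.rstrip("/").rpartition("/")
    return base, last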
def point_cloud_import(ctx, convert_to_copc, ds_path, sources):
    """
    Experimental command for importing point cloud datasets. Work-in-progress.
    Will eventually be merged with the main `import` command.

    SOURCES should be one or more LAZ or LAS files (or wildcards that match multiple LAZ or LAS files).
    """
    import pdal

    repo = ctx.obj.repo

    # TODO - improve path validation to make sure datasets of any type don't collide with each other
    # or with attachments.
    validate_dataset_paths([ds_path])

    for source in sources:
        if not (Path() / source).is_file():
            raise NotFound(f"No data found at {source}", exit_code=NO_IMPORT_SOURCE)

    compressed_set = ListBasedSet()
    version_set = ListBasedSet()
    copc_version_set = ListBasedSet()
    pdrf_set = ListBasedSet()
    pdr_length_set = ListBasedSet()
    crs_set = ListBasedSet()
    transform = None
    schema = None
    crs_name = None

    per_source_info = {}
    for source in sources:
        click.echo(f"Checking {source}... \r", nl=False)
        config = [
            {
                "type": "readers.las",
                "filename": source,
                "count": 0,  # Don't read any individual points.
            }
        ]
        if schema is None:
            config.append({"type": "filters.info"})

        pipeline = pdal.Pipeline(json.dumps(config))
        try:
            pipeline.execute()
        except RuntimeError:
            raise InvalidOperation(
                f"Error reading {source}", exit_code=INVALID_FILE_FORMAT
            )
        metadata = _unwrap_metadata(pipeline.metadata)
        info = metadata["readers.las"]

        compressed_set.add(info["compressed"])
        if len(compressed_set) > 1:
            raise _non_homogenous_error("filetype", "LAS vs LAZ")

        version = f"{info['major_version']}.{info['minor_version']}"
        version_set.add(version)
        if len(version_set) > 1:
            raise _non_homogenous_error("version", version_set)

        copc_version_set.add(get_copc_version(info))
        if len(copc_version_set) > 1:
            raise _non_homogenous_error("COPC version", copc_version_set)

        pdrf_set.add(info["dataformat_id"])
        if len(pdrf_set) > 1:
            raise _non_homogenous_error("Point Data Record Format", pdrf_set)

        pdr_length_set.add(info["point_length"])
        if len(pdr_length_set) > 1:
            raise _non_homogenous_error("Point Data Record Length", pdr_length_set)

        crs_set.add(info["srs"]["wkt"])
        if len(crs_set) > 1:
            raise _non_homogenous_error(
                "CRS",
                "\n vs \n".join(
                    format_wkt_for_output(wkt, sys.stderr) for wkt in crs_set
                ),
            )

        if transform is None:
            transform = _make_transform_to_crs84(crs_set.only())

        native_envelope = get_native_envelope(info)
        crs84_envelope = _transform_3d_envelope(transform, native_envelope)
        per_source_info[source] = {
            "count": info["count"],
            "native_envelope": native_envelope,
            "crs84_envelope": crs84_envelope,
        }

        if schema is None:
            crs_name = get_identifier_str(crs_set.only())
            schema = metadata["filters.info"]["schema"]
            schema["CRS"] = crs_name

    click.echo()

    version = version_set.only()
    copc_version = copc_version_set.only()
    is_laz = compressed_set.only() is True
    is_copc = is_laz and copc_version != NOT_COPC

    if is_copc:
        # Keep native format.
        import_func = get_hash_and_size_of_file_while_copying
        kart_format = f"pc:v1/copc-{copc_version}.0"
    elif is_laz:
        # Optionally convert to COPC 1.0 if requested.
        import_func = (
            _convert_tile_to_copc_lfs_blob
            if convert_to_copc
            else get_hash_and_size_of_file_while_copying
        )
        kart_format = "pc:v1/copc-1.0" if convert_to_copc else f"pc:v1/laz-{version}"
    else:  # LAS
        if not convert_to_copc:
            raise InvalidOperation(
                "LAS datasets are not supported - dataset must be converted to LAZ / COPC",
                exit_code=INVALID_FILE_FORMAT,
            )
        import_func = _convert_tile_to_copc_lfs_blob
        kart_format = "pc:v1/copc-1.0"

    import_ext = ".copc.laz" if "copc" in kart_format else ".laz"

    # Set up LFS hooks.
    # TODO: This could eventually be moved to `kart init`.
    if not (repo.gitdir_path / "hooks" / "pre-push").is_file():
        subprocess.check_call(
            ["git", "-C", str(repo.gitdir_path), "lfs", "install", "hooks"]
        )

    # We still need to write .kart.repostructure.version unfortunately, even though it's only
    # relevant to tabular datasets.
    assert repo.table_dataset_version in SUPPORTED_VERSIONS
    extra_blobs = (
        extra_blobs_for_version(repo.table_dataset_version)
        if not repo.head_commit
        else []
    )

    header = generate_header(
        repo,
        None,
        f"Importing {len(sources)} LAZ tiles as {ds_path}",
        repo.head_branch,
        repo.head_commit,
    )

    ds_inner_path = f"{ds_path}/.point-cloud-dataset.v1"
    lfs_tmp_path = repo.gitdir_path / "lfs" / "objects" / "tmp"
    lfs_tmp_path.mkdir(parents=True, exist_ok=True)

    with git_fast_import(repo, *FastImportSettings().as_args(), "--quiet") as proc:
        proc.stdin.write(header.encode("utf8"))
        for i, blob_path in write_blobs_to_stream(proc.stdin, extra_blobs):
            pass

        for source in sources:
            click.echo(f"Importing {source}...")

            tmp_object_path = lfs_tmp_path / str(uuid.uuid4())
            oid, size = import_func(source, tmp_object_path)
            actual_object_path = get_local_path_from_lfs_hash(repo, oid)
            actual_object_path.parents[0].mkdir(parents=True, exist_ok=True)
            tmp_object_path.rename(actual_object_path)

            # TODO - is this the right prefix and name?
            tilename = os.path.splitext(os.path.basename(source))[0] + import_ext
            tile_prefix = hexhash(tilename)[0:2]
            blob_path = f"{ds_inner_path}/tile/{tile_prefix}/{tilename}"
            info = per_source_info[source]
            pointer_dict = {
                "version": "https://git-lfs.github.com/spec/v1",
                # TODO - available.<URL-IDX> <URL>
                "kart.extent.crs84": _format_array(info["crs84_envelope"]),
                "kart.extent.native": _format_array(info["native_envelope"]),
                "kart.format": kart_format,
                "kart.pc.count": info["count"],
                "oid": f"sha256:{oid}",
                "size": size,
            }
            write_blob_to_stream(
                proc.stdin, blob_path, dict_to_pointer_file_bytes(pointer_dict)
            )

        write_blob_to_stream(
            proc.stdin, f"{ds_inner_path}/meta/schema.json", json_pack(schema)
        )
        write_blob_to_stream(
            proc.stdin,
            f"{ds_inner_path}/meta/crs/{crs_name}.wkt",
            ensure_bytes(normalise_wkt(crs_set.only())),
        )

    click.echo("Updating working copy...")
    reset_wc_if_needed(repo)

    # TODO - fix up reset code - there should be a single function you can call that updates
    # all working copies.
    tabular_wc = repo.get_working_copy(allow_uncreated=True)
    if tabular_wc is not None:
        tabular_wc.reset(repo.head_commit)
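
# Sketch of the kind of blob dict_to_pointer_file_bytes plausibly emits: a Git LFS pointer file is
# a small text blob of "key value" lines, with the version line first and the remaining keys in
# sorted order (per the git-lfs pointer spec); the kart.* entries above ride along as extra
# key-value pairs. This is an illustration of the format under that assumption, not Kart's
# implementation.
def _pointer_file_bytes_sketch(pointer_dict):
    version = pointer_dict["version"]
    rest = sorted((k, v) for k, v in pointer_dict.items() if k != "version")
    lines = [f"version {version}"] + [f"{k} {v}" for k, v in rest]
    return ("\n".join(lines) + "\n").encode("utf8")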