def all_v2_meta_items_from_gpkg_meta_items(
    cls, gpkg_meta_items, id_salt=None, include_metadata_json=False
):
    """
    Generate all the V2 meta items from the given gpkg_meta_items lists / dicts -
    either loaded from JSON, or generated directly from the database.
    Varying the id_salt varies the ids that are generated for the schema.json item.
    """
    title = cls._nested_get(gpkg_meta_items, "gpkg_contents", "identifier")
    description = cls._nested_get(gpkg_meta_items, "gpkg_contents", "description")
    yield "title", title
    yield "description", description

    id_salt = id_salt or cls._nested_get(
        gpkg_meta_items, "gpkg_contents", "table_name"
    )
    schema = cls._gpkg_to_v2_schema(gpkg_meta_items, id_salt)
    yield "schema.json", schema.to_column_dicts() if schema else None

    if include_metadata_json:
        yield "metadata/dataset.json", cls.gpkg_to_json_metadata(gpkg_meta_items)
    yield "metadata.xml", cls.gpkg_to_xml_metadata(gpkg_meta_items)

    gpkg_spatial_ref_sys = gpkg_meta_items.get("gpkg_spatial_ref_sys")
    for gsrs in gpkg_spatial_ref_sys:
        d = gsrs["definition"]
        if not d or d == "undefined":
            continue
        id_str = crs_util.get_identifier_str(d)
        yield f"crs/{id_str}.wkt", crs_util.normalise_wkt(d)
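
# Hedged usage sketch for the generator above. The shape of gpkg_meta_items shown
# here is illustrative only (a dict keyed by GPKG table name), and the adapter class
# name KartAdapter_GPKG is an assumption; consult the real adapter for the exact
# input format.
#
# example_gpkg_meta_items = {
#     "gpkg_contents": {"table_name": "my_table", "identifier": "My Table", "description": ""},
#     "gpkg_spatial_ref_sys": [{"definition": "GEOGCS[...]"}],
# }
# meta_items = dict(
#     KartAdapter_GPKG.all_v2_meta_items_from_gpkg_meta_items(example_gpkg_meta_items)
# )
# # Expected keys: "title", "description", "schema.json", "metadata.xml", "crs/<id>.wkt", ...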
def crs_from_oid(self, crs_oid):
    wkt = normalise_wkt(self.repo[crs_oid].data.decode("utf-8"))
    result = make_crs(wkt)
    for prior_result in self._distinct_crs_list:
        if result.IsSame(prior_result):
            return prior_result
    self._distinct_crs_list.append(result)
    return result
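
# A minimal standalone sketch of the dedup pattern used in crs_from_oid, assuming
# osgeo/GDAL is available. The names dedupe_crs and _distinct_crs are illustrative,
# not Kart API; the point is that callers always get back the first-seen equivalent
# SpatialReference object, so later identity checks and caching behave consistently.
from osgeo import osr

_distinct_crs = []

def dedupe_crs(wkt):
    crs = osr.SpatialReference()
    crs.ImportFromWkt(wkt)
    for prior in _distinct_crs:
        if crs.IsSame(prior):
            return prior
    _distinct_crs.append(crs)
    return crs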
def encode_to_bytes(self, meta_item):
    if meta_item is None:
        return meta_item
    if self == self.JSON:
        return json_pack(meta_item)
    elif self == self.WKT:
        return ensure_bytes(crs_util.normalise_wkt(meta_item))
    return ensure_bytes(meta_item)
def decode_from_bytes(self, data):
    if data is None:
        return None
    if self == self.BYTES:
        return data
    elif self in (self.TEXT, self.XML):
        return ensure_text(data)
    elif self == self.JSON:
        return json_unpack(data)
    elif self == self.WKT:
        return crs_util.normalise_wkt(ensure_text(data))
    else:
        try:
            return ensure_text(data)
        except UnicodeDecodeError:
            return binascii.hexlify(data).decode()
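
# Hedged sketch of the fallback branch in decode_from_bytes: bytes that aren't valid
# UTF-8 are surfaced as a hex string rather than raising. This is a standalone
# stand-in using only the stdlib, not the Kart ensure_text helper itself.
import binascii

def _decode_text_or_hex(data):
    try:
        return data.decode("utf-8")
    except UnicodeDecodeError:
        return binascii.hexlify(data).decode()

assert _decode_text_or_hex(b"plain text") == "plain text"
assert _decode_text_or_hex(b"\xff\xfe\x01") == "fffe01"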
def all_v2_meta_items_including_empty(
    cls, sess, db_schema, table_name, id_salt, include_legacy_items=False
):
    """
    Generate all V2 meta items for the given table.
    Varying the id_salt varies the ids that are generated for the schema.json item.
    """
    table_identifier = cls.quote_table(db_schema=db_schema, table_name=table_name)

    title = sess.scalar(
        "SELECT obj_description((:table_identifier)::regclass, 'pg_class');",
        {"table_identifier": table_identifier},
    )
    yield "title", title

    primary_key_sql = """
        SELECT KCU.* FROM information_schema.key_column_usage KCU
        INNER JOIN information_schema.table_constraints TC
        ON KCU.constraint_schema = TC.constraint_schema
        AND KCU.constraint_name = TC.constraint_name
        WHERE TC.constraint_type = 'PRIMARY KEY'
    """

    table_info_sql = f"""
        SELECT
            C.column_name, C.ordinal_position, C.data_type, C.udt_name,
            C.character_maximum_length, C.numeric_precision, C.numeric_scale,
            PK.ordinal_position AS pk_ordinal_position,
            upper(postgis_typmod_type(A.atttypmod)) AS geometry_type,
            postgis_typmod_srid(A.atttypmod) AS geometry_srid
        FROM information_schema.columns C
        LEFT OUTER JOIN ({primary_key_sql}) PK
        ON (PK.table_schema = C.table_schema)
        AND (PK.table_name = C.table_name)
        AND (PK.column_name = C.column_name)
        LEFT OUTER JOIN pg_attribute A
        ON (A.attname = C.column_name)
        AND (A.attrelid = (:table_identifier)::regclass::oid)
        WHERE C.table_schema=:table_schema AND C.table_name=:table_name
        ORDER BY C.ordinal_position;
    """
    r = sess.execute(
        table_info_sql,
        {
            "table_identifier": table_identifier,
            "table_schema": db_schema,
            "table_name": table_name,
        },
    )
    pg_table_info = list(r)

    # Get all the information on the geometry columns that we can get without sampling the geometries:
    geom_cols_info_sql = """
        SELECT GC.f_geometry_column AS column_name, GC.srid, SRS.srtext
        FROM geometry_columns GC
        LEFT OUTER JOIN spatial_ref_sys SRS ON (GC.srid = SRS.srid)
        WHERE GC.f_table_schema=:table_schema AND GC.f_table_name=:table_name;
    """
    r = sess.execute(
        geom_cols_info_sql,
        {"table_schema": db_schema, "table_name": table_name},
    )
    geom_cols_info = [cls._filter_row_to_dict(row) for row in r]

    # Improve the geometry information by sampling one geometry from each column, where available.
    for col_info in geom_cols_info:
        c = col_info["column_name"]
        row = sess.execute(
            f"""
            SELECT
                ST_Zmflag({cls.quote(c)}) AS zm,
                ST_SRID({cls.quote(c)}) AS srid,
                SRS.srtext
            FROM {table_identifier}
            LEFT OUTER JOIN spatial_ref_sys SRS ON SRS.srid = ST_SRID({cls.quote(c)})
            WHERE {cls.quote(c)} IS NOT NULL LIMIT 1;
            """
        ).fetchone()
        if row:
            sampled_info = cls._filter_row_to_dict(row)
            sampled_info["zm"] = cls.ZM_FLAG_TO_STRING.get(sampled_info.get("zm"))
            # Original col_info from geometry_columns takes precedence, where it exists:
            col_info.update({**sampled_info, **col_info})

    schema = cls.postgis_to_v2_schema(pg_table_info, geom_cols_info, id_salt)
    yield "schema.json", schema.to_column_dicts() if schema else None

    for col_info in geom_cols_info:
        try:
            wkt = col_info["srtext"]
        except KeyError:
            # no CRS defined for this geometry column
            continue
        id_str = crs_util.get_identifier_str(wkt)
        yield f"crs/{id_str}.wkt", crs_util.normalise_wkt(wkt)
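
# Hedged sketch of the ZM_FLAG_TO_STRING lookup referenced above, based on the
# documented return values of PostGIS ST_Zmflag() (0 = 2D, 1 = 3D-M, 2 = 3D-Z, 3 = 4D).
# The exact constant on the adapter class may differ; this mapping is an assumption.
ZM_FLAG_TO_STRING_EXAMPLE = {0: "", 1: "M", 2: "Z", 3: "ZM"}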
def all_v2_meta_items_including_empty(
    cls, sess, db_schema, table_name, id_salt=None, include_legacy_items=False
):
    """
    Generate all V2 meta items for the given table.
    Varying the id_salt varies the ids that are generated for the schema.json item.
    """
    title = sess.scalar(
        """
        SELECT CAST(value AS NVARCHAR) FROM::fn_listextendedproperty(
            'MS_Description', 'schema', :schema, 'table', :table, null, null);
        """,
        {"schema": db_schema, "table": table_name},
    )
    yield "title", title

    primary_key_sql = """
        SELECT KCU.* FROM information_schema.key_column_usage KCU
        INNER JOIN information_schema.table_constraints TC
        ON KCU.constraint_schema = TC.constraint_schema
        AND KCU.constraint_name = TC.constraint_name
        WHERE TC.constraint_type = 'PRIMARY KEY'
    """

    table_info_sql = f"""
        SELECT
            C.column_name, C.ordinal_position, C.data_type,
            C.character_maximum_length, C.numeric_precision, C.numeric_scale,
            PK.ordinal_position AS pk_ordinal_position
        FROM information_schema.columns C
        LEFT OUTER JOIN ({primary_key_sql}) PK
        ON (PK.table_schema = C.table_schema)
        AND (PK.table_name = C.table_name)
        AND (PK.column_name = C.column_name)
        WHERE C.table_schema=:table_schema AND C.table_name=:table_name
        ORDER BY C.ordinal_position;
    """
    r = sess.execute(
        table_info_sql,
        {"table_schema": db_schema, "table_name": table_name},
    )
    ms_table_info = list(r)

    geom_cols = [
        row["column_name"]
        for row in ms_table_info
        if row["data_type"] in ("geometry", "geography")
    ]

    table_identifier = cls.quote_table(db_schema=db_schema, table_name=table_name)
    ms_spatial_ref_sys = [
        sess.execute(
            f"""
            SELECT TOP 1 :column_name AS column_name, {cls.quote(g)}.STSrid AS srid, SRS.*
            FROM {table_identifier}
            LEFT OUTER JOIN sys.spatial_reference_systems SRS
            ON SRS.spatial_reference_id = {cls.quote(g)}.STSrid
            WHERE {cls.quote(g)} IS NOT NULL;
            """,
            {"column_name": g},
        ).fetchone()
        for g in geom_cols
    ]
    ms_spatial_ref_sys = list(filter(None, ms_spatial_ref_sys))  # Remove nulls.

    schema = KartAdapter_SqlServer.sqlserver_to_v2_schema(
        ms_table_info, ms_spatial_ref_sys, id_salt
    )
    yield "schema.json", schema.to_column_dicts() if schema else None

    for crs_info in ms_spatial_ref_sys:
        auth_name = crs_info["authority_name"]
        auth_code = crs_info["authorized_spatial_reference_id"]
        if not auth_name and not auth_code:
            auth_name, auth_code = "CUSTOM", crs_info["srid"]
        wkt = crs_info["well_known_text"] or ""
        yield f"crs/{auth_name}:{auth_code}.wkt", crs_util.normalise_wkt(
            crs_util.ensure_authority_specified(wkt, auth_name, auth_code)
        )
def all_v2_meta_items_including_empty(
    cls, sess, db_schema, table_name, id_salt=None, include_legacy_items=False
):
    title = sess.scalar(
        """
        SELECT table_comment FROM information_schema.tables
        WHERE table_schema=:table_schema AND table_name=:table_name;
        """,
        {"table_schema": db_schema, "table_name": table_name},
    )
    yield "title", title

    # Primary key SQL is a bit different for MySQL since constraints are named within the
    # namespace of a table - they don't have names that are globally unique within the db-schema.
    primary_key_sql = """
        SELECT KCU.* FROM information_schema.key_column_usage KCU
        INNER JOIN information_schema.table_constraints TC
        ON KCU.table_schema = TC.table_schema
        AND KCU.table_name = TC.table_name
        AND KCU.constraint_schema = TC.constraint_schema
        AND KCU.constraint_name = TC.constraint_name
        WHERE TC.constraint_type = 'PRIMARY KEY'
    """

    table_info_sql = f"""
        SELECT
            C.column_name, C.ordinal_position, C.data_type, C.srs_id,
            C.character_maximum_length, C.numeric_precision, C.numeric_scale,
            PK.ordinal_position AS pk_ordinal_position
        FROM information_schema.columns C
        LEFT OUTER JOIN ({primary_key_sql}) PK
        ON (PK.table_schema = C.table_schema)
        AND (PK.table_name = C.table_name)
        AND (PK.column_name = C.column_name)
        WHERE C.table_schema=:table_schema AND C.table_name=:table_name
        ORDER BY C.ordinal_position;
    """
    r = sess.execute(
        table_info_sql,
        {"table_schema": db_schema, "table_name": table_name},
    )
    mysql_table_info = list(r)

    spatial_ref_sys_sql = """
        SELECT SRS.* FROM information_schema.st_spatial_reference_systems SRS
        LEFT OUTER JOIN information_schema.st_geometry_columns GC ON (GC.srs_id = SRS.srs_id)
        WHERE GC.table_schema=:table_schema AND GC.table_name=:table_name;
    """
    r = sess.execute(
        spatial_ref_sys_sql,
        {"table_schema": db_schema, "table_name": table_name},
    )
    mysql_spatial_ref_sys = list(r)

    schema = KartAdapter_MySql.sqlserver_to_v2_schema(
        mysql_table_info, mysql_spatial_ref_sys, id_salt
    )
    yield "schema.json", schema.to_column_dicts()

    for crs_info in mysql_spatial_ref_sys:
        wkt = crs_info["DEFINITION"]
        id_str = crs_util.get_identifier_str(wkt)
        yield f"crs/{id_str}.wkt", crs_util.normalise_wkt(wkt)
def point_cloud_import(ctx, convert_to_copc, ds_path, sources):
    """
    Experimental command for importing point cloud datasets. Work-in-progress.
    Will eventually be merged with the main `import` command.

    SOURCES should be one or more LAZ or LAS files (or wildcards that match multiple LAZ or LAS files).
    """
    import pdal

    repo = ctx.obj.repo

    # TODO - improve path validation to make sure datasets of any type don't collide with each other
    # or with attachments.
    validate_dataset_paths([ds_path])

    for source in sources:
        if not (Path() / source).is_file():
            raise NotFound(f"No data found at {source}", exit_code=NO_IMPORT_SOURCE)

    compressed_set = ListBasedSet()
    version_set = ListBasedSet()
    copc_version_set = ListBasedSet()
    pdrf_set = ListBasedSet()
    pdr_length_set = ListBasedSet()
    crs_set = ListBasedSet()
    transform = None
    schema = None
    crs_name = None

    per_source_info = {}
    for source in sources:
        click.echo(f"Checking {source}... \r", nl=False)
        config = [
            {
                "type": "readers.las",
                "filename": source,
                "count": 0,  # Don't read any individual points.
            }
        ]
        if schema is None:
            config.append({"type": "filters.info"})

        pipeline = pdal.Pipeline(json.dumps(config))
        try:
            pipeline.execute()
        except RuntimeError:
            raise InvalidOperation(
                f"Error reading {source}", exit_code=INVALID_FILE_FORMAT
            )

        metadata = _unwrap_metadata(pipeline.metadata)
        info = metadata["readers.las"]

        compressed_set.add(info["compressed"])
        if len(compressed_set) > 1:
            raise _non_homogenous_error("filetype", "LAS vs LAZ")

        version = f"{info['major_version']}.{info['minor_version']}"
        version_set.add(version)
        if len(version_set) > 1:
            raise _non_homogenous_error("version", version_set)

        copc_version_set.add(get_copc_version(info))
        if len(copc_version_set) > 1:
            raise _non_homogenous_error("COPC version", copc_version_set)

        pdrf_set.add(info["dataformat_id"])
        if len(pdrf_set) > 1:
            raise _non_homogenous_error("Point Data Record Format", pdrf_set)

        pdr_length_set.add(info["point_length"])
        if len(pdr_length_set) > 1:
            raise _non_homogenous_error("Point Data Record Length", pdr_length_set)

        crs_set.add(info["srs"]["wkt"])
        if len(crs_set) > 1:
            raise _non_homogenous_error(
                "CRS",
                "\n vs \n".join(
                    format_wkt_for_output(wkt, sys.stderr) for wkt in crs_set
                ),
            )

        if transform is None:
            transform = _make_transform_to_crs84(crs_set.only())

        native_envelope = get_native_envelope(info)
        crs84_envelope = _transform_3d_envelope(transform, native_envelope)
        per_source_info[source] = {
            "count": info["count"],
            "native_envelope": native_envelope,
            "crs84_envelope": crs84_envelope,
        }

        if schema is None:
            crs_name = get_identifier_str(crs_set.only())
            schema = metadata["filters.info"]["schema"]
            schema["CRS"] = crs_name

    click.echo()

    version = version_set.only()
    copc_version = copc_version_set.only()
    is_laz = compressed_set.only() is True
    is_copc = is_laz and copc_version != NOT_COPC

    if is_copc:
        # Keep native format.
        import_func = get_hash_and_size_of_file_while_copying
        kart_format = f"pc:v1/copc-{copc_version}.0"
    elif is_laz:
        # Optionally convert to COPC 1.0 if requested.
        import_func = (
            _convert_tile_to_copc_lfs_blob
            if convert_to_copc
            else get_hash_and_size_of_file_while_copying
        )
        kart_format = "pc:v1/copc-1.0" if convert_to_copc else f"pc:v1/laz-{version}"
    else:  # LAS
        if not convert_to_copc:
            raise InvalidOperation(
                "LAS datasets are not supported - dataset must be converted to LAZ / COPC",
                exit_code=INVALID_FILE_FORMAT,
            )
        import_func = _convert_tile_to_copc_lfs_blob
        kart_format = "pc:v1/copc-1.0"

    import_ext = ".copc.laz" if "copc" in kart_format else ".laz"

    # Set up LFS hooks.
    # TODO: This could eventually be moved to `kart init`.
    if not (repo.gitdir_path / "hooks" / "pre-push").is_file():
        subprocess.check_call(
            ["git", "-C", str(repo.gitdir_path), "lfs", "install", "hooks"]
        )

    # We still need to write .kart.repostructure.version unfortunately, even though it's only
    # relevant to tabular datasets.
    assert repo.table_dataset_version in SUPPORTED_VERSIONS
    extra_blobs = (
        extra_blobs_for_version(repo.table_dataset_version)
        if not repo.head_commit
        else []
    )

    header = generate_header(
        repo,
        None,
        f"Importing {len(sources)} LAZ tiles as {ds_path}",
        repo.head_branch,
        repo.head_commit,
    )

    ds_inner_path = f"{ds_path}/.point-cloud-dataset.v1"

    lfs_tmp_path = repo.gitdir_path / "lfs" / "objects" / "tmp"
    lfs_tmp_path.mkdir(parents=True, exist_ok=True)

    with git_fast_import(repo, *FastImportSettings().as_args(), "--quiet") as proc:
        proc.stdin.write(header.encode("utf8"))
        for i, blob_path in write_blobs_to_stream(proc.stdin, extra_blobs):
            pass

        for source in sources:
            click.echo(f"Importing {source}...")

            tmp_object_path = lfs_tmp_path / str(uuid.uuid4())
            oid, size = import_func(source, tmp_object_path)
            actual_object_path = get_local_path_from_lfs_hash(repo, oid)
            actual_object_path.parents[0].mkdir(parents=True, exist_ok=True)
            tmp_object_path.rename(actual_object_path)

            # TODO - is this the right prefix and name?
            tilename = os.path.splitext(os.path.basename(source))[0] + import_ext
            tile_prefix = hexhash(tilename)[0:2]
            blob_path = f"{ds_inner_path}/tile/{tile_prefix}/{tilename}"
            info = per_source_info[source]
            pointer_dict = {
                "version": "https://git-lfs.github.com/spec/v1",
                # TODO - available.<URL-IDX> <URL>
                "kart.extent.crs84": _format_array(info["crs84_envelope"]),
                "kart.extent.native": _format_array(info["native_envelope"]),
                "kart.format": kart_format,
                "kart.pc.count": info["count"],
                "oid": f"sha256:{oid}",
                "size": size,
            }
            write_blob_to_stream(
                proc.stdin, blob_path, dict_to_pointer_file_bytes(pointer_dict)
            )

        write_blob_to_stream(
            proc.stdin, f"{ds_inner_path}/meta/schema.json", json_pack(schema)
        )
        write_blob_to_stream(
            proc.stdin,
            f"{ds_inner_path}/meta/crs/{crs_name}.wkt",
            ensure_bytes(normalise_wkt(crs_set.only())),
        )

    click.echo("Updating working copy...")
    reset_wc_if_needed(repo)

    # TODO - fix up reset code - there should be a single function you can call that updates
    # all working copies.
    tabular_wc = repo.get_working_copy(allow_uncreated=True)
    if tabular_wc is not None:
        tabular_wc.reset(repo.head_commit)
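
# Hedged sketch of the two-character tile sharding used to build blob_path above.
# hexhash is assumed to be a hex-digest helper; sha256 is used here purely as a
# stand-in, so the actual prefixes Kart produces may differ.
import hashlib

def _tile_prefix_standin(tilename):
    return hashlib.sha256(tilename.encode("utf8")).hexdigest()[0:2]

# e.g. ".../tile/<prefix>/example.copc.laz" where <prefix> = _tile_prefix_standin("example.copc.laz")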