Esempio n. 1
0
    def all_v2_meta_items_from_gpkg_meta_items(cls,
                                               gpkg_meta_items,
                                               id_salt=None,
                                               include_metadata_json=False):
        """
        Generate all the V2 meta items from the given gpkg_meta_items lists / dicts -
        either loaded from JSON, or generated directly from the database.
        Varying the id_salt varies the ids that are generated for the schema.json item.

        Yields (name, value) pairs in this order: "title", "description", "schema.json",
        "metadata/dataset.json" (only when include_metadata_json is set), "metadata.xml",
        and then one "crs/<identifier>.wkt" item for each gpkg_spatial_ref_sys entry
        that has a usable definition.
        If id_salt is not supplied, the table_name from gpkg_contents is used instead.
        """

        title = cls._nested_get(gpkg_meta_items, "gpkg_contents", "identifier")
        description = cls._nested_get(gpkg_meta_items, "gpkg_contents",
                                      "description")
        yield "title", title
        yield "description", description

        id_salt = id_salt or cls._nested_get(gpkg_meta_items, "gpkg_contents",
                                             "table_name")
        schema = cls._gpkg_to_v2_schema(gpkg_meta_items, id_salt)
        yield "schema.json", schema.to_column_dicts() if schema else None

        if include_metadata_json:
            yield "metadata/dataset.json", cls.gpkg_to_json_metadata(
                gpkg_meta_items)
        yield "metadata.xml", cls.gpkg_to_xml_metadata(gpkg_meta_items)

        # The "gpkg_spatial_ref_sys" entry can be missing (or None), in which case
        # there are simply no CRS items to yield - iterating None would raise TypeError.
        gpkg_spatial_ref_sys = gpkg_meta_items.get("gpkg_spatial_ref_sys") or ()
        for gsrs in gpkg_spatial_ref_sys:
            d = gsrs["definition"]
            # Skip entries that carry no real definition.
            if not d or d == "undefined":
                continue
            id_str = crs_util.get_identifier_str(d)
            yield f"crs/{id_str}.wkt", crs_util.normalise_wkt(d)
Esempio n. 2
0
 def crs_from_oid(self, crs_oid):
     """
     Build a CRS object from the WKT blob stored in the repo at crs_oid.

     If an equivalent CRS (according to IsSame) is already in self._distinct_crs_list,
     that earlier object is returned. Otherwise the new CRS is added to the list and returned.
     """
     blob_text = self.repo[crs_oid].data.decode("utf-8")
     crs = make_crs(normalise_wkt(blob_text))
     for known_crs in self._distinct_crs_list:
         if crs.IsSame(known_crs):
             break
     else:
         # Nothing equivalent seen so far - remember this one.
         self._distinct_crs_list.append(crs)
         known_crs = crs
     return known_crs
Esempio n. 3
0
 def encode_to_bytes(self, meta_item):
     """
     Serialise meta_item to bytes in the way appropriate to this file type.

     None is passed through unchanged. JSON items are packed with json_pack,
     WKT items are normalised before being converted to bytes, and anything
     else is converted to bytes as-is.
     """
     if meta_item is None:
         return None
     if self == self.JSON:
         return json_pack(meta_item)
     payload = crs_util.normalise_wkt(meta_item) if self == self.WKT else meta_item
     return ensure_bytes(payload)
Esempio n. 4
0
 def decode_from_bytes(self, data):
     """
     Deserialise data from bytes in the way appropriate to this file type.

     None is passed through unchanged. BYTES items are returned untouched, TEXT and XML
     items are converted to text, JSON items are unpacked and WKT items are converted to
     text and normalised. For any other file type the data is returned as text if it can
     be decoded, or as a hex string if it can't.
     """
     if data is None:
         return None
     if self == self.BYTES:
         return data
     if self in (self.TEXT, self.XML):
         return ensure_text(data)
     if self == self.JSON:
         return json_unpack(data)
     if self == self.WKT:
         return crs_util.normalise_wkt(ensure_text(data))

     # None of the known types matched - best effort.
     try:
         decoded = ensure_text(data)
     except UnicodeDecodeError:
         decoded = binascii.hexlify(data).decode()
     return decoded
Esempio n. 5
0
    def all_v2_meta_items_including_empty(cls,
                                          sess,
                                          db_schema,
                                          table_name,
                                          id_salt,
                                          include_legacy_items=False):
        """
        Generate all V2 meta items for the given table.
        Varying the id_salt varies the ids that are generated for the schema.json item.

        This is a generator of (name, value) pairs: first "title", then "schema.json",
        then one "crs/<identifier>.wkt" item for each geometry column that has an srtext.
        The queries are run against sess as the generator is consumed.
        include_legacy_items is accepted but not used in this implementation.
        """
        table_identifier = cls.quote_table(db_schema=db_schema,
                                           table_name=table_name)

        # The comment attached to the table (its pg_class description) is used as the title.
        title = sess.scalar(
            "SELECT obj_description((:table_identifier)::regclass, 'pg_class');",
            {"table_identifier": table_identifier},
        )
        yield "title", title

        # Subquery: the key_column_usage rows that belong to PRIMARY KEY constraints.
        primary_key_sql = """
            SELECT KCU.* FROM information_schema.key_column_usage KCU
            INNER JOIN information_schema.table_constraints TC
            ON KCU.constraint_schema = TC.constraint_schema
            AND KCU.constraint_name = TC.constraint_name
            WHERE TC.constraint_type = 'PRIMARY KEY'
        """

        # One row per column of the table, in ordinal order, joined to its primary key
        # position (if any) and to the geometry type / SRID encoded in the column's typmod.
        table_info_sql = f"""
            SELECT
                C.column_name, C.ordinal_position, C.data_type, C.udt_name,
                C.character_maximum_length, C.numeric_precision, C.numeric_scale,
                PK.ordinal_position AS pk_ordinal_position,
                upper(postgis_typmod_type(A.atttypmod)) AS geometry_type,
                postgis_typmod_srid(A.atttypmod) AS geometry_srid
            FROM information_schema.columns C
            LEFT OUTER JOIN ({primary_key_sql}) PK
            ON (PK.table_schema = C.table_schema)
            AND (PK.table_name = C.table_name)
            AND (PK.column_name = C.column_name)
            LEFT OUTER JOIN pg_attribute A
            ON (A.attname = C.column_name)
            AND (A.attrelid = (:table_identifier)::regclass::oid)
            WHERE C.table_schema=:table_schema AND C.table_name=:table_name
            ORDER BY C.ordinal_position;
        """
        r = sess.execute(
            table_info_sql,
            {
                "table_identifier": table_identifier,
                "table_schema": db_schema,
                "table_name": table_name,
            },
        )
        pg_table_info = list(r)

        # Get all the information on the geometry columns that we can get without sampling the geometries:
        geom_cols_info_sql = """
            SELECT GC.f_geometry_column AS column_name, GC.srid, SRS.srtext
            FROM geometry_columns GC
            LEFT OUTER JOIN spatial_ref_sys SRS ON (GC.srid = SRS.srid)
            WHERE GC.f_table_schema=:table_schema AND GC.f_table_name=:table_name;
        """
        r = sess.execute(
            geom_cols_info_sql,
            {
                "table_schema": db_schema,
                "table_name": table_name
            },
        )
        # NOTE(review): the KeyError handling further down suggests _filter_row_to_dict
        # omits some keys (presumably those with NULL values) - confirm against the helper.
        geom_cols_info = [cls._filter_row_to_dict(row) for row in r]

        # Improve the geometry information by sampling one geometry from each column, where available.
        for col_info in geom_cols_info:
            c = col_info["column_name"]
            # Identifiers can't be bound as query parameters, so the quoted column and
            # table names are interpolated into the SQL text.
            row = sess.execute(
                f"""
                SELECT ST_Zmflag({cls.quote(c)}) AS zm,
                ST_SRID({cls.quote(c)}) AS srid, SRS.srtext
                FROM {table_identifier} LEFT OUTER JOIN spatial_ref_sys SRS
                ON SRS.srid = ST_SRID({cls.quote(c)})
                WHERE {cls.quote(c)} IS NOT NULL LIMIT 1;
                """, ).fetchone()
            if row:
                sampled_info = cls._filter_row_to_dict(row)
                # Translate the ST_Zmflag result via the ZM_FLAG_TO_STRING lookup.
                sampled_info["zm"] = cls.ZM_FLAG_TO_STRING.get(
                    sampled_info.get("zm"))
                # Original col_info from geometry_columns takes precedence, where it exists:
                col_info.update({**sampled_info, **col_info})

        schema = cls.postgis_to_v2_schema(pg_table_info, geom_cols_info,
                                          id_salt)
        yield "schema.json", schema.to_column_dicts() if schema else None

        # Finally, one CRS definition per geometry column for which an srtext was found.
        for col_info in geom_cols_info:
            try:
                wkt = col_info["srtext"]
            except KeyError:
                # no CRS defined for this geometry column
                continue
            id_str = crs_util.get_identifier_str(wkt)
            yield f"crs/{id_str}.wkt", crs_util.normalise_wkt(wkt)
Esempio n. 6
0
    def all_v2_meta_items_including_empty(
        cls, sess, db_schema, table_name, id_salt=None, include_legacy_items=False
    ):
        """
        Yield every V2 meta item for the given SQL Server table as a (name, value) pair:
        "title", then "schema.json", then one "crs/<authority>:<code>.wkt" item for each
        geometry / geography column from which a non-null value could be sampled.
        Varying the id_salt varies the ids that are generated for the schema.json item.
        """
        # The table's MS_Description extended property is used as the title.
        yield "title", sess.scalar(
            """
            SELECT CAST(value AS NVARCHAR) FROM::fn_listextendedproperty(
                'MS_Description', 'schema', :schema, 'table', :table, null, null);
            """,
            {"schema": db_schema, "table": table_name},
        )

        # Subquery: the key_column_usage rows that belong to PRIMARY KEY constraints.
        primary_key_sql = """
            SELECT KCU.* FROM information_schema.key_column_usage KCU
            INNER JOIN information_schema.table_constraints TC
            ON KCU.constraint_schema = TC.constraint_schema
            AND KCU.constraint_name = TC.constraint_name
            WHERE TC.constraint_type = 'PRIMARY KEY'
        """

        table_info_sql = f"""
            SELECT
                C.column_name, C.ordinal_position, C.data_type,
                C.character_maximum_length, C.numeric_precision, C.numeric_scale,
                PK.ordinal_position AS pk_ordinal_position
            FROM information_schema.columns C
            LEFT OUTER JOIN ({primary_key_sql}) PK
            ON (PK.table_schema = C.table_schema)
            AND (PK.table_name = C.table_name)
            AND (PK.column_name = C.column_name)
            WHERE C.table_schema=:table_schema AND C.table_name=:table_name
            ORDER BY C.ordinal_position;
        """
        column_rows = list(
            sess.execute(
                table_info_sql,
                {"table_schema": db_schema, "table_name": table_name},
            )
        )

        # Names of the spatial columns, in column order.
        spatial_columns = []
        for column_row in column_rows:
            if column_row["data_type"] in ("geometry", "geography"):
                spatial_columns.append(column_row["column_name"])

        # Sample one non-null value per spatial column to find out which SRID it uses.
        # Columns that yield no row at all are left out.
        quoted_table = cls.quote_table(db_schema=db_schema, table_name=table_name)
        crs_rows = []
        for spatial_column in spatial_columns:
            quoted_column = cls.quote(spatial_column)
            sampled_row = sess.execute(
                f"""
                SELECT TOP 1 :column_name AS column_name, {quoted_column}.STSrid AS srid, SRS.*
                FROM {quoted_table}
                LEFT OUTER JOIN sys.spatial_reference_systems SRS
                ON SRS.spatial_reference_id = {quoted_column}.STSrid
                WHERE {quoted_column} IS NOT NULL;
                """,
                {"column_name": spatial_column},
            ).fetchone()
            if sampled_row:
                crs_rows.append(sampled_row)

        schema = KartAdapter_SqlServer.sqlserver_to_v2_schema(
            column_rows, crs_rows, id_salt
        )
        column_dicts = schema.to_column_dicts() if schema else None
        yield "schema.json", column_dicts

        for crs_row in crs_rows:
            authority = crs_row["authority_name"]
            code = crs_row["authorized_spatial_reference_id"]
            if not (authority or code):
                # No authority information at all - identify the CRS by its SRID instead.
                authority, code = "CUSTOM", crs_row["srid"]
            definition = crs_row["well_known_text"] or ""
            item_name = f"crs/{authority}:{code}.wkt"
            yield item_name, crs_util.normalise_wkt(
                crs_util.ensure_authority_specified(definition, authority, code)
            )
Esempio n. 7
0
    def all_v2_meta_items_including_empty(cls,
                                          sess,
                                          db_schema,
                                          table_name,
                                          id_salt=None,
                                          include_legacy_items=False):
        """
        Generate all V2 meta items for the given table.
        Varying the id_salt varies the ids that are generated for the schema.json item.

        This is a generator of (name, value) pairs: first "title" (the table comment),
        then "schema.json" (None if no schema could be built), then one
        "crs/<identifier>.wkt" item for each spatial reference system row found for the table.
        include_legacy_items is accepted but not used in this implementation.
        """
        title = sess.scalar(
            """
            SELECT table_comment FROM information_schema.tables
            WHERE table_schema=:table_schema AND table_name=:table_name;
            """,
            {
                "table_schema": db_schema,
                "table_name": table_name
            },
        )
        yield "title", title

        # Primary key SQL is a bit different for MySQL since constraints are named within the namespace of a table -
        # they don't have names that are globally unique within the db-schema.
        primary_key_sql = """
            SELECT KCU.* FROM information_schema.key_column_usage KCU
            INNER JOIN information_schema.table_constraints TC
            ON KCU.table_schema = TC.table_schema
            AND KCU.table_name = TC.table_name
            AND KCU.constraint_schema = TC.constraint_schema
            AND KCU.constraint_name = TC.constraint_name
            WHERE TC.constraint_type = 'PRIMARY KEY'
        """

        # One row per column of the table, in ordinal order, joined to its primary key position (if any).
        table_info_sql = f"""
            SELECT
                C.column_name, C.ordinal_position, C.data_type, C.srs_id,
                C.character_maximum_length, C.numeric_precision, C.numeric_scale,
                PK.ordinal_position AS pk_ordinal_position
            FROM information_schema.columns C
            LEFT OUTER JOIN ({primary_key_sql}) PK
            ON (PK.table_schema = C.table_schema)
            AND (PK.table_name = C.table_name)
            AND (PK.column_name = C.column_name)
            WHERE C.table_schema=:table_schema AND C.table_name=:table_name
            ORDER BY C.ordinal_position;
        """
        r = sess.execute(
            table_info_sql,
            {
                "table_schema": db_schema,
                "table_name": table_name
            },
        )
        mysql_table_info = list(r)

        # The spatial reference systems used by this table's geometry columns.
        spatial_ref_sys_sql = """
            SELECT SRS.* FROM information_schema.st_spatial_reference_systems SRS
            LEFT OUTER JOIN information_schema.st_geometry_columns GC ON (GC.srs_id = SRS.srs_id)
            WHERE GC.table_schema=:table_schema AND GC.table_name=:table_name;
        """
        r = sess.execute(
            spatial_ref_sys_sql,
            {
                "table_schema": db_schema,
                "table_name": table_name
            },
        )
        mysql_spatial_ref_sys = list(r)

        # NOTE(review): the helper is called sqlserver_to_v2_schema although this is the
        # MySQL adapter - confirm this is the intended name.
        schema = KartAdapter_MySql.sqlserver_to_v2_schema(
            mysql_table_info, mysql_spatial_ref_sys, id_salt)
        # Same guard as the other implementations of this method: yield None
        # rather than failing if there is no schema.
        yield "schema.json", schema.to_column_dicts() if schema else None

        for crs_info in mysql_spatial_ref_sys:
            wkt = crs_info["DEFINITION"]
            id_str = crs_util.get_identifier_str(wkt)
            yield f"crs/{id_str}.wkt", crs_util.normalise_wkt(wkt)
Esempio n. 8
0
def point_cloud_import(ctx, convert_to_copc, ds_path, sources):
    """
    Experimental command for importing point cloud datasets. Work-in-progress.
    Will eventually be merged with the main `import` command.

    SOURCES should be one or more LAZ or LAS files (or wildcards that match multiple LAZ or LAS files).
    """
    # pdal is imported here, inside the command, rather than at module level.
    import pdal

    repo = ctx.obj.repo

    # TODO - improve path validation to make sure datasets of any type don't collide with each other
    # or with attachments.
    validate_dataset_paths([ds_path])

    # Fail early if any of the sources is not an existing file.
    for source in sources:
        if not (Path() / source).is_file():
            raise NotFound(f"No data found at {source}",
                           exit_code=NO_IMPORT_SOURCE)

    # One set per property that has to be identical across all of the sources.
    # As soon as any of them holds more than one value, the import is aborted.
    compressed_set = ListBasedSet()
    version_set = ListBasedSet()
    copc_version_set = ListBasedSet()
    pdrf_set = ListBasedSet()
    pdr_length_set = ListBasedSet()
    crs_set = ListBasedSet()
    # These three are populated while processing the first source.
    transform = None
    schema = None
    crs_name = None

    # Maps each source to its point count and its envelopes, for use when writing the pointer files.
    per_source_info = {}

    # First pass: read only the metadata of every source and check that the sources are homogenous.
    for source in sources:
        # No newline, and a trailing carriage return, so that the next message
        # is written over the top of this one.
        click.echo(f"Checking {source}...          \r", nl=False)
        config = [{
            "type": "readers.las",
            "filename": source,
            "count": 0,  # Don't read any individual points.
        }]
        # The schema is only extracted once - from the first source.
        if schema is None:
            config.append({"type": "filters.info"})

        pipeline = pdal.Pipeline(json.dumps(config))
        try:
            pipeline.execute()
        except RuntimeError:
            raise InvalidOperation(f"Error reading {source}",
                                   exit_code=INVALID_FILE_FORMAT)

        metadata = _unwrap_metadata(pipeline.metadata)

        info = metadata["readers.las"]

        compressed_set.add(info["compressed"])
        if len(compressed_set) > 1:
            raise _non_homogenous_error("filetype", "LAS vs LAZ")

        version = f"{info['major_version']}.{info['minor_version']}"
        version_set.add(version)
        if len(version_set) > 1:
            raise _non_homogenous_error("version", version_set)

        copc_version_set.add(get_copc_version(info))
        if len(copc_version_set) > 1:
            raise _non_homogenous_error("COPC version", copc_version_set)

        pdrf_set.add(info["dataformat_id"])
        if len(pdrf_set) > 1:
            raise _non_homogenous_error("Point Data Record Format", pdrf_set)

        pdr_length_set.add(info["point_length"])
        if len(pdr_length_set) > 1:
            raise _non_homogenous_error("Point Data Record Length",
                                        pdr_length_set)

        crs_set.add(info["srs"]["wkt"])
        if len(crs_set) > 1:
            raise _non_homogenous_error(
                "CRS",
                "\n vs \n".join((format_wkt_for_output(wkt, sys.stderr)
                                 for wkt in crs_set)),
            )

        # All sources share a single CRS (checked above), so the transform is only built once.
        if transform is None:
            transform = _make_transform_to_crs84(crs_set.only())

        native_envelope = get_native_envelope(info)
        crs84_envelope = _transform_3d_envelope(transform, native_envelope)
        per_source_info[source] = {
            "count": info["count"],
            "native_envelope": native_envelope,
            "crs84_envelope": crs84_envelope,
        }

        if schema is None:
            crs_name = get_identifier_str(crs_set.only())
            schema = metadata["filters.info"]["schema"]
            schema["CRS"] = crs_name

    # Move on from the progress line that the "Checking ..." messages were written to.
    click.echo()

    version = version_set.only()
    copc_version = copc_version_set.only()
    is_laz = compressed_set.only() is True
    is_copc = is_laz and copc_version != NOT_COPC

    # Decide how each tile gets into the LFS store (import_func) and which format is recorded for it.
    if is_copc:
        # Keep native format.
        import_func = get_hash_and_size_of_file_while_copying
        kart_format = f"pc:v1/copc-{copc_version}.0"
    elif is_laz:
        # Optionally Convert to COPC 1.0 if requested
        import_func = (_convert_tile_to_copc_lfs_blob if convert_to_copc else
                       get_hash_and_size_of_file_while_copying)
        kart_format = "pc:v1/copc-1.0" if convert_to_copc else f"pc:v1/laz-{version}"
    else:  # LAS
        if not convert_to_copc:
            raise InvalidOperation(
                "LAS datasets are not supported - dataset must be converted to LAZ / COPC",
                exit_code=INVALID_FILE_FORMAT,
            )
        import_func = _convert_tile_to_copc_lfs_blob
        kart_format = "pc:v1/copc-1.0"

    import_ext = ".copc.laz" if "copc" in kart_format else ".laz"

    # Set up LFS hooks.
    # TODO: This could eventually be moved to `kart init`.
    if not (repo.gitdir_path / "hooks" / "pre-push").is_file():
        subprocess.check_call(
            ["git", "-C",
             str(repo.gitdir_path), "lfs", "install", "hooks"])

    # We still need to write .kart.repostructure.version unfortunately, even though it's only relevant to tabular datasets.
    assert repo.table_dataset_version in SUPPORTED_VERSIONS
    # The extra blobs are only written when the repo has no head commit yet.
    extra_blobs = (extra_blobs_for_version(repo.table_dataset_version)
                   if not repo.head_commit else [])

    header = generate_header(
        repo,
        None,
        f"Importing {len(sources)} LAZ tiles as {ds_path}",
        repo.head_branch,
        repo.head_commit,
    )

    ds_inner_path = f"{ds_path}/.point-cloud-dataset.v1"

    lfs_tmp_path = repo.gitdir_path / "lfs" / "objects" / "tmp"
    lfs_tmp_path.mkdir(parents=True, exist_ok=True)

    # Second pass: put each tile into the LFS store, and write a pointer file for it
    # (plus the dataset's meta items) to the fast-import stream.
    with git_fast_import(repo,
                         *FastImportSettings().as_args(), "--quiet") as proc:
        proc.stdin.write(header.encode("utf8"))

        # NOTE(review): presumably write_blobs_to_stream is a generator that writes
        # as it is iterated, hence the loop with an empty body - confirm.
        for i, blob_path in write_blobs_to_stream(proc.stdin, extra_blobs):
            pass

        for source in sources:
            click.echo(f"Importing {source}...")

            # The tile's final location is derived from its hash, which is only known once
            # import_func has returned - so it is written to a temporary path and then moved.
            tmp_object_path = lfs_tmp_path / str(uuid.uuid4())
            oid, size = import_func(source, tmp_object_path)
            actual_object_path = get_local_path_from_lfs_hash(repo, oid)
            actual_object_path.parents[0].mkdir(parents=True, exist_ok=True)
            tmp_object_path.rename(actual_object_path)

            # TODO - is this the right prefix and name?
            tilename = os.path.splitext(
                os.path.basename(source))[0] + import_ext
            tile_prefix = hexhash(tilename)[0:2]
            blob_path = f"{ds_inner_path}/tile/{tile_prefix}/{tilename}"
            info = per_source_info[source]
            pointer_dict = {
                "version": "https://git-lfs.github.com/spec/v1",
                # TODO - available.<URL-IDX> <URL>
                "kart.extent.crs84": _format_array(info["crs84_envelope"]),
                "kart.extent.native": _format_array(info["native_envelope"]),
                "kart.format": kart_format,
                "kart.pc.count": info["count"],
                "oid": f"sha256:{oid}",
                "size": size,
            }
            write_blob_to_stream(proc.stdin, blob_path,
                                 dict_to_pointer_file_bytes(pointer_dict))

        # Dataset-level meta items: the schema, and the one CRS shared by all the tiles.
        write_blob_to_stream(proc.stdin, f"{ds_inner_path}/meta/schema.json",
                             json_pack(schema))
        write_blob_to_stream(
            proc.stdin,
            f"{ds_inner_path}/meta/crs/{crs_name}.wkt",
            ensure_bytes(normalise_wkt(crs_set.only())),
        )

    click.echo("Updating working copy...")
    reset_wc_if_needed(repo)

    # TODO - fix up reset code - there should be a single function you can call that updates all working copies.
    tabular_wc = repo.get_working_copy(allow_uncreated=True)
    if tabular_wc is not None:
        tabular_wc.reset(repo.head_commit)