コード例 #1
0
ファイル: utils.py プロジェクト: Amsterdam/dso-api
def patch_field_auth(schema: DatasetSchema, table_id, field_id, *subfields, auth: list[str]):
    """Monkeypatch an Amsterdam Schema to set "auth" on a field.

    The value is written into the raw schema dictionaries and into the
    ``field_schema`` of the matching field on the active model.

    :param schema: Dataset schema that is patched in-place.
    :param table_id: Identifier of the table that holds the field.
    :param field_id: Identifier of the field inside the table.
    :param subfields: Optional names of nested fields to descend into.
    :param auth: Value stored under the "auth" key.
    """
    # Only called to check that the table and field exist; result is unused.
    schema.get_table_by_id(table_id).get_field_by_id(field_id)

    # Patch the low-level dict data so all high-level objects see the change.
    table_entry = next(t for t in schema["tables"] if t.default["id"] == table_id)
    properties = table_entry.default["schema"]["properties"]
    node = next(value for key, value in properties.items() if key == field_id)

    # Descend into the sub fields, stepping over "array", "object"
    # and "array of objects" wrappers on the way down.
    for name in subfields:
        if node["type"] == "array":
            node = node["items"]
        if node["type"] == "object":
            node = node["properties"]

        node = node[name]

    node["auth"] = auth

    # The active model needs the same patch.
    django_field = apps.get_model(schema.id, table_id)._meta.get_field(to_snake_case(field_id))
    for name in subfields:
        django_field = django_field.related_model._meta.get_field(name)

    django_field.field_schema["auth"] = auth
コード例 #2
0
ファイル: utils.py プロジェクト: Amsterdam/dso-api
def patch_table_auth(schema: DatasetSchema, table_id, *, auth: list[str]):
    """Monkeypatch an Amsterdam Schema to set "auth" on a table.

    :param schema: Dataset schema that is patched in-place.
    :param table_id: Identifier of the table to patch.
    :param auth: Value stored under the table's "auth" key.
    """
    # Only called for its error checking; the returned table is not used.
    schema.get_table_by_id(table_id)

    # Patch the low-level dict data so all high-level objects see the change.
    matching_tables = (entry for entry in schema["tables"] if entry.default["id"] == table_id)
    next(matching_tables).default["auth"] = auth

    # The active model is already loaded and has its own copy of the
    # table schema, so that copy is patched as well.
    apps.get_model(schema.id, table_id).table_schema()["auth"] = auth
コード例 #3
0
ファイル: import_schemas.py プロジェクト: cvriel/schema-tools
    def import_schema(self, name: str,
                      schema: DatasetSchema) -> Optional[Dataset]:
        """Import a single dataset schema.

        Creates the ``Dataset`` when none exists with ``name=schema.id``;
        otherwise stores the new schema data and saves only when it changed.

        :param name: Label used in the progress messages written to stdout.
        :param schema: The schema to import.
        :returns: The created or updated dataset, ``None`` when nothing changed.
        """
        try:
            existing = Dataset.objects.get(name=schema.id)
        except Dataset.DoesNotExist:
            created = Dataset.objects.create(name=schema.id,
                                             schema_data=schema.json_data())
            self.stdout.write(f"  Created {name}")
            return created

        existing.schema_data = schema.json_data()
        if not existing.schema_data_changed():
            return None

        existing.save()
        self.stdout.write(f"  Updated {name}")
        return existing
コード例 #4
0
ファイル: db.py プロジェクト: cvriel/schema-tools
def create_meta_table_data(engine, dataset_schema: DatasetSchema):
    """Store the dataset, table and field metadata of a schema in the database.

    Adds one ``models.Dataset`` for the dataset itself, one ``models.Table``
    per entry in ``dataset_schema["tables"]`` and one ``models.Field`` per
    property of each table schema, then commits everything in one go.

    :param engine: Engine the session is bound to (``sessionmaker(bind=engine)``).
    :param dataset_schema: The dataset schema whose metadata is stored.
    :raises NotImplementedError: When ``models.Field`` rejects a field
        definition with a ``TypeError`` (e.g. an unsupported key).
    """
    session = sessionmaker(bind=engine)()
    try:
        # Dataset-level attributes: everything except the nested tables.
        ds_content = {
            camel_case_to_snake(k): v
            for k, v in dataset_schema.items() if k != "tables"
        }
        ds_content["contact_point"] = str(ds_content.get("contact_point", ""))
        ds_transformer = transformer_factory(models.Dataset)
        dataset = models.Dataset(**ds_transformer(ds_content))
        session.add(dataset)

        for table_data in dataset_schema["tables"]:
            # Table-level attributes: everything except the nested JSON schema,
            # plus "required" and "display" which live inside that schema.
            table_content = {
                camel_case_to_snake(k): v
                for k, v in table_data.items() if k != "schema"
            }
            table_schema = table_data["schema"]

            table = models.Table(
                **{
                    **table_content,
                    **{
                        f: table_schema.get(f)
                        for f in ("required", "display")
                    },
                })
            table.dataset_id = dataset.id
            session.add(table)

            for field_name, field_value in table_schema["properties"].items():
                # Strip the "$" from keys (e.g. "$ref") and drop "$comment".
                field_content = {
                    k.replace("$", ""): v
                    for k, v in field_value.items() if k != "$comment"
                }
                field_content["name"] = field_name
                try:
                    field = models.Field(**field_content)
                except TypeError as e:
                    raise NotImplementedError(
                        f'Import failed: at "{field_name}": {field_value!r}:\n{e}'
                    ) from e

                field.table_id = table.id
                field.dataset_id = dataset.id
                session.add(field)

        session.commit()
    finally:
        # Always release the session's resources, also when an error
        # aborts the import before the commit.
        session.close()
コード例 #5
0
def _load_geojson(postgres_conn_id):
    """Download, introspect and import the GeoJSON file of every route.

    As airflow executes tasks at different hosts,
    these steps need to happen in a single call.

    Otherwise, the (large) file is downloaded by one host,
    and stored in the XCom table to be shared between tasks.

    :param postgres_conn_id: Connection id handed to ``PostgresHook`` for
        running a route's ``post_process`` statement(s).
    """
    download_dir = Path(f"/tmp/{dag_id}")
    download_dir.mkdir(parents=True, exist_ok=True)

    # Step 1: download one file per route, remembering where each one went.
    downloaded = {}
    for route in ROUTES:
        target = f"{download_dir}/{route.name}.geojson"
        logger.info("Downloading %s to %s", route.url, target)
        download_file(route.url, target, http_conn_id=None)
        downloaded[route.name] = target

    # Step 2: generate the schema ("schema introspect geojson *.geojson")
    introspected = introspect_geojson_files(
        "gevaarlijke-routes", files=list(downloaded.values())
    )
    dataset_schema = DatasetSchema.from_dict(introspected)  # TODO: move to schema-tools?

    # Step 3: import the data.
    # XXX This is not running as one transaction atm, but autocommitting per chunk
    engine = get_engine()
    geojson_importer = GeoJSONImporter(dataset_schema, engine, logger=logger)
    for route in ROUTES:
        source_path = downloaded[route.name]
        logger.info(
            "Importing %s into %s", route.name, route.tmp_db_table_name
        )
        geojson_importer.generate_db_objects(
            table_name=route.schema_table_name,
            db_table_name=route.tmp_db_table_name,
            truncate=True,  # when reexecuting the same task
            ind_tables=True,
            ind_extra_index=False,
        )
        geojson_importer.load_file(source_path)

        if not route.post_process:
            continue
        PostgresHook(postgres_conn_id=postgres_conn_id).run(route.post_process)
 def execute(self, context=None):
     """Run schema-tools' ``generate_db_objects`` for the configured table.

     The dataset schema is fetched from a URL built out of ``SCHEMA_URL``,
     ``data_schema_env`` and ``data_schema_name``. For each table whose
     ``<data_schema_name>_<table id>`` equals ``data_table_name``, the database
     objects are generated; ``ind_table`` and ``ind_extra_index`` are passed
     on as the ``ind_tables`` / ``ind_extra_index`` switches.
     """
     # SCHEMA_URL is split on "//" so the environment can be inserted
     # directly after the "<scheme>://" part.
     url_parts = SCHEMA_URL.split('//')
     data_schema_url = (
         f"{url_parts[0]}//{self.data_schema_env}{url_parts[1]}"
         f"{self.data_schema_name}/{self.data_schema_name}"
     )
     schema_data = schema_fetch_url_file(data_schema_url)
     db_engine = _get_engine(self.db_conn)
     importer = BaseImporter(DatasetSchema(SchemaType(schema_data)), db_engine)

     for table in schema_data["tables"]:
         qualified_name = self.data_schema_name + "_" + table["id"]
         if qualified_name != f"{self.data_table_name}":
             continue

         importer.generate_db_objects(
             table["id"],
             ind_tables=self.ind_table,
             ind_extra_index=self.ind_extra_index,
         )
コード例 #7
0
def vestiging_schema(vestiging_schema_json) -> DatasetSchema:
    """Build a ``DatasetSchema`` from the ``vestiging_schema_json`` dictionary."""
    dataset_schema = DatasetSchema.from_dict(vestiging_schema_json)
    return dataset_schema
コード例 #8
0
def parkeervakken_schema(parkeervakken_schema_json) -> DatasetSchema:
    """Build a ``DatasetSchema`` from the ``parkeervakken_schema_json`` dictionary."""
    dataset_schema = DatasetSchema.from_dict(parkeervakken_schema_json)
    return dataset_schema
コード例 #9
0
ファイル: models.py プロジェクト: cvriel/schema-tools
    def schema(self) -> DatasetSchema:
        """Return ``schema_data`` wrapped in a ``DatasetSchema``.

        :raises RuntimeError: When ``schema_data`` is empty (falsy).
        """
        if self.schema_data:
            return DatasetSchema.from_dict(self.schema_data)

        raise RuntimeError("Dataset.schema_data is empty")
コード例 #10
0
def woningbouwplannen_schema(woningbouwplannen_schema_json) -> DatasetSchema:
    """Build a ``DatasetSchema`` from the ``woningbouwplannen_schema_json`` dictionary."""
    dataset_schema = DatasetSchema.from_dict(woningbouwplannen_schema_json)
    return dataset_schema
コード例 #11
0
def gebieden_schema(gebieden_schema_json) -> DatasetSchema:
    """Build a ``DatasetSchema`` from the ``gebieden_schema_json`` dictionary."""
    dataset_schema = DatasetSchema.from_dict(gebieden_schema_json)
    return dataset_schema
コード例 #12
0
def download_url_schema(download_url_schema_json) -> DatasetSchema:
    """Build a ``DatasetSchema`` from the ``download_url_schema_json`` dictionary."""
    dataset_schema = DatasetSchema.from_dict(download_url_schema_json)
    return dataset_schema
コード例 #13
0
def explosieven_schema(explosieven_schema_json) -> DatasetSchema:
    """Build a ``DatasetSchema`` from the ``explosieven_schema_json`` dictionary."""
    dataset_schema = DatasetSchema.from_dict(explosieven_schema_json)
    return dataset_schema
コード例 #14
0
def afval_schema(afval_schema_json) -> DatasetSchema:
    """Build a ``DatasetSchema`` from the ``afval_schema_json`` dictionary."""
    dataset_schema = DatasetSchema.from_dict(afval_schema_json)
    return dataset_schema
コード例 #15
0
def fietspaaltjes_schema(fietspaaltjes_schema_json) -> DatasetSchema:
    """Build a ``DatasetSchema`` from the ``fietspaaltjes_schema_json`` dictionary."""
    dataset_schema = DatasetSchema.from_dict(fietspaaltjes_schema_json)
    return dataset_schema
コード例 #16
0
def fietspaaltjes_schema_no_display(fietspaaltjes_schema_json_no_display) -> DatasetSchema:
    """Build a ``DatasetSchema`` from the ``fietspaaltjes_schema_json_no_display`` dictionary."""
    dataset_schema = DatasetSchema.from_dict(fietspaaltjes_schema_json_no_display)
    return dataset_schema
コード例 #17
0
def afval_schema_backwards_embedded(afval_schema_backwards_embedded_json) -> DatasetSchema:
    """Build a ``DatasetSchema`` from the ``afval_schema_backwards_embedded_json`` dictionary."""
    dataset_schema = DatasetSchema.from_dict(afval_schema_backwards_embedded_json)
    return dataset_schema
コード例 #18
0
def indirect_self_ref_schema(indirect_self_ref_schema_json) -> DatasetSchema:
    """Build a ``DatasetSchema`` from the ``indirect_self_ref_schema_json`` dictionary."""
    dataset_schema = DatasetSchema.from_dict(indirect_self_ref_schema_json)
    return dataset_schema
コード例 #19
0
def afval_schema_backwards_summary(afval_schema_backwards_summary_json) -> DatasetSchema:
    """Build a ``DatasetSchema`` from the ``afval_schema_backwards_summary_json`` dictionary."""
    dataset_schema = DatasetSchema.from_dict(afval_schema_backwards_summary_json)
    return dataset_schema
コード例 #20
0
def meldingen_schema(meldingen_schema_json) -> DatasetSchema:
    """Build a ``DatasetSchema`` from the ``meldingen_schema_json`` dictionary."""
    dataset_schema = DatasetSchema.from_dict(meldingen_schema_json)
    return dataset_schema
コード例 #21
0
def bommen_v2_schema(bommen_v2_schema_json) -> DatasetSchema:
    """Build a ``DatasetSchema`` from the ``bommen_v2_schema_json`` dictionary."""
    dataset_schema = DatasetSchema.from_dict(bommen_v2_schema_json)
    return dataset_schema
コード例 #22
0
def bag_schema(bag_schema_json) -> DatasetSchema:
    """Build a ``DatasetSchema`` from the ``bag_schema_json`` dictionary."""
    dataset_schema = DatasetSchema.from_dict(bag_schema_json)
    return dataset_schema
コード例 #23
0
def geometry_zoom_schema():
    """Load ``files/geometry_zoom.json`` (relative to ``HERE``) as a ``DatasetSchema``."""
    # NOTE(review): assumes HERE is a pathlib.Path, so read_text() accepts
    # an ``encoding`` argument -- confirm against its definition.
    schema_path = HERE / "files" / "geometry_zoom.json"
    # Read explicitly as UTF-8 instead of relying on the platform's locale
    # encoding, which would make the result depend on the machine.
    schema_text = schema_path.read_text(encoding="utf-8")
    return DatasetSchema.from_dict(json.loads(schema_text))
コード例 #24
0
# Minimal table definition used to build REMOTE_SCHEMA below.
TABLE_SCHEMA = {
    "id": "mytable",
    "type": "table",
    "version": str(V1),
    "schema": {
        "$schema": "http://json-schema.org/draft-07/schema#",
    },
}
# Table schema object for TABLE_SCHEMA, attached to an ad-hoc parent dataset
# that lists this table as its only entry.
REMOTE_SCHEMA = DatasetTableSchema(
    TABLE_SCHEMA,
    parent_schema=DatasetSchema(
        {
            "id": "adhoc",
            "tables": [
                TableVersions(
                    id=TABLE_SCHEMA["id"],
                    default_version_number=V1,
                    # Key on the V1 value itself: dict(V1=...) would create the
                    # literal string key "V1" rather than the version that is
                    # passed as default_version_number.
                    # NOTE(review): assumes V1 is hashable -- confirm.
                    active={V1: TABLE_SCHEMA},
                )
            ],
        }
    ),
)


@pytest.mark.parametrize(
    "case",
    [
        ("http://remote", "http://remote/foo?bar=baz"),
        ("http://remote/", "http://remote/foo?bar=baz"),
        ("http://remote/quux/{table_id}", "http://remote/quux/mytable/foo?bar=baz"),
        ("http://remote/quux/{table_id}/", "http://remote/quux/mytable/foo?bar=baz"),
コード例 #25
0
def geometry_authdataset_schema(geometry_authdataset_schema_json) -> DatasetSchema:
    """Build a ``DatasetSchema`` from the ``geometry_authdataset_schema_json`` dictionary."""
    dataset_schema = DatasetSchema.from_dict(geometry_authdataset_schema_json)
    return dataset_schema