def _build_db_viewsets(self, db_datasets: Iterable[Dataset]):
    """Initialize viewsets that are linked to Django database models."""
    tmp_router = routers.SimpleRouter()
    db_datasets = {dataset.schema.id: dataset for dataset in db_datasets}

    # Generate views now that all models have been created.
    # This makes sure the 'to' field is resolved to an actual model class.
    for app_label, models_by_name in self.all_models.items():
        if app_label not in db_datasets:
            logger.debug("Skipping API creation for dataset: %s", app_label)
            continue

        for model in models_by_name.values():
            _validate_model(model)
            if model.has_parent_table():
                # Do not create separate viewsets for nested tables.
                continue

            dataset_id = to_snake_case(model.get_dataset_id())

            # Determine the URL prefix for the model
            url_prefix = self.make_url(model.get_dataset_path(), model.get_table_id())

            logger.debug("Created viewset %s", url_prefix)
            viewset = viewset_factory(model)
            table_id = to_snake_case(model.get_table_id())
            tmp_router.register(
                prefix=url_prefix,
                viewset=viewset,
                basename=f"{dataset_id}-{table_id}",
            )

    return tmp_router.registry
def generate_field_serializer(  # noqa: C901
    model, model_field, new_attrs, fields, extra_kwargs
):
    orig_name = model_field.name
    depth = extra_kwargs.get("depth", 0)
    depth += 1

    if isinstance(model_field, models.ManyToOneRel):
        for name, relation in model._table_schema.relations.items():
            if (
                depth <= 2
                and relation["table"]
                == toCamelCase(model_field.related_model._meta.model_name)
                and relation["field"] == toCamelCase(model_field.field.name)
            ):
                format1 = relation.get("format", "summary")
                att_name = model_field.name
                if format1 == "embedded":
                    view_name = "dynamic_api:{}-{}-detail".format(
                        to_snake_case(model._table_schema.dataset.id),
                        to_snake_case(model_field.related_model._table_schema.id),
                    )
                    new_attrs[name] = TemporalHyperlinkedRelatedField(
                        many=True,
                        view_name=view_name,
                        queryset=getattr(model, att_name),
                    )
                    fields.append(name)
                elif format1 == "summary":
                    new_attrs[name] = _RelatedSummaryField()
                    fields.append(name)
                break
        return

    if model.has_parent_table() and model_field.name in ["id", "parent"]:
        # Do not render PK and FK to parent on nested tables
        return

    # Instead of having to apply camelize() on every response,
    # create converted field names on the serializer construction.
    camel_name = toCamelCase(model_field.name)

    # Add extra embedded part for foreign keys
    if isinstance(model_field, models.ForeignKey):
        if depth <= 1:
            new_attrs[camel_name] = EmbeddedField(
                serializer_class=serializer_factory(
                    model_field.related_model, depth=depth, flat=True
                ),
                source=model_field.name,
            )

            camel_id_name = toCamelCase(model_field.attname)
            fields.append(camel_id_name)

            if model_field.attname != camel_id_name:
                extra_kwargs[camel_id_name] = {"source": model_field.attname}

    fields.append(camel_name)
    if orig_name != camel_name:
        extra_kwargs[camel_name] = {"source": model_field.name}
def get_view_name(model: Type[DynamicModel], suffix: str):
    """Return the URL pattern for a dynamically generated model.

    :param suffix: This can be "detail" or "list".
    """
    dataset_id = to_snake_case(model.get_dataset_id())
    table_id = to_snake_case(model.get_table_id())
    return f"dynamic_api:{dataset_id}-{table_id}-{suffix}"
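# A hedged usage sketch (not from the source): "bommenkaart"/"bomkraters" are
# hypothetical dataset and table ids, and bomkraters_model is assumed to be the
# dynamically generated model for that table. The returned route name can then
# be resolved with Django's reverse().
view_name = get_view_name(bomkraters_model, "list")
assert view_name == "dynamic_api:bommenkaart-bomkraters-list"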
def _get_field_snake_name(field: DatasetFieldSchema) -> str:
    """Find the snake-cased name of a field, prefixed with its parent fields."""
    snake_name = to_snake_case(field.name)

    parent_field = field.parent_field
    while parent_field is not None:
        parent_snake_name = to_snake_case(parent_field.name)
        snake_name = f"{parent_snake_name}.{snake_name}"
        parent_field = parent_field.parent_field

    return snake_name
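# A hedged illustration (not from the source) of the parent-field walk above,
# using a tiny stand-in for DatasetFieldSchema (the real class comes from
# schematools).
class FakeField:
    def __init__(self, name, parent_field=None):
        self.name = name
        self.parent_field = parent_field

parent = FakeField("heeftOnderzoeken")
child = FakeField("inwonerAantal", parent_field=parent)
assert _get_field_snake_name(child) == "heeft_onderzoeken.inwoner_aantal"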
def execute(self, context=None):
    dataset = schema_def_from_url(SCHEMA_URL, self.dataset_name)
    pg_hook = PostgresHook(postgres_conn_id=self.postgres_conn_id)
    sqls = []
    existing_tables_lookup = self._get_existing_tables(
        pg_hook, dataset.tables, pg_schema=self.pg_schema
    )
    snaked_tablenames = existing_tables_lookup.keys()
    existing_columns = self._get_existing_columns(
        pg_hook, snaked_tablenames, pg_schema=self.pg_schema
    )

    if self.rename_indexes:
        for table_name, index_names in self._get_existing_indexes(
            pg_hook, snaked_tablenames, pg_schema=self.pg_schema
        ).items():
            if table_name not in existing_tables_lookup:
                continue
            for index_name in index_names:
                new_table_name = existing_tables_lookup[table_name].id
                new_index_name = index_name.replace(
                    table_name, to_snake_case(f"{dataset.id}_{new_table_name}")
                )
                if index_name != new_index_name:
                    sqls.append(
                        f"""ALTER INDEX {self.pg_schema}.{index_name}
                            RENAME TO {new_index_name}"""
                    )

    for snaked_tablename, table in existing_tables_lookup.items():
        for field in table.fields:
            provenance = field.get("provenance")
            if provenance is not None:
                snaked_field_name = to_snake_case(field.name)
                if "relation" in field:
                    snaked_field_name += "_id"
                if provenance.lower() in existing_columns[snaked_tablename]:
                    # Quotes are applied on the provenance name in case
                    # the source uses a space in the name.
                    sqls.append(
                        f"""ALTER TABLE {self.pg_schema}.{snaked_tablename}
                            RENAME COLUMN "{provenance}" TO {snaked_field_name}"""
                    )

        provenance = table.get("provenance")
        if provenance is not None:
            sqls.append(
                f"""ALTER TABLE IF EXISTS {self.pg_schema}.{snaked_tablename}
                    RENAME TO {to_snake_case(table.id)}"""
            )

    pg_hook.run(sqls)
def execute(self, context=None):
    dataset = schema_def_from_url(SCHEMA_URL, self.dataset_name)
    pg_hook = PostgresHook(postgres_conn_id=self.postgres_conn_id)
    sqls = []
    dataset_id = to_snake_case(dataset.id)
    for table in dataset.tables:
        table_id = to_snake_case(table.id)
        sqls.append(
            f"""
            DROP TABLE IF EXISTS {self.to_pg_schema}.{dataset_id}_{table_id};
            ALTER TABLE {self.from_pg_schema}.{table_id} SET SCHEMA {self.to_pg_schema};
            ALTER TABLE {table_id} RENAME TO {dataset_id}_{table_id};
            """
        )
    pg_hook.run(sqls)
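# A hedged sketch (not from the source) of one statement batch the loop above
# renders, for a hypothetical dataset "bomen" with a single table
# "stamgegevens", moving from a "pte" schema into "public". Note that the final
# RENAME is unqualified, so it appears to rely on the session search_path
# resolving to the target schema.
dataset_id, table_id = "bomen", "stamgegevens"
from_pg_schema, to_pg_schema = "pte", "public"
print(
    f"""
    DROP TABLE IF EXISTS {to_pg_schema}.{dataset_id}_{table_id};
    ALTER TABLE {from_pg_schema}.{table_id} SET SCHEMA {to_pg_schema};
    ALTER TABLE {table_id} RENAME TO {dataset_id}_{table_id};
    """
)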
def _remote_object_field_factory(field: DatasetFieldSchema, **kwargs) -> RemoteFieldSerializer:
    """Generate a serializer for a sub-object field."""
    table_schema = field.table
    dataset = table_schema.dataset
    safe_dataset_id = to_snake_case(dataset.id)
    serializer_name = (
        f"{dataset.id.title()}{table_schema.id.title()}_{field.name.title()}Serializer"
    ).replace(" ", "_")
    new_attrs = {
        "table_schema": table_schema,
        "field_schema": field,
        "__module__": f"dso_api.dynamic_api.remote.serializers.{safe_dataset_id}",
    }

    declared_fields = _build_declared_fields(field.subfields)

    # Generate Meta section and serializer class
    new_attrs.update(declared_fields)
    new_attrs["Meta"] = type("Meta", (), {"fields": list(declared_fields.keys())})
    serializer_class = type(serializer_name, (RemoteFieldSerializer,), new_attrs)
    return serializer_class(**kwargs)
def serializer_factory(
    model: Type[DynamicModel], depth: int, flat=None
) -> Type[DynamicSerializer]:
    """Generate the DRF serializer class for a specific dataset model."""
    fields = ["_links", "schema"]
    if isinstance(model, str):
        raise ImproperlyConfigured(f"Model {model} could not be resolved.")
    if model.has_parent_table():
        # Inner tables have no schema or links defined.
        fields = []

    safe_dataset_id = to_snake_case(model.get_dataset_id())
    serializer_name = f"{safe_dataset_id.title()}{model.__name__}Serializer"
    new_attrs = {
        "table_schema": model._table_schema,
        "__module__": f"dso_api.dynamic_api.serializers.{safe_dataset_id}",
    }

    # Parse fields for serializer
    extra_kwargs = {"depth": depth}
    for model_field in model._meta.get_fields():
        generate_field_serializer(model, model_field, new_attrs, fields, extra_kwargs)

    # Generate embedded relations
    if not flat:
        generate_embedded_relations(model, fields, new_attrs)

    # Generate Meta section and serializer class
    new_attrs["Meta"] = type(
        "Meta",
        (),
        {"model": model, "fields": fields, "extra_kwargs": extra_kwargs},
    )
    return type(serializer_name, (DynamicSerializer,), new_attrs)
def patch_field_auth(schema: DatasetSchema, table_id, field_id, *subfields, auth: list[str]):
    """Monkeypatch an Amsterdam Schema to set "auth" on a field."""
    # This updates the low-level dict data so all high-level objects get it.
    schema.get_table_by_id(table_id).get_field_by_id(field_id)  # check existence
    raw_table = next(t for t in schema["tables"] if t.default["id"] == table_id)
    raw_field = next(
        f for f_id, f in raw_table.default["schema"]["properties"].items() if f_id == field_id
    )

    # Allow resolving subfields too
    for subfield in subfields:
        # Auto jump over array, object or "array of objects"
        if raw_field["type"] == "array":
            raw_field = raw_field["items"]
        if raw_field["type"] == "object":
            raw_field = raw_field["properties"]

        raw_field = raw_field[subfield]

    raw_field["auth"] = auth

    # Also patch the active model
    model = apps.get_model(schema.id, table_id)
    model_field = model._meta.get_field(to_snake_case(field_id))
    for subfield in subfields:
        model_field = model_field.related_model._meta.get_field(subfield)
    model_field.field_schema["auth"] = auth
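# A hedged usage sketch (not from the source); the "afval" dataset, its
# "containers" table, the "eigenaarNaam" field, the "FP/MDW" scope and both
# test fixtures are all hypothetical.
def test_field_auth_hides_field(afval_schema, api_client):
    patch_field_auth(afval_schema, "containers", "eigenaarNaam", auth=["FP/MDW"])
    response = api_client.get("/v1/afval/containers/")
    # Without the scope, the protected field should no longer be rendered.
    assert "eigenaarNaam" not in response.data["_embedded"]["containers"][0]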
def render_dataset_docs(dataset: DatasetSchema, paths: dict[str, str]):
    snake_name = to_snake_case(dataset.id)
    dataset_path = paths[dataset.id]
    main_title = dataset.title or snake_name.replace("_", " ").capitalize()
    tables = [_get_table_context(t, paths) for t in dataset.tables]
    if any(t["has_geometry"] for t in tables):
        wfs_url = f"{BASE_URL}/v1/wfs/{dataset_path}/"
    else:
        wfs_url = None

    render_template(
        "datasets/dataset.rst.j2",
        f"datasets/{dataset_path}.rst",
        {
            "schema": dataset,
            "schema_name": snake_name,
            "schema_auth": dataset.auth,
            "main_title": main_title,
            "tables": tables,
            "wfs_url": wfs_url,
            "swagger_url": f"{BASE_URL}/v1/{dataset_path}/",
        },
    )

    return dataset_path
def get_ordering(self, request, queryset, view):
    if self.ordering_param not in request.query_params:
        # Allow DSO 1.0 Dutch "sorteer" parameter
        # Can adjust 'self' as this instance is recreated each request.
        if "sorteer" in request.query_params:
            self.ordering_param = "sorteer"

    ordering = super().get_ordering(request, queryset, view)
    if ordering is None:
        return ordering

    # Convert identifiers to snake_case, preserving the `-` prefix (descending sort).
    return [
        "-" + to_snake_case(part[1:]) if part.startswith("-") else to_snake_case(part)
        for part in ordering
    ]
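# A hedged illustration (not from the source) of the conversion above: clients
# sort on camelCase API names, while the ORM needs snake_case lookups.
ordering = ["-registratieDatum", "buurtNaam"]
converted = [
    "-" + to_snake_case(part[1:]) if part.startswith("-") else to_snake_case(part)
    for part in ordering
]
assert converted == ["-registratie_datum", "buurt_naam"]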
def generate_relation_filters(model: Type[DynamicModel]):  # NoQA
    """Generates additional filters for relations, including sub items."""
    fields = {}
    filters = {}

    schema_fields = {f.name: f for f in model._table_schema.fields}
    for relation in model._meta.related_objects:
        if relation.name not in schema_fields:
            fields[relation.name] = ["exact"]
            continue
        if not schema_fields[relation.name].is_nested_table:
            continue

        relation_properties = schema_fields[relation.name]["items"]["properties"]
        for field_name, field_schema in relation_properties.items():
            # Convert the space-separated property name into a snake_case name.
            model_field_name = to_snake_case(field_name)
            model_field = getattr(relation.related_model, model_field_name).field

            filter_class = dso_filters.DSOFilterSet.FILTER_DEFAULTS.get(
                model_field.__class__
            )
            if filter_class is None:
                # No mapping found for this model field, skip it.
                continue
            filter_class = filter_class["filter_class"]

            # Filter name presented in API
            filter_name = "{}.{}".format(
                toCamelCase(relation.name),
                toCamelCase(field_name),
            )
            filter_lookups = _get_field_lookups(model_field)
            for lookup_expr in filter_lookups:
                # Generate set of filters per lookup (e.g. __lte, __gte etc)
                subfilter_name = filter_name
                if lookup_expr not in ["exact", "contains"]:
                    subfilter_name = f"{filter_name}[{lookup_expr}]"

                filter_instance = filter_class(
                    field_name="__".join([relation.name, model_field_name]),
                    lookup_expr=lookup_expr,
                    label=dso_filters.DSOFilterSet.FILTER_HELP_TEXT.get(
                        filter_class, lookup_expr
                    ),
                )
                if lookup_expr == "not":
                    # Allow multiple NOT filters
                    filter_instance = dso_filters.MultipleValueFilter(filter_instance)

                filters[subfilter_name] = filter_instance
                fields[subfilter_name] = filter_lookups

    return filters
def filterset_factory(model: type[DynamicModel]) -> type[DynamicFilterSet]:  # noqa: C901
    """Generate the filterset based on the dynamic model."""
    # See https://django-filter.readthedocs.io/en/master/guide/usage.html on how filters are used.
    # Determine which fields are included:
    # Excluding geometry fields for now, as the default filter only performs exact matches.
    # This isn't useful for polygon fields, and excluding it avoids support issues later.
    filters = {}  # declared filters
    fields = {}  # generated filters

    # Generate the generated filters (and declared filters for FK subfields)
    for f in model._meta.get_fields():
        if isinstance(f, models.fields.Field):
            fields[f.attname] = _get_field_lookups(f)
        if isinstance(f, ForeignKey):
            fields[f.attname] = _get_field_lookups(f)  # backwards compat

            # In case of a composite FK, get the loose relations
            # associated with this ForeignKey object and add dotted syntax.
            schema_field = get_field_schema(f)
            prefix = f.attname.removesuffix("_id")
            for s in schema_field.subfields:
                related_field_name = to_snake_case(s.id)
                model_field_name = f"{prefix}_{related_field_name}"
                try:
                    model_field = model._meta.get_field(model_field_name)
                except FieldDoesNotExist:
                    # The field is not part of the compound identifier;
                    # we only support filtering on identifiers for now.
                    continue

                filter_class = dso_filters.DSOFilterSet.FILTER_DEFAULTS[
                    model_field.__class__
                ]["filter_class"]
                for lookup in _get_field_lookups(model_field):
                    sub_filter_name = f"{prefix}.{related_field_name}"
                    if lookup not in ("contains", "exact"):
                        sub_filter_name = f"{sub_filter_name}[{lookup}]"
                    filters[sub_filter_name] = filter_class(
                        field_name=model_field_name,
                        lookup_expr=lookup,
                        label=dso_filters.DSOFilterSet.FILTER_HELP_TEXT.get(filter_class, lookup),
                    )

    # Generate the declared filters
    filters.update(_generate_relation_filters(model))

    # Generate the class
    meta_attrs = {
        "model": model,
        "fields": fields,
    }
    meta = type("Meta", (), meta_attrs)
    return type(f"{model.__name__}FilterSet", (DynamicFilterSet,), {"Meta": meta, **filters})
def _generate_relation_filters(model: type[DynamicModel]):  # noqa: C901
    """Generates additional filters for relations, including sub items."""
    filters = {}

    schema_fields = {f.name: f for f in model._table_schema.fields}
    for relation in model._meta.related_objects:
        if relation.name not in schema_fields:
            continue
        if not schema_fields[relation.name].is_nested_table:
            continue

        relation_properties = schema_fields[relation.name]["items"]["properties"]
        for field_name, field_schema in relation_properties.items():
            # getattr() retrieves a DeferredAttribute here, hence the .field.
            model_field_name = to_snake_case(field_name)
            try:
                model_field = getattr(relation.related_model, model_field_name).field
            except AttributeError as e:
                raise AttributeError(
                    f"Unable to initialize dataset {model.get_dataset_id()}: {e}"
                ) from e

            filter_class = dso_filters.DSOFilterSet.FILTER_DEFAULTS.get(model_field.__class__)
            if filter_class is None:
                # No mapping found for this model field, skip it.
                continue
            filter_class = filter_class["filter_class"]

            # Filter name presented in API
            filter_name = "{}.{}".format(
                toCamelCase(relation.name),
                toCamelCase(field_name),
            )
            filter_lookups = _get_field_lookups(model_field)
            for lookup_expr in filter_lookups:
                # Generate set of filters per lookup (e.g. __lte, __gte etc)
                subfilter_name = filter_name
                if lookup_expr not in ("exact", "contains"):
                    subfilter_name = f"{filter_name}[{lookup_expr}]"

                filter_instance = filter_class(
                    field_name="__".join([relation.name, model_field_name]),
                    lookup_expr=lookup_expr,
                    label=dso_filters.DSOFilterSet.FILTER_HELP_TEXT.get(filter_class, lookup_expr),
                )
                if lookup_expr == "not":
                    # Allow multiple NOT filters
                    filter_instance = dso_filters.MultipleValueFilter(filter_instance)

                filters[subfilter_name] = filter_instance

    return filters
def make_url(self, prefix, *parts):
    """Generate the URL for the viewset."""
    parts = [to_snake_case(part) for part in parts]
    url_path = "/".join(parts)

    # Allow adding a prefix
    prefix = prefix.strip("/")  # extra strip for safety
    if prefix:
        url_path = f"{prefix}/{url_path}"

    return url_path
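# A hedged illustration (not from the source): parts are snake_cased and joined
# with "/", with the optional prefix glued in front. "router" is a hypothetical
# instance of the class defining make_url().
url = router.make_url("/v1/", "bommenkaart", "meldingenBuurt")
assert url == "v1/bommenkaart/meldingen_buurt"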
def _get_feature_type_context(table: DatasetTableSchema, paths: dict[str, str]):
    """Collect all table data for the WFS server spec."""
    snake_name = to_snake_case(table.dataset.id)
    snake_id = to_snake_case(table["id"])
    parent_path = paths[table.dataset.id]
    uri = f"{BASE_URL}/v1/wfs/{parent_path}/"
    fields = _get_fields(table.fields)
    has_geometry = _has_geometry(table)

    return {
        "title": snake_id.replace("_", " ").capitalize(),
        "typenames": [f"app:{snake_id}", snake_id],
        "doc_id": f"{table.dataset.id}/{table.id}",
        "uri": uri,
        "description": table.get("description"),
        "fields": [_get_field_context(field) for field in fields],
        "auth": table.auth,
        "expands": _get_table_expands(table, rel_id_separator="/"),
        "source": table,
        "has_geometry": has_geometry,
        "wfs_typename": f"app:{snake_name}",
        "wfs_csv": (
            f"{uri}?SERVICE=WFS&VERSION=2.0.0&REQUEST=GetFeature"
            f"&TYPENAMES={snake_id}&OUTPUTFORMAT=csv"
            if has_geometry
            else ""
        ),
        "wfs_geojson": (
            f"{uri}?SERVICE=WFS&VERSION=2.0.0&REQUEST=GetFeature"
            f"&TYPENAMES={snake_id}&OUTPUTFORMAT=geojson"
            if has_geometry
            else ""
        ),
    }
def execute(self, context: Optional[Dict] = None) -> None:
    """Move database objects (in this case tables) to another schema.

    Args:
        context: When this operator is created, the context parameter is used
            to refer to get_template_context for more context, as part of the
            inheritance from BaseOperator. It is set to None in this case.

    Executes:
        SQL ALTER statements that change the schema of each table, so the
        table is moved to the defined schema (a.k.a. schema swapping).
    """
    dataset = schema_def_from_url(SCHEMA_URL, self.dataset_name)
    pg_hook = PostgresHook(postgres_conn_id=self.postgres_conn_id)
    sqls = []
    dataset_id = to_snake_case(dataset.id)
    tables = dataset.tables

    if self.subset_tables:
        subset_tables = [to_snake_case(table) for table in self.subset_tables]
        tables = [
            table for table in tables if to_snake_case(table["id"]) in subset_tables
        ]

    for table in tables:
        table_id = to_snake_case(table.id)
        sqls.append(
            f"""
            DROP TABLE IF EXISTS {self.to_pg_schema}.{dataset_id}_{table_id};
            ALTER TABLE IF EXISTS {self.from_pg_schema}.{table_id} SET SCHEMA {self.to_pg_schema};
            ALTER TABLE IF EXISTS {table_id} RENAME TO {dataset_id}_{table_id};
            """
        )

    pg_hook.run(sqls)
def generate_embedded_relations(model, fields, new_attrs):
    schema_fields = {to_snake_case(f._name): f for f in model._table_schema.fields}
    for item in model._meta.related_objects:
        # Do not create fields for django-created relations.
        if item.name in schema_fields and schema_fields[item.name].is_nested_table:
            related_serializer = serializer_factory(item.related_model, 0, flat=True)
            fields.append(item.name)
            new_attrs[item.name] = related_serializer(many=True)
def _get_table_expands(table: DatasetTableSchema, rel_id_separator=":"):
    """Return which relations can be expanded"""
    expands = [
        {
            "id": field.id,
            "camel_name": toCamelCase(field.id),
            "snake_name": to_snake_case(field.id),
            "relation_id": field["relation"].replace(":", rel_id_separator),
            "target_doc_id": field["relation"].replace(":", rel_id_separator),
            "related_table": field.related_table,
        }
        for field in table.get_fields(include_subfields=False)
        if field.get("relation") is not None
    ]

    # Reverse relations can also be expanded
    for additional_relation in table.additional_relations:
        related_table = additional_relation.related_table
        expands.append(
            {
                "id": additional_relation.id,
                "camel_name": toCamelCase(additional_relation.id),
                "snake_name": to_snake_case(additional_relation.id),
                "relation_id": additional_relation.relation.replace(":", rel_id_separator),
                "target_doc_id": f"{related_table.dataset.id}{rel_id_separator}{related_table.id}",
                "related_table": related_table,
            }
        )

    return sorted(expands, key=lambda item: item["id"])
def _build_db_viewsets(self):
    """Initialize viewsets that are linked to Django database models."""
    tmp_router = routers.SimpleRouter()
    generated_models = []

    datasets = {}
    for dataset in Dataset.objects.db_enabled():  # type: Dataset
        dataset_id = dataset.schema.id  # not dataset.name!
        datasets[dataset_id] = dataset
        new_models = {}

        for model in dataset.create_models():
            logger.debug("Created model %s.%s", dataset_id, model.__name__)

            # Register the model in Django apps under the dataset's application name,
            # because Django requires a fully set up app for model discovery to work.
            register_model(dataset, model)

            if dataset.enable_api:
                new_models[model._meta.model_name] = model

        self.all_models[dataset_id] = new_models
        generated_models.extend(new_models.values())

    # Generate views now that all models have been created.
    # This makes sure the 'to' field is resolved to an actual model class.
    for app_label, models_by_name in self.all_models.items():
        for model in models_by_name.values():
            if model.has_parent_table():
                # Do not create separate viewsets for nested tables.
                continue

            dataset_id = model.get_dataset_id()
            dataset = datasets[dataset_id]

            # Determine the URL prefix for the model
            url_prefix = self.make_url(dataset.url_prefix, dataset_id, model.get_table_id())

            logger.debug("Created viewset %s", url_prefix)
            viewset = viewset_factory(model)
            table_id = to_snake_case(model.get_table_id())
            tmp_router.register(
                prefix=url_prefix,
                viewset=viewset,
                basename=f"{dataset_id}-{table_id}",
            )

    return tmp_router.registry, generated_models
def get_context_data(self, **kwargs):
    context = super().get_context_data(**kwargs)
    ds = get_object_or_404(
        Dataset.objects.api_enabled().db_enabled(), name=kwargs["dataset_name"]
    )
    geo_tables = sorted(
        to_snake_case(table.name)
        for table in ds.schema.tables
        if any(field.is_geo for field in table.fields)
    )
    if len(geo_tables) == 0:
        raise Http404("Dataset does not support MVT") from None

    context["name"] = ds.name
    context["tables"] = geo_tables
    context["schema"] = ds.schema

    return context
def get_ordering(self, request, queryset, view):
    if self.ordering_param not in request.query_params:
        # Allow DSO 1.0 Dutch "sorteer" parameter
        # Can adjust 'self' as this instance is recreated each request.
        if "sorteer" in request.query_params:
            self.ordering_param = "sorteer"

    ordering = super().get_ordering(request, queryset, view)
    if ordering is None:
        return ordering

    # Convert to snake_case, preserving the `-` prefix if needed.
    correct_ordering = [
        "-".join([to_snake_case(y) for y in x.split("-")]) for x in ordering
    ]
    return correct_ordering
def _get_existing_tables(
    self, pg_hook: PostgresHook, tables: List, pg_schema: str = "public"
) -> Dict[str, Any]:
    """Look up the table names in the schema (provenance can contain the
    original (real) name) and relate them to the existing tables in the database.

    Args:
        pg_hook: Postgres connection
        tables: list of table names of type string
        pg_schema: name of the database schema where the tables are located

    Return:
        dictionary of tables as objects
    """
    if not tables:
        return {}

    if self.subset_tables:
        tables = [table for table in tables if table["id"] in self.subset_tables]

    table_lookup = {}
    for table in tables:
        real_tablename = table.get(
            "provenance",
            self.prefix_table_name + table.id + self.postfix_table_name,
        )
        table_lookup[to_snake_case(real_tablename)] = table

    snaked_tablenames_str = self._snake_tablenames(table_lookup.keys())
    rows = pg_hook.get_records(
        f"""
        SELECT tablename FROM pg_tables
        WHERE schemaname = '{pg_schema}' AND tablename IN ({snaked_tablenames_str})
        """
    )

    return {row["tablename"]: table_lookup[row["tablename"]] for row in rows}
def _get_existing_tables(self, pg_hook, tables, pg_schema="public"):
    if not tables:
        return {}  # keep the return type consistent with the dict built below

    table_lookup = {}
    for table in tables:
        real_tablename = table.get(
            "provenance",
            self.prefix_table_name + table.id + self.postfix_table_name,
        )
        table_lookup[to_snake_case(real_tablename)] = table

    snaked_tablenames_str = self._snake_tablenames(table_lookup.keys())
    rows = pg_hook.get_records(
        f"""
        SELECT tablename FROM pg_tables
        WHERE schemaname = '{pg_schema}' AND tablename IN ({snaked_tablenames_str})
        """
    )

    return {row["tablename"]: table_lookup[row["tablename"]] for row in rows}
def _get_table_context(table: DatasetTableSchema, paths: dict[str, str]):
    """Collect all table data for the REST API spec."""
    uri = _get_table_uri(table, paths)
    table_fields = list(table.get_fields(include_subfields=False))
    fields = _get_fields(table_fields)
    filters = _get_filters(table_fields)

    return {
        "title": to_snake_case(table.id).replace("_", " ").capitalize(),
        "doc_id": f"{table.dataset.id}:{table.id}",
        "uri": uri,
        "rest_csv": f"{uri}?_format=csv",
        "rest_geojson": f"{uri}?_format=geojson",
        "description": table.get("description"),
        "fields": [_get_field_context(field) for field in fields],
        "filters": filters,
        "auth": table.auth | table.dataset.auth,
        "expands": _get_table_expands(table),
        "additional_filters": table.additional_filters,
        "source": table,
        "has_geometry": _has_geometry(table),
    }
def render_wfs_dataset_docs(dataset: DatasetSchema, paths: dict[str, str]):
    """Render the docs for the WFS dataset."""
    snake_name = to_snake_case(dataset.id)
    dataset_path = paths[dataset.id]
    main_title = dataset.title or snake_name.replace("_", " ").capitalize()
    tables = [_get_feature_type_context(t, paths) for t in dataset.tables]
    if all(not t["has_geometry"] for t in tables):
        return None

    render_template(
        "wfs-datasets/dataset.rst.j2",
        f"wfs-datasets/{dataset_path}.rst",
        {
            "schema": dataset,
            "schema_name": snake_name,
            "main_title": main_title,
            "tables": tables,
            "wfs_url": f"{BASE_URL}/v1/wfs/{dataset_path}/",
        },
    )

    return dataset_path
def convert_field_name(self, field_name):
    if "." in field_name:
        return "__".join(
            self.convert_field_name(part) for part in field_name.split(".")
        )
    return to_snake_case(field_name)
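# A hedged illustration (not from the source): a dotted camelCase filter name
# becomes a Django ORM lookup path. "backend" is a hypothetical instance of
# the class defining convert_field_name().
assert backend.convert_field_name("ligtInBuurt.naam") == "ligt_in_buurt__naam"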
def _get_table_uri(table: DatasetTableSchema, paths: dict[str, str]) -> str:
    """Tell where the endpoint of a table will be"""
    dataset_path = paths[table.dataset.id]
    snake_id = to_snake_case(table.id)
    return f"{BASE_URL}/v1/{dataset_path}/{snake_id}/"
def execute(self, context: Optional[Dict[str, Any]] = None) -> None:  # NoQA C901
    """Translate table, column and index names based on the provenance
    specification in the schema.

    Args:
        context: When this operator is created, the context parameter is used
            to refer to get_template_context for more context, as part of the
            inheritance from BaseOperator. It is set to None in this case.

    Executes:
        SQL ALTER statements that rename database tables, columns and/or indexes
    """
    dataset = schema_def_from_url(SCHEMA_URL, self.dataset_name)
    pg_hook = PostgresHook(postgres_conn_id=self.postgres_conn_id)
    sqls = []
    existing_tables_lookup = self._get_existing_tables(
        pg_hook, dataset.tables, pg_schema=self.pg_schema
    )
    snaked_tablenames = existing_tables_lookup.keys()
    existing_columns = self._get_existing_columns(
        pg_hook, snaked_tablenames, pg_schema=self.pg_schema
    )

    if self.rename_indexes:
        for table_name, index_names in self._get_existing_indexes(
            pg_hook, snaked_tablenames, pg_schema=self.pg_schema
        ).items():
            if table_name not in existing_tables_lookup:
                continue
            for index_name in index_names:
                new_table_name = existing_tables_lookup[table_name].id
                new_index_name = index_name.replace(
                    table_name, to_snake_case(f"{dataset.id}_{new_table_name}")
                )
                if index_name != new_index_name:
                    sqls.append(
                        f"""ALTER INDEX {self.pg_schema}.{index_name}
                            RENAME TO {new_index_name}"""
                    )

    for snaked_tablename, table in existing_tables_lookup.items():
        for field in table.fields:
            provenance = field.get("provenance")
            if provenance is not None:
                snaked_field_name = to_snake_case(field.name)
                if "relation" in field:
                    snaked_field_name += "_id"
                if provenance.lower() in existing_columns[snaked_tablename]:
                    # Quotes are applied on the provenance name in case the
                    # source uses a space in the name.
                    sqls.append(
                        f"""ALTER TABLE {self.pg_schema}.{snaked_tablename}
                            RENAME COLUMN "{provenance}" TO {snaked_field_name}"""
                    )

        provenance = table.get("provenance")
        if provenance is not None:
            sqls.append(
                f"""ALTER TABLE IF EXISTS {self.pg_schema}.{snaked_tablename}
                    RENAME TO {to_snake_case(table.id)}"""
            )

    pg_hook.run(sqls)