def get_schema_metadata_for_custom_sql(
    self, columns: List[dict]
) -> Optional[SchemaMetadata]:
    """Build a schema aspect covering every column in ``columns``.

    Each column dict is read with ``.get``: ``name`` (default ``""``),
    ``remoteType`` (default ``"UNKNOWN"``) and ``description`` (default
    ``""``). The remote type is translated through ``FIELD_TYPE_MAPPING``,
    falling back to ``NullTypeClass`` when the type is not mapped.

    Args:
        columns: Column dicts to convert into schema fields.

    Returns:
        A ``SchemaMetadata`` holding one ``SchemaField`` per column, or
        ``None`` when ``columns`` is empty.
    """
    # The accumulator must live outside the loop so that every column ends
    # up in the schema, not just the last one visited.
    fields = []
    for field in columns:
        nativeDataType = field.get("remoteType", "UNKNOWN")
        TypeClass = FIELD_TYPE_MAPPING.get(nativeDataType, NullTypeClass)
        fields.append(
            SchemaField(
                fieldPath=field.get("name", ""),
                type=SchemaFieldDataType(type=TypeClass()),
                nativeDataType=nativeDataType,
                description=field.get("description", ""),
            )
        )

    # No columns means there is nothing to describe; callers get None.
    if not fields:
        return None

    return SchemaMetadata(
        schemaName="test",
        platform=f"urn:li:dataPlatform:{self.platform}",
        version=0,
        fields=fields,
        hash="",
        platformSchema=OtherSchema(rawSchema=""),
    )
def _get_schema(self, looker_view: LookerView) -> SchemaMetadataClass:
    """Assemble the schema aspect for a single Looker view.

    Fields and primary keys come from ``_get_fields_and_primary_keys``; the
    platform URN is derived from ``self.source_config.platform_name``.
    """
    view_fields, key_names = self._get_fields_and_primary_keys(looker_view)

    schema_name = looker_view.view_name
    platform_urn = f"urn:li:dataPlatform:{self.source_config.platform_name}"
    raw_schema = OtherSchema(rawSchema="looker-view")

    return SchemaMetadata(
        schemaName=schema_name,
        platform=platform_urn,
        version=0,
        fields=view_fields,
        primaryKeys=key_names,
        hash="",
        platformSchema=raw_schema,
    )
def emit_upstream_tables(self) -> Iterable[MetadataWorkUnit]:
    """Yield one metadata change event per entry in ``self.upstream_tables``.

    Each entry maps a table URN to ``(columns, path, is_embedded)``. Tables
    that are not embedded are skipped unless
    ``self.config.ingest_tables_external`` is set. A browse-path aspect is
    attached when ``path`` is truthy, and a schema aspect when ``columns`` is
    non-empty.
    """
    for table_urn, table_info in self.upstream_tables.items():
        columns, path, is_embedded = table_info

        # Only embedded tables are emitted unless external ingestion is on.
        if not (is_embedded or self.config.ingest_tables_external):
            logger.error(
                f"Skipping external table {table_urn} as ingest_tables_external is set to False"
            )
            continue

        dataset_snapshot = DatasetSnapshot(
            urn=table_urn,
            aspects=[],
        )

        if not path:
            logger.debug(f"Browse path not set for table {table_urn}")
        else:
            # Browse path
            dataset_snapshot.aspects.append(
                BrowsePathsClass(
                    paths=[f"/{self.config.env.lower()}/{self.platform}/{path}"]
                )
            )

        if columns:
            schema_fields = []
            for column in columns:
                remote_type = column.get("remoteType", "UNKNOWN")
                type_cls = FIELD_TYPE_MAPPING.get(remote_type, NullTypeClass)
                schema_fields.append(
                    SchemaField(
                        fieldPath=column["name"],
                        type=SchemaFieldDataType(type=type_cls()),
                        description="",
                        nativeDataType=remote_type,
                    )
                )

            # A schema aspect is attached only when columns are present.
            dataset_snapshot.aspects.append(
                SchemaMetadata(
                    schemaName="test",
                    platform=f"urn:li:dataPlatform:{self.platform}",
                    version=0,
                    fields=schema_fields,
                    hash="",
                    platformSchema=OtherSchema(rawSchema=""),
                )
            )

        yield self.get_metadata_change_event(dataset_snapshot)
def _get_schema(
    self, looker_view: LookerView, actor: str, sys_time: int
) -> SchemaMetadataClass:
    """Assemble the audited schema aspect for a single Looker view.

    Fields and primary keys come from ``_get_fields_and_primary_keys``. One
    ``AuditStamp`` built from ``sys_time`` and ``actor`` is used for both the
    ``created`` and ``lastModified`` attributes.
    """
    view_fields, key_names = self._get_fields_and_primary_keys(looker_view)
    audit = AuditStamp(time=sys_time, actor=actor)

    schema_name = looker_view.view_name
    platform_urn = f"urn:li:dataPlatform:{self.source_config.platform_name}"
    raw_schema = OtherSchema(rawSchema="looker-view")

    return SchemaMetadata(
        schemaName=schema_name,
        platform=platform_urn,
        version=0,
        fields=view_fields,
        primaryKeys=key_names,
        created=audit,
        lastModified=audit,
        hash="",
        platformSchema=raw_schema,
    )
def _get_schema(
    platform_name: str,
    schema_name: str,
    view_fields: List[ViewField],
    reporter: SourceReport,
) -> SchemaMetadataClass:
    """Assemble a schema aspect from a list of view fields.

    Field conversion and primary-key discovery are delegated to
    ``LookerUtil._get_fields_and_primary_keys``, which also receives
    ``reporter``.
    """
    schema_fields, key_names = LookerUtil._get_fields_and_primary_keys(
        view_fields=view_fields, reporter=reporter
    )

    platform_urn = f"urn:li:dataPlatform:{platform_name}"
    raw_schema = OtherSchema(rawSchema="")

    return SchemaMetadata(
        schemaName=schema_name,
        platform=platform_urn,
        version=0,
        fields=schema_fields,
        primaryKeys=key_names,
        hash="",
        platformSchema=raw_schema,
    )
def _get_schema_metadata_for_datasource(
    self, datasource_fields: List[dict]
) -> Optional[SchemaMetadata]:
    """Convert datasource field dicts into a schema aspect.

    Every field is first passed to ``_track_custom_sql_ids``. Its
    ``dataType`` (default ``"UNKNOWN"``) is mapped through
    ``FIELD_TYPE_MAPPING`` with ``NullTypeClass`` as the fallback. Tags built
    from ``role``, ``__typename`` and ``aggregation`` are attached only when
    ``self.config.ingest_tags`` is truthy.

    Returns:
        A ``SchemaMetadata`` with one field per input dict, or ``None`` when
        ``datasource_fields`` yields no fields.
    """
    schema_fields = []
    for ds_field in datasource_fields:
        # check datasource - custom sql relations from a field being referenced
        self._track_custom_sql_ids(ds_field)

        data_type = ds_field.get("dataType", "UNKNOWN")
        type_cls = FIELD_TYPE_MAPPING.get(data_type, NullTypeClass)

        field_path = ds_field["name"]
        field_type = SchemaFieldDataType(type=type_cls())
        description = make_description_from_params(
            ds_field.get("description", ""), ds_field.get("formula")
        )

        if self.config.ingest_tags:
            tags = get_tags_from_params(
                [
                    ds_field.get("role", ""),
                    ds_field.get("__typename", ""),
                    ds_field.get("aggregation", ""),
                ]
            )
        else:
            tags = None

        schema_fields.append(
            SchemaField(
                fieldPath=field_path,
                type=field_type,
                description=description,
                nativeDataType=data_type,
                globalTags=tags,
            )
        )

    if not schema_fields:
        return None

    return SchemaMetadata(
        schemaName="test",
        platform=f"urn:li:dataPlatform:{self.platform}",
        version=0,
        fields=schema_fields,
        hash="",
        platformSchema=OtherSchema(rawSchema=""),
    )
def emit_upstream_tables(self) -> Iterable[MetadataWorkUnit]:
    """Yield one metadata change event per entry in ``self.upstream_tables``.

    Each entry maps a table URN to ``(columns, path)``. Every emitted
    snapshot carries a browse-path aspect built from the configured
    environment, ``self.platform`` and ``path``, followed by a schema aspect
    built from ``columns``.
    """
    for table_urn, table_info in self.upstream_tables.items():
        columns, path = table_info

        dataset_snapshot = DatasetSnapshot(
            urn=table_urn,
            aspects=[],
        )

        # Browse path
        dataset_snapshot.aspects.append(
            BrowsePathsClass(
                paths=[f"/{self.config.env.lower()}/{self.platform}/{path}"]
            )
        )

        schema_fields = []
        for column in columns:
            remote_type = column.get("remoteType", "UNKNOWN")
            type_cls = FIELD_TYPE_MAPPING.get(remote_type, NullTypeClass)
            schema_fields.append(
                SchemaField(
                    fieldPath=column["name"],
                    type=SchemaFieldDataType(type=type_cls()),
                    description="",
                    nativeDataType=remote_type,
                )
            )

        # The schema aspect is always constructed here, so it is appended
        # unconditionally.
        dataset_snapshot.aspects.append(
            SchemaMetadata(
                schemaName="test",
                platform=f"urn:li:dataPlatform:{self.platform}",
                version=0,
                fields=schema_fields,
                hash="",
                platformSchema=OtherSchema(rawSchema=""),
            )
        )

        yield self.get_metadata_change_event(dataset_snapshot)