Esempio n. 1
0
    def get_schema_metadata_for_custom_sql(
            self, columns: List[dict]) -> Optional[SchemaMetadata]:
        """Build the schema aspect describing the columns of a custom SQL table.

        Args:
            columns: Column dicts. The keys ``name``, ``remoteType`` and
                ``description`` are read, each with a fallback value.

        Returns:
            A ``SchemaMetadata`` containing one ``SchemaField`` per entry of
            ``columns``, or ``None`` when ``columns`` is empty.
        """
        # Accumulate across the whole loop: resetting this list per column
        # would leave only the last column in the resulting schema.
        fields = []
        for field in columns:
            native_data_type = field.get("remoteType", "UNKNOWN")
            # Unmapped remote types fall back to NullTypeClass.
            type_class = FIELD_TYPE_MAPPING.get(native_data_type, NullTypeClass)
            fields.append(
                SchemaField(
                    fieldPath=field.get("name", ""),
                    type=SchemaFieldDataType(type=type_class()),
                    nativeDataType=native_data_type,
                    description=field.get("description", ""),
                )
            )

        if not fields:
            return None

        # Build the aspect once, after every column has been collected.
        return SchemaMetadata(
            schemaName="test",
            platform=f"urn:li:dataPlatform:{self.platform}",
            version=0,
            fields=fields,
            hash="",
            platformSchema=OtherSchema(rawSchema=""),
        )
Esempio n. 2
0
 def _get_schema(self, looker_view: LookerView) -> SchemaMetadataClass:
     """Assemble the schema aspect for a Looker view.

     Fields and primary keys come from ``_get_fields_and_primary_keys``; the
     schema is named after ``looker_view.view_name`` and the platform URN is
     derived from ``self.source_config.platform_name``.
     """
     view_fields, key_names = self._get_fields_and_primary_keys(looker_view)
     view_name = looker_view.view_name
     platform_urn = f"urn:li:dataPlatform:{self.source_config.platform_name}"
     return SchemaMetadata(
         schemaName=view_name,
         platform=platform_urn,
         version=0,
         fields=view_fields,
         primaryKeys=key_names,
         hash="",
         platformSchema=OtherSchema(rawSchema="looker-view"),
     )
Esempio n. 3
0
    def emit_upstream_tables(self) -> Iterable[MetadataWorkUnit]:
        """Yield one metadata change event per entry of ``self.upstream_tables``.

        Entries that are not embedded are skipped (and logged) unless
        ``self.config.ingest_tables_external`` is set. For every remaining
        table a ``DatasetSnapshot`` is built; a browse-path aspect is added
        when a path is known and a schema aspect when columns are known.
        """
        for table_urn, table_info in self.upstream_tables.items():
            columns, path, is_embedded = table_info

            if not (is_embedded or self.config.ingest_tables_external):
                logger.error(
                    f"Skipping external table {table_urn} as ingest_tables_external is set to False"
                )
                continue

            snapshot = DatasetSnapshot(
                urn=table_urn,
                aspects=[],
            )

            if not path:
                logger.debug(f"Browse path not set for table {table_urn}")
            else:
                # Browse path: /<env>/<platform>/<path>
                snapshot.aspects.append(
                    BrowsePathsClass(
                        paths=[f"/{self.config.env.lower()}/{self.platform}/{path}"]
                    )
                )

            if columns:
                schema_fields = []
                for column in columns:
                    native_type = column.get("remoteType", "UNKNOWN")
                    # Unmapped remote types fall back to NullTypeClass.
                    type_class = FIELD_TYPE_MAPPING.get(native_type, NullTypeClass)
                    schema_fields.append(
                        SchemaField(
                            fieldPath=column["name"],
                            type=SchemaFieldDataType(type=type_class()),
                            description="",
                            nativeDataType=native_type,
                        )
                    )

                # The schema aspect is attached only when columns are present.
                snapshot.aspects.append(
                    SchemaMetadata(
                        schemaName="test",
                        platform=f"urn:li:dataPlatform:{self.platform}",
                        version=0,
                        fields=schema_fields,
                        hash="",
                        platformSchema=OtherSchema(rawSchema=""),
                    )
                )

            yield self.get_metadata_change_event(snapshot)
Esempio n. 4
0
 def _get_schema(self, looker_view: LookerView, actor: str,
                 sys_time: int) -> SchemaMetadataClass:
     """Assemble the schema aspect for a Looker view, including audit stamps.

     A single ``AuditStamp`` built from ``sys_time`` and ``actor`` is used for
     both the ``created`` and ``lastModified`` attributes. Fields and primary
     keys come from ``_get_fields_and_primary_keys``.
     """
     view_fields, key_names = self._get_fields_and_primary_keys(looker_view)
     audit = AuditStamp(time=sys_time, actor=actor)
     view_name = looker_view.view_name
     platform_urn = f"urn:li:dataPlatform:{self.source_config.platform_name}"
     return SchemaMetadata(
         schemaName=view_name,
         platform=platform_urn,
         version=0,
         fields=view_fields,
         primaryKeys=key_names,
         created=audit,
         lastModified=audit,
         hash="",
         platformSchema=OtherSchema(rawSchema="looker-view"),
     )
Esempio n. 5
0
 def _get_schema(
     platform_name: str,
     schema_name: str,
     view_fields: List[ViewField],
     reporter: SourceReport,
 ) -> SchemaMetadataClass:
     """Assemble a schema aspect from a list of view fields.

     ``LookerUtil._get_fields_and_primary_keys`` turns ``view_fields`` into
     the schema fields and primary keys; ``platform_name`` is embedded in the
     platform URN and ``schema_name`` becomes the schema name.
     """
     extracted = LookerUtil._get_fields_and_primary_keys(
         view_fields=view_fields, reporter=reporter
     )
     schema_fields, key_names = extracted
     platform_urn = f"urn:li:dataPlatform:{platform_name}"
     return SchemaMetadata(
         schemaName=schema_name,
         platform=platform_urn,
         version=0,
         fields=schema_fields,
         primaryKeys=key_names,
         hash="",
         platformSchema=OtherSchema(rawSchema=""),
     )
Esempio n. 6
0
    def _get_schema_metadata_for_datasource(
        self, datasource_fields: List[dict]
    ) -> Optional[SchemaMetadata]:
        """Build the schema aspect for a datasource from its field dicts.

        Each field is first passed to ``self._track_custom_sql_ids``. Tags
        (from the ``role``, ``__typename`` and ``aggregation`` keys) are only
        attached when ``self.config.ingest_tags`` is truthy.

        Returns:
            A ``SchemaMetadata`` with one ``SchemaField`` per input field, or
            ``None`` when ``datasource_fields`` is empty.
        """
        schema_fields = []
        for ds_field in datasource_fields:
            # check datasource - custom sql relations from a field being referenced
            self._track_custom_sql_ids(ds_field)

            native_type = ds_field.get("dataType", "UNKNOWN")
            # Unmapped data types fall back to NullTypeClass.
            type_class = FIELD_TYPE_MAPPING.get(native_type, NullTypeClass)

            field_path = ds_field["name"]
            field_type = SchemaFieldDataType(type=type_class())
            field_description = make_description_from_params(
                ds_field.get("description", ""), ds_field.get("formula")
            )

            if self.config.ingest_tags:
                tag_sources = [
                    ds_field.get("role", ""),
                    ds_field.get("__typename", ""),
                    ds_field.get("aggregation", ""),
                ]
                field_tags = get_tags_from_params(tag_sources)
            else:
                field_tags = None

            schema_fields.append(
                SchemaField(
                    fieldPath=field_path,
                    type=field_type,
                    description=field_description,
                    nativeDataType=native_type,
                    globalTags=field_tags,
                )
            )

        if not schema_fields:
            return None

        return SchemaMetadata(
            schemaName="test",
            platform=f"urn:li:dataPlatform:{self.platform}",
            version=0,
            fields=schema_fields,
            hash="",
            platformSchema=OtherSchema(rawSchema=""),
        )
Esempio n. 7
0
    def emit_upstream_tables(self) -> Iterable[MetadataWorkUnit]:
        """Yield one metadata change event per entry of ``self.upstream_tables``.

        Every snapshot receives a browse-path aspect followed by a schema
        aspect; the schema aspect is attached even when the table's column
        collection is empty (its field list is then empty).
        """
        for table_urn, table_info in self.upstream_tables.items():
            columns, path = table_info

            snapshot = DatasetSnapshot(
                urn=table_urn,
                aspects=[],
            )

            # Browse path: /<env>/<platform>/<path>
            snapshot.aspects.append(
                BrowsePathsClass(
                    paths=[f"/{self.config.env.lower()}/{self.platform}/{path}"]
                )
            )

            schema_fields = []
            for column in columns:
                native_type = column.get("remoteType", "UNKNOWN")
                # Unmapped remote types fall back to NullTypeClass.
                type_class = FIELD_TYPE_MAPPING.get(native_type, NullTypeClass)
                schema_fields.append(
                    SchemaField(
                        fieldPath=column["name"],
                        type=SchemaFieldDataType(type=type_class()),
                        description="",
                        nativeDataType=native_type,
                    )
                )

            snapshot.aspects.append(
                SchemaMetadata(
                    schemaName="test",
                    platform=f"urn:li:dataPlatform:{self.platform}",
                    version=0,
                    fields=schema_fields,
                    hash="",
                    platformSchema=OtherSchema(rawSchema=""),
                )
            )

            yield self.get_metadata_change_event(snapshot)