def _aggregate_operation_aspect_events(
    self,
    events: List[RedshiftJoinedAccessEvent],
    operation_type: Union[str, "OperationTypeClass"],
) -> Iterable[MetadataWorkUnit]:
    """Yield an ``operation`` aspect work unit for each complete access event.

    An event is skipped unless its ``database``, ``usename``, ``schema_``,
    ``table`` and ``endtime`` are all truthy.

    Args:
        events: Joined Redshift access events to convert.
        operation_type: Operation type recorded on every emitted aspect.

    Yields:
        One ``MetadataWorkUnit`` per usable event. It wraps an UPSERT
        proposal of the ``operation`` aspect for the ``redshift`` dataset
        named ``<database>.<schema>.<table>`` (lower-cased) in
        ``self.config.env``.
    """
    for event in events:
        if not (
            event.database
            and event.usename
            and event.schema_
            and event.table
            and event.endtime
        ):
            continue

        resource = f"{event.database}.{event.schema_}.{event.table}"
        # The event end time, in epoch milliseconds, is used both as the
        # aspect timestamp and as the last-updated time.
        end_time_ms: int = int(event.endtime.timestamp() * 1000)
        # Only the part of the user name before "@" goes into the user URN.
        actor_urn = builder.make_user_urn(event.usename.split("@")[0])

        aspect = OperationClass(
            timestampMillis=end_time_ms,
            lastUpdatedTimestamp=end_time_ms,
            actor=actor_urn,
            operationType=operation_type,
        )
        dataset_urn = builder.make_dataset_urn(
            "redshift", resource.lower(), self.config.env
        )
        proposal = MetadataChangeProposalWrapper(
            entityType="dataset",
            aspectName="operation",
            changeType=ChangeTypeClass.UPSERT,
            entityUrn=dataset_urn,
            aspect=aspect,
        )
        yield MetadataWorkUnit(
            id=f"operation-aspect-{event.table}-{event.endtime.isoformat()}",
            mcp=proposal,
        )
def _get_operation_aspect_work_units(
    self, events: Iterable[SnowflakeJoinedAccessEvent]
) -> Iterable[MetadataWorkUnit]:
    """Yield ``operation`` aspect work units for Snowflake access events.

    Only events that have a ``query_start_time`` and whose ``query_type`` is a
    key of ``OPERATION_STATEMENT_TYPES`` are converted. Each such event
    produces one work unit per entry in ``event.base_objects_accessed``.

    Args:
        events: Joined Snowflake access events to convert.

    Yields:
        A ``MetadataWorkUnit`` per accessed object. It wraps an UPSERT
        proposal of the ``operation`` aspect for the ``snowflake`` dataset
        named after the lower-cased object name in ``self.config.env``.
    """
    for event in events:
        if not (
            event.query_start_time
            and event.query_type in OPERATION_STATEMENT_TYPES
        ):
            continue

        start_time = event.query_start_time
        mapped_operation = OPERATION_STATEMENT_TYPES[event.query_type]
        # The query start time, in epoch milliseconds, is used both as the
        # aspect timestamp and as the last-updated time.
        start_time_ms: int = int(start_time.timestamp() * 1000)
        # Only the part of the e-mail before "@" goes into the user URN.
        actor_urn = builder.make_user_urn(event.email.split("@")[0])

        for accessed in event.base_objects_accessed:
            resource = accessed.objectName
            dataset_urn = builder.make_dataset_urn(
                "snowflake", resource.lower(), self.config.env
            )
            # A fresh aspect is built for every object so that no aspect
            # instance is shared between proposals.
            aspect = OperationClass(
                timestampMillis=start_time_ms,
                lastUpdatedTimestamp=start_time_ms,
                actor=actor_urn,
                operationType=mapped_operation,
            )
            proposal = MetadataChangeProposalWrapper(
                entityType="dataset",
                aspectName="operation",
                changeType=ChangeTypeClass.UPSERT,
                entityUrn=dataset_urn,
                aspect=aspect,
            )
            yield MetadataWorkUnit(
                id=f"operation-aspect-{resource}-{start_time.isoformat()}",
                mcp=proposal,
            )
def _create_operation_aspect_work_unit(
    self, event: QueryEvent
) -> Optional[MetadataWorkUnit]:
    """Build the ``operation`` aspect work unit for one query event.

    Args:
        event: Query event to convert.

    Returns:
        A ``MetadataWorkUnit`` wrapping an UPSERT proposal of the
        ``operation`` aspect on the event's destination table, or ``None``
        when the statement type is not a key of
        ``OPERATION_STATEMENT_TYPES``, the event has no destination table,
        or cleaning up the destination table raises (a warning is reported
        in that case).
    """
    if not (
        event.statementType in OPERATION_STATEMENT_TYPES
        and event.destinationTable
    ):
        return None

    try:
        destination_table: BigQueryTableRef = (
            event.destinationTable.remove_extras()
        )
    except Exception as e:
        self.report.report_warning(
            str(event.destinationTable),
            f"Failed to clean up destination table, {e}",
        )
        return None

    # The aspect timestamp is the wall-clock time of this call; the
    # last-updated time comes from the event itself. Both are epoch millis.
    now_ms: int = int(time.time() * 1000)
    event_time_ms: int = int(event.timestamp.timestamp() * 1000)

    # Referenced tables that cannot be cleaned up are reported and left out
    # rather than aborting the whole work unit.
    affected_urns = []
    for table in event.referencedTables or []:
        try:
            affected_urns.append(
                _table_ref_to_urn(table.remove_extras(), self.config.env)
            )
        except Exception as e:
            self.report.report_warning(
                str(table),
                f"Failed to clean up table, {e}",
            )

    # Only the part of the actor e-mail before "@" goes into the user URN.
    actor_urn = builder.make_user_urn(event.actor_email.split("@")[0])
    aspect = OperationClass(
        timestampMillis=now_ms,
        lastUpdatedTimestamp=event_time_ms,
        actor=actor_urn,
        operationType=OPERATION_STATEMENT_TYPES[event.statementType],
        affectedDatasets=affected_urns,
    )
    destination_urn = _table_ref_to_urn(destination_table, env=self.config.env)
    proposal = MetadataChangeProposalWrapper(
        entityType="dataset",
        aspectName="operation",
        changeType=ChangeTypeClass.UPSERT,
        entityUrn=destination_urn,
        aspect=aspect,
    )
    return MetadataWorkUnit(
        id=f"{event.timestamp.isoformat()}-operation-aspect-{destination_table}",
        mcp=proposal,
    )
def _gen_operation_aspect_workunits_from_access_events(
    self,
    events_iterable: Iterable[RedshiftAccessEvent],
) -> Iterable[MetadataWorkUnit]:
    """Yield ``operation`` aspect work units for Redshift insert/delete events.

    The ``num_operational_stats_workunits_emitted`` counter on ``self.report``
    is reset to zero when iteration starts and incremented for every work
    unit produced. Each work unit is also passed to
    ``self.report.report_workunit`` before being yielded.

    An event is skipped unless its ``database``, ``username``, ``schema_``,
    ``table``, ``endtime`` and ``operation_type`` are all truthy.

    Args:
        events_iterable: Redshift access events to convert.

    Yields:
        One ``MetadataWorkUnit`` per usable event. It wraps an UPSERT
        proposal of the ``operation`` aspect for the ``redshift`` dataset
        named ``<database>.<schema>.<table>`` (lower-cased), using
        ``self.config.platform_instance`` and ``self.config.env``.

    Raises:
        AssertionError: If a usable event's ``operation_type`` is neither
            ``"insert"`` nor ``"delete"``.
    """
    self.report.num_operational_stats_workunits_emitted = 0
    for event in events_iterable:
        if not (
            event.database
            and event.username
            and event.schema_
            and event.table
            and event.endtime
            and event.operation_type
        ):
            continue

        # Raised explicitly instead of via ``assert`` so the check is not
        # stripped under ``python -O``; without it any unexpected value would
        # silently be reported as DELETE below. AssertionError is kept so
        # existing handlers see the same exception type.
        if event.operation_type not in ("insert", "delete"):
            raise AssertionError(
                f"Unsupported operation type: {event.operation_type!r}"
            )

        resource: str = f"{event.database}.{event.schema_}.{event.table}"
        # The aspect timestamp is the wall-clock time of processing; the
        # last-updated time is the event end time. Both are epoch millis.
        reported_time: int = int(time.time() * 1000)
        last_updated_timestamp: int = int(event.endtime.timestamp() * 1000)
        user_email: str = event.username
        operation_aspect = OperationClass(
            timestampMillis=reported_time,
            lastUpdatedTimestamp=last_updated_timestamp,
            # Only the part of the user name before "@" goes into the URN.
            actor=builder.make_user_urn(user_email.split("@")[0]),
            operationType=(
                OperationTypeClass.INSERT
                if event.operation_type == "insert"
                else OperationTypeClass.DELETE
            ),
        )
        mcp = MetadataChangeProposalWrapper(
            entityType="dataset",
            aspectName="operation",
            changeType=ChangeTypeClass.UPSERT,
            entityUrn=builder.make_dataset_urn_with_platform_instance(
                "redshift",
                resource.lower(),
                self.config.platform_instance,
                self.config.env,
            ),
            aspect=operation_aspect,
        )
        wu = MetadataWorkUnit(
            id=f"operation-aspect-{event.table}-{event.endtime.isoformat()}",
            mcp=mcp,
        )
        self.report.report_workunit(wu)
        self.report.num_operational_stats_workunits_emitted += 1
        yield wu
def _gen_operation_aspect_workunits_by_type_from_access_events(
    self,
    events_iterable: Iterable[RedshiftAccessEvent],
    operation_type: Union[str, "OperationTypeClass"],
) -> Iterable[MetadataWorkUnit]:
    """Yield ``operation`` aspect work units tagged with a fixed operation type.

    An event is skipped when any of its ``database``, ``username``,
    ``schema_``, ``table`` or ``endtime`` is falsy. Each work unit is passed
    to ``self.report.report_workunit`` before being yielded.

    Args:
        events_iterable: Redshift access events to convert.
        operation_type: Operation type recorded on every emitted aspect.

    Yields:
        One ``MetadataWorkUnit`` per usable event. It wraps an UPSERT
        proposal of the ``operation`` aspect for the ``redshift`` dataset
        named ``<database>.<schema>.<table>`` (lower-cased), using
        ``self.config.platform_instance`` and ``self.config.env``.
    """
    for event in events_iterable:
        if (
            not event.database
            or not event.username
            or not event.schema_
            or not event.table
            or not event.endtime
        ):
            continue

        resource: str = f"{event.database}.{event.schema_}.{event.table}"
        # The event end time, in epoch milliseconds, is used both as the
        # aspect timestamp and as the last-updated time.
        end_time_ms: int = int(event.endtime.timestamp() * 1000)
        # Only the part of the user name before "@" goes into the user URN.
        actor_urn = builder.make_user_urn(event.username.split("@")[0])

        aspect = OperationClass(
            timestampMillis=end_time_ms,
            lastUpdatedTimestamp=end_time_ms,
            actor=actor_urn,
            operationType=operation_type,
        )
        dataset_urn = builder.make_dataset_urn_with_platform_instance(
            "redshift",
            resource.lower(),
            self.config.platform_instance,
            self.config.env,
        )
        proposal = MetadataChangeProposalWrapper(
            entityType="dataset",
            aspectName="operation",
            changeType=ChangeTypeClass.UPSERT,
            entityUrn=dataset_urn,
            aspect=aspect,
        )
        work_unit = MetadataWorkUnit(
            id=f"operation-aspect-{event.table}-{event.endtime.isoformat()}",
            mcp=proposal,
        )
        self.report.report_workunit(work_unit)
        yield work_unit