def get_usage_aspects_from_urn(
    self, entity_urn: str, start_timestamp: int, end_timestamp: int
) -> Optional[List[DatasetUsageStatisticsClass]]:
    """Fetch the ``datasetUsageStatistics`` timeseries aspects for a dataset.

    POSTs a JSON payload to the ``getTimeseriesAspectValues`` action of the
    ``/aspects`` endpoint on ``self._gms_server`` and parses every returned
    aspect value into a ``DatasetUsageStatisticsClass``.

    Args:
        entity_urn: URN of the dataset whose usage aspects are requested.
        start_timestamp: Sent as ``startTimeMillis`` (presumably epoch
            milliseconds -- confirm against the server API).
        end_timestamp: Sent as ``endTimeMillis`` (same assumption).

    Returns:
        The list of parsed usage aspects (possibly empty), or ``None`` when
        the server answers with a non-200 status or any exception is raised
        while sending the request or parsing the response.
    """
    payload = {
        "urn": entity_urn,
        "entity": "dataset",
        "aspect": "datasetUsageStatistics",
        "startTimeMillis": start_timestamp,
        "endTimeMillis": end_timestamp,
    }
    headers: Dict[str, Any] = {}
    url = f"{self._gms_server}/aspects?action=getTimeseriesAspectValues"
    try:
        response = self._session.post(url, data=json.dumps(payload), headers=headers)
        if response.status_code != 200:
            logger.debug(
                f"Non 200 status found while fetching usage aspects - {response.status_code}"
            )
            return None

        all_aspects = response.json().get("value", {}).get("values", [])
        usage_aspects: List[DatasetUsageStatisticsClass] = []
        for aspect in all_aspects:
            # Entries without an "aspect" wrapper or with an empty "value"
            # are skipped; the value itself is a JSON-encoded string.
            aspect_value = (aspect.get("aspect") or {}).get("value")
            if aspect_value:
                usage_aspects.append(
                    DatasetUsageStatisticsClass.from_obj(
                        json.loads(aspect_value), tuples=True
                    )
                )
        return usage_aspects
    except Exception as e:
        # Best-effort lookup: failures are logged and reported as None.
        # The exception must be bound to a %s placeholder; passing it as a
        # bare extra argument makes the logging module fail to format the
        # record and the error detail is lost.
        logger.error("Error while getting usage aspects: %s", e)
        return None
def make_usage_workunit(
    self,
    bucket_duration: BucketDuration,
    urn_builder: Callable[[ResourceType], str],
    top_n_queries: int,
    format_sql_queries: bool,
) -> MetadataWorkUnit:
    """Build the work unit carrying this bucket's dataset usage statistics.

    Assembles a ``DatasetUsageStatisticsClass`` from the aggregated state on
    ``self`` (``bucket_start_time``, ``userFreq``, ``queryCount``,
    ``queryFreq``, ``columnFreq``), wraps it in an UPSERT
    ``MetadataChangeProposalWrapper`` for the ``datasetUsageStatistics``
    aspect, and returns it as a ``MetadataWorkUnit``.

    Args:
        bucket_duration: Unit used for the aspect's ``eventGranularity``
            (with a multiple of 1).
        urn_builder: Maps ``self.resource`` to the entity URN.
        top_n_queries: Number of most frequent queries to include. The total
            query budget is split evenly across this many queries.
        format_sql_queries: When true, each query is passed through
            ``format_sql_query`` before being trimmed.

    Returns:
        A work unit whose id is
        ``"<bucket_start_time ISO string>-<resource>"``.
    """
    # With top_n_queries == 0 no query is emitted, so the budget is unused;
    # guard the division so that case yields an empty list instead of
    # raising ZeroDivisionError.
    budget_per_query: int = (
        int(self.total_budget_for_query_list / top_n_queries)
        if top_n_queries > 0
        else 0
    )

    usage_stats = DatasetUsageStatisticsClass(
        timestampMillis=int(self.bucket_start_time.timestamp() * 1000),
        eventGranularity=TimeWindowSizeClass(unit=bucket_duration, multiple=1),
        uniqueUserCount=len(self.userFreq),
        totalSqlQueries=self.queryCount,
        topSqlQueries=[
            self.trim_query(
                format_sql_query(query, keyword_case="upper", reindent_aligned=True)
                if format_sql_queries
                else query,
                budget_per_query,
            )
            for query, _ in self.queryFreq.most_common(top_n_queries)
        ],
        userCounts=[
            DatasetUserUsageCountsClass(
                # The user URN is derived from the part of the email before "@".
                user=builder.make_user_urn(user_email.split("@")[0]),
                count=count,
                userEmail=user_email,
            )
            for user_email, count in self.userFreq.most_common()
        ],
        fieldCounts=[
            DatasetFieldUsageCountsClass(
                fieldPath=column,
                count=count,
            )
            for column, count in self.columnFreq.most_common()
        ],
    )

    mcp = MetadataChangeProposalWrapper(
        entityType="dataset",
        aspectName="datasetUsageStatistics",
        changeType=ChangeTypeClass.UPSERT,
        entityUrn=urn_builder(self.resource),
        aspect=usage_stats,
    )

    return MetadataWorkUnit(
        id=f"{self.bucket_start_time.isoformat()}-{self.resource}", mcp=mcp
    )