def get_aggregated_log_counts(
    self,
    start_datetime,
    end_datetime,
    performer_name=None,
    repository_name=None,
    namespace_name=None,
    filter_kinds=None,
):
    if filter_kinds is not None:
        assert all(isinstance(kind_name, str) for kind_name in filter_kinds)

    if end_datetime - start_datetime >= timedelta(weeks=4):
        raise Exception('Cannot lookup aggregated logs over a period longer than a month')

    repository = None
    if repository_name and namespace_name:
        repository = model.repository.get_repository(namespace_name, repository_name)

    performer = None
    if performer_name:
        performer = model.user.get_user(performer_name)

    entries = {}
    for log_model in LOG_MODELS:
        aggregated = model.log.get_aggregated_logs(
            start_datetime,
            end_datetime,
            performer=performer,
            repository=repository,
            namespace=namespace_name,
            ignore=filter_kinds,
            model=log_model,
        )

        for entry in aggregated:
            synthetic_date = datetime(
                start_datetime.year,
                start_datetime.month,
                int(entry.day),
                tzinfo=get_localzone(),
            )
            if synthetic_date.day < start_datetime.day:
                synthetic_date = synthetic_date + relativedelta(months=1)

            key = '%s-%s' % (entry.kind_id, entry.day)

            if key in entries:
                entries[key] = AggregatedLogCount(
                    entry.kind_id, entry.count + entries[key].count, synthetic_date
                )
            else:
                entries[key] = AggregatedLogCount(entry.kind_id, entry.count, synthetic_date)

    return entries.values()
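# The aggregation above only carries the day of the month for each bucket, so the
# method rebuilds a full date from the query's start month and rolls it into the
# next month when the bucket day precedes the start day (the range is capped at
# roughly one month). A minimal standalone sketch of that rollover, using a
# hypothetical helper name purely for illustration:

from datetime import datetime

from dateutil.relativedelta import relativedelta


def _synthetic_date(start_datetime, day_of_month):
    # Hypothetical helper mirroring the rollover logic: a day smaller than the
    # start day belongs to the month after the query's start month.
    synthetic = datetime(start_datetime.year, start_datetime.month, day_of_month)
    if synthetic.day < start_datetime.day:
        synthetic = synthetic + relativedelta(months=1)
    return synthetic


# A bucket for day 3 in a query starting 2019-06-20 means 2019-07-03, not 2019-06-03.
assert _synthetic_date(datetime(2019, 6, 20), 3) == datetime(2019, 7, 3)
assert _synthetic_date(datetime(2019, 6, 20), 25) == datetime(2019, 6, 25)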
def _merge_aggregated_log_counts(*args):
    """ Merge one or more lists of AggregatedLogCount, combining entries that share the same
        kind_id and datetime. """
    matching_keys = {}

    aggregated_log_counts_list = itertools.chain.from_iterable(args)

    def canonical_key_from_kind_date_tuple(kind_id, dt):
        """ Return a comma separated key from an AggregatedLogCount's kind_id and datetime. """
        return str(kind_id) + "," + str(dt)

    for kind_id, count, dt in aggregated_log_counts_list:
        kind_date_key = canonical_key_from_kind_date_tuple(kind_id, dt)
        if kind_date_key in matching_keys:
            existing_count = matching_keys[kind_date_key][2]
            matching_keys[kind_date_key] = (kind_id, dt, existing_count + count)
        else:
            matching_keys[kind_date_key] = (kind_id, dt, count)

    return [
        AggregatedLogCount(kind_id, count, dt)
        for (kind_id, dt, count) in list(matching_keys.values())
    ]
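# A minimal usage sketch of the merge helper above, with hypothetical values:
# entries that share both kind_id and datetime collapse into a single
# AggregatedLogCount whose count is the sum of the inputs.
counts_a = [
    AggregatedLogCount(1, 3, datetime(2019, 6, 6)),
    AggregatedLogCount(2, 1, datetime(2019, 6, 6)),
]
counts_b = [AggregatedLogCount(1, 5, datetime(2019, 6, 6))]

merged = _merge_aggregated_log_counts(counts_a, counts_b)
# Expected (ordering not guaranteed): kind 1 -> count 8 and kind 2 -> count 1,
# both dated 2019-06-06.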
def get_aggregated_log_counts(
    self,
    start_datetime,
    end_datetime,
    performer_name=None,
    repository_name=None,
    namespace_name=None,
    filter_kinds=None,
):
    entries = {}
    for log_and_repo in self._filter_logs(
        start_datetime,
        end_datetime,
        performer_name,
        repository_name,
        namespace_name,
        filter_kinds,
    ):
        entry = log_and_repo.log
        synthetic_date = datetime(
            start_datetime.year,
            start_datetime.month,
            int(entry.datetime.day),
            tzinfo=get_localzone(),
        )
        if synthetic_date.day < start_datetime.day:
            synthetic_date = synthetic_date + relativedelta(months=1)

        key = '%s-%s' % (entry.kind_id, entry.datetime.day)

        if key in entries:
            entries[key] = AggregatedLogCount(entry.kind_id, entries[key].count + 1, synthetic_date)
        else:
            entries[key] = AggregatedLogCount(entry.kind_id, 1, synthetic_date)

    return entries.values()
def get_aggregated_log_counts(
    self,
    start_datetime,
    end_datetime,
    performer_name=None,
    repository_name=None,
    namespace_name=None,
    filter_kinds=None,
):
    if end_datetime - start_datetime >= timedelta(days=DATE_RANGE_LIMIT):
        raise Exception("Cannot lookup aggregated logs over a period longer than a month")

    repository_id, account_id, performer_id = DocumentLogsModel._get_ids_by_names(
        repository_name, namespace_name, performer_name
    )

    with CloseForLongOperation(config.app_config):
        search = self._base_query_date_range(
            start_datetime, end_datetime, performer_id, repository_id, account_id, filter_kinds
        )
        search.aggs.bucket("by_id", "terms", field="kind_id").bucket(
            "by_date", "date_histogram", field="datetime", interval="day"
        )

        # es returns all buckets when size=0
        search = search.extra(size=0)
        resp = search.execute()

    if not resp.aggregations:
        return []

    counts = []
    by_id = resp.aggregations["by_id"]
    for id_bucket in by_id.buckets:
        for date_bucket in id_bucket.by_date.buckets:
            if date_bucket.doc_count > 0:
                counts.append(
                    AggregatedLogCount(id_bucket.key, date_bucket.doc_count, date_bucket.key)
                )

    return counts
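# For reference, the chained bucket() calls above are expected to serialize to a
# nested terms/date_histogram aggregation roughly like the sketch below (the exact
# request body depends on the elasticsearch-dsl version in use); the per-kind
# buckets are then split per day, and size=0 drops the document hits from the
# response since only the buckets are read.
EXPECTED_AGGS_SKETCH = {
    "aggs": {
        "by_id": {
            "terms": {"field": "kind_id"},
            "aggs": {
                "by_date": {
                    "date_histogram": {"field": "datetime", "interval": "day"}
                }
            },
        }
    },
    "size": 0,
}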
    day = datetime.today() - timedelta(minutes=60)

    simple_repo = model.repository.get_repository("devtable", "simple")
    count = logs_model.count_repository_actions(simple_repo, day)
    assert count == 0

    org_repo = model.repository.get_repository("buynlarge", "orgrepo")
    count = logs_model.count_repository_actions(org_repo, day)
    assert count == 3


@pytest.mark.parametrize(
    "aggregated_log_counts1, aggregated_log_counts2, expected_result",
    [
        pytest.param(
            [
                AggregatedLogCount(1, 3, datetime(2019, 6, 6, 0, 0)),  # 1
                AggregatedLogCount(1, 3, datetime(2019, 6, 7, 0, 0)),  # 2
            ],
            [
                AggregatedLogCount(1, 5, datetime(2019, 6, 6, 0, 0)),  # 1
                AggregatedLogCount(1, 7, datetime(2019, 6, 7, 0, 0)),  # 2
                AggregatedLogCount(3, 3, datetime(2019, 6, 1, 0, 0)),  # 3
            ],
            [
                AggregatedLogCount(1, 8, datetime(2019, 6, 6, 0, 0)),  # 1
                AggregatedLogCount(1, 10, datetime(2019, 6, 7, 0, 0)),  # 2
                AggregatedLogCount(3, 3, datetime(2019, 6, 1, 0, 0)),  # 3
            ],
        ),
        pytest.param(
            [AggregatedLogCount(1, 3, datetime(2019, 6, 6, 0, 0)),],  # 1
}, "aggs": { "by_date": { "date_histogram": { "field": "datetime", "interval": "day" } } }, } }, "size": 0, } AGGS_COUNT = [ AggregatedLogCount(1, 1, parse("2009-11-15T00:00:00.000")), AggregatedLogCount(2, 1, parse("2009-11-12T00:00:00.000")), AggregatedLogCount(2, 2, parse("2009-11-14T00:00:00.000")), ] COUNT_REQUEST = { "query": { "bool": { "filter": [{ "term": { "repository_id": 1 } }] } } }