Example #1
    def get_aggregated_log_counts(self,
                                  start_datetime,
                                  end_datetime,
                                  performer_name=None,
                                  repository_name=None,
                                  namespace_name=None,
                                  filter_kinds=None):
        if filter_kinds is not None:
            assert all(
                isinstance(kind_name, str) for kind_name in filter_kinds)

        if end_datetime - start_datetime >= timedelta(weeks=4):
            raise Exception(
                'Cannot lookup aggregated logs over a period longer than a month'
            )

        repository = None
        if repository_name and namespace_name:
            repository = model.repository.get_repository(
                namespace_name, repository_name)

        performer = None
        if performer_name:
            performer = model.user.get_user(performer_name)

        entries = {}
        for log_model in LOG_MODELS:
            aggregated = model.log.get_aggregated_logs(
                start_datetime,
                end_datetime,
                performer=performer,
                repository=repository,
                namespace=namespace_name,
                ignore=filter_kinds,
                model=log_model)

            for entry in aggregated:
                # The query only returns a day-of-month, so rebuild a full
                # date anchored to the start of the range; a day smaller than
                # the range's start day must belong to the following month.
                synthetic_date = datetime(start_datetime.year,
                                          start_datetime.month,
                                          int(entry.day),
                                          tzinfo=get_localzone())
                if synthetic_date.day < start_datetime.day:
                    synthetic_date = synthetic_date + relativedelta(months=1)

                # Counts for the same (kind, day) across log models are summed.
                key = '%s-%s' % (entry.kind_id, entry.day)

                if key in entries:
                    entries[key] = AggregatedLogCount(
                        entry.kind_id, entry.count + entries[key].count,
                        synthetic_date)
                else:
                    entries[key] = AggregatedLogCount(entry.kind_id,
                                                      entry.count,
                                                      synthetic_date)

        return entries.values()
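The synthetic-date arithmetic above is the subtle part: the aggregation only reports a day-of-month, and a query window of up to a month can straddle a month boundary. A minimal sketch of that logic (the helper name synthesize_date is hypothetical, and the tzinfo argument is omitted for brevity):

from datetime import datetime
from dateutil.relativedelta import relativedelta

def synthesize_date(start_datetime, day):
    # Rebuild a full date from a bare day-of-month, anchored to the month
    # in which the query window starts.
    d = datetime(start_datetime.year, start_datetime.month, int(day))
    # A day earlier than the window's start day can only fall in the next month.
    if d.day < start_datetime.day:
        d += relativedelta(months=1)
    return d

# For a window starting 2019-06-20: day 25 stays in June, day 5 rolls to July.
assert synthesize_date(datetime(2019, 6, 20), 25) == datetime(2019, 6, 25)
assert synthesize_date(datetime(2019, 6, 20), 5) == datetime(2019, 7, 5)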
Example #2
def _merge_aggregated_log_counts(*args):
    """
    Merge two lists of AggregatedLogCount based on the value of their kind_id and datetime.
    """
    matching_keys = {}
    aggregated_log_counts_list = itertools.chain.from_iterable(args)

    def canonical_key_from_kind_date_tuple(kind_id, dt):
        """
        Return a comma-separated key built from an AggregatedLogCount's kind_id and datetime.
        """
        return str(kind_id) + "," + str(dt)

    for kind_id, count, dt in aggregated_log_counts_list:
        kind_date_key = canonical_key_from_kind_date_tuple(kind_id, dt)
        if kind_date_key in matching_keys:
            existing_count = matching_keys[kind_date_key][2]
            matching_keys[kind_date_key] = (kind_id, dt, existing_count + count)
        else:
            matching_keys[kind_date_key] = (kind_id, dt, count)

    return [
        AggregatedLogCount(kind_id, count, dt)
        for (kind_id, dt, count) in list(matching_keys.values())
    ]
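A quick usage sketch: entries sharing a (kind_id, datetime) pair are summed, all others pass through unchanged. AggregatedLogCount is redefined locally here as the three-field namedtuple the examples unpack it as; the real definition lives elsewhere in the codebase.

from collections import namedtuple
from datetime import datetime

# Field order matches the unpacking `for kind_id, count, dt in ...` above.
AggregatedLogCount = namedtuple("AggregatedLogCount", ["kind_id", "count", "datetime"])

merged = _merge_aggregated_log_counts(
    [AggregatedLogCount(1, 3, datetime(2019, 6, 6))],
    [AggregatedLogCount(1, 5, datetime(2019, 6, 6)),
     AggregatedLogCount(3, 3, datetime(2019, 6, 1))],
)
# -> the shared (kind_id=1, 2019-06-06) key sums to 8;
#    the (kind_id=3, 2019-06-01) entry passes through unchanged.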
Example #3
  def get_aggregated_log_counts(self, start_datetime, end_datetime, performer_name=None,
                                repository_name=None, namespace_name=None, filter_kinds=None):
    entries = {}
    for log_and_repo in self._filter_logs(start_datetime, end_datetime, performer_name,
                                          repository_name, namespace_name, filter_kinds):
      entry = log_and_repo.log
      synthetic_date = datetime(start_datetime.year, start_datetime.month, int(entry.datetime.day),
                                tzinfo=get_localzone())
      if synthetic_date.day < start_datetime.day:
        synthetic_date = synthetic_date + relativedelta(months=1)

      key = '%s-%s' % (entry.kind_id, entry.datetime.day)

      if key in entries:
        entries[key] = AggregatedLogCount(entry.kind_id, entries[key].count + 1,
                                          synthetic_date)
      else:
        entries[key] = AggregatedLogCount(entry.kind_id, 1, synthetic_date)

    return entries.values()
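Note the difference from Example #1: there the database returns pre-aggregated counts that are summed per (kind, day); here each individual log row increments its bucket by one. The next example pushes the same aggregation into Elasticsearch instead.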
Example #4
    def get_aggregated_log_counts(
        self,
        start_datetime,
        end_datetime,
        performer_name=None,
        repository_name=None,
        namespace_name=None,
        filter_kinds=None,
    ):
        if end_datetime - start_datetime >= timedelta(days=DATE_RANGE_LIMIT):
            raise Exception(
                "Cannot lookup aggregated logs over a period longer than a month"
            )

        repository_id, account_id, performer_id = DocumentLogsModel._get_ids_by_names(
            repository_name, namespace_name, performer_name)

        with CloseForLongOperation(config.app_config):
            search = self._base_query_date_range(start_datetime, end_datetime,
                                                 performer_id, repository_id,
                                                 account_id, filter_kinds)
            search.aggs.bucket("by_id", "terms",
                               field="kind_id").bucket("by_date",
                                                       "date_histogram",
                                                       field="datetime",
                                                       interval="day")
            # size=0 suppresses document hits; ES still returns all aggregation buckets
            search = search.extra(size=0)
            resp = search.execute()

        if not resp.aggregations:
            return []

        counts = []
        by_id = resp.aggregations["by_id"]

        for id_bucket in by_id.buckets:
            for date_bucket in id_bucket.by_date.buckets:
                if date_bucket.doc_count > 0:
                    counts.append(
                        AggregatedLogCount(id_bucket.key,
                                           date_bucket.doc_count,
                                           date_bucket.key))

        return counts
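The two chained bucket() calls build a terms aggregation on kind_id with a nested per-day date_histogram. As a sketch, the raw aggregation body they produce looks roughly like this (field names taken from the example above):

AGGS_BODY = {
    "aggs": {
        "by_id": {
            "terms": {"field": "kind_id"},
            "aggs": {
                "by_date": {
                    "date_histogram": {"field": "datetime", "interval": "day"}
                }
            },
        }
    },
    "size": 0,  # suppress document hits; only the buckets are needed
}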
Example #5
    day = datetime.today() - timedelta(minutes=60)
    simple_repo = model.repository.get_repository("devtable", "simple")
    count = logs_model.count_repository_actions(simple_repo, day)
    assert count == 0

    org_repo = model.repository.get_repository("buynlarge", "orgrepo")
    count = logs_model.count_repository_actions(org_repo, day)
    assert count == 3


@pytest.mark.parametrize(
    "aggregated_log_counts1, aggregated_log_counts2, expected_result",
    [
        pytest.param(
            [
                AggregatedLogCount(1, 3, datetime(2019, 6, 6, 0, 0)),  # 1
                AggregatedLogCount(1, 3, datetime(2019, 6, 7, 0, 0)),  # 2
            ],
            [
                AggregatedLogCount(1, 5, datetime(2019, 6, 6, 0, 0)),  # 1
                AggregatedLogCount(1, 7, datetime(2019, 6, 7, 0, 0)),  # 2
                AggregatedLogCount(3, 3, datetime(2019, 6, 1, 0, 0)),  # 3
            ],
            [
                AggregatedLogCount(1, 8, datetime(2019, 6, 6, 0, 0)),  # 1
                AggregatedLogCount(1, 10, datetime(2019, 6, 7, 0, 0)),  # 2
                AggregatedLogCount(3, 3, datetime(2019, 6, 1, 0, 0)),  # 3
            ],
        ),
        pytest.param(
            [AggregatedLogCount(1, 3, datetime(2019, 6, 6, 0, 0)),],  # 1
Example #6
            },
            "aggs": {
                "by_date": {
                    "date_histogram": {
                        "field": "datetime",
                        "interval": "day"
                    }
                }
            },
        }
    },
    "size": 0,
}

AGGS_COUNT = [  # `parse` is presumably dateutil.parser.parse
    AggregatedLogCount(1, 1, parse("2009-11-15T00:00:00.000")),
    AggregatedLogCount(2, 1, parse("2009-11-12T00:00:00.000")),
    AggregatedLogCount(2, 2, parse("2009-11-14T00:00:00.000")),
]

COUNT_REQUEST = {
    "query": {
        "bool": {
            "filter": [{
                "term": {
                    "repository_id": 1
                }
            }]
        }
    }
}
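
COUNT_REQUEST is a plain count query filtered to a single repository. A hypothetical way to execute it with the elasticsearch-py client (the index name "logentry" is an assumption, not part of the example):

from elasticsearch import Elasticsearch

es = Elasticsearch()
resp = es.count(index="logentry", body=COUNT_REQUEST)
print(resp["count"])  # total log entries where repository_id == 1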