Beispiel #1
0
def get_repository_action_counts(repo, start_date):
    """
    Returns a query over the daily aggregated action counts recorded for the given repository,
    restricted to entries dated on or after the given start date.
    """
    belongs_to_repo = RepositoryActionCount.repository == repo
    on_or_after_start = RepositoryActionCount.date >= start_date
    return RepositoryActionCount.select().where(belongs_to_repo, on_or_after_start)
def update_repository_score(repo):
    """
    Updates the repository search score entry for the given repository by retrieving information
    from the RepositoryActionCount table.

    The score is a weighted sum over SEARCH_BUCKETS. Each bucket covers the date window
    [today - bucket.delta, today - previous bucket's delta), with the first bucket ending at
    today (exclusive). For every bucket the average daily action count in its window is
    multiplied by the bucket's weight; buckets with no rows contribute nothing. The sum is then
    scaled by 100 and truncated to an int before being stored.

    Note that count_repository_actions for the repo should be called first.

    Returns True if the score row was updated or created, and False if writing it raised an
    IntegrityError.
    """
    today = date.today()

    # Retrieve the counts for each bucket and calculate the final score.
    final_score = 0.0
    last_end_timedelta = timedelta(days=0)

    for bucket in SEARCH_BUCKETS:
        start_date = today - bucket.delta
        end_date = today - last_end_timedelta
        # The next bucket's window ends where this one starts.
        last_end_timedelta = bucket.delta

        query = RepositoryActionCount.select(
            fn.Sum(RepositoryActionCount.count), fn.Count(RepositoryActionCount.id)
        ).where(
            RepositoryActionCount.date >= start_date,
            RepositoryActionCount.date < end_date,
            RepositoryActionCount.repository == repo,
        )

        bucket_tuple = query.tuples()[0]
        logger.debug(
            "Got bucket tuple %s for bucket %s for repository %s", bucket_tuple, bucket, repo.id
        )

        # The sum comes back as None when no rows matched the window.
        if bucket_tuple[0] is None:
            continue

        bucket_sum = float(bucket_tuple[0])
        bucket_count = int(bucket_tuple[1])
        if not bucket_count:
            continue

        # bucket_sum is already a float, so this is true division.
        bucket_score = bucket_sum / bucket_count
        final_score += bucket_score * bucket.weight

    # Update the existing repo search score row or create a new one.
    normalized_score = int(final_score * 100.0)
    try:
        try:
            search_score_row = RepositorySearchScore.get(repository=repo)
            search_score_row.last_updated = datetime.now()
            search_score_row.score = normalized_score
            search_score_row.save()
            return True
        except RepositorySearchScore.DoesNotExist:
            # Stamp new rows with a full datetime, exactly like the update path above; passing
            # the bare `today` date here would drop the time-of-day component.
            RepositorySearchScore.create(
                repository=repo, score=normalized_score, last_updated=datetime.now()
            )
            return True
    except IntegrityError:
        logger.debug("RepositorySearchScore row already existed; skipping")
        return False
def delete_expired_entries(repo, limit=50):
    """
    Deletes up to `limit` entries of the given repository from the RepositoryActionCount table
    whose date is older than the retention threshold (now minus RAC_RETENTION_PERIOD).

    Returns the number of entries actually removed. Entries whose deletion raises an
    IntegrityError are skipped and not counted.
    """
    cutoff = datetime.today() - RAC_RETENTION_PERIOD
    expired_query = (
        RepositoryActionCount.select()
        .where(RepositoryActionCount.repository == repo, RepositoryActionCount.date < cutoff)
        .limit(limit)
    )

    expired_rows = list(expired_query)
    if not expired_rows:
        return 0

    assert len(expired_rows) <= limit

    removed = 0
    for row in expired_rows:
        try:
            row.delete_instance(recursive=False)
        except IntegrityError:
            # Best effort: leave this row in place and move on to the next one.
            continue
        else:
            removed += 1

    return removed
Beispiel #4
0
def missing_counts_query(date):
    """
    Builds a query that finds every Repository lacking a RepositoryActionCount (RAC) entry for
    the given date.
    """
    # RAC entries recorded for the requested date, exposed under the alias "rac".
    rac_for_date = (
        RepositoryActionCount.select(RepositoryActionCount.id, RepositoryActionCount.repository)
        .where(RepositoryActionCount.date == date)
        .alias("rac")
    )

    # With a LEFT OUTER JOIN, repositories without a matching entry get NULL subquery columns;
    # `>> None` is the IS NULL test that keeps exactly those repositories.
    matches_repository = Repository.id == rac_for_date.c.repository_id
    has_no_entry = rac_for_date.c.id >> None

    joined = Repository.select().join(rac_for_date, JOIN.LEFT_OUTER, on=matches_repository)
    return joined.where(has_no_entry)
Beispiel #5
0
def find_uncounted_repository():
    """
    Returns a randomly ordered pick among the repositories that do not yet have an entry in the
    RepositoryActionCount table for yesterday, or None if every repository has one.
    """
    yesterday = date.today() - timedelta(days=1)

    # Repositories that already have an entry dated yesterday.
    counted_yesterday = RepositoryActionCount.select(RepositoryActionCount.repository).where(
        RepositoryActionCount.date == yesterday
    )

    # Everything not in that set, shuffled by the database so the pick is random.
    not_yet_counted = ~(Repository.id << counted_yesterday)
    candidates = Repository.select().where(not_yet_counted).order_by(db_random_func())

    try:
        return candidates.get()
    except Repository.DoesNotExist:
        return None
Beispiel #6
0
def get_repositories_action_sums(repository_ids):
    """
    Returns a map from repository ID to the summed action count of that repository over the last
    week. An empty or missing ID collection yields an empty map without querying; repositories
    with no entries in that period do not appear in the result.
    """
    if not repository_ids:
        return {}

    # Only entries from the past seven days contribute to the sums.
    one_week_ago = datetime.now() - timedelta(weeks=1)
    weekly_sums = (
        RepositoryActionCount.select(
            RepositoryActionCount.repository, fn.Sum(RepositoryActionCount.count)
        )
        .where(RepositoryActionCount.repository << repository_ids)
        .where(RepositoryActionCount.date >= one_week_ago)
        .group_by(RepositoryActionCount.repository)
        .tuples()
    )

    # Each row is (repository, summed count).
    return {row[0]: row[1] for row in weekly_sums}
Beispiel #7
0
def found_entry_count(day):
    """
    Returns how many entries in the RepositoryActionCount (RAC) table are dated on the given day.
    """
    entries_for_day = RepositoryActionCount.select().where(RepositoryActionCount.date == day)
    return entries_for_day.count()