def get_repository_action_counts(repo, start_date):
    """
    Builds the query for the daily aggregated action counts of a repository.

    Selects the RepositoryActionCount rows that belong to `repo` and whose
    date is on or after `start_date`, and returns that select query.
    """
    belongs_to_repo = RepositoryActionCount.repository == repo
    on_or_after_start = RepositoryActionCount.date >= start_date
    return RepositoryActionCount.select().where(belongs_to_repo, on_or_after_start)
def update_repository_score(repo):
    """
    Recomputes the search score of the given repository from its rows in the
    RepositoryActionCount table and stores it in RepositorySearchScore.

    For every bucket in SEARCH_BUCKETS, the average action count per row inside
    the bucket's date window is multiplied by the bucket's weight. The weighted
    averages are summed, scaled by 100 and truncated to an int. Note that
    count_repository_actions for the repo should be called first.

    Returns True if the score row was updated or created, and False if writing
    it raised an IntegrityError.
    """
    today = date.today()

    # Retrieve the counts for each bucket and calculate the final score. Each
    # bucket covers [today - bucket.delta, today - previous bucket's delta); the
    # first bucket ends at today (exclusive).
    # NOTE(review): this presumes SEARCH_BUCKETS is ordered by increasing delta.
    final_score = 0.0
    last_end_timedelta = timedelta(days=0)

    for bucket in SEARCH_BUCKETS:
        start_date = today - bucket.delta
        end_date = today - last_end_timedelta
        last_end_timedelta = bucket.delta

        query = RepositoryActionCount.select(
            fn.Sum(RepositoryActionCount.count), fn.Count(RepositoryActionCount.id)
        ).where(
            RepositoryActionCount.date >= start_date,
            RepositoryActionCount.date < end_date,
            RepositoryActionCount.repository == repo,
        )

        bucket_tuple = query.tuples()[0]
        logger.debug(
            "Got bucket tuple %s for bucket %s for repository %s", bucket_tuple, bucket, repo.id
        )

        # The sum is None when no rows fall inside the bucket's window.
        if bucket_tuple[0] is None:
            continue

        bucket_sum = float(bucket_tuple[0])
        bucket_count = int(bucket_tuple[1])
        if not bucket_count:
            continue

        # bucket_sum is already a float, so this is a true division.
        bucket_score = bucket_sum / bucket_count
        final_score += bucket_score * bucket.weight

    # Update the existing repo search score row or create a new one.
    normalized_score = int(final_score * 100.0)
    try:
        try:
            search_score_row = RepositorySearchScore.get(repository=repo)
        except RepositorySearchScore.DoesNotExist:
            # Store a full timestamp here as well, matching the update path
            # below; previously a bare date (no time of day) was written.
            RepositorySearchScore.create(
                repository=repo, score=normalized_score, last_updated=datetime.now()
            )
        else:
            search_score_row.last_updated = datetime.now()
            search_score_row.score = normalized_score
            search_score_row.save()
    except IntegrityError:
        # Raised by the save/create above, e.g. if the row was created
        # concurrently between the lookup and the create.
        logger.debug("RepositorySearchScore row already existed; skipping")
        return False

    return True
def delete_expired_entries(repo, limit=50):
    """
    Removes RepositoryActionCount rows of the given repository that are dated
    before the retention threshold (now minus RAC_RETENTION_PERIOD).

    At most `limit` rows are looked up per call. Returns how many of them were
    actually deleted; rows whose deletion raises an IntegrityError are skipped
    and not counted.
    """
    cutoff = datetime.today() - RAC_RETENTION_PERIOD
    expired_query = (
        RepositoryActionCount.select()
        .where(RepositoryActionCount.repository == repo, RepositoryActionCount.date < cutoff)
        .limit(limit)
    )

    expired_rows = list(expired_query)
    if not expired_rows:
        return 0

    # Sanity check that the LIMIT clause was honored.
    assert len(expired_rows) <= limit

    removed = 0
    for row in expired_rows:
        try:
            row.delete_instance(recursive=False)
        except IntegrityError:
            continue
        removed += 1

    return removed
def missing_counts_query(date):
    """
    Builds a query selecting every Repository that has no
    RepositoryActionCount (RAC) row for the given date.
    """
    # RAC rows recorded for the requested date, exposed to the outer query as "rac".
    counted_on_date = RepositoryActionCount.select(
        RepositoryActionCount.id, RepositoryActionCount.repository
    ).where(RepositoryActionCount.date == date)
    rac = counted_on_date.alias("rac")

    # The left outer join keeps repositories without a matching RAC row; for
    # those the joined rac id is NULL, which is what the final filter selects.
    repos_with_rac = Repository.select().join(
        rac, JOIN.LEFT_OUTER, on=(Repository.id == rac.c.repository_id)
    )
    return repos_with_rac.where(rac.c.id >> None)
def find_uncounted_repository():
    """
    Picks one repository, in random order, that has no RepositoryActionCount
    row dated yesterday. Returns None when no such repository exists.
    """
    yesterday = date.today() - timedelta(days=1)

    # IDs of the repositories that already have an entry for yesterday.
    counted_yesterday = RepositoryActionCount.select(RepositoryActionCount.repository).where(
        RepositoryActionCount.date == yesterday
    )
    uncounted = Repository.select().where(~(Repository.id << counted_yesterday))

    try:
        # Random ordering so that repeated calls do not always pick the same row.
        return uncounted.order_by(db_random_func()).get()
    except Repository.DoesNotExist:
        return None
def get_repositories_action_sums(repository_ids):
    """
    Maps each of the given repository IDs to the sum of its action counts over
    the last week.

    Repositories without RepositoryActionCount rows in that period are absent
    from the result. An empty or missing `repository_ids` yields an empty dict
    without querying.
    """
    if not repository_ids:
        return {}

    # Only rows dated within the last seven days contribute to the sums.
    week_ago = datetime.now() - timedelta(weeks=1)
    per_repo_totals = (
        RepositoryActionCount.select(
            RepositoryActionCount.repository, fn.Sum(RepositoryActionCount.count)
        )
        .where(RepositoryActionCount.repository << repository_ids)
        .where(RepositoryActionCount.date >= week_ago)
        .group_by(RepositoryActionCount.repository)
        .tuples()
    )

    # Each row is (repository id, summed count).
    return {row[0]: row[1] for row in per_repo_totals}
def found_entry_count(day):
    """
    Counts the RepositoryActionCount (RAC) rows whose date equals the given day.
    """
    entries_for_day = RepositoryActionCount.select().where(RepositoryActionCount.date == day)
    return entries_for_day.count()