def log_progress_query_end( self, queryset: QuerySet, existing: QuerySet, pending: QuerySet, log_level: int = logging.INFO, ) -> None: """ Logs the execution progress query's result. Parameters ---------- queryset : QuerySet Full execution queryset existing : QuerySet Instances with existing reults pending : QuerySet Instances pending execution log_level : int, optional Logging level to use, by default 20 (INFO) See Also -------- :func:`query_progress` """ if pending.exists(): self.log_pending(existing, pending, log_level=log_level) else: self.log_none_pending(queryset, log_level=log_level)
def queryset_iterator(queryset: QuerySet, chunksize: int=5000) -> Iterator[Any]: queryset = queryset.order_by('id') while queryset.exists(): for row in queryset[:chunksize]: msg_id = row.id yield row queryset = queryset.filter(id__gt=msg_id)
def queryset_iterator(queryset: QuerySet, chunksize: int = 5000) -> Iterator[Any]: queryset = queryset.order_by("id") while queryset.exists(): for row in queryset[:chunksize]: msg_id = row.id yield row queryset = queryset.filter(id__gt=msg_id)
def update_if_not_equal(obj: QuerySet, row: dict):
    """Update rows of ``obj`` whose fields differ from the values in ``row``.

    The "id" key is used only to exclude itself from the update payload —
    it must NOT be deleted from ``row``, because the same dict is reused
    later by ``delete_unused_objects``.
    """
    attributes = {field: value for field, value in row.items() if field != "id"}
    # Narrow to rows that do not already match the desired values.
    obj = obj.exclude(**attributes)
    if not obj.exists():
        return
    obj.update(**attributes)
    if not settings.SUPPRESS_TEST_OUTPUT:
        print(f"UPDATED {obj.model.__name__}: {dict(row)}")
def add_users(self, users_qs: QuerySet) -> None:
    """Add every user in ``users_qs`` to this team.

    Users already on the team are skipped; the remaining ones are linked
    in a single ``UserTeam.objects.bulk_create`` call.

    Parameters
    ----------
    users_qs : QuerySet
        Queryset of ``User`` instances to add.
    """
    # Bail out early on an empty queryset. This also fixes a bug: with no
    # rows, ``first()`` returns None and the isinstance assert below would
    # raise, even though adding nothing is a valid no-op.
    if not users_qs.exists():
        return
    assert isinstance(users_qs.first(), User), f"{users_qs.first()} is given"
    # Only create membership rows for users not already on the team.
    not_added_users_id_qs = users_qs.exclude(
        id__in=self.users.values_list("id", flat=True)
    ).values("id")
    # bulk_create iterates the queryset directly; no list() needed.
    UserTeam.objects.bulk_create([
        UserTeam(user_id=entry["id"], team_id=self.id)
        for entry in not_added_users_id_qs
    ])
def find_similarities(self, language: str, queryset: QuerySet) -> Generator: """Given a queryset, will yield tuples of (id, (similar_1, similar_2, ...)) based on text content. """ if not queryset.exists(): return df = pandas.DataFrame(queryset.values("id", "extracted_text")) df.drop_duplicates(inplace=True) vec = TfidfVectorizer( stop_words=get_stopwords(language), max_features=3000, ngram_range=(1, 2), ) try: count_matrix = vec.fit_transform(df["extracted_text"]) except ValueError: # empty set return cosine_sim = cosine_similarity(count_matrix) for index in df.index: current_id = df.loc[index, "id"] try: similar = list(enumerate(cosine_sim[index])) except IndexError: continue sorted_similar = sorted(similar, key=operator.itemgetter(1), reverse=True)[:self.num_matches] matches = [(df.loc[row, "id"], similarity) for row, similarity in sorted_similar if df.loc[row, "id"] != current_id] yield (current_id, matches)
def sort_records_by_value(records: QuerySet, chart_id: int) -> QuerySet:
    """
    Best value first - lowest if ascending sort, highest if descending.
    """
    # Work out which direction counts as "best".
    if chart_id is not None:
        ascending = Chart.objects.get(id=chart_id).chart_type.order_ascending
    elif records.exists():
        # No chart given but at least one record exists. Ordering by value
        # across charts assumes every chart uses the same direction, so any
        # record's chart is representative.
        ascending = records.first().chart.chart_type.order_ascending
    else:
        # No records at all: the direction is irrelevant, so default to
        # ascending arbitrarily.
        ascending = True
    primary = 'value' if ascending else '-value'
    # Tiebreak by earliest achieved, followed by earliest submitted.
    return records.order_by(primary, 'date_achieved', 'id')
def votes_to_percentages(votes: QuerySet) -> List[Tuple[str, float]]: """Returns a descending list of categories with confidence percentages Arguments: votes {QuerySet} -- DisposableVotes to analyze Returns: List[Tuple[str, float]] -- list of categories with confidence percentages in ('category', percentage) format """ if not isinstance(votes, QuerySet) or votes.model is not DisposableVote: raise TypeError('votes must be a QuerySet of DisposableVotes') if not votes.exists(): raise ValueError('votes cannot be empty') total = 0 for vote in votes: total += vote.count # If there are less than MIN_NORMALIZE_COUNT votes, treat it as less certain if total < settings.MIN_NORMALIZE_COUNT: total = settings.MIN_NORMALIZE_COUNT normalized_dict = {v.category.name: 100 * v.count / total for v in votes} return sorted(normalized_dict.items(), key=lambda x: x[1], reverse=True)