Exemple #1
0
    def log_progress_query_end(
        self,
        queryset: QuerySet,
        existing: QuerySet,
        pending: QuerySet,
        log_level: int = logging.INFO,
    ) -> None:
        """
        Report the outcome of an execution progress query to the log.

        Delegates to :meth:`log_pending` when at least one instance is still
        pending, and to :meth:`log_none_pending` otherwise.

        Parameters
        ----------
        queryset : QuerySet
            Full execution queryset
        existing : QuerySet
            Instances with existing results
        pending : QuerySet
            Instances pending execution
        log_level : int, optional
            Logging level to use, by default 20 (INFO)

        See Also
        --------
        :func:`query_progress`
        """
        # Nothing left to run: report on the full queryset and stop here.
        if not pending.exists():
            self.log_none_pending(queryset, log_level=log_level)
            return
        self.log_pending(existing, pending, log_level=log_level)
Exemple #2
0
def queryset_iterator(queryset: QuerySet, chunksize: int = 5000) -> Iterator[Any]:
    """Yield every row of ``queryset`` in ascending ``id`` order, chunk by chunk.

    The queryset is ordered by ``id`` and read ``chunksize`` rows at a time;
    after each chunk the queryset is narrowed to rows whose ``id`` is greater
    than the last one seen, so no chunk is read twice.

    Args:
        queryset: Queryset to walk; its rows must expose an ``id`` attribute
            and the queryset must support filtering on ``id__gt``.
        chunksize: Maximum number of rows fetched per chunk. Must be >= 1.

    Yields:
        The rows of ``queryset``, one at a time.

    Raises:
        ValueError: If ``chunksize`` is smaller than 1. Because this is a
            generator, the error surfaces on the first iteration.
    """
    # A chunk size of 0 would produce an empty chunk with no "last id" to
    # advance from, so reject it explicitly instead of failing obscurely.
    if chunksize < 1:
        raise ValueError('chunksize must be a positive integer')

    queryset = queryset.order_by('id')
    while True:
        # Fetch the chunk once; an empty chunk means we are done, which
        # makes a separate exists() query per chunk unnecessary.
        chunk = list(queryset[:chunksize])
        if not chunk:
            return
        # Capture the cursor before yielding so consumers that modify the
        # rows cannot affect where the next chunk starts.
        last_id = chunk[-1].id
        yield from chunk
        queryset = queryset.filter(id__gt=last_id)
Exemple #3
0
def queryset_iterator(queryset: QuerySet,
                      chunksize: int = 5000) -> Iterator[Any]:
    """Iterate over ``queryset`` in ``id`` order, loading limited-size chunks.

    Rows are read ``chunksize`` at a time from the queryset ordered by
    ``id``. Once a chunk has been consumed, the queryset is restricted to
    rows with an ``id`` greater than the last row of that chunk.

    Args:
        queryset: Queryset to iterate; rows must have an ``id`` attribute and
            the queryset must accept an ``id__gt`` filter.
        chunksize: Upper bound on the rows loaded per chunk. Must be >= 1.

    Yields:
        Each row of ``queryset`` in ascending ``id`` order.

    Raises:
        ValueError: If ``chunksize`` is less than 1 (raised when iteration
            starts, since this function is a generator).
    """
    # With chunksize == 0 every chunk would be empty and there would be no
    # last id to continue from; fail with a clear message instead.
    if chunksize < 1:
        raise ValueError("chunksize must be a positive integer")

    remaining = queryset.order_by("id")
    while True:
        # One fetch per chunk; emptiness of the chunk doubles as the stop
        # condition, so no extra exists() round trip is needed.
        batch = list(remaining[:chunksize])
        if not batch:
            return
        # Read the cursor before handing rows to the consumer.
        cursor = batch[-1].id
        yield from batch
        remaining = remaining.filter(id__gt=cursor)
Exemple #4
0
 def update_if_not_equal(obj: QuerySet, row: dict):
     """Update the rows of ``obj`` whose fields differ from ``row``.

     Rows of ``obj`` that already match every non-"id" value of ``row`` are
     excluded; if any rows remain they are updated with those values and,
     unless ``settings.SUPPRESS_TEST_OUTPUT`` is set, a message is printed.
     """
     # Work on a copy: `row` itself must keep its "id" entry because the same
     # data is used later in `delete_unused_objects`.
     fields = dict(row.items())
     fields.pop("id", None)

     outdated = obj.exclude(**fields)
     if not outdated.exists():
         return

     outdated.update(**fields)
     if settings.SUPPRESS_TEST_OUTPUT:
         return
     print(f"UPDATED {outdated.model.__name__}: {dict(row)}")
Exemple #5
0
 def add_users(self, users_qs: QuerySet) -> None:
     """Link every user of ``users_qs`` that is not yet in ``self.users``.

     A ``UserTeam`` row (``user_id`` / ``team_id=self.id``) is bulk-created
     for each user in ``users_qs`` whose id is not already among
     ``self.users``. An empty queryset is a no-op.

     Raises:
         AssertionError: If the first element of ``users_qs`` is not a
             ``User`` instance.
     """
     # first() returns None for an empty queryset, so a single query covers
     # both the emptiness check and the type check below. Checking emptiness
     # first keeps an empty queryset from tripping the assertion.
     first_user = users_qs.first()
     if first_user is None:
         return
     # NOTE: kept as an assert so the exception type callers may rely on is
     # unchanged; be aware that asserts are stripped under `python -O`.
     assert isinstance(first_user, User), f"{first_user} is given"

     already_added_ids = self.users.values_list("id", flat=True)
     new_user_ids = users_qs.exclude(
         id__in=already_added_ids).values_list("id", flat=True)
     UserTeam.objects.bulk_create([
         UserTeam(user_id=user_id, team_id=self.id)
         for user_id in new_user_ids
     ])
Exemple #6
0
    def find_similarities(self, language: str,
                          queryset: QuerySet) -> Generator:
        """Yield, for each row of ``queryset``, the rows with the most
        similar text.

        The ``id`` and ``extracted_text`` values of the queryset are loaded
        into a DataFrame, exact duplicate rows are dropped, the texts are
        vectorized with TF-IDF (1- and 2-grams, at most 3000 features, stop
        words from ``get_stopwords(language)``) and compared pairwise with
        cosine similarity.

        Yields tuples of ``(id, [(similar_id, similarity), ...])``. The
        candidates are the ``self.num_matches`` highest-scoring rows, from
        which entries carrying the row's own id are removed.

        Nothing is yielded when the queryset is empty or when the vectorizer
        rejects the texts with a ``ValueError``.
        """
        if not queryset.exists():
            return

        df = pandas.DataFrame(queryset.values("id", "extracted_text"))

        # Re-number the rows after dropping duplicates. The similarity matrix
        # is indexed by position, so the DataFrame labels must be exactly
        # 0..n-1; otherwise rows after a dropped duplicate would be matched
        # against the wrong matrix rows (or not found at all).
        df = df.drop_duplicates().reset_index(drop=True)

        vec = TfidfVectorizer(
            stop_words=get_stopwords(language),
            max_features=3000,
            ngram_range=(1, 2),
        )

        try:
            count_matrix = vec.fit_transform(df["extracted_text"])
        except ValueError:
            # empty set
            return

        cosine_sim = cosine_similarity(count_matrix)

        for index in df.index:
            current_id = df.loc[index, "id"]
            # Rank every row by its similarity to the current one and keep
            # the best `num_matches` candidates (this still includes the
            # current row itself, which is filtered out below).
            sorted_similar = sorted(enumerate(cosine_sim[index]),
                                    key=operator.itemgetter(1),
                                    reverse=True)[:self.num_matches]
            matches = [(df.loc[row, "id"], similarity)
                       for row, similarity in sorted_similar
                       if df.loc[row, "id"] != current_id]
            yield (current_id, matches)
def sort_records_by_value(records: QuerySet, chart_id: int) -> QuerySet:
    """
    Order records so the best value comes first: the lowest value when the
    chart sorts ascending, the highest when it sorts descending.

    The direction comes from the chart identified by ``chart_id`` when one
    is given; otherwise from the chart of the first record, if any.
    """
    if chart_id is not None:
        chart_type = Chart.objects.get(id=chart_id).chart_type
        ascending = chart_type.order_ascending
    elif not records.exists():
        # No records at all, so the direction is irrelevant;
        # ascending is an arbitrary choice.
        ascending = True
    else:
        # Sorting by value across several charts only makes sense if they
        # all share one direction, so any record's chart will do.
        sample = records.first()
        ascending = sample.chart.chart_type.order_ascending

    primary_key = '-value' if not ascending else 'value'

    # Ties go to the earliest achieved, then to the earliest submitted.
    return records.order_by(primary_key, 'date_achieved', 'id')
Exemple #8
0
def votes_to_percentages(votes: QuerySet) -> List[Tuple[str, float]]:
    """Converts vote counts into per-category confidence percentages,
    highest percentage first

    Arguments:
        votes {QuerySet} -- DisposableVotes to analyze

    Raises:
        TypeError -- votes is not a QuerySet of DisposableVotes
        ValueError -- votes is empty

    Returns:
        List[Tuple[str, float]] -- list of categories with confidence percentages
            in ('category', percentage) format
    """
    is_vote_queryset = isinstance(votes, QuerySet) and votes.model is DisposableVote
    if not is_vote_queryset:
        raise TypeError('votes must be a QuerySet of DisposableVotes')

    if not votes.exists():
        raise ValueError('votes cannot be empty')

    # With fewer than MIN_NORMALIZE_COUNT votes the result is less certain,
    # so the denominator never drops below that floor.
    denominator = max(sum(vote.count for vote in votes),
                      settings.MIN_NORMALIZE_COUNT)

    percentages = {}
    for vote in votes:
        percentages[vote.category.name] = 100 * vote.count / denominator

    return sorted(percentages.items(), key=lambda item: item[1], reverse=True)