import collections.abc

from django.db.models import Max, Min, QuerySet
from django.db.models.query import RawQuerySet


def chunks(r, n, id_chunking=False, min_id=None, max_id=None,
           auto_chunking=False, id_field='id'):
    """
    Splits a list, dict or QuerySet into chunks of n.
    Returns a generator of iterables of length n (the last chunk may be
    shorter).

    r
        a list, dict or QuerySet
    n
        an integer
    id_chunking
        we'll find a low and high primary key id and chunk between them
        (if records have been deleted, you may get chunks smaller than n)
    min_id and max_id
        manual ways to be a little faster, since we can skip the
        aggregate query that finds the chunk endpoints
    auto_chunking
        automatically finds the chunk endpoints by clearing the filters
        and aggregating the absolute min/max ids

    For simpler queries that just return a lot of data (i.e. OFFSET is
    cheap):

        for users in chunks(User.objects.complex(), 1000):
            # each users batch is 1000 long (the last may be shorter)
            for user in users:
                print(user)

    For fairly complex queries where OFFSETs are quite expensive, try
    auto_chunking:

        for users in chunks(User.objects.complex(), 1000, auto_chunking=True):
            # each users batch is 1000 long or less
            for user in users:
                print(user)
    """
    if isinstance(r, RawQuerySet):
        raise NotImplementedError(
            'RawQuerySet (or .raw() usage) is not supported.')
    if isinstance(r, set):
        r = list(r)
    if isinstance(r, dict):
        if not r:
            return []
        keys, vals = zip(*r.items())
        return ({keys[ii]: vals[ii]
                 for ii in range(i, i + n) if ii < len(r)}
                for i in range(0, len(r), n))
    elif isinstance(r, list):
        if not r:
            return []
        return (r[i:i + n] for i in range(0, len(r), n))
    elif isinstance(r, str):
        return (''.join(c) for c in chunks(list(r), n))
    elif isinstance(r, QuerySet):
        if id_chunking or auto_chunking:
            w = r
            if auto_chunking and not (min_id and max_id):
                # clear any filters so we aggregate the absolute endpoints
                w = QuerySet(r.model, using=r._db).all()
            agg = w.aggregate(Min(id_field), Max(id_field))
            mn = min_id or agg[id_field + '__min']
            mx = max_id or agg[id_field + '__max']
            if not ((mn is None) or (mx is None)):
                # mx + 1 so the final chunk's exclusive __lt bound still
                # covers the max id when it lands on a chunk boundary
                return (r.filter(**{
                    id_field + '__gte': i,
                    id_field + '__lt': i + n,
                }) for i in range(mn, mx + 1, n))
        # don't allow the result cache to screw up slicing
        r._result_cache = None
        count = r.count()
        if not count:
            return []
        return (r[i:i + n] for i in range(0, count, n))
    elif isinstance(r, collections.abc.Iterable):
        # generic iterables are chunked lazily by a helper defined elsewhere
        return _gen_chunks(r, n)
    raise TypeError('r must be a list, dict, set, str, QuerySet or iterable')
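
# A minimal usage sketch of id_chunking, assuming a hypothetical Comment
# model with `spam_score` and `hidden` fields (neither is part of this
# module). Each batch is itself a QuerySet filtered by a primary-key range,
# so OFFSET never grows with depth and bulk .update() works per batch;
# deleted rows only make individual batches smaller than n.
def _example_hide_spam_comments(Comment):
    for batch in chunks(Comment.objects.filter(spam_score__gt=0.9),
                        500, id_chunking=True):
        batch.update(hidden=True)
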
import logging

from django.core.exceptions import ObjectDoesNotExist
from django.db import transaction
from django.db.models import Avg, Model, QuerySet


def calculateStats(searched_term: str, unprocessed_stats: QuerySet,
                   source: str, sentimentable_model: Model):
    try:
        term = search_term.objects.get(term=searched_term)
        if unprocessed_stats.count() == 0:
            return True
        # weight of the records already folded into the running aggregate
        processed_weight = stat_processor.processDataWeight(
            searched_term_id=term.id,
            data_source=source,
            new_record_count=unprocessed_stats.count(),
            new_data=False)
        # weight of the batch being folded in now
        unprocessed_weight = stat_processor.processDataWeight(
            searched_term_id=term.id,
            data_source=source,
            new_record_count=unprocessed_stats.count())
        # per-sentiment averages for the unprocessed batch, rounded to
        # five decimal places
        new_neutral_avg = round(
            unprocessed_stats.aggregate(Avg('nlp_neutral_sentiment'))
            ['nlp_neutral_sentiment__avg'], 5)
        new_mixed_avg = round(
            unprocessed_stats.aggregate(Avg('nlp_mixed_sentiment'))
            ['nlp_mixed_sentiment__avg'], 5)
        new_positive_avg = round(
            unprocessed_stats.aggregate(Avg('nlp_positive_sentiment'))
            ['nlp_positive_sentiment__avg'], 5)
        new_negative_avg = round(
            unprocessed_stats.aggregate(Avg('nlp_negative_sentiment'))
            ['nlp_negative_sentiment__avg'], 5)
        if processed_weight == 0:
            # first batch for this term/source: create the stat row and
            # mark the batch processed in one transaction
            with transaction.atomic():
                sentiment_stat = pie_chart_sentiment_stat(
                    neutral_sentiment_aggregate=new_neutral_avg,
                    mixed_sentiment_aggregate=new_mixed_avg,
                    positive_sentiment_aggregate=new_positive_avg,
                    negative_sentiment_aggregate=new_negative_avg,
                    processed_records_count=unprocessed_stats.count(),
                    term_id=term.id,
                    data_source=source)
                sentiment_stat.save()
                sentimentable_model.objects.filter(id__in=list(
                    unprocessed_stats.values_list('id', flat=True)
                )).update(pie_stat_processed=True)
            return True
        else:
            try:
                # fold the batch into the existing weighted averages
                with transaction.atomic():
                    old_stats = pie_chart_sentiment_stat.objects.get(
                        term_id=term.id, data_source=source)
                    old_stats.neutral_sentiment_aggregate = (
                        processed_weight *
                        old_stats.neutral_sentiment_aggregate) + (
                            unprocessed_weight * new_neutral_avg)
                    old_stats.mixed_sentiment_aggregate = (
                        processed_weight *
                        old_stats.mixed_sentiment_aggregate) + (
                            unprocessed_weight * new_mixed_avg)
                    old_stats.positive_sentiment_aggregate = (
                        processed_weight *
                        old_stats.positive_sentiment_aggregate) + (
                            unprocessed_weight * new_positive_avg)
                    old_stats.negative_sentiment_aggregate = (
                        processed_weight *
                        old_stats.negative_sentiment_aggregate) + (
                            unprocessed_weight * new_negative_avg)
                    old_stats.processed_records_count += (
                        unprocessed_stats.count())
                    old_stats.save()
                    sentimentable_model.objects.filter(id__in=list(
                        unprocessed_stats.values_list('id', flat=True)
                    )).update(pie_stat_processed=True)
                return True
            except ObjectDoesNotExist as ex_stat:
                message = ('Error! The term ' + searched_term +
                           ' does not exist when looking for existing '
                           'aggregate stats! Exception: ' + str(ex_stat))
                print(message)
                logging.error(message)
                return False
    except ObjectDoesNotExist as ex_term:
        message = ('Error! The term ' + searched_term +
                   ' does not exist when searching for terms in the term '
                   'table! Exception: ' + str(ex_term))
        print(message)
        logging.error(message)
        return False
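
# A minimal sketch of driving calculateStats from a batch job. The
# tweet_sentiment model passed in follows the fields the function above
# reads and updates (nlp_*_sentiment, pie_stat_processed); the 'twitter'
# source label is an assumption for illustration, and a real job would
# likely also scope the queryset to the term being refreshed.
def _example_refresh_pie_stats(tweet_sentiment, term_text):
    unprocessed = tweet_sentiment.objects.filter(pie_stat_processed=False)
    if not calculateStats(searched_term=term_text,
                          unprocessed_stats=unprocessed,
                          source='twitter',
                          sentimentable_model=tweet_sentiment):
        logging.warning('pie stat refresh failed for term %s', term_text)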