def _calculate_people(events: QuerySet): shown_as = request.GET.get("shown_as") if shown_as is not None and shown_as == "Stickiness": stickiness_days = int(request.GET["stickiness_days"]) events = (events.values("person_id").annotate(day_count=Count( functions.TruncDay("timestamp"), distinct=True)).filter( day_count=stickiness_days)) else: events = events.values("person_id").distinct() if (request.GET.get("breakdown_type") == "cohort" and request.GET.get("breakdown_value") != "all"): events = events.filter( Exists( CohortPeople.objects.filter( cohort_id=int(request.GET["breakdown_value"]), person_id=OuterRef("person_id"), ).only("id"))) people = Person.objects.filter( team=team, id__in=[p["person_id"] for p in events[0:100]]) people = people.prefetch_related( Prefetch("persondistinctid_set", to_attr="distinct_ids_cache")) return serialize_people(people=people, request=request)
def split_images(self, images: QuerySet = None, test_fraction: float = 0.2):
    if images is None:
        images = GroundTruthImage.objects.all()
    images.values('specie__name').annotate(Count('specie'))
    # TODO: needs testing -- not at all sure this works, but it should allow
    # handling a queryset as input. Note that the annotate() result above is
    # discarded; it would need to be assigned to a name to take effect.
def import_own_files(
        files: Mapping[str, TemporaryUploadedFile], *, qs: QuerySet,
        voice: str
) -> Generator[Mapping[Optional[str], Optional[str]], None, None]:
    """Import the user's own audio files."""
    audio_list = [qs_object['name'] for qs_object in qs.values('name')]
    converter = SoxTransformerMixin()
    safe_extensions = ['.mp3', '.wav', '.raw']
    for file_name, file_ in files.items():
        file_name, extension = os.path.splitext(file_name)
        if extension not in safe_extensions:
            yield {'error': f'{file_name}{extension}', 'success': None}
            continue  # skip unsafe extensions instead of falling through
        if file_name not in audio_list:
            yield {'error': file_name, 'success': None}
        else:
            if extension == '.mp3':
                # SoX does not know anything about mp3 o_O
                file_ = convert_from_mp3(file_)
            audio_record: AudioRecord = qs.get(name__exact=file_name)
            wav_file = converter.convert_audio_type_format(
                user_file=file_.file, extension_to='wav', normalise=True)
            audio_record.source = Source.objects.get(name='Voice actor')
            audio_record.voice = voice
            audio_record.emote = 'Живые эмоции'
            audio_record.playing_speed = Decimal(1.0)
            with wav_file as converted_:
                content = converted_.read()
                audio_record.audio.save(f'{file_name}_{datetime.now()}.wav',
                                        ContentFile(content))
                audio_record.default_audio.save(
                    f'{file_name}_{datetime.now()}-default.wav',
                    ContentFile(content))
            yield {'error': None, 'success': file_name}
def plot_bokeh_sex_pie(queryset: QuerySet) -> Figure:
    values = list(queryset.values("sex"))
    data = (pd.DataFrame(values).replace({
        "sex": SEX_VALUES
    }).value_counts().reset_index(name="value").rename(columns={
        "index": "sex"
    }).merge(SEX_COLORS, left_on="sex", right_index=True))
    data["angle"] = data["value"] / data["value"].sum() * 2 * pi
    p = figure(
        plot_height=250,
        plot_width=350,
        title="Sex",
        toolbar_location=None,
        tools="hover",
        tooltips="@sex: @value",
        x_range=(-0.5, 1.0),
    )
    p.wedge(
        x=0,
        y=1,
        radius=0.3,
        start_angle=cumsum("angle", include_zero=True),
        end_angle=cumsum("angle"),
        line_color="white",
        fill_color="color",
        legend_field="sex",
        source=data,
    )
    p.axis.axis_label = None
    p.axis.visible = False
    p.grid.grid_line_color = None
    return p
def _calculate_people(events: QuerySet, offset: int):
    events = events.values("person_id").distinct()
    if request.GET.get("breakdown_type") == "cohort" and request.GET.get(
            "breakdown_value") != "all":
        events = events.filter(
            Exists(
                CohortPeople.objects.filter(
                    cohort_id=int(request.GET["breakdown_value"]),
                    person_id=OuterRef("person_id"),
                ).only("id")))
    if request.GET.get("breakdown_type") == "person":
        events = events.filter(
            Exists(
                Person.objects.filter(
                    **{
                        "id": OuterRef("person_id"),
                        "properties__{}".format(request.GET["breakdown"]):
                        request.GET["breakdown_value"],
                    }).only("id")))
    people = Person.objects.filter(
        team=team,
        id__in=[p["person_id"] for p in events[offset:offset + 100]])
    people = people.prefetch_related(
        Prefetch("persondistinctid_set", to_attr="distinct_ids_cache"))
    return PersonSerializer(people, context={"request": request},
                            many=True).data
def _calculate_people(events: QuerySet):
    shown_as = request.GET.get('shown_as')
    if shown_as == 'Stickiness':
        stickiness_days = int(request.GET['stickiness_days'])
        events = events\
            .values('person_id')\
            .annotate(day_count=Count(functions.TruncDay('timestamp'), distinct=True))\
            .filter(day_count=stickiness_days)
    else:
        events = events.values('person_id').distinct()
    if request.GET.get('breakdown_type') == 'cohort' and request.GET.get('breakdown_value') != 'all':
        events = events.filter(Exists(
            CohortPeople.objects.filter(
                cohort_id=int(request.GET['breakdown_value']),
                person_id=OuterRef('person_id')
            ).only('id')
        ))
    people = Person.objects\
        .filter(team=team, id__in=[p['person_id'] for p in events[0:100]])
    people = people.prefetch_related(Prefetch('persondistinctid_set', to_attr='distinct_ids_cache'))
    return self._serialize_people(people=people, request=request)
def get_count(self, queryset: QuerySet) -> int:
    id_field = 'id'
    try:
        if issubclass(queryset.model, AccountsModel):
            id_field = 'user_id'
        return queryset.values(id_field).count()
    except (AttributeError, TypeError):
        return len(queryset)
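
# A minimal usage sketch for get_count above, assuming a hypothetical Profile
# model that subclasses AccountsModel; the names are illustrative only. The
# except branch also makes it safe to hand in a plain list.
def example_get_count(paginator) -> int:
    # counts 'user_id' for account models, falls back to len() otherwise
    return paginator.get_count(Profile.objects.filter(is_active=True))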
def get_std_job_data(queryset: QuerySet, prefix=''):
    return queryset.values(
        prefix + 'position_id', prefix + 'position_name',
        prefix + 'position_tags', prefix + 'position_region',
        prefix + 'position_min_wages', prefix + 'position_sender_position',
        prefix + 'position_max_wages', prefix + 'company__company_name',
        prefix + 'company__company_employees_num',
        prefix + 'company__company_financing_progress',
        prefix + 'company__company_owner__user_name',
        prefix + 'company__company_owner__user_avatar_url')
def org_units_to_gpkg(queryset: QuerySet) -> bytes:
    """Export the provided org unit queryset in Geopackage (gpkg) format."""
    # create df with queryset, excluding entries without geo info
    queryset = queryset.exclude(
        Q(location=None) & Q(geom=None) & Q(simplified_geom=None))
    ou_df = pd.DataFrame(queryset.values(*ORG_UNIT_COLUMNS))

    # cleanup / transforms
    ou_df["parent"] = ou_df["parent__name"] + " (" + ou_df[
        "parent__org_unit_type__name"] + ")"
    ou_df["geography"] = ou_df["geom"].fillna(ou_df["simplified_geom"].fillna(
        ou_df["location"]))
    ou_df = ou_df.drop(columns=[
        "geom", "simplified_geom", "location", "parent__name",
        "parent__org_unit_type__name"
    ])
    ou_df = ou_df.rename(columns={
        "org_unit_type__name": "type",
        "org_unit_type__depth": "depth"
    })
    ou_df["depth"] = ou_df["depth"].fillna(999)
    ou_df = ou_df.set_index("uuid")

    # Convert django geometry values (GEOS) to shapely models
    ou_df["geography"] = ou_df["geography"].map(geos_to_shapely)

    # drop rows with empty geometries (assigning the filtered series back to
    # the column would re-align on the index and keep the null rows)
    ou_df = ou_df.loc[ou_df["geography"].notnull()]

    # Convert to geo dataframe, and group by org unit type to handle multiple layers
    ou_gdf = gpd.GeoDataFrame(ou_df, geometry="geography")
    ou_gdf["group_key"] = ou_gdf["depth"].astype(str) + "-" + ou_gdf["type"]
    ou_gdf_by_type = ou_gdf.groupby("group_key")

    # Write to content file.
    # Tried to use a mkstemp but it prevents the group.to_file from writing to
    # it and is hard to remove later on. NamedTemporaryFile works but the
    # handle cannot be used to read again. So left the plain uuid thing.
    path = os.path.join(tempfile.gettempdir(), str(uuid.uuid4()))
    for group_key, group in ou_gdf_by_type:
        group = group.drop(columns=["depth", "group_key"])
        layer = group_key.split("-", 1)[1]
        group.to_file(path, driver="GPKG", layer=layer)

    with open(path, "rb") as f:
        content = f.read()
    os.remove(path)
    return content
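
# A minimal usage sketch for org_units_to_gpkg above: stream the generated
# Geopackage bytes back as a download. The view name and the OrgUnit queryset
# filter are illustrative assumptions, not from the source.
def example_download_gpkg(request):
    content = org_units_to_gpkg(OrgUnit.objects.filter(validated=True))
    response = HttpResponse(content, content_type='application/octet-stream')
    response['Content-Disposition'] = 'attachment; filename="org_units.gpkg"'
    return response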
def export_as_csv(self, request: HttpRequest, queryset: models.QuerySet):
    response = HttpResponse(content_type='text/csv')
    response['Content-Disposition'] = 'attachment; filename="data.csv"'
    fieldnames = [
        'name', 'phone', 'created', 'email', 'city', 'validation', 'comment',
        'internal_comment'
    ]
    writer = csv.DictWriter(response, fieldnames=fieldnames)
    writer.writeheader()
    for row in queryset.values(*fieldnames):
        writer.writerow(row)
    return response
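
# A minimal admin wiring sketch for export_as_csv above, assuming a
# hypothetical Lead model (illustrative only): registering the method as an
# admin action lets it receive the rows selected in the changelist.
from django.contrib import admin

@admin.register(Lead)
class LeadAdmin(admin.ModelAdmin):
    actions = ['export_as_csv']

    def export_as_csv(self, request, queryset):
        ...  # body as defined above
    export_as_csv.short_description = 'Export selected rows as CSV'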
def _get_sorting_annotation(queryset: models.QuerySet, field_name: str,
                            annotations: dict) -> models.Case:
    queryset = queryset.values(field_name)
    queryset = queryset.annotate(**annotations)
    queryset = queryset.order_by('value')
    return models.Case(
        *[
            models.When(**{
                "{}".format(field_name): group[field_name],
                "then": index
            }) for index, group in enumerate(queryset)
        ],
        output_field=models.IntegerField())
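
# A minimal usage sketch for _get_sorting_annotation above, assuming a
# hypothetical Product model grouped by 'category'; note the annotations dict
# must produce an alias named 'value', since the function orders by it.
def example_sort_by_group_total(qs):
    case = _get_sorting_annotation(qs, 'category',
                                   {'value': models.Sum('price')})
    return qs.annotate(sort_key=case).order_by('sort_key')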
def export_raw_accesslogs_to_stream_lowlevel(self, stream: IO,
                                             queryset: QuerySet):
    text_id_to_text = {
        dt['id']: dt['text']
        for dt in DimensionText.objects.all().values('id', 'text')
    }
    rt_to_dimensions = {
        rt.pk: rt.dimensions_sorted
        for rt in ReportType.objects.filter(pk__in=queryset.distinct(
            'report_type_id').values('report_type_id'))
    }
    # get all field names for the CSV
    field_name_map = {(f'{dim}__{attr}' if attr else dim): dim
                      for dim, attr in self.implicit_dims.items()}
    field_name_map.update(
        {f'target__{attr}': attr for attr in self.title_attrs})
    field_names = list(field_name_map.values())
    for tr, dims in rt_to_dimensions.items():
        field_names += [
            dim.short_name for dim in dims
            if dim.short_name not in field_names
        ]
    field_names.append('value')
    # values that will be retrieved from the accesslogs
    values = ['value', 'report_type_id']
    values += list(field_name_map.keys())
    values += [f'dim{i+1}' for i in range(7)]
    # create the writer
    writer = csv.DictWriter(stream, field_names)
    writer.writeheader()
    # write the records
    for rec_num, log in enumerate(
            queryset.values(*values).iterator()):  # type: int, dict
        record = {
            attr_out: log.get(attr_in)
            for attr_in, attr_out in field_name_map.items()
        }
        record['value'] = log['value']
        record['date'] = log['date']
        for i, dim in enumerate(rt_to_dimensions[log['report_type_id']]):
            value = log.get(f'dim{i+1}')
            if dim.type == dim.TYPE_TEXT:
                record[dim.short_name] = text_id_to_text.get(value, value)
            else:
                record[dim.short_name] = value
        writer.writerow(record)
        if rec_num % 999 == 0:
            self.store_progress(rec_num + 1)
    self.store_progress(rec_num + 1)
def _calculate_people(events: QuerySet):
    shown_as = request.GET.get('shown_as')
    if shown_as == 'Stickiness':
        stickiness_days = int(request.GET['stickiness_days'])
        events = events\
            .values('person_id')\
            .annotate(day_count=Count(functions.TruncDay('timestamp'), distinct=True))\
            .filter(day_count=stickiness_days)
    else:
        events = events.values('person_id').distinct()
    people = Person.objects\
        .filter(team=team, id__in=[p['person_id'] for p in events[0:100]])
    return self._serialize_people(people=people, request=request)
def get_total_balance_of(queryset: QuerySet, currency: str) -> Money:
    """
    Sum all balances in `queryset` grouped by their currency, convert each
    per-currency subtotal to `currency`, and return the combined total as a
    Money object in `currency`, rounded to two decimal places.
    """
    balance_in_currency = Money(0, currency)
    balances_in_currencies = queryset.values('balance_currency').annotate(
        balance=Sum('balance'))
    for balance in balances_in_currencies:
        balance = Money(balance['balance'], balance['balance_currency'])
        balance_in_currency += convert_money(balance, currency)
    return round(balance_in_currency, 2)
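
# A minimal usage sketch for get_total_balance_of above, assuming a
# hypothetical Wallet model with a django-money `balance` MoneyField and a
# configured djmoney exchange backend; both are assumptions, not from the
# source.
def example_total_balance(user) -> Money:
    # collapses per-currency subtotals (EUR, GBP, ...) into one USD figure
    return get_total_balance_of(Wallet.objects.filter(owner=user), 'USD')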
def _calculate_people(entity: Entity, events: QuerySet):
    if request.GET.get('shown_as', 'Volume') == 'Volume':
        events = events.values('person_id').distinct()
    elif request.GET['shown_as'] == 'Stickiness':
        stickiness_days = int(request.GET['stickiness_days'])
        events = events\
            .values('person_id')\
            .annotate(day_count=Count(functions.TruncDay('timestamp'), distinct=True))\
            .filter(day_count=stickiness_days)
    people = Person.objects\
        .filter(team=team, id__in=[p['person_id'] for p in events[0:100]])
    return self._serialize_people(entity=entity, people=people, request=request)
def split_images(self, images: QuerySet = None, test_fraction: float = 0.2):
    if images is None:
        images = GroundTruthImage.objects.all()
    if self.specialized_organ:
        images = images.filter(plant_organ=self.specialized_organ)
    if self.specialized_background:
        images = images.filter(background_type=self.specialized_background)
    species = images.values('specie__name').annotate(
        nb_image=Count('specie')).filter(nb_image__gte=50)
    for specie in species.iterator():
        print(specie['specie__name'], specie['nb_image'])
    specie_to_pos = {}
    self.save()  # allow to create ref to CNN in classes
    for i in range(species.count()):
        specie = Specie.objects.get(latin_name=species[i]['specie__name'])
        try:
            class_m = Class.objects.get(cnn=self, specie=specie)
        except Class.DoesNotExist:
            class_m = Class(cnn=self, specie=specie)
        class_m.pos = i
        class_m.save()
        specie_to_pos[specie] = i
    data_images, data_labels = [], []
    for image in images.iterator():
        if image.specie in specie_to_pos:
            data_images.append(image.preprocess())
            data_labels.append(specie_to_pos[image.specie])
    data_images_np = np.array(data_images)
    data_labels_np = np.array(data_labels)
    # use the test_fraction argument rather than a hard-coded 0.2
    shufflesplit = StratifiedShuffleSplit(n_splits=2, test_size=test_fraction)
    train_index, test_index = list(
        shufflesplit.split(data_images_np, data_labels_np))[0]
    self.train_images, self.test_images = data_images_np[
        train_index], data_images_np[test_index]
    self.train_labels, self.test_labels = to_categorical(
        data_labels_np[train_index]), to_categorical(
            data_labels_np[test_index])
    print(self.train_images.shape)
def get_next_id(queryset: QuerySet, id_field: Field, max_len: int):
    """
    Fetch the next sequential ID value by incrementing the maximum ID value
    in a queryset.

    :param queryset QuerySet: The queryset to get the next sequential ID from
    :param id_field Field: The ID field to consider
    :param max_len int: The maximum length of an ID value
    """
    if not queryset:
        return "1".zfill(max_len)
    return (queryset.annotate(next_id=Func(
        Cast(F(id_field.name), IntegerField()) + 1,
        Value(f"FM{'0' * max_len}"),
        function="TO_CHAR",
        output_field=id_field,
    )).exclude(next_id__in=queryset.values(id_field.name)).order_by(
        id_field.name).first().next_id)
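
# A minimal usage sketch for get_next_id above, assuming a hypothetical
# Invoice model with a zero-padded CharField `number`; the TO_CHAR call with
# an FM-prefixed format mask makes this PostgreSQL-specific.
def example_next_invoice_number() -> str:
    # '000001' on an empty table, otherwise max(number) + 1, left-padded
    return get_next_id(Invoice.objects.all(),
                       Invoice._meta.get_field('number'),
                       max_len=6)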
def calculate_people(team: Team, events: QuerySet, filter: Filter,
                     use_offset: bool = True) -> QuerySet:
    events = events.values("person_id").distinct()
    events = _filter_cohort_breakdown(events, filter)
    events = _filter_person_prop_breakdown(events, filter)
    people = Person.objects.filter(
        team=team,
        id__in=[
            p["person_id"]
            for p in (events[filter.offset:filter.offset +
                             100] if use_offset else events)
        ],
    )
    people = people.prefetch_related(
        Prefetch("persondistinctid_set", to_attr="distinct_ids_cache"))
    return people
async def exec(self, django_queryset: QuerySet,
               values: tuple) -> List[dict]:
    django_queryset = django_queryset.values(*values)
    sql, params = self._get_sql(django_queryset)
    if self.use_django_conn:
        logger.warning(
            'AsyncPgDbClient uses the sync django connection. Do not use it '
            'in production')
        return list(django_queryset)
    async with self.connection_pool.acquire() as connection:
        records = await connection.fetch(sql, *params)
    result = []
    for record in records:
        result.append({val: record[i] for i, val in enumerate(values)})
    return result
def extract_view_queryset(query_set: QuerySet,
                          viewed_fields: Union[str, Iterable] = None,
                          select_all_field: bool = True,
                          **kwargs) -> QuerySet:
    """
    Extract only the specified fields from the dataset
    (keeps only the listed fields in the resulting data set)

    :param query_set: QuerySet for data extraction
    :param viewed_fields: list of fields for data extraction
    :rtype: QuerySet that contains only the specified fields
    """
    if not select_all_field and viewed_fields is not None and \
            viewed_fields and query_set and (isinstance(query_set, QuerySet)
                                             or hasattr(query_set, 'values')):
        if isinstance(viewed_fields, str):
            viewed_fields = viewed_fields.split(',')
        view_field_desc = [
            get_from_container(field_name, [('field_name', None)], True)[0]
            for field_name in viewed_fields
        ]
        query_set = query_set.values(*view_field_desc)
    return query_set
def find_similarities(self, language: str, queryset: QuerySet) -> Generator:
    """Given a queryset, will yield tuples of (id, (similar_1, similar_2, ...))
    based on text content.
    """
    if not queryset.exists():
        return
    df = pandas.DataFrame(queryset.values("id", "extracted_text"))
    df.drop_duplicates(inplace=True)
    vec = TfidfVectorizer(
        stop_words=get_stopwords(language),
        max_features=3000,
        ngram_range=(1, 2),
    )
    try:
        count_matrix = vec.fit_transform(df["extracted_text"])
    except ValueError:  # empty set
        return
    cosine_sim = cosine_similarity(count_matrix)
    for index in df.index:
        current_id = df.loc[index, "id"]
        try:
            similar = list(enumerate(cosine_sim[index]))
        except IndexError:
            continue
        sorted_similar = sorted(similar, key=operator.itemgetter(1),
                                reverse=True)[:self.num_matches]
        matches = [(df.loc[row, "id"], similarity)
                   for row, similarity in sorted_similar
                   if df.loc[row, "id"] != current_id]
        yield (current_id, matches)
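
# A minimal usage sketch for find_similarities above, assuming a hypothetical
# Article model with an `extracted_text` field and a detector object whose
# num_matches attribute bounds the matches per document.
def example_print_similarities(detector):
    for article_id, matches in detector.find_similarities(
            'english', Article.objects.all()):
        for other_id, score in matches:
            print(article_id, other_id, round(score, 3))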
def paginate_queryset(self, queryset: QuerySet, page_size: int) -> dict:
    """Paginate the queryset, if needed."""
    paginator = self.get_paginator(
        queryset.values(), page_size,
        orphans=self.get_paginate_orphans(),
        allow_empty_first_page=self.get_allow_empty())
    pkw = self.page_kwarg
    page = self.kwargs.get(pkw) or self.request.GET.get(pkw) or 1
    try:
        page_number = int(page)
    except ValueError:
        if page == 'last':
            page_number = paginator.num_pages
        else:
            raise Http404(
                _('Page is not “last”, nor can it be converted to an int.'))
    try:
        page = paginator.page(page_number)
    except InvalidPage as e:
        raise Http404(_(f'Invalid page {page_number}: {e}'))
    result = {
        'count': paginator.count,
        'total_pages': paginator.num_pages,
        'prev': None,
        'next': None,
        'results': list(page.object_list),
    }
    if page.has_previous():
        result['prev'] = page.previous_page_number()
    if page.has_next():
        result['next'] = page.next_page_number()
    return result
def update_po_state(
        self,
        item_queryset: QuerySet = None,
        item_list: List[ItemThroughModel] = None) -> Union[Tuple, None]:
    if item_queryset and item_list:
        raise ValidationError('Either queryset or list can be used.')

    if item_list:
        # self.po_amount = Decimal.from_float(
        #     round(sum(a.po_total_amount for a in item_list
        #               if a.po_item_status != ItemThroughModel.STATUS_CANCELED), 2))
        self.po_amount = sum(
            a.po_total_amount for a in item_list
            if a.po_item_status != ItemThroughModel.STATUS_CANCELED)
        # self.po_amount_received = Decimal.from_float(
        #     round(sum(a.po_total_amount for a in item_list
        #               if a.po_item_status == ItemThroughModel.STATUS_RECEIVED), 2))
        self.po_amount_received = sum(
            a.po_total_amount for a in item_list
            if a.po_item_status == ItemThroughModel.STATUS_RECEIVED)
        # no queryset data to hand back when fed a plain list
        return None

    # todo: explore if queryset can be passed from PO Update View...
    item_queryset, item_data = self.get_po_item_data(queryset=item_queryset)
    qs_values = item_queryset.values('po_total_amount', 'po_item_status')
    total_po_amount = sum(
        i['po_total_amount'] for i in qs_values
        if i['po_item_status'] != ItemThroughModel.STATUS_CANCELED)
    total_received = sum(
        i['po_total_amount'] for i in qs_values
        if i['po_item_status'] == ItemThroughModel.STATUS_RECEIVED)
    self.po_amount = total_po_amount
    self.po_amount_received = total_received
    return item_queryset, item_data
def get_queryset_values(
        qs: QuerySet, fields: Iterable[str],
        rename_fields: Iterable[Tuple[str, str]] = ()) -> List[Dict]:
    """
    Return the values of selected fields from a queryset.

    :param qs: the queryset
    :param fields: the fields to extract
    :param rename_fields: pairs of (old_name, new_name) to rename fields
    :return: List[Dict]

    example:
    >>> from myuser.models import UserProfile
    >>> qs = UserProfile.objects.all()
    >>> fields = ('email', 'uid')
    >>> rename_fields = (('uid', 'userId'),)
    >>> get_queryset_values(qs, fields, rename_fields)
    """
    result = qs.values(*fields)
    for x in result:
        for name, rename in rename_fields:
            x[rename] = x.pop(name)
    return list(result)
def get_order_serialized(qs: QuerySet, fields=None):
    """Serializer function for performance"""
    if fields is None:
        fields = ORDER_FIELDS
    return list(qs.values(*fields))
def split_images(self, images: QuerySet = None, test_fraction: float = 0.2):
    if images is None:
        images = GroundTruthImage.objects.all()
    if self.specialized_organ:
        images = images.filter(plant_organ=self.specialized_organ)
    if self.specialized_background:
        images = images.filter(background_type=self.specialized_background)
    species = images.values('specie__name').annotate(
        nb_image=Count('specie')).filter(nb_image__gte=self.nb_image_by_class)
    self.classes.all().delete()
    for specie in species.iterator():
        print(specie['specie__name'], specie['nb_image'])
    specie_to_pos = {}
    specie_to_nb = {}
    specie_to_counter = {}
    self.save()  # allow to create ref to CNN in classes
    nb_class = species.count()
    for i in range(nb_class):
        specie = Specie.objects.get(latin_name=species[i]['specie__name'])
        try:
            class_m = Class.objects.get(cnn=self, specie=specie)
        except Class.DoesNotExist:
            class_m = Class(cnn=self, specie=specie)
        class_m.pos = i
        class_m.save()
        specie_to_pos[specie] = i
        specie_to_nb[specie] = species[i]['nb_image']
        specie_to_counter[specie] = 0

    train_images = []
    test_images = []
    for image in images.iterator():
        specie = image.specie
        if specie in specie_to_pos:
            if specie_to_counter[specie] / specie_to_nb[specie] < 1 - test_fraction:
                train_images.append(image)
            else:
                test_images.append(image)
            specie_to_counter[specie] += 1

    batch_size = 32

    def train_generator():
        # fill a batch first, then yield it; the original incremented the
        # index before writing, which left slot 0 empty and dropped samples
        i = 0
        xs = np.zeros((batch_size, 224, 224, 3), dtype=np.float_)
        ys = np.zeros((batch_size, nb_class), dtype=np.float_)
        for image in train_images:
            xs[i] = image.preprocess()
            ys[i, specie_to_pos[image.specie]] = 1
            i += 1
            if i == batch_size:
                yield xs, ys
                xs = np.zeros((batch_size, 224, 224, 3), dtype=np.float_)
                ys = np.zeros((batch_size, nb_class), dtype=np.float_)
                i = 0

    def test_generator():
        i = 0
        xs = np.zeros((batch_size, 224, 224, 3), dtype=np.float_)
        ys = np.zeros((batch_size, nb_class), dtype=np.float_)
        for image in test_images:
            xs[i] = image.preprocess()
            ys[i] = tf.keras.utils.to_categorical(
                specie_to_pos[image.specie], nb_class)
            i += 1
            if i == batch_size:
                yield xs, ys
                xs = np.zeros((batch_size, 224, 224, 3), dtype=np.float_)
                ys = np.zeros((batch_size, nb_class), dtype=np.float_)
                i = 0

    self.train_ds = tf.data.Dataset.from_generator(
        train_generator, (tf.float64, tf.float64),
        ((batch_size, 224, 224, 3), (batch_size, nb_class)))
    self.test_ds = tf.data.Dataset.from_generator(
        test_generator, (tf.float64, tf.float64),
        ((batch_size, 224, 224, 3), (batch_size, nb_class)))
def union_ether_and_token_txs(self, tokens_queryset: QuerySet,
                              ether_queryset: QuerySet) -> TransferDict:
    values = ('block_number', 'transaction_hash', 'to', '_from', 'value',
              'execution_date', 'token_id', 'token_address')
    return ether_queryset.values(*values).union(
        tokens_queryset.values(*values)).order_by('-block_number')
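
# A minimal usage sketch for union_ether_and_token_txs above, assuming
# hypothetical EtherTransfer and TokenTransfer models whose fields cover the
# column names listed in the function (a union requires matching columns).
def example_transfers_for(service, address: str):
    # dicts from both querysets, interleaved, newest blocks first
    return service.union_ether_and_token_txs(
        TokenTransfer.objects.filter(to=address),
        EtherTransfer.objects.filter(to=address))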
def count_species(self, images: QuerySet, min_images=5):
    species = images.values('specie__name').annotate(
        nb_image=Count('specie')).filter(nb_image__gte=min_images)
    for specie in species:
        print(specie['specie__name'], specie['nb_image'])
    return species
def convert_qs(qs: QuerySet) -> list:
    assert not isinstance(qs, Category)
    return list(qs.values('id', 'name'))
def calculate_balance(cls, transactions: QuerySet) -> QuerySet:
    return transactions.values('account')\
        .annotate(total_inflow=Coalesce(Sum('amount', filter=Q(type=Transaction.INFLOW)), 0),
                  total_outflow=Coalesce(Sum('amount', filter=Q(type=Transaction.OUTFLOW)), 0))\
        .annotate(balance=F('total_inflow') + F('total_outflow'))
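
# A minimal usage sketch for calculate_balance above. It assumes OUTFLOW
# amounts are stored as negative values (so inflow + outflow nets out) and
# that the method is exposed as a classmethod on Transaction; both are
# assumptions, not from the source.
def example_print_balances(user):
    rows = Transaction.calculate_balance(
        Transaction.objects.filter(user=user))
    for row in rows:  # one dict per account: {'account': ..., 'balance': ...}
        print(row['account'], row['balance'])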