def test_order_by(self):
    Author.objects.create(name='Terry Pratchett')
    Author.objects.create(name='J. R. R. Tolkien')
    Author.objects.create(name='George. R. R. Martin')
    self.assertQuerysetEqual(
        Author.objects.order_by(StrIndex('name', Value('R.')).asc()), [
            'Terry Pratchett',
            'J. R. R. Tolkien',
            'George. R. R. Martin',
        ],
        lambda a: a.name)
    self.assertQuerysetEqual(
        Author.objects.order_by(StrIndex('name', Value('R.')).desc()), [
            'George. R. R. Martin',
            'J. R. R. Tolkien',
            'Terry Pratchett',
        ],
        lambda a: a.name)

def autocomplete(request):
    # This should be replaced by a real API
    query = request.GET.get("q")
    page = request.GET.get("page")
    try:
        page = int(page)
    except (ValueError, TypeError):
        page = 1
    orgs = Organization.objects.filter(individual=False).get_viewable(
        request.user)
    if query:
        # Prioritize showing things that start with query
        orgs = (
            orgs.filter(name__icontains=query)
            .annotate(pos=StrIndex(Lower("name"), Lower(V(query))))
            .order_by("pos", "slug")
        )
    data = {
        "data": [
            {"name": o.name, "slug": o.slug, "avatar": o.avatar_url}
            for o in orgs[(page - 1) * ORG_PAGINATION:page * ORG_PAGINATION]
        ]
    }
    return JsonResponse(data)

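A quick plain-Python check of the slice arithmetic above (the ORG_PAGINATION value is hypothetical): page 1 maps to items 0-9, page 2 to items 10-19, and so on.

ORG_PAGINATION = 10  # hypothetical page size
page = 2
start, stop = (page - 1) * ORG_PAGINATION, page * ORG_PAGINATION
assert (start, stop) == (10, 20)  # page 2 returns items 10..19
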
def test_unicode_values(self):
    Author.objects.create(name='ツリー')
    Author.objects.create(name='皇帝')
    Author.objects.create(name='皇帝 ツリー')
    authors = Author.objects.annotate(sb=StrIndex('name', Value('リ')))
    self.assertQuerysetEqual(authors.order_by('name'), [2, 0, 5],
                             lambda a: a.sb)

def test_annotate_charfield(self):
    Author.objects.create(name='George. R. R. Martin')
    Author.objects.create(name='J. R. R. Tolkien')
    Author.objects.create(name='Terry Pratchett')
    authors = Author.objects.annotate(
        fullstop=StrIndex('name', Value('R.')))
    self.assertQuerysetEqual(authors.order_by('name'), [9, 4, 0],
                             lambda a: a.fullstop)

def test_filtering(self):
    Author.objects.create(name='George. R. R. Martin')
    Author.objects.create(name='Terry Pratchett')
    self.assertQuerysetEqual(
        Author.objects.annotate(
            middle_name=StrIndex('name', Value('R.'))).filter(
                middle_name__gt=0),
        ['George. R. R. Martin'],
        lambda a: a.name)

def test_expressions(self):
    Author.objects.create(name='John Smith', alias='smithj')
    Author.objects.create(name='Rhonda')
    substr = Substr(Upper('name'), StrIndex('name', V('h')), 5,
                    output_field=CharField())
    authors = Author.objects.annotate(name_part=substr)
    self.assertQuerysetEqual(authors.order_by('name'),
                             ['HN SM', 'HONDA'],
                             lambda a: a.name_part)

def search_by_username(self, queryset, name, expr):
    """First starting with expr, then containing expr."""
    queryset = queryset.annotate(
        expr_position=StrIndex('user__username', Value(expr)),
    ).filter(
        expr_position__gt=0,
    ).order_by(
        'expr_position',
        'user__username',
    )
    return queryset

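The (self, queryset, name, expr) signature matches a django-filter method filter; a hedged wiring sketch (the FilterSet and field names are assumptions, not from the source):

import django_filters

class MemberFilter(django_filters.FilterSet):
    # Hypothetical: routes the ?username= parameter through the
    # search_by_username method defined above.
    username = django_filters.CharFilter(method='search_by_username')
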
def lookups(self, request, model_admin):
    slug_prefixes = (
        Room.objects.all()
        .annotate(slug_prefix=Substr(
            'slug', 1, StrIndex('slug', Value('-')) - 1,
            output_field=models.CharField()))
        .order_by('slug_prefix')
        .values_list('slug_prefix', flat=True)
        .distinct()
    )
    lookups = {}
    for prefix in slug_prefixes:
        lookups[prefix] = _(prefix)
    return lookups.items()

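lookups() is one half of an admin SimpleListFilter; a minimal sketch of the matching queryset() method, assuming the prefix is everything before the first '-' in slug (the exact filtering rule here is an assumption):

def queryset(self, request, queryset):
    # Hypothetical counterpart: restrict to rooms whose slug starts
    # with the chosen prefix followed by the '-' separator.
    if self.value():
        return queryset.filter(slug__startswith=self.value() + '-')
    return queryset
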
def _get_summary_categories() -> str:
    """Finds and returns the summary categories and their corresponding
    account hints as JSON.

    Returns:
        The summary categories and their account hints, by their record
        types and category types.
    """
    rows = Record.objects \
        .filter(Q(summary__contains="—"),
                ~Q(account__code__startswith="114"),
                ~Q(account__code__startswith="214"),
                ~Q(account__code__startswith="128"),
                ~Q(account__code__startswith="228")) \
        .annotate(rec_type=Case(When(is_credit=True, then=Value("credit")),
                                default=Value("debit"),
                                output_field=CharField()),
                  cat_type=Case(
                      When(summary__regex=".+—.+—.+→.+", then=Value("bus")),
                      When(summary__regex=".+—.+[→↔].+",
                           then=Value("travel")),
                      default=Value("general"),
                      output_field=CharField()),
                  category=Left("summary",
                                StrIndex("summary", Value("—")) - 1,
                                output_field=CharField())) \
        .values("rec_type", "cat_type", "category", "account__code") \
        .annotate(count=Count("category")) \
        .order_by("rec_type", "cat_type", "category", "-count",
                  "account__code")
    # Sorts the rows by the record type and the category type
    categories = {}
    for row in rows:
        key = "%s-%s" % (row["rec_type"], row["cat_type"])
        if key not in categories:
            categories[key] = {}
        if row["category"] not in categories[key]:
            categories[key][row["category"]] = []
        categories[key][row["category"]].append(row)
    for key in categories:
        # Keeps only the first account with most records
        categories[key] = [categories[key][x][0] for x in categories[key]]
        # Sorts the categories by the frequency
        categories[key].sort(key=lambda x: (-x["count"], x["category"]))
        # Keeps only the category and the account
        categories[key] = [[x["category"], x["account__code"]]
                           for x in categories[key]]
    # Converts the dictionary to a list, as the category may not be
    # US-ASCII
    return json.dumps(categories)

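A plain-Python illustration of the category=Left(...) annotation above, using a hypothetical summary value:

summary = "Meal—Lunch"        # hypothetical record summary
pos = summary.index("—") + 1  # StrIndex is 1-based, like SQL
assert summary[:pos - 1] == "Meal"  # what Left(summary, pos - 1) yields
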
def handle(self, *args, **options):
    """
    Main entry function for the command.
    Execution logic handled here.
    """
    # Remove all existing Units before proceeding.
    # Unfortunately, the logic in this importer currently
    # is not consistent on imports against existing Units
    # vs a fresh db, so to have the most accurate data we can,
    # we have to start fresh with each run.
    Unit.objects.all().delete()

    # Perform mapping.
    # NOTE: order is important here! Particularly,
    # in order for teledata and Program Departments to
    # map properly, Colleges must be mapped first.
    self.full_name_replacements = settings.UNIT_NAME_FULL_REPLACEMENTS
    self.basic_replacements = settings.UNIT_NAME_PARTIAL_REPLACEMENTS
    self.lowercase_replacements = settings.UNIT_NAME_LOWERCASE_REPLACEMENTS
    self.uppercase_replacements = settings.UNIT_NAME_UPPERCASE_REPLACEMENTS

    self.colleges_processed = College.objects.all()
    # Teledata Organizations that look like they could align to a College
    # should be prioritized for processing, hence the ordering here:
    self.teledata_orgs_processed = TeledataOrg.objects.annotate(
        college_index=StrIndex('name', V('college'))).order_by(
            '-college_index')
    self.program_depts_processed = ProgramDept.objects.all()
    self.teledata_depts_processed = TeledataDept.objects.all()

    self.mapping_progress_bar = ChargingBar(
        'Mapping data...',
        max=self.colleges_processed.count() +
        self.teledata_orgs_processed.count() +
        self.program_depts_processed.count() +
        self.teledata_depts_processed.count())

    self.map_orgs_colleges()
    self.map_orgs_teledata()
    self.map_depts_programs()
    self.map_depts_teledata()

    # Consolidate duplicate Units as best as we can.
    self.consolidatable_unit_names = Unit.objects.values('name').annotate(
        name_count=Count('pk')).filter(name_count=2)
    self.cleanup_progress_bar = ChargingBar(
        'Cleaning up...',
        max=self.consolidatable_unit_names.count())
    self.consolidate_duplicate_units()

    # Done.
    self.print_stats()

def handle(self, *args, **options):
    chunk_size = 200
    leads = Lead.objects.filter(leadpreview__text_extract__isnull=False)
    leads_count = leads.count()
    total_chunks = math.ceil(leads_count / chunk_size)
    # The iterator yields one lead at a time, so derive the chunk number
    # from the lead counter instead of incrementing it once per lead.
    for n, lead in enumerate(leads.iterator(chunk_size=chunk_size),
                             start=1):
        chunk = math.ceil(n / chunk_size)
        print(f'Updating entries from lead chunk {chunk} of {total_chunks}')
        lead.entry_set.filter(
            entry_type=Entry.TagType.EXCERPT
        ).annotate(
            index=StrIndex('lead__leadpreview__text_extract', F('excerpt'))
        ).filter(index__gt=0).update(dropped_excerpt=F('excerpt'))
    print('Done.')

def test_annotate_textfield(self):
    Article.objects.create(
        title='How to Django',
        text='Lorem ipsum dolor sit amet.',
        written=timezone.now(),
    )
    Article.objects.create(
        title='How to Tango',
        text="Won't find anything here.",
        written=timezone.now(),
    )
    articles = Article.objects.annotate(
        ipsum_index=StrIndex('text', Value('ipsum')))
    self.assertQuerysetEqual(articles.order_by('title'), [7, 0],
                             lambda a: a.ipsum_index)

def test_strindex(self):
    """
    Tests str index function applied to a column.
    """
    q1 = Author.objects.values("name").annotate(
        smith_index=StrIndex("name", Value("Smith")))
    compiler = SQLCompiler(q1.query, self.connection, "default")
    sql_query, params = compiler.query.as_sql(compiler, self.connection)
    self.assertEqual(
        sql_query,
        "SELECT tests_author.name, STRPOS(tests_author.name, %s) AS " +
        "smith_index FROM tests_author",
    )
    self.assertEqual(params, ("Smith",))

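The STRPOS in the expected SQL is the PostgreSQL spelling; Django's StrIndex renders as INSTR by default and overrides the function name per backend. A hedged way to inspect the generated SQL without building a compiler by hand (Author model as above):

from django.db.models import Value
from django.db.models.functions import StrIndex

qs = Author.objects.annotate(smith_index=StrIndex("name", Value("Smith")))
print(qs.query)  # renders STRPOS(...) on PostgreSQL, INSTR(...) elsewhere
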
def test_annotate_textfield(self):
    Article.objects.create(
        title="How to Django",
        text="This is about How to Django.",
        written=timezone.now(),
    )
    Article.objects.create(
        title="How to Tango",
        text="Won't find anything here.",
        written=timezone.now(),
    )
    # The bare string "title" is resolved as a column reference, so this
    # searches for the value of the title column within the text column.
    articles = Article.objects.annotate(
        title_pos=StrIndex("text", "title"))
    self.assertQuerysetEqual(articles.order_by("title"), [15, 0],
                             lambda a: a.title_pos)

def extract_authors(apps, schema_editor):
    """
    Remove authors from subtitles, copy to author field,
    and remove from subtitle text.

    Example: "blah -- some guy"
    hyphens_location = 5 (1-indexed)
    author = "some guy"
    text = "blah"
    """
    Subtitle = apps.get_model("home", "Subtitle")
    subtitles = Subtitle.objects.annotate(
        hyphens_location=StrIndex('text', V(' -- ')))
    subtitles_with_authors = subtitles.filter(hyphens_location__gt=0)
    subtitles_with_authors.update(
        author=Substr('text', F('hyphens_location') + 4),
        text=Left('text', F('hyphens_location') - 1))

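A data-migration function like this is normally attached via migrations.RunPython; a minimal sketch (the app label and dependency are illustrative):

from django.db import migrations

class Migration(migrations.Migration):

    dependencies = [
        ('home', '0001_initial'),  # illustrative dependency
    ]

    operations = [
        # reverse is a no-op: the split text cannot be reassembled safely
        migrations.RunPython(extract_authors, migrations.RunPython.noop),
    ]
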
def natural_sort(qs, field):
    return qs.annotate(
        ns_len=Length(field),
        ns_split_index=StrIndex(field, Value(' ')),
        ns_suffix=Trim(
            Substr(field, F('ns_split_index'), output_field=CharField())),
    ).annotate(
        ns_code=Trim(
            Substr(field, Value(1), 'ns_split_index',
                   output_field=CharField())),
        ns_weight=Case(
            When(ns_split_index=0, then=Value(0)),
            default=Case(
                When(ns_suffix__regex=r'^\d+$',
                     then=Cast(
                         Substr(field, F('ns_split_index'),
                                output_field=CharField()),
                         output_field=IntegerField(),
                     )),
                default=Value(1230),
                output_field=IntegerField()),
            output_field=IntegerField(),
        ),
    ).order_by('ns_code', 'ns_weight', 'ns_len', field)

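A hedged usage sketch, assuming a Room model whose code field holds values like 'A 2' and 'A 10' (the model and field names are hypothetical):

# 'A 2' now sorts before 'A 10', instead of the plain lexicographic
# ordering that would put 'A 10' first.
rooms = natural_sort(Room.objects.all(), 'code')
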
def list(self, request):
    name = request.query_params.get('name', None)
    procedure_data = dict()
    if name:
        search_key = re.findall(r'[a-z0-9A-Z.]+', name)
        search_key = " ".join(search_key).lower()
        search_key = "".join(search_key.split("."))
        procedure_queryset = Procedure.objects.filter(
            Q(search_key__icontains=search_key) |
            Q(search_key__icontains=' ' + search_key) |
            Q(search_key__istartswith=search_key)
        ).annotate(
            search_index=StrIndex('search_key', Value(search_key))
        ).order_by('search_index')
        procedure_queryset = paginate_queryset(procedure_queryset, request)
    else:
        procedure_queryset = self.get_queryset()[:20]
    procedure_list_serializer = procedure_serializer.ProcedureInSerializer(
        procedure_queryset, many=True)
    procedure_data['procedures'] = procedure_list_serializer.data
    return Response(procedure_data)

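A plain-Python trace of the search-key normalisation above, with a hypothetical query string:

import re

name = "C.T. Scan"  # hypothetical user input
search_key = " ".join(re.findall(r'[a-z0-9A-Z.]+', name)).lower()
assert search_key == "c.t. scan"
search_key = "".join(search_key.split("."))
assert search_key == "ct scan"  # dots removed, casing normalised
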
def split_goitein_typedtexts(apps, schema_editor):
    # after import from the metadata spreadsheet,
    # there were too many footnotes associated with the generic
    # unpublished source "Goitein, typed texts"
    # To make it manageable, segment that source into volumes
    # based on shelfmark prefixes
    Source = apps.get_model("footnotes", "Source")
    Footnote = apps.get_model("footnotes", "Footnote")
    Fragment = apps.get_model("corpus", "Fragment")
    Document = apps.get_model("corpus", "Document")

    # get the source with too many footnotes
    g_typedtexts = Source.objects.filter(title_en="typed texts",
                                         authors__last_name="Goitein",
                                         volume="").first()
    # bail out if nothing to do
    if not g_typedtexts:
        return

    footnotes = g_typedtexts.footnote_set.all()
    if not footnotes.exists():
        return

    # we can't use our generic relation from document to footnotes in
    # migration, so work with a list of document ids
    footnote_doc_ids = footnotes.values_list("object_id", flat=True)

    # get a list of shelfmark prefixes for all fragments associated
    # with documents that are linked to our source via footnote
    # For all but T-S, use string index & substring to get shelfmark
    # portion before the first space
    shelfmark_prefixes = set(
        Fragment.objects.filter(documents__id__in=footnote_doc_ids)
        .exclude(shelfmark__startswith="T-S")
        .annotate(prefix=Substr(
            "shelfmark", 1,
            StrIndex("shelfmark", V(" ")) - 1,
            output_field=CharField(),
        ))
        .values_list("prefix", flat=True))
    ts_prefixes = set(
        # for T-S shelfmarks, get first 6 characters of shelfmark
        Fragment.objects.filter(documents__id__in=footnote_doc_ids)
        .filter(shelfmark__startswith="T-S")
        .annotate(prefix=Substr("shelfmark", 1, 6,
                                output_field=CharField()))
        .values_list("prefix", flat=True))
    # one exception: We want T-S Misc instead of T-S Mi
    ts_prefixes.remove("T-S Mi")
    ts_prefixes.add("T-S Misc")
    shelfmark_prefixes = shelfmark_prefixes.union(ts_prefixes)

    # create sources & footnote subsets for each prefix
    for prefix in shelfmark_prefixes:
        # create a new source with prefix as volume
        vol_source = Source.objects.create(
            title_en="typed texts",
            volume=prefix,
            source_type=g_typedtexts.source_type)
        # associate Goitein as author of the new source
        vol_source.authors.add(g_typedtexts.authors.first())

        # move footnotes for fragments with this prefix to the new source
        doc_ids = Document.objects.filter(
            id__in=footnote_doc_ids,
            fragments__shelfmark__startswith=prefix).values_list(
                "id", flat=True)
        updated = footnotes.filter(object_id__in=doc_ids).update(
            source=vol_source)

def jobs_search_filters(request, unfiltered_results=None):
    search_postcode = request.GET.get("postcode", None)
    homepage = Site.find_for_request(request).root_page.specific
    if not unfiltered_results:
        # Provide a default queryset for Pattern Library
        unfiltered_results = TalentLinkJob.objects.filter(
            homepage=homepage).all()
    hide_schools_and_early_years = request.GET.get(
        "hide_schools_and_early_years", False
    )
    job_categories = JobCategory.get_categories_summary(
        unfiltered_results, homepage=homepage
    ).order_by("sort_order")
    return {
        "hide_schools_and_early_years": {
            "label": "Hide all schools and early years jobs",
            "count": get_school_and_early_years_count(unfiltered_results),
            "selected": hide_schools_and_early_years,
        },
        "filters": [
            {
                "title": "Job categories",
                "options": job_categories,
                "selected": request.GET.getlist("category"),
                "key": "category",
            },
            {
                "title": "Job subcategories",
                "options": unfiltered_results.values("subcategory__title")
                .annotate(
                    key=F("subcategory__title"),
                    label=F("subcategory__title"),
                )
                .order_by("subcategory__title")
                .distinct(),
                "selected": request.GET.getlist("subcategory"),
                "key": "subcategory",
            },
            {
                "title": "Contract type",
                "options": unfiltered_results.values("contract_type")
                .annotate(key=F("contract_type"), label=F("contract_type"))
                .order_by("contract_type")
                .distinct(),
                "selected": request.GET.getlist("contract"),
                "key": "contract",
            },
            {
                "title": "Working hours",
                "options": unfiltered_results.values("working_hours")
                .annotate(key=F("working_hours"), label=F("working_hours"))
                .order_by("working_hours")
                .distinct(),
                "selected": request.GET.getlist("working_hours"),
                "key": "working_hours",
            },
            {
                # Because these are database values, they're not ordered
                # in code. The following code imposes an ordering:
                # - first, in descending order of the position of the
                #   first "£" character in the value
                # - then by the rest of the value, alphabetically
                #
                # This gives the following ordering:
                #
                # 1. Bellerophon £
                # 2. Aeneas £
                # 3. £Aeneas
                # 4. £Bellerophon
                # 5. Aeneas
                # 6. Bellerophon
                #
                # It works to order the currently seen API salary values
                # meaningfully, but could be broken by future changes to
                # those values.
                "title": "Salary range",
                "options": unfiltered_results.exclude(
                    searchable_salary__exact="")
                .values("searchable_salary")
                .annotate(
                    pound_index=StrIndex("searchable_salary", Value("£")),
                    key=F("searchable_salary"),
                    label=F("searchable_salary"),
                )
                .order_by("-pound_index", "searchable_salary")
                .distinct(),
                "selected": request.GET.getlist("searchable_salary"),
                "key": "searchable_salary",
            },
        ],
        "search_postcode": search_postcode,
    }

def get_queryset(
    self,
    project: Optional[Project] = None,
    source: Optional[Source] = None,
    snapshot: Optional[Snapshot] = None,
):
    """
    Get project files.

    Allows for filtering:
    - using a search string
    - using path prefix (e.g. for subdirectory listing)
    - using a mimetype

    Allows for aggregation (the default) by directory.
    """
    project = project or self.get_project()

    # Avoid using select_related and prefetch_related here
    # as it can slow down queries significantly
    queryset = File.objects.filter(project=project).order_by("path")

    source = source or self.request.GET.get("source")
    if source:
        queryset = queryset.filter(source=source)

    snapshot = snapshot or self.request.GET.get("snapshot")
    if snapshot:
        queryset = queryset.filter(snapshot=snapshot)
    else:
        queryset = queryset.filter(snapshot__isnull=True, current=True)

    prefix = self.get_prefix()
    if prefix:
        queryset = queryset.filter(path__startswith=prefix)

    search = self.request.GET.get("search", "").strip()
    if search:
        queryset = queryset.filter(path__istartswith=prefix + search)

    mimetype = self.request.GET.get("mimetype", "").strip()
    if mimetype:
        queryset = queryset.filter(mimetype__startswith=mimetype)

    queryset = queryset.annotate(name=Substr(
        "path",
        pos=len(prefix) + 1,
        length=StrIndex(
            # Add a trailing slash to ensure all paths
            # will have a length
            Concat(Substr("path", len(prefix) + 1), Value("/")),
            Value("/"),
        ) - 1,
        output_field=TextField(),
    ))

    expand = self.request.GET.get("expand")
    if expand is not None:
        return queryset

    # Fetch the files, limiting to 10,000 so the following grouping
    # does not take forever
    # TODO: Put a message in the result if this limit is reached
    files = list(queryset.all()[:10000])
    groups: Dict[str, Dict[str, Any]] = {}
    for file in files:
        if "/" in file.path[len(prefix) + 1:]:
            name = file.name
            info = groups.get(name)
            if not info:
                groups[name] = dict(
                    path=prefix + name,
                    name=file.name,
                    is_directory=True,
                    count=1,
                    source=[file.source_id],
                    size=file.size,
                    modified=file.modified,
                )
            else:
                info["count"] += 1
                info["size"] += file.size
                info["source"] += [file.source_id]
                info["modified"] = (file.modified
                                    if file.modified > info["modified"]
                                    else info["modified"])
        else:
            file.is_directory = False
            groups[file.path] = file

    # Return items sorted by path again
    return [value for key, value in sorted(groups.items())]

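A plain-Python rendering of the name annotation above: the appended "/" guarantees StrIndex finds a separator, so files directly under the prefix keep their full remaining path (the values are hypothetical):

prefix = "docs/"
path = "docs/guide/intro.md"
rest = path[len(prefix):] + "/"  # "guide/intro.md/" - always has a "/"
name = rest[:rest.index("/")]    # SQL: Substr(path, ..., StrIndex(...) - 1)
assert name == "guide"

path = "docs/README.md"          # file directly under the prefix
rest = path[len(prefix):] + "/"
assert rest[:rest.index("/")] == "README.md"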