def test_order_by(self):
    """StrIndex results can be used directly for asc/desc queryset ordering."""
    Author.objects.create(name='Terry Pratchett')
    Author.objects.create(name='J. R. R. Tolkien')
    Author.objects.create(name='George. R. R. Martin')
    # Bug fix: the needle must be wrapped in Value(); a bare string is
    # resolved by StrIndex as a column reference, and 'R.' is not a field.
    self.assertQuerysetEqual(
        Author.objects.order_by(StrIndex('name', Value('R.')).asc()), [
            'Terry Pratchett',
            'J. R. R. Tolkien',
            'George. R. R. Martin',
        ], lambda a: a.name)
    self.assertQuerysetEqual(
        Author.objects.order_by(StrIndex('name', Value('R.')).desc()), [
            'George. R. R. Martin',
            'J. R. R. Tolkien',
            'Terry Pratchett',
        ], lambda a: a.name)
Exemple #2
0
def autocomplete(request):
    """Return a JSON page of viewable, non-individual organizations.

    Query params:
        q: optional substring to match against organization names
           (matches are ordered by the position of the match, then slug).
        page: 1-based page number; defaults to 1 on missing/invalid input.
    """
    # This should be replaced by a real API
    query = request.GET.get("q")
    try:
        page = int(request.GET.get("page"))
    except (ValueError, TypeError):
        page = 1
    # Bug fix: clamp page to >= 1 — page 0 yields an empty window and
    # negative pages produce nonsensical negative slice bounds.
    if page < 1:
        page = 1

    orgs = Organization.objects.filter(individual=False).get_viewable(
        request.user)
    if query:
        # Prioritize showing things that start with query: a match at
        # position 1 (string start) sorts before later matches.
        orgs = (orgs.filter(name__icontains=query)
                .annotate(pos=StrIndex(Lower("name"), Lower(V(query))))
                .order_by("pos", "slug"))

    start = (page - 1) * ORG_PAGINATION
    end = page * ORG_PAGINATION
    data = {
        "data": [{
            "name": o.name,
            "slug": o.slug,
            "avatar": o.avatar_url
        } for o in orgs[start:end]]
    }
    return JsonResponse(data)
Exemple #3
0
 def test_unicode_values(self):
     """Positions reported by StrIndex are character-based for unicode text."""
     for name in ('ツリー', '皇帝', '皇帝 ツリー'):
         Author.objects.create(name=name)
     annotated = Author.objects.annotate(sb=StrIndex('name', Value('リ')))
     self.assertQuerysetEqual(
         annotated.order_by('name'), [2, 0, 5], lambda author: author.sb)
Exemple #4
0
 def test_annotate_charfield(self):
     """Annotating a CharField with StrIndex yields 1-based match positions."""
     for name in ('George. R. R. Martin', 'J. R. R. Tolkien',
                  'Terry Pratchett'):
         Author.objects.create(name=name)
     annotated = Author.objects.annotate(
         fullstop=StrIndex('name', Value('R.')))
     self.assertQuerysetEqual(
         annotated.order_by('name'), [9, 4, 0],
         lambda author: author.fullstop)
Exemple #5
0
 def test_filtering(self):
     """A StrIndex annotation can drive a filter: 0 means 'not found'."""
     Author.objects.create(name='George. R. R. Martin')
     Author.objects.create(name='Terry Pratchett')
     with_middle = Author.objects.annotate(
         middle_name=StrIndex('name', Value('R.'))).filter(middle_name__gt=0)
     self.assertQuerysetEqual(
         with_middle, ['George. R. R. Martin'], lambda author: author.name)
Exemple #6
0
 def test_annotate_charfield(self):
     """StrIndex annotations expose the 1-based position of the needle."""
     for name in ("George. R. R. Martin", "J. R. R. Tolkien",
                  "Terry Pratchett"):
         Author.objects.create(name=name)
     annotated = Author.objects.annotate(
         fullstop=StrIndex("name", Value("R.")))
     self.assertQuerysetEqual(
         annotated.order_by("name"), [9, 4, 0],
         lambda author: author.fullstop)
Exemple #7
0
 def test_filtering(self):
     """Rows where the needle is absent (StrIndex == 0) are filtered out."""
     Author.objects.create(name="George. R. R. Martin")
     Author.objects.create(name="Terry Pratchett")
     qs = Author.objects.annotate(
         middle_name=StrIndex("name", Value("R."))).filter(middle_name__gt=0)
     self.assertQuerysetEqual(qs, ["George. R. R. Martin"],
                              lambda author: author.name)
 def test_expressions(self):
     """StrIndex composes with other expressions, e.g. as Substr's position."""
     Author.objects.create(name='John Smith', alias='smithj')
     Author.objects.create(name='Rhonda')
     # Take 5 chars of the uppercased name starting at the first 'h'.
     name_part = Substr(
         Upper('name'), StrIndex('name', V('h')), 5, output_field=CharField())
     annotated = Author.objects.annotate(name_part=name_part)
     self.assertQuerysetEqual(
         annotated.order_by('name'), ['HN SM', 'HONDA'],
         lambda author: author.name_part)
Exemple #9
0
    def search_by_username(self, queryset, name, expr):
        """First starting with expr, then containing expr."""
        # Position 1 means the username starts with expr, so ascending
        # ordering on the position floats prefix matches to the top.
        position = StrIndex('user__username', Value(expr))
        return (
            queryset
            .annotate(expr_position=position)
            .filter(expr_position__gt=0)
            .order_by('expr_position', 'user__username')
        )
Exemple #10
0
    def lookups(self, request, model_admin):
        """Build admin-filter choices from the distinct room-slug prefixes
        (the slug portion before the first '-')."""
        prefix_expr = Substr(
            'slug', 1, StrIndex('slug', Value('-')) - 1,
            output_field=models.CharField())
        slug_prefixes = (Room.objects.all()
                         .annotate(slug_prefix=prefix_expr)
                         .order_by('slug_prefix')
                         .values_list('slug_prefix', flat=True)
                         .distinct())
        return {prefix: _(prefix) for prefix in slug_prefixes}.items()
Exemple #11
0
 def test_order_by(self):
     Author.objects.create(name="Terry Pratchett")
     Author.objects.create(name="J. R. R. Tolkien")
     Author.objects.create(name="George. R. R. Martin")
     self.assertQuerysetEqual(
         Author.objects.order_by(StrIndex("name", Value("R.")).asc()),
         [
             "Terry Pratchett",
             "J. R. R. Tolkien",
             "George. R. R. Martin",
         ],
         lambda a: a.name,
     )
     self.assertQuerysetEqual(
         Author.objects.order_by(StrIndex("name", Value("R.")).desc()),
         [
             "George. R. R. Martin",
             "J. R. R. Tolkien",
             "Terry Pratchett",
         ],
         lambda a: a.name,
     )
Exemple #12
0
    def _get_summary_categories() -> str:
        """Finds and returns the summary categories and their corresponding
        account hints as JSON.

        Returns:
            The summary categories and their account hints, by their record
            types and category types.
        """
        # Query records whose summary contains the em-dash category
        # separator, excluding accounts whose code starts with 114/214/128/228.
        # rec_type classifies credit vs debit; cat_type classifies the record
        # as a bus trip, travel, or general entry from the dash/arrow pattern
        # in the summary; category is the summary text before the first em
        # dash (StrIndex is 1-based, hence the -1 for Left's length).
        rows = Record.objects \
            .filter(Q(summary__contains="—"),
                    ~Q(account__code__startswith="114"),
                    ~Q(account__code__startswith="214"),
                    ~Q(account__code__startswith="128"),
                    ~Q(account__code__startswith="228")) \
            .annotate(rec_type=Case(When(is_credit=True, then=Value("credit")),
                                    default=Value("debit"),
                                    output_field=CharField()),
                      cat_type=Case(
                          When(summary__regex=".+—.+—.+→.+",
                               then=Value("bus")),
                          When(summary__regex=".+—.+[→↔].+",
                               then=Value("travel")),
                          default=Value("general"),
                          output_field=CharField()),
                      category=Left("summary",
                                    StrIndex("summary", Value("—")) - 1,
                                    output_field=CharField())) \
            .values("rec_type", "cat_type", "category", "account__code") \
            .annotate(count=Count("category")) \
            .order_by("rec_type", "cat_type", "category", "-count",
                      "account__code")
        # Group the rows by "<rec_type>-<cat_type>", then by category.
        categories = {}
        for row in rows:
            key = "%s-%s" % (row["rec_type"], row["cat_type"])
            if key not in categories:
                categories[key] = {}
            if row["category"] not in categories[key]:
                categories[key][row["category"]] = []
            categories[key][row["category"]].append(row)
        for key in categories:
            # Keeps only the first account (the one with most records, thanks
            # to the "-count" ordering above) for each category
            categories[key] = [categories[key][x][0] for x in categories[key]]
            # Sorts the categories by the frequency (most frequent first)
            categories[key].sort(key=lambda x: (-x["count"], x["category"]))
            # Keeps only the category and the account
            categories[key] = [[x["category"], x["account__code"]]
                               for x in categories[key]]
        # Serialize to JSON (categories may contain non-US-ASCII text, so a
        # plain dict repr would not be safe to embed).
        return json.dumps(categories)
Exemple #13
0
    def handle(self, *args, **options):
        """
        Main entry function for the command.
        Execution logic handled here.
        """
        # Remove all existing Units before proceeding.
        # Unfortunately, the logic in this importer currently
        # is not consistent on imports against existing Units
        # vs a fresh db, so to have the most accurate data we can,
        # we have to start fresh with each run.
        Unit.objects.all().delete()

        # Perform mapping.
        # NOTE: order is important here! Particularly,
        # in order for teledata and Program Departments to
        # map properly, Colleges must be mapped first.
        # Name-normalization tables used by the map_* steps below.
        self.full_name_replacements = settings.UNIT_NAME_FULL_REPLACEMENTS
        self.basic_replacements = settings.UNIT_NAME_PARTIAL_REPLACEMENTS
        self.lowercase_replacements = settings.UNIT_NAME_LOWERCASE_REPLACEMENTS
        self.uppercase_replacements = settings.UNIT_NAME_UPPERCASE_REPLACEMENTS

        self.colleges_processed = College.objects.all()
        # Teledata Organizations that look like they could align to a College
        # should be prioritized for processing, hence the ordering here:
        # (StrIndex > 0 iff 'college' occurs in the name, so descending order
        # puts those organizations first.)
        self.teledata_orgs_processed = TeledataOrg.objects.annotate(
            college_index=StrIndex('name', V('college'))).order_by(
                '-college_index')
        self.program_depts_processed = ProgramDept.objects.all()
        self.teledata_depts_processed = TeledataDept.objects.all()

        # Progress bar sized to the total number of rows across all four
        # mapping passes.
        self.mapping_progress_bar = ChargingBar(
            'Mapping data...',
            max=self.colleges_processed.count() +
            self.teledata_orgs_processed.count() +
            self.program_depts_processed.count() +
            self.teledata_depts_processed.count())
        self.map_orgs_colleges()
        self.map_orgs_teledata()
        self.map_depts_programs()
        self.map_depts_teledata()

        # Consolidate duplicate Units as best as we can.
        # NOTE(review): only names occurring exactly twice are considered
        # consolidatable — names with 3+ duplicates are left alone; confirm
        # that is intentional.
        self.consolidatable_unit_names = Unit.objects.values('name').annotate(
            name_count=Count('pk')).filter(name_count=2)
        self.cleanup_progress_bar = ChargingBar(
            'Cleaning up...', max=self.consolidatable_unit_names.count())
        self.consolidate_duplicate_units()

        # Done.
        self.print_stats()
Exemple #14
0
 def handle(self, *args, **options):
     """Backfill ``dropped_excerpt`` for excerpt-type entries whose excerpt
     text still occurs verbatim in the lead preview's extracted text."""
     # Iterate leads in fixed-size DB chunks to bound memory usage.
     chunk_size = 200
     leads = Lead.objects.filter(leadpreview__text_extract__isnull=False)
     leads_count = leads.count()
     total_chunks = math.ceil(leads_count / chunk_size)
     n = 1
     for lead in leads.iterator(chunk_size=chunk_size):
         # NOTE(review): n counts individual leads, not chunks, so this
         # message overstates progress granularity — confirm intent.
         print(f'Updating entries from lead chunk {n} of {total_chunks}')
         # StrIndex > 0 means the excerpt appears in the extracted text;
         # only those entries get dropped_excerpt copied from excerpt.
         lead.entry_set.filter(
             entry_type=Entry.TagType.EXCERPT).annotate(index=StrIndex(
                 'lead__leadpreview__text_extract', F('excerpt'))).filter(
                     index__gt=0).update(dropped_excerpt=F('excerpt'))
         n += 1
     print('Done.')
 def test_annotate_textfield(self):
     """StrIndex works on TextField; 0 marks rows without a match."""
     Article.objects.create(
         title='How to Django',
         text='Lorem ipsum dolor sit amet.',
         written=timezone.now(),
     )
     Article.objects.create(
         title='How to Tango',
         text="Won't find anything here.",
         written=timezone.now(),
     )
     # Bug fix: wrap the literal needle in Value(); a bare string argument
     # is resolved as a field reference and Article has no 'ipsum' field.
     # The expected positions [7, 0] confirm a literal search was intended.
     articles = Article.objects.annotate(
         ipsum_index=StrIndex('text', Value('ipsum')))
     self.assertQuerysetEqual(articles.order_by('title'), [7, 0],
                              lambda a: a.ipsum_index)
Exemple #16
0
    def test_strindex(self):
        """
        Tests str index function applied to a column.
        """
        queryset = Author.objects.values("name").annotate(
            smith_index=StrIndex("name", Value("Smith")))

        compiler = SQLCompiler(queryset.query, self.connection, "default")
        sql_query, params = compiler.query.as_sql(compiler, self.connection)
        expected_sql = (
            "SELECT tests_author.name, STRPOS(tests_author.name, %s) AS "
            "smith_index FROM tests_author")
        self.assertEqual(sql_query, expected_sql)
        self.assertEqual(params, ("Smith", ))
Exemple #17
0
 def test_annotate_textfield(self):
     """The needle may itself be a column: here the article's own title."""
     for title, text in (
         ("How to Django", "This is about How to Django."),
         ("How to Tango", "Won't find anything here."),
     ):
         Article.objects.create(title=title, text=text,
                                written=timezone.now())
     annotated = Article.objects.annotate(
         title_pos=StrIndex("text", "title"))
     self.assertQuerysetEqual(annotated.order_by("title"), [15, 0],
                              lambda article: article.title_pos)
Exemple #18
0
def extract_authors(apps, schema_editor):
    """
    Remove authors from subtitles, copy to author field, and remove from subtitle text.

    Example: "blah -- some guy"
        hyphens_location = 5 (1-indexed position of " -- ")
        author = "some guy"
        text = "blah"
    """
    Subtitle = apps.get_model("home", "Subtitle")
    # StrIndex is 1-based and returns 0 when the separator is absent.
    subtitles = Subtitle.objects.annotate(
        hyphens_location=StrIndex('text', V(' -- ')))
    subtitles_with_authors = subtitles.filter(hyphens_location__gt=0)
    # author: everything after the 4-character " -- " separator
    # (Substr position is 1-based, so +4 lands on the first author char);
    # text: everything before the separator.
    subtitles_with_authors.update(author=Substr('text',
                                                F('hyphens_location') + 4),
                                  text=Left('text',
                                            F('hyphens_location') - 1))
Exemple #19
0
def natural_sort(qs, field):
    """Annotate *qs* so that ordering on *field* approximates a natural sort.

    Each value is split at its first space into a code part and a suffix;
    rows are ordered by the code, then by a weight derived from the suffix,
    then by total length, then by the raw field value.
    """
    return qs.annotate(
        ns_len=Length(field),
        # 1-based position of the first space; 0 when there is no space.
        ns_split_index=StrIndex(field, Value(' ')),
        # Everything from the first space onward, whitespace-trimmed.
        ns_suffix=Trim(
            Substr(field, F('ns_split_index'), output_field=CharField())),
    ).annotate(ns_code=Trim(
        # Leading portion up to (and including) the first space, trimmed.
        Substr(field, Value(1), 'ns_split_index', output_field=CharField())),
               ns_weight=Case(
                   # No space at all -> weight 0 (sorts first).
                   When(ns_split_index=0, then=Value(0)),
                   default=Case(When(ns_suffix__regex=r'^\d+$',
                                     then=Cast(
                                         Substr(field,
                                                F('ns_split_index'),
                                                output_field=CharField()),
                                         output_field=IntegerField(),
                                     )),
                                # Non-numeric suffixes get a fixed sentinel
                                # weight; 1230 — TODO confirm why this
                                # particular constant was chosen.
                                default=Value(1230),
                                output_field=IntegerField()),
                   output_field=IntegerField(),
               )).order_by('ns_code', 'ns_weight', 'ns_len', field)
Exemple #20
0
    def list(self, request):
        """Return procedures matching the 'name' query param, ordered by how
        early the normalized key occurs; without a query, the first 20."""
        procedure_data = dict()
        name = request.query_params.get('name', None)
        if name:
            # Normalize: keep alphanumeric/dot tokens, lowercase, drop dots.
            tokens = re.findall(r'[a-z0-9A-Z.]+', name)
            search_key = " ".join(tokens).lower()
            search_key = "".join(search_key.split("."))
            matches = Q(search_key__icontains=search_key)
            matches = matches | Q(search_key__icontains=' ' + search_key)
            matches = matches | Q(search_key__istartswith=search_key)
            procedure_queryset = (
                Procedure.objects.filter(matches)
                .annotate(search_index=StrIndex('search_key',
                                                Value(search_key)))
                .order_by('search_index'))
            procedure_queryset = paginate_queryset(procedure_queryset, request)
        else:
            procedure_queryset = self.get_queryset()[:20]

        serializer = procedure_serializer.ProcedureInSerializer(
            procedure_queryset, many=True)
        procedure_data['procedures'] = serializer.data
        return Response(procedure_data)
def split_goitein_typedtexts(apps, schema_editor):
    """Segment the generic "Goitein, typed texts" source into per-volume
    sources.

    After import from the metadata spreadsheet, there were too many footnotes
    associated with the single generic unpublished source "Goitein, typed
    texts". To make it manageable, segment that source into volumes based on
    shelfmark prefixes of the fragments linked (via documents) to its
    footnotes.
    """
    Source = apps.get_model("footnotes", "Source")
    Footnote = apps.get_model("footnotes", "Footnote")
    Fragment = apps.get_model("corpus", "Fragment")
    Document = apps.get_model("corpus", "Document")

    # get the source with too many footnotes
    g_typedtexts = Source.objects.filter(title_en="typed texts",
                                         authors__last_name="Goitein",
                                         volume="").first()

    # bail out if nothing to do
    if not g_typedtexts:
        return
    footnotes = g_typedtexts.footnote_set.all()
    if not footnotes.exists():
        return
    # we can't use our generic relation from document to footnotes in a
    # migration, so work with a list of document ids
    footnote_doc_ids = footnotes.values_list("object_id", flat=True)

    # get a list of shelfmark prefixes for all fragments associated
    # with documents that are linked to our source via footnote.
    # For all but T-S, use string index & substring to get the shelfmark
    # portion before the first space (StrIndex is 1-based, hence -1).
    shelfmark_prefixes = set(
        Fragment.objects.filter(documents__id__in=footnote_doc_ids).exclude(
            shelfmark__startswith="T-S").annotate(prefix=Substr(
                "shelfmark",
                1,
                StrIndex("shelfmark", V(" ")) - 1,
                output_field=CharField(),
            )).values_list("prefix", flat=True))

    ts_prefixes = set(
        # for T-S shelfmarks, get first 6 characters of shelfmark
        Fragment.objects.filter(documents__id__in=footnote_doc_ids).filter(
            shelfmark__startswith="T-S").annotate(prefix=Substr(
                "shelfmark", 1, 6,
                output_field=CharField()), ).values_list("prefix", flat=True))
    # one exception: we want "T-S Misc" instead of the truncated "T-S Mi".
    # Bug fix: guard the swap — set.remove() raises KeyError when no
    # "T-S Mi" prefix is present, and unconditionally adding "T-S Misc"
    # would create a source with no matching footnotes.
    if "T-S Mi" in ts_prefixes:
        ts_prefixes.remove("T-S Mi")
        ts_prefixes.add("T-S Misc")
    shelfmark_prefixes = shelfmark_prefixes.union(ts_prefixes)

    # create sources & footnote subsets for each prefix
    for prefix in shelfmark_prefixes:
        # create a new source with prefix as volume
        vol_source = Source.objects.create(
            title_en="typed texts",
            volume=prefix,
            source_type=g_typedtexts.source_type)
        # associate Goitein as author of the new source
        vol_source.authors.add(g_typedtexts.authors.first())
        # move footnotes for fragments with this prefix to the new source
        doc_ids = Document.objects.filter(
            id__in=footnote_doc_ids,
            fragments__shelfmark__startswith=prefix).values_list("id",
                                                                 flat=True)
        footnotes.filter(object_id__in=doc_ids).update(source=vol_source)
def jobs_search_filters(request, unfiltered_results=None):
    """Build the filter context for the jobs search page.

    Returns a dict with the schools/early-years toggle, a list of filter
    groups (categories, subcategories, contract type, working hours, salary
    range) derived from ``unfiltered_results``, and the searched postcode.
    """
    search_postcode = request.GET.get("postcode", None)
    homepage = Site.find_for_request(request).root_page.specific

    if not unfiltered_results:
        # Provide a default queryset for Pattern Library
        unfiltered_results = TalentLinkJob.objects.filter(homepage=homepage).all()

    # NOTE(review): this is the raw query-string value (a string) or False;
    # it is only used as the "selected" flag below.
    hide_schools_and_early_years = request.GET.get(
        "hide_schools_and_early_years", False
    )

    job_categories = JobCategory.get_categories_summary(
        unfiltered_results, homepage=homepage
    ).order_by("sort_order")

    return {
        "hide_schools_and_early_years": {
            "label": "Hide all schools and early years jobs",
            "count": get_school_and_early_years_count(unfiltered_results),
            "selected": hide_schools_and_early_years,
        },
        "filters": [
            {
                "title": "Job categories",
                "options": job_categories,
                "selected": request.GET.getlist("category"),
                "key": "category",
            },
            {
                # The annotate() calls below duplicate each value under
                # "key"/"label" so every option renders uniformly.
                "title": "Job subcategories",
                "options": unfiltered_results.values("subcategory__title")
                .annotate(key=F("subcategory__title"), label=F("subcategory__title"),)
                .order_by("subcategory__title")
                .distinct(),
                "selected": request.GET.getlist("subcategory"),
                "key": "subcategory",
            },
            {
                "title": "Contract type",
                "options": unfiltered_results.values("contract_type")
                .annotate(key=F("contract_type"), label=F("contract_type"))
                .order_by("contract_type")
                .distinct(),
                "selected": request.GET.getlist("contract"),
                "key": "contract",
            },
            {
                "title": "Working hours",
                "options": unfiltered_results.values("working_hours")
                .annotate(key=F("working_hours"), label=F("working_hours"),)
                .order_by("working_hours")
                .distinct(),
                "selected": request.GET.getlist("working_hours"),
                "key": "working_hours",
            },
            {
                # Because these are database values, they're not ordered in code. The
                # following code imposes ordering:
                # - first, by the reverse string index of the first encountered "£"
                #   character
                # - then by the rest of the value alphabetically
                #
                # This gives the following ordering:
                #
                # 1. Bellerophon £
                # 2. Aeneas £
                # 3. £Aeneas
                # 4. £Bellerophon
                # 5. Aeneas
                # 6. Bellerophon
                #
                # It works to order the currently seen API salary values meaningfully,
                # but could be broken by future changes to those values.
                "title": "Salary range",
                "options": unfiltered_results.exclude(searchable_salary__exact="")
                .values("searchable_salary")
                .annotate(
                    pound_index=StrIndex("searchable_salary", Value("£")),
                    key=F("searchable_salary"),
                    label=F("searchable_salary"),
                )
                .order_by("-pound_index", "searchable_salary")
                .distinct(),
                "selected": request.GET.getlist("searchable_salary"),
                "key": "searchable_salary",
            },
        ],
        "search_postcode": search_postcode,
    }
Exemple #23
0
    def get_queryset(
        self,
        project: Optional[Project] = None,
        source: Optional[Source] = None,
        snapshot: Optional[Snapshot] = None,
    ):
        """
        Get project files.

        Allows for filtering:
          - using a search string
          - using path prefix (e.g for subdirectory listing)
          - using a mimetype

        Allows for aggregation (the default) by directory.
        """
        project = project or self.get_project()

        # Avoid using select_related and prefetch_related here
        # as it can slow down queries significantly
        queryset = File.objects.filter(project=project).order_by("path")

        source = source or self.request.GET.get("source")
        if source:
            queryset = queryset.filter(source=source)

        snapshot = snapshot or self.request.GET.get("snapshot")
        if snapshot:
            queryset = queryset.filter(snapshot=snapshot)
        else:
            # No snapshot requested: restrict to current, non-snapshot files.
            queryset = queryset.filter(snapshot__isnull=True, current=True)

        prefix = self.get_prefix()
        if prefix:
            queryset = queryset.filter(path__startswith=prefix)

        search = self.request.GET.get("search", "").strip()
        if search:
            queryset = queryset.filter(path__istartswith=prefix + search)

        mimetype = self.request.GET.get("mimetype", "").strip()
        if mimetype:
            queryset = queryset.filter(mimetype__startswith=mimetype)

        # Annotate each file with "name": the first path segment after the
        # prefix, i.e. a direct file name or the name of the subdirectory
        # the file lives under.
        queryset = queryset.annotate(name=Substr(
            "path",
            pos=len(prefix) + 1,
            length=StrIndex(
                # Add a trailing slash to ensure all paths
                # will have a length
                Concat(Substr("path",
                              len(prefix) + 1), Value("/")),
                Value("/"),
            ) - 1,
            output_field=TextField(),
        ))

        expand = self.request.GET.get("expand")
        if expand is not None:
            # "expand" requested: return the flat queryset without grouping.
            return queryset

        # Fetch the files, limiting to 10,000 so the following grouping
        # does not take forever
        # TODO: Put a message in the result if this limit is reached
        files = list(queryset.all()[:10000])

        # Fold files that are nested below the prefix into per-directory
        # aggregate entries; direct children stay as-is.
        groups: Dict[str, Dict[str, Any]] = {}
        for file in files:
            if "/" in file.path[len(prefix) + 1:]:
                # Nested file: accumulate it into its directory's group.
                name = file.name
                info = groups.get(name)
                if not info:
                    groups[name] = dict(
                        path=prefix + name,
                        name=file.name,
                        is_directory=True,
                        count=1,
                        source=[file.source_id],
                        size=file.size,
                        modified=file.modified,
                    )
                else:
                    info["count"] += 1
                    info["size"] += file.size
                    info["source"] += [file.source_id]
                    # Keep the most recent modification time for the group.
                    info["modified"] = (file.modified
                                        if file.modified > info["modified"]
                                        else info["modified"])
            else:
                # Direct child of the prefix: keep the file object itself.
                file.is_directory = False
                groups[file.path] = file

        # Return items sorted by path again
        return [value for key, value in sorted(groups.items())]