def citation(request, series_slug, volume_number_slug, page_number, case_id=None, pdf=False, db_case=None): """ /<series_slug>/<volume_number>/<page_number>/ -- show requested case (or list of cases, or case not found page). /<series_slug>/<volume_number>/<page_number>/<case_id>/ -- show requested case, using case_id to find one of multiple cases at this cite """ # redirect if series slug or volume number slug is in the wrong format if not pdf and (slugify(series_slug) != series_slug or slugify(volume_number_slug) != volume_number_slug): return HttpResponseRedirect(reverse( 'citation', args=[slugify(series_slug), slugify(volume_number_slug), page_number] + ([case_id] if case_id else []), host='cite')) ### try to look up citation case = None resolved_case = None if case_id: try: case = CaseDocument.get(id=case_id) resolved_cases = ResolveDocument.search().query("match", source='cap').query("match", source_id=case_id).execute() if resolved_cases: resolved_case = resolved_cases[0] except NotFoundError: raise Http404
def volume(request, series_slug, volume_number_slug):
    """ /<series_slug>/<volume_number>/ -- list all cases for given volumes (typically only one). """
    # redirect if series slug or volume number slug is in the wrong format
    clean_series = slugify(series_slug)
    clean_volume = slugify(volume_number_slug)
    if clean_series != series_slug or clean_volume != volume_number_slug:
        return HttpResponseRedirect(reverse('volume', args=[clean_series, clean_volume], host='cite'))

    # fetch all matching in-scope volumes, primary volumes before continuation parts
    vols = list(
        VolumeMetadata.objects
        .select_related('reporter')
        .filter(volume_number_slug=volume_number_slug,
                reporter__short_name_slug=series_slug,
                out_of_scope=False)
        .order_by('-second_part_of'))
    if not vols:
        raise Http404

    # fetch every case for this volume/reporter from elasticsearch, without case bodies
    case_search = (
        CaseDocument.search()
        .filter("term", volume__volume_number_slug=volume_number_slug)
        .filter("term", reporter__short_name_slug__raw=series_slug)
        .sort('first_page')
        .extra(size=10000)
        .source({"excludes": "casebody_data.*"}))
    # natural sort so e.g. page "2" sorts before page "10"
    hits = natsorted(case_search.execute(), key=lambda hit: hit.first_page)

    # pair each volume with the subset of cases that belong to it
    volumes = [(vol, [hit for hit in hits if hit.volume.barcode == vol.barcode]) for vol in vols]
    return render(request, 'cite/volume.html', {
        "volumes": volumes,
    })
def get_toc_by_url():
    """Build a table-of-contents mapping, keyed by URL, for the markdown docs templates.

    NOTE(review): excerpt is truncated -- the body continues past this view.
    """
    from capapi.documents import CaseDocument
    base_path = Path(__file__).parent.joinpath('templates/docs')
    # root node of the TOC; child pages are appended under 'children' later
    toc_by_url = {
        '': {
            'parents': [],
            'children': [],
            'url': '',
            'meta': {
                'doc_link': '/',
            }
        },
    }
    # context variables for rendering markdown templates
    context = {
        'email': settings.DEFAULT_FROM_EMAIL,
        'news': get_data_from_lil_site(section="news"),
    }
    # prefer the configured example case for the docs
    try:
        case = CaseDocument.get(id=settings.API_DOCS_CASE_ID)
def get_filter_query_params(self, request, view):
    """Normalize filter query params before they hit elasticsearch:
    lowercase cite/court/jurisdiction values, and expand numeric `cites_to`
    values (case ids) into that case's normalized citations.

    NOTE(review): excerpt appears truncated -- a trailing `return query_params`
    (required by the filter-backend contract) is not visible here.
    """
    def lc_values(values):
        # lowercase string values; silently drop non-strings
        return [
            value.lower() for value in values
            if isinstance(value, str)
        ]

    query_params = super().get_filter_query_params(request, view)
    if 'cite' in query_params:
        # cites are both lowercased and run through normalize_cite
        query_params['cite']['values'] = [
            models.normalize_cite(cite)
            for cite in lc_values(query_params['cite']['values'])
        ]
    if 'court' in query_params:
        query_params['court']['values'] = lc_values(
            query_params['court']['values'])
    if 'jurisdiction' in query_params:
        query_params['jurisdiction']['values'] = lc_values(
            query_params['jurisdiction']['values'])
    if 'cites_to' in query_params:
        old_cites_to = query_params['cites_to']['values']
        query_params['cites_to']['values'] = []
        for cite in old_cites_to:
            # check if case id is passed in
            if cite.isdigit():
                try:
                    case = CaseDocument.get(id=cite)
                    # add all citations relating to case
                    query_params['cites_to']['values'] += [
                        c['normalized_cite'] for c in case.citations
                    ]
                except NotFoundError:
                    # unknown case id: contributes no citations
                    pass
            else:
                # NOTE(review): bare normalize_cite here vs models.normalize_cite
                # above -- presumably the same function imported two ways; confirm
                query_params['cites_to']['values'].append(
                    normalize_cite(cite))
def random(request):
    """ Redirect to a random case over 1,000 words. """
    # restrict to substantial cases, and only pull back the redirect target field
    search = CaseDocument.search().source(['frontend_url']).filter('range', analysis__word_count={'gte': 1000})
    # wrap the filtered query so elasticsearch scores (i.e. orders) hits randomly
    search.query = FunctionScore(
        query=search.query,  # omit this if not applying a filter first
        functions=[
            SF('random_score'),
            # to weight by pagerank:
            # SF('field_value_factor', field='analysis.pagerank.percentile', modifier="ln1p", missing=0)
        ],
        boost_mode='replace',
    )
    chosen = search[0].execute()[0]
    return HttpResponseRedirect(chosen.frontend_url)
def export_cases_by_reporter(version_string, id):
    """ Write a .jsonl.gz file with all cases for reporter. """
    reporter = Reporter.objects.get(pk=id)
    cases = CaseDocument.raw_search().filter("term", reporter__id=id)
    # nothing to export -- warn and bail out
    if cases.count() == 0:
        print("WARNING: Reporter '{}' contains NO CASES.".format(reporter.full_name))
        return
    # {subfolder} and {case_format} are placeholders filled in by export_case_documents
    slug = reporter.short_name_slug
    out_path = Path(
        "bulk_exports", version_string, "by_reporter", "{subfolder}", slug,
        "%s_{case_format}_%s.zip" % (slug, version_string))
    # the export is public only if no in-scope case sits in a non-whitelisted jurisdiction
    has_restricted = reporter.case_metadatas.in_scope().filter(jurisdiction__whitelisted=False).exists()
    export_case_documents(cases, out_path, reporter, public=not has_restricted)
def export_cases_by_jurisdiction(version_string, id):
    """ Write a .jsonl.gz file with all cases for jurisdiction. """
    jurisdiction = Jurisdiction.objects.get(pk=id)
    cases = CaseDocument.raw_search().filter("term", jurisdiction__id=id)
    # nothing to export -- warn and bail out
    if cases.count() == 0:
        print("WARNING: Jurisdiction '{}' contains NO CASES.".format(jurisdiction.name))
        return
    # {subfolder} and {case_format} are placeholders filled in by export_case_documents
    slug = jurisdiction.slug
    out_path = Path(
        "bulk_exports", version_string, "by_jurisdiction", "{subfolder}", slug,
        "%s_{case_format}_%s.zip" % (slug, version_string))
    # whitelisted jurisdictions get a public export
    export_case_documents(cases, out_path, jurisdiction, public=jurisdiction.whitelisted)
def test_CaseDocumentSerializerWithCasebody(api_request_factory, case_factory, elasticsearch):
    """The casebody serializer includes a 'casebody' key for single and many=True use."""
    cases = [case_factory() for _ in range(3)]
    docs = [CaseDocument.get(c.id) for c in cases]

    # build a DRF request context for the serializer
    request = api_request_factory.get(api_reverse("cases-list"))
    request.accepted_renderer = None
    ctx = {'request': Request(request)}

    # can get single case data
    single = serializers.CaseDocumentSerializerWithCasebody(docs[0], context=ctx)
    assert 'casebody' in single.data

    # can get multiple cases' data
    many = serializers.CaseDocumentSerializerWithCasebody(docs, many=True, context=ctx)
    assert len(many.data) == 3
    for serialized_case in many.data:
        assert 'casebody' in serialized_case
# NOTE(review): excerpt starts mid-test -- the setup creating case1/case2 is not shown
case2.volume.volume_number = "124"
case2.volume.save()
# regenerate frontend urls for both cites after the volume-number change
CaseMetadata.update_frontend_urls(["124 Test 456", "123 Test 456"])
case1.refresh_from_db()
case2.refresh_from_db()
assert case1.frontend_url == "/test/123/456/"
assert case2.frontend_url == "/test/124/456/"


@pytest.mark.django_db
def test_set_duplicate(reset_sequences, case, elasticsearch):
    # make sure set_duplicate function updates the cases and removes the cases from the elasticsearch index
    duplicate_of = VolumeMetadata.objects.exclude(pk=case.volume.pk).first()
    update_elasticsearch_from_queue()
    # before: the case's volume is present in the index
    assert CaseDocument.search().filter(
        "term", volume__barcode=case.volume.barcode).count() == 1
    case.volume.set_duplicate(duplicate_of)
    assert case.volume.duplicate_of == duplicate_of
    update_elasticsearch_from_queue()
    # after: marking the volume duplicate removed its cases from the index
    assert CaseDocument.search().filter(
        "term", volume__barcode=case.volume.barcode).count() == 0


@pytest.mark.django_db
def test_set_reporter(reset_sequences, case, elasticsearch, reporter):
    volume = case.volume
    # capture the reporter short-name segment of each case's frontend_url before the change
    old_frontend_url_rep_shorts = set([
        case.frontend_url.split('/')[1] for case in volume.case_metadatas.all()
    ])
    # NOTE(review): test is truncated here -- the set_reporter call and assertions are not shown
# NOTE(review): excerpt starts mid-dict -- this is the tail of the TOC root-node
# literal inside get_toc_by_url
            'meta': {
                'doc_link': '/',
            }
        },
    }
    # context variables for rendering markdown templates
    context = {
        'email': settings.DEFAULT_FROM_EMAIL,
        'news': get_data_from_lil_site(section="news"),
    }
    # pick an example case for the docs: configured id first, then any indexed
    # case, then hardcoded fallbacks
    try:
        case = CaseDocument.get(id=settings.API_DOCS_CASE_ID)
    except NotFoundError:
        try:
            case = CaseDocument.search()[0].execute()[0]
        except NotFoundError:
            case = None
    context['case_id'] = case.id if case else 1
    context['case_url'] = reverse('cases-detail', args=[context['case_id']], host='api')
    context['case_cite'] = case.citations[0].cite if case else "123 U.S. 456"

    def path_string_to_title(string):
        # turn a path fragment into a display title, restoring API/CAP acronym casing
        return string.replace('-', ' ').replace('_', ' ').title().replace(
            'Api', 'API').replace('Cap', 'CAP')

    # only markdown files and directories contribute TOC entries
    for path in iter_docs():
        if not (path.suffix == '.md' or path.is_dir()):
            continue
    # NOTE(review): loop body continues past this excerpt
# NOTE(review): excerpt starts mid-call -- this is part of a citation-extraction
# test; the case factory call these kwargs belong to is not shown
    body_cache__text=", some text, ".join(legitimate_cites + illegitimate_cites),
    decision_date=datetime(2000, 1, 1))
fabfile.extract_all_citations()
update_elasticsearch_from_queue()

# check extracted cites
cites = list(ExtractedCitation.objects.all())
cite_set = set(c.cite for c in cites)
normalized_cite_set = set(c.normalized_cite for c in cites)
# only the legitimate cites should have been extracted, all pointing at this case
assert cite_set == set(legitimate_cites)
assert normalized_cite_set == legitimate_cites_normalized
assert all(c.cited_by_id == case.pk for c in cites)
# the elasticsearch document mirrors the extracted cites
assert set(
    c['cite'] for c in CaseDocument.get(id=case.pk).extractedcitations) == cite_set
assert set(c['normalized_cite'] for c in CaseDocument.get(
    id=case.pk).extractedcitations) == normalized_cite_set

# remove a cite and add a cite --
# make sure IDs of unchanged cites are still the same
removed_cite_str = legitimate_cites[0]
added_cite_str = '123 F. Supp. 456'
case.body_cache.text = case.body_cache.text.replace(
    removed_cite_str, '') + ', some text, ' + added_cite_str
case.body_cache.save()
fabfile.extract_all_citations()
new_cites = list(ExtractedCitation.objects.all())
removed_cite = next(c for c in cites if c.cite == removed_cite_str)
added_cite = next(c for c in new_cites if c.cite == added_cite_str)
assert {(c.id, c.cite)
# NOTE(review): assertion is truncated here -- the comparison continues past this excerpt
# NOTE(review): excerpt starts mid-function -- this continues the citation view's
# lookup logic (the if case_id: branch precedes this)
        if resolved_cases:
            resolved_case = resolved_cases[0]
    except NotFoundError:
        raise Http404
else:
    # no case id: rebuild the textual cite and resolve it via the citation index
    full_cite = "%s %s %s" % (volume_number_slug, series_slug.replace('-', ' ').title(), page_number)
    # strip everything but lowercase alphanumerics to match stored normalized cites
    normalized_cite = re.sub(r'[^0-9a-z]', '', full_cite.lower())
    resolved = ResolveDocument.search().filter("term", citations__normalized_cite=normalized_cite).execute()
    resolved_by_source = None
    if resolved:
        resolved_by_source = group_by(resolved, lambda r: r.source)
        if 'cap' in resolved_by_source:
            if len(resolved_by_source['cap']) == 1:
                # exactly one CAP match -- use it directly
                resolved_case = resolved_by_source['cap'][0]
                case = CaseDocument.get(resolved_case['source_id'])
        else:
            # no CAP resolution: collect CAP cases sharing any resolved citation as guesses
            cap_candidates = {
                c.id: c
                for r in resolved
                for cite in r.citations
                for c in CaseDocument.search().filter("term", citations__normalized_cite=cite.normalized_cite).execute()
            }
            if cap_candidates:
                resolved_by_source['cap_guess'] = cap_candidates.values()
if not case:
    # fall back to reconstructing the cite with the reporter's canonical short name
    reporter = Reporter.objects.filter(short_name_slug=slugify(series_slug)).first()
    if reporter:
        series = reporter.short_name
        full_cite = f'{volume_number_slug} {series} {page_number}'
# NOTE(review): function continues past this excerpt