class StaticFilesTest(TestCase):
    """Tests that the static-file view serves known files with the right
    content type and disposition.
    """
    good_mp3_path = 'mp3/2014/06/09/ander_v._leo.mp3'
    good_txt_path = 'txt/2015/12/28/opinion_text.txt'
    good_pdf_path = 'pdf/2013/06/12/' + \
        'in_re_motion_for_consent_to_disclosure_of_court_records.pdf'

    def setUp(self):
        """Create a docket with one audio item and two opinions (txt + pdf)
        pointing at the fixture files above.
        """
        self.court = Court.objects.get(pk='test')
        self.docket = Docket(case_name=u'Docket', court=self.court,
                             source=Docket.DEFAULT)
        self.docket.save()
        self.audio = Audio(
            local_path_original_file=self.good_mp3_path,
            local_path_mp3=self.good_mp3_path,
            docket=self.docket,
            blocked=False,
            case_name_full='Ander v. Leo',
            date_created=datetime.date(2014, 6, 9),
        )
        # index=False: keep Solr out of these unit tests.
        self.audio.save(index=False)
        self.opinioncluster = OpinionCluster(
            case_name=u'Hotline Bling',
            docket=self.docket,
            date_filed=datetime.date(2015, 12, 14),
        )
        self.opinioncluster.save(index=False)
        self.txtopinion = Opinion(cluster=self.opinioncluster,
                                  type='Lead Opinion',
                                  local_path=self.good_txt_path)
        self.txtopinion.save(index=False)
        self.pdfopinion = Opinion(cluster=self.opinioncluster,
                                  type='Lead Opinion',
                                  local_path=self.good_pdf_path)
        self.pdfopinion.save(index=False)

    def test_serve_static_file_serves_mp3(self):
        request = HttpRequest()
        # (Removed an unused `file_path = self.audio.local_path_mp3` local;
        # the view is called with the known-good path directly.)
        response = serve_static_file(request, file_path=self.good_mp3_path)
        self.assertEqual(response.status_code, 200)
        self.assertEqual(response['Content-Type'], 'audio/mpeg')
        self.assertIn('inline;', response['Content-Disposition'])

    def test_serve_static_file_serves_txt(self):
        request = HttpRequest()
        response = serve_static_file(request, file_path=self.good_txt_path)
        self.assertEqual(response.status_code, 200)
        self.assertEqual(response['Content-Type'], 'text/plain')
        self.assertIn('inline;', response['Content-Disposition'])
        self.assertIn('FOR THE DISTRICT OF COLUMBIA CIRCUIT',
                      response.content)

    def test_serve_static_file_serves_pdf(self):
        request = HttpRequest()
        response = serve_static_file(request, file_path=self.good_pdf_path)
        self.assertEqual(response.status_code, 200)
        self.assertEqual(response['Content-Type'], 'application/pdf')
        self.assertIn('inline;', response['Content-Disposition'])
def add_oc_and_o(self, old_document, old_citation, old_docket, new_docket):
    """Add the OpinionCluster and Opinion, updating existing items if
    present.

    Copies one old Document/Citation pair into the new schema as an
    OpinionCluster plus a single combined Opinion, both attached to
    ``new_docket``.  Primary keys are carried over so old and new rows stay
    aligned across the two databases.
    """
    new_opinion_cluster = OpinionClusterNew(
        pk=old_document.pk,
        docket=new_docket,
        judges=self._none_to_blank(old_document.judges),
        date_modified=old_document.date_modified,
        # NOTE(review): date_created mirrors date_modified — presumably the
        # old document rows have no usable creation stamp here; confirm.
        date_created=old_document.date_modified,
        date_filed=old_document.date_filed,
        slug=self._none_to_blank(old_citation.slug),
        citation_id=old_document.citation_id,
        # Case names come from the old docket, not the citation.
        case_name_short=old_docket.case_name_short,
        case_name=old_docket.case_name,
        case_name_full=old_docket.case_name_full,
        # _none_to_blank coerces NULLs to '' for the new char fields.
        federal_cite_one=self._none_to_blank(old_citation.federal_cite_one),
        federal_cite_two=self._none_to_blank(old_citation.federal_cite_two),
        federal_cite_three=self._none_to_blank(old_citation.federal_cite_three),
        state_cite_one=self._none_to_blank(old_citation.state_cite_one),
        state_cite_two=self._none_to_blank(old_citation.state_cite_two),
        state_cite_three=self._none_to_blank(old_citation.state_cite_three),
        state_cite_regional=self._none_to_blank(old_citation.state_cite_regional),
        specialty_cite_one=self._none_to_blank(old_citation.specialty_cite_one),
        scotus_early_cite=self._none_to_blank(old_citation.scotus_early_cite),
        lexis_cite=self._none_to_blank(old_citation.lexis_cite),
        westlaw_cite=self._none_to_blank(old_citation.westlaw_cite),
        neutral_cite=self._none_to_blank(old_citation.neutral_cite),
        scdb_id=self._none_to_blank(old_document.supreme_court_db_id),
        source=old_document.source,
        nature_of_suit=old_document.nature_of_suit,
        citation_count=old_document.citation_count,
        precedential_status=old_document.precedential_status,
        date_blocked=old_document.date_blocked,
        blocked=old_document.blocked,
    )
    # Write to the default (new) DB; skip search indexing during migration.
    new_opinion_cluster.save(
        using='default',
        index=False,
    )

    new_opinion = OpinionNew(
        pk=old_document.pk,
        cluster=new_opinion_cluster,
        date_modified=old_document.date_modified,
        # Unlike the cluster above, old documents do carry a retrieval time
        # that serves as the opinion's creation stamp.
        date_created=old_document.time_retrieved,
        # '010combined' — presumably the "combined opinion" type code in the
        # new schema; confirm against OpinionNew's type choices.
        type='010combined',
        sha1=old_document.sha1,
        download_url=old_document.download_url,
        local_path=old_document.local_path,
        plain_text=old_document.plain_text,
        html=self._none_to_blank(old_document.html),
        html_lawbox=self._none_to_blank(old_document.html_lawbox),
        html_with_citations=old_document.html_with_citations,
        extracted_by_ocr=old_document.extracted_by_ocr,
    )
    new_opinion.save(
        using='default',
        index=False,
    )
class BulkDataTest(TestCase):
    """Tests for the bulk-data generation command."""
    fixtures = ['court_data.json']
    tmp_data_dir = '/tmp/bulk-dir/'

    def setUp(self):
        """Create a docket, a cluster with two cited opinions, and scraped
        audio so every bulk file has at least one item to export.
        """
        docket = Docket(
            case_name=u'foo',
            court=Court.objects.get(pk='test'),
            source=Docket.DEFAULT
        )
        docket.save()
        # Must be more than a year old for all tests to be runnable.
        last_month = now().date() - timedelta(days=400)
        self.doc_cluster = OpinionCluster(
            case_name=u"foo",
            docket=docket,
            date_filed=last_month
        )
        self.doc_cluster.save(index=False)
        opinion = Opinion.objects.create(
            cluster=self.doc_cluster,
            type='Lead Opinion'
        )
        opinion2 = Opinion.objects.create(
            cluster=self.doc_cluster,
            type='Concurrence'
        )
        OpinionsCited.objects.create(
            citing_opinion=opinion2,
            cited_opinion=opinion
        )

        # Scrape the audio "site" and add its contents
        site = test_oral_arg_scraper.Site().parse()
        OralArgumentCommand().scrape_court(site, full_crawl=True)

    def tearDown(self):
        OpinionCluster.objects.all().delete()
        Docket.objects.all().delete()
        Audio.objects.all().delete()
        try:
            shutil.rmtree(self.tmp_data_dir)
        except OSError:
            # Directory may never have been created; that's fine.
            pass

    @override_settings(BULK_DATA_DIR=tmp_data_dir)
    def test_make_all_bulk_files(self):
        """Can we successfully generate all bulk files?"""
        Command().execute()

    def test_database_has_objects_for_bulk_export(self):
        # The msg argument of assertTrue is shown on FAILURE, so it must
        # describe the failure ("No opinions exist"), not the success
        # condition as it previously did.
        self.assertTrue(Opinion.objects.count() > 0, 'No opinions exist')
        self.assertTrue(Audio.objects.count() > 0, 'No audio exist')
        self.assertTrue(Docket.objects.count() > 0, 'No dockets exist')
        self.assertTrue(Court.objects.count() > 0, 'No courts exist')
        self.assertEqual(
            Court.objects.get(pk='test').full_name,
            'Testing Supreme Court'
        )
class BulkDataTest(TestCase):
    """Exercises bulk-data generation against a minimal set of records."""

    tmp_data_dir = "/tmp/bulk-dir/"

    def setUp(self) -> None:
        """Build one docket carrying a cluster, two opinions (one citing the
        other), and scraped oral-argument audio.
        """
        bulk_docket = Docket(
            case_name="foo",
            court=Court.objects.get(pk="test"),
            source=Docket.DEFAULT,
        )
        bulk_docket.save()

        # Must be more than a year old for all tests to be runnable.
        filing_date = now().date() - timedelta(days=400)
        self.doc_cluster = OpinionCluster(
            case_name="foo",
            docket=bulk_docket,
            date_filed=filing_date,
        )
        self.doc_cluster.save(index=False)

        lead = Opinion(cluster=self.doc_cluster, type="Lead Opinion")
        lead.save(index=False)
        concurrence = Opinion(cluster=self.doc_cluster, type="Concurrence")
        concurrence.save(index=False)
        OpinionsCited.objects.create(
            citing_opinion=concurrence,
            cited_opinion=lead,
        )

        # Scrape the audio "site" and add its contents
        site = test_oral_arg_scraper.Site().parse()
        storage_patch = mock.patch(
            "cl.lib.storage.get_name_by_incrementing",
            side_effect=clobbering_get_name,
        )
        with storage_patch:
            OralArgumentCommand().scrape_court(site, full_crawl=True)

    def tearDown(self) -> None:
        for model in (OpinionCluster, Docket, Audio):
            model.objects.all().delete()
        try:
            shutil.rmtree(self.tmp_data_dir)
        except OSError:
            pass

    @override_settings(BULK_DATA_DIR=tmp_data_dir)
    def test_make_all_bulk_files(self) -> None:
        """Can we successfully generate all bulk files?"""
        call_command("cl_make_bulk_data")

    def test_database_has_objects_for_bulk_export(self) -> None:
        # This is a very weird test. It's essentially just testing the
        # setUp function, which...OK?
        populated = [
            (Opinion, "No opinions exist"),
            (OpinionsCited, "No citations exist"),
            (Audio, "No audio exist"),
            (Docket, "No docket exist"),
            (Court, "No courts exist"),
        ]
        for model, failure_msg in populated:
            self.assertTrue(model.objects.count() > 0, failure_msg)
        self.assertEqual(
            Court.objects.get(pk="test").full_name, "Testing Supreme Court")
class BulkDataTest(TestCase):
    """Tests for the bulk-data generation command."""
    tmp_data_dir = '/tmp/bulk-dir/'

    def setUp(self):
        """Create a docket, a cluster with two cited opinions, and scraped
        audio so the bulk exporter has data for every object type.
        """
        docket = Docket(
            case_name=u'foo',
            court=Court.objects.get(pk='test'),
            source=Docket.DEFAULT
        )
        docket.save()
        # Must be more than a year old for all tests to be runnable.
        last_month = now().date() - timedelta(days=400)
        self.doc_cluster = OpinionCluster(
            case_name=u"foo",
            docket=docket,
            date_filed=last_month
        )
        self.doc_cluster.save(index=False)
        opinion = Opinion(cluster=self.doc_cluster, type='Lead Opinion')
        opinion.save(index=False)
        opinion2 = Opinion(cluster=self.doc_cluster, type='Concurrence')
        opinion2.save(index=False)
        OpinionsCited.objects.create(
            citing_opinion=opinion2,
            cited_opinion=opinion
        )

        # Scrape the audio "site" and add its contents
        site = test_oral_arg_scraper.Site().parse()
        OralArgumentCommand().scrape_court(site, full_crawl=True)

    def tearDown(self):
        OpinionCluster.objects.all().delete()
        Docket.objects.all().delete()
        Audio.objects.all().delete()
        try:
            shutil.rmtree(self.tmp_data_dir)
        except OSError:
            # Directory may never have been created; that's fine.
            pass

    @override_settings(BULK_DATA_DIR=tmp_data_dir)
    def test_make_all_bulk_files(self):
        """Can we successfully generate all bulk files?"""
        Command().execute()

    def test_database_has_objects_for_bulk_export(self):
        # The msg argument of assertTrue is shown on FAILURE, so it must
        # describe the failure ("No opinions exist"), not the success
        # condition as it previously did.
        self.assertTrue(Opinion.objects.count() > 0, 'No opinions exist')
        self.assertTrue(Audio.objects.count() > 0, 'No audio exist')
        self.assertTrue(Docket.objects.count() > 0, 'No dockets exist')
        self.assertTrue(Court.objects.count() > 0, 'No courts exist')
        self.assertEqual(
            Court.objects.get(pk='test').full_name,
            'Testing Supreme Court'
        )
def test_save_old_opinion(self):
    """Can we save opinions older than 1900?"""
    docket = Docket(case_name=u"Blah", court_id='test',
                    source=Docket.DEFAULT)
    docket.save()
    oc = OpinionCluster(
        case_name=u"Blah",
        docket=docket,
        date_filed=datetime.date(1899, 1, 1),
    )
    oc.save()
    o = Opinion(cluster=oc, type='Lead Opinion')
    try:
        cf = ContentFile(StringIO.StringIO('blah').read())
        o.file_with_date = datetime.date(1899, 1, 1)
        o.local_path.save('file_name.pdf', cf, save=False)
        o.save(index=False)
    except ValueError as e:
        # Include the original error: the previous re-raise discarded `e`
        # entirely, making failures undebuggable.
        raise ValueError("Unable to save a case older than 1900. Did you "
                         "try to use `strftime`...again? Original error: %s"
                         % e)
def migrate_opinions_oral_args_and_dockets(self):
    """Copy dockets, audio files, and opinions from the old DB to the new.

    For each old docket, at most one audio file and one document are
    migrated.  Primary keys are preserved so old and new rows stay aligned.

    Bug fixed: per-docket variables are now reset on every iteration.
    Previously, a docket with audio but no document either crashed with a
    NameError (first iteration) or silently reused the previous docket's
    case names, citation, and docket number.
    """
    self.stdout.write("Migrating dockets, audio files, and opinions to new "
                      "database...")
    q = DocketOld.objects.using('old').all()
    old_dockets = queryset_generator(q)
    num_dockets = q.count()

    progress = 0
    self._print_progress(progress, num_dockets)
    for old_docket in old_dockets:
        # First do the docket, then create the cluster and opinion objects.
        try:
            old_audio = old_docket.audio_files.all()[0]
        except IndexError:
            old_audio = None
        try:
            old_document = old_docket.documents.all()[0]
        except IndexError:
            old_document = None

        # Reset per-docket state so nothing leaks between iterations.
        old_citation = None
        case_name = case_name_full = case_name_short = ''
        if old_document is not None:
            old_citation = old_document.citation
            (case_name, case_name_full,
             case_name_short) = self._get_case_names(old_citation.case_name)
        if old_audio is not None:
            (old_audio_case_name, old_audio_case_name_full,
             old_audio_case_name_short) = self._get_case_names(
                old_audio.case_name)
            if old_document is None:
                # No document: fall back on the audio file's case names.
                case_name = old_audio_case_name
                case_name_full = old_audio_case_name_full
                case_name_short = old_audio_case_name_short

        # Courts are in place thanks to initial data.
        court = CourtNew.objects.get(pk=old_docket.court_id)

        new_docket = DocketNew(
            pk=old_docket.pk,
            date_modified=old_docket.date_modified,
            # NOTE(review): mirrors date_modified — old dockets apparently
            # lack a creation stamp; confirm.
            date_created=old_docket.date_modified,
            court=court,
            case_name=case_name,
            case_name_full=case_name_full,
            case_name_short=case_name_short,
            slug=self._none_to_blank(old_docket.slug),
            # Docket numbers live on the old citation; blank when the
            # docket had no document (and thus no citation).
            docket_number=(self._none_to_blank(old_citation.docket_number)
                           if old_citation is not None else ''),
            date_blocked=old_docket.date_blocked,
            blocked=old_docket.blocked,
        )
        if old_audio is not None:
            new_docket.date_argued = old_audio.date_argued
        new_docket.save(using='default')

        if old_document is not None:
            new_opinion_cluster = OpinionClusterNew(
                pk=old_document.pk,
                docket=new_docket,
                judges=self._none_to_blank(old_document.judges),
                date_modified=old_document.date_modified,
                date_created=old_document.date_modified,
                date_filed=old_document.date_filed,
                slug=self._none_to_blank(old_citation.slug),
                citation_id=old_document.citation_id,
                case_name_short=case_name_short,
                case_name=case_name,
                case_name_full=case_name_full,
                federal_cite_one=self._none_to_blank(
                    old_citation.federal_cite_one),
                federal_cite_two=self._none_to_blank(
                    old_citation.federal_cite_two),
                federal_cite_three=self._none_to_blank(
                    old_citation.federal_cite_three),
                state_cite_one=self._none_to_blank(
                    old_citation.state_cite_one),
                state_cite_two=self._none_to_blank(
                    old_citation.state_cite_two),
                state_cite_three=self._none_to_blank(
                    old_citation.state_cite_three),
                state_cite_regional=self._none_to_blank(
                    old_citation.state_cite_regional),
                specialty_cite_one=self._none_to_blank(
                    old_citation.specialty_cite_one),
                scotus_early_cite=self._none_to_blank(
                    old_citation.scotus_early_cite),
                lexis_cite=self._none_to_blank(old_citation.lexis_cite),
                westlaw_cite=self._none_to_blank(old_citation.westlaw_cite),
                neutral_cite=self._none_to_blank(old_citation.neutral_cite),
                scdb_id=self._none_to_blank(
                    old_document.supreme_court_db_id),
                source=old_document.source,
                nature_of_suit=old_document.nature_of_suit,
                citation_count=old_document.citation_count,
                precedential_status=old_document.precedential_status,
                date_blocked=old_document.date_blocked,
                blocked=old_document.blocked,
            )
            new_opinion_cluster.save(
                using='default',
                index=False,
            )

            new_opinion = OpinionNew(
                pk=old_document.pk,
                cluster=new_opinion_cluster,
                date_modified=old_document.date_modified,
                date_created=old_document.time_retrieved,
                type='010combined',
                sha1=old_document.sha1,
                download_url=old_document.download_url,
                local_path=old_document.local_path,
                plain_text=old_document.plain_text,
                html=self._none_to_blank(old_document.html),
                html_lawbox=self._none_to_blank(old_document.html_lawbox),
                html_with_citations=old_document.html_with_citations,
                extracted_by_ocr=old_document.extracted_by_ocr,
            )
            new_opinion.save(
                using='default',
                index=False,
            )

        if old_audio is not None:
            new_audio_file = AudioNew(
                pk=old_audio.pk,
                docket=new_docket,
                source=old_audio.source,
                case_name=old_audio_case_name,
                case_name_short=old_audio_case_name_short,
                case_name_full=old_audio_case_name_full,
                judges=self._none_to_blank(old_audio.judges),
                date_created=old_audio.time_retrieved,
                date_modified=old_audio.date_modified,
                sha1=old_audio.sha1,
                download_url=old_audio.download_url,
                local_path_mp3=old_audio.local_path_mp3,
                local_path_original_file=old_audio.local_path_original_file,
                duration=old_audio.duration,
                processing_complete=old_audio.processing_complete,
                date_blocked=old_audio.date_blocked,
                blocked=old_audio.blocked,
            )
            new_audio_file.save(
                using='default',
                index=False,
            )

        progress += 1
        self._print_progress(progress, num_dockets)
    self.stdout.write(u'')  # Newline
def add_new_records(
    html_str: str,
    data: Dict[str, Any],
    date_argued: datetime.date,
    date_filed: datetime.date,
    case_names: Dict[str, str],
    status: str,
    docket_number: str,
    found_citations: List[FoundCitation],
    court_id: str,
) -> Docket:
    """Create new records in the DB based on parsed data

    :param html_str: HTML opinion to add
    :param data: Case data to import
    :param date_argued: Date case was argued.
    :param date_filed: Date case was filed.
    :param case_names: A dict with the three case name types
    :param status: Whether it's precedential
    :param docket_number: The docket number
    :param found_citations: A list of FoundCitation objects.
    :param court_id: The CL id of the court
    :return: The newly created Docket.  (Docstring previously said None.)
    """
    docket = Docket.objects.create(
        **case_names,
        docket_number=docket_number,
        court_id=court_id,
        source=Docket.ANON_2020,
        ia_needs_upload=False,
        date_argued=date_argued,
    )

    logger.info("Add cluster for: %s", found_citations[0].base_citation())
    judges = data["judges"] or ""
    cluster = OpinionCluster(
        **case_names,
        precedential_status=status,
        docket_id=docket.id,
        # NOTE(review): reads the ANON_2020 class constant through the
        # docket instance; presumably OpinionCluster shares the same source
        # codes — confirm.
        source=docket.ANON_2020,
        date_filed=date_filed,
        attorneys=data["representation"] or "",
        disposition=data["summary_disposition"] or "",
        summary=data["summary_court"] or "",
        history=data["history"] or "",
        cross_reference=data["history_docket_numbers"] or "",
        correction=data["publication_status_note"] or "",
        # Strip the set-literal braces the upstream data wraps judges in.
        judges=judges.replace("{", "").replace("}", "") or "",
    )
    cluster.save(index=False)

    for citation in found_citations:
        logger.info("Adding citation for: %s", citation.base_citation())
        # get_or_create keeps re-imports idempotent.
        Citation.objects.get_or_create(
            volume=citation.volume,
            reporter=citation.reporter,
            page=citation.page,
            type=map_reporter_db_cite_type(
                REPORTERS[citation.canonical_reporter][0]["cite_type"]),
            cluster_id=cluster.id,
        )

    op = Opinion(
        cluster_id=cluster.id,
        type=Opinion.COMBINED,
        html_anon_2020=html_str,
        extracted_by_ocr=False,
    )
    op.save()
    logger.info(
        f"Finished importing cluster {cluster.id}; {found_citations[0].base_citation()}"
    )
    return docket
def make_and_save(item, skipdupes=False, min_dates=None, start_dates=None,
                  testing=True):
    """Associates case data from `parse_opinions` with objects. Saves these
    objects.

    :param item: Parsed case dict from `parse_opinions`.
    :param skipdupes: If True, silently skip duplicates instead of raising.
    :param min_dates: Optional {court_id: date}; skip cases on/after it.
    :param start_dates: Optional {court_id: date}; skip cases on/before it
        (i.e. before the court's founding).
    :param testing: When True, nothing is written to the database.
    """
    date_filed = date_argued = date_reargued = date_reargument_denied = \
        date_cert_granted = date_cert_denied = None
    unknown_date = None
    for date_cluster in item["dates"]:
        for date_info in date_cluster:
            # check for any dates that clearly aren't dates
            if date_info[1].year < 1600 or date_info[1].year > 2020:
                continue
            # check for untagged dates that will be assigned to date_filed
            if date_info[0] is None:
                date_filed = date_info[1]
                continue
            # try to figure out what type of date it is based on its tag
            # string
            if date_info[0] in FILED_TAGS:
                date_filed = date_info[1]
            elif date_info[0] in DECIDED_TAGS:
                if not date_filed:
                    date_filed = date_info[1]
            elif date_info[0] in ARGUED_TAGS:
                date_argued = date_info[1]
            elif date_info[0] in REARGUE_TAGS:
                date_reargued = date_info[1]
            elif date_info[0] in REARGUE_DENIED_TAGS:
                date_reargument_denied = date_info[1]
            elif date_info[0] in CERT_GRANTED_TAGS:
                date_cert_granted = date_info[1]
            elif date_info[0] in CERT_DENIED_TAGS:
                date_cert_denied = date_info[1]
            else:
                unknown_date = date_info[1]
                if date_info[0] not in UNKNOWN_TAGS:
                    print("\nFound unknown date tag '%s' with date '%s'.\n"
                          % date_info)

    # the main date (used for date_filed in OpinionCluster) and panel dates
    # (used for finding judges) are ordered in terms of which type of dates
    # best reflect them
    main_date = (date_filed or date_argued or date_reargued
                 or date_reargument_denied or unknown_date)
    panel_date = (date_argued or date_reargued or date_reargument_denied
                  or date_filed or unknown_date)

    if main_date is None:
        raise Exception("Failed to get a date for " + item["file"])

    # special rule for Kentucky
    if item["court_id"] == "kycourtapp" and main_date <= date(1975, 12, 31):
        item["court_id"] = "kycourtapphigh"

    if min_dates is not None:
        if min_dates.get(item["court_id"]) is not None:
            if main_date >= min_dates[item["court_id"]]:
                print(
                    main_date,
                    "after",
                    min_dates[item["court_id"]],
                    " -- skipping.",
                )
                return
    if start_dates is not None:
        if start_dates.get(item["court_id"]) is not None:
            if main_date <= start_dates[item["court_id"]]:
                print(
                    main_date,
                    "before court founding:",
                    start_dates[item["court_id"]],
                    " -- skipping.",
                )
                return

    docket = Docket(
        source=Docket.COLUMBIA,
        date_argued=date_argued,
        date_reargued=date_reargued,
        date_cert_granted=date_cert_granted,
        date_cert_denied=date_cert_denied,
        date_reargument_denied=date_reargument_denied,
        court_id=item["court_id"],
        case_name_short=item["case_name_short"] or "",
        case_name=item["case_name"] or "",
        case_name_full=item["case_name_full"] or "",
        docket_number=item["docket"] or "",
    )

    # get citation objects in a list for addition to the cluster
    found_citations = []
    for c in item["citations"]:
        found = get_citations(clean_text(c, ["html", "inline_whitespace"]))
        if not found:
            # if the docket number --is-- citation string, we're likely
            # dealing with a somewhat common triplet of (docket number,
            # date, jurisdiction), which isn't a citation at all (so
            # there's no problem)
            if item["docket"]:
                docket_no = item["docket"].lower()
                if "claim no." in docket_no:
                    docket_no = docket_no.split("claim no.")[0]
                for junk in DOCKET_JUNK:
                    docket_no = docket_no.replace(junk, "")
                docket_no = docket_no.strip(".").strip()
                if docket_no and docket_no in c.lower():
                    continue

            # there are a trivial number of letters (except for
            # months and a few trivial words) in the citation,
            # then it's not a citation at all
            non_trivial = c.lower()
            for trivial in TRIVIAL_CITE_WORDS:
                non_trivial = non_trivial.replace(trivial, "")
            # BUG FIX: string.lowercase was Python 2-only and raises
            # AttributeError on Python 3; ascii_lowercase is identical on
            # both versions.
            num_letters = sum(
                non_trivial.count(letter)
                for letter in string.ascii_lowercase)
            if num_letters < 3:
                continue

            # if there is a string that's known to indicate
            # a bad citation, then it's not a citation
            if any(bad in c for bad in BAD_CITES):
                continue
            # otherwise, this is a problem
            raise Exception("Failed to get a citation from the string '%s' "
                            "in court '%s' with docket '%s'."
                            % (c, item["court_id"], item["docket"]))
        else:
            found_citations.extend(found.to_model())

    cluster = OpinionCluster(
        judges=item.get("judges", "") or "",
        precedential_status=("Unpublished" if item["unpublished"]
                             else "Published"),
        date_filed=main_date,
        case_name_short=item["case_name_short"] or "",
        case_name=item["case_name"] or "",
        case_name_full=item["case_name_full"] or "",
        source="Z",
        attorneys=item["attorneys"] or "",
        posture=item["posture"] or "",
    )
    panel = lookup_judges_by_last_name_list(item["panel"], item["court_id"],
                                           panel_date)

    opinions = []
    for i, opinion_info in enumerate(item["opinions"]):
        if opinion_info["author"] is None:
            author = None
        else:
            author = lookup_judge_by_last_name(opinion_info["author"],
                                               item["court_id"], panel_date)

        converted_text = convert_columbia_html(opinion_info["opinion"])
        opinion_type = OPINION_TYPE_MAPPING[opinion_info["type"]]
        # Only the first opinion may be the lead; later "leads" become
        # addenda.
        if opinion_type == Opinion.LEAD and i > 0:
            opinion_type = Opinion.ADDENDUM

        opinion = Opinion(
            author=author,
            per_curiam=opinion_info["per_curiam"],
            type=opinion_type,
            html_columbia=converted_text,
            sha1=opinion_info["sha1"],
            # This is surely not updated for the new S3 world. If you're
            # reading this, you'll need to update this code.
            local_path=opinion_info["local_path"],
        )
        joined_by = lookup_judges_by_last_name_list(item["joining"],
                                                    item["court_id"],
                                                    panel_date)
        opinions.append((opinion, joined_by))

    if min_dates is None:
        # check to see if this is a duplicate
        dups = find_dups(docket, cluster)
        if dups:
            if skipdupes:
                print("Duplicate. skipping.")
            else:
                raise Exception("Found %s duplicate(s)." % len(dups))

    # save all the objects
    if not testing:
        try:
            docket.save()
            cluster.docket = docket
            cluster.save(index=False)
            for citation in found_citations:
                citation.cluster = cluster
                citation.save()
            for member in panel:
                cluster.panel.add(member)
            for opinion, joined_by in opinions:
                opinion.cluster = cluster
                opinion.save(index=False)
                for joiner in joined_by:
                    opinion.joined_by.add(joiner)
            if settings.DEBUG:
                domain = "http://127.0.0.1:8000"
            else:
                domain = "https://www.courtlistener.com"
            print("Created item at: %s%s"
                  % (domain, cluster.get_absolute_url()))
        except:
            # if anything goes wrong, try to delete everything.  Bare except
            # is deliberate: cleanup must run for *any* error, and the
            # original exception is re-raised below.
            try:
                docket.delete()
            except:
                pass
            raise
def parse_harvard_opinions(reporter, volume, make_searchable):
    """
    Parse downloaded CaseLaw Corpus from internet archive and add them to our
    database.

    Optionally uses a reporter abbreviation to identify cases to download as
    used by IA.  (Ex. T.C. => tc)

    Optionally uses a volume integer.

    If neither is provided, code will cycle through all downloaded files.

    :param volume: The volume (int) of the reporters (optional) (ex 10)
    :param reporter: Reporter string as slugify'd (optional) (tc) for T.C.
    :param make_searchable: Boolean to indicate saving to solr
    :return: None
    """
    if not reporter and volume:
        logger.error("You provided a volume but no reporter. Exiting.")
        return

    for file_path in filepath_list(reporter, volume):
        ia_download_url = "/".join(
            ["https://archive.org/download", file_path.split("/", 9)[-1]]
        )

        if OpinionCluster.objects.filter(
            filepath_json_harvard=file_path
        ).exists():
            logger.info("Skipping - already in system %s" % ia_download_url)
            continue

        try:
            with open(file_path) as f:
                data = json.load(f)
        except ValueError:
            logger.warning("Empty json: missing case at: %s"
                           % ia_download_url)
            continue
        except Exception as e:
            logger.warning("Unknown error %s for: %s" % (e, ia_download_url))
            continue

        cites = get_citations(data["citations"][0]["cite"])
        if not cites:
            logger.info(
                "No citation found for %s." % data["citations"][0]["cite"]
            )
            continue

        case_name = harmonize(data["name_abbreviation"])
        case_name_short = cnt.make_case_name_short(case_name)
        case_name_full = harmonize(data["name"])

        citation = cites[0]
        if skip_processing(citation, case_name, file_path):
            continue

        # TODO: Generalize this to handle all court types somehow.
        court_id = match_court_string(
            data["court"]["name"],
            state=True,
            federal_appeals=True,
            federal_district=True,
        )

        soup = BeautifulSoup(data["casebody"]["data"], "lxml")

        # Some documents contain images in the HTML
        # Flag them for a later crawl by using the placeholder '[[Image]]'
        judge_list = [
            extract_judge_last_name(x.text) for x in soup.find_all("judges")
        ]
        author_list = [
            extract_judge_last_name(x.text) for x in soup.find_all("author")
        ]
        # Flatten and dedupe list of judges
        judges = ", ".join(
            sorted(
                list(
                    set(
                        itertools.chain.from_iterable(
                            judge_list + author_list)
                    )
                )
            )
        )
        judges = titlecase(judges)
        docket_string = (
            data["docket_number"]
            .replace("Docket No.", "")
            .replace("Docket Nos.", "")
            .strip()
        )

        short_fields = ["attorneys", "disposition", "otherdate", "seealso"]
        long_fields = [
            "syllabus",
            "summary",
            "history",
            "headnotes",
            "correction",
        ]
        short_data = parse_extra_fields(soup, short_fields, False)
        long_data = parse_extra_fields(soup, long_fields, True)

        with transaction.atomic():
            logger.info("Adding docket for: %s", citation.base_citation())
            docket = Docket(
                case_name=case_name,
                case_name_short=case_name_short,
                case_name_full=case_name_full,
                docket_number=docket_string,
                court_id=court_id,
                source=Docket.HARVARD,
                ia_needs_upload=False,
            )
            try:
                # Inner atomic block creates a savepoint so a failed save
                # doesn't poison the outer transaction.
                with transaction.atomic():
                    docket.save()
            except OperationalError as e:
                if "exceeds maximum" in str(e):
                    docket.docket_number = (
                        "%s, See Corrections for full Docket Number"
                        % trunc(docket_string, length=5000, ellipsis="...")
                    )
                    docket.save()
                    long_data["correction"] = "%s <br> %s" % (
                        data["docket_number"],
                        long_data["correction"],
                    )
            # Handle partial dates by adding -01 to YYYY-MM dates
            date_filed, is_approximate = validate_dt(data["decision_date"])

            logger.info("Adding cluster for: %s", citation.base_citation())
            cluster = OpinionCluster(
                case_name=case_name,
                case_name_short=case_name_short,
                case_name_full=case_name_full,
                precedential_status="Published",
                docket_id=docket.id,
                source="U",
                date_filed=date_filed,
                date_filed_is_approximate=is_approximate,
                attorneys=short_data["attorneys"],
                disposition=short_data["disposition"],
                syllabus=long_data["syllabus"],
                summary=long_data["summary"],
                history=long_data["history"],
                other_dates=short_data["otherdate"],
                cross_reference=short_data["seealso"],
                headnotes=long_data["headnotes"],
                correction=long_data["correction"],
                judges=judges,
                filepath_json_harvard=file_path,
            )
            cluster.save(index=False)

            logger.info("Adding citation for: %s", citation.base_citation())
            Citation.objects.create(
                volume=citation.volume,
                reporter=citation.reporter,
                page=citation.page,
                type=map_reporter_db_cite_type(
                    REPORTERS[citation.canonical_reporter][0]["cite_type"]
                ),
                cluster_id=cluster.id,
            )
            new_op_pks = []
            for op in soup.find_all("opinion"):
                # Clean the author tag for processing; particularly useful
                # for identifying Per Curiam.
                auth = op.find("author")
                if auth is not None:
                    # Strip page-number markup before reading the text.
                    for page_number in auth.find_all("page-number"):
                        page_number.extract()
                    author_tag_str = titlecase(auth.text.strip(":"))
                    author_str = titlecase(
                        "".join(extract_judge_last_name(author_tag_str))
                    )
                else:
                    author_str = ""
                    author_tag_str = ""

                per_curiam = author_tag_str == "Per Curiam"
                # If Per Curiam is True set author string to Per Curiam
                if per_curiam:
                    author_str = "Per Curiam"

                op_type = map_opinion_type(op.get("type"))
                opinion_xml = str(op)
                logger.info("Adding opinion for: %s",
                            citation.base_citation())
                op = Opinion(
                    cluster_id=cluster.id,
                    type=op_type,
                    author_str=author_str,
                    xml_harvard=opinion_xml,
                    per_curiam=per_curiam,
                    extracted_by_ocr=True,
                )
                # Don't index now; do so later if desired
                op.save(index=False)
                new_op_pks.append(op.pk)

        if make_searchable:
            add_items_to_solr.delay(new_op_pks, "search.Opinion")

        logger.info("Finished: %s", citation.base_citation())
def make_and_save(item, skipdupes=False, min_dates=None, testing=True):
    """Associates case data from `parse_opinions` with objects. Saves these
    objects.

    min_date: if not none, will skip cases after min_date
    """
    # All date buckets start empty; each parsed date lands in one of them
    # below, keyed by the tag it was found under.
    date_filed = date_argued = date_reargued = date_reargument_denied = \
        date_cert_granted = date_cert_denied = None
    unknown_date = None
    for date_cluster in item['dates']:
        for date_info in date_cluster:
            # check for any dates that clearly aren't dates
            if date_info[1].year < 1600 or date_info[1].year > 2020:
                continue
            # check for untagged dates that will be assigned to date_filed
            if date_info[0] is None:
                date_filed = date_info[1]
                continue
            # try to figure out what type of date it is based on its tag
            # string
            if date_info[0] in FILED_TAGS:
                date_filed = date_info[1]
            elif date_info[0] in DECIDED_TAGS:
                if not date_filed:
                    date_filed = date_info[1]
            elif date_info[0] in ARGUED_TAGS:
                date_argued = date_info[1]
            elif date_info[0] in REARGUE_TAGS:
                date_reargued = date_info[1]
            elif date_info[0] in REARGUE_DENIED_TAGS:
                date_reargument_denied = date_info[1]
            elif date_info[0] in CERT_GRANTED_TAGS:
                date_cert_granted = date_info[1]
            elif date_info[0] in CERT_DENIED_TAGS:
                date_cert_denied = date_info[1]
            else:
                unknown_date = date_info[1]
                if date_info[0] not in UNKNOWN_TAGS:
                    print("\nFound unknown date tag '%s' with date '%s'.\n"
                          % date_info)

    # the main date (used for date_filed in OpinionCluster) and panel dates
    # (used for finding judges) are ordered in terms of which type of dates
    # best reflect them
    main_date = (date_filed or date_argued or date_reargued or
                 date_reargument_denied or unknown_date)
    panel_date = (date_argued or date_reargued or date_reargument_denied or
                  date_filed or unknown_date)

    if main_date is None:
        raise Exception("Failed to get a date for " + item['file'])

    if min_dates is not None:
        if min_dates.get(item['court_id']) is not None:
            if main_date >= min_dates[item['court_id']]:
                print(main_date, 'after', min_dates[item['court_id']],
                      ' -- skipping.')
                return

    docket = Docket(
        source=Docket.COLUMBIA,
        date_argued=date_argued,
        date_reargued=date_reargued,
        date_cert_granted=date_cert_granted,
        date_cert_denied=date_cert_denied,
        date_reargument_denied=date_reargument_denied,
        court_id=item['court_id'],
        case_name_short=item['case_name_short'] or '',
        case_name=item['case_name'] or '',
        case_name_full=item['case_name_full'] or '',
        docket_number=item['docket'] or ''
    )

    # get citations in the form of, e.g. {'federal_cite_one': '1 U.S. 1', ...}
    found_citations = []
    for c in item['citations']:
        found = get_citations(c)
        if not found:
            # if the docket number --is-- citation string, we're likely
            # dealing with a somewhat common triplet of (docket number,
            # date, jurisdiction), which isn't a citation at all (so
            # there's no problem)
            if item['docket']:
                docket_no = item['docket'].lower()
                if 'claim no.' in docket_no:
                    docket_no = docket_no.split('claim no.')[0]
                for junk in DOCKET_JUNK:
                    docket_no = docket_no.replace(junk, '')
                docket_no = docket_no.strip('.').strip()
                if docket_no and docket_no in c.lower():
                    continue

            # there are a trivial number of letters (except for months and a
            # few trivial words) in the citation, then it's not a citation
            # at all
            non_trivial = c.lower()
            for trivial in TRIVIAL_CITE_WORDS:
                non_trivial = non_trivial.replace(trivial, '')
            # NOTE(review): string.lowercase exists only on Python 2; under
            # Python 3 this raises AttributeError (the py3 name is
            # string.ascii_lowercase).  Confirm the target runtime.
            num_letters = sum(non_trivial.count(letter)
                              for letter in string.lowercase)
            if num_letters < 3:
                continue

            # if there is a string that's known to indicate a bad citation,
            # then it's not a citation
            if any(bad in c for bad in BAD_CITES):
                continue
            # otherwise, this is a problem
            raise Exception("Failed to get a citation from the string '%s' in "
                            "court '%s' with docket '%s'."
                            % (
                                c, item['court_id'], item['docket']
                            ))
        else:
            found_citations.extend(found)
    citations_map = map_citations_to_models(found_citations)

    cluster = OpinionCluster(
        judges=item.get('judges', '') or "",
        precedential_status=('Unpublished' if item['unpublished']
                             else 'Published'),
        date_filed=main_date,
        case_name_short=item['case_name_short'] or '',
        case_name=item['case_name'] or '',
        case_name_full=item['case_name_full'] or '',
        source='Z',
        attorneys=item['attorneys'] or '',
        posture=item['posture'] or '',
        **citations_map
    )
    # Panel members are looked up by name as of panel_date; unmatched names
    # are dropped.
    panel = [find_person(n, item['court_id'], case_date=panel_date)
             for n in item['panel']]
    panel = [x for x in panel if x is not None]

    opinions = []
    for i, opinion_info in enumerate(item['opinions']):
        if opinion_info['author'] is None:
            author = None
        else:
            author = find_person(opinion_info['author'], item['court_id'],
                                 case_date=panel_date)

        converted_text = convert_columbia_html(opinion_info['opinion'])
        opinion_type = OPINION_TYPE_MAPPING[opinion_info['type']]
        # Only the first opinion can be the lead ('020lead'); any later
        # "lead" is demoted to an addendum ('050addendum').
        if opinion_type == '020lead' and i > 0:
            opinion_type = '050addendum'

        opinion = Opinion(
            author=author,
            per_curiam=opinion_info['per_curiam'],
            type=opinion_type,
            # type=OPINION_TYPE_MAPPING[opinion_info['type']],
            html_columbia=converted_text,
            sha1=opinion_info['sha1'],
            local_path=opinion_info['local_path'],
        )
        joined_by = [find_person(n, item['court_id'], case_date=panel_date)
                     for n in opinion_info['joining']]
        joined_by = [x for x in joined_by if x is not None]
        opinions.append((opinion, joined_by))

    if min_dates is None:
        # check to see if this is a duplicate
        dups = find_dups(docket, cluster, panel, opinions)
        if dups:
            if skipdupes:
                print('Duplicate. skipping.')
            else:
                raise Exception("Found %s duplicate(s)."
                                % len(dups))

    # save all the objects
    if not testing:
        try:
            docket.save()
            cluster.docket = docket
            cluster.save(index=False)
            for member in panel:
                cluster.panel.add(member)
            for opinion, joined_by in opinions:
                opinion.cluster = cluster
                opinion.save(index=False)
                for joiner in joined_by:
                    opinion.joined_by.add(joiner)
            if settings.DEBUG:
                domain = "http://127.0.0.1:8000"
            else:
                domain = "https://www.courtlistener.com"
            print("Created item at: %s%s"
                  % (domain, cluster.get_absolute_url()))
        except:
            # if anything goes wrong, try to delete everything.  Bare except
            # is deliberate: cleanup must run for any error and the original
            # exception is re-raised below.
            try:
                docket.delete()
            except:
                pass
            raise
def make_and_save(item):
    """Associates case data from `parse_opinions` with objects. Saves these
    objects.

    :param item: dict produced by `parse_opinions`, with keys like 'dates',
        'citations', 'court_id', 'case_name', 'panel', and 'opinions'.
    :raises Exception: if a citation string yields no citation or more than
        one citation.
    """
    date_filed = date_argued = date_reargued = None
    date_reargument_denied = date_cert_granted = date_cert_denied = None
    for date_cluster in item['dates']:
        for date_info in date_cluster:
            # check for any dates that clearly aren't dates
            if date_info[1].year < 1600 or date_info[1].year > 2020:
                continue
            # check for untagged dates that will be assigned to date_filed
            if date_info[0] is None:
                date_filed = date_info[1]
                continue
            # try to figure out what type of date it is based on its tag
            # string
            if date_info[0] in FILED_TAGS:
                date_filed = date_info[1]
            elif date_info[0] in DECIDED_TAGS:
                # a "decided" date only fills in date_filed when no filed
                # date was found
                if not date_filed:
                    date_filed = date_info[1]
            elif date_info[0] in ARGUED_TAGS:
                date_argued = date_info[1]
            elif date_info[0] in REARGUE_TAGS:
                date_reargued = date_info[1]
            elif date_info[0] in REARGUE_DENIED_TAGS:
                date_reargument_denied = date_info[1]
            elif date_info[0] in CERT_GRANTED_TAGS:
                date_cert_granted = date_info[1]
            elif date_info[0] in CERT_DENIED_TAGS:
                date_cert_denied = date_info[1]
            else:
                print("Found unknown date tag '%s' with date '%s'." %
                      date_info)

    docket = Docket(
        date_argued=date_argued,
        date_reargued=date_reargued,
        date_cert_granted=date_cert_granted,
        date_cert_denied=date_cert_denied,
        date_reargument_denied=date_reargument_denied,
        court_id=item['court_id'],
        case_name_short=item['case_name_short'] or '',
        case_name=item['case_name'] or '',
        case_name_full=item['case_name_full'] or '',
        docket_number=item['docket'] or '',
    )
    docket.save()

    # get citations in the form of, e.g. {'federal_cite_one': '1 U.S. 1', ...}
    found_citations = []
    for c in item['citations']:
        found = get_citations(c)
        if not found:
            raise Exception("Failed to get a citation from the string '%s'." % c)
        elif len(found) > 1:
            raise Exception("Got multiple citations from string '%s' when there should have been one." % c)
        found_citations.append(found[0])
    citations_map = map_citations_to_models(found_citations)

    cluster = OpinionCluster(
        docket=docket,
        precedential_status=('Unpublished' if item['unpublished']
                             else 'Published'),
        date_filed=date_filed,
        case_name_short=item['case_name_short'] or '',
        case_name=item['case_name'] or '',
        case_name_full=item['case_name_full'] or '',
        source='Z',
        attorneys=item['attorneys'] or '',
        posture=item['posture'] or '',
        **citations_map
    )
    cluster.save()

    # the argued date (when available) best reflects when the panel sat
    if date_argued is not None:
        paneldate = date_argued
    else:
        paneldate = date_filed
    panel = [find_person(n, item['court_id'], paneldate)
             for n in item['panel']]
    panel = [x for x in panel if x is not None]
    for member in panel:
        cluster.panel.add(member)

    for opinion_info in item['opinions']:
        if opinion_info['author'] is None:
            author = None
        else:
            author = find_person(opinion_info['author'], item['court_id'],
                                 date_filed or date_argued)
        opinion = Opinion(
            cluster=cluster,
            author=author,
            type=OPINION_TYPE_MAPPING[opinion_info['type']],
            html_columbia=opinion_info['opinion'],
        )
        opinion.save()
        joined_by = [find_person(n, item['court_id'], paneldate)
                     for n in opinion_info['joining']]
        joined_by = [x for x in joined_by if x is not None]
        for joiner in joined_by:
            opinion.joined_by.add(joiner)
class StaticFilesTest(TestCase):
    """Checks that `serve_static_file` serves stored files with a 200 status,
    the right Content-Type, and an inline Content-Disposition.
    """
    # Paths of fixture files, relative to the media root.
    good_mp3_path = "mp3/2014/06/09/ander_v._leo.mp3"
    good_txt_path = "txt/2015/12/28/opinion_text.txt"
    good_pdf_path = (
        "pdf/2013/06/12/" +
        "in_re_motion_for_consent_to_disclosure_of_court_records.pdf")

    def setUp(self):
        """Build a docket with one Audio item and two Opinions (txt + pdf)."""
        self.court = Court.objects.get(pk="test")
        self.docket = Docket(case_name=u"Docket", court=self.court,
                             source=Docket.DEFAULT)
        self.docket.save()
        self.audio = Audio(
            local_path_original_file=self.good_mp3_path,
            local_path_mp3=self.good_mp3_path,
            docket=self.docket,
            blocked=False,
            case_name_full="Ander v. Leo",
            date_created=datetime.date(2014, 6, 9),
        )
        # index=False keeps the search index out of these unit tests
        self.audio.save(index=False)
        self.opinioncluster = OpinionCluster(
            case_name=u"Hotline Bling",
            docket=self.docket,
            date_filed=datetime.date(2015, 12, 14),
        )
        self.opinioncluster.save(index=False)
        self.txtopinion = Opinion(
            cluster=self.opinioncluster,
            type="Lead Opinion",
            local_path=self.good_txt_path,
        )
        self.txtopinion.save(index=False)
        self.pdfopinion = Opinion(
            cluster=self.opinioncluster,
            type="Lead Opinion",
            local_path=self.good_pdf_path,
        )
        self.pdfopinion.save(index=False)

    def test_serve_static_file_serves_mp3(self):
        request = HttpRequest()
        # (removed an unused `file_path = self.audio.local_path_mp3` local;
        # the view is called with the class-level path directly)
        response = serve_static_file(request, file_path=self.good_mp3_path)
        self.assertEqual(response.status_code, 200)
        self.assertEqual(response["Content-Type"], "audio/mpeg")
        self.assertIn("inline;", response["Content-Disposition"])

    def test_serve_static_file_serves_txt(self):
        request = HttpRequest()
        response = serve_static_file(request, file_path=self.good_txt_path)
        self.assertEqual(response.status_code, 200)
        self.assertEqual(response["Content-Type"], "text/plain")
        self.assertIn("inline;", response["Content-Disposition"])
        self.assertIn("FOR THE DISTRICT OF COLUMBIA CIRCUIT",
                      response.content)

    def test_serve_static_file_serves_pdf(self):
        request = HttpRequest()
        response = serve_static_file(request, file_path=self.good_pdf_path)
        self.assertEqual(response.status_code, 200)
        self.assertEqual(response["Content-Type"], "application/pdf")
        self.assertIn("inline;", response["Content-Disposition"])
def add_oc_and_o(self, old_document, old_citation, old_docket, new_docket):
    """Add the OpinionCluster and Opinion, updating existing items if
    present.
    """
    # Short alias for the None -> '' char-field normalizer.
    nb = self._none_to_blank

    cluster_kwargs = {
        'pk': old_document.pk,
        'docket': new_docket,
        'judges': nb(old_document.judges),
        'date_modified': old_document.date_modified,
        # NOTE(review): date_created is copied from date_modified here while
        # the Opinion below uses time_retrieved -- confirm this is intended.
        'date_created': old_document.date_modified,
        'date_filed': old_document.date_filed,
        'slug': nb(old_citation.slug),
        'citation_id': old_document.citation_id,
        'case_name_short': old_docket.case_name_short,
        'case_name': old_docket.case_name,
        'case_name_full': old_docket.case_name_full,
        'federal_cite_one': nb(old_citation.federal_cite_one),
        'federal_cite_two': nb(old_citation.federal_cite_two),
        'federal_cite_three': nb(old_citation.federal_cite_three),
        'state_cite_one': nb(old_citation.state_cite_one),
        'state_cite_two': nb(old_citation.state_cite_two),
        'state_cite_three': nb(old_citation.state_cite_three),
        'state_cite_regional': nb(old_citation.state_cite_regional),
        'specialty_cite_one': nb(old_citation.specialty_cite_one),
        'scotus_early_cite': nb(old_citation.scotus_early_cite),
        'lexis_cite': nb(old_citation.lexis_cite),
        'westlaw_cite': nb(old_citation.westlaw_cite),
        'neutral_cite': nb(old_citation.neutral_cite),
        'scdb_id': nb(old_document.supreme_court_db_id),
        'source': old_document.source,
        'nature_of_suit': old_document.nature_of_suit,
        'citation_count': old_document.citation_count,
        'precedential_status': old_document.precedential_status,
        'date_blocked': old_document.date_blocked,
        'blocked': old_document.blocked,
    }
    new_opinion_cluster = OpinionClusterNew(**cluster_kwargs)
    # Write straight to the default DB without touching the search index.
    new_opinion_cluster.save(using='default', index=False)

    opinion_kwargs = {
        'pk': old_document.pk,
        'cluster': new_opinion_cluster,
        'date_modified': old_document.date_modified,
        'date_created': old_document.time_retrieved,
        'type': '010combined',
        'sha1': old_document.sha1,
        'download_url': old_document.download_url,
        'local_path': old_document.local_path,
        'plain_text': old_document.plain_text,
        'html': nb(old_document.html),
        'html_lawbox': nb(old_document.html_lawbox),
        'html_with_citations': old_document.html_with_citations,
        'extracted_by_ocr': old_document.extracted_by_ocr,
    }
    new_opinion = OpinionNew(**opinion_kwargs)
    new_opinion.save(using='default', index=False)
def make_and_save(item, skipdupes=False, min_dates=None, start_dates=None,
                  testing=True):
    """Associates case data from `parse_opinions` with objects. Saves these
    objects.

    min_date: if not none, will skip cases after min_date

    :param item: dict produced by `parse_opinions`.
    :param skipdupes: when True, silently skip detected duplicates instead of
        raising.
    :param min_dates: optional {court_id: date} map; cases on/after the
        court's date are skipped.
    :param start_dates: optional {court_id: date} map of court founding
        dates; cases on/before the court's date are skipped.
    :param testing: when True (the default), build the objects but do not
        save anything to the database.
    :raises Exception: when no usable date is found, when a citation string
        can't be resolved, or when duplicates are found and skipdupes is
        False.
    """
    date_filed = date_argued = date_reargued = date_reargument_denied = date_cert_granted = date_cert_denied = None
    unknown_date = None
    for date_cluster in item['dates']:
        for date_info in date_cluster:
            # check for any dates that clearly aren't dates
            if date_info[1].year < 1600 or date_info[1].year > 2020:
                continue
            # check for untagged dates that will be assigned to date_filed
            if date_info[0] is None:
                date_filed = date_info[1]
                continue
            # try to figure out what type of date it is based on its tag
            # string
            if date_info[0] in FILED_TAGS:
                date_filed = date_info[1]
            elif date_info[0] in DECIDED_TAGS:
                # a "decided" date is only a fallback for a missing filed date
                if not date_filed:
                    date_filed = date_info[1]
            elif date_info[0] in ARGUED_TAGS:
                date_argued = date_info[1]
            elif date_info[0] in REARGUE_TAGS:
                date_reargued = date_info[1]
            elif date_info[0] in REARGUE_DENIED_TAGS:
                date_reargument_denied = date_info[1]
            elif date_info[0] in CERT_GRANTED_TAGS:
                date_cert_granted = date_info[1]
            elif date_info[0] in CERT_DENIED_TAGS:
                date_cert_denied = date_info[1]
            else:
                # keep the date as a last-resort fallback, but only complain
                # about tags we haven't already catalogued as unknown
                unknown_date = date_info[1]
                if date_info[0] not in UNKNOWN_TAGS:
                    print("\nFound unknown date tag '%s' with date '%s'.\n" %
                          date_info)

    # the main date (used for date_filed in OpinionCluster) and panel dates
    # (used for finding judges) are ordered in terms of which type of dates
    # best reflect them
    main_date = (date_filed or date_argued or date_reargued or
                 date_reargument_denied or unknown_date)
    panel_date = (date_argued or date_reargued or date_reargument_denied or
                  date_filed or unknown_date)

    if main_date is None:
        raise Exception("Failed to get a date for " + item['file'])

    # special rule for Kentucky
    if item['court_id'] == 'kycourtapp' and main_date <= date(1975, 12, 31):
        item['court_id'] = 'kycourtapphigh'

    # skip cases outside the per-court date windows, when given
    if min_dates is not None:
        if min_dates.get(item['court_id']) is not None:
            if main_date >= min_dates[item['court_id']]:
                print(main_date, 'after', min_dates[item['court_id']],
                      ' -- skipping.')
                return
    if start_dates is not None:
        if start_dates.get(item['court_id']) is not None:
            if main_date <= start_dates[item['court_id']]:
                print(main_date, 'before court founding:',
                      start_dates[item['court_id']], ' -- skipping.')
                return

    docket = Docket(source=Docket.COLUMBIA,
                    date_argued=date_argued,
                    date_reargued=date_reargued,
                    date_cert_granted=date_cert_granted,
                    date_cert_denied=date_cert_denied,
                    date_reargument_denied=date_reargument_denied,
                    court_id=item['court_id'],
                    case_name_short=item['case_name_short'] or '',
                    case_name=item['case_name'] or '',
                    case_name_full=item['case_name_full'] or '',
                    docket_number=item['docket'] or '')

    # get citations in the form of, e.g. {'federal_cite_one': '1 U.S. 1', ...}
    found_citations = []
    for c in item['citations']:
        found = get_citations(c)
        if not found:
            # if the docket number --is-- citation string, we're likely
            # dealing with a somewhat common triplet of (docket number, date,
            # jurisdiction), which isn't a citation at all (so there's no
            # problem)
            if item['docket']:
                docket_no = item['docket'].lower()
                if 'claim no.' in docket_no:
                    docket_no = docket_no.split('claim no.')[0]
                for junk in DOCKET_JUNK:
                    docket_no = docket_no.replace(junk, '')
                docket_no = docket_no.strip('.').strip()
                if docket_no and docket_no in c.lower():
                    continue

            # there are a trivial number of letters (except for months and a
            # few trivial words) in the citation, then it's not a citation at
            # all
            non_trivial = c.lower()
            for trivial in TRIVIAL_CITE_WORDS:
                non_trivial = non_trivial.replace(trivial, '')
            # string.lowercase is Python 2 only (string.ascii_lowercase on 3)
            num_letters = sum(
                non_trivial.count(letter) for letter in string.lowercase)
            if num_letters < 3:
                continue

            # if there is a string that's known to indicate a bad citation,
            # then it's not a citation
            if any(bad in c for bad in BAD_CITES):
                continue

            # otherwise, this is a problem
            raise Exception("Failed to get a citation from the string '%s' in "
                            "court '%s' with docket '%s'."
                            % (c, item['court_id'], item['docket']))
        else:
            found_citations.extend(found)
    citations_map = map_citations_to_models(found_citations)

    cluster = OpinionCluster(
        judges=item.get('judges', '') or "",
        precedential_status=('Unpublished' if item['unpublished']
                             else 'Published'),
        date_filed=main_date,
        case_name_short=item['case_name_short'] or '',
        case_name=item['case_name'] or '',
        case_name_full=item['case_name_full'] or '',
        source='Z',
        attorneys=item['attorneys'] or '',
        posture=item['posture'] or '',
        **citations_map)
    panel = [
        find_person(n, item['court_id'], case_date=panel_date)
        for n in item['panel']
    ]
    # drop panel names that couldn't be resolved to a person
    panel = [x for x in panel if x is not None]

    # build the opinions (with their joining judges) but don't save them yet
    opinions = []
    for i, opinion_info in enumerate(item['opinions']):
        if opinion_info['author'] is None:
            author = None
        else:
            author = find_person(opinion_info['author'], item['court_id'],
                                 case_date=panel_date)

        converted_text = convert_columbia_html(opinion_info['opinion'])
        opinion_type = OPINION_TYPE_MAPPING[opinion_info['type']]
        # only the first opinion may be the lead; later "lead" opinions are
        # reclassified as addenda
        if opinion_type == '020lead' and i > 0:
            opinion_type = '050addendum'

        opinion = Opinion(
            author=author,
            per_curiam=opinion_info['per_curiam'],
            type=opinion_type,
            html_columbia=converted_text,
            sha1=opinion_info['sha1'],
            local_path=opinion_info['local_path'],
        )
        joined_by = [
            find_person(n, item['court_id'], case_date=panel_date)
            for n in opinion_info['joining']
        ]
        joined_by = [x for x in joined_by if x is not None]
        opinions.append((opinion, joined_by))

    if min_dates is None:
        # check to see if this is a duplicate
        dups = find_dups(docket, cluster)
        if dups:
            if skipdupes:
                print('Duplicate. skipping.')
            else:
                raise Exception("Found %s duplicate(s)." % len(dups))

    # save all the objects
    if not testing:
        try:
            docket.save()
            cluster.docket = docket
            cluster.save(index=False)
            for member in panel:
                cluster.panel.add(member)
            for opinion, joined_by in opinions:
                opinion.cluster = cluster
                opinion.save(index=False)
                for joiner in joined_by:
                    opinion.joined_by.add(joiner)
            if settings.DEBUG:
                domain = "http://127.0.0.1:8000"
            else:
                domain = "https://www.courtlistener.com"
            print("Created item at: %s%s" %
                  (domain, cluster.get_absolute_url()))
        except:
            # if anything goes wrong, try to delete everything
            # NOTE(review): bare except also catches KeyboardInterrupt etc.;
            # it re-raises after best-effort cleanup, so behavior is kept.
            try:
                docket.delete()
            except:
                pass
            raise
class StaticFilesTest(TestCase):
    # Tests that serve_static_file returns stored files with a 200 status,
    # the right Content-Type, and an inline Content-Disposition header.

    # Paths of fixture files, relative to the media root.
    good_mp3_path = 'mp3/2014/06/09/ander_v._leo.mp3'
    good_txt_path = 'txt/2015/12/28/opinion_text.txt'
    good_pdf_path = 'pdf/2013/06/12/' + \
        'in_re_motion_for_consent_to_disclosure_of_court_records.pdf'

    def setUp(self):
        """Build a docket with one Audio item and two Opinions (txt + pdf)."""
        self.court = Court.objects.get(pk='test')
        self.docket = Docket(case_name=u'Docket', court=self.court,
                             source=Docket.DEFAULT)
        self.docket.save()
        self.audio = Audio(
            local_path_original_file=self.good_mp3_path,
            local_path_mp3=self.good_mp3_path,
            docket=self.docket,
            blocked=False,
            case_name_full='Ander v. Leo',
            date_created=datetime.date(2014, 6, 9)
        )
        # index=False keeps the search index out of these unit tests
        self.audio.save(index=False)
        self.opinioncluster = OpinionCluster(
            case_name=u'Hotline Bling',
            docket=self.docket,
            date_filed=datetime.date(2015, 12, 14),
        )
        self.opinioncluster.save(index=False)
        self.txtopinion = Opinion(
            cluster=self.opinioncluster,
            type='Lead Opinion',
            local_path=self.good_txt_path
        )
        self.txtopinion.save(index=False)
        self.pdfopinion = Opinion(
            cluster=self.opinioncluster,
            type='Lead Opinion',
            local_path=self.good_pdf_path
        )
        self.pdfopinion.save(index=False)

    def test_serve_static_file_serves_mp3(self):
        request = HttpRequest()
        # NOTE(review): file_path is unused -- the view is called with
        # good_mp3_path directly; consider removing this line.
        file_path = self.audio.local_path_mp3
        response = serve_static_file(request, file_path=self.good_mp3_path)
        self.assertEqual(response.status_code, 200)
        self.assertEqual(response['Content-Type'], 'audio/mpeg')
        self.assertIn('inline;', response['Content-Disposition'])

    def test_serve_static_file_serves_txt(self):
        request = HttpRequest()
        response = serve_static_file(request, file_path=self.good_txt_path)
        self.assertEqual(response.status_code, 200)
        self.assertEqual(response['Content-Type'], 'text/plain')
        self.assertIn('inline;', response['Content-Disposition'])
        self.assertIn(
            'FOR THE DISTRICT OF COLUMBIA CIRCUIT',
            response.content
        )

    def test_serve_static_file_serves_pdf(self):
        request = HttpRequest()
        response = serve_static_file(request, file_path=self.good_pdf_path)
        self.assertEqual(response.status_code, 200)
        self.assertEqual(response['Content-Type'], 'application/pdf')
        self.assertIn('inline;', response['Content-Disposition'])