def test_add_summary_merge_strategies(example_summary):
    """Check what happens to a duplicate case and to the Person under each add_summary option."""
    altered_summary = copy.deepcopy(example_summary)
    altered_summary.get_cases()[0].otn = "a_different_otn"

    def record_with_original_summary():
        # Every scenario starts from a dummy Person plus the unmodified summary.
        record = CRecord(Person("Dummy", "Name", None))
        record.add_summary(example_summary)
        return record

    # Scenario 1: no options. The duplicate case from the second summary
    # is ignored and the Person is left alone.
    record = record_with_original_summary()
    record.add_summary(altered_summary)
    assert record.cases[0].otn == example_summary.get_cases()[0].otn
    assert record.person.first_name == "Dummy"

    # Scenario 2: "overwrite_old" replaces the duplicate with the new case,
    # still without touching the Person.
    record = record_with_original_summary()
    record.add_summary(altered_summary, case_merge_strategy="overwrite_old")
    assert record.cases[0].otn == altered_summary.get_cases()[0].otn
    assert record.person.first_name == "Dummy"

    # Scenario 3: override_person=True swaps in the new summary's Person,
    # while the case keeps its original otn.
    record = record_with_original_summary()
    record.add_summary(altered_summary, override_person=True)
    assert record.cases[0].otn != altered_summary.get_cases()[0].otn
    assert record.person.first_name == altered_summary.get_defendant().first_name
def test_init():
    """A CRecord exposes the Person it was constructed with."""
    birth_date = date(2010, 1, 1)

    joe = Person(first_name="Joe", last_name="Smith", date_of_birth=birth_date)
    record = CRecord(person=joe)
    assert record.person.first_name == "Joe"

    joan = Person(first_name="Joan", last_name="Smythe", date_of_birth=birth_date)
    record = CRecord(person=joan)
    assert record.person.last_name == "Smythe"
def from_dict(dct: dict) -> Expungement:
    """
    Build an Expungement from a plain dict.

    The "attorney", "client" and "cases" entries are converted with
    Attorney.from_dict, Person.from_dict and Case.from_dict respectively;
    every other entry is passed to the Expungement constructor unchanged.

    Args:
        dct: keyword arguments for Expungement. Must contain the keys
            "attorney", "client" and "cases". It is not modified.

    Returns:
        The Expungement built from the converted entries.

    Raises:
        KeyError: if "attorney", "client" or "cases" is missing from dct.
    """
    # Convert into a shallow copy rather than updating dct in place, so the
    # caller's dict still holds the raw data afterwards (and can be passed
    # to from_dict again).
    kwargs = dict(dct)
    kwargs["attorney"] = Attorney.from_dict(dct["attorney"])
    kwargs["client"] = Person.from_dict(dct["client"])
    kwargs["cases"] = [Case.from_dict(c) for c in dct["cases"]]
    return Expungement(**kwargs)
def get_person(stree: etree) -> Person:
    """
    Extract a Person from the xml of a docket, parsed into sections.

    The name is read from the first "docket/header/caption/defendant_line"
    element. If that element is missing or has no text, the Person's first
    and last names are empty strings.

    Args:
        stree: xml tree of a docket, parsed into a header and some number of sections

    Returns:
        a Person object
    """
    try:
        defendant_line = stree.xpath("docket/header/caption/defendant_line")[0]
        if defendant_line.text is None:
            # An element without text content has text == None; treat it
            # like a missing element instead of failing on None.strip().
            first_name, last_name = "", ""
        else:
            first_name, last_name = split_first_name(defendant_line.text.strip())
    except IndexError:
        # No defendant_line element was found.
        first_name, last_name = "", ""
    aliases = xpath_or_empty_list(stree, "//alias")
    date_of_birth = xpath_date_or_blank(stree, "//birth_date")
    return Person(
        first_name=first_name,
        last_name=last_name,
        date_of_birth=date_of_birth,
        aliases=aliases,
    )
def example_person(example_address):
    """Return a fully populated sample Person whose address is example_address."""
    details = {
        "first_name": "Jane",
        "last_name": "Smorp",
        "aliases": ["JSmo", "SmorpyJJ"],
        "address": example_address,
        "date_of_birth": date(2010, 1, 1),
        "ssn": "999-99-9999",
    }
    return Person(**details)
def parse_person(txt: str) -> Tuple[Person, List[str]]:
    """
    Extract a Person from the text of a CP docket.

    The name, date of birth, aliases and address are searched for
    independently. When a search finds nothing, the errors returned by
    find_pattern are collected and the corresponding attribute of the
    Person is left as it was.

    Args:
        txt: the text of the docket.

    Returns:
        A tuple of the Person and the list of errors collected from the
        searches that failed.
    """
    person = Person(first_name=None, last_name=None, date_of_birth=None)
    errs = []

    # Name: a line of the form "Defendant   <last>, <first>".
    defendant_name, d_errs = find_pattern(
        "defendant_name",
        r"^Defendant\s+(?P<last_name>.*), (?P<first_name>.*)",
        txt,
        re.M,
    )
    if defendant_name is not None:
        person.first_name = defendant_name.group("first_name")
        person.last_name = defendant_name.group("last_name")
    else:
        errs.extend(d_errs)

    # Date of birth, written as m/d/yyyy.
    defendant_dob, dob_errs = find_pattern(
        "date_of_birth",
        r"Date Of Birth:?\s+(?P<date_of_birth>\d{1,2}\/\d{1,2}\/\d{4})",
        txt,
    )
    if defendant_dob is not None:
        person.date_of_birth = date_or_none(defendant_dob.group("date_of_birth"))
    else:
        errs.extend(dob_errs)

    # Aliases and address are only looked for inside the section between
    # the "DEFENDANT INFORMATION" and "CASE PARTICIPANTS" headings.
    defendant_info_section, d_section_errs = find_pattern(
        "defendant_info",
        r"DEFENDANT INFORMATION(?P<defendant_info>.*)\s+CASE PARTICIPANTS",
        txt,
        re.DOTALL,
    )
    if defendant_info_section is None:
        errs.extend(d_section_errs)
        return person, errs

    defendant_info_text = defendant_info_section.group("defendant_info")

    alias_search, a_errs = find_pattern(
        "aliases",
        r"Alias Name\s*\n+(?P<aliases>(.+\s*\n*)*)",
        defendant_info_text,
    )
    if alias_search is not None:
        # Filter on the stripped text so that whitespace-only lines do not
        # end up as empty-string aliases.
        person.aliases = [
            alias.strip()
            for alias in alias_search.group("aliases").split("\n")
            if alias.strip()
        ]
    else:
        errs.extend(a_errs)

    addr_search, addr_errs = find_pattern(
        "address",
        r"City/State/Zip:\s*(?P<addr>.*)\s*",
        defendant_info_text,
    )
    if addr_search is not None:
        # NOTE(review): the City/State/Zip text is passed as Address's first
        # positional argument and "" as the second; confirm that this is
        # the intended field order.
        person.address = Address(addr_search.group("addr"), "")
    else:
        errs.extend(addr_errs)

    return person, errs
def get_defendant(summary_xml: etree.Element) -> Person:
    """
    Build the defendant's Person from the caption of a summary's xml.

    The name is read from "caption/defendant_name" as "<last>, <first>",
    the date of birth from "caption/def_dob" (None when it is not in
    m/d/Y format), and the aliases from every "alias" element in the tree.
    """
    name_parts = [
        part.strip()
        for part in summary_xml.find("caption/defendant_name").text.split(",")
    ]
    dob_text = summary_xml.find("caption/def_dob").text.strip()
    alias_names = [alias_el.text.strip() for alias_el in summary_xml.xpath("//alias")]

    try:
        birth_date = datetime.strptime(dob_text, "%m/%d/%Y").date()
    except ValueError:
        # Unparseable date of birth: the Person gets no date of birth.
        birth_date = None

    # The caption lists the last name first.
    return Person(name_parts[1], name_parts[0], birth_date, aliases=alias_names)
def test_serializing_person(example_person):
    """Round-trip a Person through to_serializable, PersonSerializer and Person.from_dict."""
    serialized = to_serializable(example_person)
    assert serialized["first_name"] == example_person.first_name
    assert serialized["aliases"] == example_person.aliases

    serializer = PersonSerializer(data=serialized)
    assert serializer.is_valid(), serializer.error_messages

    restored = Person.from_dict(serializer.validated_data)
    assert isinstance(restored, Person)
    assert restored == example_person
def test_person():
    """Every value given to the Person constructor is readable as an attribute."""
    home = Address(
        line_one="1234 Main St.",
        city_state_zip="Philadelphia, PA 19103",
    )
    person = Person(
        "John",
        "Smeth",
        date(2010, 1, 1),
        date_of_death=date(2020, 1, 1),
        aliases=["SmithGuy"],
        ssn="999-99-9999",
        address=home,
    )

    assert person.first_name == "John"
    assert person.last_name == "Smeth"
    assert person.date_of_birth.year == 2010
    assert person.date_of_death.year == 2020
    assert person.aliases == ["SmithGuy"]
    assert person.ssn == "999-99-9999"
    assert person.address.line_one == "1234 Main St."
def test_person_todict():
    """to_serializable turns a Person into a plain dict, with the date as an ISO string."""
    birth_date = date(2010, 1, 1)
    home = Address(
        line_one="1234 Main St.",
        city_state_zip="Philadelphia, PA 19103",
    )
    person = Person(
        "John",
        "Smeth",
        birth_date,
        aliases=["JJ", "Smelly"],
        ssn="999-99-9999",
        address=home,
    )

    expected = {
        "first_name": "John",
        "last_name": "Smeth",
        "date_of_birth": birth_date.isoformat(),
        "aliases": ["JJ", "Smelly"],
        "ssn": "999-99-9999",
        "address": {
            "line_one": "1234 Main St.",
            "city_state_zip": "Philadelphia, PA 19103",
        },
    }
    assert to_serializable(person) == expected
def test_person_from_dict(example_person):
    """Person.from_dict rebuilds a Person, including a real date, from serialized data."""
    rebuilt = Person.from_dict(to_serializable(example_person))
    assert example_person.last_name == rebuilt.last_name
    assert isinstance(rebuilt.date_of_birth, date)
) if resp.status_code != 200: continue filename = os.path.join(td, case["docket_number"]) with open(filename, "wb") as fp: fp.write(resp.content) case[f"{source_type}_text"] = get_text_from_pdf(filename) if output_dir is not None: for doc in os.listdir(td): shutil.copy(os.path.join(td, doc), os.path.join(output_dir, doc)) # Read the source records and integrate them into a CRecord # representing the person't full criminal record. sourcerecords = list() crecord = CRecord(person=Person( first_name=first_name, last_name=last_name, date_of_birth=dob)) for case in search_results: parser = pick_pdf_parser(case["docket_number"]) if parser is None: continue sr = SourceRecord(case["docket_sheet_text"], parser) sourcerecords.append(sr) crecord.add_sourcerecord(sr, case_merge_strategy="overwrite_old") logger.info("Built CRecord.") logger.info(f" -time so far:{(datetime.now() - starttime).seconds}") # Create and Analysis using the CRecord. This Analysis will explain # what charges and cases are expungeable, what will be automatically sealed, # what could be sealed by petition. analysis = (Analysis(crecord).rule(rd.expunge_deceased).rule(
def test_add_docket(example_docket):
    """Adding a docket yields one case and changes the record's Person's first name."""
    record = CRecord(Person("dummy", "name", None))
    record.add_docket(example_docket)
    assert len(record.cases) == 1
    assert record.person.first_name != "dummy"
def test_add_empty_sourcerecord():
    """A SourceRecord built without a parser adds no cases and leaves the Person as is."""
    record = CRecord(Person("dummy", "name", None))
    empty_source = SourceRecord("anysource", parser=None)
    record.add_sourcerecord(empty_source, override_person=True)
    assert len(record.cases) == 0
    assert record.person.first_name == "dummy"
def test_add_sourcerecord(example_sourcerecord):
    """With override_person=True, a source record supplies its cases and replaces the Person."""
    record = CRecord(Person("dummy", "name", None))
    record.add_sourcerecord(example_sourcerecord, override_person=True)
    assert len(record.cases) == len(example_sourcerecord.cases)
    assert record.person.first_name != "dummy"
def test_add_summary_doesnt_add_duplicates(example_summary):
    """Adding a summary and then a deep copy of it does not increase the case count."""
    duplicate_summary = copy.deepcopy(example_summary)
    record = CRecord(Person("Dummy", "Name", None))
    for summary in (example_summary, duplicate_summary):
        record.add_summary(summary)
    assert len(record.cases) == len(example_summary.get_cases())
def test_add_summary_to_crecord():
    """A summary parsed from the sample pdf replaces the Person when override_person=True."""
    summary, _ = Summary.from_pdf(pdf="tests/data/CourtSummaryReport.pdf")
    record = CRecord(Person("John", "Smith", date(1998, 1, 1)))
    record.add_summary(summary, override_person=True)

    first_name = record.person.first_name
    assert len(first_name) > 0
    assert first_name != "John"
def parse_mdj_pdf_text(txt: str) -> Tuple[Person, List[Case], List[str]]:
    """
    Parse MDJ docket, given the formatted text of the pdf.

    This function uses the original Expungement Generator's technique: regexes and nested loops,
    iterating over the lines of the docket.

    see https://github.com/NateV/Expungement-Generator/blob/master/Expungement-Generator/Record.php:64

    Every line is tested against each pattern in PATTERNS; whatever matches is
    stored in a dict of case information or a dict of person information.
    After the last line, string values in both dicts are stripped and the dicts
    are turned into a Case and a Person with their from_dict constructors.

    Args:
        txt: the text of the docket, with lines separated by newlines.

    Returns:
        A tuple of the Person, a one-element list holding the Case, and an
        empty list (this parser does not report errors).
    """
    already_searched_aliases = False
    case_info = dict()
    case_info["charges"] = []
    person_info = dict()
    person_info["aliases"] = []
    lines = txt.split("\n")
    line_count = len(lines)

    for idx, line in enumerate(lines):
        m = PATTERNS.mdj_district_number.search(line)
        if m:
            # what's the mdj district number for?
            case_info["mdj_district_number"] = m.group(1)

        m = PATTERNS.mdj_county_and_disposition.search(line)
        if m:
            case_info["county"] = m.group(1)
            case_info["disposition_date"] = m.group(2)

        m = PATTERNS.docket_number.search(line)
        if m:
            case_info["docket_number"] = m.group(1)

        m = PATTERNS.otn.search(line)
        if m:
            case_info["otn"] = m.group(1)

        m = PATTERNS.dc_number.search(line)
        if m:
            case_info["dc_num"] = m.group(1)

        m = PATTERNS.arrest_agency_and_date.search(line)
        if m:
            case_info["arresting_agency"] = m.group(1)
            try:
                case_info["arrest_date"] = m.group(2)
            except IndexError:
                # A match object raises IndexError for a group the pattern
                # does not define; in that case there is no arrest date.
                # NOTE(review): assumes PATTERNS holds `re` patterns - confirm.
                pass

        m = PATTERNS.complaint_date.search(line)
        if m:
            case_info["complaint_date"] = m.group(1)

        m = PATTERNS.affiant.search(line)
        if m:
            # TODO - mdj docket parse should reverse order of names of affiant
            case_info["affiant"] = m.group(1)

        # MHollander said:
        # the judge name can appear in multiple places.  Start by checking to see if the
        # judge's name appears in the Judge Assigned field.  If it does, then set it.
        # Later on, we'll check in the "Final Issuing Authority" field.  If it appears there
        # and doesn't show up as "migrated," we'll reassign the judge name.
        m = PATTERNS.judge_assigned.search(line)
        if m:
            judge = m.group(1).strip()
            # The name may overflow onto the following line; only look there
            # when a following line exists.
            if idx + 1 < line_count:
                overflow_match = PATTERNS.judge_assigned_overflow.search(lines[idx + 1])
                if overflow_match:
                    judge = f"{judge} {overflow_match.group(1).strip()}"
            if "igrated" not in judge:
                case_info["judge"] = judge

        m = PATTERNS.judge.search(line)
        if m:
            if len(m.group(1)) > 0 and "igrated" not in m.group(1):
                case_info["judge"] = m.group(1)

        m = PATTERNS.dob.search(line)
        if m:
            person_info["date_of_birth"] = m.group(1)

        m = PATTERNS.name.search(line)
        if m:
            person_info["first_name"] = m.group(2)
            person_info["last_name"] = m.group(1)
            person_info["aliases"].append(f"{m.group(1)}, {m.group(2)}")

        m = PATTERNS.alias_names_start.search(line)
        if already_searched_aliases is False and m:
            # Collect the lines after the alias heading until the line that
            # marks the end of the aliases, or the end of the text.
            idx2 = idx + 1
            while idx2 < line_count:
                candidate = lines[idx2]
                # End-of-page lines are skipped. The index must still advance
                # past them, otherwise the loop would never terminate.
                if not PATTERNS.end_of_page.search(candidate) and re.search(
                    r"\w", candidate
                ):
                    person_info["aliases"].append(candidate.strip())
                idx2 += 1
                if idx2 < line_count and PATTERNS.alias_names_end.search(lines[idx2]):
                    break
            already_searched_aliases = True

        m = PATTERNS.charges.search(line)  # Arrest.php;595
        if m:
            charge_info = dict()
            charge_info["statute"] = m.group(1)
            charge_info["grade"] = m.group(3)
            charge_info["offense"] = m.group(4)
            charge_info["disposition"] = m.group(6)
            # The offense text may overflow onto the following line; only
            # look there when a following line exists.
            if idx + 1 < line_count:
                m2 = PATTERNS.charges_search_overflow.search(lines[idx + 1])
                if m2:
                    charge_info[
                        "offense"
                    ] = f"{charge_info['offense'].strip()} {m2.group(1).strip()}"
            ## disposition date is on the next line
            if "disposition_date" in case_info:
                charge_info["disposition_date"] = case_info["disposition_date"]
            case_info["charges"].append(charge_info)

        m = PATTERNS.bail.search(line)
        if m:
            # TODO charges won't use the detailed bail info yet.
            case_info["bail_charged"] = m.group(1)
            case_info["bail_paid"] = m.group(2)
            case_info["bail_adjusted"] = m.group(3)
            case_info["bail_total"] = m.group(5)

        m = PATTERNS.costs.search(line)
        if m:
            case_info["total_fines"] = m.group(1)
            case_info["fines_paid"] = m.group(2)
            case_info["costs_adjusted"] = m.group(3)
            case_info["costs_total"] = m.group(5)

    # Strip surrounding whitespace from every string value; lists (charges,
    # aliases) and other values are passed through untouched.
    case_info = {
        k: (v.strip() if isinstance(v, str) else v) for k, v in case_info.items()
    }
    person_info = {
        k: (v.strip() if isinstance(v, str) else v) for k, v in person_info.items()
    }
    person = Person.from_dict(person_info)
    case = Case.from_dict(case_info)
    logger.info("Finished parsing MDJ docket")
    return person, [case], []
def is_over_age(person: Person, age_limit: int) -> Decision:
    """
    Decide whether a person is older than an age limit.

    Args:
        person: the Person whose age is checked.
        age_limit: the age that the person's age must exceed.

    Returns:
        A Decision whose value is True when person.age() is strictly greater
        than age_limit, with the person's age given as the reasoning.
    """
    question = f"Is {person.first_name} over {age_limit}?"
    exceeds_limit = person.age() > age_limit
    explanation = f"{person.first_name} is {person.age()}"
    return Decision(name=question, value=exceeds_limit, reasoning=explanation)
def test_person_age():
    """A Person born on 2000-01-01 reports an age above 17."""
    born_in_2000 = Person("John", "Smeth", date(2000, 1, 1))
    age = born_in_2000.age()
    assert age > 17