def from_dict(dct: dict) -> Expungement:
    """
    Build an Expungement from a plain dict.

    The "attorney", "client" and "cases" entries are converted with
    Attorney.from_dict, Person.from_dict and Case.from_dict (one call per
    element of "cases"); every other entry is passed to the Expungement
    constructor unchanged as a keyword argument.

    The caller's dict is left untouched: the conversion happens on a shallow
    copy, so the same dict can safely be passed here more than once.

    Raises:
        KeyError: if "attorney", "client" or "cases" is missing from `dct`.
    """
    fields = dict(dct)
    fields["attorney"] = Attorney.from_dict(dct["attorney"])
    fields["client"] = Person.from_dict(dct["client"])
    fields["cases"] = [Case.from_dict(c) for c in dct["cases"]]
    return Expungement(**fields)
def test_serializing_person(example_person):
    """
    Round-trip a Person: to_serializable -> PersonSerializer -> Person.from_dict.

    Checks that the serialized form carries the person's first name and
    aliases, that the serializer accepts it, and that the rebuilt object is a
    Person equal to the original fixture.
    """
    ser = to_serializable(example_person)
    assert ser["first_name"] == example_person.first_name
    assert ser["aliases"] == example_person.aliases

    serializer = PersonSerializer(data=ser)
    # NOTE(review): PersonSerializer looks like a Django REST Framework
    # serializer (data=..., is_valid(), validated_data). On those, `.errors`
    # holds the actual validation failures, which is what we want to see when
    # this assertion trips; `.error_messages` is only the generic templates.
    assert serializer.is_valid(), serializer.errors

    person = Person.from_dict(serializer.validated_data)
    assert isinstance(person, Person)
    assert person == example_person
def test_person_from_dict(example_person):
    """
    Person.from_dict should rebuild a Person from its serializable form.

    Verifies that the last name survives the round trip and that
    date_of_birth on the rebuilt object is a `date` instance.
    """
    rebuilt = Person.from_dict(to_serializable(example_person))

    assert example_person.last_name == rebuilt.last_name
    assert isinstance(rebuilt.date_of_birth, date)
def _collect_mdj_aliases(lines: List[str], start: int) -> List[str]:
    """
    Collect alias names from `lines`, beginning at index `start`.

    A line is recorded (stripped) when it contains at least one word character
    and does not match PATTERNS.end_of_page. After each line the scan advances
    and stops as soon as the new current line matches
    PATTERNS.alias_names_end, or when the text runs out.
    """
    aliases = []
    cursor = start
    while cursor < len(lines):
        candidate = lines[cursor]
        # End-of-page lines are stepped over instead of being recorded; the
        # cursor must still advance or the scan would never terminate.
        if not PATTERNS.end_of_page.search(candidate) and re.search(
            r"\w", candidate
        ):
            aliases.append(candidate.strip())
        cursor += 1
        if cursor < len(lines) and PATTERNS.alias_names_end.search(lines[cursor]):
            break
    return aliases


def parse_mdj_pdf_text(txt: str) -> Tuple[Person, List[Case], List[str]]:
    """
    Parse MDJ docket, given the formatted text of the pdf.

    This function uses the original Expungement Generator's technique: regexes
    and nested loops, iterating over the lines of the docket.

    see https://github.com/NateV/Expungement-Generator/blob/master/Expungement-Generator/Record.php:64

    Args:
        txt: the docket text; it is split on newlines and every line is
            tested against each pattern in PATTERNS.

    Returns:
        A 3-tuple of
          * the Person built from the name, date of birth and aliases found,
          * a one-element list holding the Case built from the case fields
            and charges found,
          * an empty list (nothing is ever added to it here).
    """
    already_searched_aliases = False
    case_info = {"charges": []}
    person_info = {"aliases": []}
    lines = txt.split("\n")
    line_count = len(lines)

    for idx, line in enumerate(lines):
        # Several fields can overflow onto the following line; the last line
        # of the text has no successor, so look it up safely once.
        next_line = lines[idx + 1] if idx + 1 < line_count else None

        m = PATTERNS.mdj_district_number.search(line)
        if m:
            # what's the mdj district number for?
            case_info["mdj_district_number"] = m.group(1)

        m = PATTERNS.mdj_county_and_disposition.search(line)
        if m:
            case_info["county"] = m.group(1)
            case_info["disposition_date"] = m.group(2)

        m = PATTERNS.docket_number.search(line)
        if m:
            case_info["docket_number"] = m.group(1)

        m = PATTERNS.otn.search(line)
        if m:
            case_info["otn"] = m.group(1)

        m = PATTERNS.dc_number.search(line)
        if m:
            case_info["dc_num"] = m.group(1)

        m = PATTERNS.arrest_agency_and_date.search(line)
        if m:
            case_info["arresting_agency"] = m.group(1)
            try:
                case_info["arrest_date"] = m.group(2)
            except IndexError:
                # The pattern defines no second group: leave the arrest date
                # unset rather than failing the whole parse.
                pass

        m = PATTERNS.complaint_date.search(line)
        if m:
            case_info["complaint_date"] = m.group(1)

        m = PATTERNS.affiant.search(line)
        if m:
            # TODO - mdj docket parse should reverse order of names of affiant
            case_info["affiant"] = m.group(1)

        # MHollander said:
        # the judge name can appear in multiple places. Start by checking to
        # see if the judge's name appears in the Judge Assigned field. If it
        # does, then set it. Later on, we'll check in the "Final Issuing
        # Authority" field. If it appears there and doesn't show up as
        # "migrated," we'll reassign the judge name.
        m = PATTERNS.judge_assigned.search(line)
        if m:
            judge = m.group(1).strip()
            overflow_match = (
                PATTERNS.judge_assigned_overflow.search(next_line)
                if next_line is not None
                else None
            )
            if overflow_match:
                judge = f"{judge} {overflow_match.group(1).strip()}"
            if "igrated" not in judge:
                case_info["judge"] = judge

        m = PATTERNS.judge.search(line)
        if m:
            judge_name = m.group(1)
            # Truthiness covers both an empty capture and a group that did
            # not take part in the match (None).
            if judge_name and "igrated" not in judge_name:
                case_info["judge"] = judge_name

        m = PATTERNS.dob.search(line)
        if m:
            person_info["date_of_birth"] = m.group(1)

        m = PATTERNS.name.search(line)
        if m:
            person_info["first_name"] = m.group(2)
            person_info["last_name"] = m.group(1)
            person_info["aliases"].append(f"{m.group(1)}, {m.group(2)}")

        # The alias section is only scanned once per docket.
        if not already_searched_aliases and PATTERNS.alias_names_start.search(line):
            person_info["aliases"].extend(_collect_mdj_aliases(lines, idx + 1))
            already_searched_aliases = True

        m = PATTERNS.charges.search(line)  # Arrest.php;595
        if m:
            charge_info = {
                "statute": m.group(1),
                "grade": m.group(3),
                "offense": m.group(4),
                "disposition": m.group(6),
            }
            m2 = (
                PATTERNS.charges_search_overflow.search(next_line)
                if next_line is not None
                else None
            )
            if m2:
                charge_info[
                    "offense"
                ] = f"{charge_info['offense'].strip()} {m2.group(1).strip()}"
            # Each charge inherits the case-level disposition date when one
            # has already been captured by this point in the docket.
            if "disposition_date" in case_info:
                charge_info["disposition_date"] = case_info["disposition_date"]
            case_info["charges"].append(charge_info)

        m = PATTERNS.bail.search(line)
        if m:
            # TODO charges won't use the detailed bail info yet.
            case_info["bail_charged"] = m.group(1)
            case_info["bail_paid"] = m.group(2)
            case_info["bail_adjusted"] = m.group(3)
            case_info["bail_total"] = m.group(5)

        m = PATTERNS.costs.search(line)
        if m:
            case_info["total_fines"] = m.group(1)
            case_info["fines_paid"] = m.group(2)
            case_info["costs_adjusted"] = m.group(3)
            case_info["costs_total"] = m.group(5)

    # Trim surrounding whitespace from every top-level string value; lists
    # (charges, aliases) and None values are passed through untouched.
    case_info = {
        k: (v.strip() if isinstance(v, str) else v) for k, v in case_info.items()
    }
    person_info = {
        k: (v.strip() if isinstance(v, str) else v) for k, v in person_info.items()
    }

    person = Person.from_dict(person_info)
    case = Case.from_dict(case_info)
    logger.info("Finished parsing MDJ docket")
    return person, [case], []