def from_dict(dct: dict) -> Expungement:
    """
    Build an Expungement from its dictionary representation.

    The nested "attorney", "client" and "cases" entries are converted with
    Attorney.from_dict, Person.from_dict and Case.from_dict respectively.
    Every other key is passed to the Expungement constructor unchanged.

    Args:
        dct: Mapping holding at least the keys "attorney", "client" and
            "cases". It is not modified.

    Returns:
        The Expungement constructed from the converted values.

    Raises:
        KeyError: if "attorney", "client" or "cases" is missing from `dct`.
    """
    # Work on a shallow copy. Updating `dct` in place would swap the caller's
    # nested dicts for objects, so the same dict could not be passed to
    # from_dict a second time.
    fields = dict(dct)
    fields.update(
        {
            "attorney": Attorney.from_dict(dct["attorney"]),
            "client": Person.from_dict(dct["client"]),
            "cases": [Case.from_dict(c) for c in dct["cases"]],
        }
    )
    return Expungement(**fields)
def test_order_cases_by_last_action(example_case):
    """Sort two cases using Case.order_cases_by_last_action as the sort key."""
    # Original note: sorting when a case has no last_action puts the case with
    # no action at the top.
    dated_case_fields = dict(
        status="Open",
        county="Erie",
        docket_number="12-CP-01",
        otn="112000111",
        dc="11222",
        # NOTE(review): a Case is passed as a charge here, while the other
        # tests pass Charge objects -- confirm this is intended.
        charges=[example_case],
        total_fines=200,
        fines_paid=1,
        judge="Smooth Operator",
        judge_address="1234 Other st.",
        disposition_date=date(2019, 1, 1),
        arrest_date=None,
        complaint_date=date(2018, 2, 1),
        arresting_agency="Happy County",
        arresting_agency_address="1234 Main St.",
        affiant="Officer Happy",
    )
    case2 = Case(**dated_case_fields)

    ordered = sorted([example_case, case2], key=Case.order_cases_by_last_action)
    # NOTE(review): this comparison is a bare expression, not an `assert`, so
    # its result is discarded and the test only proves that sorting does not
    # raise. Confirm the expected order against
    # Case.order_cases_by_last_action before turning it into an assertion.
    ordered[0] == case2

    example_case.arrest_date = date(2019, 2, 1)
    ordered = sorted([example_case, case2], key=Case.order_cases_by_last_action)
    # NOTE(review): same as above -- the result of this comparison is unused.
    ordered[1] == case2
def test_case(example_sentence):
    """A Case built from one Charge exposes the status it was given."""
    charge = Charge(
        "Eating w/ mouth open",
        "M2",
        "14 section 23",
        "Guilty Plea",
        sentences=[example_sentence],
    )
    case_fields = dict(
        status="Open",
        county="Erie",
        docket_number="12-CP-02",
        otn="112000111",
        dc="11222",
        charges=[charge],
        total_fines=200,
        fines_paid=1,
        judge="Smooth Operator",
        judge_address="1234 Other St., PA",
        disposition_date=None,
        arrest_date=None,
        complaint_date=None,
        affiant="Sheriff Smelly",
        arresting_agency="Upsidedown County",
        arresting_agency_address="1234 Main St., PA",
    )
    open_case = Case(**case_fields)
    assert open_case.status == "Open"
def example_case(example_charge):
    """Return an open Erie County Case holding `example_charge` and no dates."""
    identifiers = dict(
        docket_number="12-MC-01",
        otn="112000111",
        dc="11222",
    )
    dates = dict(
        arrest_date=None,
        complaint_date=None,
        disposition_date=None,
    )
    people_and_places = dict(
        judge="Judge Jimmy Hendrix",
        judge_address="1234 Judge St.,",
        affiant="Officer Bland",
        arresting_agency_address="1234 Grey St.",
        arresting_agency="Monochrome County PD.",
    )
    return Case(
        status="Open",
        county="Erie",
        charges=[example_charge],
        total_fines=200,
        fines_paid=100,
        **identifiers,
        **dates,
        **people_and_places,
    )
def get_md_cases(summary_xml: etree.Element) -> List:
    """
    Build a Case for every ``case`` element found in a Summary sheet.

    Args:
        summary_xml: Parsed xml of the summary; it is searched with the
            xpath ``//case``.

    Returns:
        A list with one Case per ``case`` element, in document order.
    """
    parsed_cases = []
    for case_elem in summary_xml.xpath("//case"):
        # In mdj summaries, there's only one "charge" element, not different
        # "open" and "closed" elements. And there are no sentences recorded.
        charges = [
            Charge(
                offense=text_or_blank(charge_elem.find("description")),
                statute=text_or_blank(charge_elem.find("statute")),
                grade=text_or_blank(charge_elem.find("grade")),
                disposition=text_or_blank(charge_elem.find("disposition")),
                sentences=[],
            )
            for charge_elem in case_elem.xpath(".//charge")
        ]

        # The status is read from the grandparent element and the county from
        # a child of the parent element.
        parent = case_elem.getparent()
        status = text_or_blank(parent.getparent())
        county = text_or_blank(parent.find("county"))

        arrest_elem = case_elem.find("arrest_disp_actions/arrest_disp/arrest_date")
        disp_elem = case_elem.find("arrest_disp_actions/arrest_disp/disp_date")
        judge_elem = case_elem.find("arrest_disp_actions/arrest_disp/disp_judge")

        parsed_cases.append(
            Case(
                status=status,
                county=county,
                docket_number=text_or_blank(case_elem.find("case_basics/docket_num")),
                otn=text_or_blank(case_elem.find("case_basics/otn_num")),
                dc=text_or_blank(case_elem.find("case_basics/dc_num")),
                charges=charges,
                total_fines=None,  # a summary docket never has info about this.
                fines_paid=None,
                arrest_date=date_or_none(arrest_elem),
                disposition_date=date_or_none(disp_elem),
                judge=text_or_blank(judge_elem),
            )
        )
    return parsed_cases
def parse_case(txt: str) -> Tuple[Case, List[str]]:
    """
    Use regexes to extract case information from the text of a docket.

    Args:
        txt (str): The text of a CP or MC docket.
    """
    # NOTE(review): the visible body ends without a `return`, and neither
    # `errs` nor the errors returned by find_pattern are used. This looks like
    # a truncated function -- confirm against the full source.
    errs = []
    case = Case(
        status=None,
        county=None,
        docket_number=None,
        otn=None,
        dc=None,
        charges=[],
    )

    docket_number_pattern = (
        r"Docket Number:\s+(?P<docket_number>(MC|CP)\-\d{2}\-(\D{2})\-\d*\-\d{4})"
    )
    dn_match, dn_errs = find_pattern("docket_number", docket_number_pattern, txt)
    if dn_match is None:
        return
    case.docket_number = dn_match.group("docket_number")
def test_from_dict(example_case):
    """Case.from_dict rejects junk input and round-trips a serialized Case."""
    junk = {"invalid": "stuff"}
    assert Case.from_dict(junk) is None

    serialized = to_serializable(example_case)
    restored = Case.from_dict(serialized)
    assert restored.docket_number == example_case.docket_number
def parse_mdj_pdf_text(txt: str) -> Tuple[Person, List[Case], List[str]]:
    """
    Parse MDJ docket, given the formatted text of the pdf.

    This function uses the original Expungement Generator's technique: regexes
    and nested loops, iterating over the lines of the docket.

    see https://github.com/NateV/Expungement-Generator/blob/master/Expungement-Generator/Record.php:64

    Args:
        txt: The text of the docket; it is split on newlines and every line is
            tested against the regexes in PATTERNS.

    Returns:
        A tuple of (the Person built from the collected person info, a
        one-element list holding the Case built from the collected case info,
        an empty list of errors).
    """
    already_searched_aliases = False
    case_info = {"charges": []}
    person_info = {"aliases": []}
    lines = txt.split("\n")
    line_count = len(lines)

    for idx, line in enumerate(lines):
        # Some fields overflow onto the following line. The last line of the
        # text has no successor, so guard instead of indexing past the end.
        next_line = lines[idx + 1] if idx + 1 < line_count else None

        m = PATTERNS.mdj_district_number.search(line)
        if m:
            # what's the mdj district number for?
            case_info["mdj_district_number"] = m.group(1)

        m = PATTERNS.mdj_county_and_disposition.search(line)
        if m:
            case_info["county"] = m.group(1)
            case_info["disposition_date"] = m.group(2)

        m = PATTERNS.docket_number.search(line)
        if m:
            case_info["docket_number"] = m.group(1)

        m = PATTERNS.otn.search(line)
        if m:
            case_info["otn"] = m.group(1)

        m = PATTERNS.dc_number.search(line)
        if m:
            # NOTE(review): stored under "dc_num", while Case is constructed
            # elsewhere with a `dc` argument -- confirm Case.from_dict maps it.
            case_info["dc_num"] = m.group(1)

        m = PATTERNS.arrest_agency_and_date.search(line)
        if m:
            case_info["arresting_agency"] = m.group(1)
            try:
                case_info["arrest_date"] = m.group(2)
            except IndexError:
                # The pattern defines no second group; leave the date unset.
                pass

        m = PATTERNS.complaint_date.search(line)
        if m:
            case_info["complaint_date"] = m.group(1)

        m = PATTERNS.affiant.search(line)
        if m:
            # TODO - mdj docket parse should reverse order of names of affiant
            case_info["affiant"] = m.group(1)

        # MHollander said:
        # the judge name can appear in multiple places. Start by checking to
        # see if the judge's name appears in the Judge Assigned field. If it
        # does, then set it. Later on, we'll check in the "Final Issuing
        # Authority" field. If it appears there and doesn't show up as
        # "migrated," we'll reassign the judge name.
        m = PATTERNS.judge_assigned.search(line)
        if m:
            judge = m.group(1).strip()
            if next_line is not None:
                overflow_match = PATTERNS.judge_assigned_overflow.search(next_line)
                if overflow_match:
                    judge = f"{judge} {overflow_match.group(1).strip()}"
            if "igrated" not in judge:
                case_info["judge"] = judge

        m = PATTERNS.judge.search(line)
        if m:
            judge_name = m.group(1)
            if judge_name and "igrated" not in judge_name:
                case_info["judge"] = judge_name

        m = PATTERNS.dob.search(line)
        if m:
            person_info["date_of_birth"] = m.group(1)

        m = PATTERNS.name.search(line)
        if m:
            person_info["first_name"] = m.group(2)
            person_info["last_name"] = m.group(1)
            person_info["aliases"].append(f"{m.group(1)}, {m.group(2)}")

        m = PATTERNS.alias_names_start.search(line)
        if already_searched_aliases is False and m:
            # Collect the lines after the alias header until the line that
            # marks the end of the aliases, or until the text runs out.
            idx2 = idx + 1
            while idx2 < line_count:
                alias_line = lines[idx2]
                # End-of-page lines are skipped. The index still has to
                # advance, otherwise the loop would never terminate.
                is_page_break = PATTERNS.end_of_page.search(alias_line)
                if not is_page_break and re.search(r"\w", alias_line):
                    person_info["aliases"].append(alias_line.strip())
                idx2 += 1
                if idx2 < line_count and PATTERNS.alias_names_end.search(
                    lines[idx2]
                ):
                    break
            already_searched_aliases = True

        m = PATTERNS.charges.search(line)  # Arrest.php;595
        if m:
            charge_info = {
                "statute": m.group(1),
                "grade": m.group(3),
                "offense": m.group(4),
                "disposition": m.group(6),
            }
            m2 = None
            if next_line is not None:
                m2 = PATTERNS.charges_search_overflow.search(next_line)
            if m2:
                charge_info[
                    "offense"
                ] = f"{charge_info['offense'].strip()} {m2.group(1).strip()}"

            ## disposition date is on the next line
            if "disposition_date" in case_info:
                charge_info["disposition_date"] = case_info["disposition_date"]
            case_info["charges"].append(charge_info)

        m = PATTERNS.bail.search(line)
        if m:
            # TODO charges won't use the detailed bail info yet.
            case_info["bail_charged"] = m.group(1)
            case_info["bail_paid"] = m.group(2)
            case_info["bail_adjusted"] = m.group(3)
            case_info["bail_total"] = m.group(5)

        m = PATTERNS.costs.search(line)
        if m:
            case_info["total_fines"] = m.group(1)
            case_info["fines_paid"] = m.group(2)
            case_info["costs_adjusted"] = m.group(3)
            case_info["costs_total"] = m.group(5)

    # Strip surrounding whitespace from every top-level string value;
    # non-string values (lists, None) are passed through untouched.
    case_info = {
        k: (v.strip() if isinstance(v, str) else v) for k, v in case_info.items()
    }
    person_info = {
        k: (v.strip() if isinstance(v, str) else v) for k, v in person_info.items()
    }

    person = Person.from_dict(person_info)
    case = Case.from_dict(case_info)
    logger.info("Finished parsing MDJ docket")
    return person, [case], []
def get_case(stree: etree) -> Case:
    """
    Build a Case from the xml of a docket that has been parsed into sections.

    Args:
        stree: xml tree of a docket, parsed into a header and some number of
            sections.

    Returns:
        A single Case populated from the header, the case-info section, the
        status-info section, the financial section and the charges returned
        by get_charges.
    """
    county = xpath_or_blank(stree, "/docket/header/court_name/county")
    docket_number = xpath_or_blank(stree, "/docket/header/docket_number")
    otn = xpath_or_blank(stree, "//section[@name='section_case_info']//otn")
    dc = xpath_or_blank(stree, "//section[@name='section_case_info']//dc")
    judge = xpath_or_blank(
        stree, "//section[@name='section_case_info']//judge_assigned"
    )
    affiant = xpath_or_blank(stree, "//arresting_officer")
    arresting_agency = xpath_or_blank(stree, "//arresting_agency")
    complaint_date = xpath_date_or_blank(
        stree, "//section[@name='section_status_info']//complaint_date"
    )
    arrest_date = xpath_date_or_blank(
        stree, "//section[@name='section_status_info']//arrest_date"
    )
    status = xpath_or_blank(
        stree, "//section[@name='section_status_info']//case_status"
    )

    # If the case's status is Closed, the disposition date is taken from the
    # status event whose type contains 'Sentenced'.
    # TODO I'm not sure this is the right date. Is the 'disposition date' the
    # date the case status changed to Completed, or the date of
    # "Sentenced/Penalty Imposed"
    disposition_date = (
        xpath_date_or_blank(
            stree,
            "//section[@name='section_status_info']//status_event[status_type[contains(text(),'Sentenced')]]/status_date",
        )
        if re.search("close", status, re.IGNORECASE)
        else None
    )

    # fines and costs
    # NOTE(review): the two xpaths below spell the section and the totals
    # element differently ('section_case_financal_info' / 'grand_toals' versus
    # 'section_case_financial_info' / 'grant_totals'). Presumably at most one
    # spelling of each matches the docket grammar -- confirm against the
    # grammar before relying on these values.
    assessed_text = xpath_or_blank(
        stree,
        "//section[@name='section_case_financal_info']/case_financial_info/grand_toals/assessed",
    )
    total_fines = str_to_money(assessed_text)
    payments_text = xpath_or_blank(
        stree,
        "//section[@name='section_case_financial_info']/case_financial_info/grant_totals/payments",
    )
    fines_paid = str_to_money(payments_text)

    # charges
    charges = get_charges(stree)

    return Case(
        status=status,
        county=county,
        docket_number=docket_number,
        otn=otn,
        dc=dc,
        charges=charges,
        total_fines=total_fines,
        fines_paid=fines_paid,
        arrest_date=arrest_date,
        disposition_date=disposition_date,
        judge=judge,
        affiant=affiant,
        arresting_agency=arresting_agency,
        complaint_date=complaint_date,
    )
def get_cp_cases(summary_xml: etree.Element) -> List:
    """
    Build a Case for every ``case`` element found in a Summary sheet.

    Each Case carries the charges from its ``closed_sequence`` elements
    followed by those from its ``open_sequence`` elements, with any
    ``sentencing_info`` attached to the charge as Sentence objects.

    Args:
        summary_xml: Parsed xml of the summary; it is searched with the
            xpath ``//case``.

    Returns:
        A list with one Case per ``case`` element, in document order.
    """

    def _charge_from_sequence(seq):
        # Turn one open/closed sequence element into a Charge, sentences included.
        charge = Charge(
            offense=text_or_blank(seq.find("description")),
            statute=text_or_blank(seq.find("statute")),
            grade=text_or_blank(seq.find("grade")),
            disposition=text_or_blank(seq.find("sequence_disposition")),
            disposition_date=None,
            sentences=[],
        )
        for info in seq.xpath(".//sentencing_info"):
            sentence_date = date_or_none(info.find("sentence_date"))
            sentence_type = text_or_blank(info.find("sentence_type"))
            sentence_period = text_or_blank(info.find("program_period"))
            shortest = (
                text_or_blank(info.find("sentence_length/min_length/time")),
                text_or_blank(info.find("sentence_length/min_length/unit")),
            )
            longest = (
                text_or_blank(info.find("sentence_length/max_length/time")),
                text_or_blank(info.find("sentence_length/max_length/unit")),
            )
            charge.sentences.append(
                Sentence(
                    sentence_date=sentence_date,
                    sentence_type=sentence_type,
                    sentence_period=sentence_period,
                    sentence_length=SentenceLength.from_tuples(
                        min_time=shortest,
                        max_time=longest,
                    ),
                )
            )
        return charge

    parsed_cases = []
    for case_elem in summary_xml.xpath("//case"):
        closed_charges = [
            _charge_from_sequence(seq)
            for seq in case_elem.xpath(".//closed_sequence")
        ]
        open_charges = [
            _charge_from_sequence(seq)
            for seq in case_elem.xpath(".//open_sequence")
        ]

        # The status is read from the grandparent element and the county from
        # a child of the parent element.
        status = text_or_blank(case_elem.getparent().getparent())
        county = text_or_blank(case_elem.getparent().find("county"))

        new_case = Case(
            status=status,
            county=county,
            docket_number=text_or_blank(case_elem.find("case_basics/docket_num")),
            otn=text_or_blank(case_elem.find("case_basics/otn_num")),
            dc=text_or_blank(case_elem.find("case_basics/dc_num")),
            charges=closed_charges + open_charges,
            total_fines=None,  # a summary docket never has info about this.
            fines_paid=None,
            arrest_date=date_or_none(
                either(
                    case_elem.find("arrest_disp_actions/arrest_disp/arrest_date"),
                    case_elem.find("arrest_disp_actions/arrest_trial/arrest_date"),
                )
            ),
            disposition_date=date_or_none(
                case_elem.find("arrest_disp_actions/arrest_disp/disp_date")
            ),
            judge=text_or_blank(
                case_elem.find("arrest_disp_actions/arrest_disp/disp_judge")
            ),
        )

        # In Summaries, the Disposition Date is set on a Case, but it is set
        # on a Charge in Dockets. So when processing a Summary sheet, if there
        # is a date on the Case, the Charges should inherit the date on the
        # case.
        if new_case.disposition_date is not None:
            for charge in new_case.charges:
                charge.disposition_date = new_case.disposition_date

        parsed_cases.append(new_case)
    return parsed_cases