Beispiel #1
0
 def from_dict(dct: dict) -> Expungement:
     """
     Build an Expungement from a plain dict.

     The "attorney", "client" and "cases" entries are converted with
     Attorney.from_dict, Person.from_dict and Case.from_dict respectively;
     every other key is passed to the Expungement constructor unchanged.

     Args:
         dct: mapping with at least the keys "attorney", "client" and
             "cases" ("cases" must be iterable). It is not modified.

     Returns:
         The Expungement built from the converted values.

     Raises:
         KeyError: if "attorney", "client" or "cases" is missing.
     """
     # Build a fresh mapping instead of dct.update(...): updating in place
     # replaced the caller's nested dicts with objects, so the input was
     # corrupted and a second from_dict call on the same dict would fail.
     fields = {
         **dct,
         "attorney": Attorney.from_dict(dct["attorney"]),
         "client": Person.from_dict(dct["client"]),
         "cases": [Case.from_dict(c) for c in dct["cases"]],
     }
     return Expungement(**fields)
def test_serializing_person(example_person):
    """
    Round-trip a Person: to_serializable -> PersonSerializer -> Person.from_dict.

    Checks that the serialized form carries the first name and aliases, that
    the serializer accepts it, and that the rebuilt Person equals the original.
    """
    ser = to_serializable(example_person)
    assert ser["first_name"] == example_person.first_name
    assert ser["aliases"] == example_person.aliases

    serializer = PersonSerializer(data=ser)
    # Report `errors` (the validation failures for this data) when validation
    # fails. The previous message used `error_messages`, which on a Django REST
    # Framework serializer is only the table of message templates and says
    # nothing about what went wrong.
    # NOTE(review): assumes PersonSerializer is a DRF serializer -- confirm.
    assert serializer.is_valid(), serializer.errors
    restored = Person.from_dict(serializer.validated_data)
    assert isinstance(restored, Person)
    assert restored == example_person
def test_person_from_dict(example_person):
    """Person.from_dict rebuilds a Person from the output of to_serializable."""
    serialized = to_serializable(example_person)
    rebuilt = Person.from_dict(serialized)
    # The last name must survive the round trip unchanged.
    assert example_person.last_name == rebuilt.last_name
    # The date of birth must come back as a date object.
    assert isinstance(rebuilt.date_of_birth, date)
def parse_mdj_pdf_text(txt: str) -> Tuple[Person, List[Case], List[str]]:
    """
    Parse MDJ docket, given the formatted text of the pdf.

    This function uses the original Expungement Generator's technique: regexes
    and nested loops, iterating over the lines of the docket.

    see https://github.com/NateV/Expungement-Generator/blob/master/Expungement-Generator/Record.php:64

    Every line is tested against each pattern in PATTERNS. Matches fill in a
    dict of case information and a dict of person information; string values
    are stripped of surrounding whitespace and the dicts are then turned into
    objects with Case.from_dict and Person.from_dict.

    Lookups of "the next line" are skipped when the current line is the last
    one, and the alias scan stops at the end of the text if no end marker is
    found, so a truncated docket does not raise IndexError.

    Args:
        txt: the text of the docket, as newline-separated lines.

    Returns:
        A tuple of the parsed Person, a one-element list holding the parsed
        Case, and an empty list (presumably reserved for parsing errors --
        confirm against callers).
    """
    already_searched_aliases = False

    case_info = dict()
    case_info["charges"] = []
    person_info = dict()
    person_info["aliases"] = []

    lines = txt.split("\n")
    num_lines = len(lines)
    for idx, line in enumerate(lines):
        # Several fields overflow onto the following line. None marks "there is
        # no following line", so the overflow lookups can be skipped safely.
        next_line = lines[idx + 1] if idx + 1 < num_lines else None

        m = PATTERNS.mdj_district_number.search(line)
        if m:
            # what's the mdj district number for?
            case_info["mdj_district_number"] = m.group(1)

        m = PATTERNS.mdj_county_and_disposition.search(line)
        if m:
            case_info["county"] = m.group(1)
            case_info["disposition_date"] = m.group(2)

        m = PATTERNS.docket_number.search(line)
        if m:
            case_info["docket_number"] = m.group(1)

        m = PATTERNS.otn.search(line)
        if m:
            case_info["otn"] = m.group(1)

        m = PATTERNS.dc_number.search(line)
        if m:
            case_info["dc_num"] = m.group(1)

        m = PATTERNS.arrest_agency_and_date.search(line)
        if m:
            case_info["arresting_agency"] = m.group(1)
            try:
                case_info["arrest_date"] = m.group(2)
            except IndexError:
                # The pattern defines no second group; leave arrest_date unset.
                pass

        m = PATTERNS.complaint_date.search(line)
        if m:
            case_info["complaint_date"] = m.group(1)

        m = PATTERNS.affiant.search(line)
        if m:
            # TODO - mdj docket parse should reverse order of names of affiant
            case_info["affiant"] = m.group(1)

        # MHollander said:
        #  the judge name can appear in multiple places.  Start by checking to see if the
        # judge's name appears in the Judge Assigned field.  If it does, then set it.
        # Later on, we'll check in the "Final Issuing Authority" field.  If it appears there
        # and doesn't show up as "migrated," we'll reassign the judge name.
        m = PATTERNS.judge_assigned.search(line)
        if m:
            judge = m.group(1).strip()
            if next_line is not None:
                # A long judge name continues on the next line.
                overflow_match = PATTERNS.judge_assigned_overflow.search(next_line)
                if overflow_match:
                    judge = f"{judge} {overflow_match.group(1).strip()}"

            if "igrated" not in judge:
                case_info["judge"] = judge

        m = PATTERNS.judge.search(line)
        if m:
            if len(m.group(1)) > 0 and "igrated" not in m.group(1):
                case_info["judge"] = m.group(1)

        m = PATTERNS.dob.search(line)
        if m:
            person_info["date_of_birth"] = m.group(1)

        m = PATTERNS.name.search(line)
        if m:
            person_info["first_name"] = m.group(2)
            person_info["last_name"] = m.group(1)
            person_info["aliases"].append(f"{m.group(1)}, {m.group(2)}")

        m = PATTERNS.alias_names_start.search(line)
        if m and not already_searched_aliases:
            # Collect every non-blank line after the alias header until the
            # end-of-aliases marker (or the end of the text) is reached.
            idx2 = idx + 1
            while idx2 < num_lines:
                candidate = lines[idx2]
                # End-of-page lines inside the alias section are skipped. The
                # scan must still advance past them: a bare `continue` here
                # would loop forever on the same line.
                if not PATTERNS.end_of_page.search(candidate) and re.search(
                    r"\w", candidate
                ):
                    person_info["aliases"].append(candidate.strip())
                idx2 += 1

                if idx2 < num_lines and PATTERNS.alias_names_end.search(lines[idx2]):
                    break
            # The alias section has been scanned (to its end marker or to the
            # end of the text); do not scan it again on a later header match.
            already_searched_aliases = True

        m = PATTERNS.charges.search(line)  # Arrest.php;595
        if m:
            charge_info = dict()
            charge_info["statute"] = m.group(1)
            charge_info["grade"] = m.group(3)
            charge_info["offense"] = m.group(4)
            charge_info["disposition"] = m.group(6)
            m2 = None
            if next_line is not None:
                # A long offense description continues on the next line.
                m2 = PATTERNS.charges_search_overflow.search(next_line)
            if m2:
                charge_info[
                    "offense"
                ] = f"{charge_info['offense'].strip()} {m2.group(1).strip()}"

            ## disposition date is on the next line
            if "disposition_date" in case_info:
                charge_info["disposition_date"] = case_info["disposition_date"]

            case_info["charges"].append(charge_info)

        m = PATTERNS.bail.search(line)
        if m:
            # TODO charges won't use the detailed bail info yet.
            case_info["bail_charged"] = m.group(1)
            case_info["bail_paid"] = m.group(2)
            case_info["bail_adjusted"] = m.group(3)
            case_info["bail_total"] = m.group(5)

        m = PATTERNS.costs.search(line)
        if m:
            case_info["total_fines"] = m.group(1)
            case_info["fines_paid"] = m.group(2)
            case_info["costs_adjusted"] = m.group(3)
            case_info["costs_total"] = m.group(5)

    # Strip surrounding whitespace from every string value; lists (charges,
    # aliases) and None values are passed through untouched.
    case_info = {
        k: (v.strip() if isinstance(v, str) else v) for k, v in case_info.items()
    }
    person_info = {
        k: (v.strip() if isinstance(v, str) else v) for k, v in person_info.items()
    }
    person = Person.from_dict(person_info)
    case = Case.from_dict(case_info)
    logger.info("Finished parsing MDJ docket")

    return person, [case], []