Beispiel #1
0
def parse_charges(txt: str) -> Tuple[Optional[List[Charge]], List[str]]:
    """
    Find the charges in the text of a docket.

    Every "DISPOSITION SENTENCING/PENALTIES" section found in ``txt`` is scanned
    line by line. Each line matching the charge pattern becomes a Charge. The
    line after a charge may hold the overflow of a long offense name, and the
    line(s) after that may hold one or more disposition dates, of which the last
    consecutive one is kept. The collected charges are passed through
    ``Charge.reduce_merge`` before being returned.

    Args:
        txt: The text of a docket.

    Returns:
        Tuple[0] is either None or a list of Charges. It is None only when no
            disposition/sentencing section could be found.
        Tuple[1] is a list of strings describing errors encountered.
    """
    logger.info("      parsing charges")
    disposition_section_searcher = re.compile(
        r"(?:.*\s+)DISPOSITION SENTENCING/PENALTIES\s*\n(?P<disposition_section>(.+\n+(?=[A-Z ]+))+.*)"
    )
    errs = []
    disposition_sections = disposition_section_searcher.findall(txt)
    # Test the search *results*, not the compiled pattern object (which never equals []).
    if not disposition_sections:
        errs.append("Could not find the disposition/sentencing section.")
        return None, errs
    charges = []
    # Compile the per-line patterns once instead of on every line.
    charges_pattern = re.compile(
        r"(?P<sequence>\d)\s+\/\s+(?P<offense>.+)\s{12,}(?P<disposition>\w.+?)(?=\s\s)\s{12,}(?P<grade>\w{0,2})\s+(?P<statute>\w{1,2}\s?\u00A7\s?\d+(\-|\u00A7|\w+)*)"
    )
    overflow_pattern = re.compile(
        r"^\s+(?P<offense_overflow>\w+\s*\w*)\s*$", re.I)
    disp_date_pattern = re.compile(
        r"(.*)\s(?P<disposition_date>\d{1,2}\/\d{1,2}\/\d{4})")
    # there may be multiple disposition sections
    for disposition_section in disposition_sections:
        # findall returns one tuple per match (the pattern has two capturing groups);
        # element 0 is the named "disposition_section" group.
        section_lines = disposition_section[0].split("\n")
        line_count = len(section_lines)
        for idx, ln in enumerate(section_lines):
            # not using the find_pattern function here because we're doing repeated searches on every line,
            # and failing to match is not an error, in that case.
            charge_line_search = charges_pattern.search(ln)
            if charge_line_search is None:
                continue
            logger.debug("found a charge in line: %s", ln)
            offense = charge_line_search.group("offense").strip()

            # Index of the last line consumed by this charge; advanced past an overflow
            # line so that the disposition-date lookup below starts after it.
            last_charge_line = idx
            # A charge on the final line of the section has no following line to inspect.
            charge_overflow_search = (
                overflow_pattern.search(section_lines[idx + 1])
                if idx + 1 < line_count else None)
            if charge_overflow_search is not None:
                offense += (" " + charge_overflow_search.group(
                    "offense_overflow").strip())
                last_charge_line += 1

            try:
                sequence = int(charge_line_search.group("sequence").strip())
            except ValueError:
                sequence = None
            charge = Charge(
                sequence=sequence,
                offense=offense,
                grade=charge_line_search.group("grade"),
                statute=charge_line_search.group("statute"),
                disposition=charge_line_search.group("disposition"),
                sentences=
                [],  # TODO: re_parse_cp_pdf parser does not collect Sentences yet.
            )

            # sometimes a single charge may have multiple successive disposition dates. We need the last one.
            disp_date_search = None
            next_line_index = last_charge_line + 1
            # Stop at the end of the section instead of indexing past the last line.
            while next_line_index < line_count:
                candidate = disp_date_pattern.search(
                    section_lines[next_line_index])
                if candidate is None:
                    break
                disp_date_search = candidate
                next_line_index += 1

            if disp_date_search is not None:
                charge.disposition_date = date_or_none(
                    disp_date_search.group("disposition_date"))
                if charge.disposition_date is None:
                    errs.append(
                        f"For the offense, {charge.sequence}/ {offense}, we found, but could not parse, the disposition date: {disp_date_search.group('disposition_date')}"
                    )
            charges.append(charge)
    charges = Charge.reduce_merge(charges)
    missing_disposition_dates = [
        f"Could not find disposition date for {c.sequence} / {c.offense} with disposition {c.disposition}"
        for c in charges if c.disposition_date is None
    ]
    errs += missing_disposition_dates
    return charges, errs
Beispiel #2
0
def _sentence_length_bound(sentence, bound: str) -> tuple:
    """
    Return the ``(time, unit)`` text pair found under ``sentence_length/<bound>``.

    Args:
        sentence: A ``sentencing_info`` element.
        bound: Either ``"min_length"`` or ``"max_length"``.
    """
    return (
        text_or_blank(sentence.find(f"sentence_length/{bound}/time")),
        text_or_blank(sentence.find(f"sentence_length/{bound}/unit")),
    )


def _charge_from_sequence(seq) -> "Charge":
    """
    Build a Charge, including its Sentences, from one sequence element.

    The same element layout is read for both ``closed_sequence`` and
    ``open_sequence`` elements. The Charge's disposition date is left as None;
    the caller fills it in from the Case.
    """
    charge = Charge(
        offense=text_or_blank(seq.find("description")),
        statute=text_or_blank(seq.find("statute")),
        grade=text_or_blank(seq.find("grade")),
        disposition=text_or_blank(seq.find("sequence_disposition")),
        disposition_date=None,
        sentences=[],
    )
    for sentence in seq.xpath(".//sentencing_info"):
        charge.sentences.append(
            Sentence(
                sentence_date=date_or_none(sentence.find("sentence_date")),
                sentence_type=text_or_blank(sentence.find("sentence_type")),
                sentence_period=text_or_blank(sentence.find("program_period")),
                sentence_length=SentenceLength.from_tuples(
                    min_time=_sentence_length_bound(sentence, "min_length"),
                    max_time=_sentence_length_bound(sentence, "max_length"),
                ),
            )
        )
    return charge


def get_cp_cases(summary_xml: etree.Element) -> List:
    """
    Return a list of the cases described in this Summary sheet.

    Args:
        summary_xml: Parsed Summary sheet; every ``case`` element anywhere in
            its document is turned into a Case.

    Returns:
        One Case per ``case`` element, in document order. Each Case's charges
        are its closed sequences followed by its open sequences.
    """
    cases = []
    for case in summary_xml.xpath("//case"):
        closed_charges = [
            _charge_from_sequence(seq)
            for seq in case.xpath(".//closed_sequence")
        ]
        open_charges = [
            _charge_from_sequence(seq)
            for seq in case.xpath(".//open_sequence")
        ]
        new_case = Case(
            # NOTE(review): the grandparent element itself is passed here, not a
            # child lookup — confirm text_or_blank derives the status from it.
            status=text_or_blank(case.getparent().getparent()),
            county=text_or_blank(case.getparent().find("county")),
            docket_number=text_or_blank(case.find("case_basics/docket_num")),
            otn=text_or_blank(case.find("case_basics/otn_num")),
            dc=text_or_blank(case.find("case_basics/dc_num")),
            charges=closed_charges + open_charges,
            total_fines=None,  # a summary docket never has info about this.
            fines_paid=None,
            arrest_date=date_or_none(
                either(
                    case.find("arrest_disp_actions/arrest_disp/arrest_date"),
                    case.find("arrest_disp_actions/arrest_trial/arrest_date"),
                )
            ),
            disposition_date=date_or_none(
                case.find("arrest_disp_actions/arrest_disp/disp_date")
            ),
            judge=text_or_blank(
                case.find("arrest_disp_actions/arrest_disp/disp_judge")
            ),
        )
        # In Summaries, the Disposition Date is set on a Case, but it is set on a Charge in Dockets.
        # So when processing a Summary sheet, if there is a date on the Case, the Charges should
        # inherit the date on the case.
        if new_case.disposition_date is not None:
            for charge in new_case.charges:
                charge.disposition_date = new_case.disposition_date
        cases.append(new_case)
    return cases