コード例 #1
0
def make_docket_number_core(docket_number: Optional[str]) -> str:
    """Condense a docket number into its "core" form.

    The core is the two-digit year followed by the zero-padded serial number,
    for example:

        2:12-cv-01032  ->  1201032
        12-cv-01032    ->  1201032
        12-332         ->  12000332

    :param docket_number: The docket number to condense, or None.
    :return: The condensed docket number, or an empty string when the input is
    None or matches neither known format. The value is always a string
    because an int would lose the leading zeroes.
    """
    if docket_number is None:
        return ""

    normalized = normalize_dashes(docket_number)

    # Tried in order; the first pattern that matches wins. Each entry pairs a
    # regex with the width its serial number is zero-padded to.
    formats = (
        # District style: optional office digit, year, case type, serial.
        (r"(?:\d:)?(\d\d)-..-(\d+)", 5),
        # Bankruptcy style: year and serial only. Padded to six characters
        # because some courts have a LOT of bankruptcies.
        (r"(\d\d)-(\d+)", 6),
    )
    for pattern, width in formats:
        found = re.search(pattern, normalized)
        if found:
            year, serial = found.groups()
            return f"{year}{int(serial):0{width}d}"

    return ""
コード例 #2
0
def get_tax_docket_numbers(opinion_text):
    """Parse an opinion's plain text for docket numbers.

    First we identify where the docket numbers are in the document. This is
    normally at the start of the document but can often follow a lengthy case
    details section. The text is then cut off after the first docket number
    that ends a sentence, and every docket number in that span is collected.

    :param opinion_text: The opinion's plain text.
    :return: The docket numbers found, joined with ", " (for example
    "18710-94, 12321-95"), or an empty string when none are found.
    """
    opinion_text = normalize_dashes(opinion_text)

    # Find the "Docket No." / "Docket Nos." header that introduces the list.
    header = re.search(r"Docket.? Nos?.? .*[0-9]{3,5}", opinion_text)
    if header is None:
        return ""
    parsed_text = opinion_text[header.start():]

    # The list ends at the first docket number followed by a period (or by a
    # single capital letter and a period); drop everything after it.
    list_end = re.search(
        r"[0-9]{3,5}(-|–)[\w]{2,4}([A-Z])?((\.)| [A-Z]\.)", parsed_text
    )
    if list_end is not None:
        parsed_text = parsed_text[:list_end.end()]

    docket_end_re = r"[0-9]{3,5}(-|–)[\w]{2,4}([A-Z])?((\,|\.)| [A-Z]\.)"

    # Each hit carries its trailing "," or "."; strip it per hit so the joined
    # string never ends with stray punctuation, even when no sentence-ending
    # docket number was found above.
    hits = [
        match.group().rstrip(",.")
        for match in re.finditer(docket_end_re, parsed_text)
    ]
    return ", ".join(hits)
コード例 #3
0
 def test_dash_handling(self) -> None:
     """Check that normalize_dashes turns en and em dashes into hyphens."""
     cases = (
         ("en dash –", "en dash -"),  # En-dash
         ("em dash —", "em dash -"),  # Em-dash
         ("dash -", "dash -"),  # Regular dash stays as it is
     )
     for raw, expected in cases:
         self.assertEqual(normalize_dashes(raw), expected)