コード例 #1
0
def get_parsed_values(parsed_addr, orig_val, val_label, orig_addr_str):
    # type: (Mapping[str, str], str, str, str) -> Union[str, None]
    """Reconcile a caller-supplied value with its parsed counterpart.

    Looks up val_label in parsed_addr, runs both that value and orig_val
    through post_clean_addr_str, and then compares the cleaned results:

    * both are None: None is returned.
    * exactly one is None: the other one is returned.
    * both are present and equal: that single value is returned.
    * both are present and differ: AmbiguousAddressError is raised.

    Only None counts as "missing" here; any other cleaned value takes part
    in the comparison.

    The mismatch check guards against misidentified address components when
    a known value is available. (For example when a city is supplied from
    the address dict or record being normalized, but usaddress identifies
    extra information stored in address_line_1 as a PlaceName.)

    :param parsed_addr: address parsed into ordereddict per usaddress.
    :type parsed_addr: Mapping
    :param orig_val: related value passed in from incoming data source.
    :type orig_val: str
    :param val_label: label to locate in parsed_addr
    :type val_label: str
    :param orig_addr_str: address string to pass to error, if applicable.
    :type orig_addr_str: str
    :return: str | None
    :raises AmbiguousAddressError: if the supplied and parsed values are
        both present after cleaning and are not equal.
    """
    parsed_raw = parsed_addr.get(val_label)
    supplied = post_clean_addr_str(orig_val)
    parsed = post_clean_addr_str(parsed_raw)

    # Nothing was supplied: whatever the parse produced (possibly None) wins.
    if supplied is None:
        return parsed
    # Nothing was parsed, or both sources agree: keep the supplied value.
    if parsed is None or parsed == supplied:
        return supplied
    # Two different values for the same component cannot be reconciled.
    raise AmbiguousAddressError(None, None, orig_addr_str)
コード例 #2
0
    def test_post_clean_addr_str(self):
        """Check post_clean_addr_str on a messy string, None, and ''."""
        # Expect parentheses removed, whitespace runs collapsed to a single
        # space, and the text uppercased.
        cleaned = post_clean_addr_str('(100-104) SW NO   WHERE st')
        self.assertEqual('100-104 SW NO WHERE ST', cleaned)

        # None must pass through as None rather than raising.
        no_value = post_clean_addr_str(None)
        self.assertIsNone(no_value)

        # The empty string must stay an empty string.
        blank = post_clean_addr_str('')
        self.assertEqual('', blank)
コード例 #3
0
def validate_us_postal_code_format(postal_code, address):
    # type: (str, Union[str, Mapping]) -> str
    """Validate postal code conforms to US five-digit Zip or Zip+4 standards.

    The code is first passed through post_clean_addr_str. The cleaned value
    is accepted only when it is either exactly five ASCII digits ("12345")
    or five ASCII digits, a single hyphen, and four ASCII digits
    ("12345-6789"). A missing or empty postal code is reported as a
    validation error rather than failing with a TypeError.

    :param postal_code: string containing US postal code data.
    :type postal_code: str
    :param address: dict or string containing original address.
    :type address: dict | str
    :return: cleaned postal code if no error is raised
    :rtype: str
    :raises AddressValidationError: if the cleaned postal code is missing,
        empty, or not in five-digit Zip or Zip+4 format.
    """
    msg = (
        'US Postal Codes must conform to five-digit Zip or Zip+4 standards.')
    postal_code = post_clean_addr_str(postal_code)

    # The cleaner may hand back None (or '') for missing input; neither can
    # be a valid postal code, so fail with the domain error up front.
    if not postal_code:
        raise AddressValidationError(msg, None, address)

    segments = postal_code.split('-')
    # One segment must be the 5-digit Zip; two must be Zip + 4-digit add-on.
    # Any other segment count fails the length comparison below.
    expected_lengths = (5,) if len(segments) == 1 else (5, 4)
    ascii_digits = '0123456789'
    is_valid = (
        len(segments) == len(expected_lengths)
        and all(
            len(segment) == length
            and all(char in ascii_digits for char in segment)
            for segment, length in zip(segments, expected_lengths)
        )
    )

    if not is_valid:
        raise AddressValidationError(msg, None, address)
    return postal_code
コード例 #4
0
def get_normalized_line_segment(parsed_addr, line_labels):
    # type: (Mapping[str, str], Sequence[str]) -> str
    """Build one cleaned address line from selected parsed components.

    Collects, in parsed_addr iteration order, the values whose keys appear
    in line_labels, joins them with single spaces, and passes the result
    through post_clean_addr_str. When no key matches, None is passed to
    post_clean_addr_str instead of a joined string.

    :param parsed_addr: address parsed into ordereddict per usaddress.
    :param line_labels: tuple of str labels of all the potential keys related
        to the desired address segment (ie address_line_1 or address_line_2).
    :return: post-cleaned, space-joined values from parsed_addr
        corresponding to the given labels.
    """
    matched_values = []
    for label, value in parsed_addr.items():
        if label in line_labels:
            matched_values.append(value)

    # No component belongs to this line: let the cleaner handle None.
    if not matched_values:
        return post_clean_addr_str(None)
    return post_clean_addr_str(' '.join(matched_values))
コード例 #5
0
def normalize_addr_str(
    addr_str,  # type: str
    line2=None,  # type: Optional[str]
    city=None,  # type: Optional[str]
    state=None,  # type: Optional[str]
    zipcode=None,  # type: Optional[str]
    addtl_funcs=None  # type: Sequence[Callable[str, (str, str)]]  # noqa
):  # noqa
    # type: (...) -> Mapping[str, str]
    """Normalize a complete or partial address string.

    The string is pre-cleaned and parsed into usaddress components. If
    parsing fails and no line2 was supplied, each function in addtl_funcs
    is given the pre-cleaned string in turn; the first one that yields a
    (line1, line2) pair which then normalizes successfully determines the
    result. If nothing recovers the address, UnParseableAddressError is
    raised carrying a record of the un-normalized inputs.

    :param addr_str: str containing address data.
    :type addr_str: str
    :param line2: optional str containing occupancy or sub-address data
        (eg: Unit, Apt, Lot).
    :type line2: str
    :param city: optional str city name that does not need to be parsed from
        addr_str.
    :type city: str
    :param state: optional str state name that does not need to be parsed from
        addr_str.
    :type state: str
    :param zipcode: optional str postal code that does not need to be parsed
        from addr_str.
    :type zipcode: str
    :param addtl_funcs: optional sequence of funcs that take string for further
        processing and return line1 and line2 strings. A func signals that
        it cannot handle the string by raising ValueError.
    :type addtl_funcs: Sequence[Callable[str, (str, str)]]
    :return: address dict with uppercase parsed and normalized address values.
    :rtype: Mapping[str, str]
    :raises UnParseableAddressError: if the address string cannot be parsed
        and no additional function recovers it.
    """
    # get address parsed into usaddress components.
    error = None
    parsed_addr = None
    addr_str = pre_clean_addr_str(addr_str, normalize_state(state))
    try:
        parsed_addr = parse_address_string(addr_str)
    except (usaddress.RepeatedLabelError, AmbiguousAddressError) as err:
        error = err
        if not line2 and addtl_funcs:
            for func in addtl_funcs:
                try:
                    # Use separate names so a failed attempt never alters
                    # the caller's line2 or hides the recorded parse error.
                    alt_line1, alt_line2 = func(addr_str)
                    # send refactored line_1 and line_2 back through
                    # processing. addtl_funcs is deliberately not passed
                    # on, so the retry cannot recurse again.
                    return normalize_addr_str(alt_line1,
                                              line2=alt_line2,
                                              city=city,
                                              state=state,
                                              zipcode=zipcode)
                except ValueError:
                    # try a different additional processing function
                    continue

    if parsed_addr:
        parsed_addr = normalize_address_components(parsed_addr)
        # Reconcile caller-supplied values with what the parser found.
        zipcode = get_parsed_values(parsed_addr, zipcode, 'ZipCode', addr_str)
        city = get_parsed_values(parsed_addr, city, 'PlaceName', addr_str)
        state = get_parsed_values(parsed_addr, state, 'StateName', addr_str)
        state = normalize_state(state)

        # assumes if line2 is passed in that it need not be parsed from
        # addr_str. Primarily used to allow advanced processing of otherwise
        # unparsable addresses.
        line2 = line2 if line2 else get_normalized_line_segment(
            parsed_addr, LINE2_USADDRESS_LABELS)
        line2 = post_clean_addr_str(line2)
        # line 1 is fully post cleaned in get_normalized_line_segment.
        line1 = get_normalized_line_segment(parsed_addr,
                                            LINE1_USADDRESS_LABELS)
        validate_parens_groups_parsed(line1)
    else:
        # line1 is set to addr_str so complete dict can be passed to error.
        line1 = addr_str

    addr_rec = dict(address_line_1=line1,
                    address_line_2=line2,
                    city=city,
                    state=state,
                    postal_code=zipcode)
    if error:
        raise UnParseableAddressError(None, None, addr_rec)
    return addr_rec