address_full += '-' + str(address_high) # Get unit unit_full = None if unit_num: unit_full = '# {}'.format(unit_num) address = None if address_full and street_full: source_address_comps = [address_full, street_full, unit_full] source_address = ' '.join([x for x in source_address_comps if x]) # Try to parse try: address = Address(source_address) # QC: check for miscellaneous parcel modifications street_address = address.street_address if should_check_street_full and source_address != street_address: note = 'Parser changes: {} => {}'.format(source_address, street_address) had_warning('Parser changes', note=note) # QC: check for duplicate address address_counts.setdefault(street_address, 0) address_counts[street_address] += 1 except Exception as e: print(source_address) had_error('Could not parse')
def get_address_response_relationships(self, address=None, **kwargs):
    """Classify how `address` relates to the reference address.

    Both `self.ref_addr` and `address.street_address` are re-parsed with
    `Address`; the two parsed results are then compared on street address,
    unit type / unit number, address range (low / high number) and
    low-number suffix to pick a relationship label.

    Args:
        address: Object exposing a `street_address` attribute. Despite the
            `None` default a value is required, because
            `address.street_address` is read unconditionally.
        **kwargs: Accepted but not used by this method.

    Returns:
        `self.match_type` when both parse to the same street address;
        otherwise one of the string labels assigned below (for example
        'unit_child', 'in_range', 'has_base', 'range_parent', 'overlaps').
        `None` is returned when no branch assigns a label, e.g. both have
        a unit type, the unit numbers and base addresses differ, and the
        reference address has no high number.
    """
    # TODO: assign in include_units fct?
    #print("normalized: ", self.normalized_address)
    #print(self.ref_addr)
    ref_address = Address(self.ref_addr)
    # Rebinds the parameter: from here on `address` is the parsed Address.
    address = Address(address.street_address)
    # print(ref_address, address)

    # "Base" strings are address number + street with the unit left out;
    # the *_no_suffix forms use `address_full_num` instead of `address_full`.
    ref_base_address = ' '.join(
        [ref_address.address_full, ref_address.street_full])
    ref_base_address_no_suffix = '{} {}'.format(
        ref_address.address_full_num, ref_address.street_full)
    base_address = ' '.join([address.address_full, address.street_full])
    base_address_no_suffix = '{} {}'.format(address.address_full_num, address.street_full)
    match_type = None
    # Unit designators that are treated as interchangeable ("generic").
    unit_type_variations = ["APT", "UNIT", "#"]
    # The address spelled with each generic unit designator; empty when the
    # address has no unit type.
    street_address_variations = [
        address.street_address,
        address.street_address.replace(address.unit_type, "APT"),
        address.street_address.replace(address.unit_type, "UNIT"),
        address.street_address.replace(address.unit_type, "#")
    ] if address.unit_type else []
    #print(address.unit_type, ref_address.unit_type)

    if address.street_address == ref_address.street_address:
        # Address is same as reference address
        match_type = self.match_type
    # elif address.base_address == ref_address.base_address:
    #     # 1769 FRANKFORD AVE UNIT 99
    #     match_type = 'has_base'
    # elif address.base_address_no_suffix == ref_address.base_address or address.base_address == ref_address.base_address_no_suffix:
    #     #6037 N 17TH ST #A or 6037B N 17TH ST #A
    #     match_type = 'has_base_no_suffix'
    elif address.unit_type not in ('', None):
        # Address is different from ref address and has unit type
        if address.address_high is None:
            # Address also doesn't have a high num
            if ref_address.unit_type not in ('', None):
                # Reference address has unit type
                if ref_address.unit_num == address.unit_num:
                    # Reference and address have same unit num
                    if ref_address.unit_type == address.unit_type:
                        match_type = 'exact' if not ref_address.address_high else 'in_range'
                    elif all(unit_type in unit_type_variations for unit_type in [ref_address.unit_type, address.unit_type]):
                        # Unit types differ but both are generic designators
                        match_type = 'generic_unit_sibling' if not ref_address.address_high else 'in_range_generic_unit_sibling'
                    else:
                        match_type = 'unit_sibling' if not ref_address.address_high else 'in_range_unit_sibling'
                elif base_address == ref_base_address:
                    match_type = 'has_base_unit_child'
                elif base_address_no_suffix == ref_base_address_no_suffix:
                    match_type = 'has_base_no_suffix_unit_child'
                elif ref_address.address_high:
                    # Different unit num, different base: only a ranged
                    # reference address gets a label here.
                    if ref_address.address_low_suffix == address.address_low_suffix:
                        # NOTE(review): unit nums are already known to differ
                        # on this path, so the first test below cannot be
                        # true as the code is structured here.
                        if address.unit_num == ref_address.unit_num:
                            match_type = 'in_range'
                        elif not address.unit_num:
                            match_type = 'has_base_in_range'
                        else:
                            # 1769-71 FRANKFORD AVE UNIT 8?include_units
                            match_type = 'has_base_in_range_unit_child'
                    elif address.address_low_suffix:
                        if ref_address.address_low_suffix:
                            match_type = 'has_base_no_suffix_in_range_suffix_child_unit_child'
                        else:
                            match_type = 'has_base_in_range'
                    else:
                        if ref_address.address_low_suffix:
                            match_type = 'has_base_no_suffix_in_range_unit_child'
                        else:
                            match_type = 'has_base_in_range_unit_child'
            elif ref_address.address_high is not None:
                # Ref address has no unit type but has high_num:
                if ref_address.address_low_suffix == address.address_low_suffix:
                    match_type = 'in_range_unit_child'
                elif ref_address.address_low_suffix:
                    if address.address_low_suffix:
                        match_type = 'has_base_no_suffix_in_range_suffix_child_unit_child'
                    else:
                        match_type = 'has_base_no_suffix_in_range_unit_child'
                else:
                    match_type = 'in_range_suffix_child_unit_child'
            else:
                # ref address has no unit type or address high num
                if address.address_low_suffix is not None and address.base_address_no_suffix == ref_address.base_address:
                    match_type = 'has_base_no_suffix_unit_child'
                else:
                    match_type = 'unit_child'
        else:
            # Address is different from ref address and has unit type and high num (is ranged unit address)
            if ref_address.unit_type is not None:
                # NOTE(review): this test is `is not None`, whereas the
                # non-ranged branch above uses `not in ('', None)` - confirm
                # whether an empty-string unit type should land here.
                if ref_address.street_address in street_address_variations:
                    # Same address, only the generic unit designator differs
                    match_type = 'generic_unit_sibling'
                else:
                    if ref_address.address_high is None:
                        if all(unit_type in unit_type_variations for unit_type in [ref_address.unit_type, address.unit_type]):
                            match_type = 'range_parent_unit_sibling'
                        else:
                            match_type = 'range_parent_unit_child'
                    else:
                        match_type = 'unit_sibling' if ref_address.address_low == address.address_low and ref_address.address_high == address.address_high else 'overlapping_unit_sibling'
            else:
                if ref_address.address_high is None:
                    match_type = 'range_parent_unit_child'
                else:
                    match_type = 'unit_child'
                # match_type = 'range_parent'
    else:
        # Address is different from ref address but has no unit type
        if address.address_high:
            if not ref_address.address_high:
                match_type = 'range_parent'
            else:
                if ref_address.address_high != address.address_high or ref_address.address_low != address.address_low:
                    # Both are ranges, but not the same range
                    if not ref_address.unit_type and ref_address.address_low_suffix == address.address_low_suffix:
                        match_type = 'overlaps'
                    elif ref_base_address == base_address:
                        match_type = 'has_base_overlaps'
                    else:
                        if address.address_low_suffix:
                            # 4923-49 N 16TH ST
                            match_type = 'overlapping_suffix_child'
                        else:
                            match_type = 'has_base_no_suffix_overlaps'
                else:
                    # Identical low/high numbers
                    if ref_address.address_low_suffix == address.address_low_suffix:
                        match_type = 'has_base'
                    else:
                        # 4923-47 N 16TH ST
                        match_type = 'has_base_no_suffix'
        elif ref_address.address_high:
            # no address.address_high but ref_address.address_high
            if ref_address.unit_type:
                if ref_address.address_low_suffix == address.address_low_suffix:
                    if not ref_address.unit_num:
                        match_type = 'in_range'
                    else:
                        # 902A-4 N 3RD ST UNIT 2
                        match_type = 'has_base_in_range'
                elif address.address_low_suffix:
                    # 902-4 N 3RD ST UNIT 2
                    if not ref_address.address_low_suffix:
                        match_type = 'in_range_suffix_child'
                    else:
                        match_type = 'has_base_no_suffix_in_range_suffix_child'
                else:
                    # 902R-4 N 3RD ST UNIT 2
                    match_type = 'has_base_no_suffix_in_range'
            else:
                if ref_address.address_low_suffix == address.address_low_suffix:
                    if not ref_address.unit_num:
                        # 901A-4 N 3RD ST
                        match_type = 'in_range'
                    else:
                        match_type = 'in_range_unit_sibling'
                elif address.address_low_suffix:
                    # 902-4 N 3RD ST UNIT 2
                    match_type = 'in_range_suffix_child'
                else:
                    # 902R-4 N 3RD ST
                    match_type = 'has_base_no_suffix_in_range'
        elif ref_address.base_address == address.street_address:
            # 1769 FRANKFORD AVE UNIT 8
            match_type = 'has_base'
        elif ref_address.base_address_no_suffix == address.street_address:
            # 1769R FRANKFORD AVE
            match_type = 'has_base_no_suffix'
        elif address.base_address_no_suffix == ref_address.base_address:
            # 902 N 3RD ST UNIT 2
            match_type = 'has_base_suffix_child'
        elif address.base_address_no_suffix == ref_address.base_address_no_suffix:
            # 902 N 3RD ST UNIT 2
            match_type = 'has_base_no_suffix_suffix_child'
        else:
            # Fallback when none of the base-address comparisons matched
            match_type = 'has_base'
    return match_type
try: # Try parsing parsed_address = parsed_addresses.get(source_address) if parsed_address is None: # Passyunk no longer raising errors try: parsed_address = parser.parse(source_address) parsed_addresses[source_address] = parsed_address except: raise ValueError('Could not parse') if parsed_address['type'] == "none": raise ValueError('Unknown address type') address = Address(parsed_address) # Get street address and map to source address street_address = address.street_address _source_addresses = source_address_map.setdefault(street_address, []) if not source_address in _source_addresses: _source_addresses.append(source_address) # Check for zero address if address.address_low == 0: raise ValueError('Low number is zero') # Add address if not street_address in street_addresses_seen: addresses.append(address) street_addresses_seen.add(street_address)
zip_range.pop('zip_4_high') zip_range['zip_4'] = zip_4 zip_ranges.append(zip_range) if WRITE_OUT: print('Writing zip ranges to AIS...') zip_range_table.write(zip_ranges) print('Creating indexes...') zip_range_table.create_index('usps_id') print('\n** RELATE TO ADDRESSES**') print('Reading addresses...') addresses = db['address'].read(fields=['street_address']) addresses = [Address(x['street_address']) for x in addresses] if WRITE_OUT: print('Dropping indexes...') address_zip_table.drop_index('street_address') address_zip_table.drop_index('usps_id') print('Dropping address-zips...') address_zip_table.delete() # index zip ranges by street_full street_full_fields = [ 'street_predir', 'street_name', 'street_suffix', 'street_postdir', ]
def get_address_response_relationships(self, address=None, **kwargs):
    """Classify how `address` relates to the reference address.

    Both `self.ref_addr` and `address.street_address` are re-parsed with
    `Address`; the two parsed results are then compared on street address,
    unit type / unit number and address range (low / high number) to pick
    a relationship label.

    Args:
        address: Object exposing a `street_address` attribute. Despite the
            `None` default a value is required, because
            `address.street_address` is read unconditionally.
        **kwargs: Accepted but not used by this method.

    Returns:
        `self.match_type` when both parse to the same street address;
        otherwise one of the string labels assigned below (for example
        'unit_child', 'in_range', 'has_base', 'range_parent', 'overlaps').
        `None` is returned when no branch assigns a label, e.g. both have
        a unit type, the unit numbers and base addresses differ, and the
        reference address has no high number.
    """
    # TODO: assign in include_units fct?
    ref_address = Address(self.ref_addr)
    # Rebinds the parameter: from here on `address` is the parsed Address.
    address = Address(address.street_address)

    # "Base" strings are address number + street with the unit left out.
    ref_base_address = ' '.join(
        [ref_address.address_full, ref_address.street_full])
    base_address = ' '.join([address.address_full, address.street_full])
    match_type = None
    # Unit designators that are treated as interchangeable ("generic").
    unit_type_variations = ["APT", "UNIT", "#"]
    # The address spelled with each generic unit designator; empty when the
    # address has no unit type.
    street_address_variations = [
        address.street_address,
        address.street_address.replace(address.unit_type, "APT"),
        address.street_address.replace(address.unit_type, "UNIT"),
        address.street_address.replace(address.unit_type, "#")
    ] if address.unit_type else []

    if address.street_address == ref_address.street_address:
        # Address is same as reference address
        match_type = self.match_type
    elif address.unit_type not in ('', None):
        # Address is different from ref address and has unit type
        if address.address_high is None:
            # Address also doesn't have a high num
            if ref_address.unit_type is not None:
                # Reference address has unit type
                if ref_address.unit_num == address.unit_num:
                    # Reference and address have same unit num
                    if ref_address.unit_type == address.unit_type:
                        match_type = 'exact' if not ref_address.address_high else 'in_range'
                    elif all(unit_type in unit_type_variations for unit_type in [ref_address.unit_type, address.unit_type]):
                        # Unit types differ but both are generic designators
                        match_type = 'generic_unit_sibling' if not ref_address.address_high else 'in_range_generic_unit_sibling'
                    else:
                        match_type = 'unit_sibling' if not ref_address.address_high else 'in_range_unit_sibling'
                elif base_address == ref_base_address:
                    match_type = 'has_base_unit_child'
                elif ref_address.address_high:
                    match_type = 'in_range_unit_sibling'
            elif ref_address.address_high is not None:
                # Ref address has no unit type but has high_num:
                # NOTE(review): `ref_address.unit_type` is already None on
                # this path, so the 'in_range' arm below cannot run as the
                # code is structured; kept to avoid changing the decision
                # tree without confirmation.
                if ref_address.unit_type is not None:
                    # Ref address has unit type
                    match_type = 'in_range'
                else:
                    match_type = 'in_range_unit_child'
            else:
                # ref address has no unit type or address high num
                if address.address_low_suffix is not None and address.base_address_no_suffix == ref_address.base_address:
                    match_type = 'has_base_no_suffix_unit_child'
                else:
                    match_type = 'unit_child'
        else:
            # Address is different from ref address and has unit type and high num (is ranged unit address)
            if ref_address.unit_type is not None:
                if ref_address.street_address in street_address_variations:
                    # Same address, only the generic unit designator differs
                    if ref_address.address_high is None:
                        match_type = 'unit_sibling'
                    else:
                        match_type = 'unit_child'
                else:
                    if ref_address.address_high is None:
                        if all(unit_type in unit_type_variations for unit_type in [ref_address.unit_type, address.unit_type]):
                            match_type = 'range_parent_unit_sibling'
                        else:
                            match_type = 'range_parent_unit_child'
                    else:
                        match_type = 'unit_sibling' if ref_address.address_low == address.address_low and ref_address.address_high == address.address_high else 'overlapping_unit_sibling'
            else:
                if ref_address.address_high is None:
                    match_type = 'range_parent_unit_child'
                else:
                    match_type = 'unit_child'
    else:
        # Address is different from ref address but has no unit type
        if address.address_high:
            if not ref_address.address_high:
                match_type = 'range_parent'
            else:
                if ref_address.address_high != address.address_high or ref_address.address_low != address.address_low:
                    # Both are ranges, but not the same range
                    match_type = 'overlaps' if not ref_address.unit_type else "has_base_overlaps"
                else:
                    match_type = 'has_base'
        elif ref_address.address_high:
            # no address.address_high but ref_address.address_high
            match_type = 'in_range' if not ref_address.unit_type else 'has_base_in_range'
        elif address.base_address_no_suffix == ref_address.street_address:
            match_type = 'has_base_no_suffix'
        else:
            match_type = 'has_base'
    return match_type
if source_address is None: # TODO: it might be helpful to log this, but right now we aren't # logging object IDs so there would be no way to identify the # null address in the source dataset. Just skipping for now. continue source_map.setdefault(source_address, []).append(source_name) # Make sure this is reset on each run (also for logging) street_address = None try: # Try parsing try: address = Address(source_address) except: raise ValueError('Could not parse') # Get street address and map to source address street_address = address.street_address _source_addresses = source_address_map.setdefault( street_address, []) if not source_address in _source_addresses: _source_addresses.append(source_address) # Check for zero address if address.address_low == 0: raise ValueError('Low number is zero') # Add address
if not re.match('\d+[A-Z]', source_address): # FEEDBACK: missing suffix if '-' in source_address: insert_i = source_address.index('-') else: insert_i = source_address.index(' ') source_address = source_address[:insert_i] + address_suffix + \ source_address[insert_i:] # Parse try: parsed = parser.parse(source_address) comps = parsed['components'] except: raise ValueError('Could not parse') address = Address(comps) street_address = comps['street_address'] # Owners try: owners = owner_map[account_num] except KeyError: owners = '' prop = { 'account_num': account_num, 'source_address': source_address, 'tencode': tencode, 'owners': owners, 'address_low': comps['address']['low_num'], # 'address_low_suffix': comps['address']['low_suffix'] or '',
if not re.match('\d+[A-Z]', source_address): # FEEDBACK: missing suffix if '-' in source_address: insert_i = source_address.index('-') else: insert_i = source_address.index(' ') source_address = source_address[:insert_i] + address_suffix + \ source_address[insert_i:] # Parse try: parsed = parser.parse(source_address) comps = parsed['components'] except: raise ValueError('Could not parse') address = Address(parsed) street_address = comps['output_address'] # Owners try: owners = owner_map[account_num] except KeyError: owners = '' prop = { 'account_num': account_num, 'source_address': source_address, 'tencode': tencode, 'owners': owners, 'address_low': comps['address']['low_num'], # 'address_low_suffix': comps['address']['low_suffix'] or '',