def validate_found_address(found_address, user_provided_address):
    """
    Validates that the street name and number found in trash collection
    database matches the provided values. We do not treat partial matches
    as valid.

    The house number must be equal and the street name must be equal
    ignoring case. The street type is compared loosely: it is accepted when
    either value is contained in the other, so "ave" matches "avenue". A
    street type that is missing on either side is accepted as well.

    :param found_address: Full address found in trash collection database
    :param user_provided_address: Street number and name provided by user
    :return: boolean: True if addresses are considered a match, else False
    """
    logger.debug('found_address: %s user_provided_address: %s',
                 found_address, user_provided_address)

    address_parser = StreetAddressParser()
    found = address_parser.parse(found_address)
    provided = address_parser.parse(user_provided_address)

    if found["house"] != provided["house"]:
        return False

    # Parsed parts may be None when the parser could not identify them;
    # fall back to "" so that .lower() cannot raise AttributeError.
    found_name = (found["street_name"] or "").lower()
    provided_name = (provided["street_name"] or "").lower()
    if found_name != provided_name:
        return False

    # Allow fuzzy match on street type to allow "ave" to match "avenue"
    found_type = (found["street_type"] or "").lower()
    provided_type = (provided["street_type"] or "").lower()
    if found_type not in provided_type and provided_type not in found_type:
        return False

    return True
def get_python_data():
    """
    Look up legislature data for the address given in the request arguments.

    Reads the ``address``, ``city`` and ``zipcode`` request arguments,
    normalises the street address, and queries the legislature lookup up to
    five times.

    :return: JSON string of the scraped result. It carries a ``route`` entry
        when the scrape returned more than one item, otherwise a ``guesses``
        entry. If every attempt returns None, a ``(message, status)`` tuple
        with ``status.HTTP_404_NOT_FOUND`` is returned instead.
    """
    addr_parser = StreetAddressParser()
    parsed = addr_parser.parse(request.args.get('address'))

    # create a full address for legislature lookup
    if parsed['house'] and parsed['street_full']:
        full_address = ' '.join([parsed['house'], parsed['street_full']])
    elif parsed['street_full']:
        full_address = parsed['street_full']
    else:
        full_address = ''

    lookup_args = [
        full_address,
        request.args.get('city'),
        request.args.get('zipcode'),
    ]

    for _ in range(5):
        # lkupLeg returns None if its own retries fail
        response = lkupLib.lkupLeg(lookup_args)
        if response is not None:
            # got something from website, scrape it and return
            sen_rep = lkupLib.legScrape(response)
            if len(sen_rep) > 1:
                # lookup worked, calculate route code
                sen_rep['route'] = lkupLib.mkRoute(sen_rep)
            else:
                # lookup failed. return list of guesses
                sen_rep['guesses'] = mkGuess(request.args.get('zipcode'),
                                             parsed['street_full'])
            return json.dumps(sen_rep)

    return ('MA Legislature Website Down - Hit Clear and try again later',
            status.HTTP_404_NOT_FOUND)
def build_origin_address(req):
    """
    Builds an address from an Alexa session. Assumes city is Boston
    if not specified

    :param req: MyCityRequestDataModel object
    :return: String containing full address
    :raises InvalidAddressError: if the parsed address has no house number
        or no street
    """
    logger.debug('MyCityRequestDataModel received:' + req.get_logger_string())

    address_parser = StreetAddressParser()
    stored_address = req.session_attributes[
        intent_constants.CURRENT_ADDRESS_KEY]
    parts = address_parser.parse(stored_address)

    house = parts["house"]
    street = parts["street_full"]
    if house is None or street is None:
        logger.debug("Parsed address had an unexpected None part in {house: %r, street_full: %r}",
                     house,
                     street)
        raise InvalidAddressError()

    street_part = " ".join([house, street])
    # Anything the parser could not classify is kept as the locality;
    # without it the city defaults to Boston.
    if parts["other"]:
        return street_part + " {}".format(parts["other"])
    return street_part + " Boston MA"
def tags_for_school(self, school):
    """
    Build the tag dictionary for a school record.

    Keys use names such as ``amenity``, ``addr:street`` and
    ``source:name``. Optional tags (website, postcode, street, house
    number, city, phone) are only added when the matching value on
    ``school`` is truthy.

    :param school: record exposing town, locality, name, website, source,
        uid, postcode, street and phone attributes
    :return: dict mapping tag name to tag value
    """
    city = school.town or school.locality or ''
    # A fully upper-cased city name is converted to capitalised words.
    if city and city == city.upper():
        city = string.capwords(city)

    tags = {'amenity': 'school', 'name': school.name.strip()}

    if school.website:
        tags['website'] = url_for_school(school)

    tags['ref:{0}'.format(school.source.lower())] = str(school.uid)
    tags['addr:country'] = 'GB'

    if school.postcode:
        tags['addr:postcode'] = school.postcode

    if school.street:
        parsed_street = StreetAddressParser().parse(school.street)
        if parsed_street['street_full']:
            tags['addr:street'] = parsed_street['street_full']
        if parsed_street['house']:
            tags['addr:housenumber'] = parsed_street['house']

    if city:
        tags['addr:city'] = city

    if school.phone:
        number = phonenumbers.parse(school.phone, 'GB')
        tags['phone'] = phonenumbers.format_number(
            number, phonenumbers.PhoneNumberFormat.E164)

    tags['source:geometry'] = 'OS_Open_Map_Local_FunctionalSite'
    tags['source:addr'] = school.source.lower()
    tags['source:name'] = school.source.lower()
    return tags
def tags_for_school(self, school):
    """
    Return the tags describing ``school`` as a dictionary.

    ``amenity``, ``name``, ``ref:<source>``, ``addr:country`` and the three
    ``source:*`` tags are always present. Website, postcode, street, house
    number, city and phone are added only when the school record provides
    a truthy value for them.

    :param school: school record to describe
    :return: dict of tag names to values
    """
    city = school.town or school.locality or ''
    if city and city == city.upper():
        # e.g. "LONDON" -> "London"
        city = string.capwords(city)

    kwargs = {}
    kwargs['amenity'] = 'school'
    kwargs['name'] = school.name.strip()
    if school.website:
        kwargs['website'] = url_for_school(school)

    ref_key = 'ref:{0}'.format(school.source.lower())
    kwargs[ref_key] = str(school.uid)
    kwargs['addr:country'] = 'GB'
    if school.postcode:
        kwargs['addr:postcode'] = school.postcode

    if school.street:
        addr_parser = StreetAddressParser()
        address = addr_parser.parse(school.street)
        street_full = address['street_full']
        if street_full:
            kwargs['addr:street'] = street_full
        house = address['house']
        if house:
            kwargs['addr:housenumber'] = house

    if city:
        kwargs['addr:city'] = city

    if school.phone:
        # Phone numbers are parsed as GB numbers and stored in E.164 form.
        parsed_number = phonenumbers.parse(school.phone, 'GB')
        e164 = phonenumbers.PhoneNumberFormat.E164
        kwargs['phone'] = phonenumbers.format_number(parsed_number, e164)

    kwargs['source:geometry'] = 'OS_Open_Map_Local_FunctionalSite'
    kwargs['source:addr'] = school.source.lower()
    kwargs['source:name'] = school.source.lower()
    return kwargs
def submitted_form():
    """
    Normalise a submitted form and append it to the requested spreadsheet.

    The form arrives as JSON in the ``javascript_data`` form field. The
    name is split into first/middle and last parts, the address is reduced
    to house number plus street, and any suite information is collected
    into a ``suite`` field before the row is added to the sheet named by
    the ``sheet`` entry.

    :return: the original JSON payload on success, or a
        ``(json_message, 404)`` tuple when adding the row reports failure
    """
    jsdata = request.form['javascript_data']
    form_data = json.loads(jsdata)

    name = HumanName(form_data['firstName'])
    form_data['firstName'] = name.first + ' ' + name.middle
    form_data['lastName'] = name.last

    addr_parser = StreetAddressParser()
    parsed = addr_parser.parse(form_data['address'])
    if parsed['house'] and parsed['street_full']:
        form_data['address'] = ' '.join([parsed['house'],
                                         parsed['street_full']])
    elif parsed['street_full']:
        form_data['address'] = parsed['street_full']
    else:
        form_data['address'] = ''

    form_data['suite'] = ''
    for part_key in ('suite_type', 'suite_num', 'other'):
        try:
            form_data['suite'] += parsed[part_key] + ' '
        except (KeyError, TypeError):
            # The part is absent (KeyError) or None (TypeError): skip it.
            # Any other error is a real problem and is allowed to surface.
            pass

    # send the data to spreadsheet
    spread_sheet = form_data['sheet']
    wks = adrSheet.adrSheet(spread_sheet)  # exits if spreadsheet not found
    form_data.pop('sheet', None)
    ok, msg = wks.addRow(form_data)
    # Explicit comparison kept on purpose: a None status is still treated
    # as success, exactly as before.
    if ok == False:  # noqa: E712
        return json.dumps(msg), 404
    return jsdata
def refresh_zillow_housing(location):
    """
    Refresh the Zillow data stored for the apartments of ``location``.

    Apartments of the location whose zpid is not among the new results have
    their Zillow fields cleared. Each result whose street, city and state
    match the location is then written with ``update_or_create``, keyed by
    suite number when the result has one and by zpid otherwise. Newly
    created apartments get their rent price initialised from the estimate.
    Finally the location's ``last_fetched_zillow`` timestamp is updated.

    :param location: location object exposing address, city, state,
        zipcode, full_address and apartment_set
    :return: None
    """
    results = get_zillow_housing(
        address=location.address,
        city_state=f"{location.city}, {location.state}",
        zipcode=location.zipcode,
    )

    # reset outdated zillow information
    result_zpids = {response.zpid for response in results}
    location.apartment_set.exclude(zpid__in=result_zpids).update(
        zpid=None,
        estimated_rent_price=None,
        last_estimated=None,
        zillow_url=None,
    )

    addr_parser = StreetAddressParser()
    # The location's own address is the same for every result, so it is
    # parsed once here instead of once per loop iteration.
    loc_addr = addr_parser.parse(location.full_address)

    for response in results:
        response_addr = addr_parser.parse(response.address.full_address)

        if loc_addr["street_full"] != response_addr["street_full"]:
            continue
        if location.city != response.address.city:
            continue
        if location.state != response.address.state:
            continue

        defaults = {
            "zpid": response.zpid,
            "estimated_rent_price": response.estimated_rent_price,
            "last_estimated": response.last_estimated,
            "zillow_url": response.url,
            "location": location,
        }
        suite_num = response_addr.get("suite_num")

        # https://docs.djangoproject.com/en/2.2/ref/models/querysets/#update-or-create
        if suite_num:
            apt, created = Apartment.objects.update_or_create(
                suite_num=suite_num,
                defaults=defaults,
            )
        else:
            # sometimes Zillow does not provide suite number
            defaults["suite_num"] = suite_num
            apt, created = Apartment.objects.update_or_create(
                zpid=response.zpid,
                defaults=defaults,
            )

        if created:
            apt.rent_price = apt.estimated_rent_price
            apt.save()

    location.last_fetched_zillow = timezone.now()
    location.save()
def clean_address(self):
    """
    Assemble the address from its parts and normalise the parts.

    The street number, direction, name and type in ``cleaned_data`` are
    combined into ``cleaned_data['address']``. That string is parsed again
    and the individual fields are overwritten with the parsed values; the
    street type additionally goes through ``normalize_street_type``.

    :return: the assembled address string
    """
    data = self.cleaned_data
    data['address'] = "%d %s %s %s" % (
        data['street_no'],
        data['street_dir'],
        data['street_name'],
        data['street_type'],
    )

    parsed = StreetAddressParser().parse(data['address'])

    # The parsed street name is expected to start with the direction,
    # separated from the rest of the name by a space.
    direction, remainder = parsed['street_name'].split(' ', 1)

    data['street_no'] = parsed['house']
    data['street_dir'] = direction
    data['street_name'] = remainder
    data['street_type'] = normalize_street_type(parsed['street_type'])
    return data['address']
def get_trash_day_info(intent, session):
    """
    Generates response object for a trash day inquiry.

    When the session holds a current address, the trash and recycling days
    for that house number and street are fetched from data.boston.gov and
    spoken back, and the session ends. Without an address the user is asked
    to provide one and the session stays open.

    :param intent: intent dictionary; its 'name' entry is passed to the
        speechlet builder
    :param session: session dictionary; 'attributes' may hold the current
        address
    :return: result of build_response for this inquiry
    """
    reprompt_text = None
    print("IN GET_TRASH_DAY_INFO, SESSION: " + str(session))

    session_attributes = session.get('attributes', {})

    if alexa_constants.CURRENT_ADDRESS_KEY in session_attributes:
        current_address = \
            session_attributes[alexa_constants.CURRENT_ADDRESS_KEY]

        # grab relevant information from session address
        address_parser = StreetAddressParser()
        a = address_parser.parse(current_address)

        # currently assumes that trash day is the same for all units at
        # the same street address
        address = str(a['house']) + " " + str(a['street_name'])

        # rest call to data.boston.gov for trash/recycle information
        url = 'https://data.boston.gov/api/action/datastore_search?' + \
              'resource_id=fee8ee07-b8b5-4ee5-b540-5162590ba5c1&q=' + \
              '{{"Address":"{}"}}'.format(address)
        resp = requests.get(url).json()
        print("RESPONSE FROM DATA.BOSTON.GOV: " + str(resp))

        # format script of response
        record = resp['result']['records'][0]
        # Both lists use ", " as separator; the recycling list previously
        # used " ," which produced "Monday ,Thursday".
        speech_output = "Trash is picked up on the following days, " + \
            ", ".join(parse_days(record['Trash'])) + \
            ". Recycling is picked up on the following days, " + \
            ", ".join(parse_days(record['Recycling']))
        should_end_session = True
    else:
        speech_output = "I'm not sure what your address is. " \
                        "You can tell me your address by saying, " \
                        "my address is 123 Main St., apartment 3."
        should_end_session = False

    # Setting reprompt_text to None signifies that we do not want to reprompt
    # the user. If the user does not respond or says something that is not
    # understood, the session will end.
    return build_response(
        session_attributes,
        build_speechlet_response(intent['name'], speech_output,
                                 reprompt_text, should_end_session))
def save(self, force_insert=False, force_update=False, using=None,
         update_fields=None):
    """
    Strip the text fields, fill in the anonymised address, then save.

    ``anon_address`` is only computed when an address is present and no
    anonymised value exists yet. If the parsed address has a positive
    ``block`` value the result is "<block> block of <street>"; otherwise
    the address is copied unchanged.

    :return: whatever the parent class ``save`` returns
    """
    self.address = self.address.strip()
    self.description = self.description.strip()
    self.location = self.location.strip()

    if self.address and not self.anon_address:
        # Let's anonymize this address
        parsed = StreetAddressParser().parse(self.address)
        has_block = parsed.get('block') and int(parsed.get('block')) > 0
        if has_block:
            self.anon_address = u'%s block of %s' % (
                parsed.get('block'), parsed.get('street_full'))
        else:
            self.anon_address = self.address

    return super(CincinnatiPolice, self).save(
        force_insert, force_update, using, update_fields)
def get_trash_day_info(mycity_request):
    """
    Generates response object for a trash day inquiry.

    :param mycity_request: MyCityRequestDataModel object
    :return: MyCityResponseDataModel object
    """
    print('[module: trash_intent]',
          '[method: get_trash_day_info]',
          'MyCityRequestDataModel received:',
          str(mycity_request))

    mycity_response = MyCityResponseDataModel()
    address_key = intent_constants.CURRENT_ADDRESS_KEY

    if address_key not in mycity_request.session_attributes:
        print("Error: Called trash_day_intent with no address")
    else:
        # grab relevant information from session address
        parsed = StreetAddressParser().parse(
            mycity_request.session_attributes[address_key])

        # currently assumes that trash day is the same for all units at
        # the same street address
        address = " ".join([str(parsed['house']), str(parsed['street_name'])])

        try:
            trash_days = get_trash_and_recycling_days(address)
            days_speech = build_speech_from_list_of_days(trash_days)
            mycity_response.output_speech = \
                "Trash and recycling is picked up on {}.".format(days_speech)
        except InvalidAddressError:
            mycity_response.output_speech = \
                "I can't seem to find {}. Try another address".format(address)
        except BadAPIResponse:
            mycity_response.output_speech = \
                "Hmm something went wrong. Maybe try again?"

        mycity_response.should_end_session = False

    # Setting reprompt_text to None signifies that we do not want to reprompt
    # the user. If the user does not respond or says something that is not
    # understood, the session will end.
    mycity_response.reprompt_text = None
    mycity_response.session_attributes = mycity_request.session_attributes
    mycity_response.card_title = mycity_request.intent_name
    return mycity_response
def parse_address_using_lib_streetaddress(address_string):
    """
    Purpose:
        Use the street-address library to parse a string into a dict
    Args:
        address_string (String): Raw address string to try and parse
    Return:
        parsed_address (Dict): Dict of the parsed address with fields that
            could be determined from a string
    Raises:
        Exception: if the parser returns a field that has no entry in the
            field mapping
    """
    # Parser field name -> output field name; "ignore" drops the field.
    field_mapping = {
        "house": "address_number",
        "other": "ignore",
        "PlaceName": "city",
        "StateName": "state",
        "street_full": "ignore",
        "street_name": "street_name",
        "street_type": "street_type",
        "suite_num": "suite_num",
        "suite_type": "suite_type",
        "ZipCode": "zip_code",
    }
    regex_remove_nonmatching_characters = r"[^A-Za-z0-9\-]+"

    parsed_address = {}
    raw_result = StreetAddressParser().parse(address_string)

    for raw_key, value in raw_result.items():
        # Parsing Raw into wanted form
        mapped_key = field_mapping.get(raw_key, None)
        if not mapped_key:
            raise Exception(f"Missing Field Mapping: {raw_key}")
        if mapped_key == "ignore":
            continue

        # String values have every run of other characters collapsed into a
        # single space; non-string values are passed through untouched.
        if isinstance(value, str):
            value = re.sub(
                regex_remove_nonmatching_characters, " ", value).strip()
        parsed_address[mapped_key] = value

    return parsed_address
class TestStreetAddress(unittest.TestCase):
    """Tests for the street address parser and formatter."""

    def setUp(self):
        """Create the parser and formatter used by the tests."""
        self.addr_parser = StreetAddressParser()
        self.addr_formatter = StreetAddressFormatter()

    def test_success_abbrev_street_avenue_etc(self):
        """'Street' in a parsed full street is abbreviated to 'St'."""
        parsed = self.addr_parser.parse('221B Baker Street')
        abbreviated = self.addr_formatter.abbrev_street_avenue_etc(
            parsed['street_full'])
        eq_(abbreviated, 'Baker St')
def validate_found_address(found_address, user_provided_address):
    """
    Validates that the street name and number found in trash collection
    database matches the provided values. We do not treat partial matches
    as valid.

    The house number must be equal. A leading "S"/"S." or "N"/"N." in the
    found street name is expanded to "South"/"North" before the names are
    compared ignoring case. Street types are compared loosely: "rd" in the
    found type against "road" in the user's type is accepted outright,
    otherwise one type must be contained in the other. A street type that
    is missing on either side is accepted as well.

    :param found_address: Full address found in trash collection database
    :param user_provided_address: Street number and name provided by user
    :return: boolean: True if addresses are considered a match, else False
    """
    logger.debug('found_address: %s user_provided_address: %s',
                 found_address, user_provided_address)

    address_parser = StreetAddressParser()
    found = address_parser.parse(found_address)
    provided = address_parser.parse(user_provided_address)

    if found["house"] != provided["house"]:
        return False

    # Parsed parts may be None when the parser could not identify them;
    # fall back to "" so re.sub and .lower() cannot raise.
    found_name = found["street_name"] or ""
    provided_name = provided["street_name"] or ""

    # Re-collect replaces South with S and North with N
    found_name = re.sub(r'^S\.? ', "South ", found_name)
    found_name = re.sub(r'^N\.? ', "North ", found_name)

    if found_name.lower() != provided_name.lower():
        return False

    found_type = (found["street_type"] or "").lower()
    provided_type = (provided["street_type"] or "").lower()

    # Allow for mismatched "Road" street_type between user input and
    # ReCollect API
    if "rd" in found_type and "road" in provided_type:
        return True

    # Allow fuzzy match on street type to allow "ave" to match "avenue"
    if found_type not in provided_type and provided_type not in found_type:
        return False

    return True
def _build_origin_address(session):
    """
    Builds an address from an Alexa session. Assumes city is Boston
    if not specified

    :param session: Alexa session object
    :return: String containing full address
    """
    address_parser = StreetAddressParser()
    stored_address = \
        session['attributes'][alexa_constants.CURRENT_ADDRESS_KEY]
    parts = address_parser.parse(stored_address)

    street_part = " ".join([parts["house"], parts["street_full"]])

    # Whatever follows the street is used as the locality; when nothing
    # follows, the address is assumed to be in Boston.
    if not parts["other"]:
        return street_part + " Boston MA"
    return street_part + " {}".format(parts["other"])
def build_speech_work_zones(current_address):
    """
    Query data.boston.gov for active work zones on the street of an address.

    Only the street name of ``current_address`` is used for the search. The
    decoded JSON response is printed.

    :param current_address: address string supplied by the user
    """
    # grab relevant information from user given address
    parsed_address = StreetAddressParser().parse(current_address)
    street_name = str(parsed_address['street_name'])

    # rest call to data.boston.gov for active work zone information
    endpoint = 'https://data.boston.gov/api/3/action/datastore_search?'
    resource = 'resource_id=36fcf981-e414-4891-93ea-f5905cec46fc&q='
    query = '{{"Street":"{}"}}'.format(street_name)
    url = endpoint + resource + query

    resp = requests.get(url).json()
    print("RESPONSE FROM DATA.BOSTON.GOV: " + str(resp))
def save(self, force_insert=False, force_update=False, using=None,
         update_fields=None):
    """
    Strip location and address, fill in their anonymised forms, then save.

    An anonymised value is only computed when the source field is non-empty
    and the anonymised field is still empty. A parsed address with a
    positive ``block`` value becomes "<block> block of <street>"; any other
    value is copied unchanged.

    :return: whatever the parent class ``save`` returns
    """
    self.location = self.location.strip()
    self.address = self.address.strip()

    def _anonymize(raw_value):
        # Reduce an exact address to its block when the parser reports one.
        parsed = StreetAddressParser().parse(raw_value)
        if parsed.get('block') and int(parsed.get('block')) > 0:
            return u'%s block of %s' % (parsed.get('block'),
                                        parsed.get('street_full'))
        return raw_value

    if self.location and not self.anon_location:
        self.anon_location = _anonymize(self.location)

    if self.address and not self.anon_address:
        self.anon_address = _anonymize(self.address)

    return super(GenericData, self).save(
        force_insert, force_update, using, update_fields=update_fields)
def build_origin_address(req):
    """
    Builds an address from an Alexa session. Assumes city is Boston
    if not specified

    :param req: MyCityRequestDataModel object
    :return: String containing full address
    """
    print('[method: address_utils.build_origin_address]',
          'MyCityRequestDataModel received:',
          str(req))

    address_parser = StreetAddressParser()
    session_address = \
        req.session_attributes[intent_constants.CURRENT_ADDRESS_KEY]
    parsed = address_parser.parse(session_address)

    house_and_street = [parsed["house"], parsed["street_full"]]
    result = " ".join(house_and_street)

    # Use the trailing part of the address as locality when there is one,
    # otherwise fall back to Boston.
    result += " {}".format(parsed["other"]) if parsed["other"] \
        else " Boston MA"
    return result
def __init__(self):
    """
    Set up an idle geocoder with an empty cache and zeroed counters.

    The Google geocoder is configured from the credentials file. If
    anything in that setup raises, only the Nominatim geocoder is used.
    """
    self.running = False
    self.data = None
    self.address_columns = []
    self.street_address_parser = StreetAddressParser()
    self.progress_bar = None

    try:
        credentials = configparser.ConfigParser()
        credentials.read(CREDENTIALS)
        google_api = GeoAPIWrapper(
            GoogleV3,
            user_agent="google_locator",
            api_key=credentials["Google"]["api_token"])
        nominatim_api = GeoAPIWrapper(Nominatim,
                                      user_agent="nominatim_locator")
        self.geo_coder_apis = [google_api, nominatim_api]
    except Exception:
        # Fall back to Nominatim alone.
        self.geo_coder_apis = [
            GeoAPIWrapper(Nominatim, user_agent="nominatim_locator")
        ]

    self.geocode_cache = {}
    self.processed_items = 0
    self.found = 0
def _build_origin_address(mycity_request):
    """
    Builds an address from an Alexa session. Assumes city is Boston
    if not specified

    :param mycity_request: MyCityRequestDataModel object
    :return: String containing full address
    """
    print('[method: _build_origin_address]',
          'MyCityRequestDataModel received:',
          str(mycity_request))

    # @todo: Repeated code -- look into using same code here and in trash intent
    address_parser = StreetAddressParser()
    attributes = mycity_request.session_attributes
    parsed = address_parser.parse(
        attributes[intent_constants.CURRENT_ADDRESS_KEY])

    origin = " ".join([parsed["house"], parsed["street_full"]])

    # The unclassified remainder of the address serves as the locality;
    # without one the city defaults to Boston.
    if parsed["other"]:
        locality = " {}".format(parsed["other"])
    else:
        locality = " Boston MA"
    return origin + locality
def save(self, force_insert=False, force_update=False, using=None,
         update_fields=None):
    """
    Strip both addresses, fill in their anonymised forms, then save.

    For the arrest address and then the home address: when the address is
    non-empty and its anonymised field is still empty, the address is
    parsed. A positive ``block`` value yields "<block> block of <street>";
    otherwise the address is copied unchanged.

    :return: whatever the parent class ``save`` returns
    """
    self.arrest_address = self.arrest_address.strip()
    self.home_address = self.home_address.strip()

    # (source field, anonymised field), processed in this order
    field_pairs = (
        ('arrest_address', 'anon_arrest_address'),
        ('home_address', 'anon_home_address'),
    )
    for source_field, anon_field in field_pairs:
        raw_value = getattr(self, source_field)
        if not raw_value or getattr(self, anon_field):
            continue

        address = StreetAddressParser().parse(raw_value)
        if address.get('block') and int(address.get('block')) > 0:
            anonymised = u'%s block of %s' % (address.get('block'),
                                              address.get('street_full'))
        else:
            anonymised = raw_value
        setattr(self, anon_field, anonymised)

    return super(Arrest, self).save(
        force_insert, force_update, using, update_fields=update_fields)
def get_address_coordinates_from_session(mycity_request) -> dict:
    """
    Gets coordinates of the provided address from the session attributes.
    Returns None if no address is available.

    :param mycity_request: MyCityRequestDataModel for the current request
    :return dict: Dictionary containing coordinates of the address
    """
    address_key = intent_constants.CURRENT_ADDRESS_KEY
    if address_key not in mycity_request.session_attributes:
        return None

    parsed = StreetAddressParser().parse(
        mycity_request.session_attributes[address_key])

    # Only house number, street name and street type are geocoded.
    wanted_parts = ("house", "street_name", "street_type")
    address = " ".join([parsed[part] for part in wanted_parts])
    return gis_utils.geocode_address(address)
def _normalize_address_str(address_val): """ Normalize the address to conform to short abbreviations. If an invalid address_val is provided, None is returned. If a valid address is provided, a normalized version is returned. """ # if this string is empty the regular expression in the sa wont # like it, and fail, so leave returning nothing if not address_val: return None # now parse the address into number, street name and street type parser = StreetAddressParser() addr = parser.parse( str(address_val)) # TODO: should probably use unicode() normalized_address = '' if not addr: return None if 'house' in addr and addr['house'] is not None: normalized_address = addr['house'].lstrip( "0") # some addresses have leading zeros, strip them here if 'street_name' in addr and addr['street_name'] is not None: normalized_address = normalized_address + ' ' + addr['street_name'] if 'street_type' in addr and addr['street_type'] is not None: normalized_address = normalized_address + ' ' + addr['street_type'] formatter = StreetAddressFormatter() normalized_address = formatter.abbrev_street_avenue_etc(normalized_address) return normalized_address.lower().strip()
def test_missing_street_number_is_not_validated(self):
    """An address without a street number must not be reported valid."""
    parsed = StreetAddressParser().parse("Everdean St")
    is_valid = address_utils.is_address_valid(parsed)
    self.assertFalse(is_valid)
def test_missing_street_name_is_not_validated(self):
    """An address consisting only of a number must not be reported valid."""
    parsed = StreetAddressParser().parse("46")
    is_valid = address_utils.is_address_valid(parsed)
    self.assertFalse(is_valid)
def get_trash_day_info(mycity_request):
    """
    Generates response object for a trash day inquiry.

    :param mycity_request: MyCityRequestDataModel object
    :return: MyCityResponseDataModel object
    """
    logger.debug('MyCityRequestDataModel received:' +
                 mycity_request.get_logger_string())

    mycity_response = MyCityResponseDataModel()

    def _ask_for_address_again(speech):
        # Shared exit for an unusable address: elicit the trash slot again
        # and hand the response to clear_address_from_mycity_object.
        mycity_response.output_speech = speech
        mycity_response.dialog_directive = "ElicitSlotTrash"
        mycity_response.reprompt_text = None
        mycity_response.session_attributes = mycity_request.session_attributes
        mycity_response.card_title = CARD_TITLE
        mycity_response.should_end_session = True
        return clear_address_from_mycity_object(mycity_response)

    if intent_constants.CURRENT_ADDRESS_KEY in \
            mycity_request.session_attributes:
        current_address = mycity_request.session_attributes[
            intent_constants.CURRENT_ADDRESS_KEY]

        # grab relevant information from session address
        a = StreetAddressParser().parse(current_address)

        if not address_utils.is_address_valid(a):
            return _ask_for_address_again(
                speech_constants.ADDRESS_NOT_UNDERSTOOD)

        # currently assumes that trash day is the same for all units at
        # the same street address
        address = " ".join([str(a['house']), str(a['street_full'])])

        # The trailing part of the address is a zip code when it is all
        # digits, otherwise it is taken as a neighborhood.
        zip_code = None
        neighborhood = None
        if a["other"]:
            if a["other"].isdigit():
                zip_code = str(a["other"]).zfill(5)
            else:
                neighborhood = a["other"]

        zip_code_key = intent_constants.ZIP_CODE_KEY
        if zip_code is None and \
                zip_code_key in mycity_request.session_attributes:
            zip_code = mycity_request.session_attributes[zip_code_key]

        if "Neighborhood" in mycity_request.intent_variables and \
                "value" in mycity_request.intent_variables["Neighborhood"]:
            neighborhood = \
                mycity_request.intent_variables["Neighborhood"]["value"]

        try:
            trash_days = get_trash_and_recycling_days(
                address, zip_code, neighborhood)
            days_speech = build_speech_from_list_of_days(trash_days)
            mycity_response.output_speech = \
                speech_constants.PICK_UP_DAY.format(days_speech)
            mycity_response.should_end_session = True
        except InvalidAddressError:
            address_string = address
            if zip_code:
                address_string = address_string + \
                    " with zip code {}".format(zip_code)
            return _ask_for_address_again(
                speech_constants.ADDRESS_NOT_FOUND.format(address_string))
        except BadAPIResponse:
            mycity_response.output_speech = speech_constants.BAD_API_RESPONSE
            mycity_response.should_end_session = True
        except MultipleAddressError as error:
            # Drop the five-digit zip codes before reading the candidate
            # addresses back to the user.
            candidates = [re.sub(r' \d{5}', '', candidate)
                          for candidate in error.addresses]
            address_list = ', '.join(candidates)
            mycity_response.output_speech = \
                speech_constants.MULTIPLE_ADDRESS_ERROR.format(address_list)
            mycity_response.dialog_directive = "ElicitSlotNeighborhood"
            mycity_response.should_end_session = False
    else:
        logger.error("Error: Called trash_day_intent with no address")
        mycity_response.output_speech = speech_constants.ADDRESS_NOT_UNDERSTOOD
        mycity_response.should_end_session = True

    # Setting reprompt_text to None signifies that we do not want to reprompt
    # the user. If the user does not respond or says something that is not
    # understood, the session will end.
    mycity_response.reprompt_text = None
    mycity_response.session_attributes = mycity_request.session_attributes
    mycity_response.card_title = CARD_TITLE
    return mycity_response
2081 N Webb Rd 2081 N. Webb Rd 1515 West 22nd Street 2029 Stierlin Court P.O. Box 33170 The Landmark @ One Market, Suite 200 One Market, Suite 200 One Market One Union Square One Union Square, Apt 22-C 186 Avenue A 10 Avenue of America 25 West St """.split("\n") addr_parser = StreetAddressParser() addr_formatter = StreetAddressFormatter() if opts.addr: lst = [opts.addr] else: lst = map(str.strip,tests) for t in lst: if t: print '"%s"' % t logging.info('addr_str: ' + unicode(t)) addr = addr_parser.parse(t) if addr['street_full'] is not None: street = addr_formatter.append_TH_to_street(addr['street_full'])
2081 N Webb Rd 2081 N. Webb Rd 1515 West 22nd Street 2029 Stierlin Court P.O. Box 33170 The Landmark @ One Market, Suite 200 One Market, Suite 200 One Market One Union Square One Union Square, Apt 22-C 186 Avenue A 10 Avenue of America 25 West St """.split("\n") addr_parser = StreetAddressParser() addr_formatter = StreetAddressFormatter() if opts.addr: lst = [opts.addr] else: lst = map(str.strip, tests) for t in lst: if t: print '"%s"' % t logging.info('addr_str: ' + unicode(t)) addr = addr_parser.parse(t) if addr['street_full'] is not None: street = addr_formatter.append_TH_to_street(
def test_valid_address_is_validated(self):
    """An address with both number and street must be reported valid."""
    parsed = StreetAddressParser().parse("46 Everdean St")
    is_valid = address_utils.is_address_valid(parsed)
    self.assertTrue(is_valid)
import psycopg2
import psycopg2.extras
import requests
import json
import re
import os
import copy
import geopy
from geopy.geocoders import ArcGIS
from streetaddress import StreetAddressFormatter, StreetAddressParser

# Module-level street address parser, created once at import time.
ap = StreetAddressParser()
import Transit
import configparser
import sys

# Settings are read from settings.cfg, two directories above this module.
config = configparser.RawConfigParser()
config.read(
    os.path.abspath(
        os.path.join(os.path.dirname(__file__), '..', '..', 'settings.cfg')))
# Value of the 'reverse_geocode_provider' option in the [geocode] section.
REVERSE_GEOCODE_PROVIDER = config.get('geocode', 'reverse_geocode_provider')


class HexagonRegion(object):
    """
    Region holding a list of hexagons and a dictionary of hexagon geometry.

    NOTE(review): only the constructor is visible here; the element types of
    both containers should be confirmed against the code that fills them.
    """

    def __init__(self):
        """Start with no hexagons and no geometry entries."""
        self.hexagons = []
        self.hexagon_geo = {}
class PandasGeocoder:
    """
    Geocode the address columns of a pandas DataFrame.

    Each row's address parts are joined into one string and looked up, first
    in an in-memory cache and then through the configured geocoder APIs in
    order. The cache is loaded from and written back to a disk cache around
    each run.
    """

    def __init__(self):
        """
        Set up an idle geocoder with an empty cache and zeroed counters.

        The Google geocoder is configured from the credentials file. If
        anything in that setup raises, only Nominatim is used.
        """
        self.running = False
        self.data = None
        self.address_columns = []
        self.street_address_parser = StreetAddressParser()
        self.progress_bar = None
        try:
            config = configparser.ConfigParser()
            config.read(CREDENTIALS)
            self.geo_coder_apis = [
                GeoAPIWrapper(GoogleV3,
                              user_agent="google_locator",
                              api_key=config["Google"]["api_token"]),
                GeoAPIWrapper(Nominatim, user_agent="nominatim_locator")
            ]
        except Exception:
            # Fall back to Nominatim alone.
            self.geo_coder_apis = [
                GeoAPIWrapper(Nominatim, user_agent="nominatim_locator")
            ]
        # address string -> (longitude, latitude)
        self.geocode_cache = {}
        self.processed_items, self.found = 0, 0

    def write_cache(self):
        """Copy every in-memory cache entry into the disk cache."""
        disc_cache = dc.Cache(API_CACHE)
        for address, geo_data in self.geocode_cache.items():
            disc_cache[address] = geo_data

    def read_cache(self):
        """Load every entry of the disk cache into the in-memory cache."""
        disc_cache = dc.Cache(API_CACHE)
        for key in disc_cache.iterkeys():
            self.geocode_cache[key] = disc_cache[key]

    def run(self, data: pd.DataFrame,
            address_columns: list) -> pd.DataFrame:
        """
        Geocode every row of ``data`` and store the outcome in 'result'.

        :param data: frame to geocode; missing values are replaced by ""
        :param address_columns: columns whose values make up the address
        :return: the filled frame with an added 'result' column holding the
            value returned by get_coordinates_args for each row
        """
        self.running = True
        self.data = data.fillna("")
        self.address_columns = address_columns
        self.processed_items, self.found = 0, 0
        # The disk cache is only read while the in-memory cache is empty.
        if len(self.geocode_cache) == 0:
            self.read_cache()
        try:
            self.progress_bar = IncrementalBar(name="geocoding progress",
                                               max=len(self.data.index))
            # Rows are geocoded by a pool of worker threads.
            # NOTE(review): the counters and the cache are updated from
            # those threads without locking - confirm this is acceptable.
            with ThreadPoolExecutor() as pool:
                self.data['result'] = \
                    list(pool.map(self.get_coordinates_args,
                                  self.data.loc[:, self.address_columns].to_numpy(),
                                  chunksize=10))
        finally:
            # NOTE(review): the original indentation was lost; all three
            # cleanup steps are assumed to belong to this finally block -
            # confirm against the original file.
            self.progress_bar.finish()
            if len(self.geocode_cache) > 0:
                self.write_cache()
            self.running = False
        return self.data

    def is_running(self):
        """Return True while a run is in progress."""
        return self.running

    def get_status(self):
        """
        Return a progress snapshot as a dictionary.

        'total_items' is 'Unknown' until data has been supplied.
        """
        return {
            'in_progress': self.running,
            'processed_items': self.processed_items,
            'total_items': len(self.data.index) if self.data is not None else 'Unknown',
            'api_found_items': self.found,
        }

    def get_coordinates_args(self, address_row) -> [float, float]:
        """
        Return (longitude, latitude) for one row of address values.

        :param address_row: iterable of address parts for a single row
        :return: tuple of longitude and latitude; both are None when no
            geocoder produced a result
        """
        # Non-empty parts, stripped and lower-cased.
        arr = [
            str(addr_unit).strip().lower() for addr_unit in address_row
            if len(str(addr_unit)) > 0
        ]
        # Join with commas and drop the characters @ ( ) : { }.
        address_string = re.sub(r'[@():{}]', "", ",".join(arr))
        result = self.street_address_parser.parse(address_string)
        # Prefer the parsed street name as the search term, else the whole
        # string.
        search_address = result.get("street_name") or address_string
        longitude, latitude = None, None
        if address_string in self.geocode_cache or search_address in self.geocode_cache:
            longitude, latitude = self.geocode_cache.get(
                address_string) or self.geocode_cache.get(search_address)
        else:
            # Try each geocoder in order and stop at the first that returns
            # both coordinates.
            for geo_coder_api in self.geo_coder_apis:
                longitude, latitude = geo_coder_api.get_lon_lat(
                    search_address, address_string)
                if longitude is not None and latitude is not None:
                    self.found += 1
                    break
            # The outcome is cached under both keys, including a miss
            # (None, None).
            self.geocode_cache[search_address] = self.geocode_cache[
                address_string] = longitude, latitude
        self.progress_bar.next()
        self.processed_items += 1
        return longitude, latitude
def setUp(self):
    """Create the address parser and formatter used by the tests."""
    parser = StreetAddressParser()
    self.addr_parser = parser
    formatter = StreetAddressFormatter()
    self.addr_formatter = formatter
def personal_info_extract(terms_text_full, terms, heading_index):
    """Collect contact and identity details from a document's text.

    :param terms_text_full: full text that is scanned for phone numbers,
        a pincode-like number, e-mail, passport number and gender
    :param terms: indexable collection whose items carry their text at
        index 2 (scanned for URLs); also handed to ``extract_email_id`` and
        ``extract_dob``
    :param heading_index: unused; kept for interface compatibility
    :return: dict with the keys ``name``, ``phone``, ``email``, ``url``,
        ``address`` (lists) and ``passport_no``, ``dob``, ``gender``
    """
    # Regex fallbacks, tried in order, only while no phone number was found.
    # NOTE(review): in the first pattern `{3,4,5}` is not a valid repetition
    # for Python's `re`, so it is matched as literal text and the pattern is
    # effectively dead. It is kept unchanged on purpose: enabling it
    # (e.g. `{3,5}`) would let this loose pattern win over the stricter ones
    # below and match things like "2015-2019". Decide whether to fix or drop.
    # NOTE(review): in the last pattern `(.)` matches any character, not only
    # a literal dot - confirm that is intended.
    phone_fallback_patterns = (
        r"\s*(?:\+?(\d{1,3}))?[-. (]*(\d{2,3})[-. )]*(\d{2,3})[-. ]*(\d{3,4,5})(?: *x(\d+))?\s*",
        r"\d{10}|\d{3}\s{1}\d{3}\s{1}\d{4}",
        r"\d{3}(-)\d{3}(-)\d{4}",
        r"(\()\d{3}(\))(-)\d{3}(-)\d{4}",
        r"(\()\d{3}(\))\s{0,1}\d{3}(-)\d{4}",
        r"(\()\d{3}(\))\s{1}\d{3}\s{1}\d{4}",
        r"\d{3}(.)\d{3}(.)\d{4}",
    )

    extractor = URLExtract()
    personal_info_dict = {
        "name": [],
        "phone": [],
        "email": [],
        "url": [],
        "address": [],
        "passport_no": "",
        "dob": "",
        "gender": ""
    }

    # Primary phone detection: keep only the first number found.
    phone_numbers = phonenumbers.PhoneNumberMatcher(terms_text_full, None)
    try:
        for number_match in phone_numbers:
            personal_info_dict["phone"].append(number_match.raw_string)
            break
    except Exception:
        # Best-effort: a matcher failure must not abort the extraction. Fall
        # through to the regex fallbacks instead of storing a nested list of
        # findall() results in the phone list.
        pass

    for pattern in phone_fallback_patterns:
        if personal_info_dict["phone"]:
            break
        phone_match = re.search(pattern, terms_text_full)
        # Matches of 8 characters or fewer are rejected as too short.
        if phone_match and len(phone_match[0]) > 8:
            personal_info_dict["phone"].append(phone_match[0])

    # NOTE(review): `{0, 1}` (with a space) is not a valid repetition either,
    # so this only matches text containing the literal "{0, 1}". Kept
    # unchanged because a working pattern would insert an extra element ahead
    # of the extract_pincode() result in "address".
    pincode_like = re.search("[1-9]{1}[0-9]{2}\\s{0, 1}[0-9]{3}",
                             terms_text_full)
    if pincode_like:
        personal_info_dict["address"].append(pincode_like[0])

    # Each non-empty find_urls() result is stored as one list per item.
    # Index-based access is kept because the container type of `terms` is not
    # visible from this function.
    for index in range(len(terms)):
        try:
            found_urls = extractor.find_urls(terms[index][2])
        except Exception:
            # Items without usable text at index 2 are skipped.
            continue
        if found_urls:
            personal_info_dict["url"].append(found_urls)

    personal_info_dict["address"].append(extract_pincode(terms_text_full))
    personal_info_dict["email"].append(
        extract_email_id(terms, terms_text_full))

    try:
        # A failure in one extractor leaves the remaining fields at their ""
        # defaults, as before.
        personal_info_dict["dob"] = extract_dob(terms)
        personal_info_dict["passport_no"] = extract_pno(terms_text_full)
        personal_info_dict["gender"] = extract_gender(terms_text_full)
    except Exception:
        pass

    # Name extraction is not performed here; an empty placeholder is returned.
    personal_info_dict["name"].append("")
    return personal_info_dict
def get_nearby_food_trucks(mycity_request):
    """
    Gets food truck info near an address

    The address is read from the session attributes. Up to FOOD_TRUCK_LIMIT
    trucks within MILE of it are reported in the response's output speech.

    :param mycity_request: MyCityRequestDataModel object
    :return: MyCityResponseObject
    """
    mycity_response = MyCityResponseDataModel()

    # Get current address location
    if CURRENT_ADDRESS_KEY in mycity_request.session_attributes:
        current_address = \
            mycity_request.session_attributes[CURRENT_ADDRESS_KEY]

        # Parsing street address using street-address package
        address_parser = StreetAddressParser()
        a = address_parser.parse(current_address)
        # Join only the parts the parser actually found; str() on a missing
        # (None) part would put the literal text "None" into the address that
        # is geocoded and spoken back to the user.
        address = " ".join(
            str(part)
            for part in (a["house"], a["street_name"], a["street_type"])
            if part)

        # Parsing zip code
        zip_code = str(a["other"]).zfill(5) if a["other"] else None
        zip_code_key = intent_constants.ZIP_CODE_KEY
        if zip_code is None and zip_code_key in \
                mycity_request.session_attributes:
            zip_code = mycity_request.session_attributes[zip_code_key]

        # Get user's GIS Geocode Address and list of available trucks
        # NOTE(review): these two calls sit outside the try block below, so
        # any of the handled exception types raised here would propagate to
        # the caller - confirm that is intended.
        usr_addr = gis_utils.geocode_address(address)
        truck_unique_locations = get_truck_locations()
        nearby_food_trucks = []

        try:
            # Loop through food truck list and search for nearby food trucks
            # limit to FOOD_TRUCK_LIMIT to speed up response
            for truck in truck_unique_locations:
                if gis_utils.calculate_distance(usr_addr, truck) <= MILE:
                    nearby_food_trucks.append(truck)
                    if len(nearby_food_trucks) == FOOD_TRUCK_LIMIT:
                        break

            count = len(nearby_food_trucks)
            if count == 0:
                mycity_response.output_speech = \
                    "I didn't find any food trucks!"
            elif count <= 3:
                noun = "truck" if count == 1 else "trucks"
                response = f"I found {count} food {noun} within a mile " \
                    "from your address! "
                response += add_response_text(nearby_food_trucks)
                mycity_response.output_speech = response
            else:
                response = f"There are at least {count} food trucks within " \
                    f"a mile from your address! Here are the first {count}. "
                response += add_response_text(nearby_food_trucks)
                mycity_response.output_speech = response

        except InvalidAddressError:
            address_string = address
            if zip_code:
                address_string = address_string + " with zip code {}"\
                    .format(zip_code)
            mycity_response.output_speech = \
                speech_constants.ADDRESS_NOT_FOUND.format(address_string)
            mycity_response.dialog_directive = "ElicitSlotFoodTruck"
            mycity_response.reprompt_text = None
            mycity_response.session_attributes = \
                mycity_request.session_attributes
            mycity_response.card_title = "Food Trucks"
            mycity_request = clear_address_from_mycity_object(mycity_request)
            mycity_response = \
                clear_address_from_mycity_object(mycity_response)
            return mycity_response

        except BadAPIResponse:
            mycity_response.output_speech = \
                "Hmm something went wrong. Maybe try again?"

        except MultipleAddressError:
            mycity_response.output_speech = \
                speech_constants.MULTIPLE_ADDRESS_ERROR.format(address)
            mycity_response.dialog_directive = "ElicitSlotZipCode"

    else:
        logger.error("Error: Called food_truck_intent with no address")
        mycity_response.output_speech = "I didn't understand that address, " \
                                        "please try again"

    # Setting reprompt_text to None signifies that we do not want to reprompt
    # the user. If the user does not respond or says something that is not
    # understood, the session will end.
    mycity_response.reprompt_text = None
    mycity_response.session_attributes = mycity_request.session_attributes
    mycity_response.card_title = "Food Trucks"

    return mycity_response
import json
import os

import requests
from elasticsearch import Elasticsearch
from streetaddress import StreetAddressParser

addr_parser = StreetAddressParser()
headers = {'content-type': 'application/json'}
elasticsearch_index_uri = 'http://*****:*****@localhost:9200/twitter_data_mining/tweet'

# Index mapping for tweets: raw text, a timestamp and a geo point.
mapping = {
    "mappings": {
        "tweet": {
            "properties": {
                "text": {
                    "type": "keyword"
                },
                "timestamp": {
                    "type": "date",
                    "format": "yyyy-MM-dd HH:mm:ss"
                },
                "location": {
                    "type": "geo_point"
                },
            }
        }
    }
}

# SECURITY: the password below is committed to source control. It remains
# only as a backward-compatible default; set ES_USERNAME / ES_PASSWORD in the
# environment, then rotate the secret and remove the fallback.
es = Elasticsearch(http_auth=(
    os.environ.get('ES_USERNAME', 'elastic'),
    os.environ.get('ES_PASSWORD', 'KWKWmZTobKtc3WsjVwWB'),
))
# ignore=400 is passed so that the create call does not raise on a 400
# response (presumably "index already exists" on re-runs).
# NOTE(review): the index created here ('twitter_traffic_nyc') differs from
# the one named in elasticsearch_index_uri ('twitter_data_mining') - confirm.
es.indices.create(index='twitter_traffic_nyc', body=mapping, ignore=400)