def tz_ids_for_address(country, state=None, city=None, zipcode=None, **kwargs):
    """ Get the TZ identifiers for a given address, e.g.:

    >>> tztrout.tz_ids_for_address('US', state='CA', city='Palo Alto')
    [u'America/Los_Angeles']
    >>> tztrout.tz_ids_for_address('PL')
    [u'Europe/Warsaw']
    >>> tztrout.tz_ids_for_address('CN')
    [
        u'Asia/Shanghai',
        u'Asia/Harbin',
        u'Asia/Chongqing',
        u'Asia/Urumqi',
        u'Asia/Kashgar'
    ]

    Resolution order:

    * US with a zipcode: direct lookup in ``td.us_zip_to_tz_ids``.
    * US with a state and/or city: a city-level entry in ``data_exceptions``
      wins; otherwise one matching zip code is looked up (first by city and
      state, then by state alone) and mapped to its TZ identifiers.
    * CA / AU with a state: lookup in the per-state maps.
    * Anything else, or a US address that could not be resolved: the
      country-wide list from ``pytz.country_timezones``.

    :param country: country code compared against 'US', 'CA' and 'AU'.
    :param state: optional state; values that are not two characters long
        are normalized through ``td.normalized_states``.
    :param city: optional city name (only used for US addresses).
    :param zipcode: optional US zip code; non-string values are converted
        with ``str`` and a ``XXXXX-XXXX`` suffix is ignored.
    :param kwargs: accepted and ignored.
    :return: whatever the selected lookup returns (``None`` when a ``.get``
        lookup has no entry).
    """
    if country == 'US':
        if zipcode:
            if not isinstance(zipcode, basestring):
                zipcode = str(zipcode)
            # If an extended zipcode in a form of XXXXX-XXXX is provided,
            # only use the first part
            zipcode = zipcode.split('-')[0]
            return td.us_zip_to_tz_ids.get(zipcode)
        elif state or city:
            if city:
                city_key = 'city:%s' % city.lower()
                if city_key in data_exceptions:
                    return data_exceptions[city_key]['include']
            # `state` may be missing when only a city was supplied, so only
            # normalize it when there is something to normalize.
            if state and len(state) != 2:
                state = td.normalized_states['US'].get(state.lower(), state)
            zcdb = ZipCodeDatabase()
            zipcodes = zcdb.find_zip(city=city, state=state, exact=True, limit=1)
            if zipcodes:
                return td.us_zip_to_tz_ids.get(zipcodes[0].zip)
            elif city is not None and state:
                # The city is unknown: fall back to any zip code of the
                # state. Skipped without a state, because an unconstrained
                # query would pick an arbitrary zip code.
                zipcodes = zcdb.find_zip(state=state, exact=True, limit=1)
                if zipcodes:
                    return td.us_zip_to_tz_ids.get(zipcodes[0].zip)
    elif country == 'CA' and state:
        if len(state) != 2:
            state = td.normalized_states['CA'].get(state.lower(), state)
        return td.ca_state_to_tz_ids.get(state)
    elif country == 'AU' and state:
        if len(state) != 2:
            state = td.normalized_states['AU'].get(state.lower(), state)
        return td.au_state_to_tz_ids.get(state)
    return pytz.country_timezones.get(country)
def load_location_data(data_dir, group_by='zip3'):
    """Load latitude/longitude coordinates for the zip codes of every state.

    Zip codes are read from pyzipcode for each entry of ``us.STATES`` and
    enriched with a 3-digit prefix column (``zip3``) and a ``fips`` column
    mapped through ``make_zip_to_fips_dict(data_dir)``.

    group_by selects the shape of the result:
      * 'zip3' - mean latitude/longitude per 3-digit zip prefix
      * 'zip'  - one row per zip code, indexed by ``zip_code``
      * 'fips' - mean latitude/longitude per ``fips`` value
    Any other value returns the un-grouped frame.
    """
    import us
    sys.path.append('/Users/james/data_science/pyzipcode')
    from pyzipcode import ZipCodeDatabase

    # Collect one list per output column while walking all states.
    zcdb = ZipCodeDatabase()
    columns = {'zip_code': [], 'latitude': [], 'longitude': [], 'state': []}
    for state in us.STATES:
        for entry in zcdb.find_zip(state='%s' % state.abbr):
            columns['zip_code'].append(entry.zip)
            columns['latitude'].append(entry.latitude)
            columns['longitude'].append(entry.longitude)
            columns['state'].append(entry.state)

    zip_data = pd.DataFrame(columns)
    # The prefix must be taken while zip_code is still a string.
    zip_data['zip3'] = zip_data['zip_code'].apply(lambda code: int(code[:3]))
    zip_data['zip_code'] = zip_data['zip_code'].astype(int)
    zip_data['fips'] = zip_data['zip_code'].map(make_zip_to_fips_dict(data_dir))

    if group_by == 'zip':
        zip_data.set_index('zip_code', inplace=True)
    elif group_by in ('zip3', 'fips'):
        zip_data = zip_data.groupby(group_by).agg(
            {'latitude': np.mean, 'longitude': np.mean})
    return zip_data
def __init__(self, key, city='San Francisco'):
    """
    The construct for the YelpCity class.

    Issues one search request for ``city`` (its JSON ``region`` entry is
    stored as ``self.region``) and then collects businesses for every zip
    code pyzipcode reports for the city via ``self.find_businesses``.

    Parameters
    ----------
    key : str
        The yelp API key needed to call Yelp Fusion API.
    city : str
        The city to search over.
    """
    api_host = 'https://api.yelp.com'
    search_path = '/v3/businesses/search'
    zcdb = ZipCodeDatabase()

    url = api_host + search_path
    header = {
        'Authorization': 'Bearer %s' % key,
    }
    param = {
        'location': city,
        'limit': 1,
        'offset': 0,
        'term': 'restaurants',
        'sort_by': 'best_match',
    }
    response = requests.get(url=url, headers=header, params=param)

    self.business_id_set = set()
    self.business_list = []
    # find_zip yields nothing usable for an unknown city; treat that as
    # "no zip codes" instead of failing on iteration.
    for zip_obj in zcdb.find_zip(city=city) or []:
        self.business_list.extend(self.find_businesses(zip_obj, url, header))
    self.region = response.json()['region']
def _get_zipcode_list(self, location):
    """Translate a location spec into a list of "city,state" strings.

    Accepted forms of ``location``:
      * "all"                 - return the whitespace-separated entries of
                                yelp-city-USA-list.txt unchanged
      * "94110" / "94110,94103" - one or more comma-separated zip codes
      * "CA"                  - a two-character state
      * "San Francisco"       - a city name (no comma, longer than 2 chars)
      * "San Francisco,CA"    - a city and a state

    For every form except "all" the matching zip codes are resolved through
    ZipDB and reduced to the unique "city,state" pairs they belong to
    (order is not defined).
    """
    if location.lower() == "all":
        with open("yelp-city-USA-list.txt") as f:
            return f.read().split()

    zip_db = ZipDB()
    # Only treat the input as a zip list when it consists solely of digits
    # and separators; an unanchored search would also capture
    # "City,State" input and anything containing a digit.
    if re.match(r'[0-9][0-9,\s]*$', location.strip()):
        zips = [part.strip() for part in location.split(',') if part.strip()]
    elif location.find(",") < 0 and len(location) > 2:
        print("search city ...")
        zips = [z.zip for z in zip_db.find_zip(city=location) or []]
    elif len(location) == 2:
        print("search state ...")
        zips = [z.zip for z in zip_db.find_zip(state=location) or []]
    else:
        city, _, state = location.partition(",")
        city, state = city.strip(), state.strip()
        print("search city: %s state: %s" % (city, state))
        zips = [
            z.zip
            for z in zip_db.find_zip(city=city, state=state or None) or []
        ]

    # Look each zip code up once and de-duplicate the resulting pairs.
    places = set()
    for code in zips:
        record = zip_db[code]
        places.add("%s,%s" % (record.city, record.state))
    return list(places)
def generate_zip_to_tz_id_map(self):
    """ Generate the map of US zip codes to time zone identifiers.

    The method finds all the possible time zone identifiers for each zip
    code based on a UTC offset stored for that zip in
    pyzipcode.ZipCodeDatabase.

    The identifiers are then adjusted with ``data_exceptions``: the entry
    for the zip code (or, failing that, for its state) is combined with
    the optional 'all' entry; 'exclude' identifiers are removed and
    'include' identifiers are added. Zip codes sharing the same resulting
    identifiers are grouped, and the mapping
    ``{tuple_of_zips: json_encoded_ids}`` is pickled to
    US_ZIPS_TO_TZ_IDS_MAP_PATH. Progress is written to stdout.
    """
    zcdb = ZipCodeDatabase()
    zips = list(zcdb.find_zip() or [])
    zips_len = len(zips)

    # Exceptions applying to every zip code; computed once, never mutated.
    global_exceptions = data_exceptions.get('all') or {}
    global_include = set(global_exceptions.get('include', []))
    global_exclude = set(global_exceptions.get('exclude', []))

    tz_ids_to_zips = defaultdict(list)
    for cnt, zip_entry in enumerate(zips):
        ids = set(self._get_tz_identifiers_for_us_zipcode(zip_entry))

        # apply the data exceptions
        # The zip/state entry is only read: writing the merged lists back
        # into it would change the shared data_exceptions for later zips.
        specific = (data_exceptions.get('zip:' + zip_entry.zip)
                    or data_exceptions.get('state:' + zip_entry.state)
                    or {})
        include = global_include | set(specific.get('include', []))
        exclude = global_exclude | set(specific.get('exclude', []))
        ids = tuple((ids - exclude) | include)

        tz_ids_to_zips[ids].append(zip_entry.zip)

        stdout.write('\r%d/%d' % (cnt + 1, zips_len))
        stdout.flush()

    zips_to_tz_ids = {
        tuple(zip_group): json.dumps(ids)
        for ids, zip_group in tz_ids_to_zips.iteritems()
    }

    with open(US_ZIPS_TO_TZ_IDS_MAP_PATH, 'w') as out_file:
        out_file.write(pickle.dumps(zips_to_tz_ids))
def load_location_data(data_dir, group_by='zip3'):
    """Build a DataFrame of zip-code coordinates and optionally aggregate it.

    Every zip code pyzipcode returns for the states in ``us.STATES`` becomes
    a row with ``zip_code``, ``latitude``, ``longitude`` and ``state``; the
    columns ``zip3`` (first three digits) and ``fips`` (looked up in
    ``make_zip_to_fips_dict(data_dir)``) are added afterwards.

    Result by ``group_by``:
      'zip3' -> mean coordinates per 3-digit zip prefix,
      'zip'  -> per-zip rows indexed by ``zip_code``,
      'fips' -> mean coordinates per ``fips`` value,
      other  -> the per-zip rows with their default index.
    """
    import us
    sys.path.append('/Users/james/data_science/pyzipcode')
    from pyzipcode import ZipCodeDatabase

    zcdb = ZipCodeDatabase()
    zip_codes, latitudes, longitudes, states = [], [], [], []
    for state in us.STATES:
        matches = zcdb.find_zip(state='%s' % state.abbr)
        for match in matches:
            zip_codes.append(match.zip)
            latitudes.append(match.latitude)
            longitudes.append(match.longitude)
            states.append(match.state)

    zip_data = pd.DataFrame({
        'zip_code': zip_codes,
        'latitude': latitudes,
        'longitude': longitudes,
        'state': states,
    })

    # Derive the 3-digit prefix before zip_code is converted to integers.
    zip_data['zip3'] = zip_data['zip_code'].apply(lambda text: int(text[:3]))
    zip_data['zip_code'] = zip_data['zip_code'].astype(int)

    fips_by_zip = make_zip_to_fips_dict(data_dir)
    zip_data['fips'] = zip_data['zip_code'].map(fips_by_zip)

    if group_by == 'zip3' or group_by == 'fips':
        mean_coordinates = {'latitude': np.mean, 'longitude': np.mean}
        return zip_data.groupby(group_by).agg(mean_coordinates)
    if group_by == 'zip':
        zip_data.set_index('zip_code', inplace=True)
    return zip_data
def generate_zip_to_tz_id_map(self):
    """ Generate the map of US zip codes to time zone identifiers.

    The method finds all the possible time zone identifiers for each zip
    code based on a UTC offset stored for that zip in
    pyzipcode.ZipCodeDatabase.

    The identifiers are then adjusted with ``data_exceptions``: the entry
    for the zip code (or, failing that, for its state) is combined with
    the optional 'all' entry; 'exclude' identifiers are removed and
    'include' identifiers are added. The resulting ``{zip: ids}`` mapping
    is handed to ``_dump_json_data`` together with
    US_ZIPS_TO_TZ_IDS_MAP_PATH.
    """
    zcdb = ZipCodeDatabase()
    zips = list(zcdb.find_zip() or [])

    # Exceptions applying to every zip code; computed once, never mutated.
    global_exceptions = data_exceptions.get('all') or {}
    global_include = set(global_exceptions.get('include', []))
    global_exclude = set(global_exceptions.get('exclude', []))

    tz_ids_to_zips = defaultdict(list)
    for zip_entry in _progressbar(zips):
        ids = set(self._get_tz_identifiers_for_us_zipcode(zip_entry))

        # apply the data exceptions
        # The zip/state entry is only read: writing the merged lists back
        # into it would change the shared data_exceptions for later zips.
        specific = (data_exceptions.get('zip:' + zip_entry.zip)
                    or data_exceptions.get('state:' + zip_entry.state)
                    or {})
        include = global_include | set(specific.get('include', []))
        exclude = global_exclude | set(specific.get('exclude', []))
        ids = tuple((ids - exclude) | include)

        tz_ids_to_zips[ids].append(zip_entry.zip)

    zips_to_tz_ids = {
        zip_code: ids
        for ids, zip_group in tz_ids_to_zips.items()
        for zip_code in zip_group
    }

    _dump_json_data(US_ZIPS_TO_TZ_IDS_MAP_PATH, zips_to_tz_ids)
# Write a trimmed copy "<name>_edit.csv" of every CSV file in `dir`,
# leaving out the trailing 31 lines of each file.
for file in os.listdir(dir):
    if file.endswith(".csv"):
        # os.listdir returns bare names; join them with `dir` so the files
        # are found even when `dir` is not the working directory.
        csv_path = os.path.join(dir, file)
        edited_path = os.path.join(dir, file.replace('.csv', '_edit.csv'))
        # file_len is defined elsewhere; presumably the file's line count.
        rows_to_keep = file_len(csv_path) - 31
        with open(csv_path, 'r') as inp, open(edited_path, 'w', newline='') as out:
            writer = csv.writer(out)
            for curr_line, row in enumerate(csv.reader(inp)):
                if curr_line >= rows_to_keep:
                    break
                writer.writerow(row)

# getting NJ zip codes
zc = ZipCodeDatabase()
zipped_codes_nj = zc.find_zip(state='NJ')
uz_codes_nj = [z.zip for z in zipped_codes_nj]

# number of zip codes
num_zips = len(uz_codes_nj)
print(num_zips)

# want to use random zip codes to determine trends for the whole state - roughly want 70%
num_rand = math.floor(num_zips * .7)

# shuffle the zip codes (fixed seed keeps the sample reproducible)
random.seed(10)
random.shuffle(uz_codes_nj)

# extract the first 70% zips -- these are now the random zip codes we will collect info on
good_zips = uz_codes_nj[0:num_rand]

# these zip codes gave problems when trying to get data - remove them.
# Filtering (instead of list.remove) does not fail when one of them did not
# end up in the sample.
problem_zips = {'08017', '08370', '07427'}
good_zips = [code for code in good_zips if code not in problem_zips]
zcdb = ZipCodeDatabase() geolocator = Nominatim() for area_code in area_code_and_place: state = area_code.split("-")[1].split("(")[0].strip() if "DC" in state: state = us.states.lookup("DC").abbr else: state = us.states.lookup(state).abbr city = area_code.split("-")[1].split("(")[1].rstrip(")") city = city.strip() if "," in city: city = city.split(",")[0] if " " in city: if [prefix for prefix in prefixes if prefix in city] == []: city = city.split(" ")[0] if isinstance(zcdb.find_zip(city=city, state=state), list): zip_code = zcdb.find_zip(city=city, state=state)[0] else: zip_code = zcdb.find_zip(city=city, state=state) if zip_code is None: try: zip_code = zcdb.find_zip(state=state)[0] except: if state == "MP": zip_code = edict({ "latitude": 15.200755, "longitude": 145.756952 }) elif state == "GU": zip_code = edict({ "latitude": 13.463345,
zcdb = ZipCodeDatabase() geolocator = Nominatim() for area_code in area_code_and_place: state = area_code.split("-")[1].split("(")[0].strip() if "DC" in state: state = us.states.lookup("DC").abbr else: state = us.states.lookup(state).abbr city = area_code.split("-")[1].split("(")[1].rstrip(")") city = city.strip() if "," in city: city = city.split(",")[0] if " " in city: if [prefix for prefix in prefixes if prefix in city] == []: city = city.split(" ")[0] if isinstance(zcdb.find_zip(city=city,state=state),list): zip_code = zcdb.find_zip(city=city,state=state)[0] else: zip_code = zcdb.find_zip(city=city,state=state) if zip_code is None: try: zip_code = zcdb.find_zip(state=state)[0] except: if state == "MP": zip_code = edict({ "latitude":15.200755, "longitude":145.756952 }) elif state == "GU": zip_code = edict({ "latitude":13.463345,
from session import db_session
from models import Zip
from pyzipcode import ZipCodeDatabase

zcdb = ZipCodeDatabase()

# Add one Zip row per zip code known to pyzipcode. The row object is not
# bound to the name `zip`, so the builtin stays usable in this module, and
# an empty lookup result is treated as "nothing to add".
for z in zcdb.find_zip() or []:
    db_session.add(Zip(zip_code=z.zip))

# A single commit after the loop persists all added rows together.
db_session.commit()
# NOTE(review): this slices the string form of sys.argv; the result is only
# used by the commented-out open() below.
flist=str(sys.argv)
tfile= flist[12:len(flist)-2]
#f = open(tfile, 'r')

zcdb = ZipCodeDatabase()

# Each input line is split on "|": the first field carries "[x, y]"
# coordinates and the last field is copied to the output. For coordinates
# that reverse-geocode to the US, a random zip code of the matched city and
# the state name are appended: "<last field>,<zip>,<state>".
with open("sent9.txt", 'r') as f, open("t9.txt", 'w') as g:
    c=0
    for line in f:
        tweet = line.split("|")
        bracketed = re.search(r"\[(.*)\]", tweet[0])
        if bracketed is None:
            # no coordinates on this line (e.g. a blank line) - skip it
            continue
        numbers = re.findall(r'[+-]?[0-9.]+', bracketed.group(1))
        if len(numbers) != 2:
            continue
        x, y = map(float, numbers)
        location = rg.search([x,y])
        if location[0]['cc'] == "US":
            state = location[0]['admin1']
            city = location[0]['name']
            zlist=zcdb.find_zip(city=city)
            # find_zip gives nothing usable for an unknown city; comparing
            # the result with 0 is not a valid emptiness test.
            if zlist:
                zipcode = random.choice(zlist)
                s = tweet[-1].strip('\n')+","+zipcode.zip+","+state+"\n"
                # print s.encode("UTF-8")
                g.write(s.encode('utf8'))
                c+=1
                # flush every 100 written records so partial results reach disk
                if c>100:
                    g.flush()
                    c=0
def get_dispensary_list(location):
    """Scrape the dispensary listing pages of one location.

    Pages ``dispensaries_site_url + <location> + "?page=N"`` are loaded with
    a Chrome driver, starting at N=1, until a page shows no listing cards.

    Args:
        location: path-like location whose second "/"-separated part is the
            city (``location.split('/')[1]``). It is lower-cased and its
            spaces are replaced by hyphens to build the page URL.

    Returns:
        A list of dicts with the keys "title", "rating", "helper",
        "location" and "zip code", sorted by title. "zip code" is the first
        zip code pyzipcode reports for the city, or "" when there is none.

    Raises:
        IndexError: if ``location`` contains no "/".
    """
    # The hyphenated form is the one that has to reach the URL.
    url_location = location.lower().replace(' ', '-')

    city = location.split('/')[1]
    zcdb = ZipCodeDatabase()
    zip_list = zcdb.find_zip(city=city)
    if zip_list:
        zip_code = zip_list[0].zip
    else:
        print(" Can't find zip code of given city!")
        zip_code = ""

    print(
        f"---------------- {location} - start --------------------------------"
    )
    dispensary_list = []
    page_index = 1
    driver = webdriver.Chrome(driverpath, options=chrome_options)
    try:
        while True:
            page_url = dispensaries_site_url + url_location + "?page=" + str(
                page_index)
            driver.get(page_url)
            time.sleep(1)  # give the page a moment to render its listing

            containers = driver.find_elements_by_class_name(
                'map-listings-list__ListWrapper-sc-1ynfzzj-0')
            data_lists = []
            if containers:
                data_lists = containers[0].find_elements_by_class_name(
                    'styled-components__Main-sc-1e5myvf-6')
            # Stop on a missing wrapper as well as on an empty one, so the
            # same page is never requested in an endless loop.
            if not data_lists:
                print(" Sorry, can't find list in this URL ", page_url)
                break

            for every_data in data_lists:
                title = every_data.find_element_by_class_name(
                    "base-card__Title-sc-1fhygl1-4").text
                try:
                    rating_element = every_data.find_element_by_class_name(
                        "rating__RatingValue-sc-12pds58-1")
                    couting_element = every_data.find_element_by_class_name(
                        "rating__Count-sc-12pds58-2")
                    rating = rating_element.text + couting_element.text
                except Exception:
                    # cards without a rating simply get an empty rating
                    rating = ""
                helper = every_data.find_element_by_class_name(
                    "base-card__Helper-sc-1fhygl1-5").text
                if title != "":
                    data_json = {
                        "title": title,
                        "rating": rating,
                        "helper": helper,
                        "location": location,
                        "zip code": zip_code
                    }
                    dispensary_list.append(data_json)
                    print(" ", data_json)
            page_index += 1
    finally:
        # always release the browser, also when scraping fails midway
        driver.quit()

    newlist = sorted(dispensary_list, key=itemgetter('title'))
    print(
        f"---------------- {location} - End--------------------------------")
    return newlist
def search(input):
    """Resolve a search string to events near a location or by an artist.

    * A numeric string is treated as a zip code; events are fetched for it
      only when it has exactly 5 characters.
    * Any other string is looked up with the Yahoo geo.places query. When
      it is a place in the United States, events within a radius of 50
      around a zip code of the place's city are fetched. When it is no
      place, or a place outside the United States, it is tried as an
      artist name.

    Returns one of:
      [events, None, input]   - location / zip code result (events may be
                                an empty list when nothing could be
                                resolved)
      [None, artists, input]  - artist result
      [message, flag]         - an error message with the boolean flag the
                                caller receives alongside it
    """
    try:
        events = []
        try:
            #testing if it's an int and if it's length is 5 (zip code reqs)
            int(input)
        except ValueError:
            pass
        else:
            if (len(input) == 5):
                #use jambase.py to get the events in this zipcode in a list
                #event list should look like[ [eventname1, eventid1, eventlocation1], ...]
                events = jambase.eventsHelp(input, None, 0)
            return [events, None, input]

        zcdb = ZipCodeDatabase()
        #parse yahoo geo.places api GET api here, to see if it's actually a location
        parsedInput = input.replace(" ", "+")
        rawData = urllib2.urlopen(
            "http://query.yahooapis.com/v1/public/yql?q=select%20%2a%20from%20geo.places%20where%20text='"
            + parsedInput + "'&format=json").read()
        data = json.loads(rawData)
        results = data['query']['results']

        #check if any places with this name exist
        if results is None:
            #check if artist exists
            if (jambase.artistExists(input) == False):
                return [
                    "Error: Not found. Please search an artist OR a location, not both.",
                    True
                ]
            return [None, jambase.eventsHelp(None, input, 0), input]

        # 'place' is either a single place or a list of places; use the
        # first one in the latter case.
        place = results['place']
        if isinstance(place, list):
            place = place[0]

        if (place['country']['content'] != "United States"):
            if (jambase.artistExists(input) == False):
                return [
                    "All locations must be within the United States.", False
                ]
            return [None, jambase.eventsHelp(None, input, 0), input]

        #yahoo location will get the city where their place is
        locality = place.get('locality1') or {}
        placeCity = locality.get('content')
        z = zcdb.find_zip(city=placeCity) if placeCity else None
        if z:
            #50 radius, around the zip code in the middle of the city's list
            events = jambase.eventsHelp(z[len(z) // 2].zip, None, 50)
        #returning the final rendertemplate, either eventList or artistList can be null.
        return [events, None, input]
    except urllib2.HTTPError:
        return ["Too many API requests, please try again in a day.", True]
from pyzipcode import ZipCodeDatabase from scipy.spatial import distance import pandas as pd zcdb = ZipCodeDatabase() sf_zipcodes = zcdb.find_zip(city="San Francisco", state="CA") # print(len(sf_zipcodes)) def long_lat_to_zipcode(input_longitude,input_latitude): """ function to convert latitude and longitude to zipcode: find euclidean distance between user provided latitude and longitude and all latitudes, longitudes from sf_zipcodes and select the row from latter which has minimum distance. then extract its zip code :param input_longitude,input_latitude: latitude and longitude that you want to convert to closest zipcode :return closest_zip: zip code """ closest_zip = None euc_dist = None for i, sf_zipcode in enumerate(sf_zipcodes): # extract latitude and longitude from each row in sf_zipcode lat = sf_zipcode.latitude long = sf_zipcode.longitude # calculate euclidean distance between lat and long from sf_zipcode and input latitude and longitude value euclidean_dist = round( distance.euclidean((long,lat), (input_longitude,input_latitude)), 4) # assign the euclidean distance calculated for first row as euc_dist if i == 0: euc_dist = euclidean_dist
# Module-level setup for a Redfin scraping run: the zip codes to process,
# a run timestamp, Chrome options, and the zip codes recorded in the
# not_listed / processed CSV files.
import csv
import redfin_scraper as rs
from pyzipcode import ZipCodeDatabase
from datetime import datetime as dt
from selenium.webdriver.chrome.options import Options
import os

noBrowserUI = True
# NOTE(review): presumably silences D-Bus lookups by Chrome - confirm.
os.environ["DBUS_SESSION_BUS_ADDRESS"] = '/dev/null'

zcdb = ZipCodeDatabase()
# All zip codes known to pyzipcode ...
zips = [zc.zip for zc in zcdb.find_zip()]
# ... immediately replaced by a single zip code, so only '01510' remains.
zips = ['01510']

# Timestamp string of the form YYYYMMDD-HHMMSS.
sttm = dt.now().strftime('%Y%m%d-%H%M%S')
dataDir = './data'

chrome_options = Options()
chrome_options.add_extension("./proxy.zip")
# chrome_options.add_argument("--load-extension=./proxy.zip")
chrome_options.add_argument("--ignore-certificate-errors")
# chrome_options.add_argument("--window-size=1024,768")
chrome_options.add_argument("--start-maximized")
chrome_options.add_argument("--disable-infobars")

# Recomputed here; this later value is the one that stays in `sttm`.
sttm = dt.now().strftime('%Y%m%d-%H%M%S')

# Every cell of not_listed.csv, flattened into one list.
# NOTE(review): 'rb' with csv.reader is the Python 2 idiom - confirm the
# interpreter version before changing the mode.
with open('not_listed.csv', 'rb') as f:
    reader = csv.reader(f)
    not_listed = [zc for zclist in reader for zc in zclist]

# First column of every row of processed_zips.csv.
with open('./processed_zips.csv', 'rb') as f:
    reader = csv.reader(f)
    processed = [row[0] for row in reader]