def generate_feature(cls, data, column=None, **kwargs):
    """Append location columns derived from a zip-code column.

    :param data: dataframe containing zip codes
    :param column: column label containing zip codes
    :param kwargs: ignored
    :return: dataframe with new columns for state, county, city,
        latitude, longitude and timezone
    :raises ValueError: if no zip-code column label is given
    """
    if column is None:
        raise ValueError('zipcode column must be given')
    zip_searcher = SearchEngine(simple_zipcode=True)
    for label in ('state', 'county', 'city', 'lat', 'lng', 'timezone'):
        data[label] = ''
    # Look each distinct zip code up once and broadcast the result to all
    # matching rows; the row mask is computed once per zip code instead of
    # once per column assignment as before.
    for zipcode in data[column].unique():
        zip_search = zip_searcher.by_zipcode(zipcode)
        mask = data[column] == zipcode
        data.loc[mask, 'city'] = zip_search.major_city
        data.loc[mask, 'county'] = zip_search.county
        data.loc[mask, 'lat'] = zip_search.lat
        data.loc[mask, 'lng'] = zip_search.lng
        data.loc[mask, 'state'] = zip_search.state
        data.loc[mask, 'timezone'] = zip_search.timezone
    return data
def validate_zip_city_pfds(pfds_file_name):
    """Compare the city recorded in a PFDS report file against the true
    city for each line's zip code and print an accuracy summary.

    :param pfds_file_name: path to the PFDS report to validate
    """
    correct = 0
    incorrect = 0
    search = SearchEngine(simple_zipcode=False)
    # `with` guarantees the file handle is closed.
    with open(pfds_file_name, "r") as f:
        lines = f.readlines()
    for line in lines:
        if line.startswith("==") or line.startswith("Coverage"):
            continue
        s1 = line.rsplit('::', 1)
        # Raw string: '\d' in a plain literal is an invalid escape sequence
        # (DeprecationWarning, SyntaxError in future Python versions).
        s2 = re.split(r'(\d*\.\d+|\W)', s1[0])
        tokens = [t for t in s2 if len(t) > 0]
        rec_city = s1[0].rsplit('\'', 1)[1]
        # Reset per line so a previous line's zip code is never reused,
        # and skip lines with no digit token instead of raising NameError.
        zip_token = None
        for token in tokens:
            if token.isspace():
                continue
            if token[0].isdigit():
                zip_token = token
                break
        if zip_token is None:
            continue
        true_city = search.by_zipcode(zip_token).major_city
        if true_city.lower() != rec_city.lower():
            print(zip_token, rec_city, "|=", true_city)
            incorrect += 1
        else:
            correct += 1
    total = correct + incorrect
    # Guard against an empty/fully-skipped file (ZeroDivisionError before).
    error_rate = incorrect / total * 100 if total else 0.0
    accuracy = 100 - error_rate
    print("Correct = ", correct, "\tIncorrect = ", incorrect,
          "error rate = {0:.2f}".format(error_rate) + "%",
          "\tAccuracy = {0:.2f}".format(accuracy), "%")
def __init__(self, **kwargs):
    """Initialize the transformer.

    Derives a short display name from the concrete class name (dropping a
    trailing "Transformer") and builds a rich-database zip-code engine.
    """
    super().__init__(**kwargs)
    name = type(self).__name__
    suffix = "Transformer"
    if name.endswith(suffix):
        name = name[:-len(suffix)]
    self.transformer_name = name
    self.search = SearchEngine(simple_zipcode=False)
def getZipcode(city, state):
    """Return the first zip code found for the given city and state.

    Raises IndexError when the city/state pair yields no matches.
    """
    engine = SearchEngine()
    matches = engine.by_city_and_state(city, state)
    first_match = matches[0]
    return first_match.zipcode
def get_population_df(zipcodes):
    """Build demographic dataframes for a collection of zip codes.

    :param zipcodes: iterable of zip-code strings
    :return: tuple of (age_df, race_df, householdincome_df); each frame has
        one row per zip code that had the corresponding data, plus a
        'zipcode' column
    """
    search = SearchEngine(simple_zipcode=False)
    age_frames = []
    race_frames = []
    income_frames = []
    for z in zipcodes:
        result = search.by_zipcode(z)
        if result.population_by_age:
            a = pd.DataFrame(
                result.population_by_age[2]['values']).set_index('x').T
            a['zipcode'] = z
            age_frames.append(a)
        if result.population_by_race:
            r = pd.DataFrame(
                result.population_by_race[0]['values']).set_index('x').T
            r['zipcode'] = z
            race_frames.append(r)
        if result.household_income:
            h = pd.DataFrame(
                result.household_income[0]['values']).set_index('x').T
            h['zipcode'] = z
            income_frames.append(h)
    # Concatenate once at the end instead of inside the loop: per-iteration
    # pd.concat re-copies the accumulator and is quadratic. An empty list
    # yields an empty frame, matching the original accumulator start state.
    age_df = pd.concat(age_frames) if age_frames else pd.DataFrame()
    race_df = pd.concat(race_frames) if race_frames else pd.DataFrame()
    householdincome_df = (pd.concat(income_frames)
                          if income_frames else pd.DataFrame())
    return age_df, race_df, householdincome_df
def main():
    """Prompt for a US state and city, then print the current temperature
    scraped from weather.com for the first matching zip code."""
    state2 = input("What" + '\x1b[1;31;40m' + ' state ' + '\x1b[0m' + "do you want the temperature of?\n")
    city2 = input("What" + '\x1b[1;31;40m' + ' city ' + '\x1b[0m' + "do you want the temperature of?\n")
    search = SearchEngine(simple_zipcode=True)
    res = search.by_city_and_state(city2, state2, zipcode_type='Standard',
                                   sort_by='zipcode', ascending=True, returns=5)
    try:
        zipcode = res[0]
    except IndexError:
        print("Please type in a valid USA State/City\n")
        # Re-prompt, then STOP: without this return the original fell
        # through and raised NameError on the unbound `zipcode` below.
        return main()
    city = zipcode.major_city
    state = zipcode.state
    urlend = zipcode.zipcode
    URL = 'https://weather.com/weather/today/l/' + urlend
    page = requests.get(URL)
    soup = BeautifulSoup(page.content, 'html.parser')
    temp = soup.find(class_='CurrentConditions--tempValue--3KcTQ').get_text()
    print('\nThe temperature right now in ' + city + ', ' + state + ' is '
          + '\x1b[1;32;40m' + temp + '\x1b[0m' + "\n")

# Script entry point (was a bare trailing call in the original source).
main()
def getCovidData(zipcode):
    """Return a human-readable COVID summary for a zip code's county.

    :param zipcode: US zip code (anything convertible via str())
    :return: formatted update message, or the sentinel string "error" when
        the lookup, the HTTP call, or the response parsing fails
    """
    try:
        search = SearchEngine(simple_zipcode=True)
        info = search.by_zipcode(str(zipcode)).to_dict()
        state = states[info["state"]].lower()
        county = info["county"].replace(" County", "").lower()
        url = "https://covid-api.onrender.com/get?state={state}&county={county}".format(
            state=state, county=county)
        response = requests.get(url)
        response_dict = json.loads(response.text)
        # Shared prefix was duplicated across both branches before.
        msg = ("COVID UPDATE " + state.upper() + ", " + county.upper()
               + " COUNTY. DATE: " + str(response_dict['Date']) + "\n"
               + "There are " + str(response_dict['Confirmed']) + " confirmed cases.\n"
               + "There are " + str(response_dict['Deaths']) + " confirmed deaths.")
        # Recoveries are reported only when the feed has a non-zero count.
        if str(response_dict['Recovered']) != '0':
            msg += ("\n" + "There are "
                    + str(response_dict['Recovered']) + " confirmed recoveries.")
        return msg
    except Exception:
        # Narrowed from a bare `except:` (which also swallowed SystemExit
        # and KeyboardInterrupt); callers expect the "error" sentinel.
        return "error"
def housing_query(zip_code):
    """Look up a zip code in the rich uszipcode database and return the
    full record serialized as a JSON response."""
    engine = SearchEngine(simple_zipcode=False)
    record = engine.by_zipcode(zip_code)
    payload = record.to_dict()
    return jsonify(payload)
def __init__(self, region, county_info, redfin_cookies, redfin_headers, redfin_params, interest_rate, borrowing_pct, mortgage_term_years, insurance_cost):
    """Configure a regional housing-data scraper.

    Stores the target region, financing assumptions, and the HTTP
    credentials needed to talk to Redfin and AirDNA.
    """
    Scrape.__init__(self)
    # Target market and financing assumptions.
    self.region = region
    self.county_info = county_info
    self.insurance_cost = insurance_cost
    self.interest_rate = interest_rate
    self.borrowing_pct = borrowing_pct
    self.mortgage_term_years = mortgage_term_years
    # Redfin request plumbing.
    self.redfin_cookies = redfin_cookies
    self.redfin_headers = redfin_headers
    self.redfin_params = redfin_params
    # Scrape results accumulate here.
    self.housing_data = {}
    self.data = []
    # Counties whose scraped name differs from the canonical one.
    self.exception_counties = {
        "King County": "Kings County",
    }
    self.search = SearchEngine(simple_zipcode=True)
    self.air_dna_headers = {
        'Sec-Fetch-Mode': 'cors',
        'Referer': 'https://www.airdna.co/vacation-rental-data/app/us/california/union-city/rentalizer',
        'Origin': 'https://www.airdna.co',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36',
        'DNT': '1',
    }
    self.air_dna_access_token = [
        'MjkxMTI|8b0178bf0e564cbf96fc75b8518a5375',
        'ODkwMTc|478ce2c743244a7eb3d1cfddc14909b3',
        'MjA2Mjcw|69e663b4c51c4830a8bde0d3355be8ee',
        'MjA2Mjcz|e35b14ebfb794d849f9484afcffced1d'
    ]
def execute(self, data: pd.DataFrame):
    """Filter and rank the school dataframe according to self.modifiers,
    then print rows self.row_start:self.row_end.

    Location modifiers build a `DataFrame.query` string; demographic
    modifiers accumulate columns for `sort_values`.

    :param data: source dataframe of school records
    """
    if self.command in self.valid_commands:
        organized_dataframe: pd.DataFrame = data
        # Doing a query is for filtering based on row values like location
        query = ""
        searched_by_zip = False
        # Doing a group by is for filtering based on a column heading like # of male students
        sort_by = []
        for item in self.modifiers:
            # All modifiers below are for the query function
            if item.name == 'locationZip':
                # Translate the zip code into state + city query clauses.
                search = SearchEngine(simple_zipcode=True)
                zipcode = search.by_zipcode(item.value)
                query += f'locationState == "{zipcode.state_long.lower()}" and ' \
                         f'locationCity == "{zipcode.major_city.lower()}"'
                searched_by_zip = True
            elif item.name == 'locationCity' or item.name == 'locationState':
                query += f'{item.name} == "{item.value}" and '
                print(query)
            # All modifiers below are for the sort_values function
            elif item.name == 'minority':
                # FIX: 'Hawaiian Nat./Pacific Isl. Students' was split by a
                # stray line break inside the string literal.
                sort_by.extend([
                    'American Indian/Alaska Native Students',
                    'Asian or Asian/Pacific Islander Students',
                    'Hispanic Students',
                    'Black Students',
                    'Hawaiian Nat./Pacific Isl. Students',
                    'Free & Reduced Lunch Students'
                ])
            elif item.name == 'sex':
                if item.value == 'Male':
                    sort_by.extend(['Male Students'])
                else:
                    sort_by.extend(['Female Students'])
            elif item.name == 'free&reducedLunch':
                # Clearer rewrite of the original conditional-expression-for-
                # side-effect; the else arm did nothing.
                if item.value:
                    sort_by.extend(['Free & Reduced Lunch Students'])
            if item.name == 'hasWebsite':
                if item.value == 'true':
                    organized_dataframe = organized_dataframe.dropna(
                        subset=['Web Site URL'])
                else:
                    organized_dataframe = organized_dataframe[
                        organized_dataframe['Web Site URL'].isnull()]
        # Only query if there are actual modifiers given by user.
        if len(query) >= 1:
            # City/state clauses leave a dangling " and " to strip; a zip
            # search already produced a complete clause.
            new_query = query if searched_by_zip else query[:-5]
            print(new_query)
            organized_dataframe = organized_dataframe.query(new_query)
        # Only sort_values if there are actual modifier values given by user.
        # (The original duplicated this call in an if/elif pair; a single
        # check is equivalent.)
        if len(sort_by) >= 1:
            organized_dataframe = organized_dataframe.sort_values(
                by=sort_by, ascending=False, ignore_index=True)
        print(organized_dataframe.iloc[self.row_start:self.row_end, :])
def get_ownership_info(self):
    # Determine the Florida county of the unit from the complainant's (or,
    # failing that, the respondent's) zip code, open the county property
    # appraiser's website for manual verification, and record whether the
    # complainant is a unit owner.
    # NOTE(review): Python 2 code (print statements); self.cdm.case_list
    # indices (-9, -2, 7) encode name/zip fields — presumably fixed case
    # layout, confirm against the CDM definition.
    print "Owner:", self.cdm.case_list[
        -9]  # complainant name to search for
    zip = self.cdm.case_list[-2]  # gets complainant zip code
    search = SearchEngine()
    data = search.by_zipcode(zip)
    # Get comp county; if not in Florida (or our list, ask for it manually)
    if data.state != "FL":
        print "The complainant lives in %s, not Florida." % data.state
        zip = self.cdm.case_list[7]  # try the respondent instead of comp
        data = search.by_zipcode(zip)
        if data.state != "FL":
            print "The respondent is not in FL, either."
            # Neither party is in FL: fall back to asking the user.
            county = get_string("In what county is the unit located?")
            county = county.title().replace("County", "").strip()
        else:
            county = data.county.replace("County", "").strip()
    else:
        county = data.county.replace("County", "").strip()
    # Get the list of counties from the AppraiserSites table
    results = self.dbm.query("SELECT * FROM AppraiserSites")
    counties = [result[0] for result in results]
    if county not in counties:
        print "%s not found in the list of counties." % county
        county = get_string("In what county is the unit located?")
        county = county.title().replace("County", "").strip()
    # Look up the appraiser site for the county and open it in a browser tab.
    sql = " SELECT Site From AppraiserSites WHERE County = '%s' " % county
    results = self.dbm.query(sql)
    appraiser_site = results.fetchone()[0]
    wb.open_new_tab(appraiser_site)
    # Normalize the yes/no answer to a descriptive string.
    self.owner = get_bool("Is complainant a unit owner?")
    self.owner = "unit owner" if self.owner is True else "not unit owner"
def test_zipcode_info(zipcodes):
    """ZipCodeInfo.generate_feature must match a hand-built expected frame."""
    df = zipcodes
    expected = df.copy()
    engine = SearchEngine(simple_zipcode=True)
    for col in ('state', 'county', 'city', 'lat', 'lng', 'timezone'):
        expected[col] = ''
    for code in expected['zip_code'].unique():
        hit = engine.by_zipcode(code)
        rows = expected['zip_code'] == code
        expected.loc[rows, 'city'] = hit.major_city
        expected.loc[rows, 'county'] = hit.county
        expected.loc[rows, 'lat'] = hit.lat
        expected.loc[rows, 'lng'] = hit.lng
        expected.loc[rows, 'state'] = hit.state
        expected.loc[rows, 'timezone'] = hit.timezone
    zip_info = ZipCodeInfo.generate_feature(df, 'zip_code')
    assert zip_info.equals(expected)
def zip_stats(zipcodes, minimum=0, maximum=5000000, simple=True):
    """Return median home values for zip codes within a value range.

    #TODO: add input options for city state county
    #TODO: add input options for other keywords besides median home val

    *Prerequisites: USZIPCODE() pypi package is a required dependency

    **ARGS
    zipcodes: dataframe column or array of strings (zipcodes)
        > Example1: zipcodes=df['zipcode']
        > Example2: zipcodes=['01267','90025']
    minimum: integer dollar-amount minimum threshold (default 0)
    maximum: integer dollar-amount maximum threshold (default 5000000,
        i.e. effectively no maximum)

    **KWARGS
    simple: default=True
        > set simple=False to use the rich info database

    Returns a dict mapping zip code -> median home value, restricted to
    values strictly between `minimum` and `maximum`.
    """
    from uszipcode import SearchEngine

    search = SearchEngine(simple_zipcode=bool(simple))
    keyword = 'median_home_value'
    zipvals = {}
    for code in zipcodes:
        record = search.by_zipcode(code).to_dict()
        value = record[keyword]
        # Zip codes with no recorded value return None; comparing None to
        # the thresholds raised TypeError under Python 3, so skip them.
        if value is None:
            continue
        if minimum < value < maximum:
            zipvals[record['zipcode']] = value
    return zipvals
def create_crime_zip_codes(crime_df):
    '''
    Use the uszipcode library to attach a zip code to each unique block's
    latitude/longitude pair in the Crime Dataset, then merge the zip codes
    back into the full dataframe for a later join with ACS data.

    NOTE: the uszipcode lookups can take a while to run (this is normal)

    Input:
        crime_df (dataframe): original crime dataframe
    Output:
        merged_df (dataframe): crime dataframe with a numeric zip_code column
    '''
    crime_df.loc[:, 'latitude'] = crime_df.latitude.astype(float)
    crime_df.loc[:, 'longitude'] = crime_df.longitude.astype(float)
    # One lookup per unique block keeps the slow geocoding calls minimal.
    unique_blocks = crime_df[['block', 'latitude', 'longitude']] \
        .drop_duplicates(subset=['block'])
    unique_blocks = unique_blocks.dropna()
    engine = SearchEngine(simple_zipcode=True)

    def nearest_zip(row):
        return engine.by_coordinates(row['latitude'], row['longitude'])[0].zipcode

    unique_blocks['zip_code'] = unique_blocks.apply(nearest_zip, axis=1)
    merged_df = pd.merge(crime_df, unique_blocks,
                         on=['block', 'latitude', 'longitude'], how='left')
    merged_df.loc[:, 'zip_code'] = pd.to_numeric(merged_df['zip_code'],
                                                 errors='coerce')
    return merged_df
def topTenUsers():
    """Rank all users from the CSV by column 3 (descending) and replace each
    user's zip code with its major city (falling back to "Brooklyn" for
    unknown codes).

    :return: JSON string {"users": [...]} in ranked order
    """
    pq = PriorityQueue()
    count = 0
    # `with` closes the file handle (the original leaked it).
    with open('../utilities/data.csv', 'r') as csv_file:
        for row in csv.reader(csv_file):
            if row[0] == "User_ID":
                continue  # skip the header row
            # Negate so the max score pops first from the min-heap queue.
            pq.put((-int(row[3]), row))
            count += 1
    result = {"users": []}
    for _ in range(count):
        result["users"].append(pq.get()[1])
    # Hoisted out of the loop: one engine serves every lookup (the original
    # rebuilt SearchEngine per user).
    search = SearchEngine(simple_zipcode=True)
    for i in range(count):
        hit = search.by_zipcode(result["users"][i][4])
        if hit.major_city is not None:
            result["users"][i][4] = hit.major_city
        else:
            result["users"][i][4] = "Brooklyn"
    return json.dumps(result)
def is_valid(int_zip):
    """Return True when the zip code exists and has coordinates."""
    engine = SearchEngine()
    record = engine.by_zipcode(int(int_zip))
    return bool(record.zipcode and record.lat and record.lng)
def validate_location(self, field):
    """Form validator: reject ZIP codes unknown to the uszipcode database."""
    engine = SearchEngine(simple_zipcode=True)
    record = engine.by_zipcode(field.data)
    if record.zipcode is None:
        raise ValidationError('Invalid ZIP code.')
def get_lat_lon_by_postalcode_country(self, postal_code, country='US', return_result_object=False, db_file_dir=None):
    """Resolve a postal code to (latitude, longitude).

    US codes go through uszipcode; other countries use pgeocode.

    :param postal_code: postal code to resolve
    :param country: ISO country code, default 'US'
    :param return_result_object: when True, also return the raw lookup result
    :param db_file_dir: optional local directory for the uszipcode database
    :raises InvalidZipCodeError: when the code resolves to no coordinates
    """
    if country == "US":
        engine_kwargs = {'simple_zipcode': True}
        if db_file_dir:
            engine_kwargs['db_file_dir'] = db_file_dir
        zipcode = SearchEngine(**engine_kwargs).by_zipcode(postal_code)
        if zipcode.lat is None or zipcode.lng is None:
            raise InvalidZipCodeError('Invalid ZIP Code')
        if return_result_object:
            return zipcode.lat, zipcode.lng, zipcode
        return zipcode.lat, zipcode.lng
    # Non-US lookup via pgeocode; NaN coordinates mean "not found".
    nomi = pgeocode.Nominatim(country)
    hit = nomi.query_postal_code(postal_code)
    if math.isnan(hit.latitude) or math.isnan(hit.longitude):
        raise InvalidZipCodeError('Invalid ZIP Code')
    if return_result_object:
        return hit.latitude, hit.longitude, hit
    return hit.latitude, hit.longitude
def get_income_df(df):
    """
    Attach zip code median income data to dataframe
    In: DataFrame
    Out: DataFrame w/ Median Income column
    """
    engine = SearchEngine()
    codes = sorted(int(z) for z in df['ZIPCODE'].dropna().unique())
    # Drop two known-bad entries; list.remove raises ValueError if either
    # is absent, exactly as the original did.
    codes.remove(12345)
    codes.remove(30339)
    income_df = pd.DataFrame(codes)
    income_df['Median_Income'] = [
        engine.by_zipcode(code).median_household_income for code in codes
    ]
    income_df.set_index(0, inplace=True)
    df.ZIPCODE = df.ZIPCODE.fillna(0)
    df = df.astype({'ZIPCODE': int})
    trim_inc = df.join(income_df, on='ZIPCODE', how='left')
    return trim_inc
def state_zip_by_popdense(state_str, num_returns):
    """Return up to num_returns zip codes of a state, ordered by
    descending population density."""
    engine = SearchEngine()
    matches = engine.query(state=state_str,
                           sort_by=Zipcode.population_density,
                           ascending=False,
                           returns=num_returns)
    return [match.zipcode for match in matches]
def inference(self, zip_code, population_density=0, median_home_value=0):
    """Predict with the linear model:
    dot(reg_coef, [median_home_value, zip_code, population_density]) + reg_intercept.

    :param zip_code: US zip code (numeric or numeric string)
    :param population_density: people per square mile; 0 triggers a DB lookup
    :param median_home_value: dollars; 0 triggers a DB lookup
    :return: the model prediction, or 0 when the dot product fails
    """
    # Renamed from `pd`/`mhv`: `pd` shadowed the conventional pandas alias.
    density = population_density
    home_value = median_home_value
    zp = int(zip_code)
    print(zp)
    # When neither feature is supplied, pull both from the zip-code DB.
    if density + home_value == 0:
        search = SearchEngine(simple_zipcode=False)
        zip_dct = search.by_zipcode(zp).to_dict()
        density = zip_dct['population_density']
        home_value = zip_dct['median_home_value']
    print(home_value)
    print(density)
    inp = np.asarray([float(home_value), float(zip_code), float(density)])
    print(inp)
    print(self.reg_coef)
    result = 0
    try:
        result = np.dot(self.reg_coef, inp) + self.reg_intercept
        print(result)
    except Exception:
        # Narrowed from a bare `except:`; a failed dot product (e.g. shape
        # mismatch) falls back to the 0 default, as before.
        print('failed')
    print(result)
    return result
def search_by_zipcode(self, category='Confirmed', zipcode="21029", radius=50):
    """Return unique (county, state) name pairs within `radius` miles of a
    zip code. `category` is currently unused (kept for API compatibility).
    """
    engine = SearchEngine()
    origin = engine.by_zipcode(zipcode)
    neighbors = engine.by_coordinates(origin.lat, origin.lng, radius,
                                      sort_by='dist', ascending=True,
                                      returns=100)
    county_state_pairs = list(set((n.county, n.state) for n in neighbors))
    nei_rec = []
    for county_name, state_abbr in county_state_pairs:
        try:
            county = county_name
            if 'County' in county:
                county = county.split('County')[0].strip()
            state = us.states.lookup(state_abbr).name
            nei_rec.append((county, state))
        except:
            # Best effort: unresolvable state abbreviations are skipped.
            pass
    # Return a list of (county, state).
    return nei_rec
def zipcode():
    """Prompt for a five-digit zip code and return its uszipcode record."""
    engine = SearchEngine()
    code = input("Please enter your five digit zipcode: ")
    record = engine.by_zipcode(code)
    print("We will be searching for jobs in " + record.county + ", " + record.state)
    return record
def get(event, context):
    """AWS Lambda handler: look up the zip code given as path parameter
    ``id`` and return its record as a JSON body.

    Returns HTTP 200 with the record (or an empty string when the zip code
    is unknown), and HTTP 400 with an empty body on any failure.
    """
    try:
        requested = event['pathParameters']['id']
        db_dir = os.path.join(os.path.dirname(__file__), ".uszipcode")
        engine = SearchEngine(simple_zipcode=True, db_file_dir=db_dir)
        record = engine.by_zipcode(requested)
        engine.close()
        rlt = record.to_dict()
        if rlt['zipcode'] is not None:
            print(rlt)
        else:
            rlt = ''
        statusCode = 200
    except Exception as e:
        print('[Error] ' + str(e))
        statusCode = 400
        rlt = ''
    return {
        "statusCode": statusCode,
        "body": json.dumps(rlt)
    }
def find_all_valid_zipcode():
    """Scan 00500-99999 and return every zip code the database knows."""
    engine = SearchEngine(simple_zipcode=True)
    hits = (engine.by_zipcode('%05d' % candidate)
            for candidate in range(500, 100000))
    return [hit.zipcode for hit in hits if hit.zipcode]
def order(store):
    """
    Lambda quantifier
    ---
    store: (arr) cleaned record; store[2] is an address string
        "street, city, STATE, ZIP"
    ---
    return: (num) death + cases for the store's county
    """
    # extract location data
    address = store[2].split(", ")
    zip_code = address[3]  # renamed from `zip`, which shadowed the builtin
    state = address[2]
    # lookup county
    search = SearchEngine(simple_zipcode=True)
    county = search.by_zipcode(zip_code).to_dict()['county']
    # normalize county names to what the case database expects
    if ' County' in county:
        county = county.replace(' County', '')
    if county == "New York":
        county = "New York City"
    cases = CASES.get_cases(county, state)
    # total impact = confirmed cases + deaths
    return int(cases[0][4]) + int(cases[0][5])
def retrieve_zips(col):
    """Map each user's ZipCode to its US state abbreviation.

    :param col: unused — the function reads the module-level ``users``
        frame directly. NOTE(review): likely intended to iterate ``col``
        instead; confirm against callers before changing.
    :return: numpy array of state abbreviations, one per row of ``users``.
    """
    search = SearchEngine(simple_zipcode=True)
    states = []
    for zipc in users['ZipCode']:
        zipcode = search.by_zipcode(zipc)
        state = zipcode.state
        # np.append copies the whole array each iteration (quadratic);
        # `states` starts as a list and becomes an ndarray after the
        # first append.
        states = np.append(states, state)
    return states
def findState(df):
    """Print the uszipcode record for each row's 'zipcode' value in `df`.

    :param df: dataframe with a 'zipcode' column
    """
    search = SearchEngine()
    for index, row in df.iterrows():
        # (Removed the original's unused local `view = row['view']`.)
        record = search.by_zipcode(row['zipcode'])
        print(str(record))
        print("")
def find_zipcode_in_major_states():
    """Return every known zip code located in a hand-picked set of major
    states (NY, CA, WA, IL, DC, TX, GA, VA)."""
    target_states = {'NY', 'CA', 'WA', 'IL', 'DC', 'TX', 'GA', 'VA'}
    engine = SearchEngine(simple_zipcode=False)
    found = []
    for candidate in range(500, 100000):
        record = engine.by_zipcode('%05d' % candidate)
        if record.zipcode and record.state in target_states:
            found.append(record.zipcode)
    return found
def get_zipcode(self, df):
    """Reverse-geocode a row's latitude/longitude to a zip code.

    :param df: a row (or mapping) exposing 'latitude' and 'longitude'
    :return: the nearest matching zip code, or None when no match is found
    """
    from uszipcode import SearchEngine
    search = SearchEngine(simple_zipcode=True)
    zipcode = search.by_coordinates(df['latitude'], df['longitude'])
    if not zipcode:
        return None
    else:
        # NOTE(review): relies on .values()[0] being the zipcode field of
        # the first match — presumably equivalent to zipcode[0].zipcode;
        # confirm against the uszipcode model's attribute ordering.
        return zipcode[0].values()[0]