def get_ownership_info(self):
    """Resolve the county appraiser site for the unit and record whether the
    complainant is a unit owner.

    Reads party data out of ``self.cdm.case_list`` (fixed positions), resolves
    the county via uszipcode, looks up the county's appraiser site in the
    AppraiserSites table, opens it in a browser tab, and finally sets
    ``self.owner`` to "unit owner" / "not unit owner".

    Fixes: Python 2 ``print`` statements converted to ``print()`` calls to
    match the rest of the file; the county value is quote-escaped before being
    interpolated into SQL.
    """
    # NOTE(review): -9, -2, 7 are magic case_list positions -- presumably
    # complainant name, complainant ZIP, respondent ZIP; confirm against the
    # CDM record layout.
    print("Owner:", self.cdm.case_list[-9])  # complainant name to search for
    zip_code = self.cdm.case_list[-2]  # complainant ZIP code
    search = SearchEngine()
    data = search.by_zipcode(zip_code)

    # Get comp county; if not in Florida, try the respondent, then ask.
    if data.state != "FL":
        print("The complainant lives in %s, not Florida." % data.state)
        zip_code = self.cdm.case_list[7]  # try the respondent instead of comp
        data = search.by_zipcode(zip_code)
        if data.state != "FL":
            print("The respondent is not in FL, either.")
            county = get_string("In what county is the unit located?")
            county = county.title().replace("County", "").strip()
        else:
            county = data.county.replace("County", "").strip()
    else:
        county = data.county.replace("County", "").strip()

    # Validate the county against the AppraiserSites table.
    results = self.dbm.query("SELECT * FROM AppraiserSites")
    counties = [result[0] for result in results]
    if county not in counties:
        print("%s not found in the list of counties." % county)
        county = get_string("In what county is the unit located?")
        county = county.title().replace("County", "").strip()

    # Escape single quotes so a county name cannot break the statement.
    # TODO(review): switch to a parameterized query if dbm.query forwards
    # bind parameters to the underlying cursor.
    sql = " SELECT Site From AppraiserSites WHERE County = '%s' " % \
        county.replace("'", "''")
    results = self.dbm.query(sql)
    appraiser_site = results.fetchone()[0]
    wb.open_new_tab(appraiser_site)

    self.owner = get_bool("Is complainant a unit owner?")
    self.owner = "unit owner" if self.owner is True else "not unit owner"
def main():
    """Extract dermatology/oncology providers from the CMS utilization CSV,
    geocode each provider's ZIP to lat/lng, and write Doctors4.csv.

    Bug fixed: the original indexed ``dermatologies`` with the *enumerate*
    position, but after the provider filter and ``drop_duplicates`` the
    frame's index is no longer 0..n-1, so ``.loc[zipc_ix, ...]`` wrote
    lat/lng into the wrong (or brand-new) rows.  The index is now reset
    before the geocoding loop.  ZIPs shorter than five digits are skipped
    instead of being looked up as NaN.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir')
    options = parser.parse_args()
    dataset_dir = options.data_dir

    # national plan & provider enumeration system
    # wget https://data.cms.gov/api/views/85jw-maq9/rows.csv?accessType=DOWNLOAD
    nppes = pd.read_csv(Path(
        dataset_dir,
        'Medicare_Provider_Utilization_and_Payment_Data__Physician_and_Other_Supplier_PUF_CY2016.csv'
    ), low_memory=False)

    # Provider types mentioning oncology/dermatology (case-insensitive).
    columns_int = [
        x for x in nppes['Provider Type'].unique()
        if 'oncology' in str.lower(x) or 'dermatology' in str.lower(x)
    ]
    prov_ix = nppes[nppes['Provider Type'] == 'Dermatology'].isin(
        columns_int).index
    dermatologies = nppes.loc[prov_ix][[
        'National Provider Identifier', 'Provider Type',
        'Last Name/Organization Name of the Provider',
        'First Name of the Provider', 'Middle Initial of the Provider',
        'Credentials of the Provider', 'Gender of the Provider',
        'Street Address 1 of the Provider', 'City of the Provider',
        'Zip Code of the Provider', 'State Code of the Provider',
        'Country Code of the Provider'
    ]].drop_duplicates()

    columns = [x.replace(' ', '_') for x in dermatologies.columns]
    dermatologies.columns = columns

    # Make the index sequential so positional writes below hit the intended rows.
    dermatologies = dermatologies.reset_index(drop=True)

    search = SearchEngine()
    dermatologies['latitude'] = [0] * dermatologies.shape[0]
    dermatologies['longitude'] = [0] * dermatologies.shape[0]
    for zipc_ix, zipc in enumerate(dermatologies['Zip_Code_of_the_Provider']):
        zip_str = str(zipc)
        if len(zip_str) > 5:
            zipc = int(zip_str[:5])  # ZIP+4 -> base 5-digit ZIP
        elif len(zip_str) < 5:
            # Malformed/short ZIP: leave the (0, 0) placeholder instead of
            # querying the search engine with NaN.
            continue
        result = search.by_zipcode(zipc)
        dermatologies.loc[zipc_ix, 'latitude'] = result.lat
        dermatologies.loc[zipc_ix, 'longitude'] = result.lng

    dermatologies.to_csv(Path(dataset_dir, 'Doctors4.csv'), index=False)
def topTenUsers():
    """Return all users from data.csv as JSON, ordered by descending score
    (CSV column 3), with each user's ZIP (column 4) replaced by its major
    city ("Brooklyn" when the ZIP is unknown).

    Fixes: the CSV file handle is closed via ``with``; the SearchEngine is
    built once instead of once per user; the unused ``data`` accumulator and
    ``defaultzip`` variable are removed; ``!= None`` becomes ``is not None``.
    """
    pq = PriorityQueue()
    count = 0
    with open('../utilities/data.csv', 'r') as f:
        for row in csv.reader(f):
            if row[0] == "User_ID":
                continue  # header row
            # Negate the score so the highest score pops first.
            pq.put((-int(row[3]), row))
            count += 1

    result = {"users": [pq.get()[1] for _ in range(count)]}

    search = SearchEngine(simple_zipcode=True)
    for user in result["users"]:
        found = search.by_zipcode(user[4])
        if found.major_city is not None:
            user[4] = found.major_city
        else:
            user[4] = "Brooklyn"

    return json.dumps(result)
def getCovidData(zipcode):
    """Build a human-readable COVID summary for the county containing
    *zipcode*, via the covid-api.onrender.com service.

    Returns the summary string, or "error" on any failure (unknown ZIP,
    network error, missing response keys) -- same contract as before.

    Fixes: the two nearly-identical message branches are collapsed into a
    shared base message plus an optional recoveries line; the bare
    ``except:`` is narrowed to ``except Exception``.
    """
    try:
        search = SearchEngine(simple_zipcode=True)
        info = search.by_zipcode(str(zipcode)).to_dict()
        state = states[info["state"]].lower()
        county = info["county"].replace(" County", "").lower()

        url = "https://covid-api.onrender.com/get?state={state}&county={county}".format(
            state=state, county=county)
        response = requests.get(url)
        response_dict = json.loads(response.text)

        # Header + confirmed/deaths lines are common to both cases.
        msg = "COVID UPDATE " + state.upper() + ", " + county.upper() + \
            " COUNTY. DATE: " + str(response_dict['Date']) + "\n" \
            "There are " + str(response_dict['Confirmed']) + " confirmed cases.\n" + \
            "There are " + str(response_dict['Deaths']) + " confirmed deaths."
        # Recoveries are only reported when the count is nonzero.
        if str(response_dict['Recovered']) != '0':
            msg += "\n" + "There are " + \
                str(response_dict['Recovered']) + " confirmed recoveries."
        return msg
    except Exception:
        return "error"
def execute(self, data: pd.DataFrame):
    """Filter and rank *data* according to ``self.modifiers``, then print
    the rows in [self.row_start, self.row_end).

    Location modifiers (ZIP / city / state) are compiled into a pandas
    ``query`` string; demographic modifiers become a ``sort_values`` column
    list; the website modifier filters on 'Web Site URL' nullness.
    """
    if self.command in self.valid_commands:
        organized_dataframe: pd.DataFrame = data
        # Doing a query is for filtering based on row values like location
        query = ""
        searched_by_zip = False
        # Doing a group by is for filtering based on a column heading like # of male students
        sort_by = []
        for item in self.modifiers:
            # All modifiers below are for the query function
            if item.name == 'locationZip':
                # Translate a ZIP into its state + city and match on those.
                search = SearchEngine(simple_zipcode=True)
                zipcode = search.by_zipcode(item.value)
                # NOTE(review): no trailing " and " here, unlike the
                # city/state branch -- the trimming below depends on this.
                query += f'locationState == "{zipcode.state_long.lower()}" and ' \
                         f'locationCity == "{zipcode.major_city.lower()}"'
                searched_by_zip = True
            elif item.name == 'locationCity' or item.name == 'locationState':
                # Trailing " and " is stripped later (query[:len(query) - 5]).
                query += f'{item.name} == "{item.value}" and '
                print(query)
            # All modifiers below are for the sort_values function
            elif item.name == 'minority':
                sort_by.extend([
                    'American Indian/Alaska Native Students',
                    'Asian or Asian/Pacific Islander Students',
                    'Hispanic Students', 'Black Students',
                    'Hawaiian Nat./Pacific Isl. Students',
                    'Free & Reduced Lunch Students'
                ])
            elif item.name == 'sex':
                if item.value == 'Male':
                    sort_by.extend(['Male Students'])
                else:
                    sort_by.extend(['Female Students'])
            elif item.name == 'free&reducedLunch':
                # NOTE(review): expression-statement conditional -- extends
                # only when item.value is truthy; the else arm is a no-op.
                sort_by.extend(['Free & Reduced Lunch Students'
                                ]) if item.value else item.value
            # Independent `if` (not elif): website filtering can combine
            # with any of the modifiers above.
            if item.name == 'hasWebsite':
                if item.value == 'true':
                    organized_dataframe = organized_dataframe.dropna(
                        subset=['Web Site URL'])
                else:
                    organized_dataframe = organized_dataframe[
                        organized_dataframe['Web Site URL'].isnull()]
        # Only query if there are actual modifiers given by user
        if len(query) >= 1:
            new_query = query
            if not searched_by_zip:
                # Drop the trailing " and " left by the city/state branch.
                # NOTE(review): if a ZIP modifier is combined with a
                # city/state modifier, the trailing " and " survives --
                # confirm whether modifiers are mutually exclusive.
                new_query = query[:len(query) - 5]
            print(new_query)
            organized_dataframe = organized_dataframe.query(new_query)
            if len(sort_by) >= 1:
                organized_dataframe = organized_dataframe.sort_values(
                    by=sort_by, ascending=False, ignore_index=True)
        # Only sort_values if there are actual modifier values given by user
        elif len(sort_by) >= 1:
            organized_dataframe = organized_dataframe.sort_values(
                by=sort_by, ascending=False, ignore_index=True)
        print(organized_dataframe.iloc[self.row_start:self.row_end, :])
def test_zipcode_info(zipcodes):
    """ZipCodeInfo.generate_feature must match a hand-built lookup table."""
    df = zipcodes
    expected = df.copy()
    searcher = SearchEngine(simple_zipcode=True)

    # Seed the six derived columns with empty strings, as the feature does.
    for name in ('state', 'county', 'city', 'lat', 'lng', 'timezone'):
        expected[name] = ''

    # Column name -> attribute on the uszipcode result object.
    attr_for = {'city': 'major_city', 'county': 'county', 'lat': 'lat',
                'lng': 'lng', 'state': 'state', 'timezone': 'timezone'}
    for code in expected['zip_code'].unique():
        hit = searcher.by_zipcode(code)
        mask = expected['zip_code'] == code
        for col, attr in attr_for.items():
            expected.loc[mask, col] = getattr(hit, attr)

    zip_info = ZipCodeInfo.generate_feature(df, 'zip_code')
    assert zip_info.equals(expected)
def validate_location(self, field):
    """Check that the location given is valid using uszipcode module."""
    engine = SearchEngine(simple_zipcode=True)
    if engine.by_zipcode(field.data).zipcode is None:
        raise ValidationError('Invalid ZIP code.')
def housing_query(zip_code):
    """Return the full (rich) uszipcode record for *zip_code* as JSON."""
    engine = SearchEngine(simple_zipcode=False)
    record = engine.by_zipcode(zip_code)
    return jsonify(record.to_dict())
def zipcode():
    """Prompt for a five-digit ZIP and return its uszipcode record."""
    engine = SearchEngine()
    code = input("Please enter your five digit zipcode: ")
    # Avoid shadowing the function name with the result.
    result = engine.by_zipcode(code)
    print("We will be searching for jobs in " + result.county + ", " +
          result.state)
    return result
def is_valid(int_zip):
    """True when *int_zip* resolves to a known ZIP with coordinates."""
    engine = SearchEngine()
    hit = engine.by_zipcode(int(int_zip))
    # A valid hit has a zipcode plus both coordinates populated.
    return bool(hit.zipcode and hit.lat and hit.lng)
def get_income_df(df, exclude=(12345, 30339)):
    """
    Attach zip code median income data to dataframe

    In: DataFrame with a ZIPCODE column
        exclude: iterable of ZIP codes to drop before the income lookup.
                 Defaults to the two codes the original hard-coded; codes
                 that are absent are now silently ignored instead of
                 raising ValueError from list.remove().
    Out: DataFrame w/ Median_Income column

    NOTE: mutates the caller's frame (fills NaN ZIPCODEs with 0), as before.
    """
    search = SearchEngine()
    # Deduplicate, convert to int, drop exclusions, and sort in one pass.
    zipcodes = sorted(
        {int(z) for z in df['ZIPCODE'].dropna().unique()} - set(exclude))

    zipdf = pd.DataFrame(zipcodes)
    zipdf['Median_Income'] = [
        search.by_zipcode(zip_).median_household_income for zip_ in zipcodes
    ]
    zipdf.set_index(0, inplace=True)

    df.ZIPCODE = df.ZIPCODE.fillna(0)
    df = df.astype({'ZIPCODE': int})
    trim_inc = df.join(zipdf, on='ZIPCODE', how='left')
    return trim_inc
def validate_zip_city_pfds(pfds_file_name):
    """Cross-check ZIP/city pairs recorded in a PFDS report file against
    uszipcode, printing each mismatch and a final accuracy summary.

    Lines starting with "==" or "Coverage" are skipped; for every other
    line, the recorded city is the last single-quoted token before the
    final "::", and the ZIP is the first digit-leading token on the line.
    """
    correct = 0
    incorrect = 0
    code_city_dict = dict()
    code_city_dict.clear()  # NOTE(review): dict is never used afterwards
    search = SearchEngine(simple_zipcode=False)
    with open(pfds_file_name, "r") as f:
        lines = f.readlines()
        for line in lines:
            if line.startswith("==") or line.startswith("Coverage"):
                continue
            else:
                # Split off the trailing "::..." suffix, then tokenize the
                # head on decimals / non-word characters.
                # NOTE(review): pattern should be a raw string r'(\d*...' to
                # avoid invalid-escape warnings -- behavior is unchanged.
                s1 = line.rsplit('::', 1)
                s2 = re.split('(\d*\.\d+|\W)', s1[0])
                ss = [t for t in s2 if len(t) > 0]
                # Recorded city: text after the last single quote.
                city1 = s1[0].rsplit('\'', 1)
                rec_city = city1[1]
                # First token that starts with a digit is taken as the ZIP.
                # NOTE(review): if no such token exists, Zip keeps its value
                # from the previous line (or is unbound on the first) --
                # confirm the input format guarantees a ZIP per line.
                for ii in ss:
                    if ii.isspace():
                        continue
                    if (ii[0].isdigit()):
                        Zip = ii
                        break
                zipcode = search.by_zipcode(Zip)
                true_city = zipcode.major_city
                if not (true_city.lower() == rec_city.lower()):
                    print(Zip, rec_city, "|=", true_city)
                    incorrect += 1
                else:
                    correct += 1
    # Summary statistics over all validated lines.
    error_rate = incorrect / (correct + incorrect) * 100
    accuracy = 100 - error_rate
    print("Correct = ", correct, "\tIncorrect = ", incorrect,
          "error rate = {0:.2f}".format(error_rate) + "%",
          "\tAccuracy = {0:.2f}".format(accuracy), "%")
def generate_feature(cls, data, column=None, **kwargs):
    """
    :param data: dataframe containing zip codes (mutated in place)
    :param column: column label containing zip codes
    :param kwargs: ignored
    :return: dataframe with new columns for county, city, latitude and longitude
    """
    if column is None:
        raise ValueError('zipcode column must be given')

    engine = SearchEngine(simple_zipcode=True)

    # Seed the derived columns with empty strings.
    for new_col in ('state', 'county', 'city', 'lat', 'lng', 'timezone'):
        data[new_col] = ''

    # Output column -> attribute on the uszipcode result record.
    attr_map = {'city': 'major_city', 'county': 'county', 'lat': 'lat',
                'lng': 'lng', 'state': 'state', 'timezone': 'timezone'}
    for code in data[column].unique():
        record = engine.by_zipcode(code)
        rows = data[column] == code
        for col_name, attr in attr_map.items():
            data.loc[rows, col_name] = getattr(record, attr)
    return data
def zip_stats(zipcodes, minimum=0, maximum=5000000, simple=True):
    """
    Lookup median home values for zipcodes, returning {zipcode: value} for
    values strictly between *minimum* and *maximum*.

    #TODO: add input options for city state county
    #TODO: add input options for other keywords besides median home val

    *Prerequisites: USZIPCODE() pypi package is a required dependency

    **ARGS
    zipcodes: dataframe column or iterable of ZIP strings
        > Example1: zipcodes=df['zipcode']
        > Example2: zipcodes=['01267','90025']
    minimum: integer dollar-amount min threshold (default 0)
    maximum: integer dollar-amount max threshold (default 5000000)

    **KWARGS
    simple: default=True; set False to use the rich info database

    Fixes: unknown ZIPs report None for median_home_value, which made the
    old ``v > minimum`` comparison raise TypeError -- None values are now
    skipped.  The three intermediate list/dict rebuilds are collapsed into
    a single pass (duplicate input codes still collapse to one entry).
    """
    # pypi package for retrieving information based on us zipcodes
    from uszipcode import SearchEngine

    # set simple_zipcode=False to use rich info database
    search = SearchEngine(simple_zipcode=bool(simple))

    keyword = 'median_home_value'
    zipvals = {}
    for code in zipcodes:
        record = search.by_zipcode(code).to_dict()
        value = record[keyword]
        # Skip unknown ZIPs (None) and keep only values inside the window.
        if value is not None and minimum < value < maximum:
            # Key on the canonical zipcode field, as the original did.
            zipvals[record['zipcode']] = value
    return zipvals
def get_lat_lon_by_postalcode_country(self, postal_code, country='US',
                                      return_result_object=False,
                                      db_file_dir=None):
    """Resolve *postal_code* to (lat, lng), optionally returning the raw
    lookup result as a third element.

    US codes are resolved with uszipcode (honoring *db_file_dir* when
    given); all other countries go through pgeocode.  Raises
    InvalidZipCodeError when the code cannot be located.
    """
    if country == "US":
        engine_kwargs = {'simple_zipcode': True}
        if db_file_dir:
            engine_kwargs['db_file_dir'] = db_file_dir
        record = SearchEngine(**engine_kwargs).by_zipcode(postal_code)
        if record.lat is None or record.lng is None:
            raise InvalidZipCodeError('Invalid ZIP Code')
        if return_result_object:
            return record.lat, record.lng, record
        return record.lat, record.lng

    nomi = pgeocode.Nominatim(country)
    hit = nomi.query_postal_code(postal_code)
    if math.isnan(hit.latitude) or math.isnan(hit.longitude):
        raise InvalidZipCodeError('Invalid ZIP Code')
    if return_result_object:
        return hit.latitude, hit.longitude, hit
    return hit.latitude, hit.longitude
def inference(self, zip_code, population_density=0, median_home_value=0):
    """Predict a value from (median_home_value, zip_code, population_density)
    using the fitted linear model stored on ``self`` (reg_coef,
    reg_intercept).

    When both optional inputs are 0, they are looked up by ZIP code via
    uszipcode.  Returns 0 if the dot product fails.

    Fixes: the local name ``pd`` no longer shadows the pandas alias, the
    bare ``except:`` is narrowed, and the commented-out dead code is gone.
    """
    density = population_density
    home_value = median_home_value
    zip_int = int(zip_code)
    print(zip_int)
    if density + home_value == 0:
        # Neither input supplied: derive both from the ZIP code.
        search = SearchEngine(simple_zipcode=False)
        zip_dct = search.by_zipcode(zip_int).to_dict()
        density = zip_dct['population_density']
        home_value = zip_dct['median_home_value']
    print(home_value)
    print(density)
    inp = np.asarray([float(home_value), float(zip_code), float(density)])
    print(inp)
    print(self.reg_coef)
    result = 0
    try:
        result = np.dot(self.reg_coef, inp) + self.reg_intercept
        print(result)
    except (TypeError, ValueError):
        # Coefficients malformed or shape mismatch -- fall back to 0.
        print('failed')
    print(result)
    return result
def get(event, context):
    """Lambda handler: resolve the ZIP in the path parameter ``id``.

    Responds 200 with the record (or '' for an unknown ZIP), 400 with ''
    on any failure.
    """
    try:
        req_zipcode = event['pathParameters']['id']
        zipcode_db_dir = os.path.join(os.path.dirname(__file__), ".uszipcode")
        engine = SearchEngine(simple_zipcode=True, db_file_dir=zipcode_db_dir)
        record = engine.by_zipcode(req_zipcode)
        engine.close()
        data = record.to_dict()
        if data['zipcode'] is not None:
            rlt = data
            print(rlt)
        else:
            rlt = ''
        statusCode = 200
    except Exception as e:
        print('[Error] ' + str(e))
        statusCode = 400
        rlt = ''
    return {
        "statusCode": statusCode,
        "body": json.dumps(rlt)
    }
def order(store):
    """
    Lambda quantifier
    ---
    store: (arr) cleaned
    ---
    return: (num) death + cases

    Fix: the local ``zip`` no longer shadows the builtin of the same name.
    """
    # extract location data (address field layout: ..., state, zip)
    address = store[2].split(", ")
    zip_code = address[3]
    state = address[2]

    # lookup county for the ZIP
    search = SearchEngine(simple_zipcode=True)
    county = search.by_zipcode(zip_code).to_dict()['county']

    # normalize names the CASES table does not use
    if ' County' in county:
        county = county.replace(' County', '')
    if county == "New York":
        county = "New York City"

    cases = CASES.get_cases(county, state)
    # presumably columns 4/5 are cases/deaths -- confirm against the
    # CASES schema (the docstring says the sum is deaths + cases).
    cases = int(cases[0][4]) + int(cases[0][5])
    return cases
def find_all_valid_zipcode():
    """Return every 5-digit code in [00500, 99999] known to uszipcode."""
    engine = SearchEngine(simple_zipcode=True)
    hits = (engine.by_zipcode(str(code).zfill(5))
            for code in range(500, 100000))
    return [hit.zipcode for hit in hits if hit.zipcode]
def get_population_df(zipcodes):
    """Build (age, race, household-income) dataframes for *zipcodes*.

    Each output frame has one row per ZIP that reports the corresponding
    uszipcode statistic, tagged with a 'zipcode' column.
    """
    engine = SearchEngine(simple_zipcode=False)

    def as_row(values, code):
        # One uszipcode 'values' series -> single-row frame tagged with its ZIP.
        frame = pd.DataFrame(values).set_index('x').T
        frame['zipcode'] = code
        return frame

    age_parts, race_parts, income_parts = [], [], []
    for code in zipcodes:
        record = engine.by_zipcode(code)
        if record.population_by_age:
            age_parts.append(
                as_row(record.population_by_age[2]['values'], code))
        if record.population_by_race:
            race_parts.append(
                as_row(record.population_by_race[0]['values'], code))
        if record.household_income:
            income_parts.append(
                as_row(record.household_income[0]['values'], code))

    age_df = pd.concat(age_parts) if age_parts else pd.DataFrame()
    race_df = pd.concat(race_parts) if race_parts else pd.DataFrame()
    householdincome_df = (pd.concat(income_parts)
                          if income_parts else pd.DataFrame())
    return age_df, race_df, householdincome_df
def search_by_zipcode(self, category='Confirmed', zipcode="21029", radius=50):
    """Return the unique (county, state-name) pairs within *radius* miles
    of *zipcode*, using the 100 nearest ZIP records.

    ``category`` is kept for interface compatibility but is currently
    unused (the dataframe filtering it fed was dead code).

    Fixes: the bare ``except: pass`` -- which also swallowed
    KeyboardInterrupt -- is narrowed to the failures the lookup can
    actually produce (unknown state code or missing county); the
    commented-out dataframe code is removed.
    """
    search = SearchEngine()
    zipinfo = search.by_zipcode(zipcode)
    neighbors = search.by_coordinates(zipinfo.lat, zipinfo.lng, radius,
                                      sort_by='dist', ascending=True,
                                      returns=100)
    pairs = list(set((n.county, n.state) for n in neighbors))

    nei_rec = []
    for county, state_abbr in pairs:
        try:
            if 'County' in county:
                county = county.split('County')[0].strip()
            # us.states.lookup returns None for unknown codes; the
            # resulting AttributeError skips the record, as before.
            state = us.states.lookup(state_abbr).name
            nei_rec.append((county, state))
        except (AttributeError, TypeError):
            # TypeError covers a None county in the 'in' test above.
            pass
    return nei_rec  # a list of (county, state)
def retrieve_zips(col):
    """Map every ZIP in the module-level ``users['ZipCode']`` column to its
    US state abbreviation, returned as a numpy array.

    Fixes: the O(n^2) per-element ``np.append`` accumulation is replaced by
    one list comprehension and a single array construction.
    NOTE(review): the ``col`` parameter is unused -- the function reads the
    global ``users``; kept for interface compatibility, confirm intent.
    """
    search = SearchEngine(simple_zipcode=True)
    states = [search.by_zipcode(zipc).state for zipc in users['ZipCode']]
    return np.array(states)
def findState(df):
    """Print the uszipcode record for each row's 'zipcode' value."""
    engine = SearchEngine()
    for _, row in df.iterrows():
        # 'view' is read but unused in the original; the access is kept so
        # a missing column still raises the same KeyError.
        view = row['view']
        record = engine.by_zipcode(row['zipcode'])
        print(str(record))
        print("")
def zip_code_distance_calc(starting_point, hospital_location, amount_of_beds,
                           acceptable_distance=40.0):
    """Measure the haversine distance between two ZIP centroids and return
    a [hospital_count, bed_count] pair.

    The counters are incremented only when the distance is NOT within
    ``acceptable_distance``.
    NOTE(review): counting only the far-away hospitals looks inverted --
    confirm against the caller's aggregation logic.
    """
    print(hospital_location)
    engine = SearchEngine(simple_zipcode=True)
    origin = engine.by_zipcode(starting_point)
    hospital = engine.by_zipcode(hospital_location)
    dist = round(mpu.haversine_distance((origin.lat, origin.lng),
                                        (hospital.lat, hospital.lng)))
    count = [0, 0]
    if not acceptable_distance > dist:
        count[0] += 1
        count[1] += int(amount_of_beds)
    return count
def find_zipcode_in_major_states():
    """Return every valid ZIP code located in a fixed set of major states."""
    major_states = {'NY', 'CA', 'WA', 'IL', 'DC', 'TX', 'GA', 'VA'}
    engine = SearchEngine(simple_zipcode=False)
    found = []
    for code in range(500, 100000):
        record = engine.by_zipcode(str(code).zfill(5))
        if record.zipcode and record.state in major_states:
            found.append(record.zipcode)
    return found
def is_valid_zip(zip_code: int):
    """See if the given value is a valid US zip code.

    Returns the (lat, lng) pair for a known ZIP, or None otherwise.
    """
    engine = SearchEngine(simple_zipcode=True)
    hit = engine.by_zipcode(zip_code)
    return None if hit.zipcode is None else (hit.lat, hit.lng)
def get_lat_long(apps, schema_editor):
    """Data migration: backfill lat/lng on every Place from its ZIP code."""
    place_model = apps.get_model('webmanager', 'Place')
    engine = SearchEngine(simple_zipcode=True)
    for place in place_model.objects.all():
        record = engine.by_zipcode(place.zip_code)
        place.lat = record.lat
        place.lng = record.lng
        place.save(update_fields=['lat', 'lng'])
def states():
    """Flask view: return the states-lived records from stateslived.csv,
    appending the state for the optional ``zip`` query parameter.

    Fix: ``request.args.get('zip')`` returns None when the parameter is
    absent; the old ``!= ''`` check let None through into ``int()`` and
    crashed.  A plain truthiness check now skips the lookup for both
    None and the empty string.
    """
    zipcode = request.args.get('zip')
    statesvalues = pd.read_csv('stateslived.csv').to_dict('records')
    if zipcode:  # skip when the parameter is missing or empty
        search = SearchEngine(simple_zipcode=True)
        state = state_names[search.by_zipcode(int(zipcode)).state]
        statesvalues.append({'state': state, 'visited': 1})
    return json.dumps(statesvalues)
def get_location_by_zip_code(zip_code):
    """Return "lat, lng" for *zip_code*, or None when the ZIP is unknown.

    Fix: ``by_zipcode`` returns a result object even for unknown ZIPs (with
    its fields set to None), so the old ``if zip_data:`` truthiness test was
    not a reliable miss detector; test the ``zipcode`` field instead, the
    same way the other validators in this file do.
    """
    search = SearchEngine()
    zip_data = search.by_zipcode(zip_code)
    if zip_data.zipcode is None:
        return None
    zc = zip_data.to_dict()
    return f"{zc['lat']}, {zc['lng']}"
def get_distance_in_miles(home_team_zip, away_team_zip):
    """Haversine distance between two ZIP centroids, rounded to 2 places.

    For an extensive list of zipcodes, set simple_zipcode=False.
    """
    engine = SearchEngine(simple_zipcode=True)
    home = engine.by_zipcode(home_team_zip)
    away = engine.by_zipcode(away_team_zip)
    distance = mpu.haversine_distance((home.lat, home.lng),
                                      (away.lat, away.lng))
    return round(distance, 2)