def callCensusSingleGeocoder(df):
    """Geocode unmatched rows of *df* with the Census one-line geocoder.

    For every row whose 'match' flag is falsy, calls cg.onelineaddress()
    on the row's 'address' and, on a hit, fills in 'lat', 'lon',
    'geocoder' and 'parsed'.  The updated frame is then written to the
    fixed results CSV on the network share.

    :param df: pandas DataFrame with at least 'match' and 'address'
               columns; 'lat'/'lon'/'geocoder'/'parsed' are written.
    :return: (start_time, end_time, df) so callers can report duration.
    """
    print(datetime.datetime.now(), ' Geocoding addresses with Census Single Geocoder')
    t7 = datetime.datetime.now()
    for j, row in df.iterrows():
        if df.loc[j, 'match']:
            continue  # already matched by an earlier geocoding pass
        df.at[j, 'parsed'] = '-'
        try:
            g = cg.onelineaddress(df.loc[j, 'address'], returntype='locations')
        except Exception as exc:
            # Per-row failure handling: report which row failed and why,
            # then keep geocoding the rest of the batch instead of
            # aborting everything on the first API hiccup.
            print('Census Single geocoder failed for row', j, ':', exc)
            continue
        if g:  # empty list means the geocoder found no candidates
            df.at[j, 'lat'] = g[0]['coordinates']['y']
            df.at[j, 'lon'] = g[0]['coordinates']['x']
            df.at[j, 'geocoder'] = 'Census Single'
            df.at[j, 'parsed'] = g[0]['matchedAddress']
    df.to_csv(
        r'\\fs-sea-1\Protection_Data_Files\Projects\19_022_NFIRS_EDA\IL\geocoded_results\before_osm_bing.csv'
    )
    print(datetime.datetime.now(), ' Finished geocoding')
    t8 = datetime.datetime.now()
    return t7, t8, df
def get_census_tract_by_address(self, address, verbose=True):
    """Resolve a street address to its census identifiers.

    Builds "<address>, <City>, <State>" from self.city /
    self.city_to_state, calls the Census geocoder, and extracts the
    2010 census block GEOID of the first match.  Transient API errors
    are retried up to two more times.

    :param address: str -- street address within self.city
    :param verbose: boolean -> whether to print detailed outputs as the
                    program runs
    :return: dict with longitude, latitude, census_block_id,
             census_tract_id, county_id and state_id, or None when the
             geocoder produced no match after three attempts.
    """
    geocoded_result = None
    repeated_trial = 0
    # The Census API occasionally returns malformed payloads; keep
    # calling until we get a usable response or run out of attempts.
    while geocoded_result is None:
        repeated_trial += 1
        if repeated_trial >= 3:
            return None
        try:
            one_line_address = ", ".join(
                [address, self.city.title(), self.city_to_state[self.city]])
            geocoded_result = cg.onelineaddress(one_line_address)
            if len(geocoded_result) == 0:
                geocoded_result = None
        except (ValueError, KeyError):
            time.sleep(random.random())  # brief jitter before retrying
    # Extract once instead of re-materializing list(geocoded_result)[0]
    # for every field.
    first_match = geocoded_result[0]
    census_block_id = first_match['geographies']['2010 Census Blocks'][0]['GEOID']
    # GEOID layout: SS CCC TTTTTT BBBB (state, county, tract, block)
    matched_dict = {
        'longitude': first_match['coordinates']['x'],
        'latitude': first_match['coordinates']['y'],
        'census_block_id': census_block_id,
        'census_tract_id': census_block_id[:-4],
        'county_id': census_block_id[:5],
        'state_id': census_block_id[:2],
    }
    if verbose:
        pprint(matched_dict)
    return matched_dict
def get_block_groups_study_area(self):
    """Return the census block groups whose centroids fall inside the
    circular study area around self.addr.

    The address is geocoded through the Census API, buffered by
    self.radius, and intersected against the 2015 New York block-group
    shapefile.
    """
    # Geocode the anchor address and take the first match's coordinates.
    match = cg.onelineaddress(self.addr)[0]['coordinates']
    center = Point(match['x'], match['y'])
    # NOTE(review): radius / 69 looks like a miles-to-degrees
    # approximation (~69 miles per degree) -- confirm the units.
    circle = center.buffer(self.radius / 69)
    study_area_polygon = Polygon(circle.exterior.coords)
    # Load census block groups shape file and keep only those whose
    # centroid lies within the study circle.
    block_groups = gpd.read_file('data/ny_block_groups/tl_2015_36_bg.shp')
    inside = block_groups.apply(
        lambda row: study_area_polygon.contains(row['geometry'].centroid),
        axis=1)
    return block_groups[inside]
def main():
    """CLI entry point.

    Parses arguments with docopt, geocodes <address> through the Census
    geocoder, then dispatches to submit() or find().  The database
    cursor and connection are always closed, including on the
    address-not-found early return (previously they leaked there).
    """
    arguments = docopt(__doc__)
    conn, cur = get_cursor()
    try:
        address = arguments['<address>']
        try:
            coordinates = cg.onelineaddress(address)[0]['coordinates']
        except IndexError:
            # The geocoder returned no candidates: report and bail out.
            print()
            print("Address \"%s\" could not be found!" % address)
            print()
            return
        if arguments['submit']:
            submit(address, arguments['<review>'], coordinates, cur)
        if arguments['find']:
            find(address, coordinates, int(arguments['--m']), cur)
        conn.commit()
    finally:
        cur.close()
        conn.close()
csv_reader = csv.reader(csv_input, delimiter=',')
csv_writer = csv.writer(csv_output, delimiter='|', lineterminator='\n')
csv_writer.writerow(['address'] + ['longitude'] + ['latitude'] + ['id'])
line_count = 0
for row in csv_reader:
    if line_count == 0:
        line_count += 1  # skip the header row
        continue
    # make address from relevant columns in csv (will vary depending on
    # input format)
    address = row[7] + ', ' + row[8] + ', ' + row[9] + ' ' + row[12]
    # geocode using Census; if match, write to csv
    try:
        address_geocode = cg.onelineaddress(address)
        # Test the geocode RESULT, not the address string (which is
        # always non-empty, so the old check never took the else branch).
        if len(address_geocode) > 0:  # there is a match
            csv_writer.writerow(
                [address]
                + [address_geocode[0]['coordinates']['x']]
                + [address_geocode[0]['coordinates']['y']]
                + [line_count])
        else:
            # address not matched: write sentinel coordinates
            csv_writer.writerow([address] + ['-99'] + ['-99'] + [line_count])
    except Exception:
        print('geocode failed for ' + address)
        # address not matched
        csv_writer.writerow([address] + ['-99'] + ['-99'] + [line_count])
    # Advance the row id (it previously stuck at 1 for every data row).
    line_count += 1
def predict():
    """Flask view: score a loan application against each bank model.

    Reads the application fields from the submitted form, engineers the
    features the models were trained on (binned DIR, census-tract dummy
    vector, conforming-loan flag, one-hot demographic fields), predicts
    an approval probability per bank, and renders the comparison table.
    """
    # --- raw form fields -------------------------------------------------
    loan_amount = int(request.form['loan_amount'])
    down_payment = int(request.form['down_payment'])
    loan_to_value_ratio = ((loan_amount - down_payment) / loan_amount) * 100
    loan_term = int(request.form['loan_term']) * 12  # years -> months
    income = int(request.form['income'])
    monthly_debt = int(request.form['monthly_debt'])
    debt_to_income_ratio = (monthly_debt / (income / 12)) * 100

    # Bin the debt-to-income ratio.  The previous `x in range(a, b)`
    # membership tests are False for any non-integer float, so the bins
    # were silently skipped; explicit comparisons bin correctly.
    if debt_to_income_ratio < 20:
        debt_to_income_ratio = 15
    elif debt_to_income_ratio < 30:
        debt_to_income_ratio = 25
    elif debt_to_income_ratio < 36:
        debt_to_income_ratio = 33
    elif 50 <= debt_to_income_ratio < 60:
        debt_to_income_ratio = 55
    # ratios in [36, 50) and >= 60 deliberately keep the raw value

    applicant_age = request.form['age']

    # Convert address to Census Tract Number, then bin into Census category
    address = request.form['address']
    geocoded = cg.onelineaddress(address, returntype='geographies')
    census_tracts = geocoded[0].get('geographies').get('Census Tracts')[0]
    state = census_tracts.get('STATE')
    county = census_tracts.get('COUNTY')
    tract = census_tracts.get('TRACT')
    tract_category = FIPS_dict.get(str(state + county + tract))

    # Convert census_tract to dummy (one-hot) list for the model.
    census_lst = [0] * 10
    census_lst[tract_category - 1] = 1

    # 2021 conforming loan limit.
    conforming_loan_limit = 1 if loan_amount < 548250 else 0

    construction_type = int(request.form['construction_type'])
    ethnicity = request.form['ethnicity']
    race = request.form['race']
    sex = request.form['sex']
    loan_type = request.form['loan_type']

    # Assemble the flat feature vector in training order.
    # NOTE(review): construction_type appears twice (middle and end);
    # kept as-is because the models expect this exact layout -- confirm.
    X_user = []
    X_user.extend([[loan_amount], [loan_to_value_ratio], [loan_term],
                   [income / 1000], [debt_to_income_ratio],
                   [applicant_age], census_lst, [conforming_loan_limit],
                   [construction_type], ethnicity.split(','),
                   race.split(','), sex.split(','), loan_type.split(','),
                   [construction_type]])
    X_user = list(chain.from_iterable(X_user))
    X_user = [int(i) for i in X_user]

    # --- per-bank predictions --------------------------------------------
    model_lst = [boa, wells, chase, USB, LD, fair, cal]
    bank_lst = ['Bank of America', 'Wells Fargo', 'JPMorgan Chase',
                'U.S. Bank', 'Loan Depot', 'Fairway Independent Mortgage',
                'Caliber Home Loans']
    prob_dict = {}
    for model, bank in zip(model_lst, bank_lst):
        # probability of the positive (approval) class
        y_pred = model.predict_proba(np.array(X_user).reshape(1, -1))[:, 1][0]
        prob_dict[bank] = y_pred

    # Create a dataframe of all static bank qualities
    dicts = [boa_dict, wells_dict, chase_dict, USB_dict, LD_dict,
             cal_dict, fair_dict]
    bank_performance = pd.DataFrame(
        dicts,
        columns=['Lender', 'Likelihood of Approval', 'Certainty of Prediction',
                 'Approval Threshold', 'Black applicant error rate',
                 'Asian applicant error rate', 'Hispanic applicant error rate',
                 'Female applicant error rate'])
    # Add in predictions (matched by lender name, so dict order above
    # does not have to line up with model_lst order).
    for k, v in prob_dict.items():
        idx = bank_performance.index[bank_performance['Lender'] == k]
        bank_performance.at[idx, 'Likelihood of Approval'] = v
    # Express everything as percentages rounded to 2 decimals.
    bank_performance = bank_performance.set_index('Lender').applymap(
        lambda x: round(x * 100, 2))

    # Convert to HTML table
    return render_template('view.html',
                           tables=[bank_performance.to_html()],
                           titles=['Banks and stuff'])
def address_to_census(address, aggregation="blocks", max_requests=100):
    """Converts street addresses to the GEOID of the selected aggregation.

    Args:
        address (str): Address in the format
            "<Street>, <City>, <State>, <Zip>".  Addresses are not case
            sensitive and the spacing between commas and entries does not
            matter.  The State can be given in longform or abbreviated.
            NaN input is passed through unchanged.
        aggregation (str): Census aggregation method (case-insensitive):
            block groups, blocks, tracts (plus the longer spellings).
        max_requests (int): cap on retries against the coordinate
            endpoint when the address endpoint has no match.

    Returns:
        GEOID string of the selected aggregation
        (state+county+tract[+block group | +block]), or the NaN input.

    Raises:
        AssertionError: unknown *aggregation*.
        RuntimeError: geocoding exhausted *max_requests* attempts.
    """
    if pd.isna(address):
        return address

    OPTIONS = {
        "census block groups", "census block group", "block groups", "block group",
        "census blocks", "census block", "blocks", "block",
        "census tracts", "census tract", "tracts", "tract",
    }
    # Normalize once up front; this also makes the validity check
    # case-insensitive, matching how the options were compared later.
    aggregation = str.lower(aggregation)
    assert aggregation in OPTIONS, "The selected aggregation is not a valid option. Please select from the 3 possible choices: block groups, blocks, tracts"

    result = cg.onelineaddress(address, returntype="geographies")
    if result:
        census_blocks = result[0]["geographies"]["2020 Census Blocks"][0]
    else:
        # Fallback: geocode with ArcGIS, then reverse-look-up the census
        # geographies by coordinate.  The coordinate endpoint sometimes
        # errors on portions of its responses, so retry up to
        # max_requests times.
        # https://github.com/fitnr/censusgeocode/issues/18
        located = ArcGIS().geocode(address)
        x = located.longitude
        y = located.latitude
        result = None
        req_counter = 0
        while result is None and req_counter < max_requests:
            try:
                result = cg.coordinates(x=x, y=y, returntype="geographies")
            except Exception:
                pass  # transient API error -- retry
            req_counter += 1
        if result is None:
            # Fail loudly instead of crashing on result[...] below.
            raise RuntimeError(
                "Census coordinate lookup failed after %d attempts" % max_requests)
        census_blocks = result["2020 Census Blocks"][0]

    STATE = census_blocks["STATE"]
    COUNTY = census_blocks["COUNTY"]
    TRACT = census_blocks["TRACT"]

    if aggregation in {"census block groups", "census block group",
                       "block groups", "block group"}:
        return STATE + COUNTY + TRACT + census_blocks["BLKGRP"]
    if aggregation in {"census blocks", "census block", "blocks", "block"}:
        return STATE + COUNTY + TRACT + census_blocks["BLOCK"]
    # Remaining validated options are the tract spellings.
    return STATE + COUNTY + TRACT
(len(crimes18) - len(crimes17)) / len(crimes17) # Percentage change of all crimes data for 43rd Ward in 2017 and 2018 crimes18_tot = crimes[crimes.ward == '43']['2018-01-01':'2018-12-31'] crimes17_tot = crimes[crimes.ward == '43']['2017-01-01':'2017-12-31'] (len(crimes18_tot) - len(crimes17_tot)) / len(crimes17_tot) # ## 3.3 - 3,4 See notebook for writeup # ## 4A # Geocode address import censusgeocode as cg result = cg.onelineaddress('2111 S Michigan Ave, Chicago, IL') loc = result[0]['coordinates'] point = Point(loc['x'], loc['y']) # Find the block group for using the lat, long of the address blocks[blocks.geometry.apply(lambda x: point.within(x))] # Find crime types for this block group and the probabilities call_block = gcrimes[gcrimes.GEOID == '170313301004'] call_types = call_block.groupby('primary_type').size().sort_values(ascending=False).reset_index() call_types.columns = ['crime_type', 'count'] call_types['probability'] = call_types['count'] / len(call_block) call_types # ## 4B
# pip install censusgeocode
import censusgeocode as cg
import geocoder

# One fixed test address, geocoded through several backends so the
# result structures can be compared side by side.
white_house = '1600 Pennsylvania Avenue, Washington, DC'

# Census Bureau geocoder.
census_result = cg.onelineaddress(white_house)
print("censusgeocode")
print(census_result)

# geocoder package, ArcGIS backend.
arcgis_result = geocoder.arcgis(white_house)
print("\ngeocoder - arcgis")
print(arcgis_result.json)

# geocoder package, OpenStreetMap backend.
osm_result = geocoder.osm(white_house)
print("\ngeocoder - osm (open street maps)")
print(osm_result.json)

# Google backend requires credentials in the environment:
# $ export GOOGLE_API_KEY=<Secret API Key>
# $ export GOOGLE_CLIENT=<Secret Client>
# $ export GOOGLE_CLIENT_SECRET=<Secret Client Secret>
print("\ngeocoder - google")
google_result = geocoder.google(white_house)
print(google_result.json)