Ejemplo n.º 1
0
def callCensusSingleGeocoder(df, output_path=None):
    """Geocode the unmatched rows of ``df`` with the Census one-line geocoder.

    Rows whose ``'match'`` value is truthy are left untouched. For every
    other row the ``'address'`` value is sent to ``cg.onelineaddress`` and,
    when at least one location comes back, the first one is written to the
    ``'lat'``, ``'lon'``, ``'geocoder'`` and ``'parsed'`` columns. Rows that
    were attempted but not matched keep ``'parsed' == '-'``.

    A failure on one row is reported and does not stop the remaining rows
    from being processed.

    Args:
        df: DataFrame with at least the ``'match'`` and ``'address'``
            columns. It is modified in place.
        output_path: Where the resulting frame is written as CSV. Defaults
            to the project share used historically by this script.

    Returns:
        Tuple ``(start_time, end_time, df)`` where the times are
        ``datetime.datetime`` values taken before and after the run.
    """
    if output_path is None:
        output_path = r'\\fs-sea-1\Protection_Data_Files\Projects\19_022_NFIRS_EDA\IL\geocoded_results\before_osm_bing.csv'

    print(datetime.datetime.now(),
          '    Geocoding addresses with Census Single Geocoder')
    t7 = datetime.datetime.now()

    for j in df.index:
        # Each row gets its own try block so that one bad address (or one
        # failed request) cannot abort geocoding for all remaining rows.
        try:
            if df.loc[j, 'match']:
                continue

            # Mark the row as attempted; overwritten below on success.
            df.at[j, 'parsed'] = '-'
            g = cg.onelineaddress(df.loc[j, 'address'],
                                  returntype='locations')
            if len(g) == 0:
                continue

            best = g[0]
            df.at[j, 'lat'] = best['coordinates']['y']
            df.at[j, 'lon'] = best['coordinates']['x']
            df.at[j, 'geocoder'] = 'Census Single'
            df.at[j, 'parsed'] = best['matchedAddress']
        except Exception as exc:
            print('Something went wrong on row {}: {!r}'.format(j, exc))

    df.to_csv(output_path)
    print(datetime.datetime.now(), '    Finished geocoding')

    t8 = datetime.datetime.now()
    return t7, t8, df
    def get_census_tract_by_address(self, address, verbose=True):
        """
        Geocode a street address with the Census geocoder and derive the ids
        of the census geographies it falls in.

        The query sent to the geocoder is "<address>, <City>, <state>", built
        from ``self.city`` (title-cased) and ``self.city_to_state[self.city]``.

        :param address: str -> street part of the address (no city / state)
        :param verbose: boolean -> whether to pretty-print the result
        :return: None when no match was obtained within the allowed attempts,
                 otherwise a dictionary with six keys:
                                - longitude
                                - latitude
                                - census_block_id
                                - census_tract_id
                                - county_id
                                - state_id
        """
        matches = None
        attempts = 0
        # Keep querying until a non-empty answer arrives; the counter check
        # below gives up once the third pass starts.
        while matches is None:
            attempts += 1
            if attempts >= 3:
                return None
            try:
                query_parts = [address, self.city.title(), self.city_to_state[self.city]]
                candidates = cg.onelineaddress(", ".join(query_parts))
                if len(candidates) > 0:
                    matches = candidates
            except (ValueError, KeyError):
                # Short random pause before the next attempt.
                time.sleep(random.random())

        top_match = list(matches)[0]
        coordinates = top_match['coordinates']
        block_id = top_match['geographies']['2010 Census Blocks'][0]['GEOID']

        # The coarser ids are taken as prefixes of the block GEOID.
        matched_dict = {
            'longitude': coordinates['x'],
            'latitude': coordinates['y'],
            'census_block_id': block_id,
            'census_tract_id': block_id[:-4],
            'county_id': block_id[:5],
            'state_id': block_id[:2]
        }
        if verbose:
            pprint(matched_dict)
        return matched_dict
Ejemplo n.º 3
0
    def get_block_groups_study_area(self):
        """Return the block groups whose centroid lies inside the study area.

        The study area is a buffer around the geocoded location of
        ``self.addr`` with radius ``self.radius / 69`` in coordinate units
        (presumably a miles-to-degrees conversion -- TODO confirm).
        """
        # Locate the address; only the first geocoder match is used.
        geocoded = cg.onelineaddress(self.addr)
        center = Point(geocoded[0]['coordinates']['x'],
                       geocoded[0]['coordinates']['y'])

        # Build the study-area polygon from the buffer's outer ring.
        buffered = center.buffer(self.radius / 69)
        area = Polygon(buffered.exterior.coords)

        # Census block group shapes.
        all_groups = gpd.read_file('data/ny_block_groups/tl_2015_36_bg.shp')

        def centroid_in_area(group_row):
            return area.contains(group_row['geometry'].centroid)

        inside_mask = all_groups.apply(centroid_in_area, axis=1)
        return all_groups[inside_mask]
Ejemplo n.º 4
0
def main():
    """Command-line entry point: geocode the given address, then submit or find.

    The address from the parsed command line is geocoded with the Census
    geocoder. If it cannot be found a message is printed and nothing else
    happens. Otherwise the ``submit`` and/or ``find`` command is run with the
    coordinates and the transaction is committed.

    The cursor and connection are closed on every exit path, including the
    "address not found" early return and errors raised by the commands.
    """
    arguments = docopt(__doc__)
    address = arguments['<address>']
    conn, cur = get_cursor()

    try:
        try:
            coordinates = cg.onelineaddress(address)[0]['coordinates']
        except IndexError:
            # Empty geocoder result: there is no first match to index.
            print()
            print("Address \"%s\" could not be found!" % address)
            print()
            return

        if arguments['submit']:
            submit(address, arguments['<review>'], coordinates, cur)

        if arguments['find']:
            find(address, coordinates, int(arguments['--m']), cur)

        # Only reached when every requested command succeeded.
        conn.commit()
    finally:
        cur.close()
        conn.close()
Ejemplo n.º 5
0
        # Read comma-separated input rows and write pipe-separated geocoding
        # results: address, longitude, latitude and an id column.
        csv_reader = csv.reader(csv_input, delimiter=',')
        csv_writer = csv.writer(csv_output, delimiter='|', lineterminator='\n')
        csv_writer.writerow(['address', 'longitude', 'latitude', 'id'])

        line_count = 0
        for row in csv_reader:
            if line_count == 0:
                # Skip the header row of the input file.
                line_count += 1
                continue
            else:
                # make address from relevant columns in csv (will vary depending on input format)
                address = row[7] + ', ' + row[8] + ', ' + row[9] + ' ' + row[12]
                # geocode using Census; if match, write to csv
                try:
                    address_geocode = cg.onelineaddress(address)
                    # Test the geocoder result, not the input string: the
                    # address built above is never empty, so checking it
                    # would treat every row as matched.
                    if len(address_geocode) > 0:  # there is a match
                        csv_writer.writerow(
                            [address,
                             address_geocode[0]['coordinates']['x'],
                             address_geocode[0]['coordinates']['y'],
                             line_count])
                    else:
                        # address not matched
                        # NOTE(review): `addwriter` is not defined in the
                        # visible code -- confirm it exists, or whether
                        # `csv_writer` was intended here.
                        addwriter.writerow([address, '-99', '-99',
                                            line_count])
                except Exception:
                    print('geocode failed for ' + address)
                    # address not matched
                    addwriter.writerow([address, '-99', '-99',
                                        line_count])
Ejemplo n.º 6
0
def predict():
    """Score a submitted loan application against every lender model.

    Reads the application fields from ``request.form``, derives the model
    features (loan-to-value ratio, binned debt-to-income ratio, census tract
    category dummies, conforming-loan flag, ...), asks each lender model for
    the probability of the positive class and renders the per-lender results
    as an HTML table in ``view.html``.

    Returns:
        The rendered ``view.html`` template.
    """
    # Request the text from the form, aggregate and code as needed.
    loan_amount = int(request.form['loan_amount'])
    down_payment = int(request.form['down_payment'])
    loan_to_value_ratio = ((loan_amount - down_payment) / loan_amount) * 100
    loan_term = int(request.form['loan_term']) * 12
    income = int(request.form['income'])
    monthly_debt = int(request.form['monthly_debt'])
    debt_to_income_ratio = (monthly_debt / (income / 12)) * 100

    # Put DIR in bin. The ratio is a float, so interval comparisons are used:
    # `x in range(a, b)` is only true for integer-valued floats and would
    # leave e.g. 25.5 unbinned.
    if debt_to_income_ratio < 20:
        debt_to_income_ratio = 15
    elif debt_to_income_ratio < 30:
        debt_to_income_ratio = 25
    elif debt_to_income_ratio < 36:
        debt_to_income_ratio = 33
    elif 50 <= debt_to_income_ratio < 60:
        debt_to_income_ratio = 55
    # Ratios in [36, 50) and >= 60 are passed through unchanged.

    applicant_age = request.form['age']

    # Convert address to Census Tract Number, then bin into Census category.
    # NOTE(review): an address the geocoder cannot match raises IndexError
    # here, and a tract missing from FIPS_dict makes `tract_category` None
    # (TypeError below) -- confirm how these should be reported to the user.
    address = request.form['address']
    geocoded = cg.onelineaddress(address, returntype='geographies')
    geographies = geocoded[0].get('geographies')
    census_tracts = geographies.get('Census Tracts')[0]
    state = census_tracts.get('STATE')
    county = census_tracts.get('COUNTY')
    tract = census_tracts.get('TRACT')
    tract_category = FIPS_dict.get(str(state + county + tract))

    # Convert census_tract to dummy list for model (one-hot over 10 slots).
    census_lst = [0] * 10
    census_lst[tract_category - 1] = 1

    conforming_loan_limit = 0
    if loan_amount < 548250:
        conforming_loan_limit = 1
    construction_type = int(request.form['construction_type'])
    ethnicity = request.form['ethnicity']
    race = request.form['race']
    sex = request.form['sex']
    loan_type = request.form['loan_type']

    # Feature groups in the order the models are fed.
    # NOTE(review): construction_type appears twice (as in the original
    # code) -- confirm this matches the models' training layout.
    feature_groups = [
        [loan_amount], [loan_to_value_ratio], [loan_term], [income / 1000],
        [debt_to_income_ratio], [applicant_age], census_lst,
        [conforming_loan_limit], [construction_type], ethnicity.split(','),
        race.split(','), sex.split(','), loan_type.split(','),
        [construction_type],
    ]
    X_user = [int(i) for i in chain.from_iterable(feature_groups)]

    # Predict on the new data; the feature row is the same for every model.
    features = np.array(X_user).reshape(1, -1)
    model_lst = [boa, wells, chase, USB, LD, fair, cal]
    bank_lst = ['Bank of America', 'Wells Fargo', 'JPMorgan Chase', 'U.S. Bank', 'Loan Depot',
                'Fairway Independent Mortgage', 'Caliber Home Loans']
    prob_dict = {}
    for model, bank in zip(model_lst, bank_lst):
        prob_dict[bank] = model.predict_proba(features)[:, 1][0]

    # Create a dataframe of all static bank qualities.
    dicts = [boa_dict, wells_dict, chase_dict, USB_dict, LD_dict, cal_dict, fair_dict]
    bank_performance = pd.DataFrame(dicts, columns=['Lender', 'Likelihood of Approval', 'Certainty of Prediction',
                                                    'Approval Threshold', 'Black applicant error rate',
                                                    'Asian applicant error rate', 'Hispanic applicant error rate',
                                                    'Female applicant error rate'])
    # Add in predictions. A boolean mask with .loc is used because .at only
    # accepts a single scalar row label, not an Index of matching rows.
    for lender, probability in prob_dict.items():
        is_lender = bank_performance['Lender'] == lender
        bank_performance.loc[is_lender, 'Likelihood of Approval'] = probability
    bank_performance = bank_performance.set_index('Lender').applymap(lambda x: round(x * 100, 2))

    # Convert to HTML table
    return render_template('view.html', tables=[bank_performance.to_html()], titles=['Banks and stuff'])
Ejemplo n.º 7
0
def address_to_census(address, aggregation="blocks", max_requests=100):
    """
    Converts street addresses to the GEOID of the selected aggregation choice

    The address is first looked up with the Census geocoder. When that gives
    no match, the address is geocoded with ArcGIS and the resulting
    coordinates are looked up with the Census geocoder instead.

    Args:
        address (str): Address should be in the following format "<Street>, <City>, <State>, <Zip>".
            Addresses are not case sensitive and the spacing between commas and entries do not matter. The State can be given in longform or abbreviated.
            Examples: "1 Shields Avenue, Davis, CA, 95616", "1 Shields Avenue,Davis,California,95616", "1 shields avenue, davis,ca,   95616"
        aggregation (str): Census aggregation method: block groups, blocks, tracts
            (case-insensitive; singular forms and a "census " prefix are accepted)
        max_requests (int): Maximum number of coordinate lookups attempted in the fallback path.

    Returns:
        GEOID of selected aggregation. A missing address (NaN/None) is
        returned unchanged. None is returned when the address cannot be
        located by either geocoder.

    Raises:
        AssertionError: if ``aggregation`` is not one of the accepted names.
    """
    if pd.isna(address):
        return address

    BLOCK_GROUP_NAMES = {
        "census block groups", "census block group", "block groups",
        "block group"
    }
    BLOCK_NAMES = {"census blocks", "census block", "blocks", "block"}
    TRACT_NAMES = {"census tracts", "census tract", "tracts", "tract"}

    # Normalise once so validation and dispatch agree on case handling.
    level = aggregation.lower() if isinstance(aggregation, str) else aggregation

    # Raised explicitly (rather than via `assert`) so the check still runs
    # under `python -O`; the exception type is kept for existing callers.
    if level not in BLOCK_GROUP_NAMES | BLOCK_NAMES | TRACT_NAMES:
        raise AssertionError(
            "The selected aggregation is not a valid option. Please select from the 3 possible choices: block groups, blocks, tracts"
        )

    result = cg.onelineaddress(address, returntype="geographies")

    if result:
        geographies = result[0]["geographies"]
        census_blocks = geographies["2020 Census Blocks"][0]
    else:
        location = ArcGIS().geocode(address)
        if location is None:
            # Neither geocoder could locate the address.
            return None

        result = None
        # This retry loop is meant to deal with errors thrown on portions of the responses from https://geocoding.geo.census.gov/geocoder/
        # https://github.com/fitnr/censusgeocode/issues/18
        req_counter = 0
        while result is None and req_counter < max_requests:
            try:
                result = cg.coordinates(x=location.longitude,
                                        y=location.latitude,
                                        returntype="geographies")
            except Exception:
                # Failed lookup; try again until the budget is used up.
                pass
            req_counter += 1

        if result is None:
            # Every attempt failed.
            return None
        census_blocks = result["2020 Census Blocks"][0]

    tract_geoid = (census_blocks["STATE"] + census_blocks["COUNTY"] +
                   census_blocks["TRACT"])

    if level in BLOCK_GROUP_NAMES:
        return tract_geoid + census_blocks["BLKGRP"]
    if level in BLOCK_NAMES:
        return tract_geoid + census_blocks["BLOCK"]
    return tract_geoid
Ejemplo n.º 8
0
(len(crimes18) - len(crimes17)) / len(crimes17)


# Relative change (as a fraction) in all reported crimes for the 43rd Ward,
# 2017 -> 2018
ward43 = crimes[crimes.ward == '43']
crimes18_tot = ward43['2018-01-01':'2018-12-31']
crimes17_tot = ward43['2017-01-01':'2017-12-31']
(len(crimes18_tot) - len(crimes17_tot)) / len(crimes17_tot)


# ## 3.3 - 3.4 See notebook for writeup

# ## 4A

# Geocode the address and turn the first match into a point
import censusgeocode as cg
result = cg.onelineaddress('2111 S Michigan Ave, Chicago, IL')
loc = result[0]['coordinates']
point = Point(loc['x'], loc['y'])

# Show the block group(s) whose geometry contains that point
contains_point = blocks.geometry.apply(point.within)
blocks[contains_point]

# Crime types recorded for this block group, with their share of all
# records in the block group
call_block = gcrimes[gcrimes.GEOID == '170313301004']
type_counts = call_block.groupby('primary_type').size()
call_types = type_counts.sort_values(ascending=False).reset_index()
call_types.columns = ['crime_type', 'count']
call_types['probability'] = call_types['count'] / len(call_block)
call_types


# ## 4B
Ejemplo n.º 9
0
# pip install censusgeocode
import censusgeocode as cg

# The same address is sent to every geocoder so the outputs can be compared.
ADDRESS = '1600 Pennsylvania Avenue, Washington, DC'

res = cg.onelineaddress(ADDRESS)
print("censusgeocode")
print(res)

import geocoder

arcgisEncoder = geocoder.arcgis(ADDRESS)
print("\ngeocoder - arcgis")
print(arcgisEncoder.json)


osmEncoder = geocoder.osm(ADDRESS)
print("\ngeocoder - osm (open street maps)")
print(osmEncoder.json)

# Credentials for the Google provider are set in the shell beforehand:
#$ export GOOGLE_API_KEY=<Secret API Key>
#$ export GOOGLE_CLIENT=<Secret Client>
#$ export GOOGLE_CLIENT_SECRET=<Secret Client Secret>
print("\ngeocoder - google")
googleEncoder = geocoder.google(ADDRESS)
print(googleEncoder.json)