Example #1
0
def split_geo(df, add, city, state, zipcode, chunk_size=500):
    """Geocode the addresses in ``df`` in chunks via ``cg.addressbatch``.

    The frame is split into ``ceil(len(df) / chunk_size)`` roughly equal
    chunks. Each chunk is written to a temporary CSV (``temp_geo.csv``),
    passed to ``cg.addressbatch`` and the per-chunk results are concatenated.

    Args:
        df: DataFrame holding the addresses. Its index (after
            ``reset_index``, exposed as the ``'index'`` column) is written as
            the first CSV column.
        add: name of the street-address column.
        city: name of the city column.
        state: name of the state column.
        zipcode: name of the zip-code column.
        chunk_size: maximum number of rows sent per request.

    Returns:
        DataFrame with all geocoding results, the ``'id'`` column renamed to
        ``'row'`` and a fresh 0..n-1 index. An empty DataFrame is returned
        when ``df`` has no rows.
    """
    temp_csv = 'temp_geo.csv'
    df_new = df.copy()
    df_new.reset_index(inplace=True)

    n_rows = df_new.shape[0]
    if n_rows == 0:
        # Nothing to geocode; np.array_split would reject zero sections.
        return pd.DataFrame()

    splits = int(np.ceil(n_rows / chunk_size))
    # Split *positions*, not index labels: after reset_index the labels of
    # df_new are 0..n-1 and no longer match the original index values.
    chunk_li = np.array_split(np.arange(n_rows), splits)
    cols = ['index', add, city, state, zipcode]
    res_li = []
    try:
        for i, positions in enumerate(chunk_li):
            # Grab data, export to csv
            sub_data = df_new.iloc[positions][cols]
            sub_data.to_csv(temp_csv, header=False, index=False)
            # Geo the results and turn back into df
            print(f'Geocoding round {int(i)+1} of {int(splits)}, {datetime.now()}')
            result = cg.addressbatch(temp_csv)
            res_li.append(pd.DataFrame(result))
            if i + 1 < splits:
                # sleep 10 seconds between requests to not get cut off
                time.sleep(10)
    finally:
        # Clean up csv file even when a request fails part-way through
        if os.path.exists(temp_csv):
            os.remove(temp_csv)

    final_df = pd.concat(res_li)
    final_df.rename(columns={'id': 'row'}, inplace=True)
    final_df.reset_index(inplace=True, drop=True)
    return final_df
Example #2
0
def callCensusBulkGeocoder(filename):
    """Geocode the addresses in ``filename`` with ``cg.addressbatch``.

    Args:
        filename: path to the input CSV. It is split on backslashes to obtain
            the base name used for the output path.

    Returns:
        Tuple ``(t5, t6, filename, name, ext, df)`` where ``t5``/``t6`` are
        the start/end timestamps, ``name``/``ext`` are the split output path
        (results folder + input base name) and ``df`` is the geocoding result
        as a DataFrame. If geocoding fails, ``df`` is the input CSV as read
        by ``pd.read_csv``.
    """
    t5 = datetime.datetime.now()
    print(t5, '    Geocoding addresses with Census Bulk')

    # Fallback value returned when the geocoding request fails
    df = pd.read_csv(filename)

    folder = r'\\fs-sea-1\Protection_Data_Files\Projects\19_022_NFIRS_EDA\IL\geocoded_results'
    parts = filename.split("\\")
    newpath = os.path.join(folder, parts[-1])
    name, ext = os.path.splitext(newpath)

    try:
        result = cg.addressbatch(filename)
        df = pd.DataFrame(result)
    except Exception as exc:
        # Best effort: keep going with the un-geocoded input, but say why
        print('Something went wrong', repr(exc))

    #df.to_csv(name + '_bulk' + ext, header=True, index=False, float_format="%.6f")

    print(datetime.datetime.now(),
          '    Finished geocoding with Census Bulk Geocoder')

    t6 = datetime.datetime.now()
    return t5, t6, filename, name, ext, df
Example #3
0
    def get_census_data(self, debug=False):
        """Write the raw input rows to ``tempSend.csv`` and batch-geocode them.

        Reads ``self.inputDataRaw``; element 0 and 1 of each row are joined
        into a name, element 3 is a comma-separated address whose first four
        parts are written after the name. Stores the names in
        ``self.people_list`` and the ``cg.addressbatch`` result in
        ``self.receiveData``.

        Args:
            debug: when equal to ``True``, print every returned record.
        """
        # Start from an empty result holder
        self.receiveData = []

        # "firstname lastname" label for each input row
        self.people_list = [
            person[0] + " " + person[1] for person in self.inputDataRaw
        ]

        # Build the temporary CSV that is sent to the geocoder
        with open("tempSend.csv", "w", newline="\n") as out_file:
            writer = csv.writer(out_file, dialect='excel')

            for person, record in zip(self.people_list, self.inputDataRaw):
                # Address string -> street, city, state, zip code
                parts = record[3].split(",")
                csv_row = [person, parts[0], parts[1], parts[2], parts[3]]
                print(csv_row)
                writer.writerow(csv_row)

        # Send API request
        self.receiveData = cg.addressbatch("tempSend.csv")

        if debug == True:
            for house in self.receiveData:
                print(house)

        print("Done!\n")
Example #4
0
def _geocode_batch(df,
                   csv_path='/Users/travis.howe/Downloads/test_address2.csv'):
    """Write the address columns of ``df`` to a CSV and batch-geocode it.

    Rows with a missing value in any of the four address columns are dropped
    before writing. The DataFrame index is written as the first CSV column.

    Args:
        df: DataFrame with ``street_address``, ``city``,
            ``postal_abbreviation`` and ``code`` columns.
        csv_path: where the intermediate CSV is written; ``cg.addressbatch``
            is given this file path. Defaults to the previously hard-coded
            location.

    Returns:
        DataFrame built from the ``cg.addressbatch`` result
        (``returntype='geographies'``).
    """
    address_cols = ['street_address', 'city', 'postal_abbreviation', 'code']
    df[address_cols].dropna().to_csv(csv_path, header=False, index=True)
    return pd.DataFrame(cg.addressbatch(csv_path, returntype='geographies'))
Example #5
0
def geocode_wrapper(df_input_chunk):
    """
    Geocode one chunk with censusgeocode and join the results back onto it.

    The ``street``, ``city``, ``state`` and ``zip`` columns are sent to
    ``cg.addressbatch`` as a list of records; the returned records are turned
    into a DataFrame and merged with ``df_input_chunk`` on ``address``.
    """
    print("calling censusgeocode addressbatch")
    print(df_input_chunk.iloc[0, :])

    address_columns = ["street", "city", "state", "zip"]
    address_records = df_input_chunk[address_columns].to_dict('records')
    geocoded_records = cg.addressbatch(address_records)
    print("Finished calling censusgeocode addressbatch")

    geocoded_frame = pd.DataFrame.from_dict(geocoded_records)
    return df_input_chunk.merge(geocoded_frame, on='address')
def wrapper_api_call(df_input_addresses_queued):
    """
    Send a queued batch of addresses to the censusgeocode package.

    The DataFrame is converted to a list of row dicts and handed to
    ``cg.addressbatch``; whatever that call returns is returned unchanged.
    Per the original notes, a batch has to be less than 1000 addresses and
    the columns have to be street, city, state and zip (the ID is
    autogenerated in the censusgeocode pkg).
    """
    logging.info("Worker is now working.")
    address_records = df_input_addresses_queued.to_dict('records')
    geo_codes_ordered_dict = cg.addressbatch(address_records)
    logging.info("Worker's work is complete.")
    return geo_codes_ordered_dict
 def batch_function(num):
     """Geocode rows ``num*100 .. num*100+99`` of ``new_addresses``.

     The slice is written to ``dummy<num>.csv``, sent to
     ``censusgeocode.addressbatch`` and the temporary file is removed again.

     Returns a list of ``[address, lat, lon]`` lists, or ``None`` when
     anything fails (an "error on <num>" line is printed in that case).
     """
     temp_filename = "dummy{num}.csv".format(num=num)
     try:
         # Stagger the requests of different batches
         time.sleep(num % 100)
         new_addresses.loc[num * 100:num * 100 + 99,
                           ["address", "city", "state",
                            "zip"]].to_csv(temp_filename)
         z = censusgeocode.addressbatch(temp_filename)
         # Single-argument print() is valid on both Python 2 and 3
         print(num)
         return [[i['address'], i['lat'], i['lon']] for i in z]
     except Exception:
         # Best effort per batch; interrupts are no longer swallowed
         print("error on {num}".format(num=num))
         return None
     finally:
         # Remove the temp file exactly once, and only if it was created
         if os.path.exists(temp_filename):
             os.remove(temp_filename)
Example #8
0
def geocode_batch(df):
    """Geocode the prepared batch file and dump the merged result to disk.

    Calls ``_batch_prep(df)``, adds an ``id`` column to ``df`` (its index),
    geocodes the CSV at the fixed path with ``returntype='geographies'`` and
    outer-merges the result with selected columns of ``df`` on ``id``. The
    merged frame (with a merge indicator column) is written to
    ``results.pkl`` via ``joblib.dump``; nothing is returned.

    NOTE(review): presumably ``_batch_prep`` writes the CSV read here -
    confirm against its definition.
    """
    _batch_prep(df)
    # todo: is there anything I can do to increase the number of matches---about 89% of observations have a match

    df['id'] = df.index
    keep_cols = ['id', 'last_name', 'age', 'sex', 'postal_abbreviation']

    census_records = cg.addressbatch(
        '/Users/travis.howe/Downloads/test_address2.csv',
        returntype='geographies')
    df_census = pd.DataFrame(census_records)
    # Cast ids to int before merging on 'id'
    df_census['id'] = df_census['id'].astype(int)

    merged = df[keep_cols].merge(
        df_census, how='outer', on='id', indicator=True)
    joblib.dump(merged, 'results.pkl')
Example #9
0
 def get_census_batch(self, addresses):
     '''
     Run every input address through ``self.parse_address``, write the parsed
     street/city/state/zip rows to ``self.batch_filename`` (no header row),
     geocode that file with ``cg.addressbatch`` and save the results to
     ``self.census_results_filename``. Nothing is returned.
     '''
     parsed_rows = []
     for address in addresses:
         street_address, city, state, zip_code = self.parse_address(address)
         parsed_rows.append((street_address, city, state, zip_code))

     census_batch = pd.DataFrame(parsed_rows)
     census_batch.to_csv(self.batch_filename, header=None)

     results = cg.addressbatch(self.batch_filename)
     pd.DataFrame.from_dict(results).to_csv(self.census_results_filename)
Example #10
0
def process_batch(batch_path):
    """Geocode the batch file at ``batch_path`` and return the raw result.

    Progress is printed before and after the ``cg.addressbatch`` call, which
    is made with ``timeout=1000``.
    """
    start_message = f"Begin processing {batch_path}..."
    end_message = f"Finish processing {batch_path}..."

    print(start_message)
    geocoded_results = cg.addressbatch(batch_path, timeout=1000)
    print(end_message)

    return geocoded_results
Example #11
0
import censusgeocode as cg

# Geocode every address in addresses.csv with a single batch request
data = cg.addressbatch('addresses.csv')
print(data)

# Print the id, latitude and longitude of each returned record, one per line
for address in data:
    for key in ('id', 'lat', 'lon'):
        print(address[key])
import censusgeocode as cg
import os
import pandas as pd

# Create the output folder; exist_ok avoids the check-then-create race
os.makedirs("./address_chunks/results", exist_ok=True)

#path = os.path.dirname(__file__) + '/../../dataRAW/texasPractice/'

directory = "./address_chunks/"

# Geocode every chunk file in the directory and save one result CSV per chunk
for file in os.listdir(directory):
    filename = os.fsdecode(file)

    # os.listdir also yields the "results" sub-directory created above;
    # only regular files can be sent to the geocoder.
    if not os.path.isfile(directory + filename):
        continue

    geocodeResults = pd.DataFrame(
        cg.addressbatch(directory + filename, delim=','))
    geocodeResults.to_csv(directory + 'results/R' + filename + '.csv')

    print(filename + " completed!")
Example #13
0
def geocode_nfirs(year):
    """Geocode files within temp directory (created with create_nfirs_temp function).
    Writes output geocoded files to temp_output folder.
    Uses censusgeocode library to access the census geocoder api, described at
    https://geocoding.geo.census.gov/

    Files whose output CSV already exists are skipped. Each remaining file is
    tried up to 10 times; a file that fails every attempt is reported and
    skipped so the remaining files are still processed.

    Args:
        year: int, year whose ``temp_<year>`` input files are geocoded

    Returns:
        None
    """

    # Directory paths
    nfirs_interim = utils.DATA['interim'] / 'nfirs'
    temp_input = nfirs_interim / f'temp_{year}' / 'input'
    temp_output = nfirs_interim / f'temp_{year}' / 'output'

    start_time = datetime.now()
    max_attempts = 10

    # Geocode the files
    for filename in os.listdir(temp_input):

        input_path = os.path.join(temp_input, filename)
        # filename[:-4] strips a 4-character extension such as ".csv"
        output_path = os.path.join(temp_output, f'{filename[:-4]}_output.csv')

        if os.path.exists(output_path):
            print(f'{filename} already geocoded.')
            continue

        # Try up to 10 attempts to geocode file. Sometimes the connection to Census API will
        # time out and cause the particular file to fail to be geocoded on that attempt.
        for attempt in range(max_attempts):
            # Sleep random amount to keep from being banned from geocoder api
            sleep(np.random.randint(1, 5))

            # Print time info
            cur_time = datetime.now()
            print('\nCurrent time:', cur_time.strftime('%H:%M:%S'))
            print(f'Geocoding: {filename}')

            try:
                # Geocode file
                results = pd.DataFrame(
                    cg.addressbatch(input_path)).sort_values('id')
                results.to_csv(output_path, index=False)
            except Exception as exc:
                # Catch Exception (not a bare except) so Ctrl-C still stops the run
                print(f'Failed on try {attempt}')
                print(f'Error: {exc!r}')
                continue

            # Print time info
            step_time = (datetime.now() - cur_time).total_seconds() / 60
            total_time = (datetime.now() - start_time).total_seconds() / 60
            print(f'Step time elapsed: {step_time:.02f} minutes')
            print(f'Total time elapsed: {total_time:.02f} minutes')
            break
        else:
            # Every attempt failed; make the gap visible instead of moving on silently
            print(f'Giving up on {filename} after {max_attempts} failed attempts.')

    print('\n\nFinished geocoding.\n')
    return