def test_stringio(self):
     """Geocode the small sample when it is supplied as an in-memory file object."""
     # Pick the buffer type once: a text buffer under Python 3, a byte
     # buffer otherwise.
     buffer_cls = io.StringIO if six.PY3 else io.BytesIO
     with open(self.small_path, 'r') as handle:
         sample = buffer_cls(handle.read())
     geocoded = censusbatchgeocoder.geocode(sample)
     self.assertEqual(len(geocoded), 5)
 def test_wide(self):
     """Geocode the wide fixture by naming its non-default column headers."""
     # Maps each geocoder field onto the header used in the fixture file.
     column_map = {
         "id": "Affidavit ID",
         "address": "Street",
         "city": "City",
         "state": "State",
         "zipcode": "Zip",
     }
     geocoded = censusbatchgeocoder.geocode(self.wide_path, **column_map)
     self.assertEqual(len(geocoded), 10)
 def test_weird_headers(self):
     """Geocode a fixture whose headers are arbitrary placeholder names."""
     header_overrides = dict(
         id="foo",
         address="bar",
         city="baz",
         state="bada",
         zipcode="boom",
     )
     geocoded = censusbatchgeocoder.geocode(self.weird_path, **header_overrides)
     self.assertEqual(len(geocoded), 5)
 def test_extra_columns(self):
     """Check that extra input columns come back unchanged and in row order."""
     geocoded = censusbatchgeocoder.geocode(self.extra_path)
     # (column name, values expected across the rows, in order)
     expectations = (
         ('metadata_1', ['foo', 'bar', 'baz', 'bada', 'bing']),
         ('metadata_2',
          ['eenie', 'meenie', 'miney', 'moe', 'catch a tiger by the toe']),
     )
     for column, expected_values in expectations:
         self.assertEqual(
             [row[column] for row in geocoded],
             expected_values
         )
     self.assertEqual(len(geocoded), 5)
 def test_bom(self):
     """Geocode the BOM fixture, reading it with the utf-8-sig encoding."""
     options = {
         "id": "Affidavit ID",
         "address": "Street",
         "city": "City",
         "state": "State",
         "zipcode": "Zip",
         "encoding": "utf-8-sig",
     }
     geocoded = censusbatchgeocoder.geocode(self.bom_path, **options)
     self.assertEqual(len(geocoded), 4)
Ejemplo n.º 6
0
 def run(self):
     """Geocode ``self.data`` and report the outcome through ``self.signals``.

     On success the geocoder output is emitted on ``signals.result``. On
     failure the traceback is printed and a tuple of
     ``(exception type, exception value, formatted traceback)`` is emitted
     on ``signals.error``. ``signals.finished`` is emitted in every case.
     """
     try:
         results = censusbatchgeocoder.geocode(self.data)
     except Exception as exc:
         # Catch ordinary errors only; a bare ``except`` would also trap
         # SystemExit and KeyboardInterrupt and report them as job errors.
         traceback.print_exc()
         self.signals.error.emit((type(exc), exc, traceback.format_exc()))
     else:
         self.signals.result.emit(results)  # Return result
     finally:
         self.signals.finished.emit()  # Done
 def test_list(self):
     """Geocode two addresses supplied directly as a list of dictionaries."""
     first_address = {
         'address': '521 SWARTHMORE AVENUE',
         'city': 'PACIFIC PALISADES',
         'id': '1',
         'state': 'CA',
         'zipcode': '90272-4350',
     }
     second_address = {
         'address': '2015 W TEMPLE STREET',
         'city': 'LOS ANGELES',
         'id': '2',
         'state': 'CA',
         'zipcode': '90026-4913',
     }
     geocoded = censusbatchgeocoder.geocode([first_address, second_address])
     self.assertEqual(len(geocoded), 2)
Ejemplo n.º 8
0
def geocode_batch(start_idx, batch_size=batch_size):
    """Geocode one slice of ``df_raw`` and write the results via ``curr``.

    Rows ``start_idx`` up to (but not including) ``start_idx + batch_size``
    are converted to record dicts, geocoded with pooling disabled, turned
    into update statements by ``gen_update_q`` and executed as a single
    ``;``-joined query.

    Args:
        start_idx: Positional index of the first row in the batch.
        batch_size: Number of rows to take; defaults to the module-level
            ``batch_size`` value captured when the function is defined.

    Returns:
        True when the batch completed, False when any exception was raised
        (the traceback is printed to stdout).
    """
    try:
        start_time = time.time()
        end_idx = start_idx + batch_size
        batch_df = df_raw.iloc[start_idx:end_idx]
        dict_lst = batch_df.to_dict('records')
        result_dicts = censusbatchgeocoder.geocode(dict_lst, pooling=False)
        # NOTE(review): the query text is assembled from gen_update_q output;
        # confirm that helper escapes or parameterizes the geocoded values.
        update_query = ';'.join(gen_update_q(d) for d in result_dicts)
        curr.execute(update_query)
        print('thread finished for batch {} size: {} in {} seconds'.format(
            (start_idx, end_idx), batch_size,
            time.time() - start_time))
        return True
    except Exception:
        # Report ordinary failures as False, but let SystemExit and
        # KeyboardInterrupt propagate instead of being swallowed.
        traceback.print_exc(file=sys.stdout)
        return False
Ejemplo n.º 9
0
def geocode(f, seattle_acs, new_name):
    """Geocode the addresses in a CSV, attach income quintiles, and save it.

    Reads the CSV at ``f``, builds address/city/state/zipcode/id columns from
    its ``Reg*`` columns, geocodes them, keeps only rows that received a
    tract, left-joins ``seattle_acs`` on the tract number to pick up
    ``income_quintile``, and writes the result to ``new_name``.

    Args:
        f: Path or buffer accepted by ``pd.read_csv``.
        seattle_acs: DataFrame providing ``TRACT`` and ``income_quintile``.
        new_name: Destination passed to ``DataFrame.to_csv``.
    """
    frame = pd.read_csv(f)
    print("size of df here", frame.shape)

    # Assemble a single street-address string from its three parts.
    frame['RegStNum'] = frame['RegStNum'].apply(str)
    print("got here")
    frame['address'] = (
        frame['RegStNum'] + " " + frame['RegStName'] + " " + frame['RegStType']
    )

    # Mirror the remaining columns under the names passed to the geocoder.
    for target, source in (('city', 'RegCity'),
                           ('state', 'RegState'),
                           ('zipcode', 'RegZipCode')):
        frame[target] = frame[source].copy()
    frame['id'] = frame.index

    geocoder_input = frame[['address', "city", "state", "zipcode", "id"]]
    geocoded = pd.DataFrame(
        censusbatchgeocoder.geocode(geocoder_input.to_dict("records")))
    print("filtered df size before merge", geocoded.shape)

    # Bring the tract assigned to each id back onto the full table.
    frame = frame.merge(geocoded[['id', 'tract']],
                        how="left",
                        left_on="id",
                        right_on="id")

    print("df size before dropping null tracts", frame.shape)
    has_tract = frame['tract'].notnull()
    frame = frame[has_tract]
    print("df size after dropping null tracts", frame.shape)

    print("adding census tract income quintile rank for each address")

    # Cast tract to int so both merge keys share a dtype; otherwise pandas
    # raises "ValueError: You are trying to merge on object and int64 columns.
    # If you wish to proceed you should use pd.concat".
    frame["tract"] = frame["tract"].astype(int)
    frame = frame.merge(seattle_acs[['TRACT', 'income_quintile']],
                        left_on='tract',
                        right_on="TRACT",
                        how='left')

    frame.to_csv(new_name)

    print("DONE")
Ejemplo n.º 10
0
 def test_path(self):
     """Geocode the small sample when it is given as a file path."""
     geocoded = censusbatchgeocoder.geocode(self.small_path)
     row_count = len(geocoded)
     self.assertEqual(row_count, 5)
Ejemplo n.º 11
0
 def test_coordinates(self):
     """Every geocoded row must carry 'latitude' and 'longitude' keys."""
     result = censusbatchgeocoder.geocode(self.small_path)
     for row in result:
         # assertIn names the missing key and the row on failure, whereas
         # assertTrue only reports "False is not true".
         self.assertIn('latitude', row)
         self.assertIn('longitude', row)
Ejemplo n.º 12
0
 def test_batch_size(self):
     """Geocode the small sample with batch_size=2 and expect all five rows."""
     geocoded = censusbatchgeocoder.geocode(
         self.small_path,
         batch_size=2
     )
     self.assertEqual(len(geocoded), 5)
Ejemplo n.º 13
0
 def test_nopooling(self):
     """Geocode the small sample with pooling switched off."""
     geocoded = censusbatchgeocoder.geocode(
         self.small_path,
         pooling=False
     )
     self.assertEqual(len(geocoded), 5)
Ejemplo n.º 14
0
 def test_no_state_and_zipcode(self):
     """Geocode the incomplete fixture with state and zipcode set to None."""
     geocoded = censusbatchgeocoder.geocode(
         self.incomplete_path,
         state=None,
         zipcode=None
     )
     self.assertEqual(len(geocoded), 5)
Ejemplo n.º 15
0
 def test_big_batch(self):
     """Geocode the big fixture and expect 1498 rows back."""
     geocoded = censusbatchgeocoder.geocode(self.big_path)
     row_count = len(geocoded)
     self.assertEqual(row_count, 1498)
Ejemplo n.º 16
0
# Notebook cells: preview the address table, geocode it, and export to CSV.
print(datetime.datetime.now())
addressdata.head()

# In[ ]:

addressdata.shape

# In[ ]:

# One dict per row, which is the record format handed to the geocoder below.
fetchaddress = addressdata.to_dict("records")

# In[ ]:

print(datetime.datetime.now())
# fetchaddress already holds the converted records, so it is passed as-is;
# calling .to_dict("records") on it a second time fails because the
# conversion result is a list, not a DataFrame.
results = censusbatchgeocoder.geocode(fetchaddress,
                                      zipcode=None)
print(datetime.datetime.now())
#2019-10-18 20:07:03.927510
#2019-10-18 20:10:55.237334

# In[ ]:

pd_df = pd.DataFrame(results)

# In[ ]:

pd_df.to_csv(
    'C:\\Data Analytics\\Sem 3\\ICT Solution\\Data Sets\\extractgeocodesdata.csv',
    index=False)
Ejemplo n.º 17
0
import pandas as pd
import censusbatchgeocoder

import logging
# Turn on debug-level output from the root logger.
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)

# Load the spreadsheet, skipping the three rows above the header.
source_workbook = "~/Code/python-censusbatchgeocoder-example/privateschools1617.xls"
df = pd.read_excel(source_workbook, skiprows=3)

# Geocode only a five-row slice, mapping the sheet's headers onto the
# geocoder's field names.
sample_records = df.to_dict("records")[770:775]
column_map = {
    "id": "Affidavit ID",
    "address": "Street",
    "city": "City",
    "state": "State",
    "zipcode": "Zip",
}
result = censusbatchgeocoder.geocode(sample_records, **column_map)