Example #1
def geocodeme(row, addr_lst):
    cg = CensusGeocode()
    result = cg.address(row[addr_lst[0]], city=row[addr_lst[1]], state=row[addr_lst[2]], zipcode=row[addr_lst[3]])
    if len(result) != 0:
        return result[0]['geographies']['Census Tracts'][0]['GEOID']
    else:
        return None
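A minimal usage sketch for the function above, assuming it lives in the same module, that censusgeocode is installed, and hypothetical column names; each row triggers one request to the Census geocoder:

import pandas as pd
from censusgeocode import CensusGeocode

# Hypothetical column layout; adjust to the real data.
addr_cols = ['street', 'city', 'state', 'zip']
df = pd.DataFrame([{'street': '1600 Pennsylvania Avenue NW',
                    'city': 'Washington', 'state': 'DC', 'zip': '20500'}])

# geocodeme() looks up each address and returns the tract GEOID (or None).
df['tract_geoid'] = df.apply(lambda row: geocodeme(row, addr_cols), axis=1)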
Example #2
def GeoCode(GeoCoder, strAddr):
    strBingMapKey = cfg.getConfigValue(r"Geocoder/BingKey")

    #strBingMapKey = 'AjlU0VglpeaGSVjfdrvFNEEZKSRWLtUYbDGGBbkVq1SsFK6Vz724WpqxqRi2m8SJ'
    try:
        if GeoCoder == 'google':
            g = geocoder.google(strAddr)
            return (g.lat, g.lng, g.address, GeoCoder, g.neighborhood,
                    g.quality, g.accuracy, None)
        elif GeoCoder == 'bing':
            g = geocoder.bing(strAddr, key=strBingMapKey)
            return (g.lat, g.lng, g.address, GeoCoder, g.neighborhood,
                    g.quality, g.accuracy, g.confidence)
        elif GeoCoder == 'census':
            cg = CensusGeocode()
            j = cg.onelineaddress(strAddr)
            try:
                return (j[0]['coordinates']['y'], j[0]['coordinates']['x'],
                        j[0]['matchedAddress'], GeoCoder, None, None, None,
                        None)
            except (IndexError, KeyError):
                # No match returned by the Census geocoder
                return (None, None, None, GeoCoder, None, None, None, None)
        else:
            g = geocoder.yahoo(strAddr)
            return (g.lat, g.lng, g.json['address'], GeoCoder, g.neighborhood,
                    g.quality, g.accuracy, None)

    except Exception:
        print('error encountered when geocoding address: {0}'.format(strAddr))
        traceback.print_exc()
        return (None, None, None, GeoCoder, None, None, None, None)
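A hedged call sketch for the dispatcher above; the returned tuple is (lat, lng, address, geocoder, neighborhood, quality, accuracy, confidence), and the 'census' branch is the only one that needs no API key:

# Hypothetical invocation; cfg, geocoder and CensusGeocode must be importable
# as in the original file.
lat, lng, addr, provider, hood, quality, accuracy, confidence = GeoCode(
    'census', '1600 Pennsylvania Ave NW, Washington, DC 20500')
if lat is not None:
    print('Matched {0} at ({1}, {2}) via {3}'.format(addr, lat, lng, provider))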
Example #3
    def __init__(self, objid, x, y):
        self.x = x
        self.y = y
        self.objid = objid

        success = False
        barrier = 10
        try:
            while not success:
                try:
                    self.geo = CensusGeocode().coordinates(x, y)

                    self.result_str = self.geo.__str__().replace("'", '"')
                    self.result = json.loads(self.result_str)
                    self.census_tract = self.result[0]['Census Tracts'][0][
                        'BASENAME']
                    success = True
                except KeyError as e:
                    if barrier > 0:
                        print("Error: {0}. Retrying in 2 seconds.".format(e))
                        time.sleep(2)
                        barrier -= 1
                    else:
                        success = True
                        print("Hard Error: {0} - failed too many times".format(
                            e, barrier))
                        self.geo = None
                        self.result = None
                        self.census_tract = None
        except Exception as e:
            print("Hard Error: {0} - Type: {1}".format(e, type(e)))
            self.geo = None
            self.result = None
            self.census_tract = None
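The repr-to-JSON round trip above is fragile (any apostrophe in a place name, or a None value, breaks json.loads). A hedged alternative for the lookup step indexes the coordinates result directly, as the other examples on this page do; note that the exact shape, result[0][...] versus result[...], depends on the installed censusgeocode version:

from censusgeocode import CensusGeocode

# Point near the White House; swap in the (x, y) passed to the constructor above.
geo = CensusGeocode().coordinates(x=-77.0365, y=38.8977)
census_tract = geo[0]['Census Tracts'][0]['BASENAME']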
Example #4
    def test_benchmark_vintage(self):
        bmark, vint = 'Public_AR_Census2010', 'Census2010_Census2010'

        cg = CensusGeocode(benchmark=bmark, vintage=vint)
        result = cg.address('1600 Pennsylvania Avenue NW', city='Washington', state='DC', zipcode='20500', returntype='geographies')

        self.assertEqual(result.input['benchmark']['benchmarkName'], bmark)
        self.assertEqual(result.input['vintage']['vintageName'], vint)
        self.assertEqual(result[0]['geographies']['Census Tracts'][0]['GEOID'], '11001006202')
Example #5
def geocodeme(row, addr_lst):
    cg = CensusGeocode()
    result = cg.address(row[addr_lst[0]],
                        city=row[addr_lst[1]],
                        state=row[addr_lst[2]],
                        zipcode=row[addr_lst[3]])
    if len(result) != 0:
        return result[0]['geographies']['Census Tracts'][0]['GEOID']
    else:
        return None
Example #6
    def test_benchmark_vintage(self):
        bmark, vint = 'Public_AR_Census2010', 'Census2010_Census2010'

        cg = CensusGeocode(benchmark=bmark, vintage=vint)
        result = cg.address('1600 Pennsylvania Avenue NW',
                            city='Washington',
                            state='DC',
                            zipcode='20500',
                            returntype='geographies')

        self.assertEqual(result.input['benchmark']['benchmarkName'], bmark)
        self.assertEqual(result.input['vintage']['vintageName'], vint)
        self.assertEqual(result[0]['geographies']['Census Tracts'][0]['GEOID'],
                         '11001006202')
Example #7
def get_census_tract(lat_lng):
    """Helper that uses the Census Geocoder API to get the Census tract
    from a (latitude, longitude) pair (subject to daily rate limits)."""

    census_tract = ''
    if lat_lng is not None:
        cg = CensusGeocode()
        lat = lat_lng[0]
        lng = lat_lng[1]
        result = cg.coordinates(x=lng, y=lat)

        if len(result) > 0 and 'Census Tracts' in result[0] and len(
                result[0]['Census Tracts']) > 0:
            if 'TRACT' in result[0]['Census Tracts'][0]:
                census_tract = int(
                    result[0]['Census Tracts'][0]['TRACT']) / 100

    return census_tract
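A quick usage sketch of the helper above, with hypothetical coordinates (Washington, DC); mind the daily rate limits noted in the docstring:

# Pass a (latitude, longitude) pair; an empty string comes back if no tract is found.
tract = get_census_tract((38.8977, -77.0365))
print(tract)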
Example #8
    def setUp(self):
        self.cg = CensusGeocode()
Example #9
class CensusGeoCodeTestCase(unittest.TestCase):

    cg = None

    def setUp(self):
        self.cg = CensusGeocode()

    @vcr.use_cassette('tests/fixtures/coordinates.yaml')
    def test_returns_geo(self):
        results = self.cg.coordinates(-74, 43, returntype='geographies')
        assert isinstance(results, GeographyResult)
        assert results.input

    @vcr.use_cassette('tests/fixtures/coordinates.yaml')
    def test_coords(self):
        results = self.cg.coordinates(-74, 43)
        assert results['Counties'][0]['BASENAME'] == 'Saratoga'
        assert results['Counties'][0]['GEOID'] == '36091'
        assert results['Census Tracts'][0]['BASENAME'] == "615"

    def test_url(self):
        r = self.cg._geturl('coordinates', 'geographies')
        assert r == 'https://geocoding.geo.census.gov/geocoder/geographies/coordinates'

    @vcr.use_cassette('tests/fixtures/address-geographies.yaml')
    def test_address(self):
        results = self.cg.address('1600 Pennsylvania Avenue NW',
                                  city='Washington',
                                  state='DC',
                                  zipcode='20500')
        assert results[0]
        assert results[0]['geographies']['Counties'][0][
            'BASENAME'] == 'District of Columbia'

    @vcr.use_cassette('tests/fixtures/onelineaddress.yaml')
    def test_onelineaddress(self):
        results = self.cg.onelineaddress(
            '1600 Pennsylvania Avenue NW, Washington, DC, 20500', layers='all')
        assert results[0]
        try:
            assert results[0]['geographies']['Counties'][0][
                'BASENAME'] == 'District of Columbia'
        except AssertionError:
            print(results[0]['geographies']['Counties'][0])
            raise

        assert 'Metropolitan Divisions' in results[0]['geographies'].keys()
        assert 'Alaska Native Village Statistical Areas' in results[0][
            'geographies'].keys()

    @vcr.use_cassette('tests/fixtures/address-locations.yaml')
    def test_address_return_type(self):
        results = self.cg.address('1600 Pennsylvania Avenue NW',
                                  city='Washington',
                                  state='DC',
                                  zipcode='20500',
                                  returntype='locations')

        assert results[0]['matchedAddress'].upper(
        ) == '1600 PENNSYLVANIA AVE NW, WASHINGTON, DC, 20502'
        assert results[0]['addressComponents']['streetName'] == 'PENNSYLVANIA'

    @vcr.use_cassette('tests/fixtures/test_benchmark_vintage.yaml')
    def test_benchmark_vintage(self):
        bmark, vint = 'Public_AR_Census2010', 'Census2010_Census2010'

        cg = CensusGeocode(benchmark=bmark, vintage=vint)
        result = cg.address('1600 Pennsylvania Avenue NW',
                            city='Washington',
                            state='DC',
                            zipcode='20500',
                            returntype='geographies')

        self.assertEqual(result.input['benchmark']['benchmarkName'], bmark)
        self.assertEqual(result.input['vintage']['vintageName'], vint)
        self.assertEqual(result[0]['geographies']['Census Tracts'][0]['GEOID'],
                         '11001006202')

    @vcr.use_cassette('tests/fixtures/address-batch.yaml')
    def test_addressbatch(self):
        result = self.cg.addressbatch('tests/fixtures/batch.csv',
                                      returntype='locations')
        assert isinstance(result, list)
        resultdict = {int(r['id']): r for r in result}
        assert resultdict[3][
            'parsed'] == '3 GRAMERCY PARK W, NEW YORK, NY, 10003'
        assert resultdict[2]['match'] is False

        result = self.cg.addressbatch('tests/fixtures/batch.csv',
                                      returntype='geographies')
        assert isinstance(result, list)
        resultdict = {int(r['id']): r for r in result}
        assert resultdict[3]['tigerlineid'] == '59653655'
        assert resultdict[3]['statefp'] == '36'
        assert resultdict[2]['match'] is False
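The tests above replay recorded HTTP traffic through vcrpy cassettes, so they run offline once the fixtures exist. A minimal sketch of recording a similar cassette outside the test class, assuming vcrpy is installed and using a hypothetical cassette path:

import vcr
from censusgeocode import CensusGeocode

# First run records the HTTP exchange to the YAML file; later runs replay it.
with vcr.use_cassette('tests/fixtures/my-onelineaddress.yaml'):
    result = CensusGeocode().onelineaddress(
        '1600 Pennsylvania Avenue NW, Washington, DC, 20500')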
Example #10
from censusgeocode import CensusGeocode
import csv
import time

cg = CensusGeocode()

start = time.time()
with open('flights2.csv', 'r') as f:
    d_reader = csv.DictReader(f)

    fieldnames = d_reader.fieldnames
    fieldnames.append('county')

    with open('flights.csv', 'a') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames)

        # writer.writeheader()

        for num, row in enumerate(d_reader):
            if num % 100 == 0:
                print(num, "rows processed")
            try:
                result = cg.coordinates(x=row['longitude'], y=row['latitude'])

                county = result[0]['Counties']
                county_name = county[0]['NAME']

                new_row = row
                new_row['county'] = county_name

                writer.writerow(new_row)
            except Exception:
                # Handler assumed for the truncated excerpt: skip rows that
                # fail to geocode and keep going.
                continue
Example #11
File: data.py Project: mhdella/solar
def init():

    #clean globals
    lat = settings.lat
    lon = settings.lon
    state = settings.state
    postal = settings.postal
    year = settings.year

    # --------------------------------------------------------------------------------------------------------------------------------
    # WEATHER DATA - NSRDB  - https://nsrdb.nrel.gov/api-instructions
    # --------------------------------------------------------------------------------------------------------------------------------
    # Declare all variables as strings. Spaces must be replaced with '+', i.e., change 'John Smith' to 'John+Smith'.

    # You must request an NSRDB api key from the link above
    api_key = 'JJQmF8CNU7qDCgGptU1krnjESLAa4RBzH2aOaOrs'
    # Set the attributes to extract (e.g., dhi, ghi, etc.), separated by commas.
    attributes = 'ghi,dhi,dni,wind_speed_10m_nwp,surface_air_temperature_nwp,solar_zenith_angle'
    # Choose year of data
    #year = '2015'
    # Set leap year to true or false. True will return leap day data if present, false will not.
    leap_year = 'false'
    # Set time interval in minutes, i.e., '30' is half hour intervals. Valid intervals are 30 & 60.
    interval = '30'
    # Specify Coordinated Universal Time (UTC), 'true' will use UTC, 'false' will use the local time zone of the data.
    # NOTE: In order to use the NSRDB data in SAM, you must specify UTC as 'false'. SAM requires the data to be in the
    # local time zone.
    utc = 'true'
    # Your full name, use '+' instead of spaces.
    your_name = 'Jon+James'
    # Your reason for using the NSRDB.
    reason_for_use = 'Find+best+places+to+put+solar+panels.'
    # Your affiliation
    your_affiliation = 'simplyenable'
    # Your email address
    your_email = '*****@*****.**'
    # Please join our mailing list so we can keep you up-to-date on new developments.
    mailing_list = 'false'

    # Declare url string
    nsrdb = 'http://developer.nrel.gov/api/solar/nsrdb_0512_download.csv?wkt=POINT({lon}%20{lat})&names={year}&leap_day={leap}&interval={interval}&utc={utc}&full_name={name}&email={email}&affiliation={affiliation}&mailing_list={mailing_list}&reason={reason}&api_key={api}&attributes={attr}'.format(
        year=year,
        lat=lat,
        lon=lon,
        leap=leap_year,
        interval=interval,
        utc=utc,
        name=your_name,
        email=your_email,
        mailing_list=mailing_list,
        affiliation=your_affiliation,
        reason=reason_for_use,
        api=api_key,
        attr=attributes)
    # Return just the first 2 lines to get metadata:
    weather = pd.read_csv(nsrdb, nrows=1)
    # See metadata for specified properties, e.g., timezone and elevation
    timezone, elevation = weather['Local Time Zone'], weather['Elevation']

    radiance = weather['Fill Flag 2']

    sky = weather['Fill Flag 3']

    settings.weather = weather
    settings.radiance = radiance
    settings.sky = sky

    # --------------------------------------------------------------------------------------------------------------------------------
    # POLITICAL DATA - DSIRE - http://www.dsireusa.org/resources/data-and-tools/
    # --------------------------------------------------------------------------------------------------------------------------------

    dsire = 'http://programs.dsireusa.org/api/v1/getprograms/json?fromSir=0&state={state}'.format(
        state=state)

    polreq = requests.get(dsire)

    settings.politics = polreq.json()

    # --------------------------------------------------------------------------------------------------------------------------------
    # ECONOMIC DATA - EIA - https://www.eia.gov/opendata/commands.php
    # --------------------------------------------------------------------------------------------------------------------------------

    eia_key = 'afe0bc288e7de03842e061ac596b9301'

    #series IDs found from data browser - https://www.eia.gov/electricity/data/browser/
    #timeline
    monthly = 'M'
    quarterly = 'Q'
    yearly = 'Y'

    timeline = monthly

    #sector
    all = 'ALL'
    residential = 'RES'
    commercial = 'COM'
    Industrial = 'IND'
    transportation = 'TRA'
    other = 'OTH'

    sector = residential

    #dataset
    avg_price = 'ELEC.PRICE'
    avg_rev = 'ELEC.REV'

    dataset = avg_price

    #generate series ID
    series_id = '{dataset}.{state}-{sector}.{timeline}'.format(
        dataset=dataset, state=state, sector=sector, timeline=timeline)

    eia = 'http://api.eia.gov/series/?api_key={eia_key}&series_id={series_id}'.format(
        eia_key=eia_key, series_id=series_id)

    #request
    e = requests.get(eia)
    settings.economics = e.json()

    # --------------------------------------------------------------------------------------------------------------------------------
    # DEMOGRAPHIC DATA - CENSUS - https://api.census.gov/data/2015/acs5/variables.html
    # --------------------------------------------------------------------------------------------------------------------------------

    #Census geocoder to get regional codes for the location
    cg = CensusGeocode()

    cg_results = cg.coordinates(x=lon, y=lat)

    #Census regional codes
    c_state = cg_results[0]['Census Tracts'][0]['STATE']
    c_county = cg_results[0]['Census Tracts'][0]['COUNTY']
    c_tract = cg_results[0]['Census Tracts'][0]['TRACT']

    #Census instantiate with API Key
    c = Census("fb10dd39ec721dda4caf2baf5eed40a57f724084")

    #Census variables
    #Median Household Income by Household Size
    size = 'B19019_001E'

    #Aggregate household income in the past 12 months
    agg = 'B19025_001E'

    #Age of Householder by Household Income
    age = 'B19037_001E'

    #INCOME IN THE PAST 12 MONTHS
    income = 'B06011_001E'

    #retrieve the census data
    d = c.acs5.state_county_tract(('NAME', size), c_state, c_county, c_tract)

    settings.demographics = d

    return
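The three regional codes pulled from the coordinates result follow the standard FIPS layout (2-digit state, 3-digit county, 6-digit tract) and concatenate into the 11-digit tract GEOID that other examples on this page use. A small sketch, assuming the variables from the function above:

# 2-digit state + 3-digit county + 6-digit tract = 11-digit tract GEOID
tract_geoid = c_state + c_county + c_tract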
Example #12
import requests
import pdb
from censusgeocode import CensusGeocode

cg = CensusGeocode()


def example(lat, long):
    # grab some lat/long coords from wherever. For this example,
    # I just opened a javascript console in the browser and ran:
    #
    # navigator.geolocation.getCurrentPosition(function(p) {
    #   console.log(p);
    # })
    #
    latitude = 35.1330343
    longitude = -90.0625056

    # Does the geocoding request come from a device with a
    # location sensor? Must be either true or false.
    sensor = 'true'

    # Hit Google's reverse geocoder directly
    # NOTE: I *think* their terms state that you're supposed to
    # use google maps if you use their api for anything.
    base = "http://maps.googleapis.com/maps/api/geocode/json?"
    params = "latlng={lat},{lon}&sensor={sen}".format(lat=latitude,
                                                      lon=longitude,
                                                      sen=sensor)
    url = "{base}{params}".format(base=base, params=params)
    response = requests.get(url)
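The excerpt builds a CensusGeocode instance but stops right after the Google request. A hedged continuation, passing the same hard-coded point to the Census geocoder with the call pattern used elsewhere on this page:

# Hypothetical continuation: resolve the same point to Census geographies.
cg_result = cg.coordinates(x=-90.0625056, y=35.1330343)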
Example #13
    def setUp(self):
        self.cg = CensusGeocode()
Example #14
class CensusGeoCodeTestCase(unittest.TestCase):

    cg = None

    def setUp(self):
        self.cg = CensusGeocode()

    @vcr.use_cassette('tests/fixtures/coordinates.yaml')
    def test_returns_geo(self):
        results = self.cg.coordinates(-74, 43, returntype='geographies')
        assert isinstance(results, GeographyResult)
        assert results.input

    @vcr.use_cassette('tests/fixtures/coordinates.yaml')
    def test_coords(self):
        results = self.cg.coordinates(-74, 43)
        assert results['Counties'][0]['BASENAME'] == 'Saratoga'
        assert results['Counties'][0]['GEOID'] == '36091'
        assert results['Census Tracts'][0]['BASENAME'] == "615"

    def test_url(self):
        r = self.cg._geturl('coordinates', 'geographies')
        assert r == 'https://geocoding.geo.census.gov/geocoder/geographies/coordinates'

    @vcr.use_cassette('tests/fixtures/address-geographies.yaml')
    def test_address(self):
        results = self.cg.address('1600 Pennsylvania Avenue NW', city='Washington', state='DC', zipcode='20500')
        assert results[0]
        assert results[0]['geographies']['Counties'][0]['BASENAME'] == 'District of Columbia'

    @vcr.use_cassette('tests/fixtures/onelineaddress.yaml')
    def test_onelineaddress(self):
        results = self.cg.onelineaddress('1600 Pennsylvania Avenue NW, Washington, DC, 20500', layers='all')
        assert results[0]
        try:
            assert results[0]['geographies']['Counties'][0]['BASENAME'] == 'District of Columbia'
        except AssertionError:
            print(results[0]['geographies']['Counties'][0])
            raise

        assert 'Metropolitan Divisions' in results[0]['geographies'].keys()
        assert 'Alaska Native Village Statistical Areas' in results[0]['geographies'].keys()

    @vcr.use_cassette('tests/fixtures/address-locations.yaml')
    def test_address_return_type(self):
        results = self.cg.address('1600 Pennsylvania Avenue NW', city='Washington', state='DC', zipcode='20500', returntype='locations')

        assert results[0]['matchedAddress'].upper() == '1600 PENNSYLVANIA AVE NW, WASHINGTON, DC, 20502'
        assert results[0]['addressComponents']['streetName'] == 'PENNSYLVANIA'

    @vcr.use_cassette('tests/fixtures/test_benchmark_vintage.yaml')
    def test_benchmark_vintage(self):
        bmark, vint = 'Public_AR_Census2010', 'Census2010_Census2010'

        cg = CensusGeocode(benchmark=bmark, vintage=vint)
        result = cg.address('1600 Pennsylvania Avenue NW', city='Washington', state='DC', zipcode='20500', returntype='geographies')

        self.assertEqual(result.input['benchmark']['benchmarkName'], bmark)
        self.assertEqual(result.input['vintage']['vintageName'], vint)
        self.assertEqual(result[0]['geographies']['Census Tracts'][0]['GEOID'], '11001006202')

    @vcr.use_cassette('tests/fixtures/address-batch.yaml')
    def test_addressbatch(self):
        result = self.cg.addressbatch('tests/fixtures/batch.csv', returntype='locations')
        assert isinstance(result, list)
        resultdict = {int(r['id']): r for r in result}
        assert resultdict[3]['parsed'] == '3 GRAMERCY PARK W, NEW YORK, NY, 10003'
        assert resultdict[2]['match'] is False

        result = self.cg.addressbatch('tests/fixtures/batch.csv', returntype='geographies')
        assert isinstance(result, list)
        resultdict = {int(r['id']): r for r in result}
        assert resultdict[3]['tigerlineid'] == '59653655'
        assert resultdict[3]['statefp'] == '36'
        assert resultdict[2]['match'] is False
Example #15
query = ('''
                select entity_id,
                    json_build_object('address1', trim(lower(address_1)), 
                                      'address2', trim(lower(address_1b)),
                                      'city', trim(lower(city_1)),
                                      'state', trim(lower(state_1)),
                                      'zipcode', zipcode_1) as address,
                    enroll_date::date as update_date
                from raw.epic_adress
                join patients_ucm.main 
                    ON mrn = patient_id::int
                join raw.final_mrns USING (mrn)
        ''')
df = query_db(query, connection)
print(df.head())

cg = CensusGeocode()


def getGeocode(x):
    try:
        address = " ".join([str(i) for i in filter(None, x.values())])
        result = cg.onelineaddress(address)
        geoid = result[0]['geographies']['Census Tracts'][0]['GEOID']
    except Exception:
        #del x['address2']
        print(address)
        return None
    return geoid


df['geoid'] = df['address'].apply(getGeocode)
Example #16
def census_geocode(datafile, delim, header, start, addcol):
    """ (str,str,str,int,list[int]) -> files
    Datafile is file or path to process, delim is file's
    delimiter, specify if file has header row with y or n,
    specify 0 to read from beginning of file or index # to resume,
    addcol is a list of column numbers containing address components.
"""

    import csv, locale, traceback, time, datetime
    from urllib import error
    from censusgeocode import CensusGeocode

    cg = CensusGeocode()

    #Function for adding and summing entries in dictionaries
    def sumdict(theval, thedict):
        if theval in thedict:
            thedict[theval] = thedict[theval] + 1
        else:
            thedict[theval] = 1

    #Open files, set up environments. Match lists are for debugging; results
    #are written to output files as each record is handled. Headers added
    #based on user input. Types of non-matches stored in a dictionary for
    #output to report. Users should verify that input files are in UTF-8 before
    #matching.

    if type(addcol) is not list:
        print(
            'Position numbers with address components must be provided in a list, i.e. [3] or [3,4,5,6]'
        )
        raise SystemExit

    if len(addcol) == 1:
        unparsed = addcol[0] - 1
    elif len(addcol) == 4:
        addstreet = addcol[0] - 1
        thecity = addcol[1] - 1
        thestate = addcol[2] - 1
        thezip = addcol[3] - 1
    else:
        print(
            'Inappropriate number of positions given - provide either 1 for unparsed or 4 for parsed'
        )
        raise SystemExit

    if header.lower() not in ('y', 'yes', 'n', 'no'):
        print("Must indicate whether there is a header row with 'y' or 'n'")
        raise SystemExit

    matched = []
    nomatch = []
    matchfails = {}
    counter = 0
    namefile = datafile[:-4]
    if datafile[-4:] == '.csv':
        ext = '.csv'
    else:
        ext = '.txt'

    readfile = csv.reader(open(datafile,
                               'r',
                               encoding='utf-8',
                               errors='ignore'),
                          delimiter=delim)
    matchfile = open(namefile + '_matched' + ext,
                     'a',
                     newline='',
                     encoding='utf-8',
                     errors='ignore')
    matchwrite = csv.writer(matchfile,
                            delimiter=delim,
                            quotechar='"',
                            quoting=csv.QUOTE_MINIMAL)
    nomatchfile = open(namefile + '_nomatch' + ext,
                       'a',
                       newline='',
                       encoding='utf-8',
                       errors='ignore')
    nomatchwrite = csv.writer(nomatchfile,
                              delimiter=delim,
                              quotechar='"',
                              quoting=csv.QUOTE_MINIMAL)

    if header.lower() in ('y', 'yes') and int(start) == 0:
        headrow = next(readfile)
        headnomatch = list(headrow)
        headnomatch.append('error')
        nomatchwrite.writerow(headnomatch)
        headmatch = list(headrow)
        newhead = [
            'matched_add', 'longitude', 'latitude', 'ansifips', 'stateid',
            'countyid', 'tractid', 'blkgrpid', 'blkid', 'block', 'tract',
            'county', 'state'
        ]
        headmatch.extend(newhead)
        matchwrite.writerow(headmatch)

    print('Match process launched...')

    #Start reading the file from the given row number;
    #if there is no result (no match) write record to no match list and output file;
    #if record has matches, take relevant data from the first match,
    #append it to the address and add it to the matched list and output file.
    #Outside try / except handles all errors, breaks off matching and writes report.
    #Inside try / except while true handles server time out, tries to rematch, or
    #if input is bad gives up after 5 times and writes no match. While true breaks
    #if no exception raised, moves on to next record. Internal i in range does rematch
    #if result returns no geography due to java error - by default a status key returns
    #no value if everything is ok, but returns a message value if there's a problem.
    #If there is a status value, tries again up to 3 times before giving up, then writes
    #no match. Otherwise in range loop breaks if a clean no match or match is made,
    #proceeds to next record.

    for index, record in enumerate(readfile):
        try:
            if index < int(start):
                continue
            else:
                error_count = 0
                record = [x.strip() for x in record]
                while True:
                    try:
                        for i in range(4):
                            if len(addcol) == 1:
                                result = cg.onelineaddress(record[unparsed])
                            else:
                                result = cg.address(record[addstreet],
                                                    city=record[thecity],
                                                    state=record[thestate],
                                                    zipcode=record[thezip])
                            if len(result) == 0:
                                record.append('Match not found')
                                nomatch.append(record)
                                sumdict(record[-1], matchfails)
                                nomatchwrite.writerow(record)
                            else:
                                geo = result[0].get('geographies')
                                blockinfo = geo.get('2010 Census Blocks')
                                tractinfo = geo.get('Census Tracts')
                                countyinfo = geo.get('Counties')
                                stateinfo = geo.get('States')
                                problemlist = [
                                    blockinfo[0].get('status'),
                                    tractinfo[0].get('status'),
                                    countyinfo[0].get('status'),
                                    stateinfo[0].get('status')
                                ]

                                if any(v is not None for v in problemlist):
                                    if i < 3:
                                        print(
                                            'Trying to return geography at index '
                                            + str(index))
                                        time.sleep(1)
                                        continue
                                    else:
                                        print(
                                            'Writing a no match for failed geography at index '
                                            + str(index))
                                        record.append(
                                            'Failed to return geography')
                                        nomatch.append(record)
                                        sumdict(record[-1], matchfails)
                                        nomatchwrite.writerow(record)
                                else:
                                    ansifips = blockinfo[0].get('GEOID')
                                    stateid = ansifips[0:2]
                                    countyid = ansifips[2:5]
                                    tractid = ansifips[5:11]
                                    blkgrpid = ansifips[11]
                                    blkid = ansifips[11:]

                                    blkname = blockinfo[0].get('NAME')
                                    trctname = tractinfo[0].get('NAME')
                                    coname = countyinfo[0].get('NAME')
                                    stname = stateinfo[0].get('NAME')

                                    match = result[0].get('matchedAddress')
                                    coord = result[0].get('coordinates')
                                    lng = str(coord.get('x'))
                                    lat = str(coord.get('y'))

                                    newitems = match, lng, lat, ansifips, stateid, countyid, tractid, blkgrpid, blkid, blkname, trctname, coname, stname

                                    record.extend(newitems)
                                    matched.append(record)
                                    matchwrite.writerow(record)
                            break

                        counter = counter + 1
                        time.sleep(1)
                        if counter % 100 == 0:
                            print(counter, ' records processed so far...')
                            print('Last record written was:')
                            print(record)
                        if counter % 1000 == 0:
                            time.sleep(5)

                    except error.HTTPError as server_error:
                        if server_error.code == 500:
                            error_count = error_count + 1
                            if error_count < 5:
                                print(
                                    'Got a server error, will try again from index '
                                    + str(index))
                                time.sleep(2)
                                continue
                            else:
                                print(
                                    'Writing a no match as server failed to return result at index '
                                    + str(index))
                                record.append('Server failed to return result')
                                nomatch.append(record)
                                sumdict(record[-1], matchfails)
                                counter = counter + 1
                                nomatchwrite.writerow(record)
                    break

        except Exception as e:
            print('An error has occurred. File stopped at index ' + str(index))
            traceback.print_exc()
            break

    #Close all files, write match summaries to report

    matchfile.close()
    nomatchfile.close()
    nomatch_cnt = len(nomatch)
    matched_cnt = len(matched)

    print(counter, ' records processed in total.')
    print(matched_cnt, ' records matched and ', nomatch_cnt,
          ' records had no matches.')

    ts = datetime.datetime.now().strftime("%Y_%m_%d_%H%M")

    report = open(namefile + '_report_' + ts + '.txt', 'w')
    report.write('Summary of Census Geocoding Output for ' + datafile +
                 ' on ' + ts + '\n' + '\n')
    report.write(str(counter) + ' records processed in total.' + '\n')
    report.write(str(matched_cnt) + ' records matched' + '\n')
    report.write(str(nomatch_cnt) + ' records had no matches' + '\n' + '\n')
    report.write('For the unmatched records, results and errors:' + '\n')
    for k, v in sorted(matchfails.items()):
        report.writelines('\t' + ': '.join([k, str(v)]) + '\n')
    report.close()
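A hedged invocation sketch for the batch matcher above, with a hypothetical comma-delimited file whose street, city, state, and ZIP sit in columns 2 through 5 (1-based, per the docstring), a header row, and matching from the start of the file:

# Hypothetical input file and column positions.
census_geocode('addresses.csv', ',', 'y', 0, [2, 3, 4, 5])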
Example #17
# See: https://docs.python.org/3/reference/import.html
import csv
from censusgeocode import CensusGeocode

# This dictionary will map the counties in the input file to human-readable borough names
# Knowing which values to put here required looking at the input files carefully
counties = {
    'BX': 'Bronx',
    'Q': 'Queens',
    'NY': 'New York',
    'K': 'Brooklyn',
    'R': 'Staten Island'
}

# Create a new CensusGeocode object
# See the docs here: https://github.com/fitnr/censusgeocode
cg = CensusGeocode()

# define the input and output file names
infile = 'citations.csv'
outfile = 'new-citations.csv'

# Open the input file for reading
with open(infile) as f:
    # The DictReader will return every row of the CSV as a dictionary
    reader = csv.DictReader(f)

    # Open up the output file for writing. This will delete the contents, if any.
    with open(outfile, 'w') as g:
        # The DictWriter will write a dictionary, but we must tell it the names of the output fields
        # We add three new fields to reader.fieldnames (a list) using list add operator (+)
        newfieldnames = reader.fieldnames + [