def geocodeme(row, addr_lst):
    """Geocode one record's address fields and return its census-tract GEOID.

    row      -- a mapping (e.g. a DataFrame row) holding address parts
    addr_lst -- keys into row: [street, city, state, zipcode], in that order
    Returns the GEOID string of the first match's Census Tract, or None
    when the Census geocoder finds no match.
    """
    coder = CensusGeocode()
    matches = coder.address(row[addr_lst[0]],
                            city=row[addr_lst[1]],
                            state=row[addr_lst[2]],
                            zipcode=row[addr_lst[3]])
    # No candidates at all -> no tract to report.
    if not matches:
        return None
    return matches[0]['geographies']['Census Tracts'][0]['GEOID']
def GeoCode(GeoCoder, strAddr):
    """Geocode strAddr with the requested service.

    GeoCoder -- 'google', 'bing', 'census', or anything else for Yahoo
    strAddr  -- free-form address string

    Returns an 8-tuple:
        (lat, lng, address, GeoCoder, neighborhood, quality, accuracy, confidence)
    with None in every slot the chosen service does not supply, and
    (None, None, None, GeoCoder, None, None, None, None) on failure.
    """
    # Bing key comes from configuration; never hard-code API keys here.
    strBingMapKey = cfg.getConfigValue(r"Geocoder/BingKey")
    try:
        if GeoCoder == 'google':
            g = geocoder.google(strAddr)
            return (g.lat, g.lng, g.address, GeoCoder, g.neighborhood,
                    g.quality, g.accuracy, None)
        elif GeoCoder == 'bing':
            g = geocoder.bing(strAddr, key=strBingMapKey)
            return (g.lat, g.lng, g.address, GeoCoder, g.neighborhood,
                    g.quality, g.accuracy, g.confidence)
        elif GeoCoder == 'census':
            cg = CensusGeocode()
            j = cg.onelineaddress(strAddr)
            try:
                return (j[0]['coordinates']['y'], j[0]['coordinates']['x'],
                        j[0]['matchedAddress'], GeoCoder, None, None, None, None)
            # Was a bare `except:`; only a missing/empty match should fall
            # through to the empty tuple — anything else is a real error and
            # is handled by the outer handler below.
            except (IndexError, KeyError, TypeError):
                return (None, None, None, GeoCoder, None, None, None, None)
        else:
            g = geocoder.yahoo(strAddr)
            return (g.lat, g.lng, g.json['address'], GeoCoder, g.neighborhood,
                    g.quality, g.accuracy, None)
    # Was a bare `except:` — that also swallowed SystemExit/KeyboardInterrupt.
    except Exception:
        print('error encountered when geocoding address: {0}'.format(strAddr))
        traceback.print_exc()
        return (None, None, None, GeoCoder, None, None, None, None)
def __init__(self, objid, x, y):
    """Reverse-geocode point (x, y) to a census tract with retries.

    objid -- caller-supplied identifier, stored unchanged
    x, y  -- passed straight to CensusGeocode().coordinates(x, y)
             (presumably x=longitude, y=latitude — TODO confirm)

    On success sets self.geo, self.result_str, self.result and
    self.census_tract; after repeated KeyErrors or any unexpected
    exception, geo/result/census_tract are all set to None.
    """
    self.x = x
    self.y = y
    self.objid = objid
    success = False
    # Remaining KeyError retries before giving up.
    barrier = 10
    try:
        while not success:
            try:
                self.geo = CensusGeocode().coordinates(x, y)
                # NOTE(review): converts the result to JSON by swapping
                # single quotes for double quotes in its str() form.  This
                # breaks on any value containing an apostrophe (e.g.
                # "O'Brien County") — confirm whether self.geo is already
                # parsed data that could be used directly.
                self.result_str = self.geo.__str__().replace("'", '"')
                self.result = json.loads(self.geo.__str__().replace(
                    "'", '"'))
                self.census_tract = self.result[0]['Census Tracts'][0][
                    'BASENAME']
                success = True
            except KeyError as e:
                # Missing keys are treated as transient API hiccups.
                if barrier > 0:
                    print("Error: {0}. Retrying in 2 seconds.".format(e))
                    time.sleep(2)
                    barrier -= 1
                else:
                    success = True
                    # NOTE(review): the extra 'barrier' argument to format()
                    # is ignored by this format string.
                    print("Hard Error: {0} - failed too many times".format(
                        e, barrier))
                    self.geo = None
                    self.result = None
                    self.census_tract = None
    except Exception as e:
        # Any non-KeyError failure: record the miss and continue.
        print("Hard Error: {0} - Type: {1}".format(e, type(e)))
        self.geo = None
        self.result = None
        self.census_tract = None
def test_benchmark_vintage(self):
    """A non-default benchmark/vintage pair is echoed back in result.input."""
    benchmark_name = 'Public_AR_Census2010'
    vintage_name = 'Census2010_Census2010'
    coder = CensusGeocode(benchmark=benchmark_name, vintage=vintage_name)
    result = coder.address('1600 Pennsylvania Avenue NW',
                           city='Washington',
                           state='DC',
                           zipcode='20500',
                           returntype='geographies')
    self.assertEqual(result.input['benchmark']['benchmarkName'], benchmark_name)
    self.assertEqual(result.input['vintage']['vintageName'], vintage_name)
    tract = result[0]['geographies']['Census Tracts'][0]
    self.assertEqual(tract['GEOID'], '11001006202')
def get_census_tract(lat_lng):
    """Look up a census tract number via the Census Geocoder API.

    lat_lng -- a (latitude, longitude) pair, or None.
    Returns tract / 100 as a number when a tract is found, otherwise ''
    (also '' when lat_lng is None).  Subject to the API's daily rate limits.
    """
    census_tract = ''
    if lat_lng is None:
        return census_tract
    coder = CensusGeocode()
    latitude = lat_lng[0]
    longitude = lat_lng[1]
    # The geocoder takes x=longitude, y=latitude.
    result = coder.coordinates(x=longitude, y=latitude)
    if len(result) > 0 and 'Census Tracts' in result[0] and len(
            result[0]['Census Tracts']) > 0:
        tract_record = result[0]['Census Tracts'][0]
        if 'TRACT' in tract_record:
            census_tract = int(tract_record['TRACT']) / 100
    return census_tract
def setUp(self):
    """Create a fresh default-configured CensusGeocode client per test."""
    self.cg = CensusGeocode()
class CensusGeoCodeTestCase(unittest.TestCase):
    """Exercises the CensusGeocode client against recorded HTTP fixtures.

    The vcr cassettes replay previously recorded requests, so each test
    must issue exactly the same API calls in the same order as when the
    cassette was recorded.
    """
    # Client under test; assigned per-test in setUp.
    cg = None

    def setUp(self):
        # Fresh client with default benchmark/vintage for each test.
        self.cg = CensusGeocode()

    @vcr.use_cassette('tests/fixtures/coordinates.yaml')
    def test_returns_geo(self):
        # returntype='geographies' wraps the payload in a GeographyResult
        # that also carries the request echo in .input.
        results = self.cg.coordinates(-74, 43, returntype='geographies')
        assert isinstance(results, GeographyResult)
        assert results.input

    @vcr.use_cassette('tests/fixtures/coordinates.yaml')
    def test_coords(self):
        # Default coordinates() lookup indexes layers by name.
        results = self.cg.coordinates(-74, 43)
        assert results['Counties'][0]['BASENAME'] == 'Saratoga'
        assert results['Counties'][0]['GEOID'] == '36091'
        assert results['Census Tracts'][0]['BASENAME'] == "615"

    def test_url(self):
        # _geturl composes the endpoint from (searchtype, returntype);
        # no network involved.
        r = self.cg._geturl('coordinates', 'geographies')
        assert r == 'https://geocoding.geo.census.gov/geocoder/geographies/coordinates'

    @vcr.use_cassette('tests/fixtures/address-geographies.yaml')
    def test_address(self):
        results = self.cg.address('1600 Pennsylvania Avenue NW',
                                  city='Washington',
                                  state='DC',
                                  zipcode='20500')
        assert results[0]
        assert results[0]['geographies']['Counties'][0][
            'BASENAME'] == 'District of Columbia'

    @vcr.use_cassette('tests/fixtures/onelineaddress.yaml')
    def test_onelineaddress(self):
        # layers='all' should return every geography layer, not just defaults.
        results = self.cg.onelineaddress(
            '1600 Pennsylvania Avenue NW, Washington, DC, 20500',
            layers='all')
        assert results[0]
        try:
            assert results[0]['geographies']['Counties'][0][
                'BASENAME'] == 'District of Columbia'
        except AssertionError:
            # Dump the offending county record before re-raising to aid debugging.
            print(results[0]['geographies']['Counties'][0])
            raise
        assert 'Metropolitan Divisions' in results[0]['geographies'].keys()
        assert 'Alaska Native Village Statistical Areas' in results[0][
            'geographies'].keys()

    @vcr.use_cassette('tests/fixtures/address-locations.yaml')
    def test_address_return_type(self):
        # returntype='locations' yields match records instead of geographies.
        results = self.cg.address('1600 Pennsylvania Avenue NW',
                                  city='Washington',
                                  state='DC',
                                  zipcode='20500',
                                  returntype='locations')
        assert results[0]['matchedAddress'].upper(
        ) == '1600 PENNSYLVANIA AVE NW, WASHINGTON, DC, 20502'
        assert results[0]['addressComponents']['streetName'] == 'PENNSYLVANIA'
@vcr.use_cassette('tests/fixtures/test_benchmark_vintage.yaml')
def test_benchmark_vintage(self):
    # A benchmark/vintage pair passed to the constructor should be used
    # for the lookup and echoed back by the API in result.input.
    bmark, vint = 'Public_AR_Census2010', 'Census2010_Census2010'
    cg = CensusGeocode(benchmark=bmark, vintage=vint)
    result = cg.address('1600 Pennsylvania Avenue NW',
                        city='Washington',
                        state='DC',
                        zipcode='20500',
                        returntype='geographies')
    self.assertEqual(result.input['benchmark']['benchmarkName'], bmark)
    self.assertEqual(result.input['vintage']['vintageName'], vint)
    self.assertEqual(result[0]['geographies']['Census Tracts'][0]['GEOID'],
                     '11001006202')

@vcr.use_cassette('tests/fixtures/address-batch.yaml')
def test_addressbatch(self):
    # Batch geocoding a CSV returns one dict per input row; rows are keyed
    # here by the input 'id' column, and unmatched rows have match == False.
    result = self.cg.addressbatch('tests/fixtures/batch.csv',
                                  returntype='locations')
    assert isinstance(result, list)
    resultdict = {int(r['id']): r for r in result}
    assert resultdict[3][
        'parsed'] == '3 GRAMERCY PARK W, NEW YORK, NY, 10003'
    assert resultdict[2]['match'] is False
    # Same file with returntype='geographies' adds TIGER/census fields.
    result = self.cg.addressbatch('tests/fixtures/batch.csv',
                                  returntype='geographies')
    assert isinstance(result, list)
    resultdict = {int(r['id']): r for r in result}
    assert resultdict[3]['tigerlineid'] == '59653655'
    assert resultdict[3]['statefp'] == '36'
    assert resultdict[2]['match'] is False
# Annotate each row of flights2.csv with its county name (via the Census
# Geocoder) and append the result to flights.csv.
from censusgeocode import CensusGeocode
import csv
import time

cg = CensusGeocode()
start = time.time()

with open('flights2.csv', 'r') as f:
    d_reader = csv.DictReader(f)
    fieldnames = d_reader.fieldnames
    fieldnames.append('county')
    with open('flights.csv', 'a') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames)
        # writer.writeheader()
        for num, row in enumerate(d_reader):
            if num % 100 == 0:
                print(num, "rows processed")
            try:
                result = cg.coordinates(x=row['longitude'],
                                        y=row['latitude'])
                county = result[0]['Counties']
                county_name = county[0]['NAME']
                new_row = row
                new_row['county'] = county_name
                writer.writerow(new_row)
            # BUG FIX: the try block had no handler (a syntax error as
            # written).  Skip rows whose lookup yields no usable county
            # instead of aborting the whole run.
            except (IndexError, KeyError, TypeError) as exc:
                print('skipping row', num, '- lookup failed:', exc)
def init():
    """Populate the settings module with external data for the configured
    location: NSRDB weather, DSIRE policy programs, EIA electricity prices,
    and Census ACS5 demographics.

    Reads settings.lat/lon/state/postal/year; writes settings.weather,
    settings.radiance, settings.sky, settings.politics, settings.economics
    and settings.demographics.  Returns nothing.

    SECURITY NOTE(review): the NSRDB, EIA and Census API keys are
    hard-coded below — move them to configuration or environment variables
    before sharing this code.
    """
    # Pull location/config out of the settings module.
    lat = settings.lat
    lon = settings.lon
    state = settings.state
    postal = settings.postal
    year = settings.year

    # --------------------------------------------------------------------
    # WEATHER DATA - NSRDB - https://nsrdb.nrel.gov/api-instructions
    # --------------------------------------------------------------------
    # All request fields are strings; spaces must be encoded as '+'.
    # Request an NSRDB api key from the link above.
    api_key = 'JJQmF8CNU7qDCgGptU1krnjESLAa4RBzH2aOaOrs'
    # Attributes to extract (e.g., dhi, ghi, etc.), separated by commas.
    attributes = 'ghi,dhi,dni,wind_speed_10m_nwp,surface_air_temperature_nwp,solar_zenith_angle'
    # 'true' returns leap-day data if present.
    leap_year = 'false'
    # Time interval in minutes; valid intervals are 30 & 60.
    interval = '30'
    # 'true' = UTC timestamps; SAM requires local time ('false') instead.
    utc = 'true'
    your_name = 'Jon+James'
    reason_for_use = 'Find+best+places+to+put+solar+panels.'
    your_affiliation = 'simplyenable'
    your_email = '*****@*****.**'
    mailing_list = 'false'
    # Build the CSV-download request URL.
    nsrdb = 'http://developer.nrel.gov/api/solar/nsrdb_0512_download.csv?wkt=POINT({lon}%20{lat})&names={year}&leap_day={leap}&interval={interval}&utc={utc}&full_name={name}&email={email}&affiliation={affiliation}&mailing_list={mailing_list}&reason={reason}&api_key={api}&attributes={attr}'.format(
        year=year, lat=lat, lon=lon, leap=leap_year, interval=interval,
        utc=utc, name=your_name, email=your_email, mailing_list=mailing_list,
        affiliation=your_affiliation, reason=reason_for_use, api=api_key,
        attr=attributes)
    # Fetch just the metadata header rows.
    weather = pd.read_csv(nsrdb, nrows=1)
    # Metadata columns, e.g. timezone and elevation.
    timezone, elevation = weather['Local Time Zone'], weather['Elevation']
    radiance = weather['Fill Flag 2']
    sky = weather['Fill Flag 3']
    settings.weather = weather
    settings.radiance = radiance
    settings.sky = sky

    # --------------------------------------------------------------------
    # POLITICAL DATA - DSIRE - http://www.dsireusa.org/resources/data-and-tools/
    # --------------------------------------------------------------------
    dsire = 'http://programs.dsireusa.org/api/v1/getprograms/json?fromSir=0&state={state}'.format(
        state=state)
    polreq = requests.get(dsire)
    # BUG FIX: was `polreq.json` — that stored the bound method object, not
    # the decoded payload.  Call it.
    settings.politics = polreq.json()

    # --------------------------------------------------------------------
    # ECONOMIC DATA - EIA - https://www.eia.gov/opendata/commands.php
    # --------------------------------------------------------------------
    eia_key = 'afe0bc288e7de03842e061ac596b9301'
    # Series IDs found from data browser -
    # https://www.eia.gov/electricity/data/browser/
    # timeline options
    monthly = 'M'
    quarterly = 'Q'
    yearly = 'Y'
    timeline = monthly
    # sector options (renamed from `all`, which shadowed the builtin)
    all_sectors = 'ALL'
    residential = 'RES'
    commercial = 'COM'
    industrial = 'IND'
    transportation = 'TRA'
    other = 'OTH'
    sector = residential
    # dataset options
    avg_price = 'ELEC.PRICE'
    avg_rev = 'ELEC.REV'
    dataset = avg_price
    # generate series ID
    series_id = '{dataset}.{state}-{sector}.{timeline}'.format(
        dataset=dataset, state=state, sector=sector, timeline=timeline)
    eia = 'http://api.eia.gov/series/?api_key={eia_key}&series_id={series_id}'.format(
        eia_key=eia_key, series_id=series_id)
    e = requests.get(eia)
    # BUG FIX: was `e.json` — same bound-method bug as above.
    settings.economics = e.json()

    # --------------------------------------------------------------------
    # DEMOGRAPHIC DATA - CENSUS - https://api.census.gov/data/2015/acs5/variables.html
    # --------------------------------------------------------------------
    # Census geocoder gives the regional codes for the location.
    cg = CensusGeocode()
    cg_results = cg.coordinates(x=lon, y=lat)
    c_state = cg_results[0]['Census Tracts'][0]['STATE']
    c_county = cg_results[0]['Census Tracts'][0]['COUNTY']
    c_tract = cg_results[0]['Census Tracts'][0]['TRACT']
    # Census client, instantiated with API key.
    c = Census("fb10dd39ec721dda4caf2baf5eed40a57f724084")
    # ACS5 variables of interest:
    size = 'B19019_001E'    # Median Household Income by Household Size
    agg = 'B19025_001E'     # Aggregate household income in the past 12 months
    age = 'B19037_001E'     # Age of Householder by Household Income
    income = 'B06011_001E'  # Income in the past 12 months
    # Retrieve the census data for the tract found above.
    d = c.acs5.state_county_tract(('NAME', size), c_state, c_county, c_tract)
    settings.demographics = d
    return
import requests
import pdb
from censusgeocode import CensusGeocode

# Module-level Census geocoder client (unused in the snippet shown below —
# NOTE(review): confirm whether later code relies on it).
cg = CensusGeocode()


def example(lat, long):
    """Issue a reverse-geocoding request to Google's geocoder.

    NOTE(review): the lat/long parameters are accepted but never used —
    the hard-coded coordinates below are what actually get sent.  Confirm
    whether the parameters were meant to replace them.
    """
    # grab some lat/long coords from wherever. For this example,
    # I just opened a javascript console in the browser and ran:
    #
    # navigator.geolocation.getCurrentPosition(function(p) {
    #     console.log(p);
    # })
    #
    latitude = 35.1330343
    longitude = -90.0625056
    # Did the geocoding request comes from a device with a
    # location sensor? Must be either true or false.
    sensor = 'true'
    # Hit Google's reverse geocoder directly
    # NOTE: I *think* their terms state that you're supposed to
    # use google maps if you use their api for anything.
    base = "http://maps.googleapis.com/maps/api/geocode/json?"
    params = "latlng={lat},{lon}&sensor={sen}".format(lat=latitude,
                                                     lon=longitude,
                                                     sen=sensor)
    url = "{base}{params}".format(base=base, params=params)
    # NOTE(review): snippet appears truncated here — 'response' is never
    # read in the code shown.
    response = requests.get(url)
class CensusGeoCodeTestCase(unittest.TestCase):
    """Integration tests for the CensusGeocode client, replayed from vcr
    cassettes — the recorded request order must match exactly."""
    # Client under test; set in setUp for every test.
    cg = None

    def setUp(self):
        # New default-configured client per test.
        self.cg = CensusGeocode()

    @vcr.use_cassette('tests/fixtures/coordinates.yaml')
    def test_returns_geo(self):
        # returntype='geographies' produces a GeographyResult carrying the
        # request echo in .input.
        results = self.cg.coordinates(-74, 43, returntype='geographies')
        assert isinstance(results, GeographyResult)
        assert results.input

    @vcr.use_cassette('tests/fixtures/coordinates.yaml')
    def test_coords(self):
        # Plain coordinates() lookup, indexed by geography layer name.
        results = self.cg.coordinates(-74, 43)
        assert results['Counties'][0]['BASENAME'] == 'Saratoga'
        assert results['Counties'][0]['GEOID'] == '36091'
        assert results['Census Tracts'][0]['BASENAME'] == "615"

    def test_url(self):
        # URL construction only; no HTTP request is made.
        r = self.cg._geturl('coordinates', 'geographies')
        assert r == 'https://geocoding.geo.census.gov/geocoder/geographies/coordinates'

    @vcr.use_cassette('tests/fixtures/address-geographies.yaml')
    def test_address(self):
        results = self.cg.address('1600 Pennsylvania Avenue NW', city='Washington', state='DC', zipcode='20500')
        assert results[0]
        assert results[0]['geographies']['Counties'][0]['BASENAME'] == 'District of Columbia'

    @vcr.use_cassette('tests/fixtures/onelineaddress.yaml')
    def test_onelineaddress(self):
        # layers='all' should return every geography layer.
        results = self.cg.onelineaddress('1600 Pennsylvania Avenue NW, Washington, DC, 20500', layers='all')
        assert results[0]
        try:
            assert results[0]['geographies']['Counties'][0]['BASENAME'] == 'District of Columbia'
        except AssertionError:
            # Print the county record before re-raising so failures show context.
            print(results[0]['geographies']['Counties'][0])
            raise
        assert 'Metropolitan Divisions' in results[0]['geographies'].keys()
        assert 'Alaska Native Village Statistical Areas' in results[0]['geographies'].keys()

    @vcr.use_cassette('tests/fixtures/address-locations.yaml')
    def test_address_return_type(self):
        # returntype='locations' yields match records instead of geographies.
        results = self.cg.address('1600 Pennsylvania Avenue NW', city='Washington', state='DC', zipcode='20500', returntype='locations')
        assert results[0]['matchedAddress'].upper() == '1600 PENNSYLVANIA AVE NW, WASHINGTON, DC, 20502'
        assert results[0]['addressComponents']['streetName'] == 'PENNSYLVANIA'
@vcr.use_cassette('tests/fixtures/test_benchmark_vintage.yaml')
def test_benchmark_vintage(self):
    # Constructor-supplied benchmark/vintage should drive the lookup and
    # be echoed back in result.input.
    bmark, vint = 'Public_AR_Census2010', 'Census2010_Census2010'
    cg = CensusGeocode(benchmark=bmark, vintage=vint)
    result = cg.address('1600 Pennsylvania Avenue NW', city='Washington', state='DC', zipcode='20500', returntype='geographies')
    self.assertEqual(result.input['benchmark']['benchmarkName'], bmark)
    self.assertEqual(result.input['vintage']['vintageName'], vint)
    self.assertEqual(result[0]['geographies']['Census Tracts'][0]['GEOID'], '11001006202')

@vcr.use_cassette('tests/fixtures/address-batch.yaml')
def test_addressbatch(self):
    # Batch geocoding returns one dict per input row; keyed here by the
    # 'id' column, and unmatched rows carry match == False.
    result = self.cg.addressbatch('tests/fixtures/batch.csv', returntype='locations')
    assert isinstance(result, list)
    resultdict = {int(r['id']): r for r in result}
    assert resultdict[3]['parsed'] == '3 GRAMERCY PARK W, NEW YORK, NY, 10003'
    assert resultdict[2]['match'] is False
    # Repeat with returntype='geographies' for the TIGER/census fields.
    result = self.cg.addressbatch('tests/fixtures/batch.csv', returntype='geographies')
    assert isinstance(result, list)
    resultdict = {int(r['id']): r for r in result}
    assert resultdict[3]['tigerlineid'] == '59653655'
    assert resultdict[3]['statefp'] == '36'
    assert resultdict[2]['match'] is False
select entity_id, json_build_object('address1', trim(lower(address_1)), 'address2', trim(lower(address_1b)), 'city', trim(lower(city_1)), 'state', trim(lower(state_1)), 'zipcode', zipcode_1) as address, enroll_date::date as update_date from raw.epic_adress join patients_ucm.main ON mrn = patient_id::int join raw.final_mrns USING (mrn) ''') df = query_db(query, connection) print(df.head()) cg = CensusGeocode() def getGeocode(x): try: address = " ".join([str(i) for i in filter(None, x.values())]) #result = cg.onelineaddress(address) #geoid = result[0]['geographies']['Census Tracts'][0]['GEOID'] except: #del x['address2'] print(address) return None return geoid df['geoid'] = df['address'].apply(getGeocode)
def census_geocode(datafile, delim, header, start, addcol):
    """
    (str,str,str,int,list[int]) -> files

    Geocode a delimited file of addresses with the Census Geocoder, writing
    matched rows to <name>_matched, unmatched rows to <name>_nomatch, and a
    summary report to <name>_report_<timestamp>.txt.

    datafile -- file or path to process
    delim    -- the file's delimiter
    header   -- 'y'/'yes' if the file has a header row, 'n'/'no' otherwise
    start    -- 0 to read from the beginning, or a row index to resume from
    addcol   -- list of 1-based column numbers holding address components:
                one entry (unparsed address) or four (street/city/state/zip)

    Input files should be verified as UTF-8 before matching.
    """
    import csv, traceback, time, datetime  # dropped unused 'locale' import
    from urllib import error
    from censusgeocode import CensusGeocode
    cg = CensusGeocode()

    # Tally occurrences of each no-match/error reason for the final report.
    def sumdict(theval, thedict):
        if theval in thedict:
            thedict[theval] = thedict[theval] + 1
        else:
            thedict[theval] = 1

    # ---- Validate arguments ------------------------------------------------
    if type(addcol) is not list:
        print('Position numbers with address components must be provided in a list, i.e. [3] or [3,4,5,6]')
        raise SystemExit
    if len(addcol) == 1:
        unparsed = addcol[0] - 1
    elif len(addcol) == 4:
        addstreet = addcol[0] - 1
        thecity = addcol[1] - 1
        thestate = addcol[2] - 1
        thezip = addcol[3] - 1
    else:
        print('Inappropriate number of positions given - provide either 1 for unparsed or 4 for parsed')
        raise SystemExit
    # BUG FIX: was `header.lower() == 'y' or 'yes' or 'n' or 'no'`, which is
    # always truthy, so invalid header flags were never rejected.
    if header.lower() in ('y', 'yes', 'n', 'no'):
        pass
    else:
        print("Must indicate whether there is a header row with 'y' or 'n'")
        raise SystemExit

    # ---- Open files, set up environments -----------------------------------
    # Match lists are for debugging; results are written to the output files
    # as each record is handled.
    matched = []
    nomatch = []
    matchfails = {}
    counter = 0
    namefile = datafile[:-4]
    if datafile[-4:] == '.csv':
        ext = '.csv'
    else:
        ext = '.txt'
    readfile = csv.reader(open(datafile, 'r', encoding='utf-8',
                               errors='ignore'), delimiter=delim)
    matchfile = open(namefile + '_matched' + ext, 'a', newline='',
                     encoding='utf-8', errors='ignore')
    matchwrite = csv.writer(matchfile, delimiter=delim, quotechar='"',
                            quoting=csv.QUOTE_MINIMAL)
    nomatchfile = open(namefile + '_nomatch' + ext, 'a', newline='',
                       encoding='utf-8', errors='ignore')
    nomatchwrite = csv.writer(nomatchfile, delimiter=delim, quotechar='"',
                              quoting=csv.QUOTE_MINIMAL)
    # BUG FIX: was `header.lower() == ('y' or 'yes')`, which only compares
    # against 'y', so a 'yes' header row was geocoded as data.
    if header.lower() in ('y', 'yes') and int(start) == 0:
        headrow = next(readfile)
        headnomatch = list(headrow)
        headnomatch.append('error')
        nomatchwrite.writerow(headnomatch)
        headmatch = list(headrow)
        newhead = [
            'matched_add', 'longitude', 'latitude', 'ansifips', 'stateid',
            'countyid', 'tractid', 'blkgrpid', 'blkid', 'block', 'tract',
            'county', 'state'
        ]
        headmatch.extend(newhead)
        matchwrite.writerow(headmatch)
    print('Match process launched...')

    # ---- Main matching loop ------------------------------------------------
    # Outer try/except: any unexpected error stops matching and falls through
    # to the report.  Inner while True: retries HTTP 500s up to 5 times, then
    # writes a no-match.  Inner for i in range(4): retries lookups whose
    # geography carries a 'status' message (server-side java error) up to 3
    # extra times before writing a no-match.
    for index, record in enumerate(readfile):
        try:
            if index < int(start):
                continue
            else:
                error_count = 0
                record = [x.strip() for x in record]
                while True:
                    try:
                        for i in range(4):
                            if len(addcol) == 1:
                                result = cg.onelineaddress(record[unparsed])
                            else:
                                result = cg.address(record[addstreet],
                                                    city=record[thecity],
                                                    state=record[thestate],
                                                    zipcode=record[thezip])
                            if len(result) == 0:
                                # Clean no-match: record it and move on.
                                record.append('Match not found')
                                nomatch.append(record)
                                sumdict(record[-1], matchfails)
                                nomatchwrite.writerow(record)
                            else:
                                geo = result[0].get('geographies')
                                blockinfo = geo.get('2010 Census Blocks')
                                tractinfo = geo.get('Census Tracts')
                                countyinfo = geo.get('Counties')
                                stateinfo = geo.get('States')
                                # A 'status' key only has a value when the
                                # server failed to return that geography.
                                problemlist = [
                                    blockinfo[0].get('status'),
                                    tractinfo[0].get('status'),
                                    countyinfo[0].get('status'),
                                    stateinfo[0].get('status')
                                ]
                                if any(v is not None for v in problemlist):
                                    if i < 3:
                                        print('Trying to return geography at index ' + str(index))
                                        time.sleep(1)
                                        continue
                                    else:
                                        print('Writing a no match for failed geography at index ' + str(index))
                                        record.append('Failed to return geography')
                                        nomatch.append(record)
                                        sumdict(record[-1], matchfails)
                                        nomatchwrite.writerow(record)
                                else:
                                    # Derive the FIPS hierarchy by slicing
                                    # the 15-character block GEOID.
                                    ansifips = blockinfo[0].get('GEOID')
                                    stateid = ansifips[0:2]
                                    countyid = ansifips[2:5]
                                    tractid = ansifips[5:11]
                                    blkgrpid = ansifips[11]
                                    blkid = ansifips[11:]
                                    blkname = blockinfo[0].get('NAME')
                                    trctname = tractinfo[0].get('NAME')
                                    coname = countyinfo[0].get('NAME')
                                    stname = stateinfo[0].get('NAME')
                                    match = result[0].get('matchedAddress')
                                    coord = result[0].get('coordinates')
                                    lng = str(coord.get('x'))
                                    lat = str(coord.get('y'))
                                    newitems = match, lng, lat, ansifips, stateid, countyid, tractid, blkgrpid, blkid, blkname, trctname, coname, stname
                                    record.extend(newitems)
                                    matched.append(record)
                                    matchwrite.writerow(record)
                            # Clean match or no-match handled: stop retrying.
                            break
                        counter = counter + 1
                        time.sleep(1)
                        if counter % 100 == 0:
                            print(counter, ' records processed so far...')
                            print('Last record written was:')
                            print(record)
                        if counter % 1000 == 0:
                            time.sleep(5)
                    except error.HTTPError as server_error:
                        if server_error.code == 500:
                            error_count = error_count + 1
                            if error_count < 5:
                                print('Got a server error, will try again from index ' + str(index))
                                time.sleep(2)
                                continue
                            else:
                                print('Writing a no match as server failed to return result at index ' + str(index))
                                record.append('Server failed to return result')
                                nomatch.append(record)
                                sumdict(record[-1], matchfails)
                                counter = counter + 1
                                nomatchwrite.writerow(record)
                    # No retryable exception: done with this record.
                    break
        except Exception:
            print('An error has occurred. File stopped at index ' + str(index))
            traceback.print_exc()
            break

    # ---- Close all files, write match summaries to report ------------------
    matchfile.close()
    nomatchfile.close()
    nomatch_cnt = len(nomatch)
    matched_cnt = len(matched)
    print(counter, ' records processed in total.')
    print(matched_cnt, ' records matched and ', nomatch_cnt,
          ' records had no matches.')
    ts = datetime.datetime.now().strftime("%Y_%m_%d_%H%M")
    report = open(namefile + '_report_' + ts + '.txt', 'w')
    report.write('Summary of Census Geocoding Output for ' + datafile +
                 ' on ' + ts + '\n' + '\n')
    report.write(str(counter) + ' records processed in total.' + '\n')
    report.write(str(matched_cnt) + ' records matched' + '\n')
    report.write(str(nomatch_cnt) + ' records had no matches' + '\n' + '\n')
    report.write('For the unmatched records, results and errors:' + '\n')
    for k, v in sorted(matchfails.items()):
        report.writelines('\t' + ': '.join([k, str(v)]) + '\n')
    report.close()
# See: https://docs.python.org/3/reference/import.html from censusgeocode import CensusGeocode # This dictionary will map the counties in the input file to human-readable borough names # Knowing which values to put here required looking at the input files carefully counties = { 'BX': 'Bronx', 'Q': 'Queens', 'NY': 'New York', 'K': 'Brooklyn', 'R': 'Staten Island' } # Create a new CensusGeocode object # See the docs here: https://github.com/fitnr/censusgeocode cg = CensusGeocode() # define the input and output file names infile = 'citations.csv' outfile = 'new-citations.csv' # Open the input file for reading with open(infile) as f: # The DictReader will return every row of the CSV as a dictionary reader = csv.DictReader(f) # Open up the output file for writing. This is delete the contents, if any. with open(outfile, 'w') as g: # The DictWriter will write a dictionary, but we must tell it the names of the output fields # We add three new fields to reader.fieldnames (a list) using list add operator (+) newfieldnames = reader.fieldnames + [