from csv import reader
from re import compile, search
from time import sleep

# BuildURL, scrape_headers, and require_api are project helpers imported
# elsewhere in this module.


def parse_csv(csv_file, verbose):
    """
    Builds urls based on street address and city.

    Args:
        csv_file: A csv file passed in as a cli argument.
        verbose: Flag enabling verbose output while urls are built.

    Returns:
        A list of properly formatted urls for querying lat/lng using Google
        Geocode, plus the index of the census tract header to write back to.
    """
    get_url = BuildURL("https://maps.googleapis.com/maps/api/geocode/json?address=", state="GA")
    street = compile(r"\d+\s\w.*")  # raw string: a house number followed by a street name
    msg = "\nWould you like to use an api key this time? If not, you will be prompted to add one as the query limit\n" \
          "is reached. 2500 queries will run without one just fine: y/n "
    with open(csv_file, 'r') as open_csv:
        csv_stream_object = reader(open_csv)
        headers = next(csv_stream_object, None)
        (city_header_index, tract_header_index) = scrape_headers(headers)
        google_api_key = require_api(msg)
        print("\nPlease wait, generating coordinate urls...\n")
        sleep(1)
        address = {'street': None, 'city': None}
        list_geocode_urls, list_of_cities = [], []
        for row in csv_stream_object:
            try:
                # Build the list of possible serving territories from the
                # city found at the index position of Tax City.
                list_of_cities.append(row[city_header_index])
            except UnboundLocalError as err:
                print(err)
                # logger.warn("Error: {}, You must have at least 'Tax City', 'Census Tract'"
                #             " and 'Service Address' headers in your file".format(err))
            for field in row:
                if search(street, field):  # find the street address among the row's fields
                    address['street'] = field
            for city in set(list_of_cities):  # assign a city for the query from the dynamic list
                if city in row:
                    address['city'] = city
            list_geocode_urls.append(get_url.geo_url(address, verbose, api_key=google_api_key))
    print("Done...\n")
    return list_geocode_urls, tract_header_index
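
# A hedged, standalone sketch (not used by the pipeline) of how a geocode url
# like the ones parse_csv collects could be assembled with only the standard
# library. BuildURL.geo_url presumably does something equivalent, but its
# internals live elsewhere in this project, so the exact separator and key
# handling below are assumptions.
def _example_geo_url(address, state="GA", api_key=None):
    from urllib.parse import quote_plus
    base = "https://maps.googleapis.com/maps/api/geocode/json?address="
    # Google's endpoint takes a single url-encoded address string.
    url = base + quote_plus("{}, {}, {}".format(address['street'], address['city'], state))
    if api_key:  # the key is optional below Google's free daily query limit
        url += "&key=" + api_key
    return url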
def main(csv, verbose=None):
    print("Welcome to the 477 report generator")
    print("""
    TODO: (Does not affect current functionality.)
    # Cut out tracts that don't start with 1304.
    # Refactor file splitter code, as large files are limited by 2500 queries a day, or use multiple API keys.
    # Refactor elements when time permits; shift towards OOP where possible.
    """)
    input("Use -h for more options. Press <Enter> to continue:")

    # Create a timestamp/log file name that is OS sensitive.
    date = CheckSystemDate()
    log = date.system_date()

    # TODO: refactor file splitter code, as large files are limited by 2500 queries a day, or use multiple API keys.
    # final_csv = split_file(csv)

    # Parse the csv and build location urls; also pop off the index of the
    # tract column to write back to.
    (geocode_urls, tract_header_index) = parse_csv(csv, verbose)

    # Get location information.
    get_location_data = HttpWorker(geocode_urls, msg="Getting accurate coordinates... Please wait\n", endpoint="google")
    location_data = get_location_data.get(verbose)

    # Build accurate FCC urls from the location information.
    create_fcc_urls = BuildURL("http://data.fcc.gov/api/block/find?format=json&censusYear=2010&")
    fcc_urls = create_fcc_urls.fcc_url(location_data, verbose)

    # Drop urls that could not be built from bad location data, keeping their
    # row indices so placeholders can be re-inserted in order below.
    (clean_fcc_urls, fcc_error_indices) = filter_url_errors(fcc_urls)

    # Get json data for each customer.
    get_fcc_data = HttpWorker(clean_fcc_urls, msg="Getting accurate fips codes... Please wait\n", endpoint="fcc")
    tract_data = get_fcc_data.get(verbose)

    # FIPS are the 15 digit census block codes.
    fips = [code["Block"]["FIPS"] for code in tract_data]
    for error in fcc_error_indices:
        fips.insert(error, "unable to retrieve location or tract data")

    (with_err, no_err) = write_to_csv(fips, csv, tract_header_index, log)
    print("\nWrote updated csv to disk:\n", with_err + "\n", no_err)
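
# A hedged sketch of a command line entry point wiring up main(). The "-h"
# hint printed above implies the real project already parses arguments
# somewhere, so the flag names here are illustrative assumptions, not the
# actual interface.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="477 report generator")
    parser.add_argument("csv", help="input csv file containing address data")
    parser.add_argument("-v", "--verbose", action="store_true",
                        help="print urls and responses as they are processed")
    args = parser.parse_args()
    main(args.csv, verbose=args.verbose)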