예제 #1
0
def parse_csv(csv_file, verbose):
    """Build one Google Geocode query url per data row of a csv file.

    The first row is treated as the header row and handed to
    ``scrape_headers`` to locate the city and tract columns. For every
    following row a street is picked by pattern (digits, one whitespace
    character, then text) and a city is taken from the city column; both
    are passed to ``BuildURL.geo_url``.

    Args:
        csv_file: Path of the csv file to read (passed in as cli argument).
        verbose: Forwarded unchanged to ``geo_url``.

    Returns:
        A tuple ``(list_geocode_urls, tract_header_index)``: the urls for
        querying lat lng using Google Geocode, one per data row in file
        order, and the tract index reported by ``scrape_headers``.
    """
    get_url = BuildURL("https://maps.googleapis.com/maps/api/geocode/json?address=", state="GA")

    # Raw string: "\d", "\s" and "\w" are regex escapes, not string escapes.
    street = compile(r"\d+\s\w.*")
    msg = "\nWould you like to use an api key this time? If not you will be prompted to add one as the query limit\n" \
          "is reached. 2500 Queries will run without one just fine: y/n "

    # newline='' is what the csv module asks for so that quoted fields
    # containing line breaks are parsed correctly.
    with open(csv_file, 'r', newline='') as open_csv:
        csv_stream_object = reader(open_csv)
        headers = next(csv_stream_object, None)

        (city_header_index, tract_header_index) = scrape_headers(headers)

        google_api_key = require_api(msg)

        print("\nPlease wait, Generating coordinate urls...\n")
        sleep(1)

        # NOTE(review): this dict is shared by all rows, so a row in which
        # no street (or no city) is found reuses the previous row's value.
        address = {'street': None, 'city': None}

        list_geocode_urls = []
        # Cities seen so far; kept as a set so it is not rebuilt per row.
        known_cities = set()
        for row in csv_stream_object:
            row_city = None
            try:
                row_city = row[city_header_index]
            except (IndexError, TypeError) as err:
                # IndexError: row is shorter than the city column (e.g. a
                # blank line). TypeError: the header lookup did not yield
                # an integer index. Report it and keep going so the url
                # list stays aligned with the rows of the file.
                print(err)
                # logger.warn("Error: {}, You must have at least 'Tax City', 'Census Tract'"
                #             " and 'Service Address' headers in your file ".format(err))
            else:
                known_cities.add(row_city)

            for field in row:
                if search(street, field):
                    # Street found in fields; the last matching field wins.
                    address['street'] = field

            if row_city is not None:
                # Prefer this row's own city column.
                address['city'] = row_city
            else:
                # Fall back to any field holding a city seen in earlier
                # rows; the last such field in row order wins.
                for field in row:
                    if field in known_cities:
                        address['city'] = field

            list_geocode_urls.append(get_url.geo_url(address, verbose, api_key=google_api_key))

        print("Done...\n")
    return list_geocode_urls, tract_header_index
예제 #2
0
def main(csv, verbose=None):
    """Run the report pipeline for one csv file and print the output paths.

    Steps, in order: greet the user and wait for <Enter>, obtain a log name
    from ``CheckSystemDate``, build geocode urls with ``parse_csv``, fetch
    them through an ``HttpWorker``, turn the responses into FCC urls, drop
    the urls flagged by ``filter_url_errors``, fetch the remaining ones,
    collect the ``["Block"]["FIPS"]`` value of every response and hand the
    result to ``write_to_csv``.

    Args:
        csv: The csv file to process; passed to ``parse_csv`` and
            ``write_to_csv``.
        verbose: Forwarded to the url builders and http workers.
    """
    todo_banner = """
    TODO: (Does not effect current functionality.)

        #  Cut out tracts that don't start with 1304.
        #  Refactor file splitter code as large files are limited by 2500 queries a day or use multiple API keys
        #  Refactor elements when time permits, shift towards OOP where possible

    """
    print("Welcome to the 477 report generator")
    print(todo_banner)
    input("Use -h for more options. Press <Enter> to continue:")

    # Log name comes from the system date helper.
    log_name = CheckSystemDate().system_date()

    # TODO refactor file splitter code as large files are limited by 2500 queries a day or use multiple API keys

    # Geocode urls plus the tract index that write_to_csv needs later.
    geocode_urls, tract_column = parse_csv(csv, verbose)

    # Location lookup through the "google" endpoint.
    google_worker = HttpWorker(
        geocode_urls,
        msg="Getting accurate coordinates .. Please wait\n",
        endpoint="google",
    )
    coordinates = google_worker.get(verbose)

    # FCC urls derived from the location responses.
    fcc_builder = BuildURL("http://data.fcc.gov/api/block/find?format=json&censusYear=2010&")
    raw_fcc_urls = fcc_builder.fcc_url(coordinates, verbose)

    # Separate the usable urls from the positions reported as errors.
    usable_urls, failed_positions = filter_url_errors(raw_fcc_urls)

    # Block lookup through the "fcc" endpoint.
    fcc_worker = HttpWorker(
        usable_urls,
        msg="Getting accurate fips codes ...Please wait\n",
        endpoint="fcc",
    )
    block_records = fcc_worker.get(verbose)

    # One FIPS code per response, then a placeholder inserted at every
    # position that was reported as an error.
    fips = []
    for record in block_records:
        fips.append(record["Block"]["FIPS"])
    for position in failed_positions:
        fips.insert(position, "unable to retrieve location or tact data")

    with_err, no_err = write_to_csv(fips, csv, tract_column, log_name)

    print("\nWrote updated csv to disk:\n", with_err + "\n", no_err)