def parse_csv(csv_file, verbose):
    """Build Google Geocode query URLs from street address and city columns.

    Args:
        csv_file: Path to a CSV file (passed in as a CLI argument) that
            contains at least 'Tax City', 'Census Tract' and
            'Service Address' headers.
        verbose: Flag forwarded to the URL builder for extra output.

    Returns:
        Tuple of (list of properly formatted geocode query URLs,
        index of the census-tract header column).
    """
    get_url = BuildURL("https://maps.googleapis.com/maps/api/geocode/json?address=", state="GA")
    # e.g. "123 Main St" — leading digits, whitespace, then the street name.
    street = compile(r"\d+\s\w.*")
    msg = "\nWould you like to use an api key this time? If not you will be prompted to add one as the query limit\n" \
          "is reached. 2500 Queries will run without one just fine: y/n "
    with open(csv_file, 'r') as open_csv:
        csv_stream_object = reader(open_csv)
        headers = next(csv_stream_object, None)
        (city_header_index, tract_header_index) = scrape_headers(headers)
        google_api_key = require_api(msg)
        print("\nPlease wait, Generating coordinate urls...\n")
        sleep(1)
        address = {'street': None, 'city': None}
        list_geocode_urls, list_of_cities = [], []
        for row in csv_stream_object:
            try:
                # Build list of possible serving territories based on city
                # from the index position of Tax City.
                list_of_cities.append(row[city_header_index])
            except UnboundLocalError as err:
                print(err)
                # logger.warn("Error: {}, You must have at least 'Tax City', 'Census Tract'"
                #             " and 'Service Address' headers in your file ".format(err))
            for field in row:
                # BUG FIX: a field matching the street pattern was stored
                # under 'city' (and the city under 'street'); the keys are
                # now assigned per the dict's declared intent.
                if search(street, field):
                    # row[row.index(field)] always resolves to field itself.
                    address['street'] = field
            for city in set(list_of_cities):
                # Assign a city for the query from the dynamic list.
                # (The original bare `except: raise` around this test was a
                # no-op and has been removed.)
                if city in row:
                    address['city'] = city
            list_geocode_urls.append(get_url.geo_url(address, verbose, api_key=google_api_key))
    print("Done...\n")
    return list_geocode_urls, tract_header_index
def get(self, verbose):
    """Fetch and decode JSON from every URL in ``self.urls``.

    Iterates over the stored URLs, collecting decoded JSON responses.
    When the API reports OVER_QUERY_LIMIT, prompts for a new api key and
    rewrites the remaining URLs with it so the run can continue.

    Args:
        verbose: When truthy, print per-request status information.

    Returns:
        List of decoded JSON response objects (one per successful URL).
    """
    print(self.msg)
    sleep(0.5)
    query_limit_reached = compile('OVER_QUERY_LIMIT')
    phase = 'Phase one:'
    responses = []
    for index, url in enumerate(self.urls):
        try:
            req = request.urlopen(url)
            sleep(0.3)  # throttle: be polite to the API between requests
            res = req.read()
            responses.append(loads(res.decode("utf-8")))
            if verbose:
                if self.endpoint != "google":
                    phase = 'Phase two:'
                print(phase, "call status from", self.endpoint, "for",
                      index + 1, "status", responses[index]['status'])
                print(url)
            if search(query_limit_reached, responses[index]['status']):
                # NOTE(review): this reassignment does not actually retry the
                # previous request — looks like incomplete retry logic; confirm.
                url = self.urls[index - 1]
                # BUG FIX: .format() was previously called on print()'s return
                # value (None), raising AttributeError; format the string itself.
                print("\nYou have reached a query limit at url {} for the current apiKey".format(str(index)))
                (google_api_key, wait) = require_api("Please choose a new api key to apply")
                # Strip out the old api key and regenerate the remaining urls.
                # BUG FIX: every rewritten url was stored at self.urls[index],
                # clobbering one slot; each remaining url is now updated at
                # its own position.
                for position, urls_left in enumerate(self.urls[index:], start=index):
                    try:
                        slice_point = urls_left.index('&key=')
                        stripped_url = urls_left[:slice_point]
                        # NOTE(review): assumes google_api_key already carries
                        # the '&key=' prefix — confirm against require_api.
                        self.urls[position] = ''.join([stripped_url, google_api_key])
                    except ValueError:
                        # No '&key=' present yet; just append the new key.
                        self.urls[position] = ''.join([urls_left, google_api_key])
        except (error.HTTPError, error.URLError, OSError) as err:
            print("Error on", url, err.reason)
    return responses