def find_lat_long(current_zip):
    """Look up a zipcode's coordinates with pgeocode and plot them.

    The zipcode is resolved once against the US dataset; the resulting
    latitude and longitude are passed to ``plot_lat_long`` together with the
    ``'City'`` entry of the record returned by ``check_db(current_zip)``.

    Args:
        current_zip: The zipcode to look up (assumed to be a single code, so
            the query yields one record -- TODO confirm against callers).
    """
    geolocator = Nominatim('us')
    # Query once and read the coordinates by label rather than by negative
    # position, so the lookup does not depend on the column order.
    location = geolocator.query_postal_code(current_zip)
    current_data = check_db(current_zip)
    plot_lat_long(location.latitude, location.longitude, current_data['City'])
def test_nominatim_query_postal_code():
    """Check the result type and content of French postal-code queries."""
    geocoder = Nominatim("fr")

    # A one-element list is expected to come back as a single-row DataFrame.
    single_row = geocoder.query_postal_code(["91120"])
    assert isinstance(single_row, pd.DataFrame)
    assert single_row.shape[0] == 1
    assert single_row.place_name.values[0] == "Palaiseau"

    # A bare string is expected to come back as a Series.
    record = geocoder.query_postal_code("91120")
    assert isinstance(record, pd.Series)
    assert record.place_name == "Palaiseau"

    # One row per requested code; the third code must have no finite latitude.
    batch = geocoder.query_postal_code(["33625", "31000", "99999"])
    assert batch.shape[0] == 3
    third_row = batch.iloc[2]
    assert not np.isfinite(third_row.latitude)
def test_nominatim_query_postal_code():
    """Exercise list and scalar postal-code queries against the 'fr' data."""
    fr_lookup = Nominatim('fr')

    # List input with one code -> one-row DataFrame.
    as_frame = fr_lookup.query_postal_code(['91120'])
    assert isinstance(as_frame, pd.DataFrame)
    n_rows = as_frame.shape[0]
    assert n_rows == 1
    assert as_frame.place_name.values[0] == 'Palaiseau'

    # Scalar input -> Series.
    as_series = fr_lookup.query_postal_code('91120')
    assert isinstance(as_series, pd.Series)
    assert as_series.place_name == 'Palaiseau'

    # Three codes -> three rows; the last row's latitude must not be finite.
    codes = ['33625', '31000', '99999']
    many = fr_lookup.query_postal_code(codes)
    assert many.shape[0] == 3
    last_latitude = many.iloc[2].latitude
    assert not np.isfinite(last_latitude)
def test_countries(country, pc1, location1, pc2, location2, distance12):
    """Check place names for two postal codes and the distance between them.

    Each postal code must resolve to a Series whose normalized ``place_name``
    contains the normalized expected location, and the distance reported by
    ``GeoDistance`` must match ``distance12`` within an absolute tolerance of 5.
    """
    geocoder = Nominatim(country)

    first = geocoder.query_postal_code(pc1)
    assert isinstance(first, pd.Series)
    assert _normalize_str(location1) in _normalize_str(first.place_name)

    second = geocoder.query_postal_code(pc2)
    assert isinstance(second, pd.Series)
    assert _normalize_str(location2) in _normalize_str(second.place_name)

    measured = GeoDistance(country).query_postal_code(pc1, pc2)
    assert isinstance(measured, float)
    assert measured == pytest.approx(distance12, abs=5)
def test_download_dataset(temp_dir):
    """Check that creating a Nominatim instance downloads the dataset once
    and that a second instance exposes equivalent data.

    ``temp_dir`` is the directory in which ``FR.txt`` is expected to appear
    after the first instance is created.
    """
    data_file = os.path.join(temp_dir, "FR.txt")
    assert not os.path.exists(data_file)
    nomi = Nominatim("fr")
    # the data file was downloaded
    assert os.path.exists(data_file)
    res = nomi.query_postal_code("77160")

    nomi2 = Nominatim("fr")
    # Query the SECOND instance; querying `nomi` again would leave nomi2
    # untested.
    res2 = nomi2.query_postal_code("77160")

    assert_array_equal(nomi._data.columns, nomi2._data.columns)
    assert_array_equal(nomi._data_frame.columns, nomi2._data_frame.columns)
    # Compare across the two instances (comparing an object with itself is
    # always true and checks nothing).
    assert nomi._data.shape == nomi2._data.shape
    assert nomi._data_frame.shape == nomi2._data_frame.shape

    assert len(res.place_name.split(",")) > 1
    assert len(res2.place_name.split(",")) > 1
def test_download_dataset(temp_dir):
    """Check that creating a Nominatim instance downloads the dataset once
    and that a second instance exposes equivalent data.

    ``temp_dir`` is the directory in which ``FR.txt`` is expected to appear
    after the first instance is created.
    """
    data_file = os.path.join(temp_dir, 'FR.txt')
    assert not os.path.exists(data_file)
    nomi = Nominatim('fr')
    # the data file was downloaded
    assert os.path.exists(data_file)
    res = nomi.query_postal_code('77160')

    nomi2 = Nominatim('fr')
    # Query the SECOND instance; querying `nomi` again would leave nomi2
    # untested.
    res2 = nomi2.query_postal_code('77160')

    assert_array_equal(nomi._data.columns, nomi2._data.columns)
    assert_array_equal(nomi._data_unique.columns, nomi2._data_unique.columns)
    # Compare across the two instances (comparing an object with itself is
    # always true and checks nothing).
    assert nomi._data.shape == nomi2._data.shape
    assert nomi._data_unique.shape == nomi2._data_unique.shape

    assert len(res.place_name.split(',')) > 1
    assert len(res2.place_name.split(',')) > 1
def test_countries(country, pc1, location1, pc2, location2, distance12):
    """Check place names, the ``country_code`` field and the inter-code distance.

    The "IE" case is marked as an expected failure before any lookup is made.
    """
    if country == "IE":
        pytest.xfail("TODO: Investigate failure for IE")

    lookup = Nominatim(country)

    # First postal code: Series result, matching place name, country_code present.
    hit1 = lookup.query_postal_code(pc1)
    assert isinstance(hit1, pd.Series)
    assert _normalize_str(location1) in _normalize_str(hit1.place_name)
    assert "country_code" in hit1.index

    # Second postal code: Series result with matching place name.
    hit2 = lookup.query_postal_code(pc2)
    assert isinstance(hit2, pd.Series)
    assert _normalize_str(location2) in _normalize_str(hit2.place_name)

    # Distance between the two codes, within an absolute tolerance of 5.
    separation = GeoDistance(country).query_postal_code(pc1, pc2)
    assert isinstance(separation, float)
    assert separation == pytest.approx(distance12, abs=5)
class PgeocodeWrapper:
    """Geocode a DataFrame of postal codes in fixed-size partitions.

    The caller is expected to replace ``self.df`` (initially empty) with a
    DataFrame that has a ``zipcode`` column before calling :meth:`run`. Each
    partition is written to ``storage_dir`` as
    ``<country_code>_part_<n>.csv`` with added ``latitude`` and ``longitude``
    columns.
    """

    def __init__(self, partition_size, storage_dir, country='US'):
        """requires separate inputs for each country

        Args:
            partition_size: Number of rows geocoded and written per partition.
            storage_dir: Directory the per-partition CSV files are written to.
            country: Either a key of ``pgeocode_country_codes`` or (compared
                case-insensitively) one of its values.

        Raises:
            ValueError: If ``country`` matches neither a key nor a value of
                ``pgeocode_country_codes``.
        """
        self.partition_size = partition_size
        self.storage_dir = storage_dir
        logging.info('Storage at %s.', storage_dir)
        self.df = pd.DataFrame()
        if country in pgeocode_country_codes:
            self.country_code = pgeocode_country_codes[country]
        elif country.upper() in pgeocode_country_codes.values():
            self.country_code = country
        else:
            # Previously this fell through with country_code unset and failed
            # later with an unrelated AttributeError.
            raise ValueError('Unsupported country: %r' % (country,))
        logging.info('initializing with country_code %s.', self.country_code)
        self.geolocator = Nominatim(self.country_code)

    def geocode(self, x):
        """takes zipcode

        Returns the ``latitude`` and ``longitude`` attributes of the
        geolocator's ``query_postal_code(x)`` result, as a 2-tuple.
        """
        locations = self.geolocator.query_postal_code(x)
        return locations.latitude, locations.longitude

    def manage_partitioning(self, partition_number):
        """Geocode the next partition of ``self.df`` and write it to CSV.

        The partition covers rows ``self.start_i`` up to
        ``self.start_i + self.partition_size``; ``self.start_i`` is advanced
        by ``partition_size`` afterwards.

        Args:
            partition_number: Index used only in the output file name.

        Returns:
            A ``(latitudes, longitudes)`` pair of lists for this partition.
        """
        start = self.start_i
        end = start + self.partition_size
        # Work on a copy so the added columns do not touch self.df here.
        partition = self.df[start:end].copy()
        _zips = partition.zipcode.tolist()
        latitudes, longitudes = self.geocode(_zips)
        partition['latitude'] = latitudes.tolist()
        partition['longitude'] = longitudes.tolist()
        filename = '%s_part_%s.csv' % (self.country_code, partition_number)
        filepath = os.path.join(self.storage_dir, filename)
        partition.to_csv(filepath, index=False)
        self.start_i += self.partition_size
        return list(latitudes), list(longitudes)

    def run(self):
        """Geocode all of ``self.df`` partition by partition.

        On success ``self.df`` gains ``latitude`` and ``longitude`` columns.
        Any exception raised while processing is logged and its traceback
        printed; it is not re-raised, and ``self.df`` is then left without the
        new columns.
        """
        # An empty frame simply yields zero partitions below, so no special
        # case is needed.
        self.start_i = 0
        n_partitions = int(np.ceil(len(self.df) / self.partition_size))
        latitudes, longitudes = [], []
        try:
            for i in range(n_partitions):
                lats, lons = self.manage_partitioning(i)
                latitudes += lats
                longitudes += lons
            self.df['latitude'] = latitudes
            self.df['longitude'] = longitudes
        except Exception as e:
            logging.error('Partition failed. Error: %s', e)
            traceback.print_exc()  # for initial dev TODO: create debug mode
def zip_to_coords(local_zip_code, local_country='us'):
    """Resolve a zip code to ``[latitude, longitude]`` and print a summary.

    This is very slow and should only be called when necessary.

    Args:
        local_zip_code: The postal code to look up (assumed to be a single
            code, so the lookup yields one record -- TODO confirm).
        local_country: Country identifier passed to ``Nominatim``.

    Returns:
        A two-element list ``[latitude, longitude]``.
    """
    print("Retrieving location data...")
    local_nomi = Nominatim(local_country)
    local_location_details = local_nomi.query_postal_code(local_zip_code)

    # Read fields by label: integer keys on a label-indexed Series rely on a
    # deprecated positional fallback and on the column order staying fixed.
    local_latitude = local_location_details['latitude']
    local_longitude = local_location_details['longitude']

    print("IDENTIFIED: {}, {} - located at ({}, {}) in {}.".format(
        local_location_details['place_name'],
        local_location_details['state_code'],
        local_latitude,
        local_longitude,
        local_location_details['country_code']))

    return [local_latitude, local_longitude]
def _get_weather_gov_json(url, timeout=10):
    """GET ``url`` and return its parsed JSON body, or None on a non-200 status.

    A timeout is always supplied so an unresponsive server cannot block the
    caller indefinitely.
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        return None
    return json.loads(response.text)


def get_hourly_forecasts(country_code, zip_code, unit):
    """
    Fetches hourly forecast information from the Weather.gov API, for a given country and zip code.

    Params:
        country_code (str) the requested country, like "US"
        zip_code (str) the requested postal code, like "20057"
        unit: the target temperature unit, passed through to format_temp

    Example: result = get_hourly_forecasts(country_code="US", zip_code="20057", unit=unit)

    Returns the forecast info "hourly_forecasts" (at most the first 24 periods)
    along with more information about the requested geography ("city_name").
    Returns None when the postal code cannot be resolved to a latitude,
    longitude, place name and state code, or when either API request does not
    answer with status 200.
    """
    geocoder = Geocoder(country_code)
    geo = geocoder.query_postal_code(zip_code)

    # using a null-checking method from pandas because geo is a pandas Series:
    if isnull(geo.latitude) or isnull(geo.longitude) or isnull(
            geo.place_name) or isnull(geo.state_code):
        return None

    # unfortunately the weather.gov api makes us do two requests or use a more
    # sophisticated caching strategy (see api docs)
    parsed_response = _get_weather_gov_json(
        f"https://api.weather.gov/points/{geo.latitude},{geo.longitude}")
    if parsed_response is None:
        return None

    parsed_forecast_response = _get_weather_gov_json(
        parsed_response["properties"]["forecastHourly"])
    if parsed_forecast_response is None:
        return None

    # consider returning the raw geo and parsed_forecast_response objects,
    # ... and using a different method to parse them further!
    # ... but we're doing that here for now as well:
    city_name = f"{geo.place_name}, {geo.state_code}"  #> Washington, DC

    hourly_forecasts = [
        {
            "timestamp": format_hour(period["startTime"]),
            "temp": format_temp(period["temperature"],
                                period["temperatureUnit"], unit),
            "conditions": period["shortForecast"],
            "image_url": period["icon"],
        }
        for period in parsed_forecast_response["properties"]["periods"][0:24]
    ]

    return {"city_name": city_name, "hourly_forecasts": hourly_forecasts}
def test_nominatim_query_postal_code_multiple():
    """With ``unique=False`` a single code must return one row per place."""
    expected_places = [
        "Wellen",
        "Groß Rodensleben",
        "Irxleben",
        "Eichenbarleben",
        "Klein Rodensleben",
        "Niederndodeleben",
        "Hohendodeleben",
        "Ochtmersleben",
    ]

    matches = Nominatim("de", unique=False).query_postal_code("39167")

    assert isinstance(matches, pd.DataFrame)
    assert matches.shape[0] == len(expected_places)
    # Every returned place name has to be one of the expected ones.
    assert all(name in expected_places for name in matches.place_name.values)
def test_nominatim_query_postal_code_multiple():
    """Query '39167' with ``unique=False`` and check all place rows come back."""
    de_lookup = Nominatim('de', unique=False)
    expected_places = [
        'Wellen',
        'Groß Rodensleben',
        'Irxleben',
        'Eichenbarleben',
        'Klein Rodensleben',
        'Niederndodeleben',
        'Hohendodeleben',
        'Ochtmersleben',
    ]

    rows = de_lookup.query_postal_code('39167')
    assert isinstance(rows, pd.DataFrame)

    row_count = rows.shape[0]
    assert row_count == len(expected_places)

    # No row may name a place outside the expected list.
    returned_names = rows.place_name.values
    for returned in returned_names:
        assert returned in expected_places
def test_nominatim_all_countries(country):
    """Querying the code "00000" for ``country`` must return a Series."""
    lookup_result = Nominatim(country).query_postal_code("00000")
    assert isinstance(lookup_result, pd.Series)