def geocode(fp):
    """
    This function takes a .csv file of addresses and geocodes the text in the
    column named 'addr' into point features.

    Args:
        fp (string): filepath of the address .csv file

    Returns:
        points (geodataframe): geodataframe of the locations as point features
    """
    from geopandas.tools import geocode

    # Read the data
    data = pd.read_csv(fp, sep=';')

    # Geocode addresses using the provider Nominatim.
    # A custom "application name" is needed as the user_agent parameter!
    geo = geocode(data['addr'], provider='nominatim',
                  user_agent='qmslup_group9', timeout=4)
    points = geo.join(data)
    return points
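# A minimal usage sketch (assumed, not from the original source): 'my_addresses.csv'
# is an illustrative semicolon-separated file with an 'addr' column, and pandas is
# expected to be imported at module level as pd.
import pandas as pd

points = geocode("my_addresses.csv")
points.to_file("my_addresses.shp")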
def ShortestPath(orig_address, dest_address):
    """
    This function takes two addresses as input (origin and destination).
    The addresses should be strings. It then calculates the quickest route from
    origin to destination by driving and presents it on a map.
    Please note that the function only works in Helsinki. Because the network
    data is heavy, the function might be slow.
    """
    # Retrieve the driving network from Helsinki. This was first done with the bike
    # network, but none of the tested nodes were connected to the biking network,
    # so it was changed to driving for the sake of convenience.
    place_name = "Helsinki, Finland"
    graph = ox.graph_from_place(place_name, network_type="drive")

    # Project the graph
    graph_proj = ox.project_graph(graph)

    # Access projected nodes and edges
    nodes_proj, edges_proj = ox.graph_to_gdfs(graph_proj, nodes=True, edges=True)

    # Create a dataframe with the addresses as input
    d = {"id": [1, 2], "addr": [orig_address, dest_address]}
    data = pd.DataFrame(data=d)

    # Geocode the addresses so the dataframe becomes a geodataframe
    # (with location information extracted by geocoding)
    geo = geocode(data["addr"], provider="nominatim", user_agent="SK", timeout=4)

    # Check the CRS of both layers; they are different
    CRS(geo.crs).to_epsg()
    CRS(edges_proj.crs).to_epsg()

    # Convert geo to the same CRS as the projected network
    geo = geo.to_crs(epsg=32635)

    # Collect the coordinates retrieved from the dataframe.
    # Y comes first because it is the latitude information.
    orig_xy = [geo["geometry"][0].y, geo["geometry"][0].x]
    dest_xy = [geo["geometry"][1].y, geo["geometry"][1].x]

    # Find the nearest nodes to the origin and target point coordinates (euclidean distance)
    orig_node = ox.get_nearest_node(graph_proj, orig_xy, method='euclidean')
    target_node = ox.get_nearest_node(graph_proj, dest_xy, method='euclidean')

    # Calculate the shortest path (using Dijkstra's algorithm)
    route = nx.shortest_path(G=graph_proj, source=orig_node, target=target_node, weight='time')

    # Plot the shortest path
    fig, ax = ox.plot_graph_route(graph_proj, route, origin_point=orig_xy, destination_point=dest_xy)

    # Save and return the figure
    output_fig = "outputs/shortest_path_from_" + orig_address + "_to_" + dest_address + ".png"
    plt.savefig(output_fig)
    return fig
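# A minimal usage sketch (assumed, not from the original source): these are the
# module-level imports the function body relies on, and the two Helsinki addresses
# are illustrative placeholders. Note that the function saves a .png under an
# 'outputs/' folder, which must already exist.
import osmnx as ox
import networkx as nx
import pandas as pd
import matplotlib.pyplot as plt
from pyproj import CRS
from geopandas.tools import geocode

fig = ShortestPath("Simonkatu 9, Helsinki", "Itäkatu 1, Helsinki")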
def do_geocode(row, attempt=1, max_attempts=max_att):
    # max_att and sec are module-level settings (maximum retries and back-off seconds)
    try:
        return geocode(row['Address'], provider='nominatim')
    except GeocoderTimedOut:
        time.sleep(sec)
        if attempt <= max_attempts:
            return do_geocode(row, attempt=attempt + 1, max_attempts=max_att)
        raise
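# A minimal driving sketch for the retry helper above (assumed, not from the
# original source): max_att and sec are illustrative module-level settings, and
# the two addresses are placeholders.
import time
import pandas as pd
from geopandas.tools import geocode
from geopy.exc import GeocoderTimedOut

max_att = 5   # maximum number of retry attempts per address
sec = 2       # seconds to wait before retrying after a timeout

df = pd.DataFrame({'Address': ['Kamppi, Helsinki', 'Pasila, Helsinki']})
geocoded = [do_geocode(row) for _, row in df.iterrows()]
results = pd.concat(geocoded, ignore_index=True)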
def geocoder(row):
    try:
        point = geocode(row, provider='nominatim').geometry.iloc[0]
        return pd.Series({"Latitude": point.y,
                          "Longitude": point.x,
                          "geometry": point})
    except:
        return None
def my_geocoder(row):
    print(row)
    try:
        point = geocode(row, provider='nominatim').geometry.iloc[0]
        return pd.Series({'Latitude': point.y,
                          'Longitude': point.x,
                          'geometry': point})
    except:
        return None
def companies_geocoder(row):
    try:
        address_full = geocode(row, provider='nominatim', timeout=5, scheme='http')
        address = address_full.address.iloc[0]  # Get the address line as a string
        point = address_full.geometry.iloc[0]
        return pd.Series({'Y': point.y, 'X': point.x, 'Address': address})
    except:
        return None
def test_forward(self):
    with mock.patch('geopy.geocoders.googlev3.GoogleV3.geocode',
                    ForwardMock()) as m:
        g = geocode(self.locations, provider='googlev3', timeout=2)
        self.assertEqual(len(self.locations), m.call_count)

    n = len(self.locations)
    self.assertIsInstance(g, gpd.GeoDataFrame)

    expected = GeoSeries([Point(float(x) + 0.5, float(x)) for x in range(n)],
                         crs=from_epsg(4326))
    assert_geoseries_equal(expected, g['geometry'])
    tm.assert_series_equal(g['address'],
                           pd.Series(self.locations, name='address'))
def geo_code(df):
    df_new = df.copy()
    address = df.Address + ', ' + df.City + ', ' + df.State + ' ' + df.Zip.map(str)
    try:
        geo = geocode(address, provider='arcgis')
    except:
        print('Error geocoding address. Aborting script')
        sys.exit()
    df_new['Longitude'] = geo.geometry.x
    df_new['Latitude'] = geo.geometry.y
    df_new['AddressFound'] = geo.address
    return df_new
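# A minimal usage sketch (assumed, not from the original source): the column names
# match what geo_code expects and the single record is illustrative.
import sys
import pandas as pd
from geopandas.tools import geocode

sample = pd.DataFrame({'Address': ['1600 Pennsylvania Ave NW'],
                       'City': ['Washington'],
                       'State': ['DC'],
                       'Zip': [20500]})
located = geo_code(sample)
print(located[['Latitude', 'Longitude', 'AddressFound']])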
def test_forward(locations, points):
    from geopy.geocoders import GeocodeFarm

    for provider in ['geocodefarm', GeocodeFarm]:
        with mock.patch('geopy.geocoders.GeocodeFarm.geocode',
                        ForwardMock()) as m:
            g = geocode(locations, provider=provider, timeout=2)
            assert len(locations) == m.call_count

        n = len(locations)
        assert isinstance(g, GeoDataFrame)

        expected = GeoSeries(
            [Point(float(x) + 0.5, float(x)) for x in range(n)],
            crs=from_epsg(4326))
        assert_geoseries_equal(expected, g['geometry'])
        assert_series_equal(g['address'],
                            pd.Series(locations, name='address'))
def test_forward(locations, points):
    from geopy.geocoders import Photon

    for provider in ["photon", Photon]:
        with mock.patch("geopy.geocoders.Photon.geocode",
                        ForwardMock()) as m:
            g = geocode(locations, provider=provider, timeout=2)
            assert len(locations) == m.call_count

        n = len(locations)
        assert isinstance(g, GeoDataFrame)

        expected = GeoSeries(
            [Point(float(x) + 0.5, float(x)) for x in range(n)],
            crs="EPSG:4326")
        assert_geoseries_equal(expected, g["geometry"])
        assert_series_equal(g["address"],
                            pd.Series(locations, name="address"))
def geocode_locations(df: gpd.GeoDataFrame, loc_col: str):
    """
    Geocode location names into point coordinates.

    Parameters
    ----------
    df : geopandas.GeoDataFrame
    loc_col : str
        Name of the column in df which contains the locations.

    Returns
    -------
    df : geopandas.GeoDataFrame
        The input frame with "geometry" and "address" columns added.
    """
    locations = geocode(df.loc[:, loc_col])
    df["geometry"] = locations.loc[:, "geometry"]
    df["address"] = locations.loc[:, "address"]
    return df
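# A minimal usage sketch (assumed, not from the original source): the place names
# are illustrative and the default geocoding provider is used, as in the function.
import geopandas as gpd
from geopandas.tools import geocode

cities = gpd.GeoDataFrame({"place": ["Helsinki, Finland", "Tampere, Finland"]})
cities = geocode_locations(cities, "place")
print(cities[["place", "address"]])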
def test_forward(locations, points):
    from geopy.geocoders import GoogleV3

    for provider in ['googlev3', GoogleV3]:
        with mock.patch('geopy.geocoders.googlev3.GoogleV3.geocode',
                        ForwardMock()) as m:
            g = geocode(locations, provider=provider, timeout=2)
            assert len(locations) == m.call_count

        n = len(locations)
        assert isinstance(g, GeoDataFrame)

        expected = GeoSeries(
            [Point(float(x) + 0.5, float(x)) for x in range(n)],
            crs=from_epsg(4326))
        assert_geoseries_equal(expected, g['geometry'])
        assert_series_equal(g['address'],
                            pd.Series(locations, name='address'))
def checkLocation(self, location):
    try:
        info = geocode(location, provider='nominatim')  # Get information about the input location
        self.name = info['address'].loc[0]              # Get the name of the location
        self.long = info['geometry'].loc[0].x           # Get the longitude of the input location
        self.lat = info['geometry'].loc[0].y            # Get the latitude of the input location
        self.dist = []
        self.num = []
        i = 0
        try:
            while True:
                # Create a list of distances
                self.dist.append(float(math.sqrt((self.data[i][6] - self.lat) ** 2 +
                                                 (self.data[i][7] - self.long) ** 2)))
                self.num.append(i)
                i += 1
        except:
            # Sort both lists according to the smallest distance
            self.dist, self.num = zip(*sorted(zip(self.dist, self.num)))
        if self.dist[0] < 5:
            return "LocationFound"
        else:
            return "TooFar"
    except:
        return "LocationNotFound"
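# A small side sketch (assumed, not part of the original class): geopy's geodesic
# distance returns kilometres directly, which avoids comparing raw degree
# differences as the euclidean formula above does. The coordinate pairs are
# illustrative (lat, lon) tuples.
from geopy.distance import geodesic

helsinki = (60.1699, 24.9384)
espoo = (60.2055, 24.6559)
print(geodesic(helsinki, espoo).km)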
def test_openmapquest_forward(self):
    g = geocode(self.locations, provider='openmapquest', timeout=2)
    self.assertIsInstance(g, gpd.GeoDataFrame)
def test_bad_provider_forward():
    from geopy.exc import GeocoderNotFound

    with pytest.raises(GeocoderNotFound):
        geocode(['cambridge, ma'], 'badprovider')
def my_geocoder(row):
    try:
        point = geocode(row, provider='nominatim').geometry[0]
        return pd.Series({'Latitude': point.y, 'Longitude': point.x})
    except:
        return None
# Geocoder
from geopandas.tools import geocode

result = geocode("The Great Pyramid of Giza", provider="nominatim")
result

point = result.geometry.iloc[0]
print("Latitude:", point.y)
print("Longitude:", point.x)

universities = pd.read_csv("../input/geospatial-learn-course-data/top_universities.csv")
universities.head()

def my_geocoder(row):
    try:
        point = geocode(row, provider='nominatim').geometry.iloc[0]
        return pd.Series({'Latitude': point.y, 'Longitude': point.x, 'geometry': point})
    except:
        return None

universities[['Latitude', 'Longitude', 'geometry']] = universities.apply(
    lambda x: my_geocoder(x['Name']), axis=1)

print("{}% of addresses were geocoded!".format(
    (1 - sum(np.isnan(universities["Latitude"])) / len(universities)) * 100))

# Drop universities that were not successfully geocoded
universities = universities.loc[~np.isnan(universities["Latitude"])]
universities = gpd.GeoDataFrame(universities, geometry=universities.geometry)
universities.crs = {'init': 'epsg:4326'}
universities.head()
* date: 12-01-21
* description: This script fetches the lat and long from the Nominatim OpenStreetMap API
  using the addresses in Address.csv
"""
import pandas as pd  # pip install pandas
from geopandas.tools import geocode  # follow https://geopandas.org/install.html
import geopy  # pip install geopy
import time

# Change the default user_agent name
geopy.geocoders.options.default_user_agent = "magdy"

# Read the addresses csv file into a dataframe
addresses = pd.read_csv('address.csv', usecols=["Address"])

# Loop through the addresses and fetch the lon and lat
for index, row in addresses.iterrows():
    try:
        print(row['Address'])
        # Geocode each row (Address) with geopandas using the OpenStreetMap "nominatim"
        # provider, see 'https://nominatim.openstreetmap.org/ui/search.html'
        info = geocode(str(row['Address']), provider='nominatim')
        # Add Lon and Lat columns to the dataframe
        addresses.loc[int(index), 'Lon'] = info['geometry'].loc[0].x
        addresses.loc[int(index), 'Lat'] = info['geometry'].loc[0].y
    except TypeError:
        print("\nGeocoding information for " + row['Address'] + " is not found!\n")
    time.sleep(1)

# Save the dataframe to a csv file
addresses.to_csv('addresses_with_lon_and_lat.csv')
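# An alternative sketch (assumed, not from the original script): geopy ships a
# RateLimiter helper that enforces Nominatim's one-request-per-second policy
# instead of a manual time.sleep(); it reads the same address.csv file.
import pandas as pd
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter

geolocator = Nominatim(user_agent="magdy")
geocode_limited = RateLimiter(geolocator.geocode, min_delay_seconds=1)

addresses = pd.read_csv('address.csv', usecols=["Address"])
locations = addresses["Address"].apply(geocode_limited)
addresses["Lat"] = locations.apply(lambda loc: loc.latitude if loc else None)
addresses["Lon"] = locations.apply(lambda loc: loc.longitude if loc else None)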
@author: katel
"""
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point, Polygon, LineString, MultiLineString, MultiPoint
from shapely.ops import nearest_points
from geopandas.tools import geocode

# Created a text file with the address for a rink about halfway between Jumbo and Itis
fp_rink = r'C:\Users\katel\Documents\AutoGIS\2018\Geo-Python-2018\AutoGIS\activity_locations.txt'
rink = pd.read_csv(fp_rink, sep=';')

geocoded_rink = geocode(rink['addr'], provider='nominatim', user_agent='autogis_student_78')
geocoded_rink = geocoded_rink.to_crs({'init': 'epsg:3879'})

# Read in the shp of shopping center points created in the previous exercise
fp_sc = r'C:\Users\katel\Documents\AutoGIS\2018\Geo-Python-2018\AutoGIS\shopping_centers.shp'
shop_centers = gpd.read_file(fp_sc)
shop_centers = shop_centers.to_crs({'init': 'epsg:3879'})

unary_union = shop_centers.unary_union
nearest_geoms = nearest_points(geocoded_rink['geometry'][0], unary_union)
near_idx0 = nearest_geoms[0]
if __name__ == '__main__':
    data = load_data()
    analyze_data(data)
    data = cleanData(data)
    data.to_csv('cleaned_house_data.csv')
    data_engineer(data)
    model = build_model(data)
    predict(model)
    pass

import geopandas
boros = geopandas.read_file("cleaned_house_data4.csv", encoding='utf8')
boro_locations = geopandas.tools.geocode(boros.address)
boro_locations.to_file("geography.shp")

from geopandas.tools import geocode
key = 'delted'
geo = geocode(boros['address'], api_key=key)

with open('cleaned_house_data3.csv') as f:
    print(f)

import pandas
data = pandas.read_csv("cleaned_house_data4.csv", encoding='utf8')

import googlemaps
gmaps_key = googlemaps.Client(key='Deleted4Security')
data['LAT'] = None
data['LON'] = None
for i in range(0, len(data), 1):
    geocode_result = gmaps_key.geocode(data['address'][i])
# Script #4
# Locator

# Retrieve the postcodes from our database
df = pd.read_excel(r".\3.Analyses\AllCompanyInfosSortedByPostCodeWithoutDuplicate.xls")
occurences = df["Postcode"].value_counts()
print(occurences.head(5))

postcodes = df["Postcode"]
postcodes = postcodes.drop_duplicates(keep="last")
# postcodes = postcodes.head(3)  # For small-scale tests (very useful to avoid wasting time and
#                                # to avoid requesting the same data again and again from
#                                # Nominatim (the OpenStreetMap locator))
list_postcodes = postcodes.tolist()

# At the end, also put the next line in a try/except so that a problem can be handled.
# Edit: not necessary, because you have to stay in front of the screen when you run it
# (at the beginning at least).
geo = geocode("3000 belgium", provider='nominatim')  # Preallocation before the loop
sleep(2)  # timer

# Preallocation of a list which will contain the coordinates of the different postcodes
list_long_lat = list(range(0, len(list_postcodes)))

for i, j in zip(list_postcodes, list(range(0, len(list_postcodes)))):
    k = 5  # k seconds to wait in case of error
    if i % 25 == 0:
        sleep(2)
    sleep(uniform(1.5, 3))  # 1.5 to 3 second break between requests; note that going below
                            # 1 second is forbidden
    while True:  # Retry indefinitely when encountering an error while requesting the long & lat
        try:
            # do stuff
            geo = geocode(str(i) + " Belgium", provider='nominatim')  # Specify Belgium so that the search stays in Belgium
def test_googlev3_forward(self):
    g = geocode(self.locations, provider='googlev3', timeout=2)
    self.assertIsInstance(g, gpd.GeoDataFrame)
"""Helper function to add a basemap for the plot""" xmin, xmax, ymin, ymax = ax.axis() basemap, extent = ctx.bounds2img(xmin, ymin, xmax, ymax, zoom=zoom, url=url) return basemap, extent # Filepath fp = "L3_data/addresses.txt" # Read the data data = pd.read_csv(fp, sep=';') data.columns # Geocode addresses from addr geo = geocode(data["addr"], provider="nominatim", user_agent ="csc_user_tml") # Merge geocoded locations back to original DataFrame geo = geo.join(data) # Reproject to Wen Mercator, background maps don't work without this geo = geo.to_crs(epsg=3857) # Plot the data with background map geo.plot() #Add basemap, zoom tells how detailed map we want add_basemap(ax=ax, zoom=12)
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
from geopandas.tools import geocode

fp = r"addresses.txt"
data = pd.read_csv(fp, sep=';')
# data.head()

c = input("Enter an address ")
geo = geocode(c, provider='nominatim')
print(geo.head())
def test_bad_provider_forward():
    from geopy.exc import GeocoderNotFound

    with pytest.raises(GeocoderNotFound):
        geocode(["cambridge, ma"], "badprovider")
def test_bad_provider_forward(self):
    with self.assertRaises(ValueError):
        geocode(['cambridge, ma'], 'badprovider')
import pandas as pd
from geopandas.tools import geocode
import geopy

data = pd.read_csv(
    "/Users/karanveersingh/PycharmProjects/Sentiment_analyser/Sentiment_analyser/Dataset/Day4.csv"
)

for index, row in data.iterrows():
    try:
        # print(row['Location'])
        info = geocode(str(row['Location']), provider='Nominatim')
        data.loc[int(index), 'Address'] = info['address'].loc[0]
        data.loc[int(index), 'Longitude'] = info['geometry'].loc[0].x
        data.loc[int(index), 'Latitude'] = info['geometry'].loc[0].y
    except BaseException:
        pass

data = data.dropna()
# print(data)
data.to_csv(
    "/Users/karanveersingh/PycharmProjects/Sentiment_analyser/Sentiment_analyser/LocationInfo/loc4.csv"
)
def test_nominatim(self):
    g = geocode(self.locations, provider='nominatim', timeout=2)
    self.assertIsInstance(g, gpd.GeoDataFrame)
                              url=url)
    ax.imshow(basemap, extent=extent, interpolation='bilinear')

    # Restore original x/y limits
    ax.axis((xmin, xmax, ymin, ymax))

# Filepath for addresses
fp = "L3_data/addresses.txt"

# Read the data
data = pd.read_csv(fp, sep=';')
# data.head()
# type(data)

# Geocode addresses with the Nominatim backend
geo = geocode(data['addr'], provider='nominatim', user_agent='csc_user_fa')
geo.head()

# Merge geocoded locations back to the original dataframe
geo = geo.join(data)

# Reproject to Web Mercator
geo = geo.to_crs(epsg=3857)

# Plot points with a background map
ax = geo.plot()

# Add the basemap
add_basemap(ax=ax, zoom=10)
# Import database
city_for_locator_df = pd.read_csv(r".\Locations\city_listing_db.csv", index_col=0)

# Merge multiple rows regarding the same cities in order to make as few requests as possible to Nominatim
city_for_locator_df.drop_duplicates(subset=['iso3', 'City/Town'], keep='first', inplace=True)

# Create a column for the longitude and one for the latitude
city_for_locator_df['long'] = np.nan
city_for_locator_df['lat'] = np.nan

# Preallocation before the loop
geo = geocode("Belgium", provider='nominatim', user_agent="air-pollution-worldwide")
long_and_lat = geo.to_crs('EPSG:4326')
sleep(2)  # timer

# 'Massive' requests to Nominatim, one for each city
for i in city_for_locator_df.index:
    print(i)
    try:
        geo = geocode(city_for_locator_df.at[i, 'Country'] + " " + city_for_locator_df.at[i, 'City/Town'],
                      provider='nominatim', user_agent="air-pollution-worldwide")
        # geo.to_crs({'init': 'epsg:3395'})  # Mercator projection
TS
"""
import pandas as pd
import geopandas as gpd
from geopandas.tools import geocode
from geopy.geocoders import Nominatim
from shapely.geometry import Point
import pycrs

# Filepath to addresses
fp = r"C:\Users\cscuser\Desktop\TS\ADDRESSES\addresses.txt"
data = pd.read_csv(fp, sep=';', encoding='utf8')

# Geocode the addresses
geo = geocode(data['addr'], provider='nominatim')

# ------
# Add more control to geocoding
# ------
geolocator = Nominatim()

# Create new columns for the Point and the geocoded address
data = data.assign(geometry=None, address=None)

# Iterate over the rows
for idx, row in data.iterrows():
    # Geocode the location
    location = geolocator.geocode(row['addr'])

    # If the location was found, then create a Point out of it
op_alueet2018.index[op_alueet2018["id"] == 114404]  # 48
op_new = op_alueet2018.drop(48, axis=0)
op_new.to_file("/home/hertta/Documents/Gradu/oppilasalueet2018/schoolAreas2018.shp")

for key, row in ruudut_joined.iterrows():
    ruudut_joined.loc[key] = row

import pandas as pd
from geopandas.tools import geocode

koulut_osoitteet = pd.read_csv("/home/hertsy/Documents/Gradu/oppilasalueet2018/koulut_osoitteet.csv",
                               sep=",", encoding="UTF-8")
koulut_geocoded = geocode(koulut_osoitteet['address'], provider='Nominatim', user_agent="hertsy")

from geopy.geocoders import Nominatim
geolocator = Nominatim(user_agent="hertsy")
coordinates = []
for value in koulut_osoitteet['address']:
    location = geolocator.geocode(value)
    coordinates.append((location.latitude, location.longitude))

koulut_geocoded = gdp.read_file("/home/hertsy/Documents/Gradu/oppilasalueet2018/koulut_osoitteet_nom2.shp",
                                encoding="UTF-8")
schoolareas = gdp.read_file("/home/hertsy/Documents/Gradu/oppilasalueet2018/schoolAreas2018.shp",
                            encoding="UTF-8")
ruudut_final = gdp.read_file("/home/hertsy/Documents/Gradu/oppilasalueet2018/ruudut_kieli_final.shp",
                             encoding="UTF-8")
koulut_geocoded = koulut_geocoded.to_crs(schoolareas.crs)
# Import the x, y and intensity from 4.Locate (a dataframe stored there in a csv file)
locationsData = pd.read_csv(r".\4.Locate\LocationsAndIntensityWithAberations.csv")
x = locationsData["x"].tolist()
y = locationsData["y"].tolist()
intensity = locationsData["intensity"].tolist()
print("There are " + str(sum(intensity)) + " companies in total")
postcodes = locationsData["Postcode"].tolist()

## PART 2: Cleaning and correcting mislocations
# print(min(x))  # debug (in case of mislocation)
# print(min(y))

# Compute the coordinates that Nominatim (OSM) returns for "Belgium"; this helps when it
# could not read the postcode.
# Edit: I did not need it because Nominatim was either totally accurate or completely off,
# but if you decide to choose another locator, a good approach is to consider a circle
# around the centre of Belgium and remove all data outside it.
geo = geocode("Belgium", provider='nominatim')
geo_mercator = geo.to_crs({'init': 'epsg:3395'})
x_belgium = geo_mercator.loc[0, "geometry"].x
y_belgium = geo_mercator.loc[0, "geometry"].y

# Remove aberrant locations
count = 0                   # Number of postcodes
countCompanies = 0          # Number of companies affected
badlyLocatedPostcode = []   # Will store the badly located postcodes
badlyLocatedIntensity = []  # Will store the number of companies at the badly located postcodes
iterator = 0
end = len(x)
while iterator < end:
    # There are 2 types of error met:
    # 1) Zipcode + Belgium = another place on Earth (usually the USA)
    # 2) Zipcode + Belgium = Belgium for the interpreter, thus stacking points at the centre
    #    of Belgium; we clean it and re-add it later when sufficiently big
# Make a table join to retrieve the id column from the original shopping centers
# DataFrame, similarly as in the lesson materials
#
# Save the GeoDataFrame as a Shapefile called shopping_centers.shp
# ==============================================================================
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point

fpath = r"C:\Users\oyeda\Desktop\AUTOGIS\AUTOGIS_PERIOD2\assignment3\shopping_centres.txt"
data = pd.read_csv(fpath, sep=";", encoding='latin-1')
data.head(5)

from geopandas.tools import geocode
geoc = geocode(data['addre'], provider='nominatim')
geoc

from fiona.crs import from_epsg
geoc = geoc.to_crs(from_epsg(3879))
# It can also be done as below:
# geoc['geometry'] = geoc['geometry'].to_crs(epsg=3879)
geoc.crs

geoc_join = geoc.join(data)
type(geoc)

fp = r"C:\Users\oyeda\Desktop\AUTOGIS\AUTOGIS_PERIOD2\assignment3\shopping_centres.shp"
geoc_join.to_file(fp)
# geoc_join.plot()
# ==============================================================================
def test_bad_provider_forward(self):
    from geopy.exc import GeocoderNotFound

    with pytest.raises(GeocoderNotFound):
        geocode(['cambridge, ma'], 'badprovider')