def _create_df(x, y=None, crs=None):
    y = y or x
    x = np.asarray(x)
    y = np.asarray(y)

    return GeoDataFrame(
        {'geometry': points_from_xy(x, y), 'value1': x + y, 'value2': x * y},
        crs=crs)
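# A minimal usage sketch of _create_df. The imports below supply what the
# helper above relies on (numpy plus the geopandas namespace); note that when
# y is omitted, the x values are reused for both coordinates.
import numpy as np
from geopandas import GeoDataFrame, points_from_xy

square = _create_df([0.0, 1.0, 2.0], crs="EPSG:4326")  # y defaults to x
rect = _create_df([0.0, 1.0], y=[2.0, 3.0])            # explicit y
print(square[['value1', 'value2']])                    # value1 = x + y, value2 = x * y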
def world_map(x, y, normalize, world, folds, title1, title2):
    drop_list = ['geometry', x]
    map_world = world.drop(world.columns.difference(drop_list), axis=1)
    world["x"] = world['geometry'].centroid.x
    world["y"] = world['geometry'].centroid.y
    gdf = gpd.GeoDataFrame(map_world,
                           geometry=gpd.points_from_xy(world.x, world.y))
    gdf['size'] = gdf[x] * normalize

    known = world.dropna(subset=[y])
    unknown = world[world[y].isna()]

    ax = known.plot(column=y, cmap='inferno_r', figsize=(20, 12),
                    scheme='fisher_jenks', k=folds, legend=True,
                    edgecolor='#aaaaaa')
    unknown.plot(ax=ax, color='#ffffff', hatch='//', edgecolor='#aaaaaa')
    gdf.plot(ax=ax, color='red', markersize='size')

    ax.set_title(title1, fontdict={'fontsize': 20}, loc='left')
    description = title2.strip()
    ax.annotate(description, xy=(0.07, 0.1), size=12,
                xycoords='figure fraction')
    ax.set_axis_off()

    legend = ax.get_legend()
    legend.set_bbox_to_anchor((.11, .4))
    legend.prop.set_size(12)
def load_hydro_data(hydro_var_name, dataset_name, fill_value=-9999,
                    path='../datasets/hydrology', version=1, fill_nan=True):
    # load dataset
    X = netCDF4.Dataset("{}/{}_{}.nc".format(path, hydro_var_name, dataset_name))

    # create time indexes
    if version == 1:
        db = pd.DataFrame({'year': np.asarray(X.variables['time'][:][0, :]).astype(int),
                           'month': np.asarray(X.variables['time'][:][1, :]).astype(int),
                           'day': 15})
        time_X = pd.to_datetime(db)
    if version == 2:
        year = [d[3:] for d in np.asarray(X.variables['time'])]
        month = [d[:2] for d in np.asarray(X.variables['time'])]
        db = pd.DataFrame({'year': np.asarray(year).astype(int),
                           'month': np.asarray(month).astype(int),
                           'day': 15})
        time_X = pd.to_datetime(db)

    # dataframe of all grid points
    if version == 1:
        lat = np.asarray(X.variables['Lat'][:][0])
        long = np.asarray(X.variables['Long'][:][0])
        df = pd.DataFrame({'x': long, 'y': lat})
    if version == 2:
        lat = np.asarray(X.variables['Lat'])
        long = np.asarray(X.variables['Long'])
        (lat_flat, long_flat) = np.meshgrid(lat, long)
        df = pd.DataFrame({'x': long_flat.flatten(), 'y': lat_flat.flatten()})
    spatial_grid = geopandas.GeoDataFrame(df, geometry=geopandas.points_from_xy(df.x, df.y))

    # dataframe of each variable at each month over all grid points
    hydro_var = np.asarray(X.variables['{}_mm'.format(hydro_var_name)])
    X_grid = hydrological_variables_grid(hydro_var, time_X, hydro_var_name, spatial_grid,
                                         fill_value=fill_value, version=version,
                                         fill_nan=fill_nan)

    return spatial_grid, X_grid, time_X
def sample_points_for_hard_negative_mining(self):
    # hard negative mining:
    # get some random negatives from the image bounds to ensure that the model
    # can learn on negative examples, e.g. land, clouds, etc.
    with rio.open(self.imagefile) as src:
        left, bottom, right, top = src.bounds

    offset = HARD_NEGATIVE_MINING_SAMPLE_BORDER_OFFSET  # m
    assert top - bottom > 2 * HARD_NEGATIVE_MINING_SAMPLE_BORDER_OFFSET, \
        f"Hard Negative Mining offset 2x{HARD_NEGATIVE_MINING_SAMPLE_BORDER_OFFSET}m too large for the image height: {top - bottom}m"
    assert right - left > 2 * HARD_NEGATIVE_MINING_SAMPLE_BORDER_OFFSET, \
        f"Hard Negative Mining offset 2x{HARD_NEGATIVE_MINING_SAMPLE_BORDER_OFFSET}m too large for the image width: {right - left}m"

    N_random_points = len(self.lines)

    # sample random x positions within bounds
    zx = np.random.rand(N_random_points)
    zx *= ((right - offset) - (left + offset))
    zx += left + offset

    # sample random y positions within bounds
    zy = np.random.rand(N_random_points)
    zy *= ((top - offset) - (bottom + offset))
    zy += bottom + offset

    return gpd.GeoDataFrame(geometry=gpd.points_from_xy(zx, zy))
def geo_data_frame(dataframe, lat_col, lon_col):
    """
    Makes a geopandas.GeoDataFrame from a pandas.DataFrame.

    The CRS is hardcoded to GPS coordinates (EPSG:4326).

    Parameters
    ----------
    dataframe : pandas.DataFrame
    lat_col : name of the column holding the latitude in the given dataframe
    lon_col : name of the column holding the longitude in the given dataframe

    Returns
    -------
    geopandas.GeoDataFrame
    """
    _geoDataFrame = gpd.GeoDataFrame(dataframe,
                                     geometry=gpd.points_from_xy(dataframe[lon_col],
                                                                 dataframe[lat_col]))
    # Set the coordinate reference system (CRS)
    _geoDataFrame.crs = 'EPSG:4326'
    return _geoDataFrame
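# Hedged usage sketch for geo_data_frame, assuming geopandas is imported as
# gpd (as the function above expects). The column names 'lat' and 'lon' are
# hypothetical; any names work, since the function takes them as arguments.
import pandas as pd

cities = pd.DataFrame({'lat': [52.52, 48.86], 'lon': [13.40, 2.35]})
cities_gdf = geo_data_frame(cities, lat_col='lat', lon_col='lon')
print(cities_gdf.crs)  # EPSG:4326, per the hardcoded CRS above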
def plot_map(self):
    world = geopandas.read_file(
        geopandas.datasets.get_path('naturalearth_lowres'))
    gdf = geopandas.GeoDataFrame(self.coord,
                                 geometry=geopandas.points_from_xy(
                                     self.coord.Longitude, self.coord.Latitude))

    name = difflib.get_close_matches(self.country, world.name.to_list())[0]
    ax = world[world.name == name].plot(color='white', edgecolor='black')

    # We can now plot our ``GeoDataFrame``.
    gdf.plot(ax=ax, color='red', alpha=0.5, markersize=10, figsize=[100, 50])
    gdf.apply(lambda x: ax.annotate(x.location,
                                    xy=x.geometry.centroid.coords[0],
                                    ha='center'), axis=1)

    # plt.show()
    plt.savefig(
        os.path.join(
            os.path.dirname(self.projects[0]['static_data']['path_project']),
            'Map_Country.png'))
def map_to_crs(map_: gpd.GeoDataFrame, target: pyproj.crs.CRS) -> gpd.GeoDataFrame:
    """Transforms map (geometry and height) to a target CRS.

    Parameters
    ----------
    map_ : gpd.GeoDataFrame
        map to be transformed
    target : pyproj.crs.CRS
        target CRS in any pyproj parsable format.

    Returns
    -------
    gpd.GeoDataFrame
        transformed map
    """
    cm.check.check_type(map_, 'map', raise_errors=True)

    transformed = to_crs(map_, target)

    xy = map_.geometry.centroid
    old_heights = gpd.GeoSeries(gpd.points_from_xy(xy.x, xy.y, map_.height),
                                crs=map_.crs)
    new_heights = to_crs(old_heights, target)
    transformed['height'] = [point.z for point in new_heights]

    return transformed
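# Hedged usage sketch for map_to_crs, assuming a GeoDataFrame with a 'height'
# column and the module-level helpers used above (cm.check, to_crs) imported
# elsewhere; the target can be anything pyproj can parse. `buildings_map` is a
# hypothetical input.
import pyproj

target_crs = pyproj.CRS.from_epsg(32633)           # hypothetical UTM zone
projected = map_to_crs(buildings_map, target_crs)
print(projected['height'].head())                  # heights reprojected as point z values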
def polygon_filter(input_df, filter_gdf):
    """
    Purpose:
        Removes records from the TROPOMI NO2 Pandas DataFrame that are not
        found within the filter polygons.

    Parameters:
        input_df: Pandas DataFrame containing NO2 data coming from nc_to_df()
        filter_gdf: GeoPandas GeoDataFrame containing geometries to constrain NO2 records

    Returns:
        geodataframe: Filtered GeoPandas GeoDataFrame
    """
    tic = time.perf_counter()
    output_gdf = pd.DataFrame()
    print('Processing input dataframe...')
    crs = filter_gdf.crs

    # 1. Convert input_df to gdf
    gdf1 = gpd.GeoDataFrame(geometry=gpd.points_from_xy(input_df.longitude,
                                                        input_df.latitude),
                            crs=crs)
    print('Original NO2 DataFrame length:', len(gdf1))

    # 2. Find the intersection between the African Countries GeoDataFrame (geometry)
    #    and the NO2 GeoDataFrame using geopandas sjoin (as GeoDataFrame sjoin_gdf)
    sjoin_gdf = gpd.sjoin(gdf1, filter_gdf, how='inner', op='intersects')

    # 3. Do a Pandas inner join of sjoin_gdf and the NO2 DataFrame (sjoin_gdf acts as
    #    a filter) using indexes. The inner join drops non-intersecting records.
    gdf2 = input_df.join(sjoin_gdf, how='inner')
    print('Filtered NO2 GeoDataFrame length:', len(gdf2))

    toc = time.perf_counter()
    elapsed_time = toc - tic
    print("Processed NO2 DataFrame sjoin in " + str(elapsed_time / 60) + " minutes")

    output_gdf = gdf2
    return output_gdf
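# Hedged usage sketch for polygon_filter: `no2_df` stands in for a DataFrame
# returned by nc_to_df() with 'longitude'/'latitude' columns, and the shapefile
# path is hypothetical; any polygon GeoDataFrame works as the filter.
import geopandas as gpd

countries_gdf = gpd.read_file("african_countries.shp")  # hypothetical path
no2_in_countries = polygon_filter(no2_df, countries_gdf)
print(len(no2_in_countries))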
def __init__(self, bounds: gpd.GeoDataFrame):
    """Initial state and variables for the police agents.

    Takes the bounds input, which determines where agents may be created.
    Agents may be spawned within the extent of bounds, but whether they fall
    within a bounds polygon must be checked with a geographic function,
    gpd.within().

    Args:
        bounds (gpd.GeoDataFrame): GeoDataFrame with the input polygon.
    """
    # takes bounds from main.py
    self.bounds = bounds

    # find extent of bounds
    x_min, y_min, x_max, y_max = self.bounds.total_bounds

    while True:
        # random xy from extent of bounds (square)
        self.x = random.uniform(x_min, x_max)
        self.y = random.uniform(y_min, y_max)

        # convert to geodataframe
        df = pd.DataFrame({'x': [self.x], 'y': [self.y]})
        geom = gpd.points_from_xy(df.x, df.y)
        gdf = gpd.GeoDataFrame(df, geometry=geom)

        # check whether point falls within polygon
        within = int(gdf.within(self.bounds))

        # only keep point if within poly, otherwise repeat random coords
        if within == 1:
            self.x = gdf['x']
            self.y = gdf['y']
            self.geom = gdf['geometry']
            break
def __init__(self, path_inventario):
    # the inventory file is semicolon-separated
    self.df = pd.read_csv(path_inventario,
                          engine='python',
                          sep=';',
                          parse_dates=['UltimaAtualizacao'])
    self.df[['Latitude', 'Longitude']] = self.df[['Latitude', 'Longitude']].apply(
        lambda x: x.str.replace(',', '.'))
    self.df['Latitude'] = self.df['Latitude'].astype('float')
    self.df['Longitude'] = self.df['Longitude'].astype('float')

    self.gdf = gpd.GeoDataFrame(self.df,
                                geometry=gpd.points_from_xy(self.df.Longitude,
                                                            self.df.Latitude),
                                crs='epsg:4674')

    self.m01 = ipyleaflet.Map(zoom=2, center=(-16, -47))
    self.layer()
    self.controls_on_Map()
    self.control_buttonDownload.on_click(self.download_buttom)
    self.control_shapefileButtom.on_click(self.shapefile_buttom)
    display(self.m01)
def CreateMapWithPaths(sp, df):
    # stores lines/roads/edges of sp (shortest path)
    listLine = []
    for i in range(len(sp) - 1):
        # l1 and l2 are row numbers of each point in the data frame
        l1 = df[df['Point'] == sp[i]].index.values.astype(int)
        l2 = df[df['Point'] == sp[i + 1]].index.values.astype(int)

        # from the row number we get the coordinates and build the line;
        # use (Longitude, Latitude) so x/y match the point GeoDataFrame below
        point1 = df.iloc[l1[0]].Longitude, df.iloc[l1[0]].Latitude
        point2 = df.iloc[l2[0]].Longitude, df.iloc[l2[0]].Latitude
        listLine.append(LineString([point1, point2]))

    # convert lines to a dataframe, then to a geo df
    dfLine = pd.DataFrame({'geometry': listLine})
    gdfLine = geopandas.GeoDataFrame(dfLine)

    # get the original df and set its crs;
    # this helps mark intersections on the path map that only has lines
    gdf = geopandas.GeoDataFrame(df, geometry=geopandas.points_from_xy(
        df.Longitude, df.Latitude))
    gdf.crs = from_epsg(3857)
    gdfLine.crs = from_epsg(3857)

    ax = gdfLine.plot(figsize=(10, 10), alpha=1, edgecolor='red')
    for a in gdf.itertuples():
        if sp.__contains__(a.Point):
            plt.text(a.geometry.x, a.geometry.y, a.Point)

    # ctx.add_basemap(ax, url=ctx.providers.Stamen.TonerLite, zoom=12)
    ax.set_axis_off()
    # plt.show()
    return plt
def read_grd(filename):
    with open(filename) as infile:
        ncols = int(infile.readline().split()[1])
        nrows = int(infile.readline().split()[1])
        xllcorner = float(infile.readline().split()[1])
        yllcorner = float(infile.readline().split()[1])
        cellsize = float(infile.readline().split()[1])
        nodata_value = int(infile.readline().split()[1])
        # version = float(infile.readline().split()[1])

    longitude = xllcorner + cellsize * np.arange(ncols)
    latitude = yllcorner + cellsize * np.arange(nrows)
    value = np.loadtxt(filename, skiprows=6)  # change to 7 if version is in file

    # value is a matrix where the bottom right corner is [xllcorner, yllcorner]
    x_input = np.array([[lon, lat] for lon in longitude for lat in latitude])
    y_input = value
    y_input = np.flipud(y_input).flatten(order='F')

    arr = np.hstack([x_input, y_input[:, None]])
    df = pd.DataFrame(arr, columns=['x', 'y', 'height'])
    gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.x, df.y))
    return gdf
def _read_berlin_bounds(self) -> gpd.GeoDataFrame:
    """
    Reads berlin bounds from a poly file and sets epsg to 4326.
    If the file does not exist, return an empty geodataframe.

    :rtype geopandas.GeoDataFrame
    """
    if not isfile(self.berlin_bounds_file):
        return gpd.GeoDataFrame()

    df = pd.read_csv(self.berlin_bounds_file, delim_whitespace=True, header=None)
    df.columns = ['lat', 'lon']

    # create a geodataframe
    gdf = gpd.GeoDataFrame(
        df,
        geometry=gpd.points_from_xy(df.lat, df.lon)
    )

    # drop the lat, lon columns as they are in the geometry column
    gdf.drop(['lat', 'lon'], axis=1, inplace=True)

    # set the coordinate system. This has to be done this way due to geopandas==0.5.0
    gdf.crs = {'init': 'epsg:4326'}

    return gdf
def add_public_transport_connection_quality_work(df_businesses):
    """ Add connection quality of public transport from coordinates.

    :param df_businesses: Contains the businesses from the SynPop, incl. coordinates
    :return: df_businesses: Contains the businesses from the SynPop, including a column
        containing the public transport connection quality
        - A = very good, coded as 1
        - B = good, coded as 2
        - C = medium, coded as 3
        - D = low, coded as 4
        - 5 = marginal or no public transport connection
    """
    # Read the shape file containing the connection quality
    connection_quality_folder_path = Path('../data/input/OeV_Gueteklassen/Fahrplanperiode_17_18/')
    df_connection_quality = geopandas.read_file(connection_quality_folder_path / 'OeV_Gueteklassen_ARE.shp')
    df_connection_quality.to_crs(epsg=2056, inplace=True)  # Change the projection

    geodf_businesses = geopandas.GeoDataFrame(
        df_businesses,
        geometry=geopandas.points_from_xy(df_businesses.xcoord_work,
                                          df_businesses.ycoord_work),
        crs='epsg:2056')
    geodf_businesses = geopandas.sjoin(geodf_businesses,
                                       df_connection_quality[['KLASSE', 'geometry']],
                                       how='left', op='intersects')
    geodf_businesses['KLASSE'] = geodf_businesses['KLASSE'].map({'A': 1, 'B': 2, 'C': 3, 'D': 4})
    geodf_businesses['KLASSE'].fillna(5, inplace=True)

    # Rename the column with the public transport connection quality
    geodf_businesses.rename(columns={'KLASSE': 'public_transport_connection_quality_ARE_work'},
                            inplace=True)

    # base = df_connection_quality.plot()
    # geodf_businesses[geodf_businesses['public_transport_connection_quality_ARE_work'] == 1].head(n=5000).plot(
    #     ax=base, marker='o', color='red', markersize=5)
    # plt.show()

    geodf_businesses.drop(['index_right'], axis=1, inplace=True)
    return geodf_businesses
def ClipData(From, To, data):
    # create a data frame with source and destination to clip the map
    ID = [1, 2]

    # TODO: should we add a try/except here? bad addresses?
    Source = get_geocords(From)
    Destination = get_geocords(To)
    Lat1, Long1 = Source
    Lat2, Long2 = Destination
    Lat = [Lat1, Lat2]
    Long = [Long1, Long2]

    df = pd.DataFrame()
    df['id_trip'] = ID
    df['Lat'] = Lat
    df['Long'] = Long

    # build a geodataframe and change the crs to meters so we can buffer and
    # clip the original map
    df_geo = gp.GeoDataFrame(df, geometry=gp.points_from_xy(df.Long, df.Lat))
    df_geo.crs = {'init': 'EPSG:4326'}
    df_geo = df_geo.to_crs("epsg:3043")

    trips = df_geo.copy()
    trips['geometry'] = LineString(df_geo.geometry)
    # WHY THIS? is it not already in this CRS?
    trips = trips.to_crs("epsg:3043")

    # buffer a kilometer
    trips.geometry = trips.geometry.buffer(1000)

    # clipping
    streets_clipped = gp.sjoin(left_df=data, right_df=trips, how='inner')

    # change crs for folium and networkx
    streets_clipped2 = streets_clipped.to_crs('EPSG:4326')

    return streets_clipped2, Source, Destination
def knn_plot(df, k, figsize=(15, 15), column='class'):
    data = df[['lng', 'lat']].to_numpy()
    classes, idx_to_class = pd.factorize(df[column])

    x = np.arange(-180, 180, 0.5)
    y = np.arange(-90, 90, 0.5)

    # Use the haversine metric for real distance
    clf = neighbors.KNeighborsClassifier(k, metric='haversine')
    clf.fit(data * np.pi / 180, classes)  # Fit on radians and not on degrees

    xx, yy = np.meshgrid(x, y)
    pred = clf.predict(np.c_[xx.ravel(), yy.ravel()] * np.pi / 180).reshape(xx.shape)

    world = geopandas.read_file(
        geopandas.datasets.get_path('naturalearth_lowres'))
    world = world[['continent', 'geometry']]

    fig, ax = plt.subplots(figsize=figsize)
    gdf = geopandas.GeoDataFrame(
        df.copy(), geometry=geopandas.points_from_xy(df.lng, df.lat))

    cmap = cm.get_cmap('tab20', len(idx_to_class))
    ax.pcolormesh(xx, yy, pred, cmap=cmap)
    world.boundary.plot(color='k', ax=ax)
    for i, c in enumerate(idx_to_class):
        gdf[gdf[column] == c].geometry.plot(
            ax=ax, color=cmap.colors[i], label=c)
    ax.legend()
def dataset(self):
    """ Load data as a geopandas GeoDataFrame """
    if self._dataset is None:
        self._dataset = self.load_dataframe(self)
        if not hasattr(self, 'geometry'):
            if self.geometry_col:
                self._dataset[self.geometry_col] = (
                    self._dataset[self.geometry_col].apply(shapely.wkt.loads))
                self._dataset = gpd.GeoDataFrame(self._dataset,
                                                 geometry=self.geometry_col)
            elif self.xy_cols:
                self._dataset = gpd.GeoDataFrame(
                    self._dataset,
                    geometry=gpd.points_from_xy(
                        self._dataset[self.xy_cols[0]],
                        self._dataset[self.xy_cols[1]]))
        if self.bbox:
            # clip to the bounding box (x range, then y range)
            self._dataset = self._dataset.cx[self.bbox.min.x:self.bbox.max.x,
                                             self.bbox.min.y:self.bbox.max.y]
    return self._dataset
def crime_cluster(from_date, to_date, crime):
    data = pd.read_csv(
        'https://opendata.arcgis.com/datasets/3eeb0a2cbae94b3e8549a8193717a9e1_0.csv?outSR=%7B%22latestWkid%22%3A2248%2C%22wkid%22%3A102685%7D',
        sep=',', header='infer')
    data['CrimeDateTime'] = pd.to_datetime(data['CrimeDateTime'])
    data = data[(data['CrimeDateTime'] > from_date) & (data['CrimeDateTime'] < to_date)]
    data = data[data['Latitude'] > 0]
    data = data[data['Description'] == crime]

    coords = np.array(data[['Longitude', 'Latitude']])

    model = hdbscan.HDBSCAN(min_cluster_size=4, min_samples=5,
                            cluster_selection_epsilon=0.001)
    fit = model.fit(coords)
    labels = fit.labels_.reshape(fit.labels_.shape[0], -1)

    arr = np.concatenate((coords, labels), axis=1)
    # coords are ordered (Longitude, Latitude), so label the columns accordingly
    df = pd.DataFrame(arr, columns=['Longitude', 'Latitude', 'Cluster'])
    df['Cluster'] = df['Cluster'].astype(int)

    gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.Longitude, df.Latitude))
    clus_geojson = gdf.to_json()
    return clus_geojson
def spatial_join(df_input, shapefile):
    # Load shapefile
    shapefile = gpd.read_file(shapefile)
    shapefile.drop(['GID_0', 'GID_1', 'NL_NAME_1', 'GID_2', 'VARNAME_2',
                    'NL_NAME_2', 'TYPE_2', 'NAME_0', 'NAME_1', 'ENGTYPE_2',
                    'CC_2', 'HASC_2'], axis=1, inplace=True)
    # shapefile.crs = {'init': 'epsg:4326'}
    shapefile.crs = 'epsg:4326'

    # Load Excel file
    df = gpd.GeoDataFrame(
        df_input,
        geometry=gpd.points_from_xy(df_input['Longitude'], df_input['Latitude']))
    # df.crs = {'init': 'epsg:4326'}
    df.crs = 'epsg:4326'

    # Spatial Join
    df = gpd.sjoin(df, shapefile, how='left', op='within')
    df.drop(['index_right', 'geometry'], axis=1, inplace=True)
    df.rename(columns={'NAME_2': 'City'}, inplace=True)
    df = df[['Date', 'Time', 'City', 'Location', 'High_Accuracy', 'Latitude',
             'Longitude', 'Direction', 'Type', 'Lanes_Blocked', 'Involved',
             'Tweet', 'Source']]
    return df
def pandas_handler():
    try:
        metrobus_data = get_data(uri_data)
        metrobus_df = pd.DataFrame.from_records(metrobus_data)
        metrobus_flat_df = json_normalize(data=metrobus_df['fields'])
        metrobus_gdf = gpd.GeoDataFrame(
            metrobus_flat_df,
            geometry=gpd.points_from_xy(metrobus_flat_df.position_longitude,
                                        metrobus_flat_df.position_latitude))

        geo_df = gpd.read_file(file_geo)
        merge_geo_data_df = gpd.sjoin(metrobus_gdf, geo_df, how="left", op='intersects')

        nomgeo_nomalize = merge_geo_data_df.nomgeo.str.normalize(
            'NFKD').str.encode('ascii', errors='ignore').str.decode('utf-8')
        merge_geo_data_df.nomgeo = nomgeo_nomalize

        result = merge_geo_data_df.to_json()
        parsed = json.loads(result)
        data_json = parsed["features"]
        cdmx_data = list(map(lambda x: x["properties"], data_json))
        return cdmx_data
    except Exception as ex:
        logging.error(str(ex))
def make_geodf(df, lat_col_name='latitude', lon_col_name='longitude'):
    """
    Take a dataframe with latitude and longitude columns, and turn it into a
    geopandas df. Needed to plot the map.
    The function is more or less copy-pasted from
    https://www.martinalarcon.org/2018-12-31-d-geopandas/
    """
    import geopandas as gpd

    try:
        df = df.copy()
        lat = df[lat_col_name]
        lon = df[lon_col_name]
        geodf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(lon, lat))
        geodf = geodf.set_crs("EPSG:4326")
        geodf = geodf.to_crs("EPSG:3877")
    except Exception:
        print('Something went wrong while converting the data to a GeoDataFrame.')
        geodf = []
    return geodf
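# Hedged usage sketch for make_geodf with the default column names; the result
# is reprojected to EPSG:3877 as in the function body above.
import pandas as pd

stops = pd.DataFrame({'latitude': [60.17, 60.45], 'longitude': [24.94, 22.27]})
stops_gdf = make_geodf(stops)
print(stops_gdf.crs)  # EPSG:3877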
def makeDataframe(data_file, epsg=3857):
    # pprint(WMS_LAYERS)
    # pprint([op.name for op in wms.operations])

    # https://frictionlessdata.io/tooling/python/extracting-data/
    # Handles multiline columns cleanly.
    data_filename = os.path.basename(data_file)
    import_rows = extract(data_file)
    import_dataframe = pandas.DataFrame(import_rows)

    # cities_3857 = geopandas.read_file(CITIES_SHP).to_crs(epsg=3857)

    # keep only rows with a longitude before building the point geometry
    rows_with_coords = import_dataframe[import_dataframe.Longitude.notnull()]
    point_geodataframe = geopandas.GeoDataFrame(
        rows_with_coords,
        geometry=geopandas.points_from_xy(rows_with_coords.Longitude,
                                          rows_with_coords.Latitude),
        crs="EPSG:4326")

    if DEBUG:
        pprint(point_geodataframe)

    point_geodataframe_3857 = point_geodataframe.to_crs(epsg=epsg)
    return point_geodataframe_3857
def set_geometry_points(self, scheduler=None):
    """Set geometry attribute of gpd.GeoSeries with Points from latitude and
    longitude attributes if geometry not present.

    Parameters:
        scheduler (str): used for dask map_partitions.
            "threads", "synchronous" or "processes"
    """
    def apply_point(df_exp):
        return df_exp.apply((lambda row: Point(row.longitude, row.latitude)), axis=1)

    if not self.geometry.size:
        LOGGER.info('Convert centroids to GeoSeries of Point shapes.')
        if not self.lat.size or not self.lon.size:
            self.set_meta_to_lat_lon()
        if not scheduler:
            self.geometry = gpd.GeoSeries(
                gpd.points_from_xy(self.lon, self.lat), crs=self.geometry.crs)
        else:
            import dask.dataframe as dd
            from multiprocessing import cpu_count
            ddata = dd.from_pandas(self, npartitions=cpu_count())
            self.geometry = (ddata
                             .map_partitions(apply_point, meta=Point)
                             .compute(scheduler=scheduler))
def join_receiver_position(
        gnss_obs: pd.DataFrame,
        gnss_fix: pd.DataFrame) -> gpd.GeoDataFrame:
    """ Add receiver positions to Raw data.

    Joined by utc time in milliseconds.
    """
    clean_fix = gnss_fix[["Longitude", "Latitude", "Altitude",
                          "(UTC)TimeInMs"]].dropna().set_index("(UTC)TimeInMs")
    df = gnss_obs.join(clean_fix, on="time_ms", how="inner",
                       lsuffix="obs", rsuffix="fix")
    df.reset_index(drop=True, inplace=True)

    if len(df) != len(gnss_obs):
        warnings.warn(
            f'{len(gnss_obs) - len(df)} observations discarded without matching fix.'
        )

    gdf = gpd.GeoDataFrame(
        df,
        geometry=gpd.points_from_xy(df["Longitude"], df["Latitude"], df["Altitude"]),
        crs=cm.constants.epsg_gnss_logger)
    return gdf
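# Hedged usage sketch for join_receiver_position: `raw_obs` and `fix_df` are
# hypothetical frames from a GNSS logger export, where raw_obs carries a
# 'time_ms' column matching fix_df's '(UTC)TimeInMs' column.
located = join_receiver_position(raw_obs, fix_df)
print(located.geometry.has_z.all())  # each point carries altitude as its z value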
def get_loc_gdf(tweet_df, column_name='locs'):
    '''
    Given a Twitter DataFrame, this will generate a GeoPandas DataFrame,
    based on the Twitter DataFrame.

    Parameters
    ----------
    tweet_df: Pandas DataFrame
        A DataFrame generated by libpypack.Locations.map_locations().

    column_name: str
        Column name with the locations generated by
        libpypack.Locations.map_locations().

    Returns
    -------
    : GeoPandas DataFrame
        A GeoPandas DataFrame which can be used by libpypack.visualization.heatmap.
    '''
    gdf = create_new_df(tweet_df, column_name=column_name)

    loc_gdf = geopandas.GeoDataFrame(gdf,
                                     geometry=geopandas.points_from_xy(
                                         gdf.Longitude, gdf.Latitude))

    return loc_gdf
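# Hedged usage sketch for get_loc_gdf: `tweets_df` stands in for a DataFrame
# produced by libpypack.Locations.map_locations(), with a 'locs' column plus the
# Longitude/Latitude columns that create_new_df() is expected to yield.
loc_gdf = get_loc_gdf(tweets_df, column_name='locs')
print(loc_gdf.geometry.head())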
    df = pd.read_csv('https://raw.githubusercontent.com/Tibxch/steamlit/master/20190103.csv')
elif date == 4:
    df = pd.read_csv('https://raw.githubusercontent.com/Tibxch/steamlit/master/20190104.csv')
elif date == 5:
    df = pd.read_csv('https://raw.githubusercontent.com/Tibxch/steamlit/master/20190105.csv')

### Raw Data Visualization ###
if st.checkbox("Show raw data", False):
    st.subheader('Raw Data')
    st.write(df)

### GEOMETRY ###
crs = "EPSG:4326"
geometry = gp.points_from_xy(df.lonstartl, df.latstartl)
geo_df = gp.GeoDataFrame(df, crs=crs, geometry=geometry)

### 3 Hours ###
hours_3 = st.slider("Hour of interest (Every 3 hours)", 0, 23, step=3)
data = geo_df
data["timestart"] = pd.to_datetime(data["timestart"])

### MAP ###
st.subheader("Map showing pick-up data at %i:00" % (hours_3))
st.markdown("""This map shows only the pick-up data.""")
long = 100.5018  # longitude of BKK
lat = 13.7563    # latitude of BKK
print("*" * 100) # 2 Read, analyze and summarize Schedule data ######################################################################################################################## print( "Run Section 2: Read, analyze and summarize rawnav, WMATA schedule data..." ) begin_time = datetime.now() # Read the Wmata_Schedule data wmata_schedule_dat = wr.read_sched_db_patterns(path=os.path.join( path_source_data, "wmata_schedule_data", "Schedule_082719-201718.mdb"), analysis_routes=analysis_routes) wmata_schedule_gdf = (gpd.GeoDataFrame(wmata_schedule_dat, geometry=gpd.points_from_xy( wmata_schedule_dat.stop_lon, wmata_schedule_dat.stop_lat), crs='EPSG:4326').to_crs(epsg=wmata_crs)) # Make Output Directory path_stop_summary = os.path.join(path_processed_data, "stop_summary.parquet") if not os.path.isdir(path_stop_summary): os.mkdir(path_stop_summary) path_stop_index = os.path.join(path_processed_data, "stop_index.parquet") if not os.path.isdir(path_stop_index): os.mkdir(path_stop_index) for analysis_route in analysis_routes: print("*" * 100) print('Processing analysis route {}'.format(analysis_route))
import matplotlib.pyplot as plt

# change this only
os.chdir('/Users/Sarah/Documents/GitHub/US-schoolday-temperatures')
date = 'Winter 2020-21'
title = 'Average Daily Temp (with Wind Chill)'

# df = pd.read_csv('{} temperature.csv'.format(date))
data_folder = 'Data'
filename = '{} temperature.csv'.format(date)
data_file = os.path.join(data_folder, filename)
df = pd.read_csv(data_file)

gdf = geopandas.GeoDataFrame(df,
                             geometry=geopandas.points_from_xy(df['lon'], df['lat']),
                             crs='epsg:4269')  # crs for North America

# from https://www.census.gov/geographies/mapping-files/time-series/geo/cartographic-boundary.html
# can also get school districts here!
state_file = os.path.join(data_folder, 'cb_2019_us_state_20m/cb_2019_us_state_20m.shp')
state = geopandas.read_file(state_file)

# https://stackoverflow.com/questions/19960077/how-to-filter-pandas-dataframe-using-in-and-not-in-like-in-sql
state_abres = ['AK', 'PR', 'HI']  # continental US only: remove Alaska, Puerto Rico and Hawaii
contenental_states = state[~state['STUSPS'].isin(state_abres)]
# state_test.head()

fig, ax = plt.subplots()  # figsize=(25,8)
PAIPR_dir = ROOT_DIR.joinpath('data/gamma_20111109')
data_0 = import_PAIPR(PAIPR_dir)

# Format accumulation data
accum_long = format_PAIPR(data_0, start_yr=1979, end_yr=2009).drop('elev', axis=1)
traces = accum_long.groupby('trace_ID')

# New accum and std dfs in wide format
accum = accum_long.pivot(index='Year', columns='trace_ID', values='accum')
accum_std = accum_long.pivot(index='Year', columns='trace_ID', values='std')

# Create df for mean annual accumulation
accum_trace = traces.aggregate(np.mean).drop('Year', axis=1)
accum_trace = gpd.GeoDataFrame(
    accum_trace,
    geometry=gpd.points_from_xy(accum_trace.Lon, accum_trace.Lat),
    crs="EPSG:4326").drop(['Lat', 'Lon'], axis=1)

# Import Antarctic outline shapefile
ant_path = ROOT_DIR.joinpath('data/Ant_basemap/Coastline_medium_res_polygon.shp')
ant_outline = gpd.read_file(ant_path)

# Convert accum crs to same as Antarctic outline
accum_trace = accum_trace.to_crs(ant_outline.crs)

##### Estimate time series regressions
# Preallocate arrays for linear regression
lm_data = accum.transpose()
std_data = accum_std.transpose()
680: "Þórshöfn", 681: "Þórshöfn", } ADMINISTRATIVE_DIVISIONS: Dict[str, List[str]] = { "Seltjarnarnesbær": ["Seltjarnarnes"], "Ísafjarðarbær": ["Ísafjörður"], } data_path = pkg_resources.resource_filename("stadfangaskra.data", "df.parquet.gzip") _df = pd.read_parquet(data_path) df = _df = geopandas.GeoDataFrame( _df, geometry=geopandas.points_from_xy(_df.lon, _df.lat), crs=4326 ) df = df.drop(["lat", "lon"], axis=1) for c in ["municipality_code"]: df[c] = pd.Categorical(df[c].astype(pd.Int32Dtype())) regions = pd.read_parquet( pkg_resources.resource_filename("stadfangaskra.data", "regions.parquet") ) REGION_MAP = { k: list(v) for (k, v) in regions.groupby("region")["municipality"].unique().to_dict().items() }
def generate_value_added_sales_by_year(start_year=2006, end_year=2018):
    """
    Generate the sales files by year with value-added columns.

    Notes
    -----
    This takes the file of unique sales and adds several useful columns,
    including indexed housing prices and geocoded fields (zip codes,
    neighborhoods, and police districts).
    """
    # get the main sales file
    matches = glob(os.path.join(data_dir, "OPA", "sales_file_*.csv"))
    if not len(matches):
        sales_data = generate_sales_file()
    else:
        sales_data = pd.read_csv(matches[0])

    # format the data
    sales_data = (
        sales_data.assign(
            sale_date=lambda df: pd.to_datetime(df["sale_date"]),
            sale_year=lambda df: df.sale_date.dt.year,
            sale_price_psf=lambda df: df.sale_price / df.total_livable_area,
            test=lambda df: ~np.isinf(df.sale_price_psf) & df.sale_price_psf.notnull(),
            housing_index=lambda df: PhillyMSAHousingIndex.interpolate(df["sale_date"]),
        )
        .assign(
            housing_index=lambda df: df.housing_index / df.housing_index.max(),
            sale_price_indexed=lambda df: df.sale_price / df.housing_index,
        )
        .query("test == True")
        .drop(labels=["test"], axis=1)
    )

    # make sure the output directory exists
    dirname = os.path.join(data_dir, "OPA", "ValueAdded")
    if not os.path.exists(dirname):
        os.makedirs(dirname)

    # geocode!
    zip_codes = ZIPCodes.get()
    neighborhoods = Neighborhoods.get()
    police_districts = PoliceDistricts.get()

    # save each year
    for year in range(start_year, end_year + 1):
        print(f"Processing sale year {year}...")

        # get this year's data
        df = sales_data.query("sale_year == @year")

        # convert to geopandas
        gdf = (
            gpd.GeoDataFrame(
                df,
                geometry=gpd.points_from_xy(df["lng"].astype(float),
                                            df["lat"].astype(float)),
                crs={"init": "epsg:4326"},
            )
            .to_crs(epsg=EPSG)
            .drop(labels=["lat", "lng"], axis=1)
        )
        if "zip_code" in gdf.columns:
            gdf = gdf.drop(labels=["zip_code"], axis=1)

        # geocode
        gdf = (gdf.pipe(geocode, zip_codes)
                  .pipe(geocode, neighborhoods)
                  .pipe(geocode, police_districts))

        path = os.path.join(dirname, f"{year}.csv")
        gdf.to_csv(path, index=False)
df = pd.DataFrame(
    {'City': ['Buenos Aires', 'Brasilia', 'Santiago', 'Bogota', 'Caracas'],
     'Country': ['Argentina', 'Brazil', 'Chile', 'Colombia', 'Venezuela'],
     'Latitude': [-34.58, -15.78, -33.45, 4.60, 10.48],
     'Longitude': [-58.66, -47.91, -70.66, -74.08, -66.86]})

###############################################################################
# A ``GeoDataFrame`` needs a ``shapely`` object. We use geopandas
# ``points_from_xy()`` to transform **Longitude** and **Latitude** into a list
# of ``shapely.Point`` objects and set it as a ``geometry`` while creating the
# ``GeoDataFrame``. (Note that ``points_from_xy()`` is an enhanced wrapper for
# ``[Point(x, y) for x, y in zip(df.Longitude, df.Latitude)]``.)

gdf = geopandas.GeoDataFrame(
    df, geometry=geopandas.points_from_xy(df.Longitude, df.Latitude))

###############################################################################
# ``gdf`` looks like this:

print(gdf.head())

###############################################################################
# Finally, we plot the coordinates over a country-level map.

world = geopandas.read_file(geopandas.datasets.get_path('naturalearth_lowres'))

# We restrict to South America.
ax = world[world.continent == 'South America'].plot(
    color='white', edgecolor='black')