def test_groupby_metadata(crs):
    # https://github.com/geopandas/geopandas/issues/2294
    df = GeoDataFrame(
        {
            "geometry": [Point(0, 0), Point(1, 1), Point(0, 0)],
            "value1": np.arange(3, dtype="int64"),
            "value2": np.array([1, 2, 1], dtype="int64"),
        },
        crs=crs,
    )

    # dummy test asserting we can access the crs
    def func(group):
        assert isinstance(group, GeoDataFrame)
        assert group.crs == crs

    df.groupby("value2").apply(func)

    # actual test with functionality
    res = df.groupby("value2").apply(
        lambda x: geopandas.sjoin(x, x[["geometry", "value1"]], how="inner"))
    expected = (df.take([0, 2, 0, 2, 1])
                .set_index("value2", drop=False, append=True)
                .swaplevel()
                .rename(columns={"value1": "value1_left"})
                .assign(value1_right=[0, 0, 2, 2, 1]))
    assert_geodataframe_equal(res.drop(columns=["index_right"]), expected)

def _get_simulated_pop_by_page(
    pop_by_plot: gpd.GeoDataFrame,
    plots_by_page: Sequence,
    page_col: str,
    plot_number_col: str,
    district_number_col: str,
):
    page_nums = [[i] * n for i, n in enumerate(plots_by_page, start=0)]
    pop_by_plot[page_col] = list(chain.from_iterable(page_nums))
    pop_by_page = pop_by_plot.groupby(by=page_col).sum()
    representative_plots_and_districts = pop_by_plot.groupby(by=page_col).apply(
        _get_representative_plot,
        plot_number_col=plot_number_col,
        how='mid',
        district_number_col=district_number_col,
    )
    assert len(representative_plots_and_districts.index) == len(pop_by_page.index)
    pop_by_page[plot_number_col] = representative_plots_and_districts[plot_number_col]
    pop_by_page[district_number_col] = representative_plots_and_districts[district_number_col]
    return pop_by_page

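# A minimal sketch of the page-numbering trick above: `plots_by_page` holds the
# number of plots on each page, and the nested comprehension expands it into one
# page label per plot row. The data here is hypothetical.
from itertools import chain

plots_by_page = [3, 1, 2]  # page 0 has 3 plots, page 1 has 1, page 2 has 2
page_nums = [[i] * n for i, n in enumerate(plots_by_page)]
assert list(chain.from_iterable(page_nums)) == [0, 0, 0, 1, 2, 2]
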
def PlnResultsIntegrates(refname, px, py, epsg, wannaview=False, tfac=1):
    '''
    Integrates PLN results in a unique file and exports shapefile

    refname(string)     Scenario suffix name to process
    '''
    # get list of files
    fnames = glob(refname + '*.PLN*')
    N = 100 / float(len(fnames))
    # loop over file names
    for fname in fnames:
        # reads pln file
        # if the final gdf already exists, accumulate into it
        if 'Fgdf' in locals():
            try:
                # converts file to dataframe
                gdf = ReadPLN(fname, px, py, epsg, tfac=tfac)
                gdf['prob001'] = (gdf['thickness'].values > 0.01) * 1
                gdf['prob01'] = (gdf['thickness'].values > 0.1) * 1
                gdf['prob1'] = (gdf['thickness'].values > 1) * 1
                gdf['prob10'] = (gdf['thickness'].values > 10) * 1
                # concatenates
                Fgdf = GeoDataFrame(pd.concat([Fgdf, gdf]))
                # gets occurrence counts
                prob = Fgdf.groupby(['x', 'y'], as_index=False).sum()
                prob = prob.sort_values(['x', 'y'])
                # gets maximum thickness
                thickness = Fgdf.groupby(['x', 'y'], as_index=False).max()
                thickness = thickness.sort_values(['x', 'y'])
                Fgdf = Fgdf.drop_duplicates(['x', 'y'])
                Fgdf = Fgdf.sort_values(['x', 'y'])
                Fgdf['thickness'] = thickness['thickness'].values
                Fgdf['prob001'] = prob['prob001'].values
                Fgdf['prob01'] = prob['prob01'].values
                Fgdf['prob1'] = prob['prob1'].values
                Fgdf['prob10'] = prob['prob10'].values
            except Exception:
                print('error in scenario {}'.format(fname))
        else:
            try:
                # creates final dataframe
                Fgdf = ReadPLN(fname, px, py, epsg, wannaview=wannaview)
                Fgdf['prob001'] = (Fgdf['thickness'].values > 0.01) * 1
                Fgdf['prob01'] = (Fgdf['thickness'].values > 0.1) * 1
                Fgdf['prob1'] = (Fgdf['thickness'].values > 1) * 1
                Fgdf['prob10'] = (Fgdf['thickness'].values > 10) * 1
            except Exception:
                print('error in scenario {}'.format(fname))
    Fgdf['prob001'] = Fgdf['prob001'].values * N
    Fgdf['prob01'] = Fgdf['prob01'].values * N
    Fgdf['prob1'] = Fgdf['prob1'].values * N
    Fgdf['prob10'] = Fgdf['prob10'].values * N
    return Fgdf

def property_school_rating(zill_df: gpd.GeoDataFrame,
                           full_schools: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
    # init school rating column for main frame
    zill_df['edu_rating'] = np.nan
    houses = zill_df.groupby('DISTRICT')
    schols = full_schools.groupby('DISTRICT')
    # grab dataframes separated by district
    house_districts = [houses.get_group(x) for x in houses.groups]
    for house_district_group in house_districts:
        district = house_district_group['DISTRICT'].unique()[0]
        schools = schols.get_group(district)
        k = len(schools) if len(schools) < 5 else 5
        # get neighboring schools in the same district
        zp_id_dist = geo_knearest(house_district_group, schools,
                                  impute=False, k=k)
        for house, neighboring_schools in zp_id_dist.items():
            # replace gsIDs with their respective ratings
            school_ids = [x[0] for x in neighboring_schools]
            ratings = []
            for gsId in school_ids:
                school = schools.loc[schools['gsId'] == gsId]['gsRating']
                ratings.append(float(school.iloc[0]))
            distances = np.array([x[1] for x in neighboring_schools])
            # schools whose gps coordinates sit on the same spot would give a
            # zero distance, so shift all distances by 1 to avoid divide by zero
            distances += 1
            # normalize distance weights
            weights = 1 / distances
            weights /= weights.sum(axis=0)
            ratings = np.array(ratings) * weights
            weighted_rating = ratings.sum(axis=0)
            # attach the aggregate rating to the associated housing property
            zill_df.loc[zill_df['zpid'] == house, 'edu_rating'] = weighted_rating
            zill_df.loc[zill_df['zpid'] == house, 'school_count'] = k
    return zill_df

def dissolve(
    gdf: gpd.GeoDataFrame,
    by: Iterable[str],
    func: Union[Callable, str, list, dict],
    how: Union[Literal["union", "first"],
               Callable[[gpd.GeoSeries], BaseGeometry]] = "union",
) -> gpd.GeoDataFrame:
    """
    Dissolve layer by aggregating features based on common attributes.

    Args:
        gdf: GeoDataFrame with non-empty (Multi)Polygon geometries.
        by: Names of columns to group features by.
        func: Aggregation function for data columns
            (see :meth:`pd.DataFrame.groupby`).
        how: Aggregation function for geometry column.
            Either 'union' (:meth:`gpd.GeoSeries.unary_union`),
            'first' (first geometry in group),
            or a function aggregating multiple geometries into one.

    Returns:
        GeoDataFrame with dissolved geometry and data columns,
        and grouping columns set as the index.
    """
    check_gdf(gdf)
    merges = {"union": lambda x: x.unary_union, "first": lambda x: x.iloc[0]}
    data = gdf.drop(columns=gdf.geometry.name).groupby(by=by).aggregate(func)
    geometry = gdf.groupby(by=by, group_keys=False)[gdf.geometry.name].aggregate(
        merges.get(how, how))
    return gpd.GeoDataFrame(geometry, geometry=gdf.geometry.name,
                            crs=gdf.crs).join(data)

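# A minimal usage sketch for dissolve() above, on hypothetical toy data; it
# assumes geopandas and shapely are importable and that check_gdf() accepts
# the frame.
import geopandas as gpd
from shapely.geometry import box

toy = gpd.GeoDataFrame(
    {"zone": ["a", "a", "b"], "pop": [10, 20, 5]},
    geometry=[box(0, 0, 1, 1), box(1, 0, 2, 1), box(5, 5, 6, 6)],
    crs="EPSG:4326",
)
# one row per zone: unioned geometry, summed population
dissolved = dissolve(toy, by=["zone"], func={"pop": "sum"}, how="union")
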
def get_geom_bboxes(geom: gp.GeoDataFrame,
                    pad: int,
                    idx_def: str,
                    attr: str = None,
                    no_split=False) -> list:
    if no_split:
        bbox = __get_geom_bbox(geom, pad)
        ret = [{'geom': geom, 'bbox': bbox, 'idx': idx_def}]
    else:
        ret = []
        if attr is None:
            num = len(geom)
            for ii in range(num):
                g = geom.iloc[ii:ii + 1]
                ret.append({
                    'geom': g,
                    'bbox': __get_geom_bbox(g, pad),
                    'idx': f'{ii:06d}'
                })
        else:
            for gi, g in geom.groupby(by=attr):
                ret.append({
                    'geom': g,
                    'bbox': __get_geom_bbox(g, pad),
                    'idx': gi
                })
    return ret

def lijn(dfdict):
    lijnen_dfdict = dict()
    meeuwen = list(dfdict.keys())
    for meeuw in meeuwen:
        dfdict[meeuw] = dfdict[meeuw].set_index(
            pd.DatetimeIndex(dfdict[meeuw].date_time))
        geometry = [
            Point(xy)
            for xy in zip(dfdict[meeuw].longitude, dfdict[meeuw].latitude)
        ]
        punten_df = GeoDataFrame(dfdict[meeuw], geometry=geometry)
        crs = {'init': 'epsg:4326'}
        grouped = punten_df.groupby(
            [punten_df.index.year, punten_df.index.month,
             punten_df.index.day]).filter(lambda x: len(x) > 1)
        grouped = grouped.groupby(
            [grouped.index.year, grouped.index.month, grouped.index.day]
        )['geometry'].apply(lambda x: LineString(x.tolist()))
        grouped.index.rename(['jaar', 'maand', 'dag'], inplace=True)
        lijnen_df = GeoDataFrame(grouped, crs=crs, geometry='geometry')
        lijnen_df.reset_index(inplace=True)
        lijnen_df = lijnen_df.to_crs({'init': 'epsg:31370'})
        lijnen_df.to_file(
            r'C:\Users\maart\OneDrive\Master\Projectwerk Geo-ICT\Trajecten\trajecten_{}'
            .format(meeuw), 'ESRI Shapefile')
        lijnen_dfdict[meeuw] = lijnen_df
    return lijnen_dfdict

def _add_tocomid(flw: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
    """Find the downstream comid(s) of each comid in NHDPlus flowline database.

    Ported from `nhdplusTools <https://github.com/USGS-R/nhdplusTools>`__

    Parameters
    ----------
    flw : geopandas.GeoDataFrame
        NHDPlus flowlines with at least the following columns:
        ``comid``, ``terminalpa``, ``fromnode``, ``tonode``

    Returns
    -------
    geopandas.GeoDataFrame
        The input dataframe with an additional column named ``tocomid``.
    """
    req_cols = ["comid", "terminalpa", "fromnode", "tonode"]
    _check_requirements(req_cols, flw)
    flw[req_cols] = flw[req_cols].astype("Int64")

    def tocomid(group):
        def toid(row):
            try:
                return group[group.fromnode == row.tonode].comid.to_numpy()[0]
            except IndexError:
                return pd.NA

        return group.apply(toid, axis=1)

    flw["tocomid"] = pd.concat([tocomid(g) for _, g in flw.groupby("terminalpa")])
    return flw

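# A minimal sketch of what _add_tocomid() derives, on hypothetical data: comid 1
# drains into comid 2 because 1's tonode equals 2's fromnode. Geometry is
# omitted for brevity, so a plain DataFrame stands in for the flowline
# GeoDataFrame.
import pandas as pd

flw = pd.DataFrame({
    "comid": [1, 2],
    "terminalpa": [9, 9],
    "fromnode": [100, 200],
    "tonode": [200, 300],   # comid 2 has no downstream neighbour -> <NA>
})
# after _add_tocomid(flw): flw["tocomid"] == [2, <NA>]
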
def trajectories_from_qgis_point_layer(layer, time_field_name,
                                       trajectory_id_field, time_format):
    names = [field.name() for field in layer.fields()]
    data = []
    for feature in layer.getFeatures():
        my_dict = {}
        for i, a in enumerate(feature.attributes()):
            if names[i] == time_field_name:
                if type(a) == QtCore.QDateTime:
                    my_dict[names[i]] = a.toPyDateTime()
                else:
                    my_dict[names[i]] = datetime.strptime(a, time_format)
            else:
                my_dict[names[i]] = a
        x = feature.geometry().asPoint().x()
        y = feature.geometry().asPoint().y()
        my_dict['geometry'] = Point((x, y))
        data.append(my_dict)
    df = pd.DataFrame(data).set_index(time_field_name)
    crs = CRS(int(layer.sourceCrs().geographicCrsAuthId().split(':')[1]))
    geo_df = GeoDataFrame(df, crs=crs)
    df_by_id = dict(tuple(geo_df.groupby(trajectory_id_field)))
    trajectories = []
    for key, value in df_by_id.items():
        traj = Trajectory(key, value)
        trajectories.append(traj)
    return trajectories

def _plot_vector_feature(self,
                         dataframe: GeoDataFrame,
                         timestamp_column: Optional[str] = None,
                         title: Optional[str] = None) -> np.ndarray:
    """Plots a GeoDataFrame vector feature"""
    rows = len(dataframe[timestamp_column].unique()) if timestamp_column else 1
    axes = self._provide_axes(nrows=rows, ncols=1, title=title)

    if self.eopatch.bbox:
        self._plot_bbox(axes=axes, target_crs=dataframe.crs)

    if timestamp_column is None:
        dataframe.plot(ax=axes.flatten()[0])
        return axes

    timestamp_groups = dataframe.groupby(timestamp_column)
    timestamps = sorted(timestamp_groups.groups)
    label_kwargs = self._get_label_kwargs()
    for timestamp, axis in zip(timestamps, axes.flatten()):
        timestamp_groups.get_group(timestamp).plot(ax=axis)
        axis.set_ylabel(timestamp.isoformat(), **label_kwargs)

    return axes

def extract_gdf_roads(self, panos: gpd.GeoDataFrame):

    def _panos_to_line(df):
        res = {
            'RID': df.iloc[0].RID,
            'src': df.iloc[0].name,
            'dst': df.iloc[-1].name,
            'Panos': df[["Order", 'DIR', 'MoveDir', 'Type', 'X', "Y"]]
            .reset_index().to_dict('records')
        }

        if df.shape[0] == 1:
            # a single pano still needs a valid LineString: duplicate the point
            res['geometry'] = LineString([
                df.iloc[0].geometry.coords[0], df.iloc[0].geometry.coords[0]
            ])
        else:
            res['geometry'] = LineString(
                [item.geometry.coords[0] for index, item in df.iterrows()])

        return gpd.GeoDataFrame([res])

    panos.sort_values(["RID", "Order"], inplace=True)

    return panos.groupby("RID").apply(_panos_to_line).set_index("RID")

def _write_measurement_figures(
    gdf: geopandas.GeoDataFrame, outdir: Path, measurement_template: str
) -> Dict[str, List]:
    """
    Write the measurement sub-documents containing figures for each of
    the product groups.
    """
    # currently the groups are nbar, nbart, oa
    product_groups = set(meas.split("_")[0] for meas in gdf.measurement.unique())
    figure_fnames: Dict[str, List] = {p_group: [] for p_group in product_groups}

    for name, grp in gdf.groupby("measurement"):
        product_group = name.split("_")[0]

        # replace items like nbar_blue with NBAR BLUE for figure captions
        figure_caption = name.upper().replace("_", " ")

        out_string = measurement_template.format(
            measurement_name=name,
            figure_caption=figure_caption,
            stem=name,
            main_doc=LatexSectionFnames.MAIN.value,
        )

        # need relative names to insert into the main tex doc of each product
        basename = f"{name}.tex"
        relative_fname = Path(DirectoryNames.REPORT_FIGURES.value, basename)
        figure_fnames[product_group].append(relative_fname)

        out_fname = outdir.joinpath(relative_fname)
        write_latex_document(out_string, out_fname)

    return figure_fnames

def summarize_bldg_counts(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
    summary = gdf.groupby('gadm')['bldg_count'].describe()
    summary['total_bldgs'] = summary['count'] * summary['mean']
    summary.rename(columns={c: "bldg_" + c for c in summary.columns},
                   inplace=True)
    summary.rename(columns={'bldg_count': 'block_count'}, inplace=True)
    return summary

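# A minimal sketch of summarize_bldg_counts() on hypothetical data: one row per
# block with its GADM region id and building count. The summary then has one
# row per region with bldg_mean, bldg_std, ..., and the per-region block count
# renamed to block_count. A plain DataFrame stands in for the GeoDataFrame
# since the aggregation does not touch geometry.
import pandas as pd

blocks = pd.DataFrame({
    "gadm": ["KEN.1_1", "KEN.1_1", "KEN.2_1"],
    "bldg_count": [12, 8, 30],
})
# summarize_bldg_counts(blocks)["block_count"] -> KEN.1_1: 2, KEN.2_1: 1
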
def remove_erroneous_pv_polygons(
    self, raw_PV_installations_on_rooftop: gpd.GeoDataFrame = None
) -> gpd.GeoDataFrame:
    """
    Removes PV polygons whose aggregated intersected area is larger than
    their original raw area

    Parameters
    ----------
    raw_PV_installations_on_rooftop: GeoPandas.GeoDataFrame
        GeoDataFrame which must contain the columns "area_inter",
        "raw_area", and "identifier"

    Returns
    -------
    GeoPandas.GeoDataFrame
        Input GeoDataFrame where erroneous PV polygons have been removed
    """
    # Compute share of raw area that the intersected pv polygon covers
    raw_PV_installations_on_rooftop["percentage_intersect"] = (
        raw_PV_installations_on_rooftop["area_inter"]
        / raw_PV_installations_on_rooftop["raw_area"])

    # Group intersections by polygon identifier and sum the percentages
    group_intersection_id = raw_PV_installations_on_rooftop.groupby(
        "identifier").agg({
            "area_inter": "sum",
            "Street": "first",
            "Street_Address": "first",
            "raw_area": "first",
            "City": "first",
            "PostalCode": "first",
            "percentage_intersect": "sum",
        })

    # Find erroneous polygons whose area after intersection is larger than
    # their original (raw) area
    polygone = group_intersection_id[
        group_intersection_id["percentage_intersect"] > 1.1].index.tolist()

    # Filter out erroneous polygons identified above and all their
    # respective sub-parts
    raw_PV_installations_on_rooftop = raw_PV_installations_on_rooftop.drop(
        raw_PV_installations_on_rooftop.index[
            (raw_PV_installations_on_rooftop["identifier"].isin(polygone))
            & (raw_PV_installations_on_rooftop["percentage_intersect"] < 1)])

    # Drop duplicate identifiers for erroneous polygons
    raw_PV_installations_on_rooftop = raw_PV_installations_on_rooftop.drop(
        raw_PV_installations_on_rooftop.index[
            (raw_PV_installations_on_rooftop["identifier"].isin(polygone))
            & (raw_PV_installations_on_rooftop["identifier"].duplicated())])

    return raw_PV_installations_on_rooftop

def prepare_parcels(bldgs: gpd.GeoDataFrame, blocks: gpd.GeoDataFrame,
                    parcels: gpd.GeoDataFrame) -> pd.DataFrame:
    '''
    For a single GADM, this script (1) creates the PlanarGraph associated
    with each respective parcel and (2) maps all buildings to their
    corresponding parcel. The buildings are converted to centroids and then
    to Node types so they can just be added to the PlanarGraph
    '''
    # Convert buildings to centroids
    bldgs['centroids'] = bldgs['geometry'].centroid
    bldgs.set_geometry('centroids', inplace=True)

    # We want to map each building to a given block to then map the
    # buildings to a parcel
    bldgs = gpd.sjoin(bldgs, blocks, how='left', op='within')
    bldgs.drop(columns=['index_right'], inplace=True)

    # Now, join the parcels with the buildings
    parcels = parcels.merge(bldgs[['block_id', 'centroids']],
                            how='left', on='block_id')
    parcels.rename(columns={
        'geometry': 'parcel_geometry',
        'centroids': 'buildings'
    }, inplace=True)

    # Now collapse on the block and clean
    parcels = parcels.groupby('block_id').agg(list)
    parcels['parcel_geometry'] = parcels['parcel_geometry'].apply(lambda x: x[0])
    parcels['buildings'] = parcels['buildings'].apply(
        lambda x: [] if x == [np.nan] else x)

    # Checks
    assert blocks.shape[0] == parcels.shape[0]  # We should maintain block count
    parcels['buildings_count'] = parcels['buildings'].apply(lambda x: len(x))
    # assert parcels['buildings_count'].sum() == bldgs.shape[0]  # We should maintain bldgs count
    parcels.reset_index(inplace=True)

    # Now, create the graph for each parcel
    parcels['planar_graph'] = parcels['parcel_geometry'].apply(
        PlanarGraph.multilinestring_to_planar_graph)

    # And convert the buildings from shapely.Points -> topology.Nodes
    parcels['buildings'] = parcels['buildings'].apply(
        lambda x: [point_to_node(p) for p in x])

    return parcels

def load_crime_stats(population_group=None, crime_list=None, provence=None):
    # lowercase the province name
    if provence is not None:
        provence = provence.lower()
    # get data set dir
    data_path = get_work_path()
    # load and clean police stats
    police_stats = clean_police_stats(
        data_path.joinpath('Police_Statistics___2005_-_2017.csv'))
    if crime_list is not None:
        police_stats = police_stats[police_stats['Crime'].isin(crime_list)]
    if provence is not None:
        police_stats = police_stats.query(f"Province == '{provence}'")
    # population shape file
    pop_stats = clean_popluation_stats(
        data_path.joinpath(
            'population/geo_export_3ec3ac74-ddff-4220-8007-b9b5643f79af.shp'))
    base_group = ['sal_code_i', 'pr_name', 'sp_name', 'geometry']
    if population_group is not None:
        # filter out columns
        pop_stats = pop_stats[pop_groups[population_group] + base_group]
    if provence is not None:
        pop_stats = pop_stats.query(f"pr_name == '{provence}'")
    # shape id to weights
    precinct = clean_area_2_precint(
        data_path.joinpath('Precinct_to_small_area_weights.csv'))
    # munge data
    df = merge(precinct, pop_stats,
               left_on='small_area', right_on='sal_code_i')
    df = merge(df, police_stats,
               left_on='precinct', right_on='Police Station')
    # calculate crime per shape file as a proportion of the precinct weight
    df['total_crime'] = df.weight * df.Incidents
    # keep as geo-dataframe
    df = GeoDataFrame(df, crs=pop_stats.crs)
    # clean data frame
    df = df.drop([
        'sal_code_i', 'pr_name', 'sp_name', 'Police Station', 'Incidents',
        'weight'
    ], axis=1)
    # aggregate precincts back into shapes
    temp_df = df.groupby(['small_area', 'Year',
                          'Crime'])[['total_crime']].sum().round()
    df = df.drop_duplicates(subset=['small_area', 'Year', 'Crime']).drop(
        ['total_crime'], axis=1)
    df = merge(df, temp_df, on=['small_area', 'Year', 'Crime'])
    return df

def add_ruptures_to_bins(rupture_gdf: gpd.GeoDataFrame,
                         bin_gdf: gpd.GeoDataFrame) -> None:
    """
    Takes a GeoPandas GeoDataFrame of ruptures and adds them to the ruptures
    list that is an attribute of each
    :class:`~openquake.hme.utils.bins.SpacemagBin` based on location and
    magnitude. This function modifies both GeoDataFrames in memory and does
    not return any value.

    :param rupture_gdf:
        GeoDataFrame of ruptures; this should have two columns, one of them
        being the `rupture` column with the :class:`Rupture` object, and the
        other being the `geometry` column, with a GeoPandas/Shapely geometry
        class.

    :param bin_gdf:
        GeoDataFrame of the bins. This should have a `geometry` column with
        a GeoPandas/Shapely geometry and a `SpacemagBin` column that has a
        :class:`~openquake.hme.utils.bins.SpacemagBin` object.

    :Returns:
        `None`.
    """
    logger.info(" adding ruptures to bins")

    bin_edges = bin_gdf.iloc[0].SpacemagBin.get_bin_edges()
    bin_centers = bin_gdf.iloc[0].SpacemagBin.mag_bin_centers

    logging.info("\tgetting mag bin vals")
    rupture_gdf["mag_r"] = pd.cut(
        list(map(lambda r: r.mag, rupture_gdf["rupture"])),
        bin_edges,
        labels=bin_centers,
    )

    rup_groups = rupture_gdf.groupby(["bin_id"])
    pbar = tqdm(total=len(rupture_gdf))
    for (bin_id, rup_group) in rup_groups:
        sbin = bin_gdf.loc[bin_id, "SpacemagBin"]
        mag_groups = rup_group.groupby("mag_r")
        for mag_bin, mag_group in mag_groups:
            sbin.mag_bins[mag_bin].ruptures.extend(mag_group["rupture"].values)
        pbar.update(len(rup_group))

    pbar.close()
    logging.info("\tdone adding ruptures to bins")
    return

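# A small illustration of the pd.cut() call above, with hypothetical magnitude
# bins: each rupture magnitude is assigned the center of the bin it falls in.
import pandas as pd

mags = [4.2, 5.7, 6.1]
bin_edges = [4.0, 5.0, 6.0, 7.0]
bin_centers = [4.5, 5.5, 6.5]
print(pd.cut(mags, bin_edges, labels=bin_centers))
# -> [4.5, 5.5, 6.5] as a Categorical
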
def plot_cluster(gdf: geopandas.GeoDataFrame, fig_location: str = None,
                 show_figure: bool = False):
    """ Plot the locations of all accidents in the region, grouped into clusters """
    gdf = gdf.to_crs("EPSG:3857")

    a = pd.Series(gdf['geometry'].apply(lambda p: p.x))
    b = pd.Series(gdf['geometry'].apply(lambda p: p.y))
    X = np.column_stack((a, b))

    n_clusters = 14
    kmeans = KMeans(n_clusters=n_clusters)
    labels = pd.Series(kmeans.fit_predict(X))
    gdf['cluster'] = labels.values

    cluster_size: pd.DataFrame = gdf.groupby('cluster').cluster.count()
    clusters: np.ndarray = kmeans.cluster_centers_
    clusters = np.hstack(
        (clusters, np.array([cluster_size.to_numpy()]).transpose()))

    fig, ax = plt.subplots(1, 1, figsize=(20, 15))
    gdf.plot(ax=ax, markersize=1, color="tab:blue")
    plt.scatter(x=clusters[:, 0],
                y=clusters[:, 1],
                c=cluster_size,
                s=clusters[:, 2] * 0.8,
                facecolors='none',
                alpha=0.6)
    ctx.add_basemap(
        ax,
        crs=gdf.crs.to_string(),
        source=ctx.providers.Stamen.TonerLite,
    )
    gdf.boundary.plot(ax=ax, color="k")

    color_bar = plt.colorbar(shrink=0.65)
    color_bar.set_alpha(1)
    color_bar.draw_all()
    ax.set_title('Nehody v Jihomoravském kraji')
    ax.axis("off")

    if fig_location:
        touch(fig_location)
        plt.savefig(fig_location)

    if show_figure:
        plt.tight_layout()
        plt.show()

def fullcrime_kmeans(fullcrime_df: gpd.GeoDataFrame,
                     fullcrime_coords: np.array, n_clusters: int):
    '''
    Parameters
    ----------
    fullcrime_df : gpd.GeoDataFrame
        all crimes with descriptions, geometry points.
    fullcrime_coords : np.array
        coordinates of crimes as 2-D numpy array.
    n_clusters : int
        optimal number of clusters for KMeans, chosen a priori with
        silhouette analysis.

    Returns
    -------
    clusters_df : pandas GroupBy object
        crimes with cluster labels attached, grouped by cluster.
    clusters : list of DataFrames
        one DataFrame per cluster group.
    centers : numpy array
        center of each cluster.
    '''
    weights = fullcrime_df['weight'].to_numpy()
    clusterer = KMeans(n_clusters=n_clusters, random_state=10)
    clusterer.fit_predict(X=fullcrime_coords, sample_weight=weights)
    centers = clusterer.cluster_centers_
    # cluster id labels
    labels = clusterer.labels_
    fullcrime_df['clusters'] = labels
    # find the most common crime by cluster
    # pandas groupby returns a Series object, so scipy's mode is an alternative:
    # print(fullcrime_df.groupby(['clusters']).agg(lambda x: stats.mode(x)[0]))
    clusters_df = fullcrime_df.groupby('clusters')
    clusters = [clusters_df.get_group(x) for x in clusters_df.groups]
    return clusters_df, clusters, centers

def plot_pngs(gdf: geopandas.GeoDataFrame, outdir: Path) -> None:
    """
    General plotting routine of the residuals analysis.
    Currently only interested in the surface reflectance measurements,
    but can easily be expanded to incorporate other measurements.
    """
    _LOG.info("reading the TM WORLD BORDERS dataset")

    with open(TM_WORLD_BORDERS_FNAME, "rb") as src:
        dctx = zstandard.ZstdDecompressor()
        tm_gdf = geopandas.read_file(dctx.stream_reader(src))

    for name, grp in gdf.groupby("measurement"):
        if "nbar" in name:
            _plot_reflectance_stats(grp, tm_gdf, name, outdir)
        else:
            _plot_oa_stats(grp, tm_gdf, name, outdir)

    _LOG.info("finished producing plots")

def spatial_lag(gdf: gpd.GeoDataFrame, col: str, first: int = 1,
                last: int = 1) -> pd.Series:
    """ Compute spatial lag on col in gdf """

    def gdf_to_w_q(gdf_geom: gpd.GeoDataFrame, first: int, last: int) -> Any:
        """
        Build queen weights from gdf.
        Use a temporary shapefile to get the geometry into pysal as their
        new interface is confusing. There must be a less silly way.
        """
        # Compute first order spatial weight
        w = lps.weights.Queen.from_dataframe(gdf_geom, geom_col="geom")

        # If we want higher order
        if not first == last == 1:
            w_ho = lps.weights.higher_order(w, first)
            # loop from first to last order
            for order in range(first + 1, last + 1):
                w_this_order = lps.weights.higher_order(w, order)
                w_ho = lps.weights.w_union(w_ho, w_this_order)
            # Replace original w
            w = w_ho

        return w

    def _splag(y: Any, w: Any) -> Any:
        """ Flip argument order for transform """
        return lps.weights.lag_spatial(w, y)

    # @TODO: Add support for time-variant geometries (countries)
    # If geoms don't change, use the one from the last time period
    gdf_geom = gdf.loc[gdf.index.get_level_values(0).max()]
    w = gdf_to_w_q(gdf_geom, first, last)
    s = gdf.groupby(level=0)[col].transform(_splag, w=w)
    return s

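# A minimal usage sketch for spatial_lag() on a hypothetical panel: a
# MultiIndex of (year, cell) over a 2x2 grid, with the geometry column named
# "geom" as the Queen-weights call above expects. Assumes libpysal is
# importable as lps and that its Queen.from_dataframe accepts geom_col.
import geopandas as gpd
import pandas as pd
from shapely.geometry import box

cells = [box(x, y, x + 1, y + 1) for y in (0, 1) for x in (0, 1)]
idx = pd.MultiIndex.from_product([[2019, 2020], range(4)],
                                 names=["year", "cell"])
panel = gpd.GeoDataFrame({"value": [1.0, 2.0, 3.0, 4.0] * 2},
                         geometry=cells * 2, index=idx)
panel = panel.rename_geometry("geom")

lagged = spatial_lag(panel, col="value")  # first-order queen lag per year
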
def create_trajectories(df):
    print("Creating time index ...")
    df['# Timestamp'] = pd.to_datetime(df['# Timestamp'],
                                       format='%m/%d/%Y %H:%M:%S')
    df = df.set_index('# Timestamp')

    print("Creating geometries ...")
    geometry = [Point(xy) for xy in zip(df.Longitude, df.Latitude)]
    df = GeoDataFrame(df, geometry=geometry, crs={'init': 'epsg:4326'})

    print("Creating trajectories ...")
    trajectories = []
    for key, values in df.groupby(['MMSI']):
        try:
            for t in Trajectory(key, values).split():
                trajectories.append(t)
        except ValueError:
            print("Failed to create trajectory!")
    print("Created {} trajectories!".format(len(trajectories)))

    shuffle(trajectories)
    return trajectories

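# A minimal usage sketch for create_trajectories(); the CSV path and column
# layout ('# Timestamp', 'MMSI', 'Latitude', 'Longitude') are hypothetical AIS
# data, and Trajectory is assumed to come from a movement-data library.
import pandas as pd

ais = pd.read_csv("ais_positions.csv")  # hypothetical input file
trajs = create_trajectories(ais)
print(trajs[0])
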
# converting the time from string to timestamp prevents export to shapefile
# meeuwen['date_time'] = pd.to_datetime(meeuwen['date_time'])
meeuwen.set_index('date_time')  # not strictly necessary
print("Set index date_time")

punt = [Point(xy) for xy in zip(meeuwen.longitude, meeuwen.latitude)]
meeuwen = meeuwen.drop(['longitude', 'latitude'],
                       axis=1)  # drop the longitude and latitude columns
crs = {'init': 'epsg:4326'}  # assign crs
meeuwen = GeoDataFrame(meeuwen, crs=crs, geometry=punt)
meeuwen = meeuwen.to_crs(epsg=31370)  # transformation
print('Geometry converted to Lambert')

meeuwen['meeuwen_x'] = meeuwen.geometry.x
meeuwen['meeuwen_y'] = meeuwen.geometry.y
meeuwen = meeuwen.rename(columns={
    'geometry': 'meeuwen_points'
}).set_geometry('meeuwen_points')  # rename the geometry column
print('Columns x and y added in Lambert')

meeuwen_dict = dict(tuple(meeuwen.groupby('bird_name')))
for meeuw in meeuwen_dict:
    x = meeuwen_dict[meeuw].drop(['calc_sunlight'], axis=1)
    x.to_file(
        'C:/Users/maart/OneDrive/Master/Projectwerk Geo-ICT/ShapefilesLambert/{}.shp'
        .format(meeuw), driver='ESRI Shapefile')
    print(meeuw)

bj_stations = bj_file.iloc[:, 3:]

#%% convert to a geopandas dataframe
geometry = [
    Point(xy) for xy in zip(bj_stations.Longitude, bj_stations.Latitude)
]
bj_stations_adj = bj_stations.drop(['Longitude', 'Latitude'], axis=1)
crs = {'init': 'epsg:4326'}
gpd_bj_stations = GeoDataFrame(bj_stations_adj, crs=crs, geometry=geometry)
gpd_bj_stations.set_index(gpd_bj_stations.Type, inplace=True)

#%% group by types
slist = [["Urban site"], ["Suburban site"], ["Clean site"],
         ["Regional background site"], ["Traffic monitoring site"]]
nlist = ['Urban', 'Suburban', 'Clean', 'RegionalBG', 'Traffic']
ndict = {k: v for v, ks in zip(nlist, slist) for k in ks}

station_types = []
for group in gpd_bj_stations.groupby(gpd_bj_stations.index.map(ndict.get)):
    station_types.append(group[1])
print(station_types)

#%%
df_clean = station_types[0]
df_regionalbg = station_types[1]
df_suburban = station_types[2]
df_traffic = station_types[3]
df_urban = station_types[4]

#%%
types = [
    "Clean sites", "Regional background sites", "Suburban sites",
    "Traffic sites", "Urban sites"
]
fig, ax = plt.subplots()
ax.set_aspect('equal')

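# A minimal illustration of the index.map(ndict.get) grouping trick above, on
# hypothetical data: the index labels are translated through a dict before
# grouping, so groups are keyed by the mapped names.
import pandas as pd

s = pd.Series([1, 2, 3], index=["Urban site", "Clean site", "Urban site"])
ndict = {"Urban site": "Urban", "Clean site": "Clean"}
for key, grp in s.groupby(s.index.map(ndict.get)):
    print(key, grp.tolist())
# Clean [2]
# Urban [1, 3]
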
nodes

# In[59]:

# FINDING AVERAGE DISTANCE TO 1st/5th/10th NEAREST CAFE BY NEIGHBORHOOD
# performing spatial join with nhoods shapefile
nhood_nodes = gpd.sjoin(nodes, nhoods, op='within', how='left')
nhood_nodes = nhood_nodes.dropna(subset=['Neighborhood'])
nhood_nodes

# In[80]:

from geopandas import GeoDataFrame

cafe_distances = GeoDataFrame(nhood_nodes)

nearest_cafe = cafe_distances.groupby(['Neighborhood'])[1].mean()
nearest_cafe = nearest_cafe.reset_index()

second_nearest_cafe = cafe_distances.groupby(['Neighborhood'])[2].mean()
second_nearest_cafe = second_nearest_cafe.reset_index()

third_nearest_cafe = cafe_distances.groupby(['Neighborhood'])[3].mean()
third_nearest_cafe = third_nearest_cafe.reset_index()

fourth_nearest_cafe = cafe_distances.groupby(['Neighborhood'])[4].mean()
fourth_nearest_cafe = fourth_nearest_cafe.reset_index()

fifth_nearest_cafe = cafe_distances.groupby(['Neighborhood'])[5].mean()
fifth_nearest_cafe = fifth_nearest_cafe.reset_index()

sixth_nearest_cafe = cafe_distances.groupby(['Neighborhood'])[6].mean()

        headers={
            'Authorization': "Bearer " + response['access_token'],
            'Accept': "application/vnd.networkfleet.api-v1+json",
            'Content-Type': "application/vnd.networkfleet.api-v1+json"
        }).json()
    if rjson['count'] != 0:
        x = pd.DataFrame(rjson)
        Name = pd.io.json.json_normalize(x['gpsMessage'])
        Nameedit = Name[['latitude', 'longitude', 'messageTime']]
        Nameedit['truck_name'] = i
        Nameedit['timeedit'] = pd.to_datetime(Nameedit['messageTime'])
        # Nameedit['hour'] = (Nameedit["timeedit"] - timezone).dt.hour
        Nameedit['datetime'] = (Nameedit['timeedit'] -
                                timezone).dt.strftime("%Y-%m-%d %H:00:00")
        geometry = [
            Point(xy) for xy in zip(Nameedit.longitude, Nameedit.latitude)
        ]
        Nameedit1 = GeoDataFrame(Nameedit, geometry=geometry)
        Nameedit1 = Nameedit1.groupby(
            ['truck_name',
             'datetime'])['geometry'].apply(lambda x: LineString(x.tolist()))
        Nameedit1 = GeoDataFrame(Nameedit1, geometry='geometry')
        Nameedit1 = pd.DataFrame(Nameedit1.to_records())
        Nameedit1 = GeoDataFrame(Nameedit1, geometry='geometry')
        Nameedit1.crs = {'init': 'epsg:4326'}
        x1 = gpd.sjoin(Nameedit1, dataSrc, op='intersects')
        x1 = x1[['STREET_ID', 'datetime']]
        appended_data = appended_data.append(x1)
        print("success: " + i)
    else:
        print("0 count: " + i)
except:
    print("fail: " + i)

############################################################################################################

# engine = create_engine('mysql+pymysql://ucfnmbz:[email protected]:3306/ucfnmbz')
# Create SQL connection engine
conn = engine.raw_connection()
route_shapes = pd.read_sql_table('shapes', engine)

# Zip the coordinates into a point object and convert to a GeoDataFrame
geometry = [
    Point(xy)
    for xy in zip(route_shapes.shape_pt_lon, route_shapes.shape_pt_lat)
]
r_shape_geo = GeoDataFrame(route_shapes, geometry=geometry)

# Aggregate these points with the GroupBy
r_shape_geo = r_shape_geo.groupby(
    ['shape_id'])['geometry'].apply(lambda x: LineString(x.tolist()))
r_shape_geo = GeoDataFrame(r_shape_geo, geometry='geometry')
r_shape_geo['shape_id'] = r_shape_geo.index
r_shape_geo.plot()

# Function to serialize a geometry; despite the name it returns well-known
# text (WKT), not WKB hex
def wkb_hexer(line):
    return line.wkt

# Convert geometry column in GeoDataFrame to WKT
# Then the GeoDataFrames are just regular DataFrames
r_shape_geo['geometry'] = r_shape_geo['geometry'].apply(lambda x: x.wkt)
r_shape_geo.rename(index=str, columns={"geometry": "geom", "C": "c"})

# Connect to database and export data

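# The groupby -> LineString pattern above (which recurs in several snippets in
# this collection) collapses ordered point rows into one line per group. A
# minimal self-contained illustration on hypothetical data:
from geopandas import GeoDataFrame
from shapely.geometry import LineString, Point

pts = GeoDataFrame({"shape_id": ["a", "a", "b", "b"]},
                   geometry=[Point(0, 0), Point(1, 1), Point(2, 2), Point(3, 2)])
lines = pts.groupby("shape_id")["geometry"].apply(
    lambda x: LineString(x.tolist()))
# lines["a"] -> LINESTRING (0 0, 1 1)
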
def func(arg):
    last_idterm_idx, idterm = arg
    # for last_track_idx, idterm in enumerate(idterms_cars):
    print(idterm)
    idterm = str(idterm)
    # print('VIASAT GPS track:', track_ID)
    viasat_data = pd.read_sql_query('''
        SELECT * FROM public.routecheck_2019
        WHERE idterm = '%s' ''' % idterm, conn_HAIG)
    if len(viasat_data) > 0:
        viasat_data = viasat_data.sort_values('timedate')
        ## add a field with the "NEXT timedate" in seconds
        viasat_data['next_totalseconds'] = viasat_data.totalseconds.shift(-1)
        viasat_data['next_timedate'] = viasat_data.timedate.shift(-1)
        viasat_data['next_totalseconds'] = viasat_data[
            'next_totalseconds'].astype('Int64')
        viasat_data['next_totalseconds'] = viasat_data[
            'next_totalseconds'].fillna(0)
        viasat_data['next_lon'] = viasat_data.longitude.shift(
            -1)  # longitude of the next trip
        viasat_data['next_lat'] = viasat_data.latitude.shift(
            -1)  # latitude of the next trip
        all_trips = list(viasat_data.idtrajectory.unique())
        ### initialize an empty dataframe
        # route_CATANIA = pd.DataFrame([])
        for idx, idtrajectory in enumerate(all_trips):
            # idtrajectory = 122344050
            # print(idtrajectory)
            ## filter data by idterm and by idtrajectory (trip)
            data = viasat_data[viasat_data.idtrajectory == idtrajectory]
            ## group by TRIP_ID and count the rows; if there is more than one
            ## TRIP_ID, keep only the one with the larger number of rows
            counts_TRIP_ID = data.groupby(
                data[['TRIP_ID']].columns.tolist(),
                sort=False).size().reset_index().rename(columns={0: 'counts'})
            data = data[data.TRIP_ID == counts_TRIP_ID[
                counts_TRIP_ID.counts == max(
                    counts_TRIP_ID.counts)].TRIP_ID[0]]
            ### zip the coordinates into a point object and convert to a GeoDataFrame ###
            if len(data) > 3:
                geometry = [
                    Point(xy) for xy in zip(data.longitude, data.latitude)
                ]
                df = GeoDataFrame(data, geometry=geometry)
                # Aggregate these points with the GroupBy
                df = df.groupby([
                    'idtrajectory'
                ])['geometry'].apply(lambda x: LineString(x.tolist()))
                df = GeoDataFrame(df, geometry='geometry')
                # df.plot()
                df.columns = ['geometry']
                idtrace_o = data[data.segment == min(data.segment)][[
                    'id'
                ]].iloc[0][0]
                idtrace_d = data[data.segment == max(data.segment)][[
                    'id'
                ]].iloc[0][0]
                # latitude_o = data[data.segment == min(data.segment)][['latitude']].iloc[0][0]    ## at the ORIGIN
                # longitude_o = data[data.segment == min(data.segment)][['longitude']].iloc[0][0]  ## at the ORIGIN
                # latitude_d = data[data.segment == max(data.segment)][['latitude']].iloc[0][0]    ## at the DESTINATION
                # longitude_d = data[data.segment == max(data.segment)][['longitude']].iloc[0][0]  ## at the DESTINATION
                timedate = str(data[data.segment == min(data.segment)][[
                    'timedate'
                ]].iloc[0][0])  ## at the ORIGIN
                ## trip distance in meters (sum of the increments of the "progressive")
                ## add a field with the "previous progressive"
                data['last_progressive'] = data.progressive.shift()  # <-------
                data['last_progressive'] = data['last_progressive'].astype(
                    'Int64')
                data['last_progressive'] = data['last_progressive'].fillna(0)
                ## compute increments of the distance (in meters)
                data['increment'] = data.progressive - data.last_progressive
                ## sum all the increments
                tripdistance_m = sum(
                    data['increment'][1:len(data['increment'])][
                        data.increment > 0])
                ## trip time in seconds (duration)
                time_o = data[data.segment == min(data.segment)][[
                    'path_time'
                ]].iloc[0][0]
                time_d = data[data.segment == max(data.segment)][[
                    'path_time'
                ]].iloc[0][0]
                triptime_s = time_d - time_o
                # time_o = data[data.segment == min(data.segment)][['totalseconds']].iloc[0][0]
                # time_d = data[data.segment == max(data.segment)][['totalseconds']].iloc[0][0]
                # triptime_s = time_d - time_o
                checkcode = data[data.segment == min(data.segment)][[
                    'anomaly'
                ]].iloc[0][0]  ## at the ORIGIN
                ## time interval between the starts of two consecutive trips
                breaktime_s = (
                    data[data.segment == max(data.segment)][['next_timedate']].iloc[0][0]
                    - data[data.segment == max(data.segment)][['timedate']].iloc[0][0])
                breaktime_s = breaktime_s.total_seconds()
                if breaktime_s < 0:
                    breaktime_s = None
                ### get distance between the positions of two consecutive TRIPS
                ### (from the END of a TRIP to the START of a NEW TRIP)
                lon_end = data[data.segment == max(data.segment)][[
                    'longitude'
                ]].iloc[0][0]  # longitude at the END of a TRIP
                lat_end = data[data.segment == max(data.segment)][[
                    'latitude'
                ]].iloc[0][0]
                lon_start = data[data.segment == max(data.segment)][[
                    'next_lon'
                ]].iloc[0][0]  # longitude at the START of a NEW TRIP
                lat_start = data[data.segment == max(data.segment)][[
                    'next_lat'
                ]].iloc[0][0]
                ### find the distance between coordinates of two consecutive TRIPS in METERS!!!
                ### end = (37.571518, 14.895852)
                ### start = (37.570873, 14.896243)
                deviation_pos = great_circle_track_node(
                    lon_end, lat_end, lon_start, lat_start)
                ### build the final dataframe ("route" table)
                if tripdistance_m > 0:
                    df_ROUTE = pd.DataFrame({
                        'idtrajectory': [idtrajectory],
                        'idterm': [idterm],
                        'idtrace_o': [idtrace_o],
                        'idtrace_d': [idtrace_d],
                        # 'latitude_o': [latitude_o],
                        # 'longitude_o': [longitude_o],
                        # 'latitude_d': [latitude_d],
                        # 'longitude_d': [longitude_d],
                        'timedate_o': [timedate],
                        'tripdistance_m': [tripdistance_m],
                        'triptime_s': [triptime_s],
                        'checkcode': [checkcode],
                        'breaktime_s': [breaktime_s]
                    })
                    geom = df['geometry'].apply(wkb_hexer)
                    df_ROUTE['geom'] = geom.iloc[0]
                    df_ROUTE['deviation_pos_m'] = deviation_pos
                    # route_CATANIA = route_CATANIA.append(df_ROUTE)
                    connection = engine.connect()
                    df_ROUTE.to_sql("PROVA_route_2019",
                                    con=connection,
                                    schema="public",
                                    if_exists='append')
                    connection.close()

class Passeringslinje(object):
    """
    A class for handling passage lines (passeringslinjer) and the
    functionality attached to them.
    """

    def __init__(self, gdf, passline):
        # Made available because they are used in class methods
        self.gdf = gdf
        self.passline = passline
        # Made available for internal and external use
        self.populasjon = GeoDataFrame()
        self.unique_mmsi_dict = {}
        self.populasjon_dict = {}
        self.grupperte_passeringer_dict = {}

        #==============================================================================
        # Here the analysis of a single traffic line given as a LineString is handled
        #==============================================================================
        # Check whether we are working with one or several passage lines
        if type(self.passline) == LineString:
            # Build a True/False series based on whether a tail crosses the line
            passeringer = self.gdf.intersects(self.passline)
            # Add a column summing the crossings.
            # This counts the number of objects involved, not the number of crossings
            self.antall_haler = self.gdf.assign(passeringer=passeringer).query(
                'passeringer != 0')['passeringer'].sum(axis=0)
            # Restrict the population to ship tails that have crossed the line
            self.populasjon = self.gdf.assign(
                passeringer=passeringer).query('passeringer != 0')
            # Build a list of the unique mmsi numbers in the population
            self.unique_mmsi_list = self.populasjon.mmsi.unique()
            # Find the number of unique ships by counting the unique mmsi numbers
            self.antall_unike_skip = len(self.unique_mmsi_list)
            # Extract the coordinates of the crossing points
            krysninger = self.populasjon.intersection(self.passline)
            self.populasjon['crossing_point'] = krysninger
            # List variables to store values for assembling the final population
            cross_heading = list()
            cross_time = list()
            heading_geometric = list()
            tail_list = list()
            columns = self.populasjon.columns
            # Duplicate tail rows with MultiPoint crossings so that every row
            # represents a unique crossing
            for idx, row in self.populasjon.iterrows():
                if 'Point' == row.crossing_point.type:
                    tail_list.append(row)
                elif 'MultiPoint' == row.crossing_point.type:
                    tmp_points = list()
                    tmp_points.extend(p for p in row.crossing_point)
                    for i in range(len(tmp_points)):
                        row['crossing_point'] = tmp_points[i]
                        tail_list.append(row)
            crs = {'init': 'epsg:4326'}
            # Assemble the tails into a GeoDataFrame
            self.populasjon = GeoDataFrame(pd.DataFrame.from_records(
                tail_list, columns=columns),
                                           geometry='geometry',
                                           crs=crs)
            for idx, row in self.populasjon.iterrows():
                # Get the LineString of the current row
                line = row.geometry
                # List variables for storing the line's x and y coordinates
                coord_list_x = []
                coord_list_y = []
                # Walk through the line point by point and extract the
                # respective coordinate components
                for coords in line.coords:
                    coord_list_x.append(coords[0])
                    coord_list_y.append(coords[1])
                # Split the crossing point into its respective parts
                pass_coords_x = row['crossing_point'].x
                pass_coords_y = row['crossing_point'].y
                # Look for the closest value in the line's lists of x and y coordinates
                listeposisjon_x = min(
                    range(len(coord_list_x)),
                    key=lambda i: abs(coord_list_x[i] - pass_coords_x))
                listeposisjon_y = min(
                    range(len(coord_list_y)),
                    key=lambda i: abs(coord_list_y[i] - pass_coords_y))
                # Since this is not guaranteed to be the same point for the x
                # and y values, take the average position when they differ.
                listeposisjon = int((listeposisjon_x + listeposisjon_y) / 2)
                # Pick the heading and timestamp from the right position in the
                # lists built by lag_haler() in functions.py
                cross_heading.append(row.pass_heading[listeposisjon])
                cross_time.append(row.times[listeposisjon])
                tmp_point1 = line.coords[listeposisjon - 1]
                tmp_point2 = line.coords[listeposisjon]
                tmp_line = LineString([(tmp_point1), (tmp_point2)])
                tmp_bearing = line_bearing(tmp_line)
                heading_geometric.append(tmp_bearing)
            # Add columns for the crossing point coordinates, heading and
            # timestamp at crossing. Remove the list variables from lag_haler()
            self.populasjon['heading_geometric'] = heading_geometric
            self.populasjon['heading'] = cross_heading
            self.populasjon['crossing_time'] = cross_time
            try:
                self.populasjon.drop(['times', 'pass_heading'],
                                     axis=1,
                                     inplace=True)
            except:
                pass
            # line_bearing() is imported from functions.py
            passline_angle = line_bearing(self.passline)
            # passline_limits() lives in functions.py. It takes passline_angle
            # as argument and returns limit angles one and two plus the traffic
            # direction. It classifies ship traffic as either North/South or
            # East/West so it can be split dichotomously.
            # The traffic split limits are derived dynamically from the passage
            # line's orientation.
            limit1, limit2, trafikk_retning = passline_limits(passline_angle)
            retning_list = list()
            # Iterate through the crossing ship tails to classify their
            # direction over the line
            for idx, row in self.populasjon.iterrows():
                if trafikk_retning == 'Ost/Vest':
                    if row['heading'] >= limit1 and row['heading'] <= limit2:
                        retning_list.append('Ost')
                    else:
                        retning_list.append('Vest')
                elif trafikk_retning == 'Nord/Syd':
                    if row['heading'] > limit1 and row['heading'] < limit2:
                        retning_list.append('Syd')
                    else:
                        retning_list.append('Nord')
            self.populasjon['retning'] = retning_list

            # Print some variables of interest to the console
            print('\n*******************************************')
            print('Passage line class successfully initiated')
            print('*******************************************\n')
            print(str(self.antall_haler) +
                  ' ship tails have crossed the passage line\n')
            print('The ship population consists of ' +
                  str(self.antall_unike_skip) + ' unique mmsi numbers.')
            print('\nThe number of crossings of the line is: ' +
                  str(self.populasjon.shape[0]) + '\n')

        #==============================================================================
        # Here the analysis of passage lines given as a GeoDataFrame is handled
        #==============================================================================
        elif type(self.passline) == GeoDataFrame:
            # Get the number of passage lines
            self.passline_antall = self.passline.shape[0]
            antall_haler_dict = dict()
            antall_unike_skip_dict = dict()
            # Counter for the total number of crossings over all lines
            total_counter = 0
            # List for storing the print text of each line that is initiated
            self.print_list = list()
            for i in range(self.passline_antall):
                # Get the current passage line as a shapely LineString
                current_passline = self.passline.loc[i, 'geometry']
                # Get the name of the passage line; used as keys in the final
                # collection dictionary
                passeringlinje_navn = self.passline.loc[
                    i, self.passline.columns[0]]  # .lstrip('Passline_')
                # Build a Series of gdf length with booleans indicating
                # crossing or not
                passeringer = self.gdf.intersects(current_passline)
                # Sum to find the number of tails involved / the number of
                # objects crossing the line
                antall_haler_dict[passeringlinje_navn] = self.gdf.assign(
                    passeringer=passeringer).query(
                        'passeringer != 0')['passeringer'].sum(axis=0)
                # Get the crossing tails (the ship population) and store them
                # as the value in a dictionary keyed by passage line name
                self.populasjon_dict[passeringlinje_navn] = self.gdf.assign(
                    passeringer=passeringer).query('passeringer != 0')
                # Store the unique mmsi numbers represented in the crossings,
                # keyed by passage line name
                self.unique_mmsi_dict[
                    passeringlinje_navn] = self.populasjon_dict[
                        passeringlinje_navn].mmsi.unique()
                # Sum to find the number of unique mmsi numbers represented in
                # the population
                antall_unike_skip_dict[passeringlinje_navn] = len(
                    self.unique_mmsi_dict[passeringlinje_navn])
                # Get a Series with the crossing points between the ship
                # population and the line; these can be MultiPoint, LineString
                # or GeometryCollection. The last two have not been encountered
                # so far.
                krysninger = self.populasjon_dict[
                    passeringlinje_navn].intersection(current_passline)
                self.populasjon_dict[passeringlinje_navn][
                    'crossing_point'] = krysninger
                columns = self.populasjon_dict[passeringlinje_navn].columns
                tail_list = list()
                # Duplicate rows where there are several intersections so that
                # each row represents a unique crossing
                for idx, row in self.populasjon_dict[
                        passeringlinje_navn].iterrows():
                    if 'Point' == row.crossing_point.type:
                        tail_list.append(row)
                    elif 'MultiPoint' == row.crossing_point.type:
                        tmp_points = list()
                        tmp_points.extend(p for p in row.crossing_point)
                        for i in range(len(tmp_points)):
                            row['crossing_point'] = tmp_points[i]
                            tail_list.append(row)
                crs = {'init': 'epsg:4326'}
                # Assemble the new GeoDataFrame with its own row for each
                # unique crossing.
                self.populasjon_dict[passeringlinje_navn] = GeoDataFrame(
                    pd.DataFrame.from_records(tail_list, columns=columns),
                    geometry='geometry',
                    crs=crs)
                # List variables for storing the ship's orientation at crossing
                # and the time of the crossing
                cross_heading = list()
                cross_time = list()
                heading_geometric = list()
                krysningspunkt_nr = list()
                # If there are no crossings on the current passage line, move on
                if antall_haler_dict[passeringlinje_navn] == 0:
                    pass
                else:
                    for idx, row in self.populasjon_dict[
                            passeringlinje_navn].iterrows():
                        # Get the LineString of the current row
                        line = row.geometry
                        # List variables for storing the line's x and y coordinates
                        coord_list_x = list()
                        coord_list_y = list()
                        # Walk through the line point by point and extract the
                        # respective coordinate components
                        for coords in line.coords:
                            coord_list_x.append(coords[0])
                            coord_list_y.append(coords[1])
                        # Split the crossing point into its respective parts
                        pass_coords_x = row['crossing_point'].x
                        pass_coords_y = row['crossing_point'].y
                        # Look for the closest value in the line's lists of x
                        # and y coordinates
                        listeposisjon_x = min(
                            range(len(coord_list_x)),
                            key=lambda i: abs(coord_list_x[i] - pass_coords_x))
                        listeposisjon_y = min(
                            range(len(coord_list_y)),
                            key=lambda i: abs(coord_list_y[i] - pass_coords_y))
                        # Since this is not guaranteed to be the same point for
                        # the x and y values, take the average position when
                        # they differ.
                        listeposisjon = int(
                            (listeposisjon_x + listeposisjon_y) / 2)
                        # Pick the heading and timestamp from the right
                        # position in the lists built by lag_haler() in functions.py
                        cross_heading.append(row.pass_heading[listeposisjon])
                        cross_time.append(row.times[listeposisjon])
                        tmp_point1 = line.coords[listeposisjon - 1]
                        tmp_point2 = line.coords[listeposisjon]
                        tmp_line = LineString([(tmp_point1), (tmp_point2)])
                        tmp_bearing = line_bearing(tmp_line)
                        heading_geometric.append(tmp_bearing)
                        krysningspunkt_nr.append(listeposisjon)
                    # Add columns for the crossing point coordinates, heading
                    # and timestamp at crossing. Remove the list variables from
                    # lag_haler()
                    self.populasjon_dict[passeringlinje_navn][
                        'heading_geometric'] = heading_geometric
                    self.populasjon_dict[passeringlinje_navn][
                        'heading'] = cross_heading
                    self.populasjon_dict[passeringlinje_navn][
                        'crossing_time'] = cross_time
                    self.populasjon_dict[passeringlinje_navn][
                        'point_position'] = krysningspunkt_nr
                    self.populasjon_dict[passeringlinje_navn].drop(
                        ['times', 'pass_heading'], axis=1, inplace=True)
                    # Calculate the bearing of the passage line. line_bearing()
                    # lives in functions.py
                    passline_angle = line_bearing(current_passline)
                    # passline_limits() lives in functions.py. It takes
                    # passline_angle as argument and returns limit angles one
                    # and two plus the traffic direction. It classifies ship
                    # traffic as either North/South or East/West so it can be
                    # split dichotomously.
                    # The traffic split limits are derived dynamically from the
                    # passage line's orientation.
                    limit1, limit2, trafikk_retning = passline_limits(
                        passline_angle)
                    self.passline['trafikk_retning'] = trafikk_retning
                    retning_list = list()
                    rutepunkt_list = list()
                    # Iterate through the crossing ship tails to classify their
                    # direction over the line
                    for idx, row in self.populasjon_dict[
                            passeringlinje_navn].iterrows():
                        if trafikk_retning == 'Ost/Vest':
                            if row['heading'] >= limit1 and row[
                                    'heading'] <= limit2:
                                retning_list.append('Ost')
                                tmp_rutepunkt = str(passeringlinje_navn) + 'Ost'
                                rutepunkt_list.append(tmp_rutepunkt)
                            else:
                                retning_list.append('Vest')
                                tmp_rutepunkt = str(passeringlinje_navn) + 'Vest'
                                rutepunkt_list.append(tmp_rutepunkt)
                        elif trafikk_retning == 'Nord/Syd':
                            if row['heading'] > limit1 and row['heading'] < limit2:
                                retning_list.append('Syd')
                                tmp_rutepunkt = str(passeringlinje_navn) + 'Syd'
                                rutepunkt_list.append(tmp_rutepunkt)
                            else:
                                retning_list.append('Nord')
                                tmp_rutepunkt = str(passeringlinje_navn) + 'Nord'
                                rutepunkt_list.append(tmp_rutepunkt)
                        else:
                            pass
                    self.populasjon_dict[passeringlinje_navn][
                        'retning'] = retning_list
                    self.populasjon_dict[passeringlinje_navn][
                        'passert_linje'] = passeringlinje_navn
                    self.populasjon_dict[passeringlinje_navn][
                        'rutepunkt'] = rutepunkt_list
                    antall_krysninger = self.populasjon_dict[
                        passeringlinje_navn].shape[0]
                    total_counter += antall_krysninger
                    print_string = (
                        "\n*********************************************\n"
                        "Passage line " + str(passeringlinje_navn) +
                        "\n*********************************************\n" +
                        str(antall_haler_dict[passeringlinje_navn]) +
                        " ship tails have crossed the line\n"
                        "The ship population consists of " +
                        str(antall_unike_skip_dict[passeringlinje_navn]) +
                        " unique mmsi numbers\n"
                        "The number of crossings of the line is " +
                        str(antall_krysninger) + "\n")
                    self.print_list.append(print_string)

            # Print some variables of interest to the console
            print('\n*********************************************')
            print('Passage line class successfully initiated')
            print('*********************************************\n')
            print('The ' + str(self.passline_antall) +
                  ' lines have been crossed ' + str(total_counter) +
                  ' times in total.')
            for item in self.print_list:
                print(item)
            self.total_populasjon = GeoDataFrame(pd.concat(
                self.populasjon_dict, ignore_index=True, sort=False),
                                                 geometry='geometry',
                                                 crs=crs)

    def antall_skipstype(self, column='shiptype_nr',
                         agg_column='Antall_passeringer'):
        if type(self.passline) == LineString:
            self.grupperte_passeringer = self.populasjon.groupby(
                [column]).agg({
                    agg_column: {'Antall Passeringer': 'sum'},
                    'mmsi': {'Antall skip': 'count'}
                })
            self.grupperte_passeringer.columns = \
                self.grupperte_passeringer.columns.droplevel(0)
            self.grupperte_passeringer = self.grupperte_passeringer.reset_index()
            self.grupperte_passeringer.rename(
                columns={'shiptype_nr': 'Skipstype_nr'}, inplace=True)
            print('\nNumber of ships passed, grouped by ship type: ')
            print(self.grupperte_passeringer)
            return self.grupperte_passeringer
        elif type(self.passline) == GeoDataFrame:
            self.group_table_dict = {}
            self.grupperte_passeringer_dict = {}
            for i in range(self.passline_antall):
                passeringlinje_navn = self.passline.loc[
                    i, self.passline.columns[0]]
                self.grupperte_passeringer_dict[
                    passeringlinje_navn] = self.populasjon_dict[
                        passeringlinje_navn].groupby([column]).agg({
                            agg_column: {'Antall Passeringer': 'sum'},
                            'mmsi': {'Antall skip': 'count'}
                        })
                self.grupperte_passeringer_dict[
                    passeringlinje_navn].columns = \
                    self.grupperte_passeringer_dict[
                        passeringlinje_navn].columns.droplevel(0)
                self.grupperte_passeringer_dict[
                    passeringlinje_navn] = self.grupperte_passeringer_dict[
                        passeringlinje_navn].reset_index()
                self.grupperte_passeringer_dict[passeringlinje_navn].rename(
                    columns={'shiptype_nr': 'Skipstype_nr'}, inplace=True)
            print('Number of ships per passage line, grouped by ship type: ')
            print(self.grupperte_passeringer_dict)
            return self.grupperte_passeringer_dict
        else:
            pass

    def pivot(self, index='shiptype_label', column='lengdegruppe'):
        # Reference order of the columns
        column_list = [
            'Missing_length', '0-12', '12-21', '21-28', '28-70', '70-100',
            '100-150', '150-200', '200-250', '250-300', '300-350', '<350'
        ]
        if type(self.passline) == LineString:
            # Build the pivot table; columns come out in arbitrary order
            pivot = self.populasjon.pivot_table(values='passeringer',
                                                index=[index],
                                                columns=[column],
                                                aggfunc=np.sum)
            # Get the columns shared by the reference order and the pivot
            columns = [x for x in column_list if x in pivot]
            # Ensure the columns come in the right order
            pivot = pivot[columns]
            return pivot
        elif type(self.passline) == GeoDataFrame:
            pivot_table_dict = {}
            self.pivot_dict = {}
            for i in range(self.passline_antall):
                passeringlinje_navn = self.passline.loc[
                    i, self.passline.columns[0]]
                pivot_table_dict[passeringlinje_navn] = self.populasjon_dict[
                    passeringlinje_navn].pivot_table(values='passeringer',
                                                     index=[index],
                                                     columns=[column],
                                                     aggfunc=np.sum)
                columns = [
                    x for x in column_list
                    if x in pivot_table_dict[passeringlinje_navn]
                ]
                self.pivot_dict[passeringlinje_navn] = pivot_table_dict[
                    passeringlinje_navn][columns]
            print("\nPivot tables collected in a dictionary with the passage "
                  "line names as keys\n")
            return self.pivot_dict
        else:
            pass

    def hierchical(self):
        if type(self.passline) == LineString:
            grupperte_passeringer = self.populasjon.groupby(
                ['shiptype_label', 'retning']).agg({
                    'passeringer': {'Antall Passeringer': 'sum'},
                    'mmsi': {'Antall skip': 'count'}
                })
            grupperte_passeringer.columns = \
                grupperte_passeringer.columns.droplevel(0)
            grupperte_passeringer.unstack(0)
            return grupperte_passeringer
        elif type(self.passline) == GeoDataFrame:
grupperte_passeringer_dict = dict() for i in range(self.passline_antall): passeringlinje_navn = self.passline.loc[ i, self.passline.columns[0]] grupperte_passeringer_dict[ passeringlinje_navn] = self.populasjon_dict[ passeringlinje_navn].groupby( ['shiptype_label', 'skipsretning']).agg({ 'ant_passeringer': { 'Antall Passeringer': 'sum' }, 'mmsi': { 'Antall skip': 'count' } }) grupperte_passeringer_dict[ passeringlinje_navn].columns = grupperte_passeringer_dict[ passeringlinje_navn].columns.droplevel(0) grupperte_passeringer_dict[passeringlinje_navn].unstack() return grupperte_passeringer_dict #Not working yet def pivot_split_direction(self): gdf_dict = self.populasjon_dict retning = self.passline['trafikk_retning'] gdf = GeoDataFrame() for i in range(self.passline_antall): retning1_list = list() retning2_list = list() for row in retning: passeringlinje_navn = self.passline.loc[ row, self.passline.columns[0]].lstrip('Passline_') retning1, retning2 = row['trafikk_retning'].split("/") retning1_list.append(retning1) retning2_list.append(retning2) gdf1 = gdf_dict[gdf.skipsretning == retning1] gdf2 = gdf_dict[gdf.skipsretning == retning2] pivot2 = pd.pivot_table(gdf2, values=['ant_passeringer'], index=['shiptype_nr', 'lengdegruppe'], columns=['skipsretning'], aggfunc=np.sum) pivot1 = pd.pivot_table(gdf1, values=['ant_passeringer'], index=['shiptype_nr', 'lengdegruppe'], columns=['skipsretning'], aggfunc=np.sum) return pivot1, pivot2 def map_html(self, tiltakspunkter, filepath): """ tiltakspunkter er av typen Point og oppgitt i epsg:4326 filepath er der du ønsker .html filen med kart skal havne. """ tiltakspunkter = tiltakspunkter box_tmp = bounding_box(tiltakspunkter) box_center_x = (box_tmp[0] + box_tmp[2]) / 2 box_center_y = (box_tmp[1] + box_tmp[3]) / 2 map_center_lon = box_center_x map_center_lat = box_center_y map = folium.Map(location=[map_center_lat, map_center_lon], zoom_start=9, tiles='Stamen Terrain') tiltaksFeature = FeatureGroup(name='Tiltakspunkter', show=False) marker_cluster = plugins.MarkerCluster(options=dict( zoomToBoundsOnClick=False)).add_to(tiltaksFeature) #feature_tiltak = FeatureGroup(name='tiltakspunkter') tooltip_tiltak = 'Tiltakspunkt' tmp_tiltak = tiltakspunkter['punkt_geom_wkt'] x_list = tmp_tiltak.apply(lambda p: p.x) y_list = tmp_tiltak.apply(lambda p: p.y) for i in range(0, len(x_list)): try: Marker(location=[y_list[i], x_list[i]], popup=tiltakspunkter['punkt_navn'][i], icon=folium.Icon(color='red', icon_color='black', icon='angle-double-down', prefix='fa'), tooltip=tooltip_tiltak).add_to(marker_cluster) except: Marker(location=[y_list[i], x_list[i]], popup='Atter et punkt', icon=folium.Icon(color='red', icon_color='black', icon='angle-double-down', prefix='fa'), tooltip=tooltip_tiltak).add_to(marker_cluster) feature_skipshaler = FeatureGroup(name='skipshaler') try: oljetankskip = plugins.FeatureGroupSubGroup( feature_skipshaler, 'oljetankskip') haler_feat_10 = self.total_populasjon[ self.total_populasjon.shiptype_nr == 10] skipshaler_10_json = haler_feat_10.to_json(default=str) style_skipshaler = lambda x: { 'color': '#4DB6AC', 'weight': 3, 'opacity': 0.1, } haler_olje = folium.features.GeoJson( skipshaler_10_json, style_function=style_skipshaler) haler_olje.add_to(oljetankskip) except: pass try: kjemikalie_produkttankskip = plugins.FeatureGroupSubGroup( feature_skipshaler, 'kjemikalie_produkttankskip') haler_feat_11 = self.total_populasjon[ self.total_populasjon.shiptype_nr == 11] skipshaler_11_json = haler_feat_11.to_json(default=str) style_skipshaler = lambda 
        # One colour-coded, toggleable sub-layer of ship trails per ship
        # type; the sixteen near-identical try blocks collapse into a table.
        feature_skipshaler = FeatureGroup(name='skipshaler')
        skipstyper = {
            10: ('oljetankskip', '#4DB6AC'),
            11: ('kjemikalie_produkttankskip', '#26A69A'),
            12: ('gasstankskip', '#009688'),
            13: ('bulkskip', '#00897B'),
            14: ('stykkgods_roro_skip', '#00796B'),
            15: ('konteinerskip', '#00695C'),
            16: ('passasjerbat', '#81C784'),
            17: ('ropax_skip', '#66BB6A'),
            18: ('cruiseskip', '#4CAF50'),
            19: ('offshore_supplyskip', '#43A047'),
            20: ('andre_offshorefartoy', '#388E3C'),
            21: ('bronnbat', '#00E676'),
            22: ('slepefartoy', '#00C853'),
            23: ('andre_servicefartoy', '#9CCC65'),
            24: ('fiskefartoy', '#7CB342'),
            25: ('annet', '#558B2F'),
        }
        subgroups = []
        for shiptype_nr, (navn, farge) in skipstyper.items():
            try:
                subgroup = plugins.FeatureGroupSubGroup(
                    feature_skipshaler, navn)
                haler_feat = self.total_populasjon[
                    self.total_populasjon.shiptype_nr == shiptype_nr]
                skipshaler_json = haler_feat.to_json(default=str)
                # Bind the colour as a default argument; a plain closure
                # would late-bind and give every layer the last colour.
                style_skipshaler = lambda x, farge=farge: {
                    'color': farge,
                    'weight': 3,
                    'opacity': 0.1,
                }
                folium.features.GeoJson(
                    skipshaler_json,
                    style_function=style_skipshaler).add_to(subgroup)
                subgroups.append(subgroup)
            except Exception as err:
                print('Could not build ship-trail layer {}: {}'.format(
                    navn, err))

        feature_passlines = FeatureGroup(name='passeringslinjer')
        passeringslinje_json = self.passline.to_json(default=str)
        tooltip_passeringslinje = 'Passeringslinje'
        style_passline = lambda x: {
            'color': '#000000',
            'weight': 4,
            'opacity': 1.0,
        }
        folium.features.GeoJson(
            passeringslinje_json,
            style_function=style_passline,
            tooltip=tooltip_passeringslinje).add_to(feature_passlines)

        marker_cluster.add_to(fmap)
        feature_passlines.add_to(fmap)
        feature_skipshaler.add_to(fmap)
        # FeatureGroupSubGroups must be added to the map as well as to
        # their parent group for the LayerControl toggles to work.
        for subgroup in subgroups:
            subgroup.add_to(fmap)
        folium.LayerControl(collapsed=False).add_to(fmap)
        minimap = plugins.MiniMap()
        fmap.add_child(minimap)
        fmap.add_child(folium.LatLngPopup())
        fmap.save(filepath)

    def linjekalkulasjon(self, key1, key2):
        if isinstance(self.passline, LineString):
            print('This function is meaningless with a single passing line; '
                  'please check this object.')
        elif isinstance(self.passline, GeoDataFrame):
            passline_populasjon = self.populasjon_dict[key1]
            passline_populasjon2 = self.populasjon_dict[key2][[
                'tail_id', 'crossing_point', 'crossing_time', 'rutepunkt',
                'point_position'
            ]]
            diff_tabell = passline_populasjon.merge(passline_populasjon2,
                                                    on='tail_id',
                                                    how='inner')
            diff_tabell['tid_diff'] = abs(diff_tabell['crossing_time_x'] -
                                          diff_tabell['crossing_time_y'])
            # The {'init': ...} crs dict is deprecated; pass the authority
            # string directly.
            diff_tabell = GeoDataFrame(diff_tabell,
                                       geometry='geometry',
                                       crs='EPSG:4326')
            # pyproj 2+: Transformer replaces the deprecated
            # partial(pyproj.transform, Proj(init=...), ...) pattern,
            # and is built once outside the loop.
            project = pyproj.Transformer.from_crs('EPSG:4326', 'EPSG:32633',
                                                  always_xy=True).transform
            avstand_liste = list()
            snittfart_liste = list()
            for idx, row in diff_tabell.iterrows():
                tmp_linje = row['geometry']
                point_position1 = row['point_position_x']
                point_position2 = row['point_position_y']
                # Slice the trail between the two crossing points,
                # whichever order they occur in along the line.
                if point_position1 < point_position2:
                    split_line = LineString(
                        tmp_linje.coords[point_position1:point_position2 + 1])
                else:
                    split_line = LineString(
                        tmp_linje.coords[point_position2:point_position1 + 1])
                new_line = transform(project, split_line)
                tmp_avstand = new_line.length
                avstand_liste.append(tmp_avstand)
                # .seconds ignores the days component; use total_seconds().
                tid_sekund = row['tid_diff'].total_seconds()
                try:
                    snittfart = tmp_avstand / tid_sekund
                except ZeroDivisionError:
                    # Use NaN, not the string 'NaN', to keep the column numeric.
                    snittfart = np.nan
                snittfart_liste.append(snittfart)
            diff_tabell['avstand_diff'] = avstand_liste
            diff_tabell['snittfart'] = snittfart_liste
            return diff_tabell
        else:
            print('The function was not instantiated correctly.')
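
# A minimal, self-contained sketch (not part of the original module) of the
# named-aggregation pattern used in the grouping method above: pandas >= 0.25
# removed nested-dict renaming in .agg(), so renamed aggregates are written as
# name=(column, function) pairs. The data and column values below are
# illustrative only.
def _demo_named_aggregation():
    import pandas as pd

    df = pd.DataFrame({
        'shiptype_label': ['tank', 'tank', 'bulk'],
        'skipsretning': ['N', 'N', 'S'],
        'ant_passeringer': [3, 2, 5],
        'mmsi': [111, 222, 333],
    })
    # **{} unpacking lets aggregate names contain spaces, as in the
    # method above.
    return df.groupby(['shiptype_label', 'skipsretning']).agg(
        **{'Antall Passeringer': ('ant_passeringer', 'sum'),
           'Antall skip': ('mmsi', 'count')})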
def _remove_tinynetworks(
    flw: gpd.GeoDataFrame,
    min_path_size: float,
    min_path_length: float,
    min_network_size: float,
) -> gpd.GeoDataFrame:
    """Remove small paths in NHDPlus flowline database.

    Ported from `nhdplusTools <https://github.com/USGS-R/nhdplusTools>`__

    Parameters
    ----------
    flw : geopandas.GeoDataFrame
        NHDPlus flowlines with at least the following columns:
        levelpathi, hydroseq, totdasqkm, terminalfl, startflag,
        pathlength, terminalpa
    min_path_size : float
        Minimum size of outlet level path of a drainage basin in sqkm.
        Drainage basins with an outlet drainage area smaller than this
        value will be removed.
    min_path_length : float
        Minimum length of terminal level path of a network in km.
    min_network_size : float
        Minimum size of drainage network in sqkm.

    Returns
    -------
    geopandas.GeoDataFrame
        Flowlines with small paths removed.
    """
    req_cols = [
        "levelpathi",
        "hydroseq",
        "terminalfl",
        "startflag",
        "terminalpa",
        "totdasqkm",
        "pathlength",
    ]
    _check_requirements(req_cols, flw)
    flw[req_cols[:-2]] = flw[req_cols[:-2]].astype("Int64")

    if min_path_size > 0:
        # Flag the outlet (minimum hydroseq) of each level path whose
        # drainage area is below the threshold.
        short_paths = flw.groupby("levelpathi").apply(
            lambda x: (x.hydroseq == x.hydroseq.min())
            & (x.totdasqkm < min_path_size)
            & (x.totdasqkm >= 0))
        short_paths = short_paths.index.get_level_values(
            "levelpathi")[short_paths].tolist()
        flw = flw[~flw.levelpathi.isin(short_paths)]

    terminal_filter = (flw.terminalfl == 1) & (flw.totdasqkm < min_network_size)
    start_filter = (flw.startflag == 1) & (flw.pathlength < min_path_length)

    if any(terminal_filter.dropna()) or any(start_filter.dropna()):
        # DataFrame.append was removed in pandas 2.0; use pd.concat instead.
        tiny_networks = pd.concat([flw[terminal_filter], flw[start_filter]])
        flw = flw[~flw.terminalpa.isin(tiny_networks.terminalpa.unique())]

    return flw
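
# A minimal usage sketch for _remove_tinynetworks (not from the original
# source). The flowline attributes below are synthetic; real NHDPlus data
# carries many more columns, and _check_requirements is assumed to be
# defined elsewhere in this module.
def _demo_remove_tinynetworks():
    import geopandas as gpd
    from shapely.geometry import LineString

    flw = gpd.GeoDataFrame({
        "levelpathi": [1, 1, 2],
        "hydroseq": [10, 11, 20],
        "terminalfl": [1, 0, 1],
        "startflag": [0, 1, 1],
        "terminalpa": [1, 1, 2],
        "totdasqkm": [50.0, 30.0, 0.5],
        "pathlength": [12.0, 8.0, 0.2],
        "geometry": [LineString([(0, 0), (1, 1)])] * 3,
    })
    # levelpathi == 2 has an outlet drainage area of 0.5 sqkm, below
    # min_path_size, so that whole path is removed; only the levelpathi == 1
    # rows should remain.
    return _remove_tinynetworks(flw, min_path_size=0.6,
                                min_path_length=1.0, min_network_size=1.0)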