def centroid_and_links(nodes, n_clusters):
    """
    Cluster ``nodes`` and link every ordered pair of distinct nodes that
    share a cluster.

    :param nodes: table with a ``geometry`` column, handed to
        ``spatial.zone_clusters``; its index values are used as node ids
        (presumably point geometries -- confirm against callers).
    :param n_clusters: forwarded to ``spatial.zone_clusters``.
    :return: ``(first, links)``. ``first`` is the list of the first node id
        found in each cluster. ``links`` is a DataFrame with columns ``a``,
        ``b``, ``geometry`` (a two-point LineString from ``a`` to ``b``) and
        ``length`` (as returned by ``skims.distance_from_geometry``).
    """
    # Only the node -> cluster assignment is needed; the cluster table
    # returned alongside it is not used here.
    _, membership = spatial.zone_clusters(
        nodes,
        n_clusters=n_clusters,
        geo_union_method=lambda lg: shapely.geometry.MultiPoint(list(lg)),
        geo_join_method=geo_join_method,
    )

    # reset_index() names the former index column 'index' when it is unnamed.
    id_column = membership.index.name or 'index'
    ids_by_cluster = membership.reset_index().groupby('cluster')[id_column]

    first = list(ids_by_cluster.first())
    members_per_cluster = list(ids_by_cluster.agg(lambda s: list(s)))

    point_of = nodes['geometry'].to_dict()

    # Both (a, b) and (b, a) are emitted: the links are directed.
    rows = [
        [a, b, shapely.geometry.LineString([point_of[a], point_of[b]])]
        for members in members_per_cluster
        for a in members
        for b in members
        if a != b
    ]

    links = pd.DataFrame(rows, columns=['a', 'b', 'geometry'])
    links['length'] = skims.distance_from_geometry(links['geometry'])
    return first, links
def renumber(zones, volume, n_clusters=10, volume_columns=['volume'], cluster_column=None):
    """
    Cluster ``zones`` and renumber ``volume`` according to that clustering.

    :param zones: zoning table handed to ``spatial.zone_clusters``.
    :param volume: volume table handed to ``renumber_volume``.
    :param n_clusters: number of clusters requested from
        ``spatial.zone_clusters``.
    :param volume_columns: forwarded to ``renumber_volume``. The list default
        is shared between calls; it is only forwarded here, never modified
        by this function.
    :param cluster_column: forwarded to ``spatial.zone_clusters`` as
        ``cluster_column``.
    :return: ``(clusters, grouped, cluster_series)`` -- the two results of
        ``spatial.zone_clusters`` and, in the middle, the result of
        ``renumber_volume``.
    """
    # Keyword arguments on purpose: skim_matrix passes a float (1e-9) as the
    # third positional argument of zone_clusters, so that positional slot is
    # not ``cluster_column``. renumber_quetzal uses the same keyword form.
    clusters, cluster_series = spatial.zone_clusters(
        zones, n_clusters=n_clusters, cluster_column=cluster_column)
    grouped = renumber_volume(
        volume, cluster_series, volume_columns=volume_columns)
    return clusters, grouped, cluster_series
def skim_matrix(zones, token, n_clusters, coordinates_unit='degree', skim_matrix_kwargs={}):
    """
    Estimate a zone-to-zone distance/duration table from a cluster-to-cluster
    skim.

    The zones are clustered, ``all_skim_matrix`` is called on the clusters
    only, and every zone pair then inherits the skim of its cluster pair,
    scaled by the ratio between the zone-pair euclidean distance and the
    cluster-pair euclidean distance.

    :param zones: zoning table handed to ``spatial.zone_clusters`` and
        ``euclidean``.
    :param token: forwarded to ``all_skim_matrix`` (presumably an API
        credential -- confirm).
    :param n_clusters: number of clusters requested from
        ``spatial.zone_clusters``.
    :param coordinates_unit: forwarded to ``all_skim_matrix`` and
        ``euclidean``.
    :param skim_matrix_kwargs: extra keyword arguments unpacked into the
        ``all_skim_matrix`` call. The dict default is shared between calls
        but is only read here.
    :return: DataFrame of zone pairs with (among others) ``distance``,
        ``duration`` and ``distance_rate`` columns, NaN replaced by 0.
    """
    clusters, cluster_series = spatial.zone_clusters(zones, n_clusters, 1e-9)

    # The original read ``coordinates_unit=coordinates_unit**skim_matrix_kwargs``
    # (missing comma), i.e. ``str ** dict``, which raises TypeError.
    cluster_euclidean = all_skim_matrix(
        clusters,
        token,
        coordinates_unit=coordinates_unit,
        **skim_matrix_kwargs
    )

    df = euclidean(zones, coordinates_unit=coordinates_unit)

    # Attach the cluster of the origin zone, then of the destination zone;
    # the suffixes turn the two cluster columns into
    # cluster_origin / cluster_destination.
    df = pd.merge(
        df, pd.DataFrame(cluster_series),
        left_on='origin', right_index=True)
    df = pd.merge(
        df, pd.DataFrame(cluster_series),
        left_on='destination', right_index=True,
        suffixes=['_origin', '_destination'])

    # Attach the cluster-to-cluster skim; columns present on both sides
    # (euclidean_distance) get the '_cluster' suffix on the cluster side.
    df = pd.merge(
        df,
        cluster_euclidean.rename(
            columns={
                'origin': 'cluster_origin',
                'destination': 'cluster_destination',
                'distance': 'cluster_distance',
                'duration': 'cluster_duration'
            }),
        on=['cluster_origin', 'cluster_destination'],
        suffixes=['', '_cluster'])

    df['distance_rate'] = (
        df['euclidean_distance'] / df['euclidean_distance_cluster']
    ).fillna(0)
    df['distance'] = df['cluster_distance'] * df['distance_rate']
    df['duration'] = df['cluster_duration'] * df['distance_rate']

    # Average conversion factors, used where no cluster-level ratio exists.
    euclidean_to_path_length = 1 / (
        df['euclidean_distance_cluster'] / df['cluster_distance']).mean()
    euclidean_speed = (
        df['euclidean_distance_cluster'] / df['duration']).mean()

    # A zero cluster-level euclidean distance makes the rate above unusable
    # (division by zero); fall back on the average factors for those rows.
    zero_cluster_distance = df['euclidean_distance_cluster'] == 0
    df.loc[zero_cluster_distance, 'duration'] = \
        df['euclidean_distance'] / euclidean_speed
    df.loc[zero_cluster_distance, 'distance'] = \
        df['euclidean_distance'] * euclidean_to_path_length

    return df.fillna(0)
def renumber_quetzal(
    zones,
    volume,
    od_stack,
    n_clusters=10,
    cluster_column=None,
    volume_columns=['volume'],
    volume_od_columns=['volume_pt'],
    distance_columns=['euclidean_distance']
):
    """
    Cluster ``zones`` and renumber both ``volume`` and ``od_stack``
    according to that clustering.

    :param zones: zoning table handed to ``spatial.zone_clusters``.
    :param volume: volume table handed to ``renumber_volume``.
    :param od_stack: origin-destination table handed to
        ``renumber_od_stack``.
    :param n_clusters: number of clusters requested from
        ``spatial.zone_clusters``.
    :param cluster_column: forwarded to ``spatial.zone_clusters``.
    :param volume_columns: forwarded to ``renumber_volume``.
    :param volume_od_columns: third positional argument of
        ``renumber_od_stack``.
    :param distance_columns: fourth positional argument of
        ``renumber_od_stack``.
    :return: ``(clusters, grouped, cluster_series, od_stack_grouped)`` --
        the two results of ``spatial.zone_clusters`` interleaved with the
        results of ``renumber_volume`` and ``renumber_od_stack``.
    """
    clustering = spatial.zone_clusters(
        zones,
        n_clusters=n_clusters,
        cluster_column=cluster_column,
    )
    clusters, cluster_series = clustering

    # Tuple items are evaluated left to right, so the volume table is
    # renumbered before the OD stack.
    return (
        clusters,
        renumber_volume(
            volume, cluster_series, volume_columns=volume_columns),
        cluster_series,
        renumber_od_stack(
            od_stack, cluster_series, volume_od_columns, distance_columns),
    )
def node_clustering(links, nodes, n_clusters, prefixe='', group_id=None, **kwargs):
    """
    Group ``nodes`` into clusters and re-attach ``links`` to the clusters.

    :param links: table with ``a`` and ``b`` columns holding node ids, or an
        object without a ``copy`` method (e.g. ``None``) when there are no
        links to relabel.
    :param nodes: table with a ``geometry`` column, indexed by node id.
    :param n_clusters: number of clusters requested from
        ``spatial.zone_clusters``; unused when ``group_id`` is given.
    :param prefixe: string prepended to the cluster id in the relabelled
        link ends and in the centroid index.
    :param group_id: optional column of ``nodes`` that already holds the
        cluster of each node; when given, no clustering is computed and the
        first row of each group stands for the cluster.
    :param kwargs: forwarded to ``spatial.zone_clusters``.
    :return: ``(links, centroids, clusters, parenthood)``:
        ``links`` is a relabelled copy (original ends kept in
        ``disaggregated_a`` / ``disaggregated_b``) or ``None``;
        ``centroids`` is ``clusters`` with each geometry replaced by its
        centroid and a prefixed string index;
        ``clusters`` carries an extra ``count`` column (nodes per cluster);
        ``parenthood`` is ``nodes`` merged with its centroid, its
        ``geometry`` computed row by row with ``parenthood_geometry``.
    """
    disaggregated_nodes = nodes.copy()

    if group_id is None:
        clusters, cluster_series = spatial.zone_clusters(
            nodes, n_clusters=n_clusters, **kwargs)
    else:
        clusters = nodes.groupby(group_id).first()
        cluster_series = nodes[group_id]
    cluster_dict = cluster_series.to_dict()

    centroids = clusters.copy()
    centroids['geometry'] = centroids['geometry'].apply(lambda g: g.centroid)

    # Only the copy is guarded: it is what fails when ``links`` is None.
    # Keeping the relabelling out of the ``try`` body means an AttributeError
    # raised there is no longer silently turned into ``links = None``.
    try:
        links = links.copy()
    except AttributeError:
        links = None
    else:
        links['disaggregated_a'] = links['a']
        links['disaggregated_b'] = links['b']
        links['a'] = links['a'].apply(
            lambda x: prefixe + str(cluster_dict[x]))
        links['b'] = links['b'].apply(
            lambda x: prefixe + str(cluster_dict[x]))

    clusters['count'] = cluster_series.value_counts()
    disaggregated_nodes['cluster'] = cluster_series

    # The merge must happen before the centroid index is prefixed below:
    # the 'cluster' column holds the un-prefixed cluster ids.
    parenthood = pd.merge(
        disaggregated_nodes,
        centroids,
        left_on='cluster',
        right_index=True,
        suffixes=['_node', '_centroid']
    )
    parenthood['geometry'] = parenthood.apply(parenthood_geometry, axis=1)

    centroids.index = prefixe + pd.Series(centroids.index).astype(str)
    return links, centroids, clusters, parenthood
def cluster_snail_number(zones, n_clusters=20, centre=None, buffer=10):
    """
    Order zones cluster by cluster, using ``snail_number`` first on the
    clusters and then on the zones inside each cluster.

    zones: GeoSeries

    :param zones: GeoSeries of zone geometries. NOTE(review): the final
        rename expects the first ``reset_index`` to create an ``index``
        column, i.e. an unnamed index on ``zones`` -- confirm with callers.
    :param n_clusters: number of clusters requested from
        ``spatial.zone_clusters``.
    :param centre: reference geometry handed to ``snail_number`` for the
        clusters; defaults to the centroid of the buffered union of all
        zones.
    :param buffer: buffer applied to the union of the zones before taking
        its centroid; only used when ``centre`` is None.
    :return: DataFrame with columns ``cluster`` (the ``cluster_snail`` value
        of the zone), ``id`` (row position after sorting by cluster then
        within-cluster snail number) and ``original_index``.
    """
    df = pd.DataFrame(zones).reset_index().copy()

    if centre is None:
        union = cascaded_union(df.geometry).buffer(buffer)
        centre = union.centroid

    # Snail clusterize
    clusters, cluster_series = spatial.zone_clusters(df, n_clusters=n_clusters)
    df['cluster'] = cluster_series
    snail = snail_number(clusters, centre)
    clusters['snail'] = snail
    # presumably ``snail`` is a 'cluster'-indexed series named
    # 'cluster_snail', which is the column used below -- confirm.
    df = df.merge(snail.reset_index(), on='cluster')
    # ``axis`` must be given by keyword: DataFrame.drop no longer accepts it
    # positionally in pandas >= 2.0.
    df = df.drop('cluster', axis=1)

    # snail numbering within cluster
    to_concat = []
    for cluster in set(df['cluster_snail']):
        # Explicit copy: a column is added below and must not be written
        # through a slice of ``df`` (SettingWithCopyWarning).
        temp_df = df.loc[df['cluster_snail'] == cluster].copy()
        temp_centre = cascaded_union(temp_df.geometry).centroid
        temp_snail = snail_number(temp_df, temp_centre)
        temp_df['snail'] = temp_snail
        to_concat.append(temp_df)

    concat = pd.concat(to_concat)
    concat = concat.sort_values(
        ['cluster_snail', 'snail']).reset_index(drop=True)
    # An 'index' column already exists, so this second reset_index names the
    # new positional column 'level_0'.
    concat = concat.reset_index().rename(
        columns={
            'level_0': 'id',
            'cluster_snail': 'cluster',
            'index': 'original_index'
        }
    )
    return concat[['cluster', 'id', 'original_index']]