def make_map(self, variable, nbins, preset_bins=None):
    """
    Color the county <path> elements of the stored SVG by `variable` and
    save the result to the working directory as 'map_<variable>.svg'.

    Parameters
    ----------
    variable    : str
                  Column of self.county_data / key of self.data_dict to map.
    nbins       : int
                  Number of color classes (indexes self.color_bins_hex).
    preset_bins : object with a `.bins` sequence, optional
                  Reuse break points from a previous classification; when
                  None, Fisher-Jenks breaks are computed from
                  self.county_data[variable] (NaNs treated as 0).
    """
    soup = BeautifulSoup(self.svg,
                         selfClosingTags=['defs', 'sodipodi: namedview'])
    paths = soup.findAll('path')
    if preset_bins is None:  # identity check, not '== None'
        data_FJ = ps.Fisher_Jenks(self.county_data[variable].fillna(0),
                                  k=nbins)
    else:
        data_FJ = preset_bins
    for p in paths:
        if p['id'] not in ["State_Lines", "separator"]:
            # Paths with non-numeric ids or without data stay unstyled;
            # catch only the expected lookup/parse failures instead of a
            # bare 'except' that would also hide programming errors.
            try:
                value = self.data_dict[variable][int(p['id'])]
            except (KeyError, IndexError, ValueError):
                continue
            # Walk upper break points from highest to lowest; the for/else
            # assigns class 0 when value is below every break.
            for n in range(int(nbins) - 2, -1, -1):
                if value > data_FJ.bins[n]:
                    color_class = n + 1
                    break
            else:
                color_class = 0
            color = self.color_bins_hex[nbins][color_class]
            p['style'] = self.path_style + color
    mapfile = 'map_' + variable + '.svg'
    # 'svg_out' instead of 'file' to avoid shadowing the builtin
    with open(mapfile, 'w') as svg_out:
        svg_out.write(soup.prettify())
def base_choropleth_classif(map_obj, values, classification='quantiles',
                            k=5, cmap='hot_r', sample_fisher=True):
    '''
    Set coloring based on different classification methods
    ...

    Arguments
    ---------
    map_obj         : Poly/Line collection
                      Output from map_X_shp
    values          : array
                      Numpy array with values to map
    classification  : str
                      Classification method to use. Options supported:
                          * 'quantiles' (default)
                          * 'fisher_jenks'
                          * 'equal_interval'
    k               : int
                      Number of bins to classify values in and assign a
                      color to
    cmap            : str
                      Matplotlib coloring scheme
    sample_fisher   : Boolean
                      Defaults to True, controls whether Fisher-Jenks
                      classification uses a sample (faster) or the entire
                      array of values. Ignored if
                      'classification'!='fisher_jenks'

    Returns
    -------
    map             : PatchCollection
                      Map object with the polygons from the shapefile and
                      unique value coloring

    Raises
    ------
    ValueError      : if 'classification' is not a supported scheme.
                      (Previously an unknown scheme fell through and raised
                      a confusing NameError on 'boundaries'.)
    '''
    # elif chain: 'classification' is rebound to the classifier object in
    # the matching branch, so it must not be compared against the remaining
    # scheme names afterwards.
    if classification == 'quantiles':
        classification = ps.Quantiles(values, k)
        boundaries = classification.bins.tolist()
    elif classification == 'equal_interval':
        classification = ps.Equal_Interval(values, k)
        boundaries = classification.bins.tolist()
    elif classification == 'fisher_jenks':
        if sample_fisher:
            classification = ps.esda.mapclassify.Fisher_Jenks_Sampled(
                values, k)
        else:
            classification = ps.Fisher_Jenks(values, k)
        boundaries = classification.bins[:]
    else:
        raise ValueError("Unsupported classification '%s'; expected "
                         "'quantiles', 'equal_interval' or 'fisher_jenks'"
                         % classification)
    map_obj.set_alpha(0.4)
    cmap = cm.get_cmap(cmap, k + 1)
    map_obj.set_cmap(cmap)
    # prepend the data minimum so BoundaryNorm covers the full value range
    boundaries.insert(0, values.min())
    norm = clrs.BoundaryNorm(boundaries, cmap.N)
    map_obj.set_norm(norm)
    if isinstance(map_obj, mpl.collections.PolyCollection):
        pvalues = _expand_values(values, map_obj.shp2dbf_row)
        map_obj.set_array(pvalues)
        map_obj.set_edgecolor('k')
    elif isinstance(map_obj, mpl.collections.LineCollection):
        pvalues = _expand_values(values, map_obj.shp2dbf_row)
        map_obj.set_array(pvalues)
    elif isinstance(map_obj, mpl.collections.PathCollection):
        if not hasattr(map_obj, 'shp2dbf_row'):
            map_obj.shp2dbf_row = np.arange(values.shape[0])
        map_obj.set_array(values)
    return map_obj
def fisher_jenks_map(coords, y, k, title='Fisher-Jenks', sampled=False):
    """
    Plot a choropleth of `y` over the projected shapes in `coords`,
    classified with Fisher-Jenks, and return the classifier.

    coords: Map_Projection instance
    y: array variable to map
    k: int number of classes
    title: string map title
    sampled: binary if True classification bins obtained on a sample of y
             and then applied. Useful for large n arrays
    """
    if sampled:
        classification = ps.esda.mapclassify.Fisher_Jenks_Sampled(y, k)
    else:
        classification = ps.Fisher_Jenks(y, k)
    fig = plt.figure()
    ax = fig.add_subplot(111)
    patches = []
    colors = []
    i = 0
    shape_colors = y  #classification.bins[classification.yb]
    for shp in coords.projected:
        for ring in shp:
            # ring-local names: the original 'x, y = ring' shadowed the
            # parameter y (harmless only because y was copied above)
            xs, ys = ring
            # normalize coordinates by the bounding box extent
            xs = xs / coords.bounding_box[2]
            ys = ys / coords.bounding_box[3]
            n = len(xs)
            xs.shape = (n, 1)
            ys.shape = (n, 1)
            xy = np.hstack((xs, ys))
            polygon = Polygon(xy, True)
            patches.append(polygon)
            # one value per shape, repeated for each of its rings
            colors.append(shape_colors[i])
        i += 1
    cmap = cm.get_cmap('hot_r', k + 1)
    boundaries = classification.bins[:]
    # prepend a lower bound so BoundaryNorm covers the smallest values
    if min(shape_colors) > 0.0:
        boundaries.insert(0, 0)
    else:
        boundaries.insert(0, boundaries[0] - boundaries[1])
    norm = clrs.BoundaryNorm(boundaries, cmap.N)
    p = PatchCollection(patches, cmap=cmap, alpha=0.4, norm=norm)
    colors = np.array(colors)
    p.set_array(colors)
    ax.add_collection(p)
    ax.set_frame_on(False)
    ax.axes.get_yaxis().set_visible(False)
    ax.axes.get_xaxis().set_visible(False)
    ax.set_title(title)
    plt.colorbar(p, cmap=cmap, norm=norm, boundaries=boundaries,
                 ticks=boundaries)
    plt.show()
    return classification
def base_choropleth_classif(shp_link, values, classification='quantiles',
                            k=5, cmap='hot_r', projection='merc',
                            sample_fisher=True):
    '''
    Create a map object with coloring based on different classification
    methods, from a shapefile in lon/lat CRS
    ...

    Arguments
    ---------
    shp_link        : str
                      Path to shapefile
    values          : array
                      Numpy array with values to map
    classification  : str
                      Classification method to use. Options supported:
                          * 'quantiles' (default)
                          * 'fisher_jenks'
                          * 'equal_interval'
    k               : int
                      Number of bins to classify values in and assign a
                      color to
    cmap            : str
                      Matplotlib coloring scheme
    projection      : str
                      Basemap projection. See [1]_ for a list. Defaults
                      to 'merc'
    sample_fisher   : Boolean
                      Defaults to True, controls whether Fisher-Jenks
                      classification uses a sample (faster) or the entire
                      array of values. Ignored if
                      'classification'!='fisher_jenks'

    Returns
    -------
    map             : PatchCollection
                      Map object with the polygons from the shapefile and
                      unique value coloring

    Raises
    ------
    ValueError      : if 'classification' is not a supported scheme.
                      (Previously an unknown scheme fell through and raised
                      a confusing NameError on 'boundaries'.)

    Links
    -----
    .. [1] <http://matplotlib.org/basemap/api/basemap_api.html#module-mpl_toolkits.basemap>
    '''
    # elif chain: 'classification' is rebound to the classifier object in
    # the matching branch, so it must not be compared against the remaining
    # scheme names afterwards.
    if classification == 'quantiles':
        classification = ps.Quantiles(values, k)
        boundaries = classification.bins.tolist()
    elif classification == 'equal_interval':
        classification = ps.Equal_Interval(values, k)
        boundaries = classification.bins.tolist()
    elif classification == 'fisher_jenks':
        if sample_fisher:
            classification = ps.esda.mapclassify.Fisher_Jenks_Sampled(
                values, k)
        else:
            classification = ps.Fisher_Jenks(values, k)
        boundaries = classification.bins[:]
    else:
        raise ValueError("Unsupported classification '%s'; expected "
                         "'quantiles', 'equal_interval' or 'fisher_jenks'"
                         % classification)
    map_obj = map_poly_shp_lonlat(shp_link, projection=projection)
    map_obj.set_alpha(0.4)
    cmap = cm.get_cmap(cmap, k + 1)
    map_obj.set_cmap(cmap)
    # NOTE(review): lower bound of 0 assumes values are non-negative --
    # the sibling overload uses values.min(); confirm which is intended.
    boundaries.insert(0, 0)
    norm = clrs.BoundaryNorm(boundaries, cmap.N)
    map_obj.set_norm(norm)
    map_obj.set_array(values)
    return map_obj
def main():
    """
    Render hourly population-share maps of Tallinn from a 500 m grid.

    Reads the 24-hour mobile-phone population grid plus road, boundary and
    water layers, classifies each hourly share column against one common
    set of bins (so colors are comparable across hours), and saves one PNG
    per hour into `outdir`.

    Relies on module-level helpers defined elsewhere in this file:
    stackColumnValues, renameTo24HourSystem, parseHexSeaborn,
    plotCustomColors.
    """
    # Parameters
    # ----------
    show_legend = False  #True
    show_title_text = False  #True
    # Plot base-grid (with no-data hashes)
    show_noData = False
    # Choose classifier (if None, use self specified classification)
    #'NaturalBreaks' #'JenksCaspall' #'MaximumBreaks' #'FisherJenks' #"HeadTail"
    mapclassifier = None

    # Filepaths
    data_fp = "data/MFD_Population_24H_Tallinn_500m_grid.shp"
    roads_fp = "data/Tallinn_main_roads_for_visualization.shp"
    boundaries_fp = "data/TLN_bordersDASY.shp"
    water_fp = "data/TLN_water_clip_OSM.shp"
    outdir = "results/population_maps"

    # Read files
    data = gpd.read_file(data_fp)
    roads = gpd.read_file(roads_fp)
    boundaries = gpd.read_file(boundaries_fp)
    water = gpd.read_file(water_fp)

    # Re-project all into the same crs as grid
    roads['geometry'] = roads['geometry'].to_crs(crs=data.crs)
    roads.crs = data.crs
    boundaries['geometry'] = boundaries['geometry'].to_crs(crs=data.crs)
    boundaries.crs = data.crs
    water['geometry'] = water['geometry'].to_crs(crs=data.crs)
    water.crs = data.crs

    # Take only largest waterbodies (three largest by area)
    water['area'] = water.area
    water = water.sort_values(by='area', ascending=False)
    water.reset_index(inplace=True)
    # .loc on the fresh RangeIndex keeps rows 0-2 inclusive -- same rows as
    # the removed DataFrame.ix accessor used here originally.
    water = water.loc[0:2]

    # Time columns showing the share of population at different hours
    tcols = ["H%s" % num for num in range(0, 24)]
    # Multiply by 100 to get them into percentage (0-100 representation)
    data[tcols] = data[tcols] * 100

    # Create Custom classifier
    # bins are the upper boundary of the class (including the value itself)
    # ---------------------------------------------------------------------
    # Natural Breaks classification (7 classes) that has been rounded
    # (to have a more intuitive legend)
    my_bins = [0.05, 0.10, 0.20, 0.40, 0.80, 1.6, 3.97]

    # Classify following columns
    ccolumns = tcols

    if mapclassifier:
        # Stack all values
        stacked_values = stackColumnValues(df=data, columns=ccolumns)
        # Classify values based on specific classifier
        n = 7
        my_bins = [x for x in range(n)]
        if mapclassifier == 'HeadTail':
            classif = ps.esda.mapclassify.HeadTail_Breaks(stacked_values)
        elif mapclassifier == 'FisherJenks':
            classif = ps.Fisher_Jenks(stacked_values, k=n)
        elif mapclassifier == 'NaturalBreaks':
            classif = ps.Natural_Breaks(stacked_values, k=n)
        elif mapclassifier == 'MaximumBreaks':
            classif = ps.Maximum_Breaks(stacked_values, k=n)
        elif mapclassifier == 'JenksCaspall':
            classif = ps.Jenks_Caspall(stacked_values, k=n)
        # Get bins
        my_bins = list(classif.bins)

    # Apply the chosen classification
    classifier = ps.User_Defined.make(bins=my_bins)
    classif = data[ccolumns].apply(classifier)
    # Rename classified column names (add letter c in front)
    classif.columns = list(map(lambda x: "c" + x, classif.columns))
    # Join back to grid
    data = data.join(classif)

    # Rename columns and take the 'H' letter from the beginning away
    data, new_cols = renameTo24HourSystem(data, tcols, minutes=True)

    # Select color palette
    palette = sns.diverging_palette(220, 20, n=len(my_bins))
    # Get hex colors
    hex_colors = parseHexSeaborn(palette)
    # Change White color into more reddish
    hex_colors[3] = '#FFF2F2'
    N = len(hex_colors)
    # Convert to rgb
    legendcolors = [col.hex2color(hexcol) for hexcol in hex_colors]

    # Legend labels
    binlabels = np.array(my_bins)
    rbinlabels = binlabels.round(2)
    legend_labels = list(rbinlabels)
    legend_labels.insert(0, 0)

    for tattribute in new_cols:
        # Color balancer
        color_balancer = list(hex_colors)

        # Print the classes
        classcol = "cH%s" % int(tattribute[0:2])
        classes = list(data[classcol].unique())
        classes.sort()
        print("%s \t N-classes: %s \t Classes: " % (tattribute, len(classes)),
              classes)

        # If there is no values for all classes, remove the color of the
        # specific class that is missing (so that coloring scheme is
        # identical for all times)
        if len(classes) < N:
            class_values = [val for val in range(N)]
            # Put values in reverse order
            class_values.reverse()
            # Find out which classes are missing and remove the color
            for i in class_values:
                if i not in classes:
                    del color_balancer[i]

        # Convert to rgb
        rgbcolors = [col.hex2color(hexcol) for hexcol in color_balancer]

        # Dynamo colormap
        Ncolor = len(color_balancer)
        dynamocmap = LinearSegmentedColormap.from_list("my_colormap",
                                                       rgbcolors,
                                                       N=Ncolor,
                                                       gamma=1.0)

        # Initialize Figure
        if not show_legend:
            fig, ax = plt.subplots()
        else:
            fig = plt.figure(figsize=(8, 7))
            # Add axes (1 for image, 2 for custom legend)
            # ([DistFromLeft, DistFromBottom, Width, Height])
            ax = fig.add_axes([0.05, 0.15, 0.8, 0.65])
            ax1 = fig.add_axes([0.2, 0.08, 0.6, 0.035])

        if show_noData:
            # Plot base grid
            if show_legend:
                data.plot(ax=ax, color='white', linewidth=0.1, hatch='x',
                          edgecolor='grey', legend=True)
            else:
                data.plot(ax=ax, color='white', linewidth=0.1, hatch='x',
                          edgecolor='grey')
        else:
            if show_legend:
                data.plot(ax=ax, color='white', linewidth=0,
                          edgecolor='grey', legend=True)
            else:
                data.plot(ax=ax, color='white', linewidth=0,
                          edgecolor='grey')

        # Clip grid with boundaries
        data = gpd.overlay(data, boundaries, how='intersection')

        # Plot the map using custom color map (use the classified column)
        ax = plotCustomColors(ax=ax, df=data, column=classcol,
                              custom_cmap=dynamocmap, linewidth=0.05,
                              edgecolor='grey')

        # Plot water bodies
        water.plot(ax=ax, color='white', alpha=1.0, linewidth=0,
                   edgecolor='grey')  #linewidth=0.05

        # Plot roads
        roads.plot(ax=ax, color='grey', lw=0.8, alpha=0.8)

        # Specify y and x-lim
        ax.set_xlim(left=531000, right=553000)
        ax.set_ylim(top=6596000, bottom=6579400)

        # Remove tick markers
        ax.xaxis.set_visible(False)
        ax.yaxis.set_visible(False)

        # Info texts
        info_text = "%s" % (tattribute)
        if not show_legend:
            ppos_x = 540000
            ppos_y = 6595500
        else:
            ppos_x = 540000
            ppos_y = 6596500

        # Add text about time
        ax.text(ppos_x, ppos_y, info_text, size=30, color='black',
                **{'fontname': 'Arial'})

        # Add title text
        if show_title_text:
            ax.text(ppos_x - 5000, ppos_y + 2000,
                    "Population distribution in Tallinn\n based on mobile phone data",
                    size=20, color='gray', **{'fontname': 'Arial'})

        # Add legend
        if show_legend:
            ax1.imshow(np.arange(N).reshape(1, N),
                       cmap=mpl.colors.ListedColormap(list(legendcolors)),
                       interpolation="nearest", aspect="auto")
            # Set locations of the bins
            ax1.set_xticks(np.arange(N + 1) - .5)
            ax1.set_yticks([])
            # Specify the labels
            ax1.set_xticklabels(legend_labels)
            # Set colorbar title
            cbar_title = 'Share of population (%)'
            pos_x = 0.25
            pos_y = 0.123
            plt.figtext(pos_x, pos_y, cbar_title, size=12)

        # Save figure
        resolution = 500
        outpath = os.path.join(
            outdir, "%s_PopulationDistribution_map_%sdpi.png" %
            (tattribute[0:2], resolution))

        # Don't show axis borders
        ax.axis('off')
        if not show_legend:
            plt.tight_layout()
        plt.savefig(outpath, dpi=resolution)
        #plt.show()
        plt.close()
def choropleth_map(jsonpath, key, attribute, df=None,
                   classification="Quantiles", classes=5, bins=None,
                   std=None, centroid=None, zoom_start=5,
                   tiles='OpenStreetMap', fill_color="YlGn", fill_opacity=.5,
                   line_opacity=0.2, legend_name='', save=True):
    '''
    One-shot mapping function for folium-based choropleth mapping.

    jsonpath - the filepath to a JSON file (also accepts a shp-dbf tuple;
               a GeoJSON dict in memory is not supported yet)
    key - the field upon which the JSON and the dataframe will be linked
    attribute - the attribute to be mapped

    The rest of the arguments are keyword:

    classification - type of classification scheme to be used
    classes - number of classes used
    bins - breakpoints, if manual classes are desired

    Returns the folium Map; when save=True it is also written next to the
    GeoJSON as '<jsonpath-without-.json>_<attribute>.html'.
    '''
    #Polymorphism by hand...
    if isinstance(jsonpath, str):
        if os.path.isfile(jsonpath):
            sjson = gj.load(open(jsonpath))
        else:
            raise IOError('File not found')
    if isinstance(jsonpath, dict):
        raise NotImplementedError(
            'Direct mapping from dictionary not yet supported')
        #with open('tmp.json', 'w') as out:
        #    gj.dump(jsonpath, out)
        #    sjson = gj.load(open('tmp.json'))
    if isinstance(jsonpath, tuple):
        if 'ShpWrapper' in str(type(jsonpath[0])) and 'DBF' in str(
                type(jsonpath[1])):
            flip('tmp.json', jsonpath[0], jsonpath[1])
            sjson = gj.load(open('tmp.json'))
            jsonpath = 'tmp.json'
        elif 'ShpWrapper' in str(type(jsonpath[1])) and 'DBF' in str(
                type(jsonpath[0])):
            flip('tmp.json', jsonpath[1], jsonpath[0])
            sjson = gj.load(open('tmp.json'))
            jsonpath = 'tmp.json'
        else:
            raise IOError(
                'Inputs must be GeoJSON filepath, GeoJSON dictionary in memory, or shp-dbf tuple'
            )

    #key construction
    if df is None:
        df = json2df(sjson)
    dfkey = [key, attribute]

    #centroid search
    if centroid is None:
        if 'bbox' in sjson.keys():
            bbox = sjson.bbox
        # NOTE(review): this unconditionally overwrites the bbox read from
        # the GeoJSON above -- possibly an intended fallback gone wrong;
        # behavior kept as-is, verify against bboxsearch().
        bbox = bboxsearch(sjson)
        xs = sum([bbox[0], bbox[2]]) / 2.
        ys = sum([bbox[1], bbox[3]]) / 2.
        centroid = [ys, xs]
    jsonkey = 'feature.properties.' + key

    choromap = fm.Map(location=centroid, zoom_start=zoom_start,
                      tiles=tiles)  # all the elements you need to make a choropleth

    #standardization
    if std is not None:
        if isinstance(std, int) or isinstance(std, float):
            y = np.array(df[attribute] / std)
        elif type(std) == str:
            y = np.array(df[attribute] / df[std])
        elif callable(std):
            raise NotImplementedError(
                'Functional Standardizations are not implemented yet')
        else:
            raise ValueError(
                'Standardization must be integer, float, function, or Series')
    else:
        y = np.array(df[attribute].tolist())

    #For people who don't read documentation...
    if isinstance(classes, list):
        bins = classes
        classes = len(bins)
    elif isinstance(classes, float):
        try:
            classes = int(classes)
        except (TypeError, ValueError):
            raise ValueError('Classes must be coercable to integers')

    #classification passing
    if classification is not None:
        if classification == "Maximum Breaks":
            #there is probably a better way to do this, but it's a start.
            mapclass = ps.Maximum_Breaks(y, k=classes).bins.tolist()
        elif classification == 'Quantiles':
            mapclass = ps.Quantiles(y, k=classes).bins.tolist()
        elif classification == 'Fisher-Jenks':
            mapclass = ps.Fisher_Jenks(y, k=classes).bins
        elif classification == 'Equal Interval':
            mapclass = ps.Equal_Interval(y, k=classes).bins.tolist()
        elif classification == 'Natural Breaks':
            mapclass = ps.Natural_Breaks(y, k=classes).bins
        elif classification == 'Jenks Caspall Forced':
            raise NotImplementedError(
                'Jenks Caspall Forced is not implemented yet.')
            # mapclass = ps.Jenks_Caspall_Forced(y, k=classes).bins.tolist()
        elif classification == 'Jenks Caspall Sampled':
            raise NotImplementedError(
                'Jenks Caspall Sampled is not implemented yet')
            # mapclass = ps.Jenks_Caspall_Sampled(y, k=classes).bins.tolist()
        elif classification == 'Jenks Caspall':
            mapclass = ps.Jenks_Caspall(y, k=classes).bins.tolist()
        elif classification == 'User Defined':
            mapclass = bins
        elif classification == 'Standard Deviation':
            if bins is None:
                # floor division: 'classes / 2' yields a float on Python 3
                # and range() would raise TypeError
                half = classes // 2
                bins = range(-half, half + 1)
                mapclass = list(ps.Std_Mean(y, bins).bins)
            else:
                mapclass = list(ps.Std_Mean(y, bins).bins)
        elif classification == 'Percentiles':
            if bins is None:
                bins = [1, 10, 50, 90, 99, 100]
                mapclass = list(ps.Percentiles(y, bins).bins)
            else:
                mapclass = list(ps.Percentiles(y, bins).bins)
        elif classification == 'Max P':
            #raise NotImplementedError('Max-P classification is not implemented yet')
            mapclass = ps.Max_P_Classifier(y, k=classes).bins.tolist()
        else:
            raise NotImplementedError(
                'Your classification is not supported or was not found. Supported classifications are:\n "Maximum Breaks"\n "Quantiles"\n "Fisher-Jenks"\n "Equal Interval"\n "Natural Breaks"\n "Jenks Caspall"\n "User Defined"\n "Percentiles"\n "Max P"'
            )
    else:
        print('Classification forced to None. Defaulting to Quartiles')
        mapclass = ps.Quantiles(y, k=classes).bins.tolist()

    #folium call, try abstracting to a "mapper" function, passing list of args
    choromap.geo_json(geo_path=jsonpath, key_on=jsonkey, data=df,
                      columns=dfkey, fill_color=fill_color,
                      fill_opacity=fill_opacity, line_opacity=line_opacity,
                      threshold_scale=mapclass[:-1],
                      legend_name=legend_name)

    if save:
        # str.rstrip('.json') strips any trailing '.', 'j', 's', 'o', 'n'
        # characters (e.g. 'paris.json' -> 'pari'); remove the exact suffix
        # instead.
        if jsonpath.endswith('.json'):
            base = jsonpath[:-len('.json')]
        else:
            base = jsonpath
        fname = base + '_' + attribute + '.html'
        choromap.save(fname)
    return choromap
def column_kde(series_to_plot,
               num_bins=7,
               split_type="quantiles",
               bw=0.15,
               plot_title="",
               xlabel="x",
               ylabel="y"):
    """
    v1.0
    function that plots:
        Kernel Density Estimation (KDE)
        rugplot
        shows a classification of the distribution based on 'num_bins'
        and 'split_type'
        also marks the series mean and median
    ----------------
    Input arguments: series_to_plot -- pandas Series -- series to be plotted
                     num_bins       -- int -- number of bins to be used for the split
                                       of the distribution (default=7)
                     split_type     -- str -- type of the split of the distribution
                                       (default='quantiles')
                                       must be either 'quantiles', 'equal_interval',
                                       or 'fisher_jenks'
                     bw             -- float -- bandwidth to be used for KDE
                                       (default=0.15)
    --------
    Returns:         None, plots a KDE, rugplot, and bins of values
                     in 'series_to_plot'
    """
    # generate a list of bins from the split of the distribution using
    # type of split provided in 'split_type'
    if split_type == 'quantiles':
        classi = ps.Quantiles(series_to_plot, k=num_bins)
    elif split_type == 'equal_interval':
        classi = ps.Equal_Interval(series_to_plot, k=num_bins)
    elif split_type == 'fisher_jenks':
        classi = ps.Fisher_Jenks(series_to_plot, k=num_bins)
    elif isinstance(split_type, str):
        # a string, but not one of the supported scheme names
        raise ValueError("Input parameter 'split_type' must be either 'quantiles', " +
                         "'equal_interval', or 'fisher_jenks'.")
    else:
        raise TypeError("Input parameter 'split_type' must be a string and either 'quantiles', " +
                        "'equal_interval, or 'fisher_jenks'.")
    # print the bins
    print(classi)

    # create figure and axis
    f, ax = plt.subplots(1, figsize=(9, 6))

    # plot KDE of the distribution
    sns.kdeplot(series_to_plot,
                shade=True,
                label='Distribution of counts of Teranet records per DA',
                bw=bw)

    # plot a rugplot
    sns.rugplot(series_to_plot, alpha=0.5)

    # plot the split of the distribution
    for classi_bin in classi.bins:
        ax.axvline(classi_bin, color='magenta', linewidth=1, linestyle='--')

    # plot the mean and the median
    ax.axvline(series_to_plot.mean(), color='deeppink', linestyle='--', linewidth=1)
    ax.text(series_to_plot.mean(), 0,
            "Mean: {0:.2f}".format(series_to_plot.mean()), rotation=90)
    ax.axvline(series_to_plot.median(), color='coral', linestyle=':')
    ax.text(series_to_plot.median(), 0,
            "Median: {0:.2f}".format(series_to_plot.median()), rotation=90)

    # configure axis parameters
    ax.set_title(plot_title, fontdict={'fontsize': '18', 'fontweight': '3'})
    ax.set_xlabel(xlabel, fontdict={'fontsize': '16', 'fontweight': '3'})
    ax.set_ylabel(ylabel, fontdict={'fontsize': '16', 'fontweight': '3'})
    ax.legend(loc='best')
    plt.show()
def createClassifyMap(self, map_type):
    """ return an instance of pysal.Map_Classifier

    Classify self.data according to `map_type` (a stars.MAP_CLASSIFY_*
    constant) and assemble the legend pieces for it.

    Returns a tuple (id_group, label_group, color_group):
      id_group    -- list of lists: observation indices per bin (for the
                     unique-values map, slot 0 is an empty header group)
      label_group -- one display label per bin
      color_group -- one fill color per bin (from self.pick_color_set)
    Raises KeyError for an unrecognized map_type.
    NOTE: Python 2 code (dict.has_key, old-style raise) -- kept as-is.
    """
    id_group = []
    color_group = []
    label_group = []
    if map_type == stars.MAP_CLASSIFY_EQUAL_INTERVAL:
        k = 5  # default
        if self.params.has_key("k"):
            k = self.params["k"]
        cm = pysal.Equal_Interval(self.data, k=k)
        # add label group, color group
        label_group = self._get_label_group_by_k(cm.bins, cm.counts)
        #color_group = self._get_color_schema_by_k(k)
        color_group = self.pick_color_set(1, len(cm.bins), False)
    elif map_type == stars.MAP_CLASSIFY_PERCENTILES:
        pct = [1, 10, 50, 90, 99, 100]
        # doesn't support different defined pct
        #if self.params.has_key("pct"):
        #    pct = self.params["pct"]
        cm = pysal.Percentiles(self.data, pct=pct)
        # pad counts to six entries so the fixed label list below indexes safely
        counts = list(cm.counts)
        n_counts = len(counts)
        if n_counts < 6:
            for i in range(6 - n_counts):
                counts.append(0)
        label_group = [
            '<1%%(%d)' % counts[0], '1%% - 10%%(%d)' % counts[1],
            '10%% - 50%%(%d)' % counts[2], '50%% - 90%%(%d)' % counts[3],
            '90%% - 99%%(%d)' % counts[4], '>99%%(%d)' % counts[5]
        ]
        #color_group = self._get_default_color_schema(n_bins)
        color_group = self.pick_color_set(3, 6, True)
    elif map_type == stars.MAP_CLASSIFY_BOX_PLOT:
        hinge = 1.5  # default
        if self.params.has_key("hinge"):
            hinge = self.params["hinge"]
        cm = pysal.Box_Plot(self.data, hinge=hinge)
        # 5 bins means there were no upper outliers
        n_bins = len(cm.bins)
        if n_bins == 5:
            n_upper_outlier = 0
        else:
            n_upper_outlier = cm.counts[5]
        label_group = [
            'Lower outlier(%d)' % cm.counts[0],
            '<25%% (%d)' % cm.counts[1],
            '25%% - 50%% (%d)' % cm.counts[2],
            '50%% - 75%% (%d)' % cm.counts[3],
            '>75%% (%d)' % cm.counts[4],
            'Upper outlier (%d)' % n_upper_outlier
        ]
        #color_group = self._get_default_color_schema(n_bins)
        color_group = self.pick_color_set(2, 6, False)
    elif map_type == stars.MAP_CLASSIFY_QUANTILES:
        k = 5  # default
        if self.params.has_key("k"):
            k = self.params["k"]
        cm = pysal.Quantiles(self.data, k=k)
        # add label group, color group
        label_group = self._get_label_group_by_k(cm.bins, cm.counts)
        #color_group = self._get_color_schema_by_k(k)
        color_group = self.pick_color_set(1, len(cm.bins), False)
    elif map_type == stars.MAP_CLASSIFY_STD_MEAN:
        # labels/colors for this scheme are built in the common section below
        cm = pysal.Std_Mean(self.data, multiples=[-2, -1, 0, 1, 2])
        n_bins = len(cm.bins)
    elif map_type == stars.MAP_CLASSIFY_MAXIMUM_BREAK:
        k = 5  # default
        if self.params.has_key("k"):
            k = self.params["k"]
        cm = pysal.Maximum_Breaks(self.data, k=k)
        # add label group, color group
        label_group = self._get_label_group_by_k(cm.bins, cm.counts)
        #color_group = self._get_color_schema_by_k(k)
        color_group = self.pick_color_set(1, len(cm.bins), False)
    elif map_type == stars.MAP_CLASSIFY_NATURAL_BREAK:
        k = 5  # default
        if self.params.has_key("k"):
            k = self.params["k"]
        cm = pysal.Natural_Breaks(self.data, k=k)
        # add label group, color group
        label_group = self._get_label_group_by_k(cm.bins, cm.counts)
        #color_group = self._get_color_schema_by_k(k)
        color_group = self.pick_color_set(1, len(cm.bins), False)
    elif map_type == stars.MAP_CLASSIFY_FISHER_JENKS:
        cm = pysal.Fisher_Jenks(self.data)
        # see blow: common label group and color group
    elif map_type == stars.MAP_CLASSIFY_JENKS_CASPALL:
        k = 5  # default
        if self.params.has_key("k"):
            k = self.params["k"]
        cm = pysal.Jenks_Caspall(self.data, k=k)
        # add label group, color group
        # Jenks_Caspall bins are column vectors; take the scalar from each
        label_group = self._get_label_group_by_k([i[0] for i in cm.bins],
                                                 cm.counts)
        #color_group = self._get_color_schema_by_k(k)
        color_group = self.pick_color_set(1, len(cm.bins), False)
    elif map_type == stars.MAP_CLASSIFY_JENKS_CASPALL_SAMPLED:
        k = 5  # default
        pct = 0.1
        if self.params.has_key("k"):
            k = self.params["k"]
        if self.params.has_key("pct"):
            pct = self.params["pct"]
        cm = pysal.Jenks_Caspall_Sampled(self.data, k=k, pct=pct)
        # add label group, color group
        label_group = self._get_label_group_by_k(cm.bins, cm.counts)
        #color_group = self._get_color_schema_by_k(k)
        color_group = self.pick_color_set(1, len(cm.bins), False)
    elif map_type == stars.MAP_CLASSIFY_JENKS_CASPALL_FORCED:
        k = 5  # default
        if self.params.has_key("k"):
            k = self.params["k"]
        cm = pysal.Jenks_Caspall_Forced(self.data, k=k)
        # add label group, color group
        label_group = self._get_label_group_by_k(cm.bins, cm.counts)
        #color_group = self._get_color_schema_by_k(k)
        color_group = self.pick_color_set(1, len(cm.bins), False)
    elif map_type == stars.MAP_CLASSIFY_USER_DEFINED:
        assert self.params.has_key("bins")
        bins = self.params["bins"]
        cm = pysal.User_Defined(self.data, bins=bins)
        k = len(bins)
        # add label group, color group
        label_group = self._get_label_group_by_k(cm.bins, cm.counts)
        #color_group = self._get_color_schema_by_k(k)
        color_group = self.pick_color_set(1, len(cm.bins), False)
    elif map_type == stars.MAP_CLASSIFY_MAX_P:
        k = 5  # default
        if self.params.has_key("k"):
            k = self.params["k"]
        cm = pysal.Max_P_Classifier(self.data, k=k)
        # add label group, color group
        label_group = self._get_label_group_by_k(cm.bins, cm.counts)
        #color_group = self._get_color_schema_by_k(k)
        color_group = self.pick_color_set(1, len(cm.bins), False)
    elif map_type == stars.MAP_CLASSIFY_UNIQUE_VALUES:
        # group observation indices by value, keeping at most 10 distinct
        # values; everything else is lumped into an 'Others' group
        id_group_dict = {}
        id_other = []
        n = 0
        for i, item in enumerate(self.data):
            if n < 10:
                if not id_group_dict.has_key(item):
                    id_group_dict[item] = []
                    n += 1
            if id_group_dict.has_key(item):
                id_group_dict[item].append(i)
            else:
                id_other.append(i)
        id_group = id_group_dict.values()
        unique_values = id_group_dict.keys()
        max_num_values = n if n <= 10 else 10
        label_group = [
            str(unique_values[i]) for i in range(max_num_values)
        ]
        color_group = [
            stars.MAP_COLOR_12_UNIQUE_FILL[i] for i in range(max_num_values)
        ]
        #color_group = self.pick_color_set(1, max_num_values,False)
        if n >= 10:
            id_group.append(id_other)
            label_group.append('Others')
            color_group.append(stars.MAP_COLOR_12_UNIQUE_OTHER)
        # slot 0 acts as a header group carrying the field name
        field_name = self.params['field_name']
        id_group.insert(0, [])
        label_group.insert(0, field_name)
        color_group.insert(0, None)
    else:
        raise KeyError, 'Classify map type is illegal'
    # for some common label group and color group
    if map_type in [
            stars.MAP_CLASSIFY_FISHER_JENKS, stars.MAP_CLASSIFY_STD_MEAN
    ]:
        """
        upper_bound = 0 if len(cm.counts) == 5 else cm.counts[5]
        label_group = ['<%s (%d)'% (cm.bins[0],cm.counts[0]),
                       '%s - %s (%d)'% (cm.bins[0], cm.bins[1],cm.counts[1]),
                       '%s - %s (%d)'% (cm.bins[1], cm.bins[2], cm.counts[2]),
                       '%s - %s (%d)'% (cm.bins[2], cm.bins[3], cm.counts[3]),
                       '%s - %s (%d)'% (cm.bins[3], cm.bins[4], cm.counts[4]),
                       '>%s (%d)'% (cm.bins[4], upper_bound)]
        #color_group = self._get_default_color_schema(len(cm.bins))
        color_group = self.pick_color_set(3,7,False)[1:]
        """
        label_group = self._get_range_labels(cm.bins, cm.counts)
        color_group = self.pick_color_set(3, len(cm.bins), True)  #[1:]
    if map_type != stars.MAP_CLASSIFY_UNIQUE_VALUES:
        # convert the classifier's per-observation bin ids (cm.yb) into
        # per-bin lists of observation indices
        binIds = cm.yb
        bins = cm.bins
        n_group = len(bins)
        id_group = [[] for i in range(n_group)]
        for i, gid in enumerate(binIds):
            id_group[gid].append(i)
    return id_group, label_group, color_group
# Build and save a county-level choropleth of total CO2e savings per
# reporting year (currently only 2015), then print that year's own
# Fisher-Jenks break points.
for y in [2015]:
    # savings_map_data = pd.DataFrame(
    #     savings_map_data[savings_map_data.REPORTING_YEAR == y].groupby(
    #         ['COUNTY_FIPS', 'FACILITY_ID'], as_index=False
    #     ).savings_MMTCO2E_total_mean.mean()
    # )
    # Aggregate savings to one total per county for the year.
    savings_map_data_input = pd.DataFrame(
        savings_map_data[savings_map_data.REPORTING_YEAR == y].groupby(
            'COUNTY_FIPS', as_index=False).savings_MMTCO2E_total.sum())
    # FJ_2011 = ps.Fisher_Jenks(
    #     savings_map_data_input.savings_MMTCO2E_total, k = 5
    # )
    savings_map = MakeCountyMap.CountyEnergy_Maps(savings_map_data_input)
    if y == 2015:
        # NOTE(review): FJ_2011 is not defined in this chunk and its
        # computation above is commented out -- it must be assigned earlier
        # in the file or this branch raises NameError. Verify.
        savings_map.make_map('savings_MMTCO2E_total', 5, FJ_2011)
    else:
        savings_map.make_map('savings_MMTCO2E_total', 5)
    print(
        np.round(ps.Fisher_Jenks(savings_map_data_input.savings_MMTCO2E_total,
                                 k=5).bins,
                 decimals=1))
# convert to geopandas df gdf = convert_to_gpd_df(df) # load census blocks blocks = gpd.read_file('../data/census2000blockgroups_poly/census2000blockgroups_poly.shp') blocks = blocks.loc[blocks['COUNTY'] == '025'] blocks = blocks.to_crs({'init': 'epsg:4326'}) df_blocks = join_311_to_blocks(gdf, blocks) # create neighbors from file outfile = "../data/tmp/tmp.shp" weight_matrix = get_queen_neighbors_matrix(gdf, outfile) # create natural breaks for open len open_len_FJ10 = ps.Fisher_Jenks(df_blocks.open_len, k=10) print("Fisher Jenks breaks - open len: {}".format(open_len_FJ10)) print("Fisher Jenks fit- open len: {}".format(open_len_FJ10.adcm)) # join breaks back to blocks df df_blocks = df_blocks.assign(open_len_cl=open_len_FJ10.yb) # calculate spatial lag open_len_lag = ps.lag_spatial(weight_matrix, df_blocks.open_len.values) open_len_lag_FJ10 = ps.Fisher_Jenks(open_len_lag, k=10) print("Fisher Jenks breaks - open len lag: {}".format(open_len_lag_FJ10)) print("Fisher Jenks fit - open len lag: {}".format(open_len_lag_FJ10.adcm)) # join lag breaks back to blocks df_blocks = df_blocks.assign(open_len_lag_cl=open_len_lag_FJ10.yb) df_blocks.to_csv("../data/web/df_block.csv")