def equal_interval_map(coords, y, k, title='Equal Interval'):
    """
    Plot a choropleth of ``y`` with an Equal_Interval classification.

    coords: Map_Projection instance
    y: array variable to map
    k: int number of classes
    title: string map title

    Returns the pysal Equal_Interval classification object.
    """
    classification = ps.Equal_Interval(y, k)
    fig = plt.figure()
    ax = fig.add_subplot(111)
    patches = []
    colors = []
    i = 0
    # Polygons are colored by the raw values; the classification is only
    # used below for the colorbar boundaries.  (A dead assignment that
    # computed classification.bins[classification.yb] and immediately
    # discarded it has been removed.)
    shape_colors = y
    for shp in coords.projected:
        for ring in shp:
            # BUGFIX: use ring_x/ring_y so the values array ``y``
            # (aliased by shape_colors) is not shadowed by coordinates.
            ring_x, ring_y = ring
            # Normalize coordinates by the bounding-box extent.
            ring_x = ring_x / coords.bounding_box[2]
            ring_y = ring_y / coords.bounding_box[3]
            n = len(ring_x)
            ring_x.shape = (n, 1)
            ring_y.shape = (n, 1)
            xy = np.hstack((ring_x, ring_y))
            polygon = Polygon(xy, True)
            patches.append(polygon)
            # One value per shape: every ring of shape i shares a color.
            colors.append(shape_colors[i])
        i += 1
    cmap = cm.get_cmap('hot_r', k + 1)
    boundaries = classification.bins.tolist()
    # NOTE(review): assumes the data start at 0; the map_obj variant of
    # base_choropleth_classif uses values.min() instead -- confirm intent.
    boundaries.insert(0, 0)
    norm = clrs.BoundaryNorm(boundaries, cmap.N)
    p = PatchCollection(patches, cmap=cmap, alpha=0.4, norm=norm)
    colors = np.array(colors)
    p.set_array(colors)
    ax.add_collection(p)
    ax.set_frame_on(False)
    ax.axes.get_yaxis().set_visible(False)
    ax.axes.get_xaxis().set_visible(False)
    ax.set_title(title)
    plt.colorbar(p, cmap=cmap, norm=norm, boundaries=boundaries,
                 ticks=boundaries)
    plt.show()
    return classification
def base_choropleth_classif(map_obj, values, classification='quantiles',
                            k=5, cmap='hot_r', sample_fisher=True):
    '''
    Set coloring based on different classification methods
    ...

    Arguments
    ---------
    map_obj         : Poly/Line collection
                      Output from map_X_shp
    values          : array
                      Numpy array with values to map
    classification  : str
                      Classification method to use. Options supported:
                        * 'quantiles' (default)
                        * 'fisher_jenks'
                        * 'equal_interval'
    k               : int
                      Number of bins to classify values in and assign a
                      color to
    cmap            : str
                      Matplotlib coloring scheme
    sample_fisher   : Boolean
                      Defaults to True, controls whether Fisher-Jenks
                      classification uses a sample (faster) or the entire
                      array of values. Ignored if
                      'classification'!='fisher_jenks'

    Returns
    -------
    map             : PatchCollection
                      Map object with the polygons from the shapefile and
                      unique value coloring

    Raises
    ------
    ValueError
                      If `classification` is not a supported method name.
    '''
    # BUGFIX: use an elif chain -- after the first match `classification`
    # is rebound to a classifier object, so the subsequent string
    # comparisons in the original sequential `if`s were meaningless, and
    # an unrecognized name fell through to a NameError on `boundaries`.
    if classification == 'quantiles':
        classification = ps.Quantiles(values, k)
        boundaries = classification.bins.tolist()
    elif classification == 'equal_interval':
        classification = ps.Equal_Interval(values, k)
        boundaries = classification.bins.tolist()
    elif classification == 'fisher_jenks':
        if sample_fisher:
            # Sampled variant trades exactness for speed on large arrays.
            classification = ps.esda.mapclassify.Fisher_Jenks_Sampled(
                values, k)
        else:
            classification = ps.Fisher_Jenks(values, k)
        boundaries = classification.bins[:]
    else:
        raise ValueError(
            "Unsupported classification '%s'" % classification)

    map_obj.set_alpha(0.4)
    cmap = cm.get_cmap(cmap, k + 1)
    map_obj.set_cmap(cmap)
    # Lowest boundary anchored at the data minimum.
    boundaries.insert(0, values.min())
    norm = clrs.BoundaryNorm(boundaries, cmap.N)
    map_obj.set_norm(norm)

    if isinstance(map_obj, mpl.collections.PolyCollection):
        # One shapefile record may map to several polygons; expand the
        # value array accordingly before coloring.
        pvalues = _expand_values(values, map_obj.shp2dbf_row)
        map_obj.set_array(pvalues)
        map_obj.set_edgecolor('k')
    elif isinstance(map_obj, mpl.collections.LineCollection):
        pvalues = _expand_values(values, map_obj.shp2dbf_row)
        map_obj.set_array(pvalues)
    elif isinstance(map_obj, mpl.collections.PathCollection):
        if not hasattr(map_obj, 'shp2dbf_row'):
            # Points map 1:1 to records by default.
            map_obj.shp2dbf_row = np.arange(values.shape[0])
        map_obj.set_array(values)
    return map_obj
def base_choropleth_classif(shp_link, values, classification='quantiles',
                            k=5, cmap='hot_r', projection='merc',
                            sample_fisher=True):
    '''
    Create a map object with coloring based on different classification
    methods, from a shapefile in lon/lat CRS
    ...

    Arguments
    ---------
    shp_link        : str
                      Path to shapefile
    values          : array
                      Numpy array with values to map
    classification  : str
                      Classification method to use. Options supported:
                        * 'quantiles' (default)
                        * 'fisher_jenks'
                        * 'equal_interval'
    k               : int
                      Number of bins to classify values in and assign a
                      color to
    cmap            : str
                      Matplotlib coloring scheme
    projection      : str
                      Basemap projection. See [1]_ for a list. Defaults
                      to 'merc'
    sample_fisher   : Boolean
                      Defaults to True, controls whether Fisher-Jenks
                      classification uses a sample (faster) or the entire
                      array of values. Ignored if
                      'classification'!='fisher_jenks'

    Returns
    -------
    map             : PatchCollection
                      Map object with the polygons from the shapefile and
                      unique value coloring

    Raises
    ------
    ValueError
                      If `classification` is not a supported method name.

    Links
    -----
    .. [1] <http://matplotlib.org/basemap/api/basemap_api.html#module-mpl_toolkits.basemap>
    '''
    # BUGFIX: elif chain -- the original sequential `if`s compared the
    # rebound classifier object against strings, and an unknown method
    # name crashed later with a NameError on `boundaries`.
    if classification == 'quantiles':
        classification = ps.Quantiles(values, k)
        boundaries = classification.bins.tolist()
    elif classification == 'equal_interval':
        classification = ps.Equal_Interval(values, k)
        boundaries = classification.bins.tolist()
    elif classification == 'fisher_jenks':
        if sample_fisher:
            classification = ps.esda.mapclassify.Fisher_Jenks_Sampled(
                values, k)
        else:
            classification = ps.Fisher_Jenks(values, k)
        boundaries = classification.bins[:]
    else:
        raise ValueError(
            "Unsupported classification '%s'" % classification)

    map_obj = map_poly_shp_lonlat(shp_link, projection=projection)
    map_obj.set_alpha(0.4)
    cmap = cm.get_cmap(cmap, k + 1)
    map_obj.set_cmap(cmap)
    # NOTE(review): the sibling map_obj variant anchors the lowest
    # boundary at values.min(); this one hard-codes 0 -- confirm intent.
    boundaries.insert(0, 0)
    norm = clrs.BoundaryNorm(boundaries, cmap.N)
    map_obj.set_norm(norm)
    map_obj.set_array(values)
    return map_obj
def choropleth_map(jsonpath, key, attribute, df=None,
                   classification="Quantiles", classes=5, bins=None,
                   std=None, centroid=None, zoom_start=5,
                   tiles='OpenStreetMap', fill_color="YlGn",
                   fill_opacity=.5, line_opacity=0.2, legend_name='',
                   save=True):
    '''
    One-shot mapping function for folium-based choropleth mapping.

    jsonpath - the filepath to a JSON file
    key - the field upon which the JSON and the dataframe will be linked
    attribute - the attribute to be mapped

    The rest of the arguments are keyword:

    classification - type of classification scheme to be used
    classes - number of classes used
    bins - breakpoints, if manual classes are desired
    std - scalar, column name, or callable used to standardize `attribute`
    centroid - (lat, lon) map center; derived from the GeoJSON bbox if None
    save - if True, write the map to '<jsonpath stem>_<attribute>.html'

    Returns the folium Map object.
    '''
    # Polymorphism by hand: accept a GeoJSON path, a dict (unsupported),
    # or a (shp, dbf) tuple in either order.
    if isinstance(jsonpath, str):
        if os.path.isfile(jsonpath):
            sjson = gj.load(open(jsonpath))
        else:
            raise IOError('File not found')
    if isinstance(jsonpath, dict):
        raise NotImplementedError(
            'Direct mapping from dictionary not yet supported')
    if isinstance(jsonpath, tuple):
        if 'ShpWrapper' in str(type(jsonpath[0])) and 'DBF' in str(
                type(jsonpath[1])):
            flip('tmp.json', jsonpath[0], jsonpath[1])
            sjson = gj.load(open('tmp.json'))
            jsonpath = 'tmp.json'
        elif 'ShpWrapper' in str(type(jsonpath[1])) and 'DBF' in str(
                type(jsonpath[0])):
            flip('tmp.json', jsonpath[1], jsonpath[0])
            sjson = gj.load(open('tmp.json'))
            jsonpath = 'tmp.json'
        else:
            raise IOError(
                'Inputs must be GeoJSON filepath, GeoJSON dictionary in memory, or shp-dbf tuple'
            )

    # key construction
    if df is None:
        df = json2df(sjson)
    dfkey = [key, attribute]

    # centroid search
    if centroid is None:
        # BUGFIX: the stored bbox was previously read and then
        # unconditionally clobbered by bboxsearch(); only search when
        # the GeoJSON carries no bbox of its own.
        if 'bbox' in sjson.keys():
            bbox = sjson.bbox
        else:
            bbox = bboxsearch(sjson)
        xs = sum([bbox[0], bbox[2]]) / 2.
        ys = sum([bbox[1], bbox[3]]) / 2.
        centroid = [ys, xs]
    jsonkey = 'feature.properties.' + key

    choromap = fm.Map(location=centroid, zoom_start=zoom_start,
                      tiles=tiles)  # all the elements you need to make a choropleth

    # standardization
    if std is not None:
        if isinstance(std, (int, float)):
            y = np.array(df[attribute] / std)
        elif isinstance(std, str):
            y = np.array(df[attribute] / df[std])
        elif callable(std):
            raise NotImplementedError(
                'Functional Standardizations are not implemented yet')
        else:
            raise ValueError(
                'Standardization must be integer, float, function, or Series')
    else:
        y = np.array(df[attribute].tolist())

    # For people who don't read documentation...
    if isinstance(classes, list):
        bins = classes
        classes = len(bins)
    elif isinstance(classes, float):
        try:
            classes = int(classes)
        except (TypeError, ValueError):
            # was a bare except: -- narrow to what int() actually raises
            raise ValueError('Classes must be coercable to integers')

    # classification passing
    if classification is not None:
        if classification == "Maximum Breaks":
            mapclass = ps.Maximum_Breaks(y, k=classes).bins.tolist()
        elif classification == 'Quantiles':
            mapclass = ps.Quantiles(y, k=classes).bins.tolist()
        elif classification == 'Fisher-Jenks':
            mapclass = ps.Fisher_Jenks(y, k=classes).bins
        elif classification == 'Equal Interval':
            mapclass = ps.Equal_Interval(y, k=classes).bins.tolist()
        elif classification == 'Natural Breaks':
            mapclass = ps.Natural_Breaks(y, k=classes).bins
        elif classification == 'Jenks Caspall Forced':
            raise NotImplementedError(
                'Jenks Caspall Forced is not implemented yet.')
        elif classification == 'Jenks Caspall Sampled':
            raise NotImplementedError(
                'Jenks Caspall Sampled is not implemented yet')
        elif classification == 'Jenks Caspall':
            mapclass = ps.Jenks_Caspall(y, k=classes).bins.tolist()
        elif classification == 'User Defined':
            mapclass = bins
        elif classification == 'Standard Deviation':
            if bins is None:
                # BUGFIX: `classes / 2` is a float on Python 3 and breaks
                # range(); use floor division and materialize the list.
                half = classes // 2
                bins = list(range(-half, half + 1))
            mapclass = list(ps.Std_Mean(y, bins).bins)
        elif classification == 'Percentiles':
            if bins is None:
                bins = [1, 10, 50, 90, 99, 100]
            mapclass = list(ps.Percentiles(y, bins).bins)
        elif classification == 'Max P':
            mapclass = ps.Max_P_Classifier(y, k=classes).bins.tolist()
        else:
            raise NotImplementedError(
                'Your classification is not supported or was not found. Supported classifications are:\n "Maximum Breaks"\n "Quantiles"\n "Fisher-Jenks"\n "Equal Interval"\n "Natural Breaks"\n "Jenks Caspall"\n "User Defined"\n "Percentiles"\n "Max P"'
            )
    else:
        print('Classification forced to None. Defaulting to Quartiles')
        mapclass = ps.Quantiles(y, k=classes).bins.tolist()

    # folium call, try abstracting to a "mapper" function, passing list of args
    choromap.geo_json(geo_path=jsonpath, key_on=jsonkey,
                      data=df, columns=dfkey,
                      fill_color=fill_color, fill_opacity=fill_opacity,
                      line_opacity=line_opacity,
                      threshold_scale=mapclass[:-1],
                      legend_name=legend_name)
    if save:
        # BUGFIX: rstrip('.json') strips any trailing '.', 'j', 's', 'o',
        # 'n' characters (e.g. 'mason.json' -> 'ma'); strip the literal
        # suffix instead.
        if jsonpath.endswith('.json'):
            base = jsonpath[:-len('.json')]
        else:
            base = jsonpath
        fname = base + '_' + attribute + '.html'
        choromap.save(fname)
    return choromap
def column_kde(series_to_plot,
               num_bins=7,
               split_type="quantiles",
               bw=0.15,
               plot_title="",
               xlabel="x",
               ylabel="y"):
    """
    v1.0
    Plot a Kernel Density Estimation (KDE), a rugplot, and the bin
    boundaries of a classification of `series_to_plot` based on
    'num_bins' and 'split_type'.

    (Docstring fix: the function plots the series passed in; it does not
    read any global GeoDataFrame.)

    ----------------
    Input arguments: series_to_plot -- pandas Series -- series to be plotted

                     num_bins       -- int -- number of bins to be used for the
                                      split of the distribution (default=7)

                     split_type     -- str -- type of the split of the
                                      distribution (default='quantiles');
                                      must be either 'quantiles',
                                      'equal_interval', or 'fisher_jenks'

                     bw             -- float -- bandwidth to be used for KDE
                                      (default=0.15)

    --------
    Returns: None, plots a KDE, rugplot, and bins of values in
             'series_to_plot'

    Raises:  ValueError if `split_type` is an unrecognized string,
             TypeError if it is not a string at all.
    """
    # generate a list of bins from the split of the distribution using
    # the type of split provided in 'split_type'
    if split_type == 'quantiles':
        classi = ps.Quantiles(series_to_plot, k=num_bins)
    elif split_type == 'equal_interval':
        classi = ps.Equal_Interval(series_to_plot, k=num_bins)
    elif split_type == 'fisher_jenks':
        classi = ps.Fisher_Jenks(series_to_plot, k=num_bins)
    elif isinstance(split_type, str):
        # a string, but not one of the supported classifier names
        raise ValueError("Input parameter 'split_type' must be either 'quantiles', "
                         + "'equal_interval', or 'fisher_jenks'.")
    else:
        raise TypeError("Input parameter 'split_type' must be a string and either 'quantiles', "
                        + "'equal_interval, or 'fisher_jenks'.")

    # print the bins
    print(classi)

    # create figure and axis
    f, ax = plt.subplots(1, figsize=(9, 6))

    # plot KDE of the distribution
    sns.kdeplot(series_to_plot,
                shade=True,
                label='Distribution of counts of Teranet records per DA',
                bw=bw)

    # plot a rugplot
    sns.rugplot(series_to_plot, alpha=0.5)

    # plot the split of the distribution
    for classi_bin in classi.bins:
        ax.axvline(classi_bin, color='magenta', linewidth=1, linestyle='--')

    # plot the mean and the median
    ax.axvline(series_to_plot.mean(), color='deeppink', linestyle='--',
               linewidth=1)
    ax.text(series_to_plot.mean(), 0,
            "Mean: {0:.2f}".format(series_to_plot.mean()), rotation=90)
    ax.axvline(series_to_plot.median(), color='coral', linestyle=':')
    ax.text(series_to_plot.median(), 0,
            "Median: {0:.2f}".format(series_to_plot.median()), rotation=90)

    # configure axis parameters
    ax.set_title(plot_title, fontdict={'fontsize': '18', 'fontweight': '3'})
    ax.set_xlabel(xlabel, fontdict={'fontsize': '16', 'fontweight': '3'})
    ax.set_ylabel(ylabel, fontdict={'fontsize': '16', 'fontweight': '3'})
    ax.legend(loc='best')
    plt.show()
def createClassifyMap(self, map_type):
    """ return an instance of pysal.Map_Classifier """
    # NOTE(review): Python 2 code (dict.has_key, `raise KeyError, '...'`).
    # Dispatches on the stars.MAP_CLASSIFY_* constant, builds a pysal
    # classifier over self.data, and returns
    # (id_group, label_group, color_group) for map rendering.
    id_group = []
    color_group = []
    label_group = []
    if map_type == stars.MAP_CLASSIFY_EQUAL_INTERVAL:
        k = 5  # default
        if self.params.has_key("k"):
            k = self.params["k"]
        cm = pysal.Equal_Interval(self.data, k=k)
        # add label group, color group
        label_group = self._get_label_group_by_k(cm.bins, cm.counts)
        #color_group = self._get_color_schema_by_k(k)
        color_group = self.pick_color_set(1, len(cm.bins), False)
    elif map_type == stars.MAP_CLASSIFY_PERCENTILES:
        pct = [1, 10, 50, 90, 99, 100]
        # doesn't support different defined pct
        #if self.params.has_key("pct"):
        #    pct = self.params["pct"]
        cm = pysal.Percentiles(self.data, pct=pct)
        counts = list(cm.counts)
        n_counts = len(counts)
        # pad counts to 6 entries so the fixed label list below indexes safely
        if n_counts < 6:
            for i in range(6 - n_counts):
                counts.append(0)
        label_group = [
            '<1%%(%d)' % counts[0], '1%% - 10%%(%d)' % counts[1],
            '10%% - 50%%(%d)' % counts[2], '50%% - 90%%(%d)' % counts[3],
            '90%% - 99%%(%d)' % counts[4], '>99%%(%d)' % counts[5]
        ]
        #color_group = self._get_default_color_schema(n_bins)
        color_group = self.pick_color_set(3, 6, True)
    elif map_type == stars.MAP_CLASSIFY_BOX_PLOT:
        hinge = 1.5  # default
        if self.params.has_key("hinge"):
            hinge = self.params["hinge"]
        cm = pysal.Box_Plot(self.data, hinge=hinge)
        # Box_Plot yields 5 bins when there are no upper outliers, 6 otherwise.
        n_bins = len(cm.bins)
        if n_bins == 5:
            n_upper_outlier = 0
        else:
            n_upper_outlier = cm.counts[5]
        label_group = [
            'Lower outlier(%d)' % cm.counts[0],
            '<25%% (%d)' % cm.counts[1],
            '25%% - 50%% (%d)' % cm.counts[2],
            '50%% - 75%% (%d)' % cm.counts[3],
            '>75%% (%d)' % cm.counts[4],
            'Upper outlier (%d)' % n_upper_outlier
        ]
        #color_group = self._get_default_color_schema(n_bins)
        color_group = self.pick_color_set(2, 6, False)
    elif map_type == stars.MAP_CLASSIFY_QUANTILES:
        k = 5  # default
        if self.params.has_key("k"):
            k = self.params["k"]
        cm = pysal.Quantiles(self.data, k=k)
        # add label group, color group
        label_group = self._get_label_group_by_k(cm.bins, cm.counts)
        #color_group = self._get_color_schema_by_k(k)
        color_group = self.pick_color_set(1, len(cm.bins), False)
    elif map_type == stars.MAP_CLASSIFY_STD_MEAN:
        # labels/colors for this type are assigned in the common block below
        cm = pysal.Std_Mean(self.data, multiples=[-2, -1, 0, 1, 2])
        n_bins = len(cm.bins)
    elif map_type == stars.MAP_CLASSIFY_MAXIMUM_BREAK:
        k = 5  # default
        if self.params.has_key("k"):
            k = self.params["k"]
        cm = pysal.Maximum_Breaks(self.data, k=k)
        # add label group, color group
        label_group = self._get_label_group_by_k(cm.bins, cm.counts)
        #color_group = self._get_color_schema_by_k(k)
        color_group = self.pick_color_set(1, len(cm.bins), False)
    elif map_type == stars.MAP_CLASSIFY_NATURAL_BREAK:
        k = 5  # default
        if self.params.has_key("k"):
            k = self.params["k"]
        cm = pysal.Natural_Breaks(self.data, k=k)
        # add label group, color group
        label_group = self._get_label_group_by_k(cm.bins, cm.counts)
        #color_group = self._get_color_schema_by_k(k)
        color_group = self.pick_color_set(1, len(cm.bins), False)
    elif map_type == stars.MAP_CLASSIFY_FISHER_JENKS:
        cm = pysal.Fisher_Jenks(self.data)
        # see blow: common label group and color group
    elif map_type == stars.MAP_CLASSIFY_JENKS_CASPALL:
        k = 5  # default
        if self.params.has_key("k"):
            k = self.params["k"]
        cm = pysal.Jenks_Caspall(self.data, k=k)
        # add label group, color group
        # Jenks_Caspall bins are 1-element arrays; unwrap with i[0]
        label_group = self._get_label_group_by_k([i[0] for i in cm.bins],
                                                 cm.counts)
        #color_group = self._get_color_schema_by_k(k)
        color_group = self.pick_color_set(1, len(cm.bins), False)
    elif map_type == stars.MAP_CLASSIFY_JENKS_CASPALL_SAMPLED:
        k = 5  # default
        pct = 0.1
        if self.params.has_key("k"):
            k = self.params["k"]
        if self.params.has_key("pct"):
            pct = self.params["pct"]
        cm = pysal.Jenks_Caspall_Sampled(self.data, k=k, pct=pct)
        # add label group, color group
        label_group = self._get_label_group_by_k(cm.bins, cm.counts)
        #color_group = self._get_color_schema_by_k(k)
        color_group = self.pick_color_set(1, len(cm.bins), False)
    elif map_type == stars.MAP_CLASSIFY_JENKS_CASPALL_FORCED:
        k = 5  # default
        if self.params.has_key("k"):
            k = self.params["k"]
        cm = pysal.Jenks_Caspall_Forced(self.data, k=k)
        # add label group, color group
        label_group = self._get_label_group_by_k(cm.bins, cm.counts)
        #color_group = self._get_color_schema_by_k(k)
        color_group = self.pick_color_set(1, len(cm.bins), False)
    elif map_type == stars.MAP_CLASSIFY_USER_DEFINED:
        # user must supply explicit breakpoints in params["bins"]
        assert self.params.has_key("bins")
        bins = self.params["bins"]
        cm = pysal.User_Defined(self.data, bins=bins)
        k = len(bins)
        # add label group, color group
        label_group = self._get_label_group_by_k(cm.bins, cm.counts)
        #color_group = self._get_color_schema_by_k(k)
        color_group = self.pick_color_set(1, len(cm.bins), False)
    elif map_type == stars.MAP_CLASSIFY_MAX_P:
        k = 5  # default
        if self.params.has_key("k"):
            k = self.params["k"]
        cm = pysal.Max_P_Classifier(self.data, k=k)
        # add label group, color group
        label_group = self._get_label_group_by_k(cm.bins, cm.counts)
        #color_group = self._get_color_schema_by_k(k)
        color_group = self.pick_color_set(1, len(cm.bins), False)
    elif map_type == stars.MAP_CLASSIFY_UNIQUE_VALUES:
        # group observation indices by value; only the first 10 distinct
        # values get their own group, the rest are pooled into 'Others'
        id_group_dict = {}
        id_other = []
        n = 0
        for i, item in enumerate(self.data):
            if n < 10:
                if not id_group_dict.has_key(item):
                    id_group_dict[item] = []
                    n += 1
            if id_group_dict.has_key(item):
                id_group_dict[item].append(i)
            else:
                id_other.append(i)
        id_group = id_group_dict.values()
        unique_values = id_group_dict.keys()
        max_num_values = n if n <= 10 else 10
        label_group = [
            str(unique_values[i]) for i in range(max_num_values)
        ]
        color_group = [
            stars.MAP_COLOR_12_UNIQUE_FILL[i]
            for i in range(max_num_values)
        ]
        #color_group = self.pick_color_set(1, max_num_values,False)
        if n >= 10:
            id_group.append(id_other)
            label_group.append('Others')
            color_group.append(stars.MAP_COLOR_12_UNIQUE_OTHER)
        field_name = self.params['field_name']
        # slot 0 carries the field name as a header entry
        id_group.insert(0, [])
        label_group.insert(0, field_name)
        color_group.insert(0, None)
    else:
        raise KeyError, 'Classify map type is illegal'
    # for some common label group and color group
    if map_type in [
            stars.MAP_CLASSIFY_FISHER_JENKS, stars.MAP_CLASSIFY_STD_MEAN
    ]:
        """
        upper_bound = 0 if len(cm.counts) == 5 else cm.counts[5]
        label_group = ['<%s (%d)'% (cm.bins[0],cm.counts[0]),
                       '%s - %s (%d)'% (cm.bins[0], cm.bins[1],cm.counts[1]),
                       '%s - %s (%d)'% (cm.bins[1], cm.bins[2], cm.counts[2]),
                       '%s - %s (%d)'% (cm.bins[2], cm.bins[3], cm.counts[3]),
                       '%s - %s (%d)'% (cm.bins[3], cm.bins[4], cm.counts[4]),
                       '>%s (%d)'% (cm.bins[4], upper_bound)]
        #color_group = self._get_default_color_schema(len(cm.bins))
        color_group = self.pick_color_set(3,7,False)[1:]
        """
        label_group = self._get_range_labels(cm.bins, cm.counts)
        color_group = self.pick_color_set(3, len(cm.bins), True)  #[1:]
    if map_type != stars.MAP_CLASSIFY_UNIQUE_VALUES:
        # convert: bucket observation indices by their assigned bin id
        binIds = cm.yb
        bins = cm.bins
        n_group = len(bins)
        id_group = [[] for i in range(n_group)]
        for i, gid in enumerate(binIds):
            id_group[gid].append(i)
    return id_group, label_group, color_group