Example #1
File: mapping.py  Project: jomerson/pysal
# module-level imports assumed by this excerpt
import numpy as np
import pysal as ps
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.colors as clrs
from matplotlib.patches import Polygon
from matplotlib.collections import PatchCollection


def equal_interval_map(coords, y, k, title='Equal Interval'):
    """

    coords: Map_Projection instance

    y: array
       variable to map

    k: int
       number of classes

    title: string
           map title
    """
    classification = ps.Equal_Interval(y, k)
    fig = plt.figure()
    ax = fig.add_subplot(111)
    patches = []
    colors = []
    i = 0
    # color each shape by its raw value; the classified alternative would be
    # shape_colors = classification.bins[classification.yb]
    shape_colors = y
    for shp in coords.projected:
        for ring in shp:
            # use separate names for the ring coordinates so the value
            # array `y` passed to the function is not shadowed
            rx, ry = ring
            rx = rx / coords.bounding_box[2]
            ry = ry / coords.bounding_box[3]
            n = len(rx)
            rx.shape = (n, 1)
            ry.shape = (n, 1)
            xy = np.hstack((rx, ry))
            polygon = Polygon(xy, True)
            patches.append(polygon)
            colors.append(shape_colors[i])
        i += 1
    cmap = cm.get_cmap('hot_r', k + 1)
    boundaries = classification.bins.tolist()
    boundaries.insert(0, 0)
    norm = clrs.BoundaryNorm(boundaries, cmap.N)
    p = PatchCollection(patches, cmap=cmap, alpha=0.4, norm=norm)
    colors = np.array(colors)
    p.set_array(colors)
    ax.add_collection(p)
    ax.set_frame_on(False)
    ax.axes.get_yaxis().set_visible(False)
    ax.axes.get_xaxis().set_visible(False)
    ax.set_title(title)
    plt.colorbar(p,
                 cmap=cmap,
                 norm=norm,
                 boundaries=boundaries,
                 ticks=boundaries)
    plt.show()
    return classification
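
A minimal usage sketch for this example, assuming the module-level imports above and an existing `Map_Projection` instance; the names `proj` and `values` are illustrative, not part of the original code:

# Hypothetical call: values holds one observation per shape in proj
ei = equal_interval_map(proj, values, k=5, title='Equal Interval (k=5)')
print(ei.bins)    # upper bound of each equal-width class
print(ei.counts)  # number of observations per class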
Example #2
def base_choropleth_classif(map_obj, values, classification='quantiles',
        k=5, cmap='hot_r', sample_fisher=True):
    '''
    Set the coloring of a map object based on different classification
    methods
    ...

    Arguments
    ---------

    map_obj         : Poly/Line collection
                      Output from map_X_shp
    values          : array
                      Numpy array with values to map
    classification  : str
                      Classification method to use. Options supported:
                        * 'quantiles' (default)
                        * 'fisher_jenks'
                        * 'equal_interval'

    k               : int
                      Number of bins to classify values in and assign a color
                      to
    cmap            : str
                      Matplotlib coloring scheme
    sample_fisher   : Boolean
                      Defaults to True, controls whether Fisher-Jenks
                      classification uses a sample (faster) or the entire
                      array of values. Ignored if 'classification'!='fisher_jenks'

    Returns
    -------

    map             : PatchCollection
                      Map object with the polygons from the shapefile and
                      unique value coloring

    '''
    if classification == 'quantiles':
        classification = ps.Quantiles(values, k)
        boundaries = classification.bins.tolist()

    elif classification == 'equal_interval':
        classification = ps.Equal_Interval(values, k)
        boundaries = classification.bins.tolist()

    elif classification == 'fisher_jenks':
        if sample_fisher:
            classification = ps.esda.mapclassify.Fisher_Jenks_Sampled(
                values, k)
        else:
            classification = ps.Fisher_Jenks(values, k)
        boundaries = classification.bins[:]

    else:
        raise ValueError("Unsupported classification '%s'" % classification)

    map_obj.set_alpha(0.4)

    cmap = cm.get_cmap(cmap, k + 1)
    map_obj.set_cmap(cmap)

    boundaries.insert(0, values.min())
    norm = clrs.BoundaryNorm(boundaries, cmap.N)
    map_obj.set_norm(norm)

    if isinstance(map_obj, mpl.collections.PolyCollection):
        pvalues = _expand_values(values, map_obj.shp2dbf_row)
        map_obj.set_array(pvalues)
        map_obj.set_edgecolor('k')
    elif isinstance(map_obj, mpl.collections.LineCollection):
        pvalues = _expand_values(values, map_obj.shp2dbf_row)
        map_obj.set_array(pvalues)
    elif isinstance(map_obj, mpl.collections.PathCollection):
        if not hasattr(map_obj, 'shp2dbf_row'):
            map_obj.shp2dbf_row = np.arange(values.shape[0])
        map_obj.set_array(values)
    return map_obj
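
A usage sketch, assuming `matplotlib.pyplot` is available as `plt` and that the collection comes from a companion `map_X_shp`-style helper that attaches the `shp2dbf_row` attribute this function expects; `poly_collection` and `values` are illustrative names:

# Hypothetical call: recolor an existing PolyCollection with Fisher-Jenks classes
poly_collection = base_choropleth_classif(poly_collection, values,
                                          classification='fisher_jenks', k=6)
fig, ax = plt.subplots()
ax.add_collection(poly_collection)
ax.autoscale_view()
plt.show()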
Example #3
File: mapping.py  Project: jomerson/pysal
def base_choropleth_classif(shp_link, values, classification='quantiles',
        k=5, cmap='hot_r', projection='merc', sample_fisher=True):
    '''
    Create a map object with coloring based on different classification
    methods, from a shapefile in lon/lat CRS
    ...

    Arguments
    ---------

    shp_link        : str
                      Path to shapefile
    values          : array
                      Numpy array with values to map
    classification  : str
                      Classification method to use. Options supported:
                        * 'quantiles' (default)
                        * 'fisher_jenks'
                        * 'equal_interval'
                            
    k               : int
                      Number of bins to classify values in and assign a color
                      to
    cmap            : str
                      Matplotlib coloring scheme
    projection      : str
                      Basemap projection. See [1]_ for a list. Defaults to
                      'merc'
    sample_fisher   : Boolean
                      Defaults to True, controls whether Fisher-Jenks
                      classification uses a sample (faster) or the entire
                      array of values. Ignored if 'classification'!='fisher_jenks'

    Returns
    -------

    map             : PatchCollection
                      Map object with the polygons from the shapefile and
                      unique value coloring
    
    Links
    -----
    .. [1] <http://matplotlib.org/basemap/api/basemap_api.html#module-mpl_toolkits.basemap>
    '''
    if classification == 'quantiles':
        classification = ps.Quantiles(values, k)
        boundaries = classification.bins.tolist()

    elif classification == 'equal_interval':
        classification = ps.Equal_Interval(values, k)
        boundaries = classification.bins.tolist()

    elif classification == 'fisher_jenks':
        if sample_fisher:
            classification = ps.esda.mapclassify.Fisher_Jenks_Sampled(
                values, k)
        else:
            classification = ps.Fisher_Jenks(values, k)
        boundaries = classification.bins[:]

    else:
        raise ValueError("Unsupported classification '%s'" % classification)

    map_obj = map_poly_shp_lonlat(shp_link, projection=projection)
    map_obj.set_alpha(0.4)

    cmap = cm.get_cmap(cmap, k + 1)
    map_obj.set_cmap(cmap)

    boundaries.insert(0, 0)
    norm = clrs.BoundaryNorm(boundaries, cmap.N)
    map_obj.set_norm(norm)

    map_obj.set_array(values)
    return map_obj
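
A usage sketch, assuming `matplotlib.pyplot` is imported as `plt`; the shapefile path and `values` array are placeholders, and the returned collection still has to be added to an axes to be drawn:

# Hypothetical call: build a quantile map straight from a lon/lat shapefile
patches = base_choropleth_classif('path/to/polygons.shp', values,
                                  classification='quantiles', k=5)
fig, ax = plt.subplots()
ax.add_collection(patches)
ax.autoscale_view()
plt.show()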
Example #4
def choropleth_map(jsonpath,
                   key,
                   attribute,
                   df=None,
                   classification="Quantiles",
                   classes=5,
                   bins=None,
                   std=None,
                   centroid=None,
                   zoom_start=5,
                   tiles='OpenStreetMap',
                   fill_color="YlGn",
                   fill_opacity=.5,
                   line_opacity=0.2,
                   legend_name='',
                   save=True):
    '''
    One-shot mapping function for folium-based choropleth mapping.

    jsonpath - path to a GeoJSON file, or a (shp, dbf) tuple
    key - the field upon which the GeoJSON and the dataframe will be linked
    attribute - the attribute to be mapped

    The rest of the arguments are keyword:

    classification - type of classification scheme to be used
    classes - number of classes used
    bins - breakpoints, if manual classes are desired
    std - optional standardization: a scalar divisor or the name of a column
          to divide 'attribute' by
    centroid - [lat, lon] center of the map; derived from the bounding box
               when None
    save - if True, save the map as an HTML file named after the GeoJSON
           file and the attribute
    '''

    #Polymorphism by hand...

    if isinstance(jsonpath, str):
        if os.path.isfile(jsonpath):
            sjson = gj.load(open(jsonpath))
        else:
            raise IOError('File not found')

    if isinstance(jsonpath, dict):
        raise NotImplementedError(
            'Direct mapping from dictionary not yet supported')
        #with open('tmp.json', 'w') as out:
        #    gj.dump(jsonpath, out)
        #    sjson = gj.load(open('tmp.json'))

    if isinstance(jsonpath, tuple):
        if 'ShpWrapper' in str(type(jsonpath[0])) and 'DBF' in str(
                type(jsonpath[1])):
            flip('tmp.json', jsonpath[0], jsonpath[1])
            sjson = gj.load(open('tmp.json'))
            jsonpath = 'tmp.json'

        elif 'ShpWrapper' in str(type(jsonpath[1])) and 'DBF' in str(
                type(jsonpath[0])):
            flip('tmp.json', jsonpath[1], jsonpath[0])
            sjson = gj.load(open('tmp.json'))
            jsonpath = 'tmp.json'

        else:
            raise IOError(
                'Inputs must be GeoJSON filepath, GeoJSON dictionary in memory, or shp-dbf tuple'
            )

    #key construction
    if df is None:
        df = json2df(sjson)
    dfkey = [key, attribute]

    #centroid search
    if centroid is None:
        if 'bbox' in sjson.keys():
            bbox = sjson.bbox
        else:
            bbox = bboxsearch(sjson)
        xs = sum([bbox[0], bbox[2]]) / 2.
        ys = sum([bbox[1], bbox[3]]) / 2.
        centroid = [ys, xs]
    jsonkey = 'feature.properties.' + key

    choromap = fm.Map(
        location=centroid, zoom_start=zoom_start,
        tiles=tiles)  # all the elements you need to make a choropleth

    #standardization
    if std is not None:
        if isinstance(std, (int, float)):
            y = np.array(df[attribute] / std)
        elif isinstance(std, str):
            y = np.array(df[attribute] / df[std])
        elif callable(std):
            raise NotImplementedError(
                'Functional standardizations are not implemented yet')
        else:
            raise ValueError(
                'Standardization must be an int, a float, or a column name')
    else:
        y = np.array(df[attribute].tolist())

    #For people who don't read documentation...
    if isinstance(classes, list):
        bins = classes
        classes = len(bins)
    elif isinstance(classes, float):
        try:
            classes = int(classes)
        except (ValueError, OverflowError):
            raise ValueError('Classes must be coercible to integers')

    #classification passing
    if classification is not None:
        if classification == "Maximum Breaks":  #there is probably a better way to do this, but it's a start.
            mapclass = ps.Maximum_Breaks(y, k=classes).bins.tolist()
        elif classification == 'Quantiles':
            mapclass = ps.Quantiles(y, k=classes).bins.tolist()
        elif classification == 'Fisher-Jenks':
            mapclass = ps.Fisher_Jenks(y, k=classes).bins
        elif classification == 'Equal Interval':
            mapclass = ps.Equal_Interval(y, k=classes).bins.tolist()
        elif classification == 'Natural Breaks':
            mapclass = ps.Natural_Breaks(y, k=classes).bins
        elif classification == 'Jenks Caspall Forced':
            raise NotImplementedError(
                'Jenks Caspall Forced is not implemented yet.')
        #   mapclass = ps.Jenks_Caspall_Forced(y, k=classes).bins.tolist()
        elif classification == 'Jenks Caspall Sampled':
            raise NotImplementedError(
                'Jenks Caspall Sampled is not implemented yet')
        #   mapclass = ps.Jenks_Caspall_Sampled(y, k=classes).bins.tolist()
        elif classification == 'Jenks Caspall':
            mapclass = ps.Jenks_Caspall(y, k=classes).bins.tolist()
        elif classification == 'User Defined':
            mapclass = bins
        elif classification == 'Standard Deviation':
            if bins is None:
                half = classes // 2  # integer division so range() gets ints
                bins = range(-half, half + 1)
            mapclass = list(ps.Std_Mean(y, bins).bins)
        elif classification == 'Percentiles':
            if bins is None:
                bins = [1, 10, 50, 90, 99, 100]
            mapclass = list(ps.Percentiles(y, bins).bins)
        elif classification == 'Max P':
            #raise NotImplementedError('Max-P classification is not implemented yet')
            mapclass = ps.Max_P_Classifier(y, k=classes).bins.tolist()
        else:
            raise NotImplementedError(
                'Your classification is not supported or was not found. Supported classifications are:\n "Maximum Breaks"\n "Quantiles"\n "Fisher-Jenks"\n "Equal Interval"\n "Natural Breaks"\n "Jenks Caspall"\n "User Defined"\n "Percentiles"\n "Max P"'
            )
    else:
        print('Classification forced to None. Defaulting to Quantiles')
        mapclass = ps.Quantiles(y, k=classes).bins.tolist()

    #folium call, try abstracting to a "mapper" function, passing list of args
    choromap.geo_json(geo_path=jsonpath,
                      key_on=jsonkey,
                      data=df,
                      columns=dfkey,
                      fill_color=fill_color,
                      fill_opacity=fill_opacity,
                      line_opacity=line_opacity,
                      threshold_scale=mapclass[:-1],
                      legend_name=legend_name)

    if save:
        # str.rstrip('.json') would strip characters, not the suffix
        fname = os.path.splitext(jsonpath)[0] + '_' + attribute + '.html'
        choromap.save(fname)

    return choromap
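
A usage sketch; the GeoJSON path, key field, and attribute name are hypothetical, and the call assumes the old folium API that `choromap.geo_json` above relies on:

# Hypothetical call: Fisher-Jenks choropleth linked on a 'GEOID' property
m = choropleth_map('counties.json', 'GEOID', 'median_income',
                   classification='Fisher-Jenks', classes=6,
                   legend_name='Median income', save=False)
# m is a folium.Map; in a notebook it renders inline, otherwise m.save('map.html')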
Example #5
def column_kde(series_to_plot, num_bins=7, split_type="quantiles", bw=0.15,
               plot_title="", xlabel="x", ylabel="y"):
    """
    v1.0
    function that plots: Kernel Density Estimation (KDE)
                         rugplot
                         shows a classification of the distribution based on 'num_bins' and 'split_type'

    Plots the values passed in 'series_to_plot' (originally a column of the GeoDataFrame 'teranet_da_gdf')

    ----------------
    Input arguments: series_to_plot -- pandas Series -- series to be plotted

                     num_bins       -- int    -- number of bins to be used for the split of
                                                 the distribution (default=7)

                     split_type     -- str    -- type of the split of the distribution (default='quantiles')
                                                 must be either 'quantiles', 'equal_interval', or 'fisher_jenks'

                     bw             -- float  -- bandwidth to be used for KDE (default=0.15)

    --------
    Returns:     None, plots a KDE, rugplot, and bins of values in 'series_to_plot'
    """
    # generate a list of bins from the split of the distribution using type of split provided in 'split_type'
    if split_type == 'quantiles':
        classi = ps.Quantiles(series_to_plot, k=num_bins)
    elif split_type == 'equal_interval':
        classi = ps.Equal_Interval(series_to_plot, k=num_bins)
    elif split_type == 'fisher_jenks':
        classi = ps.Fisher_Jenks(series_to_plot, k=num_bins)
    elif isinstance(split_type, str):
        raise ValueError("Input parameter 'split_type' must be either 'quantiles', " +
                         "'equal_interval', or 'fisher_jenks'.")
    else:
        raise TypeError("Input parameter 'split_type' must be a string and either 'quantiles', " +
                        "'equal_interval', or 'fisher_jenks'.")
    # print the bins
    print(classi)

    # create figure and axis
    f, ax = plt.subplots(1, figsize=(9, 6))

    # plot KDE of the distribution
    sns.kdeplot(series_to_plot,
                shade=True,
                label='Distribution of counts of Teranet records per DA',
                bw=bw)

    # plot a rugplot
    sns.rugplot(series_to_plot, alpha=0.5)

    # plot the split of the distribution
    for classi_bin in classi.bins:
        ax.axvline(classi_bin, color='magenta', linewidth=1, linestyle='--')

    # plot the mean and the median
    ax.axvline(series_to_plot.mean(),
               color='deeppink',
               linestyle='--',
               linewidth=1)

    ax.text(series_to_plot.mean(),
            0,
            "Mean: {0:.2f}".format(series_to_plot.mean()),
            rotation=90)

    ax.axvline(series_to_plot.median(),
               color='coral',
               linestyle=':')

    ax.text(series_to_plot.median(),
            0,
            "Median: {0:.2f}".format(series_to_plot.median()),
            rotation=90)

    # configure axis parameters
    ax.set_title(plot_title,
                 fontdict={'fontsize': '18', 'fontweight': '3'})
    ax.set_xlabel(xlabel,
                  fontdict={'fontsize': '16', 'fontweight': '3'})
    ax.set_ylabel(ylabel,
                  fontdict={'fontsize': '16', 'fontweight': '3'})

    ax.legend(loc='best')

    plt.show()
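
A usage sketch, assuming `pysal`, `seaborn`, and `matplotlib.pyplot` are imported at module level as `ps`, `sns`, and `plt`; the GeoDataFrame name comes from the docstring and the column name is illustrative:

# Hypothetical call: inspect the distribution of a pandas Series
column_kde(teranet_da_gdf['da_count'],
           num_bins=5,
           split_type='fisher_jenks',
           plot_title='Teranet records per DA',
           xlabel='Records per DA',
           ylabel='Density')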
Example #6
    def createClassifyMap(self, map_type):
        """ return an instance of pysal.Map_Classifier """
        id_group = []
        color_group = []
        label_group = []

        if map_type == stars.MAP_CLASSIFY_EQUAL_INTERVAL:
            k = 5  # default
            if "k" in self.params:
                k = self.params["k"]
            cm = pysal.Equal_Interval(self.data, k=k)

            # add label group, color group
            label_group = self._get_label_group_by_k(cm.bins, cm.counts)
            #color_group = self._get_color_schema_by_k(k)
            color_group = self.pick_color_set(1, len(cm.bins), False)

        elif map_type == stars.MAP_CLASSIFY_PERCENTILES:
            pct = [1, 10, 50, 90, 99, 100]
            # doesn't support different defined pct
            #if self.params.has_key("pct"):
            #    pct = self.params["pct"]
            cm = pysal.Percentiles(self.data, pct=pct)
            counts = list(cm.counts)
            n_counts = len(counts)
            if n_counts < 6:
                for i in range(6 - n_counts):
                    counts.append(0)
            label_group = [
                '<1%%(%d)' % counts[0],
                '1%% - 10%%(%d)' % counts[1],
                '10%% - 50%%(%d)' % counts[2],
                '50%% - 90%%(%d)' % counts[3],
                '90%% - 99%%(%d)' % counts[4],
                '>99%%(%d)' % counts[5]
            ]
            #color_group = self._get_default_color_schema(n_bins)
            color_group = self.pick_color_set(3, 6, True)

        elif map_type == stars.MAP_CLASSIFY_BOX_PLOT:
            hinge = 1.5  # default
            if "hinge" in self.params:
                hinge = self.params["hinge"]

            cm = pysal.Box_Plot(self.data, hinge=hinge)
            n_bins = len(cm.bins)
            if n_bins == 5:
                n_upper_outlier = 0
            else:
                n_upper_outlier = cm.counts[5]
            label_group = [
                'Lower outlier(%d)' % cm.counts[0],
                '<25%% (%d)' % cm.counts[1],
                '25%% - 50%% (%d)' % cm.counts[2],
                '50%% - 75%% (%d)' % cm.counts[3],
                '>75%% (%d)' % cm.counts[4],
                'Upper outlier (%d)' % n_upper_outlier
            ]

            #color_group = self._get_default_color_schema(n_bins)
            color_group = self.pick_color_set(2, 6, False)

        elif map_type == stars.MAP_CLASSIFY_QUANTILES:
            k = 5  # default
            if "k" in self.params:
                k = self.params["k"]

            cm = pysal.Quantiles(self.data, k=k)

            # add label group, color group
            label_group = self._get_label_group_by_k(cm.bins, cm.counts)
            #color_group = self._get_color_schema_by_k(k)
            color_group = self.pick_color_set(1, len(cm.bins), False)

        elif map_type == stars.MAP_CLASSIFY_STD_MEAN:
            cm = pysal.Std_Mean(self.data, multiples=[-2, -1, 0, 1, 2])
            n_bins = len(cm.bins)

        elif map_type == stars.MAP_CLASSIFY_MAXIMUM_BREAK:
            k = 5  # default
            if "k" in self.params:
                k = self.params["k"]
            cm = pysal.Maximum_Breaks(self.data, k=k)

            # add label group, color group
            label_group = self._get_label_group_by_k(cm.bins, cm.counts)
            #color_group = self._get_color_schema_by_k(k)
            color_group = self.pick_color_set(1, len(cm.bins), False)

        elif map_type == stars.MAP_CLASSIFY_NATURAL_BREAK:
            k = 5  # default
            if "k" in self.params:
                k = self.params["k"]
            cm = pysal.Natural_Breaks(self.data, k=k)

            # add label group, color group
            label_group = self._get_label_group_by_k(cm.bins, cm.counts)
            #color_group = self._get_color_schema_by_k(k)
            color_group = self.pick_color_set(1, len(cm.bins), False)

        elif map_type == stars.MAP_CLASSIFY_FISHER_JENKS:
            cm = pysal.Fisher_Jenks(self.data)

            # see below: common label group and color group are set at the end

        elif map_type == stars.MAP_CLASSIFY_JENKS_CASPALL:
            k = 5  # default
            if "k" in self.params:
                k = self.params["k"]
            cm = pysal.Jenks_Caspall(self.data, k=k)

            # add label group, color group
            label_group = self._get_label_group_by_k([i[0] for i in cm.bins],
                                                     cm.counts)
            #color_group = self._get_color_schema_by_k(k)
            color_group = self.pick_color_set(1, len(cm.bins), False)

        elif map_type == stars.MAP_CLASSIFY_JENKS_CASPALL_SAMPLED:
            k = 5  # default
            pct = 0.1
            if "k" in self.params:
                k = self.params["k"]
            if "pct" in self.params:
                pct = self.params["pct"]
            cm = pysal.Jenks_Caspall_Sampled(self.data, k=k, pct=pct)

            # add label group, color group
            label_group = self._get_label_group_by_k(cm.bins, cm.counts)
            #color_group = self._get_color_schema_by_k(k)
            color_group = self.pick_color_set(1, len(cm.bins), False)

        elif map_type == stars.MAP_CLASSIFY_JENKS_CASPALL_FORCED:
            k = 5  # default
            if "k" in self.params:
                k = self.params["k"]
            cm = pysal.Jenks_Caspall_Forced(self.data, k=k)

            # add label group, color group
            label_group = self._get_label_group_by_k(cm.bins, cm.counts)
            #color_group = self._get_color_schema_by_k(k)
            color_group = self.pick_color_set(1, len(cm.bins), False)

        elif map_type == stars.MAP_CLASSIFY_USER_DEFINED:
            assert "bins" in self.params
            bins = self.params["bins"]
            cm = pysal.User_Defined(self.data, bins=bins)
            k = len(bins)

            # add label group, color group
            label_group = self._get_label_group_by_k(cm.bins, cm.counts)
            #color_group = self._get_color_schema_by_k(k)
            color_group = self.pick_color_set(1, len(cm.bins), False)

        elif map_type == stars.MAP_CLASSIFY_MAX_P:
            k = 5  # default
            if "k" in self.params:
                k = self.params["k"]
            cm = pysal.Max_P_Classifier(self.data, k=k)

            # add label group, color group
            label_group = self._get_label_group_by_k(cm.bins, cm.counts)
            #color_group = self._get_color_schema_by_k(k)
            color_group = self.pick_color_set(1, len(cm.bins), False)

        elif map_type == stars.MAP_CLASSIFY_UNIQUE_VALUES:
            id_group_dict = {}
            id_other = []

            n = 0
            for i, item in enumerate(self.data):
                if n < 10:
                    if item not in id_group_dict:
                        id_group_dict[item] = []
                        n += 1
                if item in id_group_dict:
                    id_group_dict[item].append(i)
                else:
                    id_other.append(i)

            id_group = list(id_group_dict.values())
            unique_values = list(id_group_dict.keys())
            max_num_values = n if n <= 10 else 10

            label_group = [
                str(unique_values[i]) for i in range(max_num_values)
            ]
            color_group = [
                stars.MAP_COLOR_12_UNIQUE_FILL[i]
                for i in range(max_num_values)
            ]
            #color_group = self.pick_color_set(1, max_num_values,False)
            if n >= 10:
                id_group.append(id_other)
                label_group.append('Others')
                color_group.append(stars.MAP_COLOR_12_UNIQUE_OTHER)

            field_name = self.params['field_name']
            id_group.insert(0, [])
            label_group.insert(0, field_name)
            color_group.insert(0, None)

        else:
            raise KeyError('Classify map type is illegal')

        # for some common label group and color group
        if map_type in [
                stars.MAP_CLASSIFY_FISHER_JENKS, stars.MAP_CLASSIFY_STD_MEAN
        ]:
            """
            upper_bound = 0 if len(cm.counts) == 5 else cm.counts[5]
            label_group = ['<%s (%d)'% (cm.bins[0],cm.counts[0]),
                           '%s - %s (%d)'% (cm.bins[0], cm.bins[1],cm.counts[1]),
                           '%s - %s (%d)'% (cm.bins[1], cm.bins[2], cm.counts[2]),
                           '%s - %s (%d)'% (cm.bins[2], cm.bins[3], cm.counts[3]),
                           '%s - %s (%d)'% (cm.bins[3], cm.bins[4], cm.counts[4]),
                           '>%s (%d)'% (cm.bins[4], upper_bound)]
            #color_group = self._get_default_color_schema(len(cm.bins))
            color_group = self.pick_color_set(3,7,False)[1:]
            """
            label_group = self._get_range_labels(cm.bins, cm.counts)
            color_group = self.pick_color_set(3, len(cm.bins), True)  #[1:]

        if map_type != stars.MAP_CLASSIFY_UNIQUE_VALUES:
            # convert
            binIds = cm.yb
            bins = cm.bins

            n_group = len(bins)
            id_group = [[] for i in range(n_group)]
            for i, gid in enumerate(binIds):
                id_group[gid].append(i)

        return id_group, label_group, color_group
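
A usage sketch from inside the same class; it assumes `self.data` and `self.params` are already populated and that the `stars` module exposes the `MAP_CLASSIFY_*` constants used above:

# Hypothetical call: quantile classification with a custom k
self.params["k"] = 6
id_group, label_group, color_group = self.createClassifyMap(
    stars.MAP_CLASSIFY_QUANTILES)
for ids, label, color in zip(id_group, label_group, color_group):
    print(label, color, len(ids))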