Example #1
    def make_map(self, variable, nbins, preset_bins=None):
        """
        Saves map to working directory as .svg.
        """

        soup = BeautifulSoup(self.svg,
                             selfClosingTags=['defs', 'sodipodi:namedview'])

        paths = soup.findAll('path')

        if preset_bins is None:

            data_FJ = ps.Fisher_Jenks(self.county_data[variable].fillna(0),
                                      k=nbins)

        else:

            data_FJ = preset_bins

        for p in paths:

            if p['id'] not in ["State_Lines", "separator"]:
                try:
                    value = self.data_dict[variable][int(p['id'])]
                except (KeyError, ValueError):
                    # skip paths whose id is not a numeric key in the data
                    continue

                color_class = 0
                for n in range(int(nbins) - 2, -1, -1):
                    if value > data_FJ.bins[n]:
                        color_class = n + 1
                        break

                color = self.color_bins_hex[nbins][color_class]

                p['style'] = self.path_style + color

        mapfile = 'map_' + variable + '.svg'

        with open(mapfile, 'w') as f:
            f.write(soup.prettify())
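
A minimal, self-contained sketch of the binning logic above, assuming the legacy pysal (<2.0) API where Fisher_Jenks exposes .bins (upper class edges) and .yb (per-observation class ids); the synthetic values are a stand-in for county_data[variable]:

import numpy as np
import pysal as ps

values = np.random.gamma(2.0, 10.0, 200)   # stand-in for county_data[variable]
fj = ps.Fisher_Jenks(values, k=5)

# The reverse scan in make_map reproduces what fj.yb already provides:
color_classes = np.searchsorted(fj.bins, values)   # class index per value, ~ fj.yb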
Example #2
def base_choropleth_classif(map_obj, values, classification='quantiles', \
        k=5, cmap='hot_r', sample_fisher=True):
    '''
    Set coloring based on different classification
    methods
    ...

    Arguments
    ---------

    map_obj         : Poly/Line collection
                      Output from map_X_shp
    values          : array
                      Numpy array with values to map
    classification  : str
                      Classification method to use. Options supported:
                        * 'quantiles' (default)
                        * 'fisher_jenks'
                        * 'equal_interval'

    k               : int
                      Number of bins to classify values in and assign a color
                      to
    cmap            : str
                      Matplotlib coloring scheme
    sample_fisher   : Boolean
                      Defaults to True, controls whether Fisher-Jenks
                      classification uses a sample (faster) or the entire
                      array of values. Ignored if 'classification'!='fisher_jenks'

    Returns
    -------

    map             : PatchCollection
                      Map object with the polygons from the shapefile and
                      unique value coloring

    '''
    if classification == 'quantiles':
        classification = ps.Quantiles(values, k)
        boundaries = classification.bins.tolist()

    elif classification == 'equal_interval':
        classification = ps.Equal_Interval(values, k)
        boundaries = classification.bins.tolist()

    elif classification == 'fisher_jenks':
        if sample_fisher:
            classification = ps.esda.mapclassify.Fisher_Jenks_Sampled(
                values, k)
        else:
            classification = ps.Fisher_Jenks(values, k)
        boundaries = list(classification.bins)  # list() so insert() below works

    else:
        raise ValueError("Unsupported classification '%s'" % classification)

    map_obj.set_alpha(0.4)

    cmap = cm.get_cmap(cmap, k + 1)
    map_obj.set_cmap(cmap)

    boundaries.insert(0, values.min())
    norm = clrs.BoundaryNorm(boundaries, cmap.N)
    map_obj.set_norm(norm)

    if isinstance(map_obj, mpl.collections.PolyCollection):
        pvalues = _expand_values(values, map_obj.shp2dbf_row)
        map_obj.set_array(pvalues)
        map_obj.set_edgecolor('k')
    elif isinstance(map_obj, mpl.collections.LineCollection):
        pvalues = _expand_values(values, map_obj.shp2dbf_row)
        map_obj.set_array(pvalues)
    elif isinstance(map_obj, mpl.collections.PathCollection):
        if not hasattr(map_obj, 'shp2dbf_row'):
            map_obj.shp2dbf_row = np.arange(values.shape[0])
        map_obj.set_array(values)
    return map_obj
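
The coloring above hinges on pairing a discrete colormap with a BoundaryNorm; a short sketch of that pairing under the same k=5 setup (the boundary values here are illustrative):

import numpy as np
import matplotlib.cm as cm
import matplotlib.colors as clrs

values = np.linspace(0.0, 1.0, 50)
boundaries = [0.0, 0.2, 0.4, 0.6, 0.8, 1.0]   # class edges with the minimum prepended
cmap = cm.get_cmap('hot_r', len(boundaries))  # k + 1 colors, matching the call above
norm = clrs.BoundaryNorm(boundaries, cmap.N)
rgba = cmap(norm(values))                     # one RGBA color per value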
Example #3
def fisher_jenks_map(coords, y, k, title='Fisher-Jenks', sampled=False):
    """

    coords: Map_Projection instance

    y: array
       variable to map

    k: int
       number of classes

    title: string
           map title

    sampled: bool
             if True, classification bins are obtained on a sample of y and
                 then applied to the full array. Useful for large n arrays
    """

    if sampled:
        classification = ps.esda.mapclassify.Fisher_Jenks_Sampled(y, k)
    else:
        classification = ps.Fisher_Jenks(y, k)
    fig = plt.figure()
    ax = fig.add_subplot(111)
    patches = []
    colors = []
    i = 0
    shape_colors = y
    for shp in coords.projected:
        for ring in shp:
            xs, ys = ring  # renamed to avoid shadowing the data array y
            xs = xs / coords.bounding_box[2]
            ys = ys / coords.bounding_box[3]
            n = len(xs)
            xs.shape = (n, 1)
            ys.shape = (n, 1)
            xy = np.hstack((xs, ys))
            polygon = Polygon(xy, closed=True)
            patches.append(polygon)
            colors.append(shape_colors[i])
        i += 1
    cmap = cm.get_cmap('hot_r', k + 1)
    boundaries = list(classification.bins)  # list() so insert() below works
    if min(shape_colors) > 0.0:
        boundaries.insert(0, 0)
    else:
        # push the lower edge below the first break
        boundaries.insert(0, boundaries[0] - boundaries[1])
    norm = clrs.BoundaryNorm(boundaries, cmap.N)
    p = PatchCollection(patches, cmap=cmap, alpha=0.4, norm=norm)
    colors = np.array(colors)
    p.set_array(colors)
    ax.add_collection(p)
    ax.set_frame_on(False)
    ax.axes.get_yaxis().set_visible(False)
    ax.axes.get_xaxis().set_visible(False)
    ax.set_title(title)
    plt.colorbar(p,
                 cmap=cmap,
                 norm=norm,
                 boundaries=boundaries,
                 ticks=boundaries)
    plt.show()
    return classification
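
A self-contained sketch of the Polygon/PatchCollection pattern used above, with toy squares in place of projected shapefile rings:

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Polygon
from matplotlib.collections import PatchCollection

fig, ax = plt.subplots()
squares = [Polygon(np.array([[x, 0.0], [x + 0.8, 0.0], [x + 0.8, 0.8], [x, 0.8]]),
                   closed=True) for x in range(3)]
pc = PatchCollection(squares, cmap='hot_r', alpha=0.4)
pc.set_array(np.array([1.0, 2.0, 3.0]))   # one value per patch drives its color
ax.add_collection(pc)
ax.autoscale_view()
plt.show()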
Example #4
def base_choropleth_classif(shp_link, values, classification='quantiles', \
        k=5, cmap='hot_r', projection='merc', sample_fisher=True):
    '''
    Create a map object with coloring based on different classification
    methods, from a shapefile in lon/lat CRS
    ...

    Arguments
    ---------

    shp_link        : str
                      Path to shapefile
    values          : array
                      Numpy array with values to map
    classification  : str
                      Classification method to use. Options supported:
                        * 'quantiles' (default)
                        * 'fisher_jenks'
                        * 'equal_interval'
                            
    k               : int
                      Number of bins to classify values in and assign a color
                      to
    cmap            : str
                      Matplotlib coloring scheme
    projection      : str
                      Basemap projection. See [1]_ for a list. Defaults to
                      'merc'
    sample_fisher   : Boolean
                      Defaults to True, controls whether Fisher-Jenks
                      classification uses a sample (faster) or the entire
                      array of values. Ignored if 'classification'!='fisher_jenks'

    Returns
    -------

    map             : PatchCollection
                      Map object with the polygons from the shapefile and
                      unique value coloring
    
    Links
    -----
    .. [1] <http://matplotlib.org/basemap/api/basemap_api.html#module-mpl_toolkits.basemap>
    '''
    if classification == 'quantiles':
        classification = ps.Quantiles(values, k)
        boundaries = classification.bins.tolist()

    elif classification == 'equal_interval':
        classification = ps.Equal_Interval(values, k)
        boundaries = classification.bins.tolist()

    elif classification == 'fisher_jenks':
        if sample_fisher:
            classification = ps.esda.mapclassify.Fisher_Jenks_Sampled(
                values, k)
        else:
            classification = ps.Fisher_Jenks(values, k)
        boundaries = list(classification.bins)  # list() so insert() below works

    else:
        raise ValueError("Unsupported classification '%s'" % classification)

    map_obj = map_poly_shp_lonlat(shp_link, projection=projection)
    map_obj.set_alpha(0.4)

    cmap = cm.get_cmap(cmap, k + 1)
    map_obj.set_cmap(cmap)

    boundaries.insert(0, 0)  # assumes non-negative values for the lowest class
    norm = clrs.BoundaryNorm(boundaries, cmap.N)
    map_obj.set_norm(norm)

    map_obj.set_array(values)
    return map_obj
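
A hedged usage sketch (the shapefile path and values are hypothetical; the function expects one value per polygon, in shapefile order):

# values = np.array([...])                  # one value per polygon
# patches = base_choropleth_classif('counties.shp', values,
#                                   classification='fisher_jenks', k=5)
# plt.gca().add_collection(patches)         # then draw with matplotlib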
Example #5
def main():

    # Parameters
    # ----------

    show_legend = False  #True
    show_title_text = False  #True

    # Plot base-grid (with no-data hashes)
    show_noData = False

    # Choose classifier (if None, use self specified classification)
    #'NaturalBreaks' #'JenksCaspall' #'MaximumBreaks' #'FisherJenks' #"HeadTail"
    mapclassifier = None

    # Filepaths
    data_fp = "data/MFD_Population_24H_Tallinn_500m_grid.shp"
    roads_fp = "data/Tallinn_main_roads_for_visualization.shp"
    boundaries_fp = "data/TLN_bordersDASY.shp"
    water_fp = "data/TLN_water_clip_OSM.shp"
    outdir = "results/population_maps"

    # Read files
    data = gpd.read_file(data_fp)
    roads = gpd.read_file(roads_fp)
    boundaries = gpd.read_file(boundaries_fp)
    water = gpd.read_file(water_fp)

    # Re-project all into the same crs as grid
    roads['geometry'] = roads['geometry'].to_crs(crs=data.crs)
    roads.crs = data.crs

    boundaries['geometry'] = boundaries['geometry'].to_crs(crs=data.crs)
    boundaries.crs = data.crs

    water['geometry'] = water['geometry'].to_crs(crs=data.crs)
    water.crs = data.crs

    # Take only largest waterbodies
    water['area'] = water.area
    water = water.sort_values(by='area', ascending=False)
    water.reset_index(inplace=True)
    water = water.iloc[0:3]  # .ix is gone from modern pandas; the label slice 0:2 was inclusive

    # Time columns showing the share of population at different hours
    tcols = ["H%s" % num for num in range(0, 24)]

    # Multiply by 100 to get them into percentage (0-100 representation)
    data[tcols] = data[tcols] * 100

    # Create Custom classifier
    # bins are the upper boundary of the class (including the value itself)
    # ---------------------------------------------------------------------

    # Natural Breaks classification (7 classes) that has been rounded (to have a more intuitive legend)
    my_bins = [0.05, 0.10, 0.20, 0.40, 0.80, 1.6, 3.97]

    # Classify following columns
    ccolumns = tcols

    if mapclassifier:

        # Stack all values
        stacked_values = stackColumnValues(df=data, columns=ccolumns)

        # Classify values based on specific classifier
        n = 7
        my_bins = [x for x in range(n)]

        if mapclassifier == 'HeadTail':
            classif = ps.esda.mapclassify.HeadTail_Breaks(stacked_values)
        elif mapclassifier == 'FisherJenks':
            classif = ps.Fisher_Jenks(stacked_values, k=n)
        elif mapclassifier == 'NaturalBreaks':
            classif = ps.Natural_Breaks(stacked_values, k=n)
        elif mapclassifier == 'MaximumBreaks':
            classif = ps.Maximum_Breaks(stacked_values, k=n)
        elif mapclassifier == 'JenksCaspall':
            classif = ps.Jenks_Caspall(stacked_values, k=n)

        # Get bins
        my_bins = list(classif.bins)

    # Apply the chosen classification
    classifier = ps.User_Defined.make(bins=my_bins)
    classif = data[ccolumns].apply(classifier)
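    # ps.User_Defined.make(bins=...) (legacy pysal) returns a classifier
    # function, so DataFrame.apply above classifies every listed column
    # against the same shared bins rather than re-deriving breaks per column.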

    # Rename classified column names (add letter c in front)
    classif.columns = list(map(lambda x: "c" + x, classif.columns))

    # Join back to grid
    data = data.join(classif)

    # Classified columns showing the distribution of the population
    ccols = ["cH%s" % num for num in range(0, 24)]

    # Rename columns and take the 'H' letter from the beginning away
    data, new_cols = renameTo24HourSystem(data, tcols, minutes=True)

    # Select color palette
    palette = sns.diverging_palette(220, 20, n=len(my_bins))

    # Get hex colors
    hex_colors = parseHexSeaborn(palette)

    # Change White color into more reddish
    hex_colors[3] = '#FFF2F2'

    N = len(hex_colors)

    # Convert to rgb
    legendcolors = [col.hex2color(hexcol) for hexcol in hex_colors]

    # Legend labels
    binlabels = np.array(my_bins)
    rbinlabels = binlabels.round(2)
    legend_labels = list(rbinlabels)
    legend_labels.insert(0, 0)

    for tattribute in new_cols:

        # Color balancer
        color_balancer = list(hex_colors)

        # Print the classes
        classcol = "cH%s" % int(tattribute[0:2])
        classes = list(data[classcol].unique())
        classes.sort()

        print("%s \t N-classes: %s \t Classes: " % (tattribute, len(classes)),
              classes)

        # If there is no values for all classes, remove the color of the specific
        # class that is missing (so that coloring scheme is identical for all times)
        if len(classes) < N:
            class_values = [val for val in range(N)]
            # Put values in reverse order
            class_values.reverse()
            # Find out which classes are missing and remove the color
            for i in class_values:
                if i not in classes:
                    del color_balancer[i]
        # Convert to rgb
        rgbcolors = [col.hex2color(hexcol) for hexcol in color_balancer]

        # Dynamo colormap
        Ncolor = len(color_balancer)
        dynamocmap = LinearSegmentedColormap.from_list("my_colormap",
                                                       rgbcolors,
                                                       N=Ncolor,
                                                       gamma=1.0)
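        # (Sketch note) from_list with N equal to the number of supplied colors
        # yields a discrete colormap in which class i maps exactly to color i,
        # which is why missing classes were dropped from color_balancer above.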

        # Initialize Figure
        if not show_legend:
            fig, ax = plt.subplots()
        else:
            fig = plt.figure(figsize=(8, 7))
            # Add axes (1 for image, 2 for custom legend)
            ax = fig.add_axes(
                [0.05, 0.15, 0.8,
                 0.65])  #([DistFromLeft, DistFromBottom, Width, Height])
            ax1 = fig.add_axes([0.2, 0.08, 0.6, 0.035])

        # Column name for shop information
        name = "h%s" % int(tattribute[0:2])

        if show_noData:
            # Plot base grid
            if show_legend:
                data.plot(ax=ax,
                          color='white',
                          linewidth=0.1,
                          hatch='x',
                          edgecolor='grey',
                          legend=True)
            else:
                data.plot(ax=ax,
                          color='white',
                          linewidth=0.1,
                          hatch='x',
                          edgecolor='grey')
        else:
            if show_legend:
                data.plot(ax=ax,
                          color='white',
                          linewidth=0,
                          edgecolor='grey',
                          legend=True)
            else:
                data.plot(ax=ax, color='white', linewidth=0, edgecolor='grey')

        # Clip grid with boundaries
        data = gpd.overlay(data, boundaries, how='intersection')

        # Plot the map using custom color map (use the classified column)
        ax = plotCustomColors(ax=ax,
                              df=data,
                              column=classcol,
                              custom_cmap=dynamocmap,
                              linewidth=0.05,
                              edgecolor='grey')

        # Plot water bodies
        water.plot(ax=ax,
                   color='white',
                   alpha=1.0,
                   linewidth=0,
                   edgecolor='grey')  #linewidth=0.05

        # Plot roads
        roads.plot(ax=ax, color='grey', lw=0.8, alpha=0.8)

        # Specify y and x-lim
        ax.set_xlim(left=531000, right=553000)
        ax.set_ylim(top=6596000, bottom=6579400)

        # Remove tick markers
        ax.xaxis.set_visible(False)
        ax.yaxis.set_visible(False)

        # Info texts
        info_text = "%s" % (tattribute)
        if not show_legend:
            ppos_x = 540000
            ppos_y = 6595500
        else:
            ppos_x = 540000
            ppos_y = 6596500
        # Add text about time
        ax.text(ppos_x,
                ppos_y,
                info_text,
                size=30,
                color='black',
                **{'fontname': 'Arial'})
        # Add title text
        if show_title_text:
            ax.text(
                ppos_x - 5000,
                ppos_y + 2000,
                "Population distribution in Tallinn\n   based on mobile phone data",
                size=20,
                color='gray',
                **{'fontname': 'Arial'})

        # Add legend
        if show_legend:
            ax1.imshow(np.arange(N).reshape(1, N),
                       cmap=mpl.colors.ListedColormap(list(legendcolors)),
                       interpolation="nearest",
                       aspect="auto")

            # Set locations of the bins
            ax1.set_xticks(np.arange(N + 1) - .5)
            ax1.set_yticks([])

            # Specify the labels
            ax1.set_xticklabels(legend_labels)

            # Set colorbar title
            cbar_title = 'Share of population (%)'
            pos_x = 0.25
            pos_y = 0.123
            plt.figtext(pos_x, pos_y, cbar_title, size=12)

        # Save figure
        resolution = 500
        outpath = os.path.join(
            outdir, "%s_PopulationDistribution_map_%sdpi.png" %
            (tattribute[0:2], resolution))

        # Don't show axis borders
        ax.axis('off')

        if not show_legend:
            plt.tight_layout()

        plt.savefig(outpath, dpi=resolution)
        #plt.show()
        plt.close()
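
A standard entry-point guard (an assumption; the original module may invoke main() differently):

if __name__ == '__main__':
    main()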
Example #6
def choropleth_map(jsonpath,
                   key,
                   attribute,
                   df=None,
                   classification="Quantiles",
                   classes=5,
                   bins=None,
                   std=None,
                   centroid=None,
                   zoom_start=5,
                   tiles='OpenStreetMap',
                   fill_color="YlGn",
                   fill_opacity=.5,
                   line_opacity=0.2,
                   legend_name='',
                   save=True):
    '''
    One-shot mapping function for folium-based choropleth mapping. 

    jsonpath - the filepath to a JSON file
    key - the field upon which the JSON and the dataframe will be linked
    attribute - the attribute to be mapped

    The rest of the arguments are keyword:
    
    classification - type of classification scheme to be used
    classes - number of classes used
    bins - breakpoints, if manual classes are desired


    '''

    #Polymorphism by hand...

    if isinstance(jsonpath, str):
        if os.path.isfile(jsonpath):
            sjson = gj.load(open(jsonpath))
        else:
            raise IOError('File not found')

    if isinstance(jsonpath, dict):
        raise NotImplementedError(
            'Direct mapping from dictionary not yet supported')
        #with open('tmp.json', 'w') as out:
        #    gj.dump(jsonpath, out)
        #    sjson = gj.load(open('tmp.json'))

    if isinstance(jsonpath, tuple):
        if 'ShpWrapper' in str(type(jsonpath[0])) and 'DBF' in str(
                type(jsonpath[1])):
            flip('tmp.json', jsonpath[0], jsonpath[1])
            sjson = gj.load(open('tmp.json'))
            jsonpath = 'tmp.json'

        elif 'ShpWrapper' in str(type(jsonpath[1])) and 'DBF' in str(
                type(jsonpath[0])):
            flip('tmp.json', jsonpath[1], jsonpath[0])
            sjson = gj.load(open('tmp.json'))
            jsonpath = 'tmp.json'

        else:
            raise IOError(
                'Inputs must be GeoJSON filepath, GeoJSON dictionary in memory, or shp-dbf tuple'
            )

    #key construction
    if df is None:
        df = json2df(sjson)
    dfkey = [key, attribute]

    #centroid search
    if centroid is None:
        if 'bbox' in sjson.keys():
            bbox = sjson.bbox
        else:
            bbox = bboxsearch(sjson)
        xs = sum([bbox[0], bbox[2]]) / 2.
        ys = sum([bbox[1], bbox[3]]) / 2.
        centroid = [ys, xs]
    jsonkey = 'feature.properties.' + key

    choromap = fm.Map(
        location=centroid, zoom_start=zoom_start,
        tiles=tiles)  # all the elements you need to make a choropleth

    #standardization
    if std is not None:
        if isinstance(std, int) or isinstance(std, float):
            y = np.array(df[attribute] / std)
        elif type(std) == str:
            y = np.array(df[attribute] / df[std])
        elif callable(std):
            raise NotImplementedError(
                'Functional Standardizations are not implemented yet')
        else:
            raise ValueError(
                'Standardization must be integer, float, function, or Series')
    else:
        y = np.array(df[attribute].tolist())

    #For people who don't read documentation...
    if isinstance(classes, list):
        bins = classes
        classes = len(bins)
    elif isinstance(classes, float):
        try:
            classes = int(classes)
        except (TypeError, ValueError):
            raise ValueError('Classes must be coercible to integers')

    #classification passing
    if classification is not None:
        if classification == "Maximum Breaks":  #there is probably a better way to do this, but it's a start.
            mapclass = ps.Maximum_Breaks(y, k=classes).bins.tolist()
        elif classification == 'Quantiles':
            mapclass = ps.Quantiles(y, k=classes).bins.tolist()
        elif classification == 'Fisher-Jenks':
            mapclass = list(ps.Fisher_Jenks(y, k=classes).bins)
        elif classification == 'Equal Interval':
            mapclass = ps.Equal_Interval(y, k=classes).bins.tolist()
        elif classification == 'Natural Breaks':
            mapclass = list(ps.Natural_Breaks(y, k=classes).bins)
        elif classification == 'Jenks Caspall Forced':
            raise NotImplementedError(
                'Jenks Caspall Forced is not implemented yet.')
        #   mapclass = ps.Jenks_Caspall_Forced(y, k=classes).bins.tolist()
        elif classification == 'Jenks Caspall Sampled':
            raise NotImplementedError(
                'Jenks Caspall Sampled is not implemented yet')
        #   mapclass = ps.Jenks_Caspall_Sampled(y, k=classes).bins.tolist()
        elif classification == 'Jenks Caspall':
            mapclass = ps.Jenks_Caspall(y, k=classes).bins.tolist()
        elif classification == 'User Defined':
            mapclass = bins
        elif classification == 'Standard Deviation':
            if bins is None:
                l = classes // 2  # integer division; classes / 2 is a float in Python 3
                bins = range(-l, l + 1)
            mapclass = list(ps.Std_Mean(y, bins).bins)
        elif classification == 'Percentiles':
            if bins is None:
                bins = [1, 10, 50, 90, 99, 100]
            mapclass = list(ps.Percentiles(y, bins).bins)
        elif classification == 'Max P':
            #raise NotImplementedError('Max-P classification is not implemented yet')
            mapclass = ps.Max_P_Classifier(y, k=classes).bins.tolist()
        else:
            raise NotImplementedError(
                'Your classification is not supported or was not found. Supported classifications are:\n "Maximum Breaks"\n "Quantiles"\n "Fisher-Jenks"\n "Equal Interval"\n "Natural Breaks"\n "Jenks Caspall"\n "User Defined"\n "Percentiles"\n "Max P"'
            )
    else:
        print('Classification forced to None. Defaulting to Quantiles')
        mapclass = ps.Quantiles(y, k=classes).bins.tolist()

    #folium call, try abstracting to a "mapper" function, passing list of args
    choromap.geo_json(geo_path=jsonpath,
                      key_on=jsonkey,
                      data=df,
                      columns=dfkey,
                      fill_color=fill_color,
                      fill_opacity=fill_opacity,
                      line_opacity=line_opacity,
                      threshold_scale=mapclass[:-1],
                      legend_name=legend_name)

    if save:
        # rstrip('.json') strips characters, not the suffix; use splitext instead
        fname = os.path.splitext(jsonpath)[0] + '_' + attribute + '.html'
        choromap.save(fname)

    return choromap
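
The folium call consumes mapclass[:-1] as its threshold scale; a self-contained sketch of that step, assuming legacy pysal (<2.0):

import numpy as np
import pysal as ps

y = np.random.lognormal(3.0, 1.0, 500)
mapclass = list(ps.Fisher_Jenks(y, k=5).bins)
print(mapclass[:-1])   # threshold_scale drops the top edge, as in the call above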
Example #7
def column_kde(series_to_plot, num_bins=7, split_type="quantiles", bw=0.15,
               plot_title="", xlabel="x", ylabel="y"):
    """
    v1.0
    Function that plots: Kernel Density Estimation (KDE)
                         rugplot
                         a classification of the distribution based on 'num_bins' and 'split_type'

    Plots the values passed in 'series_to_plot' (a pandas Series).

    ----------------
    Input arguments: series_to_plot -- pandas Series -- series to be plotted

                     num_bins       -- int    -- number of bins to be used for the split of
                                                 the distribution (default=7)

                     split_type     -- str    -- type of the split of the distribution (default='quantiles')
                                                 must be either 'quantiles', 'equal_interval', or 'fisher_jenks'

                     bw             -- float  -- bandwidth to be used for KDE (default=0.15)

    --------
    Returns:     None; plots a KDE, rugplot, and bins of values in 'series_to_plot'
    """
    # generate a list of bins from the split of the distribution using type of split provided in 'split_type'
    if split_type == 'quantiles':
        classi = ps.Quantiles(series_to_plot, k=num_bins)
    elif split_type == 'equal_interval':
        classi = ps.Equal_Interval(series_to_plot, k=num_bins)
    elif split_type == 'fisher_jenks':
        classi = ps.Fisher_Jenks(series_to_plot, k=num_bins)
    elif isinstance(split_type, str):
        raise ValueError("Input parameter 'split_type' must be either 'quantiles', " +
                         "'equal_interval', or 'fisher_jenks'.")
    else:
        raise TypeError("Input parameter 'split_type' must be a string and either 'quantiles', " +
                        "'equal_interval', or 'fisher_jenks'.")
    # print the bins
    print(classi)

    # create figure and axis
    f, ax = plt.subplots(1, figsize=(9, 6))

    # plot KDE of the distribution
    sns.kdeplot(series_to_plot,
                shade=True,
                label='Distribution of counts of Teranet records per DA',
                bw=bw)

    # plot a rugplot
    sns.rugplot(series_to_plot, alpha=0.5)

    # plot the split of the distribution
    for classi_bin in classi.bins:
        ax.axvline(classi_bin, color='magenta', linewidth=1, linestyle='--')

    # plot the mean and the median
    ax.axvline(series_to_plot.mean(),
               color='deeppink',
               linestyle='--',
               linewidth=1)

    ax.text(series_to_plot.mean(),
            0,
            "Mean: {0:.2f}".format(series_to_plot.mean()),
            rotation=90)

    ax.axvline(series_to_plot.median(),
               color='coral',
               linestyle=':')

    ax.text(series_to_plot.median(),
            0,
            "Median: {0:.2f}".format(series_to_plot.median()),
            rotation=90)

    # configure axis parameters
    ax.set_title(plot_title,
                 fontdict={'fontsize': '18', 'fontweight': '3'})
    ax.set_xlabel(xlabel,
                  fontdict={'fontsize': '16', 'fontweight': '3'})
    ax.set_ylabel(ylabel,
                  fontdict={'fontsize': '16', 'fontweight': '3'})

    ax.legend(loc='best')

    plt.show()
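
A usage sketch for column_kde with synthetic data (assumes the module-level imports the function relies on, i.e. pysal as ps, seaborn as sns, and matplotlib.pyplot as plt, are in scope):

import numpy as np
import pandas as pd

s = pd.Series(np.random.exponential(50.0, 1000))
column_kde(s, num_bins=7, split_type='fisher_jenks',
           plot_title='Synthetic counts', xlabel='count', ylabel='density')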
Example #8
    def createClassifyMap(self, map_type):
        """ return an instance of pysal.Map_Classifier """
        id_group = []
        color_group = []
        label_group = []

        if map_type == stars.MAP_CLASSIFY_EQUAL_INTERVAL:
            k = 5  # default
            if self.params.has_key("k"):
                k = self.params["k"]
            cm = pysal.Equal_Interval(self.data, k=k)

            # add label group, color group
            label_group = self._get_label_group_by_k(cm.bins, cm.counts)
            #color_group = self._get_color_schema_by_k(k)
            color_group = self.pick_color_set(1, len(cm.bins), False)

        elif map_type == stars.MAP_CLASSIFY_PERCENTILES:
            pct = [1, 10, 50, 90, 99, 100]
            # doesn't support different defined pct
            #if self.params.has_key("pct"):
            #    pct = self.params["pct"]
            cm = pysal.Percentiles(self.data, pct=pct)
            counts = list(cm.counts)
            n_counts = len(counts)
            if n_counts < 6:
                for i in range(6 - n_counts):
                    counts.append(0)
            label_group = [
                '<1%%(%d)' % counts[0],
                '1%% - 10%%(%d)' % counts[1],
                '10%% - 50%%(%d)' % counts[2],
                '50%% - 90%%(%d)' % counts[3],
                '90%% - 99%%(%d)' % counts[4],
                '>99%%(%d)' % counts[5]
            ]
            #color_group = self._get_default_color_schema(n_bins)
            color_group = self.pick_color_set(3, 6, True)

        elif map_type == stars.MAP_CLASSIFY_BOX_PLOT:
            hinge = 1.5  # default
            if self.params.has_key("hinge"):
                hinge = self.params["hinge"]

            cm = pysal.Box_Plot(self.data, hinge=hinge)
            n_bins = len(cm.bins)
            if n_bins == 5:
                n_upper_outlier = 0
            else:
                n_upper_outlier = cm.counts[5]
            label_group = [
                'Lower outlier(%d)' % cm.counts[0],
                '<25%% (%d)' % cm.counts[1],
                '25%% - 50%% (%d)' % cm.counts[2],
                '50%% - 75%% (%d)' % cm.counts[3],
                '>75%% (%d)' % cm.counts[4],
                'Upper outlier (%d)' % n_upper_outlier
            ]

            #color_group = self._get_default_color_schema(n_bins)
            color_group = self.pick_color_set(2, 6, False)

        elif map_type == stars.MAP_CLASSIFY_QUANTILES:
            k = 5  # default
            if self.params.has_key("k"):
                k = self.params["k"]

            cm = pysal.Quantiles(self.data, k=k)

            # add label group, color group
            label_group = self._get_label_group_by_k(cm.bins, cm.counts)
            #color_group = self._get_color_schema_by_k(k)
            color_group = self.pick_color_set(1, len(cm.bins), False)

        elif map_type == stars.MAP_CLASSIFY_STD_MEAN:
            cm = pysal.Std_Mean(self.data, multiples=[-2, -1, 0, 1, 2])
            n_bins = len(cm.bins)

        elif map_type == stars.MAP_CLASSIFY_MAXIMUM_BREAK:
            k = 5  # default
            if self.params.has_key("k"):
                k = self.params["k"]
            cm = pysal.Maximum_Breaks(self.data, k=k)

            # add label group, color group
            label_group = self._get_label_group_by_k(cm.bins, cm.counts)
            #color_group = self._get_color_schema_by_k(k)
            color_group = self.pick_color_set(1, len(cm.bins), False)

        elif map_type == stars.MAP_CLASSIFY_NATURAL_BREAK:
            k = 5  # default
            if self.params.has_key("k"):
                k = self.params["k"]
            cm = pysal.Natural_Breaks(self.data, k=k)

            # add label group, color group
            label_group = self._get_label_group_by_k(cm.bins, cm.counts)
            #color_group = self._get_color_schema_by_k(k)
            color_group = self.pick_color_set(1, len(cm.bins), False)

        elif map_type == stars.MAP_CLASSIFY_FISHER_JENKS:
            cm = pysal.Fisher_Jenks(self.data)

            # see below: common label group and color group

        elif map_type == stars.MAP_CLASSIFY_JENKS_CASPALL:
            k = 5  # default
            if self.params.has_key("k"):
                k = self.params["k"]
            cm = pysal.Jenks_Caspall(self.data, k=k)

            # add label group, color group
            label_group = self._get_label_group_by_k([i[0] for i in cm.bins],
                                                     cm.counts)
            #color_group = self._get_color_schema_by_k(k)
            color_group = self.pick_color_set(1, len(cm.bins), False)

        elif map_type == stars.MAP_CLASSIFY_JENKS_CASPALL_SAMPLED:
            k = 5  # default
            pct = 0.1
            if self.params.has_key("k"):
                k = self.params["k"]
            if self.params.has_key("pct"):
                pct = self.params["pct"]
            cm = pysal.Jenks_Caspall_Sampled(self.data, k=k, pct=pct)

            # add label group, color group
            label_group = self._get_label_group_by_k(cm.bins, cm.counts)
            #color_group = self._get_color_schema_by_k(k)
            color_group = self.pick_color_set(1, len(cm.bins), False)

        elif map_type == stars.MAP_CLASSIFY_JENKS_CASPALL_FORCED:
            k = 5  # default
            if self.params.has_key("k"):
                k = self.params["k"]
            cm = pysal.Jenks_Caspall_Forced(self.data, k=k)

            # add label group, color group
            label_group = self._get_label_group_by_k(cm.bins, cm.counts)
            #color_group = self._get_color_schema_by_k(k)
            color_group = self.pick_color_set(1, len(cm.bins), False)

        elif map_type == stars.MAP_CLASSIFY_USER_DEFINED:
            assert self.params.has_key("bins")
            bins = self.params["bins"]
            cm = pysal.User_Defined(self.data, bins=bins)
            k = len(bins)

            # add label group, color group
            label_group = self._get_label_group_by_k(cm.bins, cm.counts)
            #color_group = self._get_color_schema_by_k(k)
            color_group = self.pick_color_set(1, len(cm.bins), False)

        elif map_type == stars.MAP_CLASSIFY_MAX_P:
            k = 5  # default
            if self.params.has_key("k"):
                k = self.params["k"]
            cm = pysal.Max_P_Classifier(self.data, k=k)

            # add label group, color group
            label_group = self._get_label_group_by_k(cm.bins, cm.counts)
            #color_group = self._get_color_schema_by_k(k)
            color_group = self.pick_color_set(1, len(cm.bins), False)

        elif map_type == stars.MAP_CLASSIFY_UNIQUE_VALUES:
            id_group_dict = {}
            id_other = []

            n = 0
            for i, item in enumerate(self.data):
                if n < 10:
                    if item not in id_group_dict:
                        id_group_dict[item] = []
                        n += 1
                if item in id_group_dict:
                    id_group_dict[item].append(i)
                else:
                    id_other.append(i)

            id_group = list(id_group_dict.values())
            unique_values = list(id_group_dict.keys())
            max_num_values = n if n <= 10 else 10

            label_group = [
                str(unique_values[i]) for i in range(max_num_values)
            ]
            color_group = [
                stars.MAP_COLOR_12_UNIQUE_FILL[i]
                for i in range(max_num_values)
            ]
            #color_group = self.pick_color_set(1, max_num_values,False)
            if n >= 10:
                id_group.append(id_other)
                label_group.append('Others')
                color_group.append(stars.MAP_COLOR_12_UNIQUE_OTHER)

            field_name = self.params['field_name']
            id_group.insert(0, [])
            label_group.insert(0, field_name)
            color_group.insert(0, None)

        else:
            raise KeyError('Classify map type is illegal')

        # for some common label group and color group
        if map_type in [
                stars.MAP_CLASSIFY_FISHER_JENKS, stars.MAP_CLASSIFY_STD_MEAN
        ]:
            """
            upper_bound = 0 if len(cm.counts) == 5 else cm.counts[5]
            label_group = ['<%s (%d)'% (cm.bins[0],cm.counts[0]),
                           '%s - %s (%d)'% (cm.bins[0], cm.bins[1],cm.counts[1]),
                           '%s - %s (%d)'% (cm.bins[1], cm.bins[2], cm.counts[2]),
                           '%s - %s (%d)'% (cm.bins[2], cm.bins[3], cm.counts[3]),
                           '%s - %s (%d)'% (cm.bins[3], cm.bins[4], cm.counts[4]),
                           '>%s (%d)'% (cm.bins[4], upper_bound)]
            #color_group = self._get_default_color_schema(len(cm.bins))
            color_group = self.pick_color_set(3,7,False)[1:]
            """
            label_group = self._get_range_labels(cm.bins, cm.counts)
            color_group = self.pick_color_set(3, len(cm.bins), True)  #[1:]

        if map_type != stars.MAP_CLASSIFY_UNIQUE_VALUES:
            # convert
            binIds = cm.yb
            bins = cm.bins

            n_group = len(bins)
            id_group = [[] for i in range(n_group)]
            for i, gid in enumerate(binIds):
                id_group[gid].append(i)

        return id_group, label_group, color_group
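
The closing conversion above inverts cm.yb (the class id of each observation) into per-class id lists; a self-contained sketch of that inversion, assuming legacy pysal:

import numpy as np
import pysal as ps

data = np.random.random(20)
cm_ = ps.Quantiles(data, k=5)
id_group = [[] for _ in cm_.bins]
for i, gid in enumerate(cm_.yb):
    id_group[gid].append(i)   # observation indices grouped by class id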
Example #9
for y in [2015]:

    savings_map_data_input = pd.DataFrame(
        savings_map_data[savings_map_data.REPORTING_YEAR == y].groupby(
            'COUNTY_FIPS', as_index=False).savings_MMTCO2E_total.sum())

    # Breaks reused below when mapping 2015 with fixed bins; in the full
    # workflow these would come from a 2011 run, computed here (from the
    # current input) so the snippet runs standalone
    FJ_2011 = ps.Fisher_Jenks(
        savings_map_data_input.savings_MMTCO2E_total, k=5)

    savings_map = MakeCountyMap.CountyEnergy_Maps(savings_map_data_input)

    if y == 2015:

        savings_map.make_map('savings_MMTCO2E_total', 5, FJ_2011)

    else:

        savings_map.make_map('savings_MMTCO2E_total', 5)

    print(
        np.round(ps.Fisher_Jenks(savings_map_data_input.savings_MMTCO2E_total,
                                 k=5).bins,
                 decimals=1))
Example #10
# convert to geopandas df
gdf = convert_to_gpd_df(df)

# load census blocks
blocks = gpd.read_file('../data/census2000blockgroups_poly/census2000blockgroups_poly.shp')
blocks = blocks.loc[blocks['COUNTY'] == '025']
blocks = blocks.to_crs(epsg=4326)  # the {'init': ...} form is deprecated in pyproj >= 2

df_blocks = join_311_to_blocks(gdf, blocks)

# create neighbors from file
outfile = "../data/tmp/tmp.shp"
weight_matrix = get_queen_neighbors_matrix(gdf, outfile)

# create natural breaks for open len
open_len_FJ10 = ps.Fisher_Jenks(df_blocks.open_len, k=10)
print("Fisher Jenks breaks - open len: {}".format(open_len_FJ10))
print("Fisher Jenks fit- open len: {}".format(open_len_FJ10.adcm))
# join breaks back to blocks df
df_blocks = df_blocks.assign(open_len_cl=open_len_FJ10.yb)

# calculate spatial lag
open_len_lag = ps.lag_spatial(weight_matrix, df_blocks.open_len.values)
open_len_lag_FJ10 = ps.Fisher_Jenks(open_len_lag, k=10)
print("Fisher Jenks breaks - open len lag: {}".format(open_len_lag_FJ10))
print("Fisher Jenks fit - open len lag: {}".format(open_len_lag_FJ10.adcm))
# join lag breaks back to blocks
df_blocks = df_blocks.assign(open_len_lag_cl=open_len_lag_FJ10.yb)

df_blocks.to_csv("../data/web/df_block.csv")
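
A self-contained sketch of the spatial-lag step, assuming legacy pysal (<2.0) where lag_spatial takes a weights object and a values array:

import numpy as np
import pysal as ps

w = ps.lat2W(4, 4)             # toy 4x4 rook-contiguity weights
w.transform = 'r'              # row-standardize so the lag is a neighbor mean
y = np.arange(16, dtype=float)
y_lag = ps.lag_spatial(w, y)   # each entry is the mean of its neighbors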