Example #1
0
def draw_map(df, measure, Colors, Title):
    """Render a choropleth of *measure* over the polygons in df['Poly'] and save it.

    Args:
        df: DataFrame with a 'Poly' column of shapely geometries and the
            numeric column named by *measure*.
        measure: name of the column whose values drive the face colours.
        Colors: matplotlib colormap name, passed to plt.get_cmap.
        Title: plot title, also used as the savefig output path.

    Relies on module-level extent globals (minx, maxx, miny, maxy, w, h)
    and the helpers nb (natural-breaks classifier) and colorbar_index.
    Mutates *df* by adding a 'patches' column.
    """
    plt.clf()
    fig = plt.figure()
    # NOTE(review): 'axisbg' was renamed to 'facecolor' and removed in
    # matplotlib >= 2.2 — confirm the pinned matplotlib version.
    ax = fig.add_subplot(111, axisbg='None')
    # Pad the data extent by 20% on each side and keep x/y scales equal.
    ax.set_xlim(minx - 0.2 * w, maxx + 0.2 * w)
    ax.set_ylim(miny - 0.2 * h, maxy + 0.2 * h)
    ax.set_aspect(1)

    cmap = plt.get_cmap(Colors)
    # One matplotlib patch per polygon, with a thin grey edge.
    df['patches'] = df['Poly'].map(
        lambda x: PolygonPatch(x, ec='#555555', lw=0.2, alpha=0.5, zorder=4))
    pc = PatchCollection(df['patches'].values, match_original=True)
    # Normalize() rescales the raw measure values to 0-1 for the colormap.
    norm = mpl.colors.Normalize()
    pc.set_facecolor(cmap(norm(df[measure].values)))
    ax.add_collection(pc)
    plt.title(Title)

    #Add a colorbar for the PolyCollection
    #Classified the prices into 7 classes using natural break
    breaks = nb(df[df[measure].notnull()][measure].values, initial=500, k=7)
    # Align the class ids with the original index (nulls were filtered out).
    jb = pd.DataFrame({'jenks_bins': breaks.yb},
                      index=df[df[measure].notnull()].index)
    df = df.join(jb)
    # Rows with a null measure get the sentinel bin -1.
    df.jenks_bins.fillna(-1, inplace=True)
    jenks_labels = ["<= Above %0.1f" % b for b in breaks.bins]
    cb = colorbar_index(ncolors=len(jenks_labels),
                        cmap=cmap,
                        shrink=0.5,
                        labels=jenks_labels)
    cb.ax.tick_params(labelsize=8)

    fig.set_size_inches(10, 10)
    # NOTE(review): 'ext' and 'close' are not plt.savefig keywords — they
    # look copied from a wrapper helper and may raise a TypeError on modern
    # matplotlib; verify against the version in use.
    plt.savefig(Title, ext='png', close=True, dpi=400, bbox_inches='tight')
Example #2
0
def normalise(df, element):
    """Log-transform, threshold and 0-1 scale the *element* column of *df*.

    Adds the working columns '<element>_nn' (numeric), '<element>_fn'
    (negatives converted), '<element>_log' (log10), plus 'normalised'
    (scaled against an anomaly threshold) and 'classifications' (a hex
    colour per natural-breaks class).

    Relies on the module-level helpers neg_conversions, scaling, log10 and
    nb (natural-breaks classifier). Mutates and returns *df*.
    """
    # Treat missing assays as zero before any conversion.
    df[element].fillna(0, inplace=True)

    df[f'{element}_nn'] = pd.to_numeric(df[element])

    # Convert negative readings so the log transform is defined.
    df[f'{element}_fn'] = df[f'{element}_nn'].apply(neg_conversions)

    df[f'{element}_log'] = df[f'{element}_fn'].apply(log10)

    log_values = df[f'{element}_log']
    median = log_values.median()

    # Mean Absolute Deviation: Tukey, J.W., 1977. Exploratory Data Analysis.
    # Addison-Wesley, Reading, 688 pp.
    # Computed explicitly (mean absolute deviation around the mean) because
    # Series.mad() was removed in pandas 2.0; the value is identical.
    mad = (log_values - log_values.mean()).abs().mean()

    # Set threshold: http://crcleme.org.au/Pubs/guides/gawler/a7_id_anomalies.pdf
    threshold = median + 2 * mad
    min_value = log_values.min()

    df['normalised'] = log_values.apply(
        lambda x: scaling(x, min_value, threshold))

    # Seven natural-breaks classes, mapped onto a fixed colour ramp.
    classifier = nb(df[f'{element}_log'], 7)
    df['classifications'] = df[f'{element}_log'].apply(classifier)
    df.classifications.replace([1, 2, 3, 4, 5, 6, 7], [
        '#82817d', '#55b1d9', '#5bd955', '#e6a94e', '#e02d2d', '#da2de0',
        '#af00b5'
    ],
                               inplace=True)

    return df
Example #3
0
    def computeNDVINaturalBreaks(self, rawndvifile):
        """Classify a raw NDVI raster into natural-break classes.

        Computes Jenks natural breaks (k=4) on the raster's band values,
        then writes a copy of the raster to tmp/classified-ndvi.tiff with
        every pixel replaced by its break class (via np.digitize).

        Args:
            rawndvifile: path to the raw NDVI raster, readable by rasterio.
        """
        print("Computing Natural breaks on NDVI..")
        # Remember the source file for later pipeline steps.
        self.files['ndvi_file'] = rawndvifile
        with rasterio.open(rawndvifile) as src:
            profile = src.profile
            bands = src.read()
            # NOTE(review): 'breaks'/'bins' are overwritten on every pass,
            # so only the LAST band's breaks survive this loop — confirm the
            # raster is single-band or that this is intended.
            for band in bands:
                # Drop None/NaN pixels before classifying.
                b = band[(band != np.array(None)) & (np.logical_not(np.isnan(band))) ]
                breaks = nb(b.ravel(),k=4,initial=1)
                bins = breaks.bins.tolist()
        
        bins.insert(0,-1) # add -1 to the beginning of the breaks
        
        classfiedndvitmppath = os.path.join(self.cwd,config.settings['outputdirectory'],'tmp','classified-ndvi.tiff')
    
        print("Writing new NDVI with Natural break classes..")
        with rasterio.open(rawndvifile) as src:
            profile = src.profile
            bands = src.read(masked=True)
            for band in bands: 
                # b = band[(band != np.array(None)) & (np.logical_not(np.isnan(band))) ]
                # Replace each pixel in place with its bin index.
                # NOTE(review): per-element nditer is very slow; a vectorized
                # band[...] = np.digitize(band, bins) looks like the intent —
                # verify masked-array handling before switching.
                for x in np.nditer(band, op_flags=['readwrite']):
                    x[...] = np.digitize(x,bins)

                # Reproject and write each band

            with rasterio.open(classfiedndvitmppath, 'w', **profile) as dst:
                dst.write(bands)
Example #4
0
def IndicatorHeatMap(YEAR, LOCATION="Maryland", INDICATOR="None"):
    # Using a CSV of economic indicators, create a heatmap that
    # shows the data. This will be used under scatter and hexbin maps
    # if an indicator is selected.
    # This can probably be turned into an if/elif/else statement that
    # sets a common variable to a string based on the indicator chosen
    # and passes that variable through the breaks and jenks process.
    #
    # NOTE(review): df_map is read on the next line but also assigned,
    # which makes it a local — Python raises UnboundLocalError here unless
    # df_map is passed in or declared global. Confirm intent.
    # (fig, m and indicator_list also come from module scope.)
    df_map = pd.merge(df_map, indicator_list, on="county_name", how="left")
    if INDICATOR == "None":
        pass
    elif INDICATOR == "Median Household Income":
        # Select county or neighborhood MHHI data from the passed year
        breaks = nb(
            df_map[df_map["mhhi"].notnull()].mhhi.values,
            initial=300,
            k=5)
            
        # Align the bin ids with the original index (nulls were filtered out).
        jb = pd.DataFrame({"jenks_bins":breaks.yb}, index=df_map[df_map["mhhi"].notnull()].index)
        df_map = df_map.join(jb)
        df_map.jenks_bins.fillna(-1, inplace=True)

        jenks_labels = ["Median Household Income:\n<= %f" % b for b in breaks.bins]
    elif INDICATOR == "Millennial Population Growth":
        # Select county or neighborhood Millennial population data from the passed year
        pass
    else:
        print "The %s indicator is not yet available in this program." % INDICATOR
    
    # NOTE(review): jenks_labels is only bound in the MHHI branch above, so
    # the colorbar_index call below raises NameError for any other INDICATOR.
    ax = fig.add_subplot(111, axisbg = "w", frame_on = False)

    # Change get_cmap color based on INDICATOR    
    cmap = plt.get_cmap("Blues")
    # One patch per county, thin grey outline.
    df_map["patches"] = df_map["poly"].map(lambda x: PolygonPatch(x, ec="#555555", lw=.2, alpha=1, zorder=4))
    pc = PatchCollection(df_map["patches"], match_original=True)
    norm = Normalize()
    # Colour each county patch by its jenks bin.
    pc.set_facecolor(cmap(norm(df_map["jenks_bins"].values)))
    ax.add_collection(pc)

    cb = colorbar_index(ncolors=len(jenks_labels), cmap=cmap, shrink=0.5, labels=jenks_labels)
    cb.ax.tick_params(labelsize=8)

    # Basemap scale bar; units are metres.
    m.drawmapscale(
        -125, 20,
        -125, 20,
        10.,
        barstyle = "fancy", labelstyle = "simple",
        fillcolor1 = "w", fillcolor2 = "w",
        fontcolor = "w",
        zorder=9,
        units = "m",
        fontsize =7)    
Example #5
0
def jenks_breaks(df_map, field):
    """Classify *field* with Jenks natural breaks and build bin labels.

    Args:
        df_map: DataFrame containing the numeric column *field*.
        field: name of the column to classify.

    Returns:
        (df_map, jenks_labels): df_map gains a 'jenks_bins' column (-1 for
        rows where *field* is null); jenks_labels has one label per break.
    """
    # Calculate Jenks natural breaks for density
    breaks = nb(
        df_map[df_map[field].notnull()][field].values,
        initial=300,
        k=5)
    # the notnull method lets us match indices when joining.
    # BUG FIX: the joined column must be named 'jenks_bins' — it was
    # created as 'bins', so the fillna below raised AttributeError.
    jb = pd.DataFrame({'jenks_bins': breaks.yb},
                      index=df_map[df_map[field].notnull()].index)
    df_map = df_map.join(jb)
    df_map.jenks_bins.fillna(-1, inplace=True)

    jenks_labels = ["up to %0.f%% (%s EDs)" % (b*100, c) for b, c in zip(
        breaks.bins, breaks.counts)]
    #jenks_labels.insert(0, 'No plaques (%s wards)' % len(df_map[df_map['density_km'].isnull()]))
    return df_map, jenks_labels
  def dataDF(self):
    """Compute per-polygon counts and densities, then jenks-bin them.

    Builds 'count', 'density_m' and 'density_km' columns on self.df_map,
    converts zero densities to NaN, and joins a 'jenks_bins' column
    (-1 where the density is null).
    """
    super(GreaterBostonDensity, self).dataDF()

    # list(...) keeps the count working on Python 3, where filter() is a
    # lazy iterator with no len().
    self.df_map['count'] = self.df_map['poly'].map(
      lambda x: len(list(filter(prep(x).contains, self.dataPoints))))
    self.df_map['density_m'] = self.df_map['count'] / self.df_map['area_m']
    self.df_map['density_km'] = self.df_map['count'] / self.df_map['area_km']
    # it's easier to work with NaN values when classifying
    self.df_map.replace(to_replace={'density_m': {0: np.nan}, 'density_km': {0: np.nan}}, inplace=True)

    self.breaks = nb(
      self.df_map[self.df_map['density_km'].notnull()].density_km.values,
      initial=300,
      k=5)
    # the notnull method lets us match indices when joining
    jb = pd.DataFrame({'jenks_bins': self.breaks.yb}, index=self.df_map[self.df_map['density_km'].notnull()].index)
    self.df_map = self.df_map.join(jb)
    self.df_map.jenks_bins.fillna(-1, inplace=True)

    return
  def dataDF(self):
    """Compute per-tract counts and per-capita densities, then jenks-bin them.

    Converts self.dataPoints into an integer array of CT ids, counts the
    points per census tract, derives a per-1000-population density, and
    joins a 'jenks_bins' column (-1 where the density is null).
    """
    # Collect the CT id of every data point as an integer array.
    self.dataPoints = np.array([int(point['CT_ID']) for point in self.dataPoints])

    def _count_for_tract(ct_id):
      # How many data points fall in this census tract.
      return int((self.dataPoints == int(ct_id)).sum())

    self.df_map['count'] = self.df_map['CT_ID_10'].map(_count_for_tract)
    self.df_map['POP100'] = self.df_map['POP100'].apply(float)
    self.df_map['density'] = (self.df_map['count'] * 1000) / self.df_map['POP100']
    # NaN densities are easier to classify than zeros.
    self.df_map.replace(to_replace={'density': {0: np.nan}}, inplace=True)

    # Classify only the rows that actually have a density.
    has_density = self.df_map['density'].notnull()
    self.breaks = nb(self.df_map[has_density].density.values,
                     initial=300,
                     k=5)
    # The notnull mask lets the bin ids line up with the original index.
    jb = pd.DataFrame({'jenks_bins': self.breaks.yb},
                      index=self.df_map[has_density].index)
    self.df_map = self.df_map.join(jb)
    self.df_map.jenks_bins.fillna(-1, inplace=True)
    return
Example #8
0
    def chloropleth(self, query, color = "Blues"):
        """Show a choropleth (density) map of crimes.

        Runs *query* against self.con, counts the resulting points inside
        each polygon of self.data_map, classifies the per-km^2 densities
        with Jenks natural breaks, and renders the coloured patches plus a
        labelled colorbar on self.ax.

        Args: 
            query: SQL string, executed via pandas.read_sql_query.
            color: matplotlib colormap name (default "Blues").
        """
        self.load()
        data = pd.read_sql_query(con=self.con, sql=query)
        # Project the raw rows into map-coordinate points.
        points = self.gen_points(data, self.data_map)
        self.data_map['count'] = self.data_map['poly'].map(lambda x: len(list(filter(prep(x).contains, points))))
        self.data_map['density_m'] = self.data_map['count'] / self.data_map['area_m']
        self.data_map['density_km'] = self.data_map['count'] / self.data_map['area_km']
        # Zero densities become NaN so they classify as "no data".
        self.data_map.replace(to_replace={'density_m': {0: np.nan}, 'density_km': {0: np.nan}}, inplace=True)
    
        breaks = nb(
            self.data_map[self.data_map['density_km'].notnull()].density_km.values,
            initial=300,
            k=5)

        # The notnull filter lets the bin ids line up with the original index.
        jb = pd.DataFrame({'jenks_bins': breaks.yb}, index=self.data_map[self.data_map['density_km'].notnull()].index)
        self.data_map = self.data_map.join(jb)
        self.data_map.jenks_bins.fillna(-1, inplace=True)

        # One label per break, plus a leading "no data" entry.
        jenks_labels = ["<= %0.1f/km$^2$(%s communities)" % (b, c) for b, c in zip(
            breaks.bins, breaks.counts)]
        jenks_labels.insert(0, 'None (%s communities)' % len(self.data_map[self.data_map['density_km'].isnull()]))
    
        cmap = plt.get_cmap(color)
        # One patch per community polygon, thin grey outline.
        self.data_map['patches'] = self.data_map['poly'].map(lambda x: PolygonPatch(x, ec='#555555', lw=.2, alpha=1., zorder=4))
        pc = PatchCollection(self.data_map['patches'], match_original=True)
        norm = Normalize()
        pc.set_facecolor(cmap(norm(self.data_map['jenks_bins'].values)))
        self.ax.add_collection(pc)

        cb = self.gen_colorbar(colors=len(jenks_labels), color_map=cmap, shrink=0.5, labels=jenks_labels)
        cb.ax.tick_params(labelsize=6)

        plt.tight_layout()
        plt.show()
    """
    Makes a choropelth map of a given shapefile and a numerical column (val_col) of a dataframe. 
    Shapefile and dataframe must both have a matching index_column for joining. 
    Uses Jenks natural breaks by PySAL for classifying.
    
    If you want to highlight a polygon differently, pass it to main_poly and uncomment lines 43-47.
    
    Based on this tutorial: http://ramiro.org/notebook/basemap-choropleth/
    """

	num_colors = num_breaks
	cm = plt.get_cmap(color_ramp)
	scheme = [cm(i / num_colors) for i in range(num_colors)]
    
    # Create bins for color values
    breaks = nb( dataframe[val_col], initial=200, k = num_colors - 1)
    bins = breaks.bins
    frame['bin'] = breaks.yb

    mpl.style.use('seaborn-muted')
    fig = plt.figure(figsize=(22, 12))

    ax = fig.add_subplot(111, axisbg='w', frame_on=False)
    fig.suptitle('Map of {}'.format(title), fontsize=30, y=.95)
    
    m = Basemap(lon_0=0, projection='robin')
    m.drawmapboundary(color='w')
    
    m.readshapefile(shapefile, 'units', color='#444444', linewidth=.2)   
    for info, shape in zip(m.units_info, m.units):
        idx = info[index_col]
Example #10
0
def HeatMap(YEAR, EAGB_INDUSTRY, LANDMARK="None", LOCATION="Maryland", INDICATOR="None", SAVE=False, DRAFT=False):
    """Draw (and optionally save) an establishment-density choropleth.

    Counts the ClusterPoints() establishments for YEAR/EAGB_INDUSTRY inside
    each county polygon, classifies the densities with Jenks natural breaks
    and renders the coloured counties with a labelled colorbar.

    Args:
        YEAR, EAGB_INDUSTRY: which establishments to map.
        LANDMARK, LOCATION, INDICATOR: annotation inputs (text only here).
        SAVE: when True, write .eps and .png copies of the figure.
        DRAFT: accepted for interface compatibility; unused here.
    """
    # Get basemap and df_map (call the selector once, not twice).
    geography = GeographySelector()
    m, df_map = geography[0], geography[1]

    # Get establishment points for year and industry with ClusterPoints()
    map_estab_points = ClusterPoints(YEAR, EAGB_INDUSTRY)[0]

    # Find the density of establishments for EAGB_INDUSTRY in YEAR for each
    # county. list(...) keeps len() working on Python 3's lazy filter().
    df_map["count"] = df_map["poly"].map(lambda x: int(len(list(filter(prep(x).contains, map_estab_points)))))
    df_map["density_m"] = df_map["count"]/df_map["area_m"]
    # Zero densities become NaN so they classify as "no data".
    df_map.replace(to_replace={"density_m": {0:np.nan}}, inplace=True)

    # Calculate Jenks natural breaks for density
    breaks = nb(
        df_map[df_map["density_m"].notnull()].density_m.values,
        initial = 300,
        # Number of bins to sort counties into:
        k = 5)

    # The notnull method lets us match indices when joining
    jb = pd.DataFrame({"jenks_bins": breaks.yb}, index=df_map[df_map["density_m"].notnull()].index)
    df_map = df_map.join(jb)
    df_map.jenks_bins.fillna(-1, inplace=True)

    # One label per break, plus a leading "no establishments" entry.
    jenks_labels = ["<= %0.1f/m$^2$ (%s counties)" % (b, c) for b,c in zip(
        breaks.bins, breaks.counts)]
    jenks_labels.insert(0, "No %(1)s Establishments (%(2)s counties)" % {"1":EAGB_INDUSTRY, "2":len(df_map[df_map["density_m"].isnull()])})

    plt.clf()
    fig = plt.figure()
    ax = fig.add_subplot(111, axisbg="w", frameon=False)

    # Use a color ramp determined by EAGB_INDUSTRY with an if statement
    # Change "Blues" to some variable
    cmap = plt.get_cmap("Blues")
    # Draw counties with grey outlines
    df_map["patches"] = df_map["poly"].map(lambda x: PolygonPatch(x, ec="#555555", lw=0.2, alpha=1.0, zorder=4))
    pc = PatchCollection(df_map["patches"], match_original=True)
    # Impose color map onto patch collection
    norm = Normalize()
    pc.set_facecolor(cmap(norm(df_map["jenks_bins"].values)))
    ax.add_collection(pc,zorder=5)

    # Add a color bar
    cb = colorbar_index(ncolors=len(jenks_labels), cmap=cmap, shrink=0.5, labels=jenks_labels)
    cb.ax.tick_params(labelsize=8)
    '''
    # Show highest densities in descending order
    highest = "\n".join(
        value[1] for _, value in df_map[(df_map["jenks_bins"] == 4)][:10].sort().iterrows())
    highest = "Most Dense Counties:\n\n" + highest
    
    details = cb.ax.text(
    -1., 0-0.007,
    highest,
    ha="right", va="bottom",
    size = 8,
    color = "#555555")
    '''

    # Copyright and source data info
    smallprint = ax.text(
        0.02, 0,
        ha="left", va = "bottom",
        size = 10,
        color = "#555555",
        transform = ax.transAxes,
        s = "Classification Method: Jenks Natural Breaks\nTotal Points: %(1)s\nLandmarks: %(2)s\nEconomic Indicator: %(3)s\nContains NETS Database data\n$\copyright$ EAGB copyright and database right 2015" % {"1":len(ClusterPoints(YEAR, EAGB_INDUSTRY, LANDMARK)[0]), "2":LANDMARK, "3":INDICATOR}
        )
    ''' 
    m.drawmapscale(
        coords[0] + 0.08, coords[1] + 0.015,
        coords[0], coords[1],
        10.,
        barstyle = "fancy", labelstyle = "simple",
        fillcolor1 = "w", fillcolor2 = "#555555",
        fontcolor = "#555555",
        zorder=11,
        units = "m",
        fontsize = 7) 
    ''' 
    plt.tight_layout()
    fig.set_size_inches(10,10)
    # BUG FIX: the mapping dict used to sit outside the title() call
    # ("plt.title(fmt), mapping"), so the raw format string was shown and
    # the substitutions were discarded.
    plt.title("%(1)s Establishment Density, %(2)s\n%(3)s" % {"1":EAGB_INDUSTRY, "2":YEAR, "3":LOCATION})

    # Passed argument tells program whether to save maps
    if SAVE:
        plt.savefig("All %(1)s %(2)s_heatmap.eps" % {"1":EAGB_INDUSTRY, "2":YEAR}, alpha = True)
        plt.savefig("All %(1)s %(2)s_heatmap.png" % {"1":EAGB_INDUSTRY, "2":YEAR}, alpha = True)

    plt.show()
Example #11
0
def show_density(genre):
	"""Plot and save a choropleth of *genre* restaurant density in New York.

	Reads '<folder><genre> Restaurant.txt' (tab-separated), extracts the
	rating/lat/lon fields, counts restaurants per block polygon of
	m.newyork, classifies the per-km^2 densities with Jenks natural breaks
	and renders/saves the map.

	NOTE(review): written for Python 2 — len(filter(...)) and
	DataFrame.sort() below fail on Python 3 / modern pandas, and 'highest'
	is computed but never used. Flagged here rather than changed.
	"""
	infile=folder + genre + ' Restaurant.txt'

	#Extract all the data into a dict
	output = dict()
	output['lon']=[]
	output['lat']=[]
	output['rat']=[]

	with open(infile) as f:
		for line in f:
			LineList = line.split('\t')
			# Field 2 starts with the rating; field 6 is "<lat>,<lon>".
			output['rat'].append(LineList[2].split()[0])
			output['lat'].append(LineList[6].split(',')[0])
			output['lon'].append(LineList[6].split(',')[1])
		
	#Create a Pandas DataFrame
	df = pd.DataFrame(output)
	#Drop data contains None(Just for practice)
	df = df.dropna()
	df[['rat','lat','lon']] = df[['rat','lat','lon']].astype(float)
 	
	#Create Point objects in map coordinates from dataframe lon and lat values
	map_points = pd.Series([Point(m(mapped_x, mapped_y)) for mapped_x , mapped_y in zip(df['lon'], df['lat'])])

	rstrnt_points = MultiPoint(list(map_points.values))
	#print len(m.newyork[1]),type(m.newyork),m.newyork_info
	df_map = pd.DataFrame({
		'poly':[Polygon(xy) for xy in m.newyork]})
	df_map['area_m'] = df_map['poly'].map(lambda x:x.area)
	df_map['area_km'] = df_map['area_m']/1000000
	#prepared object
	wards_polygon = prep(MultiPolygon(list(df_map['poly'].values)))
	#Calculate points that fall within the New York boundary
	ny_points = filter(wards_polygon.contains,rstrnt_points)
	
	#########Creating a Choropleth Map, Normalised by Ward Area
	df_map['count'] = df_map['poly'].map(lambda x: int(len(filter(prep(x).contains, ny_points))))
	df_map['density_m'] = df_map['count'] / df_map['area_m']
	df_map['density_km'] = df_map['count'] / df_map['area_km']
	# it's easier to work with NaN values when classifying
	df_map.replace(to_replace={'density_m': {0: np.nan}, 'density_km': {0: np.nan}}, inplace=True)

	# Calculate Jenks natural breaks for density
	breaks = nb(
	    df_map[df_map['density_km'].notnull()].density_km.values,
	    initial=300,
	    k=5)
	# the notnull method lets us match indices when joining
	jb = pd.DataFrame({'jenks_bins': breaks.yb}, index=df_map[df_map['density_km'].notnull()].index)
	df_map = df_map.join(jb)
	df_map.jenks_bins.fillna(-1, inplace=True)

	# One label per break, plus a leading "no restaurant" entry.
	jenks_labels = ["<= %0.1f/km$^2$(%s blocks)" % (b, c) for b, c in zip(
	    breaks.bins, breaks.counts)]
	jenks_labels.insert(0, 'No restaurant (%s blocks)' % len(df_map[df_map['density_km'].isnull()]))
	plt.close()
	fig = plt.figure()
	ax = fig.add_subplot(111, axisbg='w', frame_on=False)
	# use a blue colour ramp - we'll be converting it to a map using cmap()
	cmap = plt.get_cmap('Blues')
	# draw wards with grey outlines
	df_map['patches'] = df_map['poly'].map(lambda x: PolygonPatch(x, ec='#555555', lw=.2, alpha=1., zorder=4))
	pc = PatchCollection(df_map['patches'], match_original=True)
	# impose our colour map onto the patch collection
	norm = Normalize()
	pc.set_facecolor(cmap(norm(df_map['jenks_bins'].values)))
	ax.add_collection(pc)

	# Add a colour bar
	cb = colorbar_index(ncolors=len(jenks_labels), cmap=cmap, shrink=0.5, labels=jenks_labels)
	cb.ax.tick_params(labelsize=6)

	# Show highest densities, in descending order
	# NOTE(review): DataFrame.sort() was removed from pandas; 'highest' is
	# also never displayed — dead code candidate.
	highest = '\n'.join(
	    str(value[1]) for value in df_map[(df_map['jenks_bins'] == 4)][:10].sort().iterrows())
	highest = 'Most Dense Blocks:\n\n' + highest

	# Draw a map scale
	m.drawmapscale(
	    coords[0] + 0.19, coords[1] + 0.015,
	    coords[0], coords[1],
	    10.,
	    barstyle='fancy', labelstyle='simple',
	    fillcolor1='w', fillcolor2='#555555',
	    fontcolor='#555555',
	    zorder=5)

	# this will set the image width to 722px at 100dpi
	plt.title(genre + " Restaurant Density, New York")
	plt.tight_layout()
	fig.set_size_inches(7.22, 5.25)
	plt.savefig('image/' + genre+'_Restaurants_Density_NewYork.png', dpi=100, alpha=True)
	plt.show()
Example #12
0
def create_figure(year):
    """Create a choropleth of health-board data for *year* and return the figure.

    Reads mappedData.csv, joins the rows for *year* to the SG NHS health
    board shapefile on board code, classifies the 'alcandmental' column
    (Jenks natural breaks when jenks is True, user-defined bins otherwise),
    draws the boards coloured by bin with a colorbar and map scale, saves
    the image as static/map_<year>combinedRatio.png and returns the figure.
    """
    data1 = pd.read_csv('mappedData.csv')
    data = data1[data1['year'] == year]

    # Open the shapefile only to read its bounding box.
    shp = fiona.open('static/shapefile/SG_NHS_HealthBoards_2018_WGS84.shp')

    # we can access the boundaries (the 2 lat,long pairs) using shp.bounds
    bds = shp.bounds

    # close the shp file
    shp.close()

    # padding used around the map when displayed (10% here)
    extra = 0.1

    # lower left hand boundary (longitude, latitude)
    ll = (bds[0], bds[1])

    # upper right hand boundary (longitude, latitude)
    ur = (bds[2], bds[3])

    # concatenate the lower left and upper right into coords
    coords = list(chain(ll, ur))

    # width and height of the map
    w, h = coords[2] - coords[0], coords[3] - coords[1]

    m = Basemap(
        # 'tmerc' is apparently less distorting when close-in
        projection='tmerc',

        # centre the projection on the average of the bounds
        lon_0=np.average([bds[0], bds[2]]),
        lat_0=np.average([bds[1], bds[3]]),

        # string describing the ellipsoid
        ellps='WGS84',

        # map boundaries, padded by the 'extra' buffer
        llcrnrlon=coords[0] - extra * w,
        llcrnrlat=coords[1] - extra + 0.01 * h,
        urcrnrlon=coords[2] + extra * w,
        urcrnrlat=coords[3] + extra + 0.01 * h,

        # latitude of 'true scale'
        lat_ts=0,

        # resolution of boundary database: c (crude), l, i, h, f or None
        resolution='i',

        # don't show the axis ticks automatically
        suppress_ticks=True)

    m.readshapefile(
        # path to the shapefile, without the .shp extension
        'static/shapefile/SG_NHS_HealthBoards_2018_WGS84',

        # name of the resulting Basemap attributes (m.scotland, m.scotland_info)
        'scotland',

        # default shape boundary coloring and zorder (layer order)
        color='none',
        zorder=2)

    # set up a map dataframe
    df_map = pd.DataFrame({

        # access the x,y coords and define a polygon for each item in m.scotland
        'poly': [Polygon(xy) for xy in m.scotland],
        # convert HBCode to a column called 'boardcode'
        'boardcode': [boardcode['HBCode'] for boardcode in m.scotland_info]
    })

    # add the polygon area
    df_map['area_m'] = df_map['poly'].map(lambda x: x.area / 1000)

    # convert meters to miles
    df_map['area_miles'] = df_map['area_m'] * 0.000621371
    data = data.rename(columns={'code': 'boardcode'})

    df_map = pd.merge(df_map, data, on='boardcode')
    jenks = True

    var_2_analyze = 'alcandmental'

    if jenks:
        # Calculate Jenks natural breaks for each polygon
        breaks = nb(
            # set the data to use
            df_map[df_map[var_2_analyze].notnull()][var_2_analyze].values,

            # number of initial solutions for the optimisation; adjust if
            # the bin results are unsatisfying
            initial=300,

            # number of natural breaks to apply
            k=14)

    else:
        # user-defined breaks; each bin value is the top of its range
        # (>25, >30, ...) — change to suit the data being analysed
        my_bins = [25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75]

        # Calculate the user defined breaks for our defined bins
        breaks = mapclassify.User_Defined(
            df_map[df_map[var_2_analyze].notnull()][var_2_analyze].values,
            my_bins)

    # drop a stale 'bins' column so it can be recreated from the new breaks
    # (keyword form: the positional axis argument was removed in pandas 2.0)
    if 'bins' in df_map.columns:
        df_map = df_map.drop(columns='bins')

    # the notnull method lets us match indices when joining;
    # b is a dataframe of the bins with the var_2_analyze index
    b = pd.DataFrame({'bins': breaks.yb},
                     index=df_map[df_map[var_2_analyze].notnull()].index)

    # join b back to df_map
    df_map = df_map.join(b)

    # and handle our NA's if there are any
    df_map.bins.fillna(-1, inplace=True)

    # labels differ between jenks and user-defined breaks
    if jenks:
        bin_labels = ["<= %0.0f" % b for b in breaks.bins]
    else:
        bin_labels = ["< %0.0f" % b for b in breaks.bins]

    # initialize the plot
    plt.clf()

    # figure with a white facecolor (background)
    fig = plt.figure(facecolor='white')

    # add a subplot called 'ax'
    ax = fig.add_subplot(111, facecolor='w', frame_on=False)

    # reversed purple colour ramp — we'll be converting it to a map using cmap()
    cmap = plt.get_cmap('Purples').reversed()

    # draw districts with grey outlines
    df_map['patches'] = df_map['poly'].map(
        lambda x: PolygonPatch(x, ec='#555555', lw=.2, alpha=1., zorder=4))

    # set the PatchCollection with our defined 'patches'
    pc = PatchCollection(df_map['patches'], match_original=True)

    # normalize our bins between the min and max values within the bins
    norm = Normalize(vmin=df_map['bins'].min(), vmax=df_map['bins'].max())

    # impose our color map onto the patch collection
    pc.set_facecolor(cmap(norm(df_map['bins'].values)))
    ax.add_collection(pc)

    # Add a color bar which has our bin_labels applied
    cb = colorbar_index(ncolors=len(bin_labels),
                        cmap=cmap,
                        shrink=0.5,
                        labels=bin_labels)
    # set the font size of the labels
    cb.ax.tick_params(labelsize=10)

    # Draw a map scale
    m.drawmapscale(
        # set the coordinates where the scale should appear
        coords[0] + 0.08,
        coords[1] + 0.215,
        coords[0],
        coords[1],
        # max value of the scale (1 mile)
        1.,
        barstyle='fancy',
        labelstyle='simple',
        fillcolor1='w',
        fillcolor2='#555555',
        fontcolor='#555555',
        zorder=5,
        # units for the scale; defaults to km
        units='mi')

    # set the layout to maximally fit the bounding area
    plt.tight_layout()

    # define the size of the figure
    fig.set_size_inches(5, 6)

    mapName = 'map_' + str(year) + 'combinedRatio.png'

    # save the figure; increase dpi for higher quality output.
    # FIX: the previous alpha=True is not a savefig keyword and raises a
    # TypeError on modern matplotlib (older versions silently ignored it).
    plt.savefig('static/' + mapName, dpi=500)
    return fig
                  right_index = True,
                  how = 'right')
# not fully satisfactory... loss of polygons

ls_disp_com_rg = ['available_surface_%s' %rg for rg in ls_rgs] +\
                 ['surface_%s' %rg for rg in ls_rgs]

# ###################
# DRAW AVAIL SURFACE
# ###################

for retail_group in ls_rgs:
  field = 'available_surface_%s' %retail_group
  
  breaks = nb(df_com[df_com[field].notnull()][field].values,
              initial=20,
              k=5)
  
  # zero excluded from natural breaks... specific class with val -1 (added later)
  df_com.replace(to_replace={'surface_%s' %retail_group: {0: np.nan}}, inplace=True)
  
  # the notnull method lets us match indices when joining
  jb = pd.DataFrame({'jenks_bins': breaks.yb}, index=df_com[df_com[field].notnull()].index)
  # need to drop duplicate index in jb
  jb = jb.reset_index().drop_duplicates(subset=['index'],
                                        take_last=True).set_index('index')
  # propagated to all rows in df_com with same index
  df_com['jenks_bins'] = jb['jenks_bins']
  df_com.jenks_bins.fillna(-1, inplace=True)
  
  jenks_labels = ["<= {:,.0f} avail surf. ({:d} mun.)".format(b, c)\
    if type(cmap) == str:
        cmap = get_cmap(cmap)
    colors_i = np.concatenate((np.linspace(0, 1., N), (0., 0., 0., 0.)))
    colors_rgba = cmap(colors_i)
    indices = np.linspace(0, 1., N + 1)
    cdict = {}
    for ki, key in enumerate(('red', 'green', 'blue')):
        cdict[key] = [(indices[i], colors_rgba[i - 1, ki], colors_rgba[i, ki]) for i in xrange(N + 1)]
    return matplotlib.colors.LinearSegmentedColormap(cmap.name + "_%d" % N, cdict, 1024)
    
#Let's make the map
# Mean sale price of the points that fall inside each ward polygon;
# d_price maps an (x, y) coordinate pair to its price.
df_map['Price'] = df_map['poly'].map(lambda x: np.mean([d_price[(i.x, i.y)] for i in filter(prep(x).contains, price_points)]))

#Calculate Jenks natural breaks for price
breaks = nb(
    df_map[df_map['Price'].notnull()].Price.values,
    initial=300,
    k=5)

#the notnull method lets us match indices when joining
jb = pd.DataFrame({'jenks_bins': breaks.yb}, index=df_map[df_map['Price'].notnull()].index)
df_map = df_map.join(jb)
# Wards with no recorded sales get the sentinel bin -1.
df_map.jenks_bins.fillna(-1, inplace=True)

#Let's convert prices in a more readable format (e.g GBP 1,500,000)
# Use the process's default locale for thousands grouping; the sample
# currency() call exercises/validates the locale setup.
locale.setlocale(locale.LC_ALL, '')
locale.currency(7000000, grouping=True )

# "\xA3" is the GBP sign; [1:-3] strips the locale's own currency symbol
# and the trailing pence digits from locale.currency's output.
jenks_labels = [u"\xA3" + "%s (%s wards)" % (locale.currency(b, grouping = True)[1:-3], c) for b, c in zip(
    breaks.bins, breaks.counts)]
jenks_labels.insert(0, 'No property sales registered\n(%s wards)' % len(df_map[df_map['Price'].isnull()]))
Example #15
0
        thecount += 1
try:
    myDataFrame = pd.DataFrame({"TheData": myArray})
    #La fonction "GetParameterAsText" invite l'utilisateur de nommer le fichier géographique
    #  ("feature class" ou "fc") sur lequel les opérations vont commencer.
    #Ce script utilise "IQH_FINAL" comme le champ des données sur lequel les opérations vont
    #   commencer ("field").
    #La calculation utilise les progiciels arcpy.da (analyse des données) et numpy.
    #Nonobstant que le tableau numérique consiste en nombres entiers, le tableau est
    #   transformé au format de point flottant ("float") parce que le progiciel PySAL
    #   a besoin de cette transformation pour l'intégrer avec la fonction KMEANS.
    #L'iteration "for-if-else" trie les valeurs "null" des vraies données, et après cette
    #   sortation, les données sont transferées en format de cadre des données pandas.

    print "Calcul des Jenks natural breaks..."
    breaks = nb(myDataFrame["TheData"].dropna().values, k=4, initial=20)
    #Le calcul des valeurs Jenks est produit par le progiciel pysal.  Tous les valeurs
    #   "null" sont sortis, et les données qui restent sont préparées pour l'analyse.
    #La paramètre k symbolise le nombre des classes la fonction Jenks va créer pour
    #   l'utilisateur.
    #La paramètre initial est le semence de la fonction Jenks.  Un valeur grand va
    #   converger la fonction plus vite; un valeur petit, d'autre part, va être plus exact.

    print "Vérification s'il y avait calculs précédents des champs de valeurs Jenks..."
    try:
        arcpy.DeleteField_management(fc, "Jenks")
        print "Calculs précédents des champs de valeurs Jenks effacés..."

    except Exception as e:
        print "Aucuns champs des valeurs Jenks trouvés..."
    #Cette iteration "try-except" efface les calculs précédents s'ils existent. Si un champ
        ungent_sample.append(True)
    else:
        ungent_sample.append(False)
    if float(ct_num) in ungent_cts_all:
        ungent_all.append(True)
    else:
        ungent_all.append(False)
# Flags built by the (preceding) per-tract loop.
df_map['is_gent'] = is_gent
df_map['ungent_sample'] = ungent_sample
df_map['ungent_all'] = ungent_all

# In[39]:

# Calculate Jenks natural breaks for density
breaks = nb(df_map[df_map['num_cafes'].notnull()].num_cafes.values,
            initial=300,
            k=5)
# the notnull method lets us match indices when joining
# NOTE(review): the column here is 'jenk_bins' (no 's') — confirm
# downstream code uses the same spelling.
jb = pd.DataFrame({'jenk_bins': breaks.yb},
                  index=df_map[df_map['num_cafes'].notnull()].index)
df_map = df_map.join(
    jb)  # these are already completed. Running 2nd time causes errors
#df_map.jenks_bins.fillna(-1, inplace=True)

# In[40]:

# Calculate Jenks natural breaks for density
breaks2 = nb(df_map[df_map['num_bizs'].notnull()].num_bizs.values,
             initial=10,
             k=5)
# the notnull method lets us match indices when joining
### Binning

# change False to True to use Jenks binning
# NOTE(review): 'if jenks:' would be the idiomatic truth test below.
jenks = True

# specify variable that will be plotted
var_2_analyze = 'state_results'

if jenks == True:
    # Calculate Jenks natural breaks for each polygon
    breaks = nb(
        # set the data to use (only the non-null values of the chosen column)
        df_map[df_map[var_2_analyze].notnull()][var_2_analyze].values,

        # since this is an optimization function we need to give it a number of initial solutions to find.
        # you can adjust this number if you are unsatisfied with the bin results
        initial=300,

        # k is the number of natural breaks you would like to apply. I've set it to 10, but you can change.
        k=10)

else:
    # Define my own breaks [even split each 20 percentage points] Note that the bins are the top range so >20, >40, etc
    # you can change the bins to whatever you like, though they should be based on the data you are analyzing
    # since I am going to plot data on a 0 to 100 scale, I chose these break points
    my_bins = [20,40,60,80,100]
    
    # Calculate the user defined breaks for our defined bins
    breaks = mapclassify.User_Defined(
               
            # set the data to use 
Example #18
0
# Overall bounding box of all geometries, taken from the per-row bounds columns.
bounds_dataframe.columns = ['MinX', 'MinY', 'MaxX', 'MaxY']
min_x = bounds_dataframe['MinX'].min()
min_y = bounds_dataframe['MinY'].min()
max_x = bounds_dataframe['MaxX'].max()
max_y = bounds_dataframe['MaxY'].max()

# Convert the projected corner points back to lon/lat (Basemap inverse transform).
lower_point = m(min_x, min_y, inverse=True)
upper_point = m(max_x, max_y, inverse=True)

llcrnrlon = lower_point[0]
llcrnrlat = lower_point[1]
urcrnrlon = upper_point[0]
urcrnrlat = upper_point[1]

# Jenks natural breaks (6 classes) over the non-null parking densities.
breaks = nb(
        df_map[df_map['density_km'].notnull()].density_km.values,
        initial=300,
        k=6)

# the notnull filter lets us match indices when joining
jb = pd.DataFrame({'jenks_bins': breaks.yb}, index=df_map[df_map['density_km'].notnull()].index)
df_map = df_map.join(jb)
df_map.jenks_bins.fillna(-1, inplace=True)  # districts with no data get class -1

# One label per class: upper bound plus the number of districts in the class.
jenks_labels = ["<= %.3f/km$^2$(%s districts)" % (b, c) for b, c in zip(breaks.bins, breaks.counts)]
jenks_labels.insert(0, 'Parking density (%s districts)' % len(df_map[df_map['density_km'].isnull()]))

plt.clf()
fig = plt.figure()
# BUG FIX: 'axisbg' was deprecated in matplotlib 2.0 and later removed;
# 'facecolor' is the supported keyword with identical behavior.
ax = fig.add_subplot(111, facecolor='w', frame_on=False)

# use a blue colour ramp - we'll be converting it to a map using cmap()
cmap = plt.get_cmap('Blues')
    else:
        ungent_sample.append(False)
    if float(ct_num) in ungent_cts_all:
        ungent_all.append(True)
    else:
        ungent_all.append(False)
# Attach the gentrification flag lists (built above) as boolean columns.
df_map['is_gent'] = is_gent
df_map['ungent_sample'] = ungent_sample
df_map['ungent_all'] = ungent_all


# In[39]:

# Calculate Jenks natural breaks for cafe density (5 classes).
breaks = nb(
    df_map[df_map['num_cafes'].notnull()].num_cafes.values,
    initial=300,
    k=5)
# the notnull method lets us match indices when joining
# NOTE(review): the column created here is 'jenk_bins', but the commented-out
# fillna below refers to 'jenks_bins' -- one of the two names is likely a typo.
jb = pd.DataFrame({'jenk_bins': breaks.yb}, index=df_map[df_map['num_cafes'].notnull()].index)
df_map = df_map.join(jb) # already completed once; running a 2nd time causes errors
#df_map.jenks_bins.fillna(-1, inplace=True)


# In[40]:

# Calculate Jenks natural breaks for business density (5 classes).
breaks2 = nb(
    df_map[df_map['num_bizs'].notnull()].num_bizs.values,
    initial=10,
    k=5)
# the notnull method lets us match indices when joining
# Build the map frame: one shapely Polygon per Milano grid square, keyed by ID.
df_map = pd.DataFrame({
    'poly': [Polygon(xy) for xy in m.Milano],
    'square_id': [square['ID'] for square in m.Milano_info]})
df_map['area_m'] = df_map['poly'].map(lambda x: x.area)  # projected units (m^2)
df_map['area_km'] = df_map['area_m'] / 1000000           # m^2 -> km^2

# Attach the polygon/area columns to the grouped outgoing-call counts.
calls = pd.merge(groupedCalls, df_map, how = 'left', on = 'square_id', sort = False)
calls['density_km'] = calls['callsOut'] / calls['area_km']
# Zero densities become NaN so they fall into the 'No calls made' class below.
calls.replace(to_replace={'density_km': {0: np.nan}}, inplace=True)


# Calculate Jenks natural breaks for density
# Classification scheme for choropleth mapping
cuts = 5
breaks = nb(
    calls[calls['density_km'].notnull()].density_km.values,
    initial = 300, # number of initial solutions to generate
    k = cuts) # number of classes required
# The notnull method lets match indices when joining
jb = pd.DataFrame({'jenks_bins': breaks.yb}, index = calls[calls['density_km'].notnull()].index)
calls = calls.join(jb)

# Possible levels of the bins; -1 marks the NaN / 'no calls' class.
# BUG FIX: range() is not a list on Python 3, so 'range(cuts) + [-1]' raises
# TypeError there; list(range(cuts)) behaves identically on Python 2 and 3.
binlevels = list(range(cuts)) + [-1]


# Create a sensible label for classes
# Show density/square km, as well as the number of squares in the class
jenks_labels = ["<= %0.1f/km$^2$" % (b) for b in breaks.bins]
jenks_labels.insert(0, 'No calls made')

# Sorted list of the 15 min time intervals
times = sorted(set(calls['time'])) # 96 time intervals (15 x 4 x 24)
Example #21
0
# could take merge approach  + test with zagaz
# Count fuel stations per departement; departements absent from the grouping stay NaN.
pd_df_dpts['dpt_nb_stations'] = np.nan
grouped_dpt = pd_df_master_info.groupby('dpt')
for dpt, group in grouped_dpt:
  # BUG FIX: .ix was removed from pandas, and the chained form
  # df[col].ix[i] = v could silently fail to write through to the frame;
  # .loc[row, col] is the supported single-step assignment.
  pd_df_dpts.loc[dpt, 'dpt_nb_stations'] = len(group)

# density (different definitions)
pd_df_dpts['density_area'] = pd_df_dpts['dpt_nb_stations'] / pd_df_dpts['area']
pd_df_dpts['density_pop'] = pd_df_dpts['dpt_nb_stations'] /\
                              pd_df_dpts['Population municipale 2007 POP_MUN_2007']

for density_field in ('density_area', 'density_pop'):
  # Easier to work with NaN values when classifying
  pd_df_dpts.replace(to_replace={density_field: {0: np.nan}}, inplace=True)
  # Calculate Jenks natural breaks for density
  breaks = nb(pd_df_dpts[pd_df_dpts[density_field].notnull()][density_field].values, initial=300, k=5)
  # The notnull method lets us match indices when joining
  # NOTE(review): on the second loop pass 'jenks_bins' already exists in
  # pd_df_dpts, so this join will raise on column overlap -- consider dropping
  # the column at the top of the loop; verify downstream usage first.
  jb = pd.DataFrame({'jenks_bins': breaks.yb}, index=pd_df_dpts[pd_df_dpts[density_field].notnull()].index)
  pd_df_dpts = pd_df_dpts.join(jb)
  pd_df_dpts.jenks_bins.fillna(-1, inplace=True)

  fig, ax = plt.subplots()
  m_france.drawcountries()
  m_france.drawcoastlines()
  # Grey polygon patches for each departement (recoloured later via a colormap).
  pd_df_dpts['patches'] = pd_df_dpts['poly'].map(lambda x: PolygonPatch(x,
                                                                        fc='#555555',
                                                                        ec='#787878', 
                                                                        lw=.25, 
                                                                        alpha=.9,
                                                                        zorder=4))
  cmap = plt.get_cmap('Blues')
Example #22
0
def mP_data(flnm, colName, df, imp = None):
    """Draw a choropleth map of df[colName] over the shapefile <flnm>.shp.

    flnm    : shapefile base name (without the .shp extension)
    colName : column of `df` classified with Jenks natural breaks and plotted
    df      : DataFrame whose 'Sector' column matches the shapefile 'label's
    imp     : if None, perform the local imports this function relies on
    """
    num_colors = 10

    # Local imports keep the function self-contained when imp is None.
    if imp is None:
        import pandas as pd
        import matplotlib.pyplot as plt
        import matplotlib.colors as colors
        from shapely.geometry import Point, Polygon, MultiPoint, MultiPolygon
        from pysal.esda.mapclassify import Natural_Breaks as nb
        from matplotlib.collections import PatchCollection
        from descartes import PolygonPatch
        import fiona
        from itertools import chain

    # Read the shapefile bounds and pad them slightly for the map frame.
    shp = fiona.open(flnm+'.shp')
    bds = shp.bounds
    extra = 0.02

    # If the CRS units are meters, convert the bounding box to lon/lat degrees.
    if 'units' in shp.crs and shp.crs['units'] == 'm':
        print('Unit is meters, converting boundaries')
        conv = Basemap()
        ll = conv(bds[0],bds[1],inverse=True)
        ur = conv(bds[2],bds[3],inverse=True)
        print(shp.crs)
    else:
        ll = (bds[0], bds[1])
        ur = (bds[2], bds[3])

    # shp.close()  # NOTE(review): handle left open; fiona supports 'with'
    coords = list(chain(ll, ur))

    w, h = coords[2] - coords[0], coords[3] - coords[1]

    # Check proj4, .prj file...
    m = Basemap(
        projection='tmerc',
        lon_0=-2.,
        lat_0=49.,
        ellps = 'WGS84',
        llcrnrlon=coords[0] - extra * w,
        llcrnrlat=coords[1] - extra + 0.01 * h,
        urcrnrlon=coords[2] + extra * w,
        urcrnrlat=coords[3] + extra + 0.01 * h,
        lat_ts=0,
        resolution='i',
        suppress_ticks=False)

    m.readshapefile(
        flnm,
        'map',
        color='none',
        zorder=2)

    # DataFrame of the shapefile's attribute records, restricted to the rows
    # whose 'label' appears in df['Sector'].
    temp_df = pd.DataFrame()
    for dicti in m.map_info:
        temp_df = temp_df.append(pd.Series(dicti),ignore_index=True)

    i1 = temp_df.set_index('label').index
    i2 = df.set_index('Sector').index
    temp_df = temp_df[i1.isin(i2)]

    # set up a map dataframe of the polygons themselves
    df_map = pd.DataFrame({'poly': [Polygon(xy) for xy in m.map]})
    df_map['area_m'] = df_map['poly'].map(lambda x: x.area)

    # Select only the part that corresponds to the imported dataframe of data
    df_map = pd.concat([df_map, temp_df], axis=1, join='inner')

    # NOTE(review): m^2 -> km^2 would divide by 1e6; /1e4 yields hectares.
    # Kept as-is -- confirm the intended unit before changing.
    df_map['area_km'] = df_map['area_m'] / 10000.

    # BUG FIX: the warning used to fire when the lengths DID match ('==');
    # it must fire when the two frames disagree.
    if len(df_map.index) != len(df.index):
        print('!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!')
        print('!! WARNING : SHAPE OF DATAFRAMES NOT CONSISTENT !!')
        print('!! --- check: df_map  and df in mapDataPlot --- !!')
        print('!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!')

    # Merge map polygons with the data on label <-> Sector.
    df_map = pd.merge(left=df_map, right=df, left_on='label', right_on='Sector',
                      how='inner')

    print('... built map frame ...')

    ## Calculate Jenks natural breaks over the notnull values of colName.
    prices = df_map[df_map[colName].notnull()][colName].tolist()

    breaks = nb(prices,
                initial=250,  # number of initial solutions in iterative Jenks algo
                k=num_colors)

    print('Calculating Jenks Natural breaks for binning')
    jenbin = pd.DataFrame({'jenks_bins': breaks.yb}, index=df_map[df_map[colName].notnull()].index)
    df_map = df_map.join(jenbin)
    df_map.jenks_bins.fillna(-1, inplace=True)  # rows with no data get class -1

    # draw ward patches from polygons
    print('Building Patches')
    df_map['patches'] = df_map['poly'].map(lambda x: PolygonPatch(
        x,
        fc='0.33',
        edgecolor='black', lw=.33,
        alpha=.9))

    print('Last touches of color, with Jenks')

    # Set facecolors from the Jenks bins via a normalized colormap.
    colorm = plt.get_cmap('bwr')
    norm = colors.Normalize()
    pc = PatchCollection(df_map['patches'], match_original=True)
    pc.set_facecolor(colorm(norm(df_map['jenks_bins'].values)))

    print('labels and scale')

    # Prepare the plt plot and axes
    plt.clf()
    fig = plt.figure()
    ax = fig.add_subplot(111, fc='w', frame_on=False)

    # Add a colour bar
    cb = colorbar_index(num_colors, colorm, shrink=0.5)
    cb.ax.tick_params(labelsize=6)

    print("get_bounds")
    newcoords = get_bounds(m,df_map)
    print(newcoords)

    # DOESN'T WORK, DOES NOT ACCEPT OSGB COORDS.
    # m.drawmapscale(
    #     newcoords[0], newcoords[1],
    #     coords[0], coords[1],
    #     10.,
    #     barstyle='fancy', labelstyle='simple',
    #     fillcolor1='w', fillcolor2='#555555',
    #     fontcolor='#555555',
    #     zorder=5)

    # Primitive scale bar: a line roughly a quarter of the map width, rounded
    # down to whole kilometres, drawn near the lower-left corner.
    legendy  = newcoords[2]*0.98
    legendx0 = newcoords[0]*0.98
    leglength = np.floor((newcoords[1]-newcoords[0])/4/1000)
    legendx1 = newcoords[0]*0.98 + leglength*1000.

    ax.plot([legendx0, legendx1], [legendy, legendy], 'k-', lw=2)
    ax.text(legendx0, legendy*0.96, '0')
    ax.text(legendx1, legendy*0.96, str(int(leglength)))

    ax.add_collection(pc)

    print("axes and plotting!")

    ax.axis('auto'); ax.axis('off')
    # set aspect ratio from the lon/lat extents
    ax.set_aspect((newcoords[1]-newcoords[0]) / (newcoords[3]-newcoords[2]))
    plt.show()

    return
Example #23
0
    def computeTransportNaturalBreaks(self, rawtransfile):
        """Classify a raw transport raster into 4 Jenks classes, crop it to the
        AOI, and remap the classes into the final TRANS evaluation raster.

        rawtransfile: path of the input transport GeoTIFF.
        Side effects: writes tmp/classified-transport.tiff,
        classified-transport.tiff and evals/TRANS/TRANS.tiff under the
        configured output directory.
        """
        print("Computing Natural breaks on Transport..")

        # Download the area-of-interest file and collect its geometries for cropping.
        myDataDownloader = DataHelper.DataDownloader()
        localfile = myDataDownloader.downloadFiles([config.settings['aoi']])

        with fiona.open(localfile, "r") as aoi:
            geoms = [feature["geometry"] for feature in aoi]

        classfiedtranstmppath = os.path.join(self.cwd,config.settings['outputdirectory'],'tmp','classified-transport.tiff')

        # Jenks natural breaks (k=4) over the valid (non-None, non-NaN) pixels.
        # NOTE(review): 'breaks'/'bins' are overwritten on every band, so only the
        # last band's break points survive -- confirm the input is single-band.
        with rasterio.open(rawtransfile) as src:
            profile = src.profile
            bands = src.read()
            for band in bands:
                b = band[(band != np.array(None)) & (np.logical_not(np.isnan(band))) ]
                breaks = nb(b.ravel(),k=4,initial=1)
                bins = breaks.bins.tolist()
        
        # bins.insert(1,-1) # add -1 to the beginning of the breaks
        # print bins
        print("Writing new Transport with Natural break classes..")
        # Replace every pixel with its break-class index, in place, then write
        # the classified bands to the temporary file.
        # NOTE(review): the per-pixel np.nditer loop is slow; a vectorised
        # np.digitize(band, bins) looks equivalent -- verify masked-value
        # handling before changing.
        with rasterio.open(rawtransfile) as src:
            profile = src.profile
            bands = src.read(masked=True)
            for band in bands: 

                for x in np.nditer(band, op_flags=['readwrite']):
                    x[...] = np.digitize(x,bins)

                # Reproject and write each band

            with rasterio.open(classfiedtranstmppath, 'w', **profile) as dst:
                dst.write(bands)

        classfiedtranspath = os.path.join(self.cwd,config.settings['outputdirectory'],'classified-transport.tiff')

        # Crop the classified raster to the AOI geometries and write it out.
        print("Cropping Transport..")
        with rasterio.open(classfiedtranstmppath) as trans_src:
            trans_out_image, trans_out_transform = mask(trans_src, geoms, crop=True)
            trans_out_meta = trans_src.meta.copy()
            trans_out_meta.update({"driver": "GTiff",
                             "height": trans_out_image.shape[1],
                             "width": trans_out_image.shape[2],
                             "transform": trans_out_transform})

        with rasterio.open(classfiedtranspath, "w", **trans_out_meta) as trans_dest:
            trans_dest.write(trans_out_image)

        # Jenks class -> evaluation score (1->2, 2->3, 3->1, 4->1).
        TransClassification = dict([(1,2),(2,3),(3,1),(4,1)])

        print("Reclassing Transport file..")

        finaltransevalpath = os.path.join(self.cwd,config.settings['outputdirectory'],'evals','TRANS', 'TRANS.tiff')

        # Apply the remapping to every pixel and write the final float32 raster.
        with rasterio.open(classfiedtranspath) as transnogdhsrc:
            classifiedprofile = transnogdhsrc.profile
            classifiedbands = transnogdhsrc.read()
            classifiedbands1 = np.vectorize(TransClassification.get)(classifiedbands)
            classifiedbands2 = classifiedbands1.astype(np.float32)

            with rasterio.open(finaltransevalpath, 'w', **classifiedprofile) as classifieddst:
                classifieddst.write(classifiedbands2)
        print("Reclassing completed")
        print("...")
    def GenerateMap(self, inputFile):

        start = time.time()

        #Create file which hold statistics for each inputFile (containing mean/median travel times, std, min, max etc.)
        self.statistics = self.createStatistics()
        self.basename = os.path.basename(inputFile)[:-4]
        AttributeParameter = self.A
        coords = self.coords

        #Format figure
        plt.clf()
        fig = plt.figure()

        #Picture frame for Map
        gs = gridspec.GridSpec(12, 12)
        ax = plt.subplot(gs[:,:],axisbg='w', frame_on=False)

        try:
            #Read MetropAccess-matka-aikamatriisi data in
            MatrixData = pd.read_csv(inputFile, sep=';')

            #Join data to shapefile (pandas 'merge' function)
            df_map = pd.merge(left=self.Y, right=MatrixData, how='outer', left_on='YKR_ID', right_on='from_id')

            #CLASSIFY MATRIX DATA
            #Replace -1 values
            df_map.replace(to_replace={AttributeParameter: {-1: np.nan}}, inplace=True)

            #Data for histogram
            histData = pd.DataFrame(df_map[df_map[AttributeParameter].notnull()][AttributeParameter].values)

            maxBin = max(df_map[df_map[AttributeParameter].notnull()][AttributeParameter].values) #.AttributeParameter.values)
            NoData = int(maxBin+1)
            NullCount = len(df_map[df_map[AttributeParameter].isnull()])
            NullP = (NullCount/13230.0)*100

            #Fill NoData values with maxBin+1 value
            df_map[AttributeParameter].fillna(NoData, inplace=True)

            #Manual classification
            if not self.Cl in ['Natural Breaks', 'Quantiles', "Fisher's Jenks"]:
                #Create bins for classification based on chosen classification method
                Manual = True

                if "time" in AttributeParameter:
                    measure = "min"
                    measure2 = "minutes" #Another string-form for summary
                    titleMeas = "time"

                    if self.Cl == "10 Minute Equal Intervals":

                        #Calculate the highest class (10 minutes * Number of classes)
                        maxClass = 10*self.Nclasses

                        #Create 'higher than' info for the colorbar
                        maxClassInfo = str(maxClass-10)

                        #Create array of bins from 0 to highest class with increments of 10
                        bins = np.arange(10, maxClass, 10)

                        #Add extra classes for No Data and higher than maxClass values
                        if maxBin < maxClass:
                            bins = list(np.append(bins, [maxClass+1, maxClass+2]))
                        else:
                            bins = list(np.append(bins, [maxBin, maxBin+1]))

                    elif self.Cl == "5 Minute Equal Intervals":

                        #Calculate the highest class (10 minutes * Number of classes)
                        maxClass = 5*self.Nclasses

                        #Create 'higher than' info for the colorbar
                        maxClassInfo = str(maxClass-5)

                        #Create array of bins from 0 to highest class with increments of 5
                        bins = np.arange(5, maxClass, 5)

                        #Add extra classes for No Data and higher than maxClass values
                        if maxBin < maxClass:
                            bins = list(np.append(bins, [maxClass+1, maxClass+2]))
                        else:
                            bins = list(np.append(bins, [maxBin, maxBin+1]))

                elif "dist" in AttributeParameter:

                    measure = "km"
                    measure2 = "kilometers"
                    titleMeas = "distance"

                    if self.Cl == "5 Km Equal Intervals":

                        #Calculate the highest class (5000 meters * Number of classes)
                        maxClass = 5000*self.Nclasses

                        #Create 'higher than' info for the colorbar
                        maxClassInfo = str((maxClass-5000)/1000)

                        #Create array of bins from 0 to highest class with increments of 5000 (meters)
                        bins = np.arange(5000, maxClass, 5000)

                        #Add extra classes for No Data and higher than maxClass values
                        if maxBin < maxClass:
                            bins = list(np.append(bins, [maxClass+1, maxClass+2]))
                        else:
                            bins = list(np.append(bins, [maxBin, maxBin+1]))

                    elif self.Cl == "10 Km Equal Intervals":

                        #Calculate the highest class (5000 meters * Number of classes)
                        maxClass = 10000*self.Nclasses

                        #Create 'higher than' info for the colorbar
                        maxClassInfo = str((maxClass-10000)/1000)

                        #Create array of bins from 0 to highest class with increments of 5000 (meters)
                        bins = np.arange(0, maxClass, 10000)

                        #Add extra classes for No Data and higher than maxClass values
                        if maxBin < maxClass:
                            bins = list(np.append(bins, [maxClass+1, maxClass+2]))
                        else:
                            bins = list(np.append(bins, [maxBin, maxBin+1]))

                #Classify data based on bins
                breaks = mc.User_Defined(df_map[df_map[AttributeParameter].notnull()][AttributeParameter], bins)

            else:
                Manual = False

                if self.Cl == 'Natural Breaks':
                    breaks = nb(df_map[df_map[AttributeParameter].notnull()][AttributeParameter],initial=100, k=self.Nclasses)
                elif self.Cl == 'Quantiles':
                    breaks = Quantiles(df_map[df_map[AttributeParameter].notnull()][AttributeParameter], k=self.Nclasses)
                elif self.Cl == "Fisher's Jenks":
                    breaks = fj(df_map[df_map[AttributeParameter].notnull()][AttributeParameter], k=self.Nclasses)

                bins = list(breaks.bins)

                if "time" in AttributeParameter:
                    measure = "min"
                    measure2 = "minutes" #Another string-form for summary
                    titleMeas = "time"
                    maxClassInfo = str(bins[-2])
                else:
                    measure = "km"
                    measure2 = "kilometers"
                    titleMeas = "distance"
                    maxClassInfo = str(bins[-2]/1000)

                bins.append(maxBin)
                bins.append(maxBin)


            #the notnull method lets us match indices when joining
            jb = pd.DataFrame({'jenks_bins': breaks.yb}, index=df_map[df_map[AttributeParameter].notnull()].index)
            df_map = df_map.join(jb)

            brksBins = bins[:-1]#breaks.bins[:-1] #Do not take into account NoData values

            if measure2 == "kilometers": #Convert meters (in data) to kilometers for legend
                b = [round((x/1000),0) for x in brksBins]
                brksBins = b
                del b

            brksCounts = breaks.counts[:-1] #Do not take into account NoData values

            #Check if brksCounts and brksBins dismatches --> insert 0 values if necessary (to match the counts)
            if len(brksBins) != len(brksCounts):
                dif = len(brksBins)-len(brksCounts)
                brksCounts = np.append(brksCounts,[0 for x in xrange(dif)])
            else:
                dif=0

            #List for measures which will be inserted to class labels
            measureList = [measure for x in xrange(len(brksBins))]

            #Class labels
            jenks_labels = ["%0.0f %s (%0.1f %%)" % (b, msr, (c/13230.0)*100) for b, msr, c in zip(brksBins[:-1],measureList[:-1],brksCounts[:-1])]

            if Manual == True:
                if "dist" in AttributeParameter:
                    jenks_labels.insert(int(maxBin), '>' + maxClassInfo +' km (%0.1f %%)' % ((brksCounts[-1]/13230.0)*100))
                else:
                    jenks_labels.insert(int(maxBin), '>'+ maxClassInfo +' min (%0.1f %%)' % ((brksCounts[-1]/13230.0)*100))

            jenks_labels.insert(NoData, 'NoData (%0.1f %%)' % (NullP))

            #Use modified colormap ('my_colormap') - Choose here the default colormap which is used as a startpoint --> cm.YourColor'sName (eg. cm.Blues) - See available Colormaps: http://matplotlib.org/examples/color/colormaps_reference.html
            cmap = self.my_colormap(cm.RdYlBu, len(bins))

            #Draw grid with grey outlines
            df_map['Grid'] = df_map['poly'].map(lambda x: PolygonPatch(x, ec='#555555', lw=.2, alpha=1., zorder=4)) #RGB color-codes can be found at http://www.rapidtables.com/web/color/RGB_Color.htm
            pc = PatchCollection(df_map['Grid'], match_original=True)

            #-----------------------------
            #Reclassify data to value range 0.0-1.0 (--> colorRange is 0.0-1.0)
            if Manual == True:

                colbins = np.linspace(0.0,1.0, len(bins))
                colbins = colbins-0.001
                colbins[0], colbins[-1] = 0.0001, 1.0

                reclassification = {}
                for index in range(len(bins)):
                    reclassification[index] = colbins[index]

                reclassification['_reclassify'] = self.reclassify

                reclass = []
                dataList = list(df_map['jenks_bins'])

                for value in dataList:
                    reclass.append(self.reclassify(reclassification, value))

                df_map['jenks_binsR'] = reclass
            else:
                norm = Normalize()
                df_map['jenks_binsR'] = norm(df_map['jenks_bins'].values)

            #-----------------------------

            #Impose colour map onto the patch collection
            pc.set_facecolor(cmap(df_map['jenks_binsR'].values))

            #Add colored Grid to map
            ax.add_collection(pc)

            #Add coastline to the map
            self.C['Polys'] = self.C['poly'].map(lambda x: PolygonPatch(x, fc='#606060', ec='#555555', lw=.25, alpha=.88, zorder=4)) #Alpha adjusts transparency, fc='facecolor', ec='edgecolor'
            cpc = PatchCollection(self.C['Polys'], match_original=True)
            ax.add_collection(cpc)

            #Add roads to the map
            for feature in self.R:
                xx,yy=feature.xy
                self.B.plot(xx,yy, linestyle='solid', color='#606060', linewidth=0.7, alpha=.6)

            #Add metro to the map
            for line in self.M: #metroLines is a shapely MultiLineString object consisting of multiple lines (is iterable)
                x,y=line.xy
                self.B.plot(x,y, color='#FF2F2F', linewidth=0.65, alpha=.4)

            #----------------------
            #GENERATE TARGET POINT
            #----------------------
            #Generate YKR_ID from csv name
            ykrID = int(self.basename.split('_')[2])

            #Find index of target YKR_ID
            tIndex = df_map.YKR_ID[df_map.YKR_ID == ykrID].index.tolist()
            trow = df_map[tIndex[0]:tIndex[0]+1]
            targetPolygon = trow.poly
            centroid = targetPolygon.values[0].centroid #Get centroid of the polygon --> Returns shapely polygon point-type object

            self.B.plot(
                centroid.x,centroid.y,
                'go', markersize=3, label="= Destination")

            #-----------------------------
            #LEGEND
            #-----------------------------

            #Draw a map scale
            self.B.drawmapscale(
                coords[0] + 0.47, coords[1] + 0.013, #Etäisyys vasemmalta, etäisyys alhaalta: plussataan koordinaatteihin asteissa
                coords[0], coords[1],
                #10.,
                10.,
                barstyle='fancy', labelstyle='simple',yoffset=200, #yoffset determines the height of the mapscale
                fillcolor1='w', fillcolor2='#909090', fontsize=6,  # black= #000000
                fontcolor='#202020',
                zorder=5)

            #Set up title
            if "PT" in AttributeParameter:
                tMode = "public transportation"
            elif "Car" in AttributeParameter:
                tMode = "car"
            elif "Walk" in AttributeParameter:
                tMode = "walking"

            titleText = "Travel %s to %s (YKR-ID) \n by %s" % (titleMeas,str(ykrID),tMode)
            plt.figtext(.852,.735,
                        titleText, size=9.5)


            #Plot copyright texts
            copyr = "%s MetropAccess project, University of Helsinki, 2014\nLicensed under a Creative Commons Attribution 4.0 International License" % (unichr(0xa9))

            plt.figtext(.24,.078,copyr,fontsize=4.5)

            #----------------
            #Add a colour bar
            #----------------

            #Set arbitary location (and size) for the colorbar
            axColor = plt.axes([.86, .15, .016,.52]) #([DistFromLeft, DistFromBottom, Width, Height])

            cb = self.colorbar_index(ncolors=len(jenks_labels), cmap=cmap, labels=jenks_labels, cax=axColor)#, shrink=0.5)#, orientation="vertical", pad=0.05,aspect=20)#,cax=cbaxes) #This is a function --> see at the beginning of the code. #, cax=cbaxes shrink=0.5,
            cb.ax.tick_params(labelsize=5.5)

            #Inform travel sum of the whole grid (i.e. centrality of the location)
            #Travel time
            if measure2 == "minutes":
                tMean = histData.mean().values[0]
                tMedian=histData.median().values[0]
                tMax = histData.max().values[0]
                tMin = histData.min().values[0]
                tStd=histData.std().values[0]
                travelSummary = "Summary:"
                travelMean = "Mean: %0.0f %s" % (tMean, measure2)
                travelMedian = "Median: %0.0f %s" % (tMedian, measure2)
                travelStd = "Std: %0.0f %s" % (tStd,measure2)
                travelRange = "Range: %0.0f-%0.0f %s" % (tMin,tMax,measure2)

            #Travel distance
            else:
                h = histData.values/1000
                histData = pd.DataFrame(h)
                del h
                tMean = histData.mean().values[0]
                tMedian=histData.median().values[0]
                tMax = histData.max().values[0]
                tMin = histData.min().values[0]
                tStd=histData.std().values[0]
                travelSummary = "Summary:"
                travelMean = "Mean: %0.1f %s" % (tMean, measure2)
                travelMedian = "Median: %0.1f %s" % (tMedian, measure2)
                travelStd = "Std: %0.1f %s" % (tStd,measure2)
                travelRange = "Range: %0.1f-%0.1f %s" % (tMin,tMax,measure2)


            #Write information to a statistics file
            mInfo = "%s;%0.0f;%0.0f;%0.0f;%0.0f;%0.0f\n" % ( str(ykrID), tMean, tMedian, tStd, tMin, tMax)
            self.writeStatistics(mInfo)

            #Helper variables for moving Summary statistic texts
            initialPos = .58 #.15  #.44
            initialXPos = .975 #.20 #.97
            textSize = 5.25
            split = 0.018

            #Plot Travel Summary title
            plt.figtext(initialXPos, initialPos+split*4,
                       travelSummary, ha='left', va='bottom', color='#404040', size=textSize, style='normal',fontweight='bold')

            #Plot Travel Summary mean
            plt.figtext(initialXPos, initialPos+split*3,
                       travelMean,ha='left', va='bottom', size=textSize, color='b')

            #Plot Travel Summary median
            plt.figtext(initialXPos, initialPos+split*2,
                       travelMedian,ha='left', va='bottom', size=textSize, color='r')

            #Plot Travel Summary Standard deviation
            plt.figtext(initialXPos, initialPos+split,
                       travelStd,ha='left', va='bottom', size=textSize)

            #Plot Travel Summary Range
            plt.figtext(initialXPos, initialPos,
                       travelRange,ha='left', va='bottom', size=textSize)

            #Plot Legend symbol
            ax.legend(bbox_to_anchor=(.97, 0.07), fontsize=5.5, frameon=False, numpoints=1) #1.265     bbox_to_anchor=(x,y)  --> arbitary location for legend, more info: http://matplotlib.org/api/legend_api.html

            #--------------------------------------------------------
            #Travel time and population (catchment areas) histograms
            #--------------------------------------------------------

            #New axes for travel time/distance histogram
            ax = plt.axes([.98, .39, .16, .14], axisbg='w') #([DistFromLeft, DistFromBottom, Width, Height])

            #Add histogram
            n, bins, patches = ax.hist(histData.values, 100, normed=False, facecolor='green', alpha=0.75, rwidth=0.5, orientation="vertical")
            ax.axvline(histData.median(), color='r', linestyle='solid', linewidth=1.8)
            ax.axvline(histData.mean(), color='b', linestyle='solid', linewidth=1.0)

            if measure2 == "minutes":
                ax.set_xlabel("t(min)", fontsize=5,labelpad=1.5)
                xupLim = 250 #upper limit for x-axis
            else:
                ax.set_xlabel("km", fontsize=5,labelpad=1.5)
                xupLim = 100 #upper limit for x-axis

            #Set valuelimits for axes
            ax.set_xlim(0,xupLim-30)

            if max(n) < 1000: #ymax will be set to 1000 if count of individual bin is under 1000, else 1500
                yMax = 1000
            else:
                yMax = 1600

            ax.set_ylim(0,yMax)

            #Set histogram title
            plt.figtext(.975, .535,
                        "Travel %s histogram" % titleMeas,ha='left', va='bottom', size=5.7, style='italic')

            #Adjust tick font sizes and set yaxis to right
            ax.tick_params(axis='both', direction="out",labelsize=4.5, pad=1,
                           labelright=True,labelleft=False, top=False, left=False,
                           color='k', length=3, width=.9)

            ax.xaxis.set_ticks(np.arange(0,xupLim-30,30))

            gridlines = ax.get_xgridlines()
            gridlines.extend( ax.get_ygridlines() )

            for line in gridlines:
                line.set_linewidth(.28)
                line.set_linestyle('dotted')

            ax.grid(True)

            #----------------------------------------------------
            #New axes for population diagram

            ax = plt.axes([.98, .17, .16, .14], axisbg='w') #([DistFromLeft, DistFromBottom, Width, Height])

            #Make dataframe from Ykr-population
            pop = pd.read_csv(self.Ypop, sep=';')

            #Use original Matrix without NoData values
            MatrixData.replace(to_replace={AttributeParameter: {-1: np.nan}}, inplace=True)

            #Join population information and time matrix
            join = pd.merge(left=MatrixData, right=pop, how='outer', left_on='from_id', right_on='YKR_ID')

            #Sort data by attribute parameter
            sorted = join.sort(columns=[AttributeParameter])

            #Aggregate data by AttributeParameter
            aggre = pd.DataFrame(sorted.groupby(AttributeParameter).sum().Population)

            #Create attribute from index
            aggre[AttributeParameter] = aggre.index

            #Create cumulative population attribute
            aggre['cumPop'] = aggre['Population'].cumsum()

            #Reset index and determine AttributeParameter as float (matplotlib requires for it to work)
            aggre.reset_index(inplace=True, drop=True)
            aggre[AttributeParameter].astype(float)

            #print aggre[0:10]

            #Create filled curve plot from the cumulative population
            ax.fill_between(aggre.index,aggre['cumPop']/1000,0, interpolate=True, lw=1, facecolor='green', alpha=0.6)

            #Set valuelimits for axes
            ax.set_xlim(0,xupLim-50)
            ax.set_ylim(-10,aggre['cumPop'].max()/1000+50)


            gridlines = ax.get_xgridlines()
            gridlines.extend( ax.get_ygridlines() )

            for line in gridlines:
                line.set_linewidth(.28)
                line.set_linestyle('dotted')

            ax.grid(True)
            ax.tick_params(axis='both', direction="out",labelsize=4.5, pad=1,
                                        labelright=True,labelleft=False, top=False, left=False,
                                        color='k', length=3, width=.9)
            ax.xaxis.set_ticks(np.arange(0,xupLim-30,30))

            if measure2 == "minutes":
                ax.set_xlabel("t(min)", fontsize=5,labelpad=1.5)
                measure3 = 'minutes'
            else:
                measure3 = 'km'
                ax.set_xlabel("km", fontsize=5,labelpad=1.5)

            #Set histogram title
            plt.figtext(.975, .315,
                        "Population (per 1000) reached within (x) %s" % measure3,ha='left', va='bottom', size=5.7, style='italic')

            #-----------------------
            #Save map to disk
            #-----------------------

            fig.set_size_inches(9.22, 6.35) #(Width, Height)

            outputPath = os.path.join(self.outputFolder, self.basename) + AttributeParameter + ".png"

            plt.savefig(outputPath, dpi=300, alpha=True, bbox_inches='tight')
            plt.close() #or plt.close('all') --> closes all figure windows

            end = time.time()
            lasted = int(end-start)
            return lasted

        except Exception as e:
            return e
    def GenerateMap(self, inputFile):
        """Render one travel time/distance map for a single matrix CSV file.

        Reads the MetropAccess travel-time matrix in ``inputFile``, joins it to the
        YKR grid shapefile (``self.Y``), classifies the chosen attribute
        (``self.A``) with the classifier named in ``self.Cl``, and draws a map with
        colorbar, summary statistics, a travel histogram and a cumulative
        population diagram. The figure is saved as a PNG into
        ``self.outputFolder`` and per-file statistics are appended via
        ``self.writeStatistics``.

        Returns the elapsed wall-clock time in whole seconds on success.
        NOTE(review): on failure the caught exception OBJECT is returned instead
        of being raised, so callers must type-check the result.
        """

        start = time.time()

        #Create file which hold statistics for each inputFile (containing mean/median travel times, std, min, max etc.)
        self.statistics = self.createStatistics()
        #Strip the 4-character file extension to get the base name
        self.basename = os.path.basename(inputFile)[:-4]
        AttributeParameter = self.A
        coords = self.coords

        #Format figure
        plt.clf()
        fig = plt.figure()

        #Picture frame for Map
        gs = gridspec.GridSpec(12, 12)
        ax = plt.subplot(gs[:, :], axisbg='w', frame_on=False)

        try:
            #Read MetropAccess-matka-aikamatriisi data in
            MatrixData = pd.read_csv(inputFile, sep=';')

            #Join data to shapefile (pandas 'merge' function)
            df_map = pd.merge(left=self.Y,
                              right=MatrixData,
                              how='outer',
                              left_on='YKR_ID',
                              right_on='from_id')

            #CLASSIFY MATRIX DATA
            #Replace -1 values
            #(-1 is the matrix's NoData sentinel; convert it to NaN for pandas)
            df_map.replace(to_replace={AttributeParameter: {
                -1: np.nan
            }},
                           inplace=True)

            #Data for histogram
            histData = pd.DataFrame(
                df_map[df_map[AttributeParameter].notnull()]
                [AttributeParameter].values)

            maxBin = max(
                df_map[df_map[AttributeParameter].notnull()]
                [AttributeParameter].values)  #.AttributeParameter.values)
            NoData = int(maxBin + 1)
            NullCount = len(df_map[df_map[AttributeParameter].isnull()])
            #13230 is presumably the total number of YKR grid cells in the
            #study area (hard-coded here and in the label maths below) -- TODO confirm
            NullP = (NullCount / 13230.0) * 100

            #Fill NoData values with maxBin+1 value
            df_map[AttributeParameter].fillna(NoData, inplace=True)

            #Manual classification
            if not self.Cl in [
                    'Natural Breaks', 'Quantiles', "Fisher's Jenks"
            ]:
                #Create bins for classification based on chosen classification method
                Manual = True

                if "time" in AttributeParameter:
                    measure = "min"
                    measure2 = "minutes"  #Another string-form for summary
                    titleMeas = "time"

                    if self.Cl == "10 Minute Equal Intervals":

                        #Calculate the highest class (10 minutes * Number of classes)
                        maxClass = 10 * self.Nclasses

                        #Create 'higher than' info for the colorbar
                        maxClassInfo = str(maxClass - 10)

                        #Create array of bins from 0 to highest class with increments of 10
                        bins = np.arange(10, maxClass, 10)

                        #Add extra classes for No Data and higher than maxClass values
                        if maxBin < maxClass:
                            bins = list(
                                np.append(bins, [maxClass + 1, maxClass + 2]))
                        else:
                            bins = list(np.append(bins, [maxBin, maxBin + 1]))

                    elif self.Cl == "5 Minute Equal Intervals":

                        #Calculate the highest class (10 minutes * Number of classes)
                        maxClass = 5 * self.Nclasses

                        #Create 'higher than' info for the colorbar
                        maxClassInfo = str(maxClass - 5)

                        #Create array of bins from 0 to highest class with increments of 5
                        bins = np.arange(5, maxClass, 5)

                        #Add extra classes for No Data and higher than maxClass values
                        if maxBin < maxClass:
                            bins = list(
                                np.append(bins, [maxClass + 1, maxClass + 2]))
                        else:
                            bins = list(np.append(bins, [maxBin, maxBin + 1]))

                elif "dist" in AttributeParameter:

                    measure = "km"
                    measure2 = "kilometers"
                    titleMeas = "distance"

                    if self.Cl == "5 Km Equal Intervals":

                        #Calculate the highest class (5000 meters * Number of classes)
                        maxClass = 5000 * self.Nclasses

                        #Create 'higher than' info for the colorbar
                        maxClassInfo = str((maxClass - 5000) / 1000)

                        #Create array of bins from 0 to highest class with increments of 5000 (meters)
                        bins = np.arange(5000, maxClass, 5000)

                        #Add extra classes for No Data and higher than maxClass values
                        if maxBin < maxClass:
                            bins = list(
                                np.append(bins, [maxClass + 1, maxClass + 2]))
                        else:
                            bins = list(np.append(bins, [maxBin, maxBin + 1]))

                    elif self.Cl == "10 Km Equal Intervals":

                        #Calculate the highest class (5000 meters * Number of classes)
                        maxClass = 10000 * self.Nclasses

                        #Create 'higher than' info for the colorbar
                        maxClassInfo = str((maxClass - 10000) / 1000)

                        #NOTE(review): the other interval branches start the bin
                        #range at the interval size (5, 10, 5000); this one starts
                        #at 0, which adds an extra 0-valued break -- verify intent.
                        #Create array of bins from 0 to highest class with increments of 5000 (meters)
                        bins = np.arange(0, maxClass, 10000)

                        #Add extra classes for No Data and higher than maxClass values
                        if maxBin < maxClass:
                            bins = list(
                                np.append(bins, [maxClass + 1, maxClass + 2]))
                        else:
                            bins = list(np.append(bins, [maxBin, maxBin + 1]))

                #Classify data based on bins
                breaks = mc.User_Defined(
                    df_map[df_map[AttributeParameter].notnull()]
                    [AttributeParameter], bins)

            else:
                Manual = False

                if self.Cl == 'Natural Breaks':
                    breaks = nb(df_map[df_map[AttributeParameter].notnull()]
                                [AttributeParameter],
                                initial=100,
                                k=self.Nclasses)
                elif self.Cl == 'Quantiles':
                    breaks = Quantiles(
                        df_map[df_map[AttributeParameter].notnull()]
                        [AttributeParameter],
                        k=self.Nclasses)
                elif self.Cl == "Fisher's Jenks":
                    breaks = fj(df_map[df_map[AttributeParameter].notnull()]
                                [AttributeParameter],
                                k=self.Nclasses)

                bins = list(breaks.bins)

                if "time" in AttributeParameter:
                    measure = "min"
                    measure2 = "minutes"  #Another string-form for summary
                    titleMeas = "time"
                    maxClassInfo = str(bins[-2])
                else:
                    measure = "km"
                    measure2 = "kilometers"
                    titleMeas = "distance"
                    maxClassInfo = str(bins[-2] / 1000)

                #Presumably two extra classes: one for values above the top
                #break and one for the NoData class -- hence the double append.
                bins.append(maxBin)
                bins.append(maxBin)

            #the notnull method lets us match indices when joining
            jb = pd.DataFrame(
                {'jenks_bins': breaks.yb},
                index=df_map[df_map[AttributeParameter].notnull()].index)
            df_map = df_map.join(jb)

            brksBins = bins[:
                            -1]  #breaks.bins[:-1] #Do not take into account NoData values

            if measure2 == "kilometers":  #Convert meters (in data) to kilometers for legend
                b = [round((x / 1000), 0) for x in brksBins]
                brksBins = b
                del b

            brksCounts = breaks.counts[:
                                       -1]  #Do not take into account NoData values

            #Check if brksCounts and brksBins dismatches --> insert 0 values if necessary (to match the counts)
            if len(brksBins) != len(brksCounts):
                dif = len(brksBins) - len(brksCounts)
                brksCounts = np.append(brksCounts, [0 for x in xrange(dif)])
            else:
                dif = 0

            #List for measures which will be inserted to class labels
            measureList = [measure for x in xrange(len(brksBins))]

            #Class labels
            jenks_labels = [
                "%0.0f %s (%0.1f %%)" % (b, msr, (c / 13230.0) * 100) for b,
                msr, c in zip(brksBins[:-1], measureList[:-1], brksCounts[:-1])
            ]

            #NOTE(review): inserting at index int(maxBin) -- an index past the end
            #of a list appends, which appears to be the intent here; verify.
            if Manual == True:
                if "dist" in AttributeParameter:
                    jenks_labels.insert(
                        int(maxBin), '>' + maxClassInfo + ' km (%0.1f %%)' %
                        ((brksCounts[-1] / 13230.0) * 100))
                else:
                    jenks_labels.insert(
                        int(maxBin), '>' + maxClassInfo + ' min (%0.1f %%)' %
                        ((brksCounts[-1] / 13230.0) * 100))

            jenks_labels.insert(NoData, 'NoData (%0.1f %%)' % (NullP))

            #Use modified colormap ('my_colormap') - Choose here the default colormap which is used as a startpoint --> cm.YourColor'sName (eg. cm.Blues) - See available Colormaps: http://matplotlib.org/examples/color/colormaps_reference.html
            cmap = self.my_colormap(cm.RdYlBu, len(bins))

            #Draw grid with grey outlines
            df_map['Grid'] = df_map['poly'].map(
                lambda x: PolygonPatch(
                    x, ec='#555555', lw=.2, alpha=1., zorder=4)
            )  #RGB color-codes can be found at http://www.rapidtables.com/web/color/RGB_Color.htm
            pc = PatchCollection(df_map['Grid'], match_original=True)

            #-----------------------------
            #Reclassify data to value range 0.0-1.0 (--> colorRange is 0.0-1.0)
            if Manual == True:

                colbins = np.linspace(0.0, 1.0, len(bins))
                colbins = colbins - 0.001
                colbins[0], colbins[-1] = 0.0001, 1.0

                #Map each class index to its color position in [0, 1]
                reclassification = {}
                for index in range(len(bins)):
                    reclassification[index] = colbins[index]

                reclassification['_reclassify'] = self.reclassify

                reclass = []
                dataList = list(df_map['jenks_bins'])

                for value in dataList:
                    reclass.append(self.reclassify(reclassification, value))

                df_map['jenks_binsR'] = reclass
            else:
                norm = Normalize()
                df_map['jenks_binsR'] = norm(df_map['jenks_bins'].values)

            #-----------------------------

            #Impose colour map onto the patch collection
            pc.set_facecolor(cmap(df_map['jenks_binsR'].values))

            #Add colored Grid to map
            ax.add_collection(pc)

            #Add coastline to the map
            self.C['Polys'] = self.C['poly'].map(lambda x: PolygonPatch(
                x, fc='#606060', ec='#555555', lw=.25, alpha=.88, zorder=4
            ))  #Alpha adjusts transparency, fc='facecolor', ec='edgecolor'
            cpc = PatchCollection(self.C['Polys'], match_original=True)
            ax.add_collection(cpc)

            #Add roads to the map
            for feature in self.R:
                xx, yy = feature.xy
                self.B.plot(xx,
                            yy,
                            linestyle='solid',
                            color='#606060',
                            linewidth=0.7,
                            alpha=.6)

            #Add metro to the map
            for line in self.M:  #metroLines is a shapely MultiLineString object consisting of multiple lines (is iterable)
                x, y = line.xy
                self.B.plot(x, y, color='#FF2F2F', linewidth=0.65, alpha=.4)

            #----------------------
            #GENERATE TARGET POINT
            #----------------------
            #Generate YKR_ID from csv name
            #(assumes file names look like <prefix>_<prefix>_<YKR_ID>... -- TODO confirm)
            ykrID = int(self.basename.split('_')[2])

            #Find index of target YKR_ID
            tIndex = df_map.YKR_ID[df_map.YKR_ID == ykrID].index.tolist()
            trow = df_map[tIndex[0]:tIndex[0] + 1]
            targetPolygon = trow.poly
            centroid = targetPolygon.values[
                0].centroid  #Get centroid of the polygon --> Returns shapely polygon point-type object

            self.B.plot(centroid.x,
                        centroid.y,
                        'go',
                        markersize=3,
                        label="= Destination")

            #-----------------------------
            #LEGEND
            #-----------------------------

            #Draw a map scale
            self.B.drawmapscale(
                coords[0] + 0.47,
                coords[1] +
                0.013,  #Distance from left, distance from bottom: added to the coordinates in degrees
                coords[0],
                coords[1],
                #10.,
                10.,
                barstyle='fancy',
                labelstyle='simple',
                yoffset=200,  #yoffset determines the height of the mapscale
                fillcolor1='w',
                fillcolor2='#909090',
                fontsize=6,  # black= #000000
                fontcolor='#202020',
                zorder=5)

            #Set up title
            if "PT" in AttributeParameter:
                tMode = "public transportation"
            elif "Car" in AttributeParameter:
                tMode = "car"
            elif "Walk" in AttributeParameter:
                tMode = "walking"

            titleText = "Travel %s to %s (YKR-ID) \n by %s" % (
                titleMeas, str(ykrID), tMode)
            plt.figtext(.852, .735, titleText, size=9.5)

            #Plot copyright texts (unichr(0xa9) is the (c) sign; Python 2 only)
            copyr = "%s MetropAccess project, University of Helsinki, 2014\nLicensed under a Creative Commons Attribution 4.0 International License" % (
                unichr(0xa9))

            plt.figtext(.24, .078, copyr, fontsize=4.5)

            #----------------
            #Add a colour bar
            #----------------

            #Set arbitary location (and size) for the colorbar
            axColor = plt.axes(
                [.86, .15, .016,
                 .52])  #([DistFromLeft, DistFromBottom, Width, Height])

            cb = self.colorbar_index(
                ncolors=len(jenks_labels),
                cmap=cmap,
                labels=jenks_labels,
                cax=axColor
            )  #, shrink=0.5)#, orientation="vertical", pad=0.05,aspect=20)#,cax=cbaxes) #This is a function --> see at the beginning of the code. #, cax=cbaxes shrink=0.5,
            cb.ax.tick_params(labelsize=5.5)

            #Inform travel sum of the whole grid (i.e. centrality of the location)
            #Travel time
            if measure2 == "minutes":
                tMean = histData.mean().values[0]
                tMedian = histData.median().values[0]
                tMax = histData.max().values[0]
                tMin = histData.min().values[0]
                tStd = histData.std().values[0]
                travelSummary = "Summary:"
                travelMean = "Mean: %0.0f %s" % (tMean, measure2)
                travelMedian = "Median: %0.0f %s" % (tMedian, measure2)
                travelStd = "Std: %0.0f %s" % (tStd, measure2)
                travelRange = "Range: %0.0f-%0.0f %s" % (tMin, tMax, measure2)

            #Travel distance
            else:
                #Convert meters to kilometers before computing the summary
                h = histData.values / 1000
                histData = pd.DataFrame(h)
                del h
                tMean = histData.mean().values[0]
                tMedian = histData.median().values[0]
                tMax = histData.max().values[0]
                tMin = histData.min().values[0]
                tStd = histData.std().values[0]
                travelSummary = "Summary:"
                travelMean = "Mean: %0.1f %s" % (tMean, measure2)
                travelMedian = "Median: %0.1f %s" % (tMedian, measure2)
                travelStd = "Std: %0.1f %s" % (tStd, measure2)
                travelRange = "Range: %0.1f-%0.1f %s" % (tMin, tMax, measure2)

            #Write information to a statistics file
            mInfo = "%s;%0.0f;%0.0f;%0.0f;%0.0f;%0.0f\n" % (
                str(ykrID), tMean, tMedian, tStd, tMin, tMax)
            self.writeStatistics(mInfo)

            #Helper variables for moving Summary statistic texts
            initialPos = .58  #.15  #.44
            initialXPos = .975  #.20 #.97
            textSize = 5.25
            split = 0.018

            #Plot Travel Summary title
            plt.figtext(initialXPos,
                        initialPos + split * 4,
                        travelSummary,
                        ha='left',
                        va='bottom',
                        color='#404040',
                        size=textSize,
                        style='normal',
                        fontweight='bold')

            #Plot Travel Summary mean
            plt.figtext(initialXPos,
                        initialPos + split * 3,
                        travelMean,
                        ha='left',
                        va='bottom',
                        size=textSize,
                        color='b')

            #Plot Travel Summary median
            plt.figtext(initialXPos,
                        initialPos + split * 2,
                        travelMedian,
                        ha='left',
                        va='bottom',
                        size=textSize,
                        color='r')

            #Plot Travel Summary Standard deviation
            plt.figtext(initialXPos,
                        initialPos + split,
                        travelStd,
                        ha='left',
                        va='bottom',
                        size=textSize)

            #Plot Travel Summary Range
            plt.figtext(initialXPos,
                        initialPos,
                        travelRange,
                        ha='left',
                        va='bottom',
                        size=textSize)

            #Plot Legend symbol
            ax.legend(
                bbox_to_anchor=(.97, 0.07),
                fontsize=5.5,
                frameon=False,
                numpoints=1
            )  #1.265     bbox_to_anchor=(x,y)  --> arbitary location for legend, more info: http://matplotlib.org/api/legend_api.html

            #--------------------------------------------------------
            #Travel time and population (catchment areas) histograms
            #--------------------------------------------------------

            #New axes for travel time/distance histogram
            #(rebinds 'ax'; the main map axes are no longer referenced below)
            ax = plt.axes(
                [.98, .39, .16, .14],
                axisbg='w')  #([DistFromLeft, DistFromBottom, Width, Height])

            #Add histogram
            n, bins, patches = ax.hist(histData.values,
                                       100,
                                       normed=False,
                                       facecolor='green',
                                       alpha=0.75,
                                       rwidth=0.5,
                                       orientation="vertical")
            ax.axvline(histData.median(),
                       color='r',
                       linestyle='solid',
                       linewidth=1.8)
            ax.axvline(histData.mean(),
                       color='b',
                       linestyle='solid',
                       linewidth=1.0)

            if measure2 == "minutes":
                ax.set_xlabel("t(min)", fontsize=5, labelpad=1.5)
                xupLim = 250  #upper limit for x-axis
            else:
                ax.set_xlabel("km", fontsize=5, labelpad=1.5)
                xupLim = 100  #upper limit for x-axis

            #Set valuelimits for axes
            ax.set_xlim(0, xupLim - 30)

            if max(
                    n
            ) < 1000:  #ymax will be set to 1000 if count of individual bin is under 1000, else 1500
                yMax = 1000
            else:
                yMax = 1600

            ax.set_ylim(0, yMax)

            #Set histogram title
            plt.figtext(.975,
                        .535,
                        "Travel %s histogram" % titleMeas,
                        ha='left',
                        va='bottom',
                        size=5.7,
                        style='italic')

            #Adjust tick font sizes and set yaxis to right
            ax.tick_params(axis='both',
                           direction="out",
                           labelsize=4.5,
                           pad=1,
                           labelright=True,
                           labelleft=False,
                           top=False,
                           left=False,
                           color='k',
                           length=3,
                           width=.9)

            ax.xaxis.set_ticks(np.arange(0, xupLim - 30, 30))

            gridlines = ax.get_xgridlines()
            gridlines.extend(ax.get_ygridlines())

            for line in gridlines:
                line.set_linewidth(.28)
                line.set_linestyle('dotted')

            ax.grid(True)

            #----------------------------------------------------
            #New axes for population diagram

            ax = plt.axes(
                [.98, .17, .16, .14],
                axisbg='w')  #([DistFromLeft, DistFromBottom, Width, Height])

            #Make dataframe from Ykr-population
            pop = pd.read_csv(self.Ypop, sep=';')

            #Use original Matrix without NoData values
            MatrixData.replace(to_replace={AttributeParameter: {
                -1: np.nan
            }},
                               inplace=True)

            #Join population information and time matrix
            join = pd.merge(left=MatrixData,
                            right=pop,
                            how='outer',
                            left_on='from_id',
                            right_on='YKR_ID')

            #Sort data by attribute parameter
            #NOTE(review): 'sorted' shadows the builtin, and DataFrame.sort was
            #removed in later pandas (use sort_values) -- fine for the pandas
            #version this Python 2 code targets.
            sorted = join.sort(columns=[AttributeParameter])

            #Aggregate data by AttributeParameter
            aggre = pd.DataFrame(
                sorted.groupby(AttributeParameter).sum().Population)

            #Create attribute from index
            aggre[AttributeParameter] = aggre.index

            #Create cumulative population attribute
            aggre['cumPop'] = aggre['Population'].cumsum()

            #Reset index and determine AttributeParameter as float (matplotlib requires for it to work)
            #NOTE(review): astype(float) returns a copy; the result is discarded here -- verify intent.
            aggre.reset_index(inplace=True, drop=True)
            aggre[AttributeParameter].astype(float)

            #print aggre[0:10]

            #Create filled curve plot from the cumulative population
            ax.fill_between(aggre.index,
                            aggre['cumPop'] / 1000,
                            0,
                            interpolate=True,
                            lw=1,
                            facecolor='green',
                            alpha=0.6)

            #Set valuelimits for axes
            ax.set_xlim(0, xupLim - 50)
            ax.set_ylim(-10, aggre['cumPop'].max() / 1000 + 50)

            gridlines = ax.get_xgridlines()
            gridlines.extend(ax.get_ygridlines())

            for line in gridlines:
                line.set_linewidth(.28)
                line.set_linestyle('dotted')

            ax.grid(True)
            ax.tick_params(axis='both',
                           direction="out",
                           labelsize=4.5,
                           pad=1,
                           labelright=True,
                           labelleft=False,
                           top=False,
                           left=False,
                           color='k',
                           length=3,
                           width=.9)
            ax.xaxis.set_ticks(np.arange(0, xupLim - 30, 30))

            if measure2 == "minutes":
                ax.set_xlabel("t(min)", fontsize=5, labelpad=1.5)
                measure3 = 'minutes'
            else:
                measure3 = 'km'
                ax.set_xlabel("km", fontsize=5, labelpad=1.5)

            #Set histogram title
            plt.figtext(.975,
                        .315,
                        "Population (per 1000) reached within (x) %s" %
                        measure3,
                        ha='left',
                        va='bottom',
                        size=5.7,
                        style='italic')

            #-----------------------
            #Save map to disk
            #-----------------------

            fig.set_size_inches(9.22, 6.35)  #(Width, Height)

            outputPath = os.path.join(
                self.outputFolder, self.basename) + AttributeParameter + ".png"

            plt.savefig(outputPath, dpi=300, alpha=True, bbox_inches='tight')
            plt.close()  #or plt.close('all') --> closes all figure windows

            end = time.time()
            lasted = int(end - start)
            return lasted

        #NOTE(review): catching everything and RETURNING the exception hides
        #failures from callers that only check for an int -- consider raising.
        except Exception as e:
            return e
# Example #26
# 0
# Spatially join rivers onto watersheds, keeping only intersecting pairs.
watershed_with_rivers = sjoin(watershed, river, how='inner', op='intersects')

# Total river length per basin (HYBAS_ID).
river_length_by_basin = (
    watershed_with_rivers
    .groupby(["HYBAS_ID"])
    .agg({"length": "sum"})
    .reset_index()
)

# Look the summed length back up on the watershed layer by basin id.
length_lookup = river_length_by_basin.set_index('HYBAS_ID')['length'].to_dict()
watershed['riverlength'] = watershed.HYBAS_ID.map(length_lookup)
# replace NaN with a very small number
watershed['riverlength'].fillna(1, inplace=True)

# Density = basin area divided by the river length it contains.
watershed['density'] = watershed.apply(
    lambda row: row.area / row.riverlength, axis=1)

# Classify densities into 3 Jenks natural-breaks classes.
density_matrix = watershed.as_matrix(columns=['density'])
breaks = nb(density_matrix.ravel(), k=3, initial=1)

bin_indices = np.digitize(watershed.density, bins=breaks.bins.tolist())
watershed['areatype'] = bin_indices

# Translate numeric bins into the colour names used downstream.
watershed['areatype'] = watershed['areatype'].map({
    3: 'green',
    2: 'green2',
    1: 'green2',
    0: 'green3'
})

# Clip the classified watersheds to the area of interest and dump GeoJSON.
clipped = geopandas.overlay(watershed, aoi, how='intersection')

with open(outputgeojson, 'w') as f:
    f.write(clipped.to_json())
# Example #27
# 0
        sum(df.loc[lambda df: (df.county == county['NAME_TAG'])]['hours'])
        for county in clean_counties_info
    ]
})

# Create Point objects in map coordinates from dataframe lon and lat values
# (m is presumably a Basemap instance projecting lon/lat to map x/y -- TODO confirm)
map_points = pd.Series([
    Point(m(mapped_x, mapped_y))
    for mapped_x, mapped_y in zip(df['lon'], df['lat'])
])
rec_points = MultiPoint(list(map_points.values))
# prep() builds a prepared geometry for fast repeated 'contains' tests
counties_polygon = prep(MultiPolygon(list(df_map['poly'].values)))
# NOTE(review): under Python 3 filter() returns a lazy iterator, under
# Python 2 a list -- confirm which this codebase targets before reuse.
county_points = filter(counties_polygon.contains, rec_points)

# Calculate Jenks natural breaks for density
breaks = nb(df_map[df_map['hours'].notnull()].hours.values, initial=300, k=6)

# the notnull method lets us match indices when joining
jb = pd.DataFrame({'jenks_bins': breaks.yb},
                  index=df_map[df_map['hours'].notnull()].index)
df_map = df_map.join(jb)
# Rows with no recorded hours get sentinel class -1 ('No recording' below)
df_map.jenks_bins.fillna(-1, inplace=True)

labels = ['No recording'
          ] + ["> %d hours" % (perc) for perc in breaks.bins[:-1]]

# Fresh figure for the choropleth
plt.clf()
fig = plt.figure()
ax = fig.add_subplot(111, axisbg='w', frame_on=False)

cmap = plt.get_cmap('Blues')
# Example #28
# 0
		thecount += 1
try:
	myDataFrame = pd.DataFrame({"TheData": myArray})
	#La fonction "GetParameterAsText" invite l'utilisateur de nommer le fichier géographique
	#  ("feature class" ou "fc") sur lequel les opérations vont commencer.
	#Ce script utilise "IQH_FINAL" comme le champ des données sur lequel les opérations vont
	#   commencer ("field").
	#La calculation utilise les progiciels arcpy.da (analyse des données) et numpy.
	#Nonobstant que le tableau numérique consiste en nombres entiers, le tableau est
	#   transformé au format de point flottant ("float") parce que le progiciel PySAL
	#   a besoin de cette transformation pour l'intégrer avec la fonction KMEANS.
	#L'iteration "for-if-else" trie les valeurs "null" des vraies données, et après cette
	#   sortation, les données sont transferées en format de cadre des données pandas.

	print "Calcul des Jenks natural breaks..."
	breaks = nb(myDataFrame["TheData"].dropna().values,k=4,initial=20)
	#Le calcul des valeurs Jenks est produit par le progiciel pysal.  Tous les valeurs
	#   "null" sont sortis, et les données qui restent sont préparées pour l'analyse.
	#La paramètre k symbolise le nombre des classes la fonction Jenks va créer pour
	#   l'utilisateur.
	#La paramètre initial est le semence de la fonction Jenks.  Un valeur grand va
	#   converger la fonction plus vite; un valeur petit, d'autre part, va être plus exact. 

	print "Vérification s'il y avait calculs précédents des champs de valeurs Jenks..."
	try:
		arcpy.DeleteField_management(fc, "Jenks")
		print "Calculs précédents des champs de valeurs Jenks effacés..."

	except Exception as e:
		print "Aucuns champs des valeurs Jenks trouvés..."
	#Cette iteration "try-except" efface les calculs précédents s'ils existent. Si un champ
# #########################
# MAPS
# #########################

# Register the commune-level frame under its INSEE-code key.
dict_df_areas['c_insee'] = df_com

# Human-readable map titles, keyed by the field being mapped.
dict_titles = {
    'nb_stores': 'Nb of stores',
    'store_surface': 'Cumulated store surface',
}

# todo: generalize nb of stores and store surface (or other loop?)
# Assign a Jenks natural-breaks class to each row of every admin-area frame,
# once per mapped field.  NOTE(review): the loop body continues beyond this
# excerpt, so only comments are added here.
for area, df_area in dict_df_areas.items():
  # Rows without a polygon cannot be drawn — work on a filtered copy.
  df_area_temp = df_area[~pd.isnull(df_area['poly'])].copy()
  for field in ['nb_stores', 'store_surface']:
    # Calculate Jenks natural breaks for density
    breaks = nb(df_area_temp[df_area_temp[field].notnull()][field].values,
                initial=300,
                k=5)

    # zero excluded from natural breaks... specific class with val -1 (added later)
    # (zeros are blanked only AFTER the breaks are computed, so they did
    # contribute to the break values above — presumably intentional; verify.)
    df_area_temp.replace(to_replace={field: {0: np.nan}}, inplace=True)

    # the notnull method lets us match indices when joining
    jb = pd.DataFrame({'jenks_bins': breaks.yb},
                      index=df_area_temp[df_area_temp[field].notnull()].index)

    # need to drop duplicate index in jb, todo: check why need area here (MI?)
    # NOTE(review): 'take_last' was removed in pandas 0.17 — modern pandas
    # requires keep='last' instead; confirm the pinned pandas version.
    jb = jb.reset_index().drop_duplicates(subset=[area],
                                          take_last=True).set_index(area)
    # propagated to all rows in df_com with same index
    df_area_temp['jenks_bins'] = jb['jenks_bins']
    # Unclassified rows (NaN field values) get sentinel class -1.
    # NOTE(review): attribute-chained inplace fillna may not write back on
    # recent pandas — prefer assigning df_area_temp['jenks_bins'].fillna(-1).
    df_area_temp.jenks_bins.fillna(-1, inplace=True)