def draw_map(df, measure, Colors, Title):
    """Draw a choropleth of `measure` over the polygons in df['Poly'] and save it.

    Parameters
    ----------
    df : pandas.DataFrame
        Must carry a 'Poly' column of shapely polygons and a numeric
        `measure` column.
    measure : str
        Name of the column to colour by.
    Colors : str
        Matplotlib colormap name.
    Title : str
        Plot title; also used as the base name of the saved PNG.

    Side effects: draws on the current pyplot figure and writes a PNG.
    Relies on module-level map bounds (minx, maxx, miny, maxy, w, h) and on
    the module-level helpers `nb` (Jenks natural breaks) and `colorbar_index`.
    """
    plt.clf()
    fig = plt.figure()
    # FIX: 'axisbg' was removed from matplotlib (deprecated 2.0, gone in
    # 2.2); 'facecolor' is the drop-in replacement.
    ax = fig.add_subplot(111, facecolor='None')
    # Pad the data extent by 20% on every side.
    ax.set_xlim(minx - 0.2 * w, maxx + 0.2 * w)
    ax.set_ylim(miny - 0.2 * h, maxy + 0.2 * h)
    ax.set_aspect(1)
    cmap = plt.get_cmap(Colors)
    df['patches'] = df['Poly'].map(
        lambda x: PolygonPatch(x, ec='#555555', lw=0.2, alpha=0.5, zorder=4))
    pc = PatchCollection(df['patches'].values, match_original=True)
    norm = mpl.colors.Normalize()
    pc.set_facecolor(cmap(norm(df[measure].values)))
    ax.add_collection(pc)
    plt.title(Title)
    # Add a colorbar for the PolyCollection:
    # classify the values into 7 classes using Jenks natural breaks.
    breaks = nb(df[df[measure].notnull()][measure].values, initial=500, k=7)
    # The notnull index lets the bin column line up on join.
    jb = pd.DataFrame({'jenks_bins': breaks.yb},
                      index=df[df[measure].notnull()].index)
    df = df.join(jb)
    df.jenks_bins.fillna(-1, inplace=True)
    jenks_labels = ["<= Above %0.1f" % b for b in breaks.bins]
    cb = colorbar_index(ncolors=len(jenks_labels), cmap=cmap, shrink=0.5,
                        labels=jenks_labels)
    cb.ax.tick_params(labelsize=8)
    fig.set_size_inches(10, 10)
    # FIX: plt.savefig has no 'ext' or 'close' keyword arguments (those
    # belong to a popular blog-post helper); modern matplotlib raises
    # TypeError on them.  Appending '.png' reproduces what ext='png'
    # intended.
    plt.savefig(Title + '.png', dpi=400, bbox_inches='tight')
def normalise(df, element):
    """Normalise one assay column of `df` for choropleth display.

    Mutates `df` in place (and returns it), adding:
      <element>_nn   -- numeric-coerced values
      <element>_fn   -- after `neg_conversions` (presumably makes values
                        log-safe; TODO confirm against its definition)
      <element>_log  -- log10 of the converted values
      normalised     -- values scaled between the column minimum and a
                        median + 2*MAD threshold via `scaling`
      classifications -- colour hex codes from 7 natural-breaks classes
    """
    # Treat missing assays as zero before coercion.
    df[element].fillna(0, inplace=True)
    df[f'{element}_nn'] = pd.to_numeric(df[f'{element}'])
    df[f'{element}_fn'] = df[f'{element}_nn'].apply(
        lambda x: neg_conversions(x))
    df[f'{element}_log'] = df[f'{element}_fn'].apply(lambda x: log10(x))
    median = df[f'{element}_log'].median()
    # Mean Absolute Deviation: Tukey, J.W., 1977. Exploratory Data Analysis.
    # Addison-Wesley, Reading, 688 pp
    # NOTE(review): Series.mad() was removed in pandas 2.0 -- confirm the
    # pinned pandas version, or replace with (s - s.mean()).abs().mean().
    mad = df[f'{element}_log'].mad()
    # Set threshold: http://crcleme.org.au/Pubs/guides/gawler/a7_id_anomalies.pdf
    threshold = median + 2 * mad
    min_value = df[f'{element}_log'].min()
    df['normalised'] = df[f'{element}_log'].apply(
        lambda x: scaling(x, min_value, threshold))
    # Natural-breaks classifier applied per value; assumes `nb` instances
    # are callable (older pysal API) -- TODO confirm with the pinned version.
    classifier = nb(df[f'{element}_log'], 7)
    df['classifications'] = df[f'{element}_log'].apply(classifier)
    # Map the 7 class indices to display colours.
    df.classifications.replace([1, 2, 3, 4, 5, 6, 7], [
        '#82817d', '#55b1d9', '#5bd955', '#e6a94e', '#e02d2d', '#da2de0',
        '#af00b5'
    ], inplace=True)
    return df
def computeNDVINaturalBreaks(self, rawndvifile):
    """Classify a raw NDVI raster into 4 natural-breaks classes.

    Reads `rawndvifile`, computes Jenks natural breaks over the valid
    (non-None, non-NaN) pixels, then rewrites every pixel as its break-bin
    index and saves the result to tmp/classified-ndvi.tiff.
    """
    print("Computing Natural breaks on NDVI..")
    self.files['ndvi_file'] = rawndvifile
    with rasterio.open(rawndvifile) as src:
        profile = src.profile
        bands = src.read()
        for band in bands:
            # Keep only valid pixels for the break computation.
            b = band[(band != np.array(None)) & (np.logical_not(np.isnan(band)))]
            # NOTE(review): with a multi-band raster, `bins` keeps only the
            # last band's breaks -- confirm NDVI input is single-band.
            breaks = nb(b.ravel(), k=4, initial=1)
            bins = breaks.bins.tolist()
            bins.insert(0, -1)  # add -1 to the beginning of the breaks
    classfiedndvitmppath = os.path.join(self.cwd, config.settings['outputdirectory'], 'tmp', 'classified-ndvi.tiff')
    print("Writing new NDVI with Natural break classes..")
    with rasterio.open(rawndvifile) as src:
        profile = src.profile
        bands = src.read(masked=True)
        for band in bands:
            # b = band[(band != np.array(None)) & (np.logical_not(np.isnan(band))) ]
            # Replace each pixel, in place, with its break-bin index.
            for x in np.nditer(band, op_flags=['readwrite']):
                x[...] = np.digitize(x, bins)
        # Reproject and write each band
        with rasterio.open(classfiedndvitmppath, 'w', **profile) as dst:
            dst.write(bands)
def IndicatorHeatMap(YEAR, LOCATION="Maryland", INDICATOR="None"):
    """Overlay an economic-indicator choropleth on the county basemap.

    Python 2 code (uses a print statement).  Reads module-level `df_map`,
    `indicator_list`, `fig`, and `m`.

    NOTE(review): `jenks_labels` is only assigned in the "Median Household
    Income" branch; every other INDICATOR value reaches colorbar_index and
    raises NameError.  Also `df_map` is merged into itself from enclosing
    scope -- confirm it is a module-level frame.
    """
    # Using a CSV of economic indicators, create a heatmap that
    # shows the data. This will be used under scatter and hexbin maps
    # if an indicator is selected.
    # This can probably be turned into an if/elif/else statement that
    # sets a common variable to a string based on the indicator chosen
    # and passes that variable through the breaks and jenks process.
    df_map = pd.merge(df_map, indicator_list, on="county_name", how="left")
    if INDICATOR == "None":
        pass
    elif INDICATOR == "Median Household Income":
        # Select county or neighborhood MHHI data from the passed year
        breaks = nb(
            df_map[df_map["mhhi"].notnull()].mhhi.values,
            initial=300,
            k=5)
        jb = pd.DataFrame({"jenks_bins":breaks.yb},
                          index=df_map[df_map["mhhi"].notnull()].index)
        df_map = df_map.join(jb)
        df_map.jenks_bins.fillna(-1, inplace=True)
        jenks_labels = ["Median Household Income:\n<= %f" % b for b in breaks.bins]
    elif INDICATOR == "Millennial Population Growth":
        # Select county or neighborhood Millennial population data from the passed year
        pass
    else:
        print "The %s indicator is not yet available in this program." % INDICATOR
    # NOTE(review): 'axisbg' was removed in matplotlib >= 2.2 (use facecolor).
    ax = fig.add_subplot(111, axisbg = "w", frame_on = False)
    # Change get_cmap color based on INDICATOR
    cmap = plt.get_cmap("Blues")
    df_map["patches"] = df_map["poly"].map(lambda x: PolygonPatch(x, ec="#555555", lw=.2, alpha=1, zorder=4))
    pc = PatchCollection(df_map["patches"], match_original=True)
    norm = Normalize()
    pc.set_facecolor(cmap(norm(df_map["jenks_bins"].values)))
    ax.add_collection(pc)
    cb = colorbar_index(ncolors=len(jenks_labels), cmap=cmap, shrink=0.5, labels=jenks_labels)
    cb.ax.tick_params(labelsize=8)
    m.drawmapscale(
        -125, 20,
        -125, 20,
        10.,
        barstyle = "fancy", labelstyle = "simple",
        fillcolor1 = "w", fillcolor2 = "w",
        fontcolor = "w",
        zorder=9, units = "m", fontsize =7)
def jenks_breaks(df_map, field):
    """Classify `field` into 5 Jenks natural-breaks bins.

    Parameters
    ----------
    df_map : pandas.DataFrame with a numeric `field` column (NaN allowed).
    field : str, name of the column to classify.

    Returns
    -------
    (df_map, jenks_labels) : the frame with a 'jenks_bins' column joined
    on (rows with null `field` get bin -1), and one label per break.
    """
    # Calculate Jenks natural breaks for density
    breaks = nb(
        df_map[df_map[field].notnull()][field].values,
        initial=300,
        k=5)
    # the notnull method lets us match indices when joining.
    # BUG FIX: the column must be named 'jenks_bins' -- the original
    # created it as 'bins', so df_map.jenks_bins below raised
    # AttributeError (and every caller elsewhere expects 'jenks_bins').
    jb = pd.DataFrame({'jenks_bins': breaks.yb},
                      index=df_map[df_map[field].notnull()].index)
    df_map = df_map.join(jb)
    df_map.jenks_bins.fillna(-1, inplace=True)
    jenks_labels = ["up to %0.f%% (%s EDs)" % (b*100, c) for b, c in zip(
        breaks.bins, breaks.counts)]
    #jenks_labels.insert(0, 'No plaques (%s wards)' % len(df_map[df_map['density_km'].isnull()]))
    return df_map, jenks_labels
def dataDF(self):
    """Extend the base dataDF: add point counts, densities and Jenks bins.

    Python 2 code: `int(len(filter(...)))` relies on filter returning a
    list; under Python 3 this raises TypeError.
    """
    super(GreaterBostonDensity, self).dataDF()
    # Count the data points contained in each polygon.
    self.df_map['count'] = self.df_map['poly'].map(lambda x: int(len(filter(prep(x).contains, self.dataPoints))))
    self.df_map['density_m'] = self.df_map['count'] / self.df_map['area_m']
    self.df_map['density_km'] = self.df_map['count'] / self.df_map['area_km']
    # it's easier to work with NaN values when classifying
    self.df_map.replace(to_replace={'density_m': {0: np.nan}, 'density_km': {0: np.nan}}, inplace=True)
    # 5-class Jenks natural breaks over areas that have any density.
    self.breaks = nb(
        self.df_map[self.df_map['density_km'].notnull()].density_km.values,
        initial=300,
        k=5)
    # the notnull method lets us match indices when joining
    jb = pd.DataFrame({'jenks_bins': self.breaks.yb},
                      index=self.df_map[self.df_map['density_km'].notnull()].index)
    self.df_map = self.df_map.join(jb)
    # Areas with no points get sentinel bin -1.
    self.df_map.jenks_bins.fillna(-1, inplace=True)
    return
def dataDF(self):
    """Tally points per census tract, derive per-capita density and Jenks bins.

    Mutates self.dataPoints (collapsed to an int array of tract ids) and
    self.df_map (adds count, density, jenks_bins columns).
    """
    # Collapse the raw point records to an integer array of tract ids.
    self.dataPoints = np.array([int(rec['CT_ID']) for rec in self.dataPoints])
    tract_ids = self.dataPoints
    # How many points landed in each tract.
    self.df_map['count'] = self.df_map['CT_ID_10'].map(
        lambda tract: int((tract_ids == int(tract)).sum()))
    self.df_map['POP100'] = self.df_map['POP100'].apply(float)
    # Incidents per 1,000 residents.
    self.df_map['density'] = (self.df_map['count'] * 1000) / self.df_map['POP100']
    # NaN is easier to classify around than zero.
    self.df_map.replace(to_replace={'density': {0: np.nan}}, inplace=True)
    # 5-class Jenks natural breaks over tracts with non-null density.
    populated = self.df_map[self.df_map['density'].notnull()]
    self.breaks = nb(populated.density.values, initial=300, k=5)
    # Joining on the notnull index lines the bins up with their tracts.
    binned = pd.DataFrame({'jenks_bins': self.breaks.yb}, index=populated.index)
    self.df_map = self.df_map.join(binned)
    # Tracts with no incidents get sentinel bin -1.
    self.df_map.jenks_bins.fillna(-1, inplace=True)
    return
def chloropleth(self, query, color="Blues"):
    """Show a choropleth map of crime density per community area.

    Args:
        query: SQL text used to fetch the crime rows.
        color: matplotlib colormap name.
    """
    self.load()
    crimes = pd.read_sql_query(con=self.con, sql=query)
    crime_points = self.gen_points(crimes, self.data_map)
    # Count the crime points contained by each community polygon.
    self.data_map['count'] = self.data_map['poly'].map(
        lambda poly: len(list(filter(prep(poly).contains, crime_points))))
    # Densities per square metre and per square kilometre.
    self.data_map['density_m'] = self.data_map['count'] / self.data_map['area_m']
    self.data_map['density_km'] = self.data_map['count'] / self.data_map['area_km']
    # NaN is easier to classify around than zero.
    self.data_map.replace(
        to_replace={'density_m': {0: np.nan}, 'density_km': {0: np.nan}},
        inplace=True)
    # 5-class Jenks natural breaks over communities with any crime.
    with_density = self.data_map[self.data_map['density_km'].notnull()]
    breaks = nb(with_density.density_km.values, initial=300, k=5)
    # Joining on the notnull index lines the bins up correctly.
    self.data_map = self.data_map.join(
        pd.DataFrame({'jenks_bins': breaks.yb}, index=with_density.index))
    self.data_map.jenks_bins.fillna(-1, inplace=True)
    jenks_labels = ["<= %0.1f/km$^2$(%s communities)" % (b, c)
                    for b, c in zip(breaks.bins, breaks.counts)]
    jenks_labels.insert(0, 'None (%s communities)' % len(
        self.data_map[self.data_map['density_km'].isnull()]))
    cmap = plt.get_cmap(color)
    # Draw each community polygon with a thin grey outline.
    self.data_map['patches'] = self.data_map['poly'].map(
        lambda poly: PolygonPatch(poly, ec='#555555', lw=.2, alpha=1., zorder=4))
    patch_coll = PatchCollection(self.data_map['patches'], match_original=True)
    # Colour the patches by their normalised bin values.
    patch_coll.set_facecolor(cmap(Normalize()(self.data_map['jenks_bins'].values)))
    self.ax.add_collection(patch_coll)
    cb = self.gen_colorbar(colors=len(jenks_labels), color_map=cmap,
                           shrink=0.5, labels=jenks_labels)
    cb.ax.tick_params(labelsize=6)
    plt.tight_layout()
    plt.show()
""" Makes a choropelth map of a given shapefile and a numerical column (val_col) of a dataframe. Shapefile and dataframe must both have a matching index_column for joining. Uses Jenks natural breaks by PySAL for classifying. If you want to highlight a polygon differently, pass it to main_poly and uncomment lines 43-47. Based on this tutorial: http://ramiro.org/notebook/basemap-choropleth/ """ num_colors = num_breaks cm = plt.get_cmap(color_ramp) scheme = [cm(i / num_colors) for i in range(num_colors)] # Create bins for color values breaks = nb( dataframe[val_col], initial=200, k = num_colors - 1) bins = breaks.bins frame['bin'] = breaks.yb mpl.style.use('seaborn-muted') fig = plt.figure(figsize=(22, 12)) ax = fig.add_subplot(111, axisbg='w', frame_on=False) fig.suptitle('Map of {}'.format(title), fontsize=30, y=.95) m = Basemap(lon_0=0, projection='robin') m.drawmapboundary(color='w') m.readshapefile(shapefile, 'units', color='#444444', linewidth=.2) for info, shape in zip(m.units_info, m.units): idx = info[index_col]
def HeatMap(YEAR, EAGB_INDUSTRY, LANDMARK="None", LOCATION="Maryland", INDICATOR="None", SAVE=False, DRAFT=False):
    """Draw (and optionally save) a county-density heatmap of establishments.

    Python 2 code: `int(len(filter(...)))` relies on filter returning a list.

    Parameters: YEAR / EAGB_INDUSTRY select the establishment points;
    LANDMARK / LOCATION / INDICATOR feed the annotation text; SAVE writes
    .eps/.png files; DRAFT is currently unused.
    """
    # Get basemap and df_map.
    # FIX: call GeographySelector() once instead of twice -- the second
    # call redid all of the geography work just to index the same result.
    geography = GeographySelector()
    m, df_map = geography[0], geography[1]
    # Get establishment points for year and industry with ClusterPoints()
    map_estab_points = ClusterPoints(YEAR, EAGB_INDUSTRY)[0]
    #Find the density of establishments for EAGB_INDUSTRY in YEAR for each county
    df_map["count"] = df_map["poly"].map(lambda x: int(len(filter(prep(x).contains, map_estab_points))))
    df_map["density_m"] = df_map["count"]/df_map["area_m"]
    # NaN is easier to classify around than zero.
    df_map.replace(to_replace={"density_m": {0:np.nan}}, inplace=True)
    # Calculate Jenks natural breaks for density
    breaks = nb(
        df_map[df_map["density_m"].notnull()].density_m.values,
        initial = 300,
        # Number of bins to sort counties into:
        k = 5)
    # The notnull method lets us match indices when joining
    jb = pd.DataFrame({"jenks_bins": breaks.yb},
                      index=df_map[df_map["density_m"].notnull()].index)
    df_map = df_map.join(jb)
    df_map.jenks_bins.fillna(-1, inplace=True)
    jenks_labels = ["<= %0.1f/m$^2$ (%s counties)" % (b, c) for b,c in zip(
        breaks.bins, breaks.counts)]
    jenks_labels.insert(0, "No %(1)s Establishments (%(2)s counties)" % {"1":EAGB_INDUSTRY,
        "2":len(df_map[df_map["density_m"].isnull()])})
    plt.clf()
    fig = plt.figure()
    ax = fig.add_subplot(111, axisbg="w", frameon=False)
    # Use a color ramp determined by EAGB_INDUSTRY with an if statement
    # Change "Blues" to some variable
    cmap = plt.get_cmap("Blues")
    # Draw counties with grey outlines
    df_map["patches"] = df_map["poly"].map(lambda x: PolygonPatch(x, ec="#555555", lw=0.2, alpha=1.0, zorder=4))
    pc = PatchCollection(df_map["patches"], match_original=True)
    # Impose color map onto patch collection
    norm = Normalize()
    pc.set_facecolor(cmap(norm(df_map["jenks_bins"].values)))
    ax.add_collection(pc,zorder=5)
    # Add a color bar
    cb = colorbar_index(ncolors=len(jenks_labels), cmap=cmap, shrink=0.5, labels=jenks_labels)
    cb.ax.tick_params(labelsize=8)
    '''
    # Show highest densities in descending order
    highest = "\n".join(
        value[1] for _, value in df_map[(df_map["jenks_bins"] == 4)][:10].sort().iterrows())
    highest = "Most Dense Counties:\n\n" + highest
    details = cb.ax.text(
        -1., 0-0.007,
        highest,
        ha="right", va="bottom",
        size = 8,
        color = "#555555")
    '''
    # Copyright and source data info
    smallprint = ax.text(
        0.02, 0,
        ha="left", va = "bottom",
        size = 10,
        color = "#555555",
        transform = ax.transAxes,
        s = "Classification Method: Jenks Natural Breaks\nTotal Points: %(1)s\nLandmarks: %(2)s\nEconomic Indicator: %(3)s\nContains NETS Database data\n$\copyright$ EAGB copyright and database right 2015" % {"1":len(ClusterPoints(YEAR, EAGB_INDUSTRY, LANDMARK)[0]), "2":LANDMARK, "3":INDICATOR}
        )
    '''
    m.drawmapscale(
        coords[0] + 0.08, coords[1] + 0.015,
        coords[0], coords[1],
        10.,
        barstyle = "fancy", labelstyle = "simple",
        fillcolor1 = "w", fillcolor2 = "#555555",
        fontcolor = "#555555",
        zorder=11,
        units = "m",
        fontsize = 7)
    '''
    plt.tight_layout()
    fig.set_size_inches(10,10)
    # FIX: the format dict was outside the plt.title call (separated by a
    # comma, making a no-op tuple), so the title displayed the raw
    # "%(1)s ..." placeholders.  Apply the % formatting inside the call.
    plt.title("%(1)s Establishment Density, %(2)s\n%(3)s" % {"1":EAGB_INDUSTRY, "2":YEAR, "3":LOCATION})
    #Passed argument tells program whether to save maps
    if SAVE == True:
        plt.savefig("All %(1)s %(2)s_heatmap.eps" % {"1":EAGB_INDUSTRY, "2":YEAR}, alpha = True)
        plt.savefig("All %(1)s %(2)s_heatmap.png" % {"1":EAGB_INDUSTRY, "2":YEAR}, alpha = True)
    else:
        pass
    plt.show()
def show_density(genre):
    """Plot restaurant density per New York block for one cuisine `genre`.

    Reads '<folder><genre> Restaurant.txt' (tab-separated), bins block
    densities with 5-class Jenks natural breaks, and saves/shows the map.

    Python 2 code: `filter(...)` must return a list for the `len(filter(...))`
    and re-iteration below to work; under Python 3 the exhausted iterator
    would silently yield zero counts.  Relies on module-level `folder`, `m`,
    `coords`, `nb` and `colorbar_index`.
    """
    infile=folder + genre + ' Restaurant.txt'
    #Extract all the data into a dict
    output = dict()
    output['lon']=[]
    output['lat']=[]
    output['rat']=[]
    with open(infile) as f:
        for line in f:
            # Column 2 holds the rating, column 6 holds "lat,lon".
            LineList = line.split('\t')
            output['rat'].append(LineList[2].split()[0])
            output['lat'].append(LineList[6].split(',')[0])
            output['lon'].append(LineList[6].split(',')[1])
    #Create a Pandas DataFrame
    df = pd.DataFrame(output)
    #Drop data contains None(Just for practice)
    df = df.dropna()
    df[['rat','lat','lon']] = df[['rat','lat','lon']].astype(float)
    #Create Point objects in map coordinates from dataframe lon and lat values
    map_points = pd.Series([Point(m(mapped_x, mapped_y)) for mapped_x , mapped_y in zip(df['lon'], df['lat'])])
    rstrnt_points = MultiPoint(list(map_points.values))
    #print len(m.newyork[1]),type(m.newyork),m.newyork_info
    df_map = pd.DataFrame({
        'poly':[Polygon(xy) for xy in m.newyork]})
    df_map['area_m'] = df_map['poly'].map(lambda x:x.area)
    df_map['area_km'] = df_map['area_m']/1000000
    #prepared object
    wards_polygon = prep(MultiPolygon(list(df_map['poly'].values)))
    #Calculate points that fall within the New York boundary
    ny_points = filter(wards_polygon.contains,rstrnt_points)
    #########Creating a Choropleth Map, Normalised by Ward Area
    df_map['count'] = df_map['poly'].map(lambda x: int(len(filter(prep(x).contains, ny_points))))
    df_map['density_m'] = df_map['count'] / df_map['area_m']
    df_map['density_km'] = df_map['count'] / df_map['area_km']
    # it's easier to work with NaN values when classifying
    df_map.replace(to_replace={'density_m': {0: np.nan}, 'density_km': {0: np.nan}}, inplace=True)
    # Calculate Jenks natural breaks for density
    breaks = nb(
        df_map[df_map['density_km'].notnull()].density_km.values,
        initial=300,
        k=5)
    # the notnull method lets us match indices when joining
    jb = pd.DataFrame({'jenks_bins': breaks.yb}, index=df_map[df_map['density_km'].notnull()].index)
    df_map = df_map.join(jb)
    df_map.jenks_bins.fillna(-1, inplace=True)
    jenks_labels = ["<= %0.1f/km$^2$(%s blocks)" % (b, c) for b, c in zip(
        breaks.bins, breaks.counts)]
    jenks_labels.insert(0, 'No restaurant (%s blocks)' % len(df_map[df_map['density_km'].isnull()]))
    plt.close()
    fig = plt.figure()
    # NOTE(review): 'axisbg' was removed in matplotlib >= 2.2 (use facecolor).
    ax = fig.add_subplot(111, axisbg='w', frame_on=False)
    # use a blue colour ramp - we'll be converting it to a map using cmap()
    cmap = plt.get_cmap('Blues')
    # draw wards with grey outlines
    df_map['patches'] = df_map['poly'].map(lambda x: PolygonPatch(x, ec='#555555', lw=.2, alpha=1., zorder=4))
    pc = PatchCollection(df_map['patches'], match_original=True)
    # impose our colour map onto the patch collection
    norm = Normalize()
    pc.set_facecolor(cmap(norm(df_map['jenks_bins'].values)))
    ax.add_collection(pc)
    # Add a colour bar
    cb = colorbar_index(ncolors=len(jenks_labels), cmap=cmap, shrink=0.5, labels=jenks_labels)
    cb.ax.tick_params(labelsize=6)
    # Show highest densities, in descending order
    # NOTE(review): DataFrame.sort() without arguments was removed in
    # pandas 0.20 (use sort_values); also `highest` is built but never
    # displayed on the figure.
    highest = '\n'.join(
        str(value[1]) for value in df_map[(df_map['jenks_bins'] == 4)][:10].sort().iterrows())
    highest = 'Most Dense Blocks:\n\n' + highest
    # Draw a map scale
    m.drawmapscale(
        coords[0] + 0.19, coords[1] + 0.015,
        coords[0], coords[1],
        10.,
        barstyle='fancy', labelstyle='simple',
        fillcolor1='w', fillcolor2='#555555',
        fontcolor='#555555',
        zorder=5)
    # this will set the image width to 722px at 100dpi
    plt.title(genre + " Restaurant Density, New York")
    plt.tight_layout()
    fig.set_size_inches(7.22, 5.25)
    plt.savefig('image/' + genre+'_Restaurants_Density_NewYork.png', dpi=100, alpha=True)
    plt.show()
def create_figure(year):
    """Build and save a choropleth of Scottish health-board data for `year`.

    Reads mappedData.csv, joins it onto the NHS health-board shapefile by
    board code, bins the 'alcandmental' column (14-class Jenks by default,
    or user-defined bins), renders the map and returns the figure.
    The PNG is written to static/map_<year>combinedRatio.png.
    """
    data1 = pd.read_csv('mappedData.csv')
    data = data1[data1['year'] == year]
    # load the shape file as shp (.shp extension required by fiona)
    # shp = fiona.open('LKA_adm_2/LKA_adm1.shp')
    shp = fiona.open('static/shapefile/SG_NHS_HealthBoards_2018_WGS84.shp')
    # Projections kept for the commented-out transform calls below.
    osgb36 = pyproj.Proj("+init=EPSG:27700")
    wgs84 = pyproj.Proj("+init=EPSG:4326")
    # we can access the boundaries (the 2 lat,long pairs) using shp.bounds
    bds = shp.bounds
    # close the shp file
    shp.close()
    # padding fraction used when displaying the map
    extra = 0.1
    # ll = pyproj.transform(osgb36,wgs84,bds[0],bds[1])
    # lower left hand boundary (longitude, latitude)
    ll = (bds[0], bds[1])
    # upper right hand boundary (longitude, latitude)
    ur = (bds[2], bds[3])
    # ur = pyproj.transform(osgb36,wgs84,bds[2], bds[3])
    # concatenate the lower left and upper right into a variable called coordinates
    coords = list(chain(ll, ur))
    # width and height of the map extent
    w, h = coords[2] - coords[0], coords[3] - coords[1]
    m = Basemap(
        # 'tmerc' is less distorting when close-in
        projection='tmerc',
        # lon_0/lat_0: centre of the extent
        # lon_0=np.average(pyproj.transform(osgb36,wgs84,bds[0], bds[2])),
        lon_0=np.average([bds[0], bds[2]]),
        lat_0=np.average([bds[1], bds[3]]),
        # lat_0=np.average(pyproj.transform(osgb36,wgs84,bds[1], bds[3])),
        ellps='WGS84',
        # NOTE(review): the latitude padding reads `- extra + 0.01 * h`
        # (and `+ extra + 0.01 * h`), i.e. a constant 0.1 degrees rather
        # than a fraction of h as on the longitude side -- looks like a
        # propagated tutorial typo for `extra * h`; confirm before changing.
        llcrnrlon=coords[0] - extra * w,
        llcrnrlat=coords[1] - extra + 0.01 * h,
        urcrnrlon=coords[2] + extra * w,
        urcrnrlat=coords[3] + extra + 0.01 * h,
        # latitude of 'true scale'
        lat_ts=0,
        # boundary database resolution: c/l/i/h/f or None
        resolution='i',
        suppress_ticks=True)
    m.readshapefile(
        # path to the shapefile, without the .shp extension
        'static/shapefile/SG_NHS_HealthBoards_2018_WGS84',
        # attribute name under which the shapes are exposed on `m`
        'scotland',
        color='none',
        zorder=2)
    # set up a map dataframe
    df_map = pd.DataFrame({
        # access the x,y coords and define a polygon for each item in m.scotland
        'poly': [Polygon(xy) for xy in m.scotland],
        # convert HBCode to a column called 'boardcode'
        'boardcode': [boardcode['HBCode'] for boardcode in m.scotland_info]
    })
    # add the polygon area
    df_map['area_m'] = df_map['poly'].map(lambda x: x.area / 1000)
    # convert to miles
    df_map['area_miles'] = df_map['area_m'] * 0.000621371
    data = data.rename(columns={'code': 'boardcode'})
    df_map = pd.merge(df_map, data, on='boardcode')
    # toggle between Jenks natural breaks and user-defined bins
    jenks = True
    var_2_analyze = 'alcandmental'
    if jenks == True:
        # Calculate Jenks natural breaks for each polygon
        breaks = nb(
            # set the data to use
            df_map[df_map[var_2_analyze].notnull()][var_2_analyze].values,
            # number of initial solutions for the optimisation
            initial=300,
            # NOTE(review): k is 14 here although the surrounding comments
            # said 10 -- confirm which class count is intended.
            k=14)
    else:
        # user-defined break points (bins are the top of each range)
        my_bins = [25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75]
        breaks = mapclassify.User_Defined(
            df_map[df_map[var_2_analyze].notnull()][var_2_analyze].values,
            my_bins)
    # drop a stale 'bins' column so it can be recreated
    # NOTE(review): positional axis in drop('bins', 1) is deprecated in
    # pandas >= 1.1 (use axis=1 keyword).
    if 'bins' in df_map.columns:
        df_map = df_map.drop('bins', 1)
    # the notnull method lets us match indices when joining
    b = pd.DataFrame({'bins': breaks.yb}, index=df_map[df_map[var_2_analyze].notnull()].index)
    df_map = df_map.join(b)
    # handle NA's, if any
    df_map.bins.fillna(-1, inplace=True)
    # labels differ slightly between jenks and user-defined breaks
    # (note: the loop variable b shadows the DataFrame b above -- harmless
    # here since b is no longer needed, but confusing)
    if jenks == True:
        bin_labels = ["<= %0.0f" % b for b in breaks.bins]
    else:
        bin_labels = ["< %0.0f" % b for b in breaks.bins]
    # initialize the plot
    plt.clf()
    fig = plt.figure(facecolor='white')
    ax = fig.add_subplot(111, facecolor='w', frame_on=False)
    # reversed purple colour ramp
    cmap = plt.get_cmap('Purples').reversed()
    # draw district with grey outlines
    df_map['patches'] = df_map['poly'].map(
        lambda x: PolygonPatch(x, ec='#555555', lw=.2, alpha=1., zorder=4))
    pc = PatchCollection(df_map['patches'], match_original=True)
    # normalize our bins between the min and max values within the bins
    norm = Normalize(vmin=df_map['bins'].min(), vmax=df_map['bins'].max())
    # impose our color map onto the patch collection
    pc.set_facecolor(cmap(norm(df_map['bins'].values)))
    ax.add_collection(pc)
    # Add a color bar which has our bin_labels applied
    cb = colorbar_index(ncolors=len(bin_labels), cmap=cmap, shrink=0.5, labels=bin_labels)
    cb.ax.tick_params(labelsize=10)
    # Draw a map scale
    m.drawmapscale(
        # where the scale should appear
        coords[0] + 0.08, coords[1] + 0.215,
        coords[0], coords[1],
        # max value of the scale (1 mile)
        1.,
        barstyle='fancy', labelstyle='simple',
        fillcolor1='w', fillcolor2='#555555',
        fontcolor='#555555',
        zorder=5,
        units='mi')
    plt.tight_layout()
    fig.set_size_inches(5, 6)
    mapName = 'map_' + str(year) + 'combinedRatio.png'
    # NOTE(review): 'alpha' is not a savefig keyword; recent matplotlib
    # raises on it ('transparent' is probably what was meant).
    plt.savefig('static/' + mapName, dpi=500, alpha=True)
    return fig
right_index = True, how = 'right') # not fully satisfactory... loss of polygons ls_disp_com_rg = ['available_surface_%s' %rg for rg in ls_rgs] +\ ['surface_%s' %rg for rg in ls_rgs] # ################### # DRAW AVAIL SURFACE # ################### for retail_group in ls_rgs: field = 'available_surface_%s' %retail_group breaks = nb(df_com[df_com[field].notnull()][field].values, initial=20, k=5) # zero excluded from natural breaks... specific class with val -1 (added later) df_com.replace(to_replace={'surface_%s' %retail_group: {0: np.nan}}, inplace=True) # the notnull method lets us match indices when joining jb = pd.DataFrame({'jenks_bins': breaks.yb}, index=df_com[df_com[field].notnull()].index) # need to drop duplicate index in jb jb = jb.reset_index().drop_duplicates(subset=['index'], take_last=True).set_index('index') # propagated to all rows in df_com with same index df_com['jenks_bins'] = jb['jenks_bins'] df_com.jenks_bins.fillna(-1, inplace=True) jenks_labels = ["<= {:,.0f} avail surf. ({:d} mun.)".format(b, c)\
if type(cmap) == str: cmap = get_cmap(cmap) colors_i = np.concatenate((np.linspace(0, 1., N), (0., 0., 0., 0.))) colors_rgba = cmap(colors_i) indices = np.linspace(0, 1., N + 1) cdict = {} for ki, key in enumerate(('red', 'green', 'blue')): cdict[key] = [(indices[i], colors_rgba[i - 1, ki], colors_rgba[i, ki]) for i in xrange(N + 1)] return matplotlib.colors.LinearSegmentedColormap(cmap.name + "_%d" % N, cdict, 1024) #Let's make the map df_map['Price'] = df_map['poly'].map(lambda x: np.mean([d_price[(i.x, i.y)] for i in filter(prep(x).contains, price_points)])) #Calculate Jenks natural breaks for price breaks = nb( df_map[df_map['Price'].notnull()].Price.values, initial=300, k=5) #the notnull method lets us match indices when joining jb = pd.DataFrame({'jenks_bins': breaks.yb}, index=df_map[df_map['Price'].notnull()].index) df_map = df_map.join(jb) df_map.jenks_bins.fillna(-1, inplace=True) #Let's convert prices in a more readable format (e.g GBP 1,500,000) locale.setlocale(locale.LC_ALL, '') locale.currency(7000000, grouping=True ) jenks_labels = [u"\xA3" + "%s (%s wards)" % (locale.currency(b, grouping = True)[1:-3], c) for b, c in zip( breaks.bins, breaks.counts)] jenks_labels.insert(0, 'No property sales registered\n(%s wards)' % len(df_map[df_map['Price'].isnull()]))
thecount += 1 try: myDataFrame = pd.DataFrame({"TheData": myArray}) #La fonction "GetParameterAsText" invite l'utilisateur de nommer le fichier géographique # ("feature class" ou "fc") sur lequel les opérations vont commencer. #Ce script utilise "IQH_FINAL" comme le champ des données sur lequel les opérations vont # commencer ("field"). #La calculation utilise les progiciels arcpy.da (analyse des données) et numpy. #Nonobstant que le tableau numérique consiste en nombres entiers, le tableau est # transformé au format de point flottant ("float") parce que le progiciel PySAL # a besoin de cette transformation pour l'intégrer avec la fonction KMEANS. #L'iteration "for-if-else" trie les valeurs "null" des vraies données, et après cette # sortation, les données sont transferées en format de cadre des données pandas. print "Calcul des Jenks natural breaks..." breaks = nb(myDataFrame["TheData"].dropna().values, k=4, initial=20) #Le calcul des valeurs Jenks est produit par le progiciel pysal. Tous les valeurs # "null" sont sortis, et les données qui restent sont préparées pour l'analyse. #La paramètre k symbolise le nombre des classes la fonction Jenks va créer pour # l'utilisateur. #La paramètre initial est le semence de la fonction Jenks. Un valeur grand va # converger la fonction plus vite; un valeur petit, d'autre part, va être plus exact. print "Vérification s'il y avait calculs précédents des champs de valeurs Jenks..." try: arcpy.DeleteField_management(fc, "Jenks") print "Calculs précédents des champs de valeurs Jenks effacés..." except Exception as e: print "Aucuns champs des valeurs Jenks trouvés..." #Cette iteration "try-except" efface les calculs précédents s'ils existent. Si un champ
ungent_sample.append(True) else: ungent_sample.append(False) if float(ct_num) in ungent_cts_all: ungent_all.append(True) else: ungent_all.append(False) df_map['is_gent'] = is_gent df_map['ungent_sample'] = ungent_sample df_map['ungent_all'] = ungent_all # In[39]: # Calculate Jenks natural breaks for density breaks = nb(df_map[df_map['num_cafes'].notnull()].num_cafes.values, initial=300, k=5) # the notnull method lets us match indices when joining jb = pd.DataFrame({'jenk_bins': breaks.yb}, index=df_map[df_map['num_cafes'].notnull()].index) df_map = df_map.join( jb) # these are already compleeted. Running 2nd time causes errors #df_map.jenks_bins.fillna(-1, inplace=True) # In[40]: # Calculate Jenks natural breaks for density breaks2 = nb(df_map[df_map['num_bizs'].notnull()].num_bizs.values, initial=10, k=5) # the notnull method lets us match indices when joining
### Binning # change False to True to use Jenks binning jenks = True # specify variable that will be plotted var_2_analyze = 'state_results' if jenks == True: # Calculate Jenks natural breaks for each polygon breaks = nb( # set the data to use df_map[df_map[var_2_analyze].notnull()][var_2_analyze].values, # since this is an optimization function we need to give it a number of initial solutions to find. # you can adjust this number if you are unsatisfied with the bin results initial=300, # k is the number of natural breaks you would like to apply. I've set it to 10, but you can change. k=10) else: # Define my own breaks [even split each 20 percentage points] Note that the bins are the top range so >20, >40, etc # you can change the bins to whatever you like, though they should be based on the data you are analyzing # since I am going to plot data on a 0 to 100 scale, I chose these break points my_bins = [20,40,60,80,100] # Calculate the user defined breaks for our defined bins breaks = mapclassify.User_Defined( # set the data to use
# Derive the map corner coordinates from the per-shape bounds frame,
# then bin parking density into 6 Jenks natural-breaks classes.
bounds_dataframe.columns = ['MinX', 'MinY', 'MaxX', 'MaxY']
min_x = bounds_dataframe['MinX'].min()
min_y = bounds_dataframe['MinY'].min()
max_x = bounds_dataframe['MaxX'].max()
max_y = bounds_dataframe['MaxY'].max()
# m(..., inverse=True) converts projected coords back to lon/lat.
lower_point = m(min_x, min_y, inverse=True)
upper_point = m(max_x, max_y, inverse=True)
llcrnrlon = lower_point[0]
llcrnrlat = lower_point[1]
urcrnrlon = upper_point[0]
urcrnrlat = upper_point[1]
# 6-class Jenks natural breaks over districts with non-null density.
breaks = nb(
    df_map[df_map['density_km'].notnull()].density_km.values,
    initial=300,
    k=6)
# Joining on the notnull index lines the bins up with their districts.
jb = pd.DataFrame({'jenks_bins': breaks.yb}, index=df_map[df_map['density_km'].notnull()].index)
df_map = df_map.join(jb)
# Districts with no parking get sentinel bin -1.
df_map.jenks_bins.fillna(-1, inplace=True)
jenks_labels = ["<= %.3f/km$^2$(%s districts)" % (b, c) for b, c in zip(breaks.bins, breaks.counts)]
jenks_labels.insert(0, 'Parking density (%s districts)' % len(df_map[df_map['density_km'].isnull()]))
plt.clf()
fig = plt.figure()
# NOTE(review): 'axisbg' was removed in matplotlib >= 2.2 (use facecolor).
ax = fig.add_subplot(111, axisbg='w', frame_on=False)
# use a blue colour ramp - we'll be converting it to a map using cmap()
cmap = plt.get_cmap('Blues')
else: ungent_sample.append(False) if float(ct_num) in ungent_cts_all: ungent_all.append(True) else: ungent_all.append(False) df_map['is_gent'] = is_gent df_map['ungent_sample'] = ungent_sample df_map['ungent_all'] = ungent_all # In[39]: # Calculate Jenks natural breaks for density breaks = nb( df_map[df_map['num_cafes'].notnull()].num_cafes.values, initial=300, k=5) # the notnull method lets us match indices when joining jb = pd.DataFrame({'jenk_bins': breaks.yb}, index=df_map[df_map['num_cafes'].notnull()].index) df_map = df_map.join(jb) # these are already compleeted. Running 2nd time causes errors #df_map.jenks_bins.fillna(-1, inplace=True) # In[40]: # Calculate Jenks natural breaks for density breaks2 = nb( df_map[df_map['num_bizs'].notnull()].num_bizs.values, initial=10, k=5) # the notnull method lets us match indices when joining
# Build the Milano grid frame, join call counts, and bin per-km^2 call
# density into 5 Jenks natural-breaks classes.
# Python 2 code: `range(cuts) + [-1]` concatenates a list -- under
# Python 3 range is not a list and this raises TypeError.
df_map = pd.DataFrame({
    'poly': [Polygon(xy) for xy in m.Milano],
    'square_id': [square['ID'] for square in m.Milano_info]})
df_map['area_m'] = df_map['poly'].map(lambda x: x.area)
df_map['area_km'] = df_map['area_m'] / 1000000
# Left-join keeps every call record even without a matching grid square.
calls = pd.merge(groupedCalls, df_map, how = 'left', on = 'square_id', sort = False)
calls['density_km'] = calls['callsOut'] / calls['area_km']
# NaN is easier to classify around than zero.
calls.replace(to_replace={'density_km': {0: np.nan}}, inplace=True)
# Calculate Jenks natural breaks for density
# Classification scheme for choropleth mapping
cuts = 5
breaks = nb(
    calls[calls['density_km'].notnull()].density_km.values,
    initial = 300, # number of initial solutions to generate
    k = cuts) # number of classes required
# The notnull method lets match indices when joining
jb = pd.DataFrame({'jenks_bins': breaks.yb},
                  index = calls[calls['density_km'].notnull()].index)
calls = calls.join(jb)
# NOTE(review): unlike the sibling scripts, jenks_bins is not fillna(-1)'d
# here -- squares without calls keep NaN; -1 only appears in binlevels.
binlevels = range(cuts) + [-1] # Possible levels of the bins
# Create a sensible label for classes
# Show density/square km, as well as the number of squares in the class
jenks_labels = ["<= %0.1f/km$^2$" % (b) for b in breaks.bins]
jenks_labels.insert(0, 'No calls made')
# Sorted list of the 15 min time intervals
times = sorted(list(set(calls['time']))) # 96 time intervals (15 x 4 x 24)
# could take merge approach + test with zagaz pd_df_dpts['dpt_nb_stations'] = np.nan grouped_dpt = pd_df_master_info.groupby('dpt') for dpt, group in grouped_dpt: pd_df_dpts['dpt_nb_stations'].ix[dpt] = len(group) # density (different definitions) pd_df_dpts['density_area'] = pd_df_dpts['dpt_nb_stations'] / pd_df_dpts['area'] pd_df_dpts['density_pop'] = pd_df_dpts['dpt_nb_stations'] /\ pd_df_dpts['Population municipale 2007 POP_MUN_2007'] for density_field in ('density_area', 'density_pop'): # Easier to work with NaN values when classifying pd_df_dpts.replace(to_replace={density_field: {0: np.nan}}, inplace=True) # Calculate Jenks natural breaks for density breaks = nb(pd_df_dpts[pd_df_dpts[density_field].notnull()][density_field].values, initial=300, k=5) # The notnull method lets us match indices when joining jb = pd.DataFrame({'jenks_bins': breaks.yb}, index=pd_df_dpts[pd_df_dpts[density_field].notnull()].index) pd_df_dpts = pd_df_dpts.join(jb) pd_df_dpts.jenks_bins.fillna(-1, inplace=True) fig, ax = plt.subplots() m_france.drawcountries() m_france.drawcoastlines() pd_df_dpts['patches'] = pd_df_dpts['poly'].map(lambda x: PolygonPatch(x, fc='#555555', ec='#787878', lw=.25, alpha=.9, zorder=4)) cmap = plt.get_cmap('Blues')
def mP_data(flnm, colName, df, imp = None): num_colors = 10 if imp is None: import pandas as pd import matplotlib.pyplot as plt import matplotlib.colors as colors from shapely.geometry import Point, Polygon, MultiPoint, MultiPolygon from pysal.esda.mapclassify import Natural_Breaks as nb from matplotlib.collections import PatchCollection from descartes import PolygonPatch import fiona from itertools import chain shp = fiona.open(flnm+'.shp') bds = shp.bounds extra = 0.02 if 'units' in shp.crs and shp.crs['units'] == 'm': print 'Unit is meters, converting boundaries' conv = Basemap() ll = conv(bds[0],bds[1],inverse=True) ur = conv(bds[2],bds[3],inverse=True) print shp.crs else: ll = (bds[0], bds[1]) ur = (bds[2], bds[3]) # shp.close() coords = list(chain(ll, ur)) w, h = coords[2] - coords[0], coords[3] - coords[1] # print coords; print extra # Check proj4, .prj file... m = Basemap( projection='tmerc', lon_0=-2., lat_0=49., ellps = 'WGS84', llcrnrlon=coords[0] - extra * w, llcrnrlat=coords[1] - extra + 0.01 * h, urcrnrlon=coords[2] + extra * w, urcrnrlat=coords[3] + extra + 0.01 * h, lat_ts=0, resolution='i', suppress_ticks=False) m.readshapefile( flnm, 'map', color='none', zorder=2) # Setup a dataframe that imports the dictionary of map properties and then # selects rows corresponding to the imported dataframe df temp_df = pd.DataFrame() for dicti in m.map_info: temp_df = temp_df.append(pd.Series(dicti),ignore_index=True) # print temp_df; quit() i1 = temp_df.set_index('label').index i2 = df.set_index('Sector').index temp_df = temp_df[i1.isin(i2)] # set up a map dataframe df_map = pd.DataFrame({'poly': [Polygon(xy) for xy in m.map]}) df_map['area_m'] = df_map['poly'].map(lambda x: x.area) #Select only the part that corresonds to the imported dataframe of data df_map = pd.concat([df_map, temp_df], axis=1, join='inner') df_map['area_km'] = df_map['area_m'] / 10000. if len(df_map.index) == len(df.index): print '!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!' 
print '!! WARNING : SHAPE OF DATAFRAMES NOT CONSISTENT !!' print '!! --- check: df_map and df in mapDataPlot --- !!' print '!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!' # Merge dataframes df_map = pd.merge(left=df_map, right=df, left_on='label', right_on='Sector', how='inner') print '... built map frame ...' ## Calculates Jenks natural breaks, over notnull column prices = df_map[df_map[colName].notnull()][colName].tolist() breaks = nb ( prices, initial=250, #number of initial solutions in iteriative Jenks algo k=num_colors ) print 'Calculting Jenks Natural breaks for binning' jenbin = pd.DataFrame({'jenks_bins': breaks.yb}, index=df_map[df_map[colName].notnull()].index) df_map = df_map.join(jenbin) df_map.jenks_bins.fillna(-1, inplace=True) # draw ward patches from polygons print 'Building Patches' df_map['patches'] = df_map['poly'].map(lambda x: PolygonPatch( x, fc='0.33', edgecolor='black', lw=.33, alpha=.9)) print 'Last touches of color, with Jenks' # Set colors using Jenks breaks ## Setup ColorMap colorm = plt.get_cmap('bwr') norm = colors.Normalize() pc = PatchCollection(df_map['patches'], match_original=True) pc.set_facecolor(colorm(norm(df_map['jenks_bins'].values))) print 'labels and scale' # Prepare the plt plot and axes plt.clf() fig = plt.figure() ax = fig.add_subplot(111, fc='w', frame_on=False) # Add a colour bar cb = colorbar_index(num_colors, colorm, shrink=0.5) cb.ax.tick_params(labelsize=6) print "get_bounds" newcoords = get_bounds(m,df_map) print newcoords # DOES'T WORK, DOES NOT ACCEPT OSGB COORDS. # m.drawmapscale( # newcoords[0], newcoords[1], # coords[0], coords[1], # 10., # barstyle='fancy', labelstyle='simple', # fillcolor1='w', fillcolor2='#555555', # fontcolor='#555555', # zorder=5) #Primitive Scale legendy = newcoords[2]*0.98 legendx0 = newcoords[0]*0.98 leglength =np.floor((newcoords[1]-newcoords[0])/4/1000) legendx1 = newcoords[0]*0.98+leglength*1000. 
ax.plot([legendx0,legendx1],[legendy, legendy], 'k-', lw=2) ax.text(legendx0,legendy*0.96,'0') ax.text(legendx1,legendy*0.96,str(int(leglength))) ax.add_collection(pc) print "axes and plotting!" ax.axis('auto'); ax.axis('off') #set aspect ratio to latitude-longitude read ax.set_aspect( (newcoords[1]-newcoords[0]) / (newcoords[3]-newcoords[2]) ) plt.show() return;
def computeTransportNaturalBreaks(self, rawtransfile):
    """Classify the raw transport raster into 4 natural-break classes,
    crop it to the area of interest, and remap the class codes.

    rawtransfile: path of the input transport GeoTIFF.
    Side effects only: writes tmp/classified-transport.tiff,
    classified-transport.tiff, and evals/TRANS/TRANS.tiff under the
    configured output directory. Returns None.
    """
    print("Computing Natural breaks on Transport..")
    # Fetch the area-of-interest shapefile; its geometries drive the
    # mask/crop step further down.
    myDataDownloader = DataHelper.DataDownloader()
    localfile = myDataDownloader.downloadFiles([config.settings['aoi']])
    with fiona.open(localfile, "r") as aoi:
        geoms = [feature["geometry"] for feature in aoi]
    classfiedtranstmppath = os.path.join(self.cwd, config.settings['outputdirectory'], 'tmp', 'classified-transport.tiff')
    # Pass 1: compute Jenks natural breaks over the valid pixels.
    with rasterio.open(rawtransfile) as src:
        profile = src.profile
        bands = src.read()
        for band in bands:
            # Keep only non-None, non-NaN pixels for classification.
            b = band[(band != np.array(None)) & (np.logical_not(np.isnan(band)))]
            breaks = nb(b.ravel(), k=4, initial=1)
            # NOTE(review): `bins` is overwritten each iteration, so with a
            # multi-band raster only the LAST band's breaks survive to the
            # digitize step below -- presumably the raster is single-band;
            # TODO confirm.
            bins = breaks.bins.tolist()
            # bins.insert(1,-1) # add -1 to the beginning of the breaks
            # print bins
    print("Writing new Transport with Natural break classes..")
    # Pass 2: replace every pixel value by its break-class index.
    with rasterio.open(rawtransfile) as src:
        profile = src.profile
        bands = src.read(masked=True)
        for band in bands:
            # In-place elementwise classification via np.nditer.
            for x in np.nditer(band, op_flags=['readwrite']):
                x[...] = np.digitize(x, bins)
        # Reproject and write each band
        with rasterio.open(classfiedtranstmppath, 'w', **profile) as dst:
            dst.write(bands)
    classfiedtranspath = os.path.join(self.cwd, config.settings['outputdirectory'], 'classified-transport.tiff')
    print("Cropping Transport..")
    # Crop the classified raster to the AOI geometries.
    with rasterio.open(classfiedtranstmppath) as trans_src:
        trans_out_image, trans_out_transform = mask(trans_src, geoms, crop=True)
        trans_out_meta = trans_src.meta.copy()
    trans_out_meta.update({"driver": "GTiff",
                           "height": trans_out_image.shape[1],
                           "width": trans_out_image.shape[2],
                           "transform": trans_out_transform})
    with rasterio.open(classfiedtranspath, "w", **trans_out_meta) as trans_dest:
        trans_dest.write(trans_out_image)
    # Remap break classes to evaluation scores: 1->2, 2->3, 3->1, 4->1.
    TransClassification = dict([(1, 2), (2, 3), (3, 1), (4, 1)])
    print("Reclassing Transport file..")
    finaltransevalpath = os.path.join(self.cwd, config.settings['outputdirectory'], 'evals', 'TRANS', 'TRANS.tiff')
    with rasterio.open(classfiedtranspath) as transnogdhsrc:
        classifiedprofile = transnogdhsrc.profile
        classifiedbands = transnogdhsrc.read()
    # Vectorized dict lookup; pixel values absent from the dict map to None.
    classifiedbands1 = np.vectorize(TransClassification.get)(classifiedbands)
    classifiedbands2 = classifiedbands1.astype(np.float32)
    with rasterio.open(finaltransevalpath, 'w', **classifiedprofile) as classifieddst:
        classifieddst.write(classifiedbands2)
    print("Reclassing completed")
    print("...")
def GenerateMap(self, inputFile):
    """Render one travel-time/-distance choropleth for `inputFile` (a
    MetropAccess travel-matrix CSV, ';' separated) and save it as a PNG.

    Instance state used: self.A (column to map), self.Cl (classification
    scheme name), self.Nclasses, self.Y (YKR grid frame), self.C (coast),
    self.R (roads), self.M (metro lines), self.B (Basemap), self.Ypop
    (population CSV path), self.outputFolder, self.coords.

    Returns the elapsed wall-clock seconds (int) on success, or --
    NOTE(review) -- the caught exception OBJECT (not raised) on failure;
    callers must type-check the result.
    """
    start = time.time()
    #Create file which hold statistics for each inputFile (containing mean/median travel times, std, min, max etc.)
    self.statistics = self.createStatistics()
    self.basename = os.path.basename(inputFile)[:-4]  # strip 4-char extension
    AttributeParameter = self.A
    coords = self.coords
    #Format figure
    plt.clf()
    fig = plt.figure()
    #Picture frame for Map
    gs = gridspec.GridSpec(12, 12)
    ax = plt.subplot(gs[:, :], axisbg='w', frame_on=False)
    try:
        #Read MetropAccess-matka-aikamatriisi data in
        MatrixData = pd.read_csv(inputFile, sep=';')
        #Join data to shapefile (pandas 'merge' function)
        df_map = pd.merge(left=self.Y, right=MatrixData, how='outer',
                          left_on='YKR_ID', right_on='from_id')
        #CLASSIFY MATRIX DATA
        #Replace -1 values (the matrix's NoData sentinel) with NaN
        df_map.replace(to_replace={AttributeParameter: {-1: np.nan}}, inplace=True)
        #Data for histogram
        histData = pd.DataFrame(df_map[df_map[AttributeParameter].notnull()][AttributeParameter].values)
        maxBin = max(df_map[df_map[AttributeParameter].notnull()][AttributeParameter].values)  #.AttributeParameter.values)
        NoData = int(maxBin + 1)  # class value reserved for missing cells
        NullCount = len(df_map[df_map[AttributeParameter].isnull()])
        # 13230 = total cell count of the grid (share of nulls, in %)
        NullP = (NullCount / 13230.0) * 100
        #Fill NoData values with maxBin+1 value
        df_map[AttributeParameter].fillna(NoData, inplace=True)
        #Manual classification
        if not self.Cl in ['Natural Breaks', 'Quantiles', "Fisher's Jenks"]:
            #Create bins for classification based on chosen classification method
            Manual = True
            if "time" in AttributeParameter:
                measure = "min"
                measure2 = "minutes"  #Another string-form for summary
                titleMeas = "time"
                if self.Cl == "10 Minute Equal Intervals":
                    #Calculate the highest class (10 minutes * Number of classes)
                    maxClass = 10 * self.Nclasses
                    #Create 'higher than' info for the colorbar
                    maxClassInfo = str(maxClass - 10)
                    #Create array of bins from 0 to highest class with increments of 10
                    bins = np.arange(10, maxClass, 10)
                    #Add extra classes for No Data and higher than maxClass values
                    if maxBin < maxClass:
                        bins = list(np.append(bins, [maxClass + 1, maxClass + 2]))
                    else:
                        bins = list(np.append(bins, [maxBin, maxBin + 1]))
                elif self.Cl == "5 Minute Equal Intervals":
                    #Calculate the highest class (5 minutes * Number of classes)
                    maxClass = 5 * self.Nclasses
                    #Create 'higher than' info for the colorbar
                    maxClassInfo = str(maxClass - 5)
                    #Create array of bins from 0 to highest class with increments of 5
                    bins = np.arange(5, maxClass, 5)
                    #Add extra classes for No Data and higher than maxClass values
                    if maxBin < maxClass:
                        bins = list(np.append(bins, [maxClass + 1, maxClass + 2]))
                    else:
                        bins = list(np.append(bins, [maxBin, maxBin + 1]))
            elif "dist" in AttributeParameter:
                measure = "km"
                measure2 = "kilometers"
                titleMeas = "distance"
                if self.Cl == "5 Km Equal Intervals":
                    #Calculate the highest class (5000 meters * Number of classes)
                    maxClass = 5000 * self.Nclasses
                    #Create 'higher than' info for the colorbar
                    maxClassInfo = str((maxClass - 5000) / 1000)
                    #Create array of bins with increments of 5000 (meters)
                    bins = np.arange(5000, maxClass, 5000)
                    #Add extra classes for No Data and higher than maxClass values
                    if maxBin < maxClass:
                        bins = list(np.append(bins, [maxClass + 1, maxClass + 2]))
                    else:
                        bins = list(np.append(bins, [maxBin, maxBin + 1]))
                elif self.Cl == "10 Km Equal Intervals":
                    #Calculate the highest class (10000 meters * Number of classes)
                    maxClass = 10000 * self.Nclasses
                    #Create 'higher than' info for the colorbar
                    maxClassInfo = str((maxClass - 10000) / 1000)
                    # NOTE(review): this variant starts at 0 while the others
                    # start at one step -- confirm intentional.
                    bins = np.arange(0, maxClass, 10000)
                    #Add extra classes for No Data and higher than maxClass values
                    if maxBin < maxClass:
                        bins = list(np.append(bins, [maxClass + 1, maxClass + 2]))
                    else:
                        bins = list(np.append(bins, [maxBin, maxBin + 1]))
            #Classify data based on bins
            breaks = mc.User_Defined(df_map[df_map[AttributeParameter].notnull()][AttributeParameter], bins)
        else:
            Manual = False
            if self.Cl == 'Natural Breaks':
                breaks = nb(df_map[df_map[AttributeParameter].notnull()][AttributeParameter],
                            initial=100, k=self.Nclasses)
            elif self.Cl == 'Quantiles':
                breaks = Quantiles(df_map[df_map[AttributeParameter].notnull()][AttributeParameter],
                                   k=self.Nclasses)
            elif self.Cl == "Fisher's Jenks":
                breaks = fj(df_map[df_map[AttributeParameter].notnull()][AttributeParameter],
                            k=self.Nclasses)
            bins = list(breaks.bins)
            if "time" in AttributeParameter:
                measure = "min"
                measure2 = "minutes"  #Another string-form for summary
                titleMeas = "time"
                maxClassInfo = str(bins[-2])
            else:
                measure = "km"
                measure2 = "kilometers"
                titleMeas = "distance"
                maxClassInfo = str(bins[-2] / 1000)
            bins.append(maxBin)
            bins.append(maxBin)
        #the notnull method lets us match indices when joining
        jb = pd.DataFrame({'jenks_bins': breaks.yb},
                          index=df_map[df_map[AttributeParameter].notnull()].index)
        df_map = df_map.join(jb)
        brksBins = bins[:-1]  #breaks.bins[:-1] #Do not take into account NoData values
        if measure2 == "kilometers":
            #Convert meters (in data) to kilometers for legend
            b = [round((x / 1000), 0) for x in brksBins]
            brksBins = b
            del b
        brksCounts = breaks.counts[:-1]  #Do not take into account NoData values
        #Check if brksCounts and brksBins dismatches --> insert 0 values if necessary (to match the counts)
        if len(brksBins) != len(brksCounts):
            dif = len(brksBins) - len(brksCounts)
            brksCounts = np.append(brksCounts, [0 for x in xrange(dif)])
        else:
            dif = 0
        #List for measures which will be inserted to class labels
        measureList = [measure for x in xrange(len(brksBins))]
        #Class labels: upper bound, unit, and share of all 13230 grid cells
        jenks_labels = ["%0.0f %s (%0.1f %%)" % (b, msr, (c / 13230.0) * 100)
                        for b, msr, c in zip(brksBins[:-1], measureList[:-1], brksCounts[:-1])]
        if Manual == True:
            if "dist" in AttributeParameter:
                jenks_labels.insert(int(maxBin), '>' + maxClassInfo + ' km (%0.1f %%)' % ((brksCounts[-1] / 13230.0) * 100))
            else:
                jenks_labels.insert(int(maxBin), '>' + maxClassInfo + ' min (%0.1f %%)' % ((brksCounts[-1] / 13230.0) * 100))
        # NOTE(review): indentation reconstructed -- the NoData label is taken
        # to be added for ALL classification schemes; TODO confirm layout.
        jenks_labels.insert(NoData, 'NoData (%0.1f %%)' % (NullP))
        #Use modified colormap ('my_colormap') - Choose here the default colormap which is used as a startpoint --> cm.YourColor'sName (eg. cm.Blues)
        #See available Colormaps: http://matplotlib.org/examples/color/colormaps_reference.html
        cmap = self.my_colormap(cm.RdYlBu, len(bins))
        #Draw grid with grey outlines
        #RGB color-codes can be found at http://www.rapidtables.com/web/color/RGB_Color.htm
        df_map['Grid'] = df_map['poly'].map(lambda x: PolygonPatch(x, ec='#555555', lw=.2, alpha=1., zorder=4))
        pc = PatchCollection(df_map['Grid'], match_original=True)
        #-----------------------------
        #Reclassify data to value range 0.0-1.0 (--> colorRange is 0.0-1.0)
        if Manual == True:
            colbins = np.linspace(0.0, 1.0, len(bins))
            colbins = colbins - 0.001
            colbins[0], colbins[-1] = 0.0001, 1.0
            reclassification = {}
            for index in range(len(bins)):
                reclassification[index] = colbins[index]
            reclassification['_reclassify'] = self.reclassify
            reclass = []
            dataList = list(df_map['jenks_bins'])
            for value in dataList:
                reclass.append(self.reclassify(reclassification, value))
            df_map['jenks_binsR'] = reclass
        else:
            norm = Normalize()
            df_map['jenks_binsR'] = norm(df_map['jenks_bins'].values)
        #-----------------------------
        #Impose colour map onto the patch collection
        pc.set_facecolor(cmap(df_map['jenks_binsR'].values))
        #Add colored Grid to map
        ax.add_collection(pc)
        #Add coastline to the map (alpha adjusts transparency, fc='facecolor', ec='edgecolor')
        self.C['Polys'] = self.C['poly'].map(lambda x: PolygonPatch(x, fc='#606060', ec='#555555', lw=.25, alpha=.88, zorder=4))
        cpc = PatchCollection(self.C['Polys'], match_original=True)
        ax.add_collection(cpc)
        #Add roads to the map
        for feature in self.R:
            xx, yy = feature.xy
            self.B.plot(xx, yy, linestyle='solid', color='#606060', linewidth=0.7, alpha=.6)
        #Add metro to the map
        for line in self.M:  #metroLines is a shapely MultiLineString object consisting of multiple lines (is iterable)
            x, y = line.xy
            self.B.plot(x, y, color='#FF2F2F', linewidth=0.65, alpha=.4)
        #----------------------
        #GENERATE TARGET POINT
        #----------------------
        #Generate YKR_ID from csv name (third '_'-separated token -- TODO
        #confirm against the actual file-naming convention)
        ykrID = int(self.basename.split('_')[2])
        #Find index of target YKR_ID
        tIndex = df_map.YKR_ID[df_map.YKR_ID == ykrID].index.tolist()
        trow = df_map[tIndex[0]:tIndex[0] + 1]
        targetPolygon = trow.poly
        centroid = targetPolygon.values[0].centroid  #Get centroid of the polygon --> Returns shapely polygon point-type object
        self.B.plot(centroid.x, centroid.y, 'go', markersize=3, label="= Destination")
        #-----------------------------
        #LEGEND
        #-----------------------------
        #Draw a map scale
        self.B.drawmapscale(
            coords[0] + 0.47, coords[1] + 0.013,  #offsets (in degrees) from the lower-left map corner
            coords[0], coords[1],
            10.,
            barstyle='fancy', labelstyle='simple',
            yoffset=200,  #yoffset determines the height of the mapscale
            fillcolor1='w', fillcolor2='#909090',
            fontsize=6,
            fontcolor='#202020',  # black= #000000
            zorder=5)
        #Set up title
        if "PT" in AttributeParameter:
            tMode = "public transportation"
        elif "Car" in AttributeParameter:
            tMode = "car"
        elif "Walk" in AttributeParameter:
            tMode = "walking"
        titleText = "Travel %s to %s (YKR-ID) \n by %s" % (titleMeas, str(ykrID), tMode)
        plt.figtext(.852, .735, titleText, size=9.5)
        #Plot copyright texts
        copyr = "%s MetropAccess project, University of Helsinki, 2014\nLicensed under a Creative Commons Attribution 4.0 International License" % (unichr(0xa9))
        plt.figtext(.24, .078, copyr, fontsize=4.5)
        #----------------
        #Add a colour bar
        #----------------
        #Set arbitary location (and size) for the colorbar ([DistFromLeft, DistFromBottom, Width, Height])
        axColor = plt.axes([.86, .15, .016, .52])
        #colorbar_index is a helper defined at the beginning of the code.
        cb = self.colorbar_index(ncolors=len(jenks_labels), cmap=cmap, labels=jenks_labels, cax=axColor)
        cb.ax.tick_params(labelsize=5.5)
        #Inform travel sum of the whole grid (i.e. centrality of the location)
        #Travel time
        if measure2 == "minutes":
            tMean = histData.mean().values[0]
            tMedian = histData.median().values[0]
            tMax = histData.max().values[0]
            tMin = histData.min().values[0]
            tStd = histData.std().values[0]
            travelSummary = "Summary:"
            travelMean = "Mean: %0.0f %s" % (tMean, measure2)
            travelMedian = "Median: %0.0f %s" % (tMedian, measure2)
            travelStd = "Std: %0.0f %s" % (tStd, measure2)
            travelRange = "Range: %0.0f-%0.0f %s" % (tMin, tMax, measure2)
        #Travel distance
        else:
            #Convert metres to kilometres for the summary and histogram
            h = histData.values / 1000
            histData = pd.DataFrame(h)
            del h
            tMean = histData.mean().values[0]
            tMedian = histData.median().values[0]
            tMax = histData.max().values[0]
            tMin = histData.min().values[0]
            tStd = histData.std().values[0]
            travelSummary = "Summary:"
            travelMean = "Mean: %0.1f %s" % (tMean, measure2)
            travelMedian = "Median: %0.1f %s" % (tMedian, measure2)
            travelStd = "Std: %0.1f %s" % (tStd, measure2)
            travelRange = "Range: %0.1f-%0.1f %s" % (tMin, tMax, measure2)
        #Write information to a statistics file
        mInfo = "%s;%0.0f;%0.0f;%0.0f;%0.0f;%0.0f\n" % (str(ykrID), tMean, tMedian, tStd, tMin, tMax)
        self.writeStatistics(mInfo)
        #Helper variables for moving Summary statistic texts
        initialPos = .58  #.15 #.44
        initialXPos = .975  #.20 #.97
        textSize = 5.25
        split = 0.018
        #Plot Travel Summary title
        plt.figtext(initialXPos, initialPos + split * 4, travelSummary, ha='left', va='bottom', color='#404040', size=textSize, style='normal', fontweight='bold')
        #Plot Travel Summary mean
        plt.figtext(initialXPos, initialPos + split * 3, travelMean, ha='left', va='bottom', size=textSize, color='b')
        #Plot Travel Summary median
        plt.figtext(initialXPos, initialPos + split * 2, travelMedian, ha='left', va='bottom', size=textSize, color='r')
        #Plot Travel Summary Standard deviation
        plt.figtext(initialXPos, initialPos + split, travelStd, ha='left', va='bottom', size=textSize)
        #Plot Travel Summary Range
        plt.figtext(initialXPos, initialPos, travelRange, ha='left', va='bottom', size=textSize)
        #Plot Legend symbol; bbox_to_anchor=(x,y) --> arbitary location for legend,
        #more info: http://matplotlib.org/api/legend_api.html
        ax.legend(bbox_to_anchor=(.97, 0.07), fontsize=5.5, frameon=False, numpoints=1)  #1.265
        #--------------------------------------------------------
        #Travel time and population (catchment areas) histograms
        #--------------------------------------------------------
        #New axes for travel time/distance histogram ([DistFromLeft, DistFromBottom, Width, Height])
        ax = plt.axes([.98, .39, .16, .14], axisbg='w')
        #Add histogram
        n, bins, patches = ax.hist(histData.values, 100, normed=False, facecolor='green', alpha=0.75, rwidth=0.5, orientation="vertical")
        ax.axvline(histData.median(), color='r', linestyle='solid', linewidth=1.8)
        ax.axvline(histData.mean(), color='b', linestyle='solid', linewidth=1.0)
        if measure2 == "minutes":
            ax.set_xlabel("t(min)", fontsize=5, labelpad=1.5)
            xupLim = 250  #upper limit for x-axis
        else:
            ax.set_xlabel("km", fontsize=5, labelpad=1.5)
            xupLim = 100  #upper limit for x-axis
        #Set valuelimits for axes
        ax.set_xlim(0, xupLim - 30)
        #ymax will be set to 1000 if count of individual bin is under 1000, else 1600
        if max(n) < 1000:
            yMax = 1000
        else:
            yMax = 1600
        ax.set_ylim(0, yMax)
        #Set histogram title
        plt.figtext(.975, .535, "Travel %s histogram" % titleMeas, ha='left', va='bottom', size=5.7, style='italic')
        #Adjust tick font sizes and set yaxis to right
        ax.tick_params(axis='both', direction="out", labelsize=4.5, pad=1, labelright=True, labelleft=False, top=False, left=False, color='k', length=3, width=.9)
        ax.xaxis.set_ticks(np.arange(0, xupLim - 30, 30))
        gridlines = ax.get_xgridlines()
        gridlines.extend(ax.get_ygridlines())
        for line in gridlines:
            line.set_linewidth(.28)
            line.set_linestyle('dotted')
        ax.grid(True)
        #----------------------------------------------------
        #New axes for population diagram ([DistFromLeft, DistFromBottom, Width, Height])
        ax = plt.axes([.98, .17, .16, .14], axisbg='w')
        #Make dataframe from Ykr-population
        pop = pd.read_csv(self.Ypop, sep=';')
        #Use original Matrix without NoData values
        MatrixData.replace(to_replace={AttributeParameter: {-1: np.nan}}, inplace=True)
        #Join population information and time matrix
        join = pd.merge(left=MatrixData, right=pop, how='outer', left_on='from_id', right_on='YKR_ID')
        #Sort data by attribute parameter (NOTE(review): shadows built-in `sorted`)
        sorted = join.sort(columns=[AttributeParameter])
        #Aggregate data by AttributeParameter
        aggre = pd.DataFrame(sorted.groupby(AttributeParameter).sum().Population)
        #Create attribute from index
        aggre[AttributeParameter] = aggre.index
        #Create cumulative population attribute
        aggre['cumPop'] = aggre['Population'].cumsum()
        #Reset index and determine AttributeParameter as float (matplotlib requires for it to work)
        aggre.reset_index(inplace=True, drop=True)
        # NOTE(review): astype() result is discarded here (not assigned back).
        aggre[AttributeParameter].astype(float)
        #print aggre[0:10]
        #Create filled curve plot from the cumulative population
        ax.fill_between(aggre.index, aggre['cumPop'] / 1000, 0, interpolate=True, lw=1, facecolor='green', alpha=0.6)
        #Set valuelimits for axes
        ax.set_xlim(0, xupLim - 50)
        ax.set_ylim(-10, aggre['cumPop'].max() / 1000 + 50)
        gridlines = ax.get_xgridlines()
        gridlines.extend(ax.get_ygridlines())
        for line in gridlines:
            line.set_linewidth(.28)
            line.set_linestyle('dotted')
        ax.grid(True)
        ax.tick_params(axis='both', direction="out", labelsize=4.5, pad=1, labelright=True, labelleft=False, top=False, left=False, color='k', length=3, width=.9)
        ax.xaxis.set_ticks(np.arange(0, xupLim - 30, 30))
        if measure2 == "minutes":
            ax.set_xlabel("t(min)", fontsize=5, labelpad=1.5)
            measure3 = 'minutes'
        else:
            measure3 = 'km'
            ax.set_xlabel("km", fontsize=5, labelpad=1.5)
        #Set histogram title
        plt.figtext(.975, .315, "Population (per 1000) reached within (x) %s" % measure3, ha='left', va='bottom', size=5.7, style='italic')
        #-----------------------
        #Save map to disk
        #-----------------------
        fig.set_size_inches(9.22, 6.35)  #(Width, Height)
        outputPath = os.path.join(self.outputFolder, self.basename) + AttributeParameter + ".png"
        plt.savefig(outputPath, dpi=300, alpha=True, bbox_inches='tight')
        plt.close()  #or plt.close('all') --> closes all figure windows
        end = time.time()
        lasted = int(end - start)
        return lasted
    except Exception as e:
        return e
def GenerateMap(self, inputFile): start = time.time() #Create file which hold statistics for each inputFile (containing mean/median travel times, std, min, max etc.) self.statistics = self.createStatistics() self.basename = os.path.basename(inputFile)[:-4] AttributeParameter = self.A coords = self.coords #Format figure plt.clf() fig = plt.figure() #Picture frame for Map gs = gridspec.GridSpec(12, 12) ax = plt.subplot(gs[:, :], axisbg='w', frame_on=False) try: #Read MetropAccess-matka-aikamatriisi data in MatrixData = pd.read_csv(inputFile, sep=';') #Join data to shapefile (pandas 'merge' function) df_map = pd.merge(left=self.Y, right=MatrixData, how='outer', left_on='YKR_ID', right_on='from_id') #CLASSIFY MATRIX DATA #Replace -1 values df_map.replace(to_replace={AttributeParameter: { -1: np.nan }}, inplace=True) #Data for histogram histData = pd.DataFrame( df_map[df_map[AttributeParameter].notnull()] [AttributeParameter].values) maxBin = max( df_map[df_map[AttributeParameter].notnull()] [AttributeParameter].values) #.AttributeParameter.values) NoData = int(maxBin + 1) NullCount = len(df_map[df_map[AttributeParameter].isnull()]) NullP = (NullCount / 13230.0) * 100 #Fill NoData values with maxBin+1 value df_map[AttributeParameter].fillna(NoData, inplace=True) #Manual classification if not self.Cl in [ 'Natural Breaks', 'Quantiles', "Fisher's Jenks" ]: #Create bins for classification based on chosen classification method Manual = True if "time" in AttributeParameter: measure = "min" measure2 = "minutes" #Another string-form for summary titleMeas = "time" if self.Cl == "10 Minute Equal Intervals": #Calculate the highest class (10 minutes * Number of classes) maxClass = 10 * self.Nclasses #Create 'higher than' info for the colorbar maxClassInfo = str(maxClass - 10) #Create array of bins from 0 to highest class with increments of 10 bins = np.arange(10, maxClass, 10) #Add extra classes for No Data and higher than maxClass values if maxBin < maxClass: bins = list( 
np.append(bins, [maxClass + 1, maxClass + 2])) else: bins = list(np.append(bins, [maxBin, maxBin + 1])) elif self.Cl == "5 Minute Equal Intervals": #Calculate the highest class (10 minutes * Number of classes) maxClass = 5 * self.Nclasses #Create 'higher than' info for the colorbar maxClassInfo = str(maxClass - 5) #Create array of bins from 0 to highest class with increments of 5 bins = np.arange(5, maxClass, 5) #Add extra classes for No Data and higher than maxClass values if maxBin < maxClass: bins = list( np.append(bins, [maxClass + 1, maxClass + 2])) else: bins = list(np.append(bins, [maxBin, maxBin + 1])) elif "dist" in AttributeParameter: measure = "km" measure2 = "kilometers" titleMeas = "distance" if self.Cl == "5 Km Equal Intervals": #Calculate the highest class (5000 meters * Number of classes) maxClass = 5000 * self.Nclasses #Create 'higher than' info for the colorbar maxClassInfo = str((maxClass - 5000) / 1000) #Create array of bins from 0 to highest class with increments of 5000 (meters) bins = np.arange(5000, maxClass, 5000) #Add extra classes for No Data and higher than maxClass values if maxBin < maxClass: bins = list( np.append(bins, [maxClass + 1, maxClass + 2])) else: bins = list(np.append(bins, [maxBin, maxBin + 1])) elif self.Cl == "10 Km Equal Intervals": #Calculate the highest class (5000 meters * Number of classes) maxClass = 10000 * self.Nclasses #Create 'higher than' info for the colorbar maxClassInfo = str((maxClass - 10000) / 1000) #Create array of bins from 0 to highest class with increments of 5000 (meters) bins = np.arange(0, maxClass, 10000) #Add extra classes for No Data and higher than maxClass values if maxBin < maxClass: bins = list( np.append(bins, [maxClass + 1, maxClass + 2])) else: bins = list(np.append(bins, [maxBin, maxBin + 1])) #Classify data based on bins breaks = mc.User_Defined( df_map[df_map[AttributeParameter].notnull()] [AttributeParameter], bins) else: Manual = False if self.Cl == 'Natural Breaks': breaks = 
nb(df_map[df_map[AttributeParameter].notnull()] [AttributeParameter], initial=100, k=self.Nclasses) elif self.Cl == 'Quantiles': breaks = Quantiles( df_map[df_map[AttributeParameter].notnull()] [AttributeParameter], k=self.Nclasses) elif self.Cl == "Fisher's Jenks": breaks = fj(df_map[df_map[AttributeParameter].notnull()] [AttributeParameter], k=self.Nclasses) bins = list(breaks.bins) if "time" in AttributeParameter: measure = "min" measure2 = "minutes" #Another string-form for summary titleMeas = "time" maxClassInfo = str(bins[-2]) else: measure = "km" measure2 = "kilometers" titleMeas = "distance" maxClassInfo = str(bins[-2] / 1000) bins.append(maxBin) bins.append(maxBin) #the notnull method lets us match indices when joining jb = pd.DataFrame( {'jenks_bins': breaks.yb}, index=df_map[df_map[AttributeParameter].notnull()].index) df_map = df_map.join(jb) brksBins = bins[: -1] #breaks.bins[:-1] #Do not take into account NoData values if measure2 == "kilometers": #Convert meters (in data) to kilometers for legend b = [round((x / 1000), 0) for x in brksBins] brksBins = b del b brksCounts = breaks.counts[: -1] #Do not take into account NoData values #Check if brksCounts and brksBins dismatches --> insert 0 values if necessary (to match the counts) if len(brksBins) != len(brksCounts): dif = len(brksBins) - len(brksCounts) brksCounts = np.append(brksCounts, [0 for x in xrange(dif)]) else: dif = 0 #List for measures which will be inserted to class labels measureList = [measure for x in xrange(len(brksBins))] #Class labels jenks_labels = [ "%0.0f %s (%0.1f %%)" % (b, msr, (c / 13230.0) * 100) for b, msr, c in zip(brksBins[:-1], measureList[:-1], brksCounts[:-1]) ] if Manual == True: if "dist" in AttributeParameter: jenks_labels.insert( int(maxBin), '>' + maxClassInfo + ' km (%0.1f %%)' % ((brksCounts[-1] / 13230.0) * 100)) else: jenks_labels.insert( int(maxBin), '>' + maxClassInfo + ' min (%0.1f %%)' % ((brksCounts[-1] / 13230.0) * 100)) jenks_labels.insert(NoData, 'NoData 
(%0.1f %%)' % (NullP)) #Use modified colormap ('my_colormap') - Choose here the default colormap which is used as a startpoint --> cm.YourColor'sName (eg. cm.Blues) - See available Colormaps: http://matplotlib.org/examples/color/colormaps_reference.html cmap = self.my_colormap(cm.RdYlBu, len(bins)) #Draw grid with grey outlines df_map['Grid'] = df_map['poly'].map( lambda x: PolygonPatch( x, ec='#555555', lw=.2, alpha=1., zorder=4) ) #RGB color-codes can be found at http://www.rapidtables.com/web/color/RGB_Color.htm pc = PatchCollection(df_map['Grid'], match_original=True) #----------------------------- #Reclassify data to value range 0.0-1.0 (--> colorRange is 0.0-1.0) if Manual == True: colbins = np.linspace(0.0, 1.0, len(bins)) colbins = colbins - 0.001 colbins[0], colbins[-1] = 0.0001, 1.0 reclassification = {} for index in range(len(bins)): reclassification[index] = colbins[index] reclassification['_reclassify'] = self.reclassify reclass = [] dataList = list(df_map['jenks_bins']) for value in dataList: reclass.append(self.reclassify(reclassification, value)) df_map['jenks_binsR'] = reclass else: norm = Normalize() df_map['jenks_binsR'] = norm(df_map['jenks_bins'].values) #----------------------------- #Impose colour map onto the patch collection pc.set_facecolor(cmap(df_map['jenks_binsR'].values)) #Add colored Grid to map ax.add_collection(pc) #Add coastline to the map self.C['Polys'] = self.C['poly'].map(lambda x: PolygonPatch( x, fc='#606060', ec='#555555', lw=.25, alpha=.88, zorder=4 )) #Alpha adjusts transparency, fc='facecolor', ec='edgecolor' cpc = PatchCollection(self.C['Polys'], match_original=True) ax.add_collection(cpc) #Add roads to the map for feature in self.R: xx, yy = feature.xy self.B.plot(xx, yy, linestyle='solid', color='#606060', linewidth=0.7, alpha=.6) #Add metro to the map for line in self.M: #metroLines is a shapely MultiLineString object consisting of multiple lines (is iterable) x, y = line.xy self.B.plot(x, y, color='#FF2F2F', 
linewidth=0.65, alpha=.4) #---------------------- #GENERATE TARGET POINT #---------------------- #Generate YKR_ID from csv name ykrID = int(self.basename.split('_')[2]) #Find index of target YKR_ID tIndex = df_map.YKR_ID[df_map.YKR_ID == ykrID].index.tolist() trow = df_map[tIndex[0]:tIndex[0] + 1] targetPolygon = trow.poly centroid = targetPolygon.values[ 0].centroid #Get centroid of the polygon --> Returns shapely polygon point-type object self.B.plot(centroid.x, centroid.y, 'go', markersize=3, label="= Destination") #----------------------------- #LEGEND #----------------------------- #Draw a map scale self.B.drawmapscale( coords[0] + 0.47, coords[1] + 0.013, #Etäisyys vasemmalta, etäisyys alhaalta: plussataan koordinaatteihin asteissa coords[0], coords[1], #10., 10., barstyle='fancy', labelstyle='simple', yoffset=200, #yoffset determines the height of the mapscale fillcolor1='w', fillcolor2='#909090', fontsize=6, # black= #000000 fontcolor='#202020', zorder=5) #Set up title if "PT" in AttributeParameter: tMode = "public transportation" elif "Car" in AttributeParameter: tMode = "car" elif "Walk" in AttributeParameter: tMode = "walking" titleText = "Travel %s to %s (YKR-ID) \n by %s" % ( titleMeas, str(ykrID), tMode) plt.figtext(.852, .735, titleText, size=9.5) #Plot copyright texts copyr = "%s MetropAccess project, University of Helsinki, 2014\nLicensed under a Creative Commons Attribution 4.0 International License" % ( unichr(0xa9)) plt.figtext(.24, .078, copyr, fontsize=4.5) #---------------- #Add a colour bar #---------------- #Set arbitary location (and size) for the colorbar axColor = plt.axes( [.86, .15, .016, .52]) #([DistFromLeft, DistFromBottom, Width, Height]) cb = self.colorbar_index( ncolors=len(jenks_labels), cmap=cmap, labels=jenks_labels, cax=axColor ) #, shrink=0.5)#, orientation="vertical", pad=0.05,aspect=20)#,cax=cbaxes) #This is a function --> see at the beginning of the code. 
#, cax=cbaxes shrink=0.5, cb.ax.tick_params(labelsize=5.5) #Inform travel sum of the whole grid (i.e. centrality of the location) #Travel time if measure2 == "minutes": tMean = histData.mean().values[0] tMedian = histData.median().values[0] tMax = histData.max().values[0] tMin = histData.min().values[0] tStd = histData.std().values[0] travelSummary = "Summary:" travelMean = "Mean: %0.0f %s" % (tMean, measure2) travelMedian = "Median: %0.0f %s" % (tMedian, measure2) travelStd = "Std: %0.0f %s" % (tStd, measure2) travelRange = "Range: %0.0f-%0.0f %s" % (tMin, tMax, measure2) #Travel distance else: h = histData.values / 1000 histData = pd.DataFrame(h) del h tMean = histData.mean().values[0] tMedian = histData.median().values[0] tMax = histData.max().values[0] tMin = histData.min().values[0] tStd = histData.std().values[0] travelSummary = "Summary:" travelMean = "Mean: %0.1f %s" % (tMean, measure2) travelMedian = "Median: %0.1f %s" % (tMedian, measure2) travelStd = "Std: %0.1f %s" % (tStd, measure2) travelRange = "Range: %0.1f-%0.1f %s" % (tMin, tMax, measure2) #Write information to a statistics file mInfo = "%s;%0.0f;%0.0f;%0.0f;%0.0f;%0.0f\n" % ( str(ykrID), tMean, tMedian, tStd, tMin, tMax) self.writeStatistics(mInfo) #Helper variables for moving Summary statistic texts initialPos = .58 #.15 #.44 initialXPos = .975 #.20 #.97 textSize = 5.25 split = 0.018 #Plot Travel Summary title plt.figtext(initialXPos, initialPos + split * 4, travelSummary, ha='left', va='bottom', color='#404040', size=textSize, style='normal', fontweight='bold') #Plot Travel Summary mean plt.figtext(initialXPos, initialPos + split * 3, travelMean, ha='left', va='bottom', size=textSize, color='b') #Plot Travel Summary median plt.figtext(initialXPos, initialPos + split * 2, travelMedian, ha='left', va='bottom', size=textSize, color='r') #Plot Travel Summary Standard deviation plt.figtext(initialXPos, initialPos + split, travelStd, ha='left', va='bottom', size=textSize) #Plot Travel Summary Range 
plt.figtext(initialXPos, initialPos, travelRange, ha='left', va='bottom', size=textSize) #Plot Legend symbol ax.legend( bbox_to_anchor=(.97, 0.07), fontsize=5.5, frameon=False, numpoints=1 ) #1.265 bbox_to_anchor=(x,y) --> arbitary location for legend, more info: http://matplotlib.org/api/legend_api.html #-------------------------------------------------------- #Travel time and population (catchment areas) histograms #-------------------------------------------------------- #New axes for travel time/distance histogram ax = plt.axes( [.98, .39, .16, .14], axisbg='w') #([DistFromLeft, DistFromBottom, Width, Height]) #Add histogram n, bins, patches = ax.hist(histData.values, 100, normed=False, facecolor='green', alpha=0.75, rwidth=0.5, orientation="vertical") ax.axvline(histData.median(), color='r', linestyle='solid', linewidth=1.8) ax.axvline(histData.mean(), color='b', linestyle='solid', linewidth=1.0) if measure2 == "minutes": ax.set_xlabel("t(min)", fontsize=5, labelpad=1.5) xupLim = 250 #upper limit for x-axis else: ax.set_xlabel("km", fontsize=5, labelpad=1.5) xupLim = 100 #upper limit for x-axis #Set valuelimits for axes ax.set_xlim(0, xupLim - 30) if max( n ) < 1000: #ymax will be set to 1000 if count of individual bin is under 1000, else 1500 yMax = 1000 else: yMax = 1600 ax.set_ylim(0, yMax) #Set histogram title plt.figtext(.975, .535, "Travel %s histogram" % titleMeas, ha='left', va='bottom', size=5.7, style='italic') #Adjust tick font sizes and set yaxis to right ax.tick_params(axis='both', direction="out", labelsize=4.5, pad=1, labelright=True, labelleft=False, top=False, left=False, color='k', length=3, width=.9) ax.xaxis.set_ticks(np.arange(0, xupLim - 30, 30)) gridlines = ax.get_xgridlines() gridlines.extend(ax.get_ygridlines()) for line in gridlines: line.set_linewidth(.28) line.set_linestyle('dotted') ax.grid(True) #---------------------------------------------------- #New axes for population diagram ax = plt.axes( [.98, .17, .16, .14], axisbg='w') 
#([DistFromLeft, DistFromBottom, Width, Height]) #Make dataframe from Ykr-population pop = pd.read_csv(self.Ypop, sep=';') #Use original Matrix without NoData values MatrixData.replace(to_replace={AttributeParameter: { -1: np.nan }}, inplace=True) #Join population information and time matrix join = pd.merge(left=MatrixData, right=pop, how='outer', left_on='from_id', right_on='YKR_ID') #Sort data by attribute parameter sorted = join.sort(columns=[AttributeParameter]) #Aggregate data by AttributeParameter aggre = pd.DataFrame( sorted.groupby(AttributeParameter).sum().Population) #Create attribute from index aggre[AttributeParameter] = aggre.index #Create cumulative population attribute aggre['cumPop'] = aggre['Population'].cumsum() #Reset index and determine AttributeParameter as float (matplotlib requires for it to work) aggre.reset_index(inplace=True, drop=True) aggre[AttributeParameter].astype(float) #print aggre[0:10] #Create filled curve plot from the cumulative population ax.fill_between(aggre.index, aggre['cumPop'] / 1000, 0, interpolate=True, lw=1, facecolor='green', alpha=0.6) #Set valuelimits for axes ax.set_xlim(0, xupLim - 50) ax.set_ylim(-10, aggre['cumPop'].max() / 1000 + 50) gridlines = ax.get_xgridlines() gridlines.extend(ax.get_ygridlines()) for line in gridlines: line.set_linewidth(.28) line.set_linestyle('dotted') ax.grid(True) ax.tick_params(axis='both', direction="out", labelsize=4.5, pad=1, labelright=True, labelleft=False, top=False, left=False, color='k', length=3, width=.9) ax.xaxis.set_ticks(np.arange(0, xupLim - 30, 30)) if measure2 == "minutes": ax.set_xlabel("t(min)", fontsize=5, labelpad=1.5) measure3 = 'minutes' else: measure3 = 'km' ax.set_xlabel("km", fontsize=5, labelpad=1.5) #Set histogram title plt.figtext(.975, .315, "Population (per 1000) reached within (x) %s" % measure3, ha='left', va='bottom', size=5.7, style='italic') #----------------------- #Save map to disk #----------------------- fig.set_size_inches(9.22, 6.35) #(Width, 
Height) outputPath = os.path.join( self.outputFolder, self.basename) + AttributeParameter + ".png" plt.savefig(outputPath, dpi=300, alpha=True, bbox_inches='tight') plt.close() #or plt.close('all') --> closes all figure windows end = time.time() lasted = int(end - start) return lasted except Exception as e: return e
# Classify watershed basins by river density and write the classified
# area-of-interest overlay to GeoJSON.
#
# Inputs (defined upstream): `watershed` / `river` / `aoi` GeoDataFrames,
# `outputgeojson` path, `nb` = PySAL natural-breaks classifier.

# Spatial join: pair each watershed polygon with the river segments it
# intersects (inner join drops basins with no river).
watershed_with_rivers = sjoin(watershed, river, how='inner', op='intersects')

# Total river length per basin (keyed by HYBAS_ID).
watershedSumbyRiver = watershed_with_rivers.groupby([
    "HYBAS_ID",
]).agg(dict(length="sum")).reset_index()

watershed['riverlength'] = watershed.HYBAS_ID.map(
    watershedSumbyRiver.set_index('HYBAS_ID')['length'].to_dict())

# Basins without any intersecting river get a placeholder length of 1 so
# the division below stays finite.
watershed['riverlength'].fillna(1, inplace=True)

# "density" here is area per unit of river length.
# NOTE(review): assumes `row.area` resolves to an 'area' column (or the
# geometry area attribute) — confirm against the upstream schema.
watershed['density'] = watershed.apply(lambda row:
                                       (row.area / row.riverlength),
                                       axis=1)

# DataFrame.as_matrix() was removed in pandas 1.0; to_numpy() is the
# supported equivalent and yields the same (n, 1) array here.
ii = watershed[['density']].to_numpy()
breaks = nb(ii.ravel(), k=3, initial=1)

# Bucket each density value against the Jenks break edges, then map the
# bin index to a display class.
digitizedbins = np.digitize(watershed.density, bins=breaks.bins.tolist())
watershed['areatype'] = digitizedbins
watershed['areatype'] = watershed['areatype'].map({
    3: 'green',
    2: 'green2',
    1: 'green2',
    0: 'green3'
})

# Clip the classified watershed layer to the area of interest and persist.
inter = geopandas.overlay(watershed, aoi, how='intersection')
with open(outputgeojson, 'w') as f:
    f.write(inter.to_json())
        # Total recorded hours for each county (fragment: the enclosing
        # DataFrame/Series construction opens on an earlier, unseen line).
        sum(df.loc[lambda df: (df.county == county['NAME_TAG'])]['hours'])
        for county in clean_counties_info
    ]
})

# Create Point objects in map coordinates from dataframe lon and lat values
# (m is presumably the Basemap projection callable — TODO confirm).
map_points = pd.Series([
    Point(m(mapped_x, mapped_y))
    for mapped_x, mapped_y in zip(df['lon'], df['lat'])
])
rec_points = MultiPoint(list(map_points.values))
# prep() builds a prepared geometry so repeated contains() checks are fast.
counties_polygon = prep(MultiPolygon(list(df_map['poly'].values)))
# Keep only the points that fall inside the county polygons.
county_points = filter(counties_polygon.contains, rec_points)

# Calculate Jenks natural breaks for density
breaks = nb(df_map[df_map['hours'].notnull()].hours.values, initial=300, k=6)

# the notnull method lets us match indices when joining
jb = pd.DataFrame({'jenks_bins': breaks.yb},
                  index=df_map[df_map['hours'].notnull()].index)
df_map = df_map.join(jb)
# Rows with no data get their own sentinel class (-1).
df_map.jenks_bins.fillna(-1, inplace=True)

# One legend label per class; the no-data sentinel gets its own entry.
labels = ['No recording'
          ] + ["> %d hours" % (perc) for perc in breaks.bins[:-1]]

# Fresh figure for the choropleth.
plt.clf()
fig = plt.figure()
ax = fig.add_subplot(111, axisbg='w', frame_on=False)
cmap = plt.get_cmap('Blues')
thecount += 1 try: myDataFrame = pd.DataFrame({"TheData": myArray}) #La fonction "GetParameterAsText" invite l'utilisateur de nommer le fichier géographique # ("feature class" ou "fc") sur lequel les opérations vont commencer. #Ce script utilise "IQH_FINAL" comme le champ des données sur lequel les opérations vont # commencer ("field"). #La calculation utilise les progiciels arcpy.da (analyse des données) et numpy. #Nonobstant que le tableau numérique consiste en nombres entiers, le tableau est # transformé au format de point flottant ("float") parce que le progiciel PySAL # a besoin de cette transformation pour l'intégrer avec la fonction KMEANS. #L'iteration "for-if-else" trie les valeurs "null" des vraies données, et après cette # sortation, les données sont transferées en format de cadre des données pandas. print "Calcul des Jenks natural breaks..." breaks = nb(myDataFrame["TheData"].dropna().values,k=4,initial=20) #Le calcul des valeurs Jenks est produit par le progiciel pysal. Tous les valeurs # "null" sont sortis, et les données qui restent sont préparées pour l'analyse. #La paramètre k symbolise le nombre des classes la fonction Jenks va créer pour # l'utilisateur. #La paramètre initial est le semence de la fonction Jenks. Un valeur grand va # converger la fonction plus vite; un valeur petit, d'autre part, va être plus exact. print "Vérification s'il y avait calculs précédents des champs de valeurs Jenks..." try: arcpy.DeleteField_management(fc, "Jenks") print "Calculs précédents des champs de valeurs Jenks effacés..." except Exception as e: print "Aucuns champs des valeurs Jenks trouvés..." #Cette iteration "try-except" efface les calculs précédents s'ils existent. Si un champ
# #########################
# MAPS
# #########################
# Classify each area level's store counts / surfaces into Jenks bins.
# Inputs (defined upstream): `dict_df_areas` of GeoDataFrames, `df_com`,
# `nb` = PySAL natural-breaks classifier.
dict_df_areas['c_insee'] = df_com
dict_titles = {'nb_stores': 'Nb of stores',
               'store_surface': 'Cumulated store surface'}

# todo: generalize nb of stores and store surface (or other loop?)
for area, df_area in dict_df_areas.items():
    # Drop rows with no polygon; work on a copy so the source frame is safe.
    df_area_temp = df_area[~pd.isnull(df_area['poly'])].copy()
    for field in ['nb_stores', 'store_surface']:
        # Zero excluded from natural breaks: give zeros their own class
        # (-1, assigned below) by masking them before computing the breaks.
        # (Done BEFORE nb() so breaks.yb and the notnull index stay the
        # same length — computing breaks first would include the zeros and
        # make the DataFrame construction below raise on length mismatch.)
        df_area_temp.replace(to_replace={field: {0: np.nan}}, inplace=True)

        # Calculate Jenks natural breaks for density
        breaks = nb(df_area_temp[df_area_temp[field].notnull()][field].values,
                    initial=300, k=5)

        # the notnull method lets us match indices when joining
        jb = pd.DataFrame(
            {'jenks_bins': breaks.yb},
            index=df_area_temp[df_area_temp[field].notnull()].index)

        # need to drop duplicate index in jb, todo: check why need area here (MI?)
        # `take_last=True` was removed from pandas; keep='last' is the
        # documented equivalent.
        jb = jb.reset_index().drop_duplicates(subset=[area],
                                              keep='last').set_index(area)

        # propagated to all rows in df_com with same index
        df_area_temp['jenks_bins'] = jb['jenks_bins']
        # Rows excluded above (no data / zero) get the sentinel class -1.
        df_area_temp.jenks_bins.fillna(-1, inplace=True)