def country(from_key='name', to_key='iso'):
    """Creates and returns a mapper function to access country data.

    The mapper function that is returned must be called with one argument. By
    default you call it with a name and it returns the two-letter ISO 3166-1
    code, e.g. called with ``Spain`` it would return ``ES``; with
    ``to_key='iso3'`` it would return the three-letter code ``ESP``.

    :param from_key: (optional) the country attribute you give as input.
        Defaults to ``name``.
    :param to_key: (optional) the country attribute you want as output.
        Defaults to ``iso``.
    :return: mapper
    :rtype: function
    """

    gc = GeonamesCache()
    dataset = gc.get_dataset_by_key(gc.get_countries(), from_key)

    def mapper(input):
        # For country name inputs take the names mapping into account.
        if 'name' == from_key:
            input = mappings.country_names.get(input, input)
        # If there is a record return the demanded attribute.
        item = dataset.get(input)
        if item:
            return item[to_key]

    return mapper
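# A minimal usage sketch for the mapper factory above (the same factory ships
# as geonamescache.mappers.country); the lookups shown are standard ISO codes:
iso3_mapper = country(from_key='name', to_key='iso3')
print(iso3_mapper('Spain'))     # -> 'ESP'
print(iso3_mapper('Germany'))   # -> 'DEU'
print(iso3_mapper('Atlantis'))  # -> None, unknown names yield no record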
def get_countries_by_continent(cls, continent):
    gc = GeonamesCache()
    continent = [
        v for k, v in gc.get_continents().items()
        if v['toponymName'] == continent
    ][0]
    return continent['cc2'].split(',')
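# A hedged usage sketch for the classmethod above; the host class `GeoHelper`
# is invented for illustration, only the method itself comes from the source:
class GeoHelper:
    get_countries_by_continent = classmethod(get_countries_by_continent)

print(GeoHelper.get_countries_by_continent('Europe'))
# -> ['AD', 'AL', ...] two-letter codes split out of the continent's
#    comma-separated 'cc2' attribute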
def plot_world_chloropleth(datafile, dest, colorscale, bins,
                           nodatacolor='#dddddd', scale=1, projection='robin',
                           resolution='l', usecol='Magnitude', inputkwargs={}):
    """Format: CSV with 'Country Name', 'Country Code', and 'Magnitude' columns."""
    # See http://ramiro.org/notebook/basemap-choropleth/
    shapefile = 'ne_10m_admin_0_countries_lakes/ne_10m_admin_0_countries_lakes'
    num_colors = len(bins) - 1

    gc = GeonamesCache()
    iso3_codes = list(gc.get_dataset_by_key(gc.get_countries(), 'iso3').keys())

    df = pd.read_csv(datafile, **inputkwargs)
    df.set_index('Country Code', inplace=True)
    # Filter out non-countries and missing values.
    df = df.reindex(iso3_codes)  # .dropna()
    values = df[usecol]

    # https://matplotlib.org/api/pyplot_summary.html#matplotlib.pyplot.colormaps
    cm = plt.get_cmap(colorscale)
    scheme = [cm(i / num_colors) for i in range(num_colors)]
    scheme.append(nodatacolor)
    df['bin'] = np.digitize(values, bins) - 1
    df.sort_values('bin', ascending=False).head(10)

    # This doesn't work, is it important?
    # mpl.style.use('map')
    fig = plt.figure(figsize=(default_size * scale, default_size * scale))
    ax = fig.add_subplot(111, facecolor='w', frame_on=False)

    m = Basemap(lon_0=0, projection=projection, resolution=resolution)
    m.drawmapboundary(linewidth=default_map_linewidth * scale, color='w')
    m.readshapefile(shapefile, 'units', color='#444444',
                    linewidth=default_border_linewidth * scale)
    for info, shape in zip(m.units_info, m.units):
        iso3 = info['ADM0_A3']
        if iso3 not in df.index:
            color = nodatacolor
        else:
            color = scheme[df.loc[iso3]['bin']]

        patches = [Polygon(np.array(shape), True)]
        pc = PatchCollection(patches)
        pc.set_facecolor(color)
        ax.add_collection(pc)

    plt.savefig(dest, bbox_inches='tight')
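# A hedged usage sketch for plot_world_chloropleth; the CSV name, bin edges
# and colormap below are invented for illustration:
plot_world_chloropleth('forest_area.csv', 'forest_area.png',
                       colorscale='Greens', bins=[0, 20, 40, 60, 80, 100],
                       inputkwargs={'skiprows': 4})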
def payment_region(self):
    gc = GeonamesCache()
    continent = gc.get_countries()[self.country]['continentcode']
    if continent == 'EU':
        return 'EU'
    elif continent[-1] == 'A' and self.country not in ['US', 'CA']:
        return 'LA'
    return 'US'
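# A minimal sketch of how the method above might be exercised; the `Account`
# class is hypothetical, only payment_region comes from the source:
class Account:
    def __init__(self, country):
        self.country = country  # two-letter ISO 3166-1 code

    payment_region = payment_region  # reuse the function above as a method

print(Account('DE').payment_region())  # -> 'EU'
print(Account('MX').payment_region())  # -> 'LA' (Americas minus US/CA)
print(Account('JP').payment_region())  # -> 'US' (default bucket)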
def country(from_key='name', to_key='iso'):
    gc = GeonamesCache()
    dataset = gc.get_dataset_by_key(gc.get_countries(), from_key)

    def mapper(key):
        if 'name' == from_key and key in mappings.country_names:
            key = mappings.country_names[key]
        item = dataset.get(key)
        if item:
            return item[to_key]

    return mapper
class GeonamesCacheTestSuite(unittest.TestCase):
    """GeonamesCache test cases."""

    def setUp(self):
        self.geonamescache = GeonamesCache()

    def test_continents(self):
        continents = self.geonamescache.get_continents()
        testdata = (('AF', 'Africa'), ('AN', 'Antarctica'), ('AS', 'Asia'),
                    ('EU', 'Europe'), ('NA', 'North America'),
                    ('OC', 'Oceania'), ('SA', 'South America'))
        for code, name in testdata:
            self.assertTrue(code in continents)
            self.assertEqual(name, continents[code]['name'])

        for code in ['XX', 'OO']:
            self.assertTrue(code not in continents)

    def test_get_countries(self):
        countries = self.geonamescache.get_countries()
        testdata = (('ES', 'Spain'), ('FR', 'France'), ('US', 'United States'))
        for code, name in testdata:
            self.assertTrue(code in countries)
            self.assertEqual(name, countries[code]['name'])

        for code in ['XX', 'OO']:
            self.assertTrue(code not in countries)

    def test_us_states(self):
        us_states = self.geonamescache.get_us_states()
        testdata = (('NM', 'New Mexico'), ('CA', 'California'),
                    ('NV', 'Nevada'))
        for code, name in testdata:
            self.assertTrue(code in us_states)
            self.assertEqual(name, us_states[code]['name'])

        for code in ['XX', 'OO']:
            self.assertTrue(code not in us_states)

    def test_get_countries_by_names(self):
        # The get_countries_by_names dict and the get_countries dict must have
        # the same length, otherwise country names would not be unique.
        # (assertEqual, not assertTrue: the latter would treat the second
        # argument as a message and always pass.)
        self.assertEqual(len(self.geonamescache.get_countries_by_names()),
                         len(self.geonamescache.get_countries()))

    def test_get_cities_by_name(self):
        cities = self.geonamescache.get_cities()
        for gid, name in (('3191316', 'Samobor'),
                          ('3107112', 'Rivas-Vaciamadrid')):
            self.assertEqual(name, cities[gid]['name'])

    def test_get_cities_by_name_madrid(self):
        self.assertEqual(2, len(self.geonamescache.get_cities_by_name('Madrid')))

    def test_us_counties_len(self):
        # Make sure there are 3235 counties, which includes Puerto Rico etc.
        us_counties = self.geonamescache.get_us_counties()
        self.assertEqual(3235, len(us_counties))
def geo_match2(location_names):
    """This function matches US city names with corresponding coordinates.

    Basically the same as coordinates_converting.py; check it for comments
    and a description.

    :param location_names: str, content of input file
    :return: str
    """
    output = {}
    gc = GeonamesCache()
    state_dic_abbr = gc.get_us_states()
    new_data = {
        'DC': [38.895, -77.0366667],
        'St. Paul': [44.9537, -93.0900]
    }
    with open("city_loca.json", 'r') as f2:
        for line in f2:
            datum = json.loads(line)
            if datum['CityNameAccented'] not in new_data:
                new_data[datum['CityNameAccented']] = [
                    datum['Latitude'], datum['Longitude']
                ]
    for i in location_names:
        for name in location_names[i]:
            if name in new_data:
                output[i] = new_data[name]
                break
            else:
                continue
        # Fall back to state/capital matching if nothing matched above
        # (checking `output`, not `new_data`, as in coordinates_converting.py).
        if i not in output:
            full_state_name = ''
            for name in location_names[i]:
                if name in state_dic_abbr:
                    full_state_name = state_dic_abbr[name]['name']
                else:
                    if name in capital_dic:
                        full_state_name = name
                if full_state_name:
                    tmp0 = capital_dic[full_state_name]
                    try:
                        output[i] = new_data[tmp0]
                        break
                    except KeyError:
                        continue
    # print(output)
    return output
# -*- coding: utf-8 -*-

# This is by far the ugliest Python script I ever wrote.
# Without cities, 14954 location strings remain unresolved.
# With unique city names, 6345 location strings remain unresolved (number is
# not up to date).
# With largest city names, 4623 location strings remain unresolved.
# With manually resolved locations, 3333 location strings remain unresolved.

import csv, json, re

from geonamescache import GeonamesCache
from loclists import check_unresolved

unresolved_locations = []
commits_by_countries = {}
countries_by_locstr = {}

gc = GeonamesCache()
countries = gc.get_countries()
countries_by_names = gc.get_countries_by_names()
us_states = gc.get_us_states()
us_states_by_names = gc.get_us_states_by_names()

re_ignore = re.compile(r'[\.\(\)\d-]')
re_ws = re.compile(r'\s{2,}')


def test_locs(locs):
    for loc in locs:
        loc = loc.strip().lower()
        loctitle = loc.title()
        locupper = loc.upper()

        if loc in countries_by_names:
            return loc
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import pandas as pd

from geonamescache import GeonamesCache

gc = GeonamesCache()
countries = gc.get_countries()
df = pd.DataFrame.from_dict(countries, orient='index')
df.to_csv('geonamescache/countries.csv', index=False)
def setUp(self):
    self.geonamescache = GeonamesCache()
class GeonamesCacheTestSuite(unittest.TestCase):
    """GeonamesCache test cases."""

    def setUp(self):
        self.geonamescache = GeonamesCache()

    def test_continents(self):
        continents = self.geonamescache.get_continents()
        testdata = (
            ('AF', 'Africa'), ('AN', 'Antarctica'), ('AS', 'Asia'),
            ('EU', 'Europe'), ('NA', 'North America'), ('OC', 'Oceania'),
            ('SA', 'South America'))
        for code, name in testdata:
            self.assertTrue(code in continents)
            self.assertEqual(name, continents[code]['name'])

        for code in ['XX', 'OO']:
            self.assertTrue(code not in continents)

    def test_get_countries(self):
        countries = self.geonamescache.get_countries()
        testdata = (('ES', 'Spain'), ('FR', 'France'), ('US', 'United States'))
        for code, name in testdata:
            self.assertTrue(code in countries)
            self.assertEqual(name, countries[code]['name'])

        for code in ['XX', 'OO']:
            self.assertTrue(code not in countries)

    def test_us_states(self):
        us_states = self.geonamescache.get_us_states()
        testdata = (
            ('NM', 'New Mexico'), ('CA', 'California'), ('NV', 'Nevada'))
        for code, name in testdata:
            self.assertTrue(code in us_states)
            self.assertEqual(name, us_states[code]['name'])

        for code in ['XX', 'OO']:
            self.assertTrue(code not in us_states)

    def test_get_countries_by_names(self):
        # The get_countries_by_names dict and the get_countries dict must have
        # the same length, otherwise country names would not be unique.
        # (assertEqual, not assertTrue: the latter would treat the second
        # argument as a message and always pass.)
        self.assertEqual(len(self.geonamescache.get_countries_by_names()),
                         len(self.geonamescache.get_countries()))

    def test_get_cities_by_name(self):
        cities = self.geonamescache.get_cities()
        for gid, name in (('3191316', 'Samobor'),
                          ('3107112', 'Rivas-Vaciamadrid')):
            self.assertEqual(name, cities[gid]['name'])

    def test_get_cities_by_name_madrid(self):
        self.assertEqual(
            2, len(self.geonamescache.get_cities_by_name('Madrid')))

    def test_us_counties_len(self):
        # Make sure there are 3234 counties, which includes Puerto Rico etc.
        us_counties = self.geonamescache.get_us_counties()
        self.assertEqual(3234, len(us_counties))
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import argparse

import simplemapplot
import pandas as pd

from geonamescache import GeonamesCache

parser = argparse.ArgumentParser(
    description='Create a Choropleth world map without a legend.')
parser.add_argument('file', help='CSV data file')
args = parser.parse_args()

colors = ['#ffffff', '#ff0000']
countries = GeonamesCache().get_countries_by_names()
df = pd.read_csv(args.file)
country_data = df['Country']

colorize = {}
for name in country_data:
    colorize[countries[name]['iso'].lower()] = 1

simplemapplot.make_world_country_map(data=colorize, colors=colors)
def plotChoropleth(filename, imgfile, figNum):
    shapefile = 'data/ne/ne_10m_admin_0_countries'
    cols = ['CC', 'DISCON']
    num_colors = 20

    gc = GeonamesCache()
    iso_codes = list(gc.get_dataset_by_key(gc.get_countries(), 'iso').keys())

    df = pd.read_csv(filename, skiprows=0, usecols=cols)
    df.set_index('CC', inplace=True)
    # Filter out non-countries and missing values.
    # (reindex replaces the deprecated df.ix label lookup.)
    df = df.reindex(iso_codes).dropna()
    values = df['DISCON']

    cm = plt.get_cmap('Reds')
    scheme = [cm(float(i) / num_colors) for i in range(num_colors)]
    # bins = np.linspace(values.min(), values.max(), num_colors)
    bins = np.linspace(0, 1, num_colors)
    df['bin'] = np.digitize(values, bins) - 1
    df.sort_values('bin', ascending=False)  # .head(10)
    # print(df)

    mpl.style.use('seaborn-pastel')
    print('Plotting Figure {0}: {1}'.format(figNum, imgfile))
    fig = plt.figure(figNum, figsize=(22, 12))
    # axisbg was removed in Matplotlib 2.x; facecolor is the replacement.
    ax = fig.add_subplot(111, facecolor='w', frame_on=False)
    # plt.title('Disco Choropleth', fontsize=20)  # , y=.95)

    m = Basemap(lon_0=0, projection='robin')
    m.drawmapboundary(color='w')
    m.readshapefile(shapefile, 'units', color='#444444', linewidth=.2)
    for info, shape in zip(m.units_info, m.units):
        # iso = info['ADM0_A3']
        iso = info['ISO_A2']
        # print(iso)
        try:
            if iso not in df.index:
                color = '#dddddd'
            else:
                color = scheme[int(df.loc[iso]['bin'])]
        except TypeError:
            print(iso)
            traceback.print_exc()
            color = '#dddddd'  # Fall back to the no-data color.

        patches = [Polygon(np.array(shape), True)]
        pc = PatchCollection(patches)
        pc.set_facecolor(color)
        ax.add_collection(pc)

    # Cover up Antarctica so the legend can be placed over it.
    ax.axhspan(0, 1000 * 1800, facecolor='w', edgecolor='w', zorder=2)

    # Draw the color legend.
    ax_legend = fig.add_axes([0.35, 0.14, 0.3, 0.03], zorder=3)
    cmap = mpl.colors.ListedColormap(scheme)
    cb = mpl.colorbar.ColorbarBase(ax_legend, cmap=cmap, ticks=bins,
                                   boundaries=bins, orientation='horizontal')
    cb.ax.set_xticklabels([str(round(i, 2)) for i in bins], rotation=80)

    # Set the map footer.
    # plt.annotate(descripton, xy=(-.8, -3.2), size=14, xycoords='axes fraction')
    plt.savefig(imgfile, bbox_inches='tight', pad_inches=.2)
def geo_match2(location_names):
    """This function matches US city names with corresponding coordinates.

    :param location_names: str, content of input file
    :return: str
    """

    def data_preprocess(data):
        """Preprocess the input data: change the format and datatype of the
        timestamp, and add multiple location names into a single list.

        :param data: json dict
        :return: json dict
        """
        data_formal = {}
        for i in data:
            # The pattern of the timestamp could vary for different data sources.
            time = datetime.datetime.strptime(i[:19], "%Y-%m-%dT%H:%M:%S")
            data_formal[str(time)] = [j[0] for j in data[i][0]]
        return data_formal

    location_names = data_preprocess(location_names)
    output = {}
    bad_items = []
    gc = GeonamesCache()
    state_dic_abbr = gc.get_us_states()
    # Load the coordinate items into a dictionary called new_data.
    # You can manually input some corner cases.
    new_data = {'DC': [38.895, -77.0366667],
                'St. Paul': [44.9537, -93.0900],
                'Temcula': [33.4936, -117.1484]}
    # Load coordinates from the data source.
    with open("city_loca.json", 'r') as f2:
        for line in f2:
            datum = json.loads(line)
            if datum['CityNameAccented'] not in new_data:
                new_data[datum['CityNameAccented']] = [datum['Latitude'],
                                                       datum['Longitude']]
    # Traverse the extracted location names.
    for i in location_names:
        s = len(output)
        for name in location_names[i]:
            if name:
                new = name.split(' ')  # Split by space.
            else:
                new = []
            name = ''
            for j in range(len(new)):
                if new[j] and new[j] != ' ':
                    new[j] = new[j][0].upper() + new[j][1:]  # Capitalize the word.
                name += new[j]
                if j != len(new) - 1:
                    name += ' '
            if name in new_data:  # Deal with common cases.
                output[i] = new_data[name]
                break
            if name.split(' ')[-1] in state_list:
                # Deal with situations like "New York NY".
                separator = ' '
                name_city = separator.join(name.split(' ')[:-1])
                if name_city in new_data:
                    output[i] = new_data[name_city]
                    break
            if name.split(' ')[0] in directions:
                # Deal with situations like "South west NY".
                separator = ' '
                name_city = separator.join(name.split(' ')[1:])
                if name_city in new_data:
                    output[i] = new_data[name_city]
                    break
            else:
                continue
        if i not in output:  # The above method failed to match coordinates.
            full_state_name = ''
            for name in location_names[i]:
                if name:
                    new = name.split(' ')
                else:
                    new = []
                name = ''
                for j in range(len(new)):
                    if new[j] and new[j] != ' ':
                        new[j] = new[j][0].upper() + new[j][1:]
                    name += new[j]
                    if j != len(new) - 1:
                        name += ' '
                if name in state_dic_abbr:
                    # Use the state name instead.
                    full_state_name = state_dic_abbr[name]['name']
                else:
                    if name in capital_dic:
                        full_state_name = name
                if full_state_name:
                    # Use the capital city to match coordinates.
                    tmp0 = capital_dic[full_state_name]
                    try:
                        output[i] = new_data[tmp0]
                        break
                    except KeyError:
                        continue
        e = len(output)
        if s == e:
            # Record the location names that can't be converted.
            bad_items.append((i, location_names[i]))
    print(bad_items)
    with open('coordinates_IstheServicedown_' + 'Verizon' + '.json', 'w') as outfile:
        json.dump(output, outfile)
    return 'done'
""" Introduction to Web Science Assignment 5 Question 3 Team : golf Script used to extract data from the article-per-line file and process it to finaly write it in a csv file """ import pandas as pd from geonamescache import GeonamesCache from geonamescache.mappers import country gc = GeonamesCache() # we use the GeonamesCache to get the name of countries # creating a mapper between the iso3 code and the country name mapper = country(from_key='name', to_key='iso3') countries = list(gc.get_dataset_by_key( gc.get_countries(), 'name', ).keys()) # for the US we are going to use the states states = list(gc.get_us_states_by_names()) #print(countries) # any of these key words could indicate that we are reading about a star key_words = ['movie', 'film', 'TV', 'television', 'actor', 'actress'] articles = [] dataset = {} with open('article-per-line.txt', 'r', encoding="utf8") as f: articles = f.read().splitlines()
# Builtin Python modules
import csv
from pathlib import Path
import re
import string
from typing import Dict, List, Set
import unicodedata

from geonamescache import GeonamesCache
from more_itertools import partitions
import pandas as pd

from helper import Cities, Countries, City, Country

# Cities
helper_cities = Cities("./data/cities.json", GeonamesCache())
group_cities_by_word_count: Dict[int, List[str]] = helper_cities.load_json()
cities: Dict[str, City] = helper_cities.get_cities()

# Countries
helper_countries = Countries("./data/countries.json", GeonamesCache())
group_countries_by_word_count: Dict[int, List[str]] = helper_countries.load_json()
countries: Dict[str, Country] = helper_countries.get_countries()

text_file: Path = Path('./data/headlines.txt')
assert text_file.is_file(), f"Wrong file: {text_file}"

unique_cities: Set[str] = set()
map_country_code_to_city: Dict[str, List[str]] = {}
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import os
import subprocess

from geonamescache import GeonamesCache

gc = GeonamesCache()

for iso2, country in gc.get_countries().items():
    iso3 = country['iso3']
    geojson = ('ogr2ogr -f GeoJSON -where "ADM0_A3 IN (\'{0}\')" units.json '
               '../shp/ne_10m_admin_1_states_provinces_lakes.shp')
    subprocess.call(geojson.format(iso3), shell=True)
    topojson = ('../node_modules/topojson/bin/topojson '
                '--simplify-proportion .08 --id-property fips -p name=name '
                '-o {0}.json units.json')
    subprocess.call(topojson.format(iso3), shell=True)
    os.unlink('units.json')

subprocess.call('mv *.json ../src/topojson/countries/', shell=True)
filename = 'csv/ag.lnd.frst.zs_Indicator_en_csv_v2/ag.lnd.frst.zs_Indicator_en_csv_v2.csv'
shapefile = 'shp/countries/ne_10m_admin_0_countries_lakes'
num_colors = 9

year = '2012'
cols = ['Country Name', 'Country Code', year]
title = 'Forest area as percentage of land area in {}'.format(year)
imgfile = 'img/{}.png'.format(slug(title))

description = '''
Forest area is land under natural or planted stands of trees of at least 5
meters in situ, whether productive or not, and excludes tree stands in
agricultural production systems (for example, in fruit plantations and
agroforestry systems) and trees in urban parks and gardens. Countries without
data are shown in grey.
Data: World Bank - worldbank.org • Author: Ramiro Gómez - ramiro.org'''.strip()

gc = GeonamesCache()
iso3_codes = list(gc.get_dataset_by_key(gc.get_countries(), 'iso3').keys())

df = pd.read_csv(filename, skiprows=4, usecols=cols)
df.set_index('Country Code', inplace=True)
# Filter out non-countries and missing values.
# (reindex replaces the deprecated df.ix label lookup.)
df = df.reindex(iso3_codes).dropna()
values = df[year]

cm = plt.get_cmap('Greens')
scheme = [cm(i / num_colors) for i in range(num_colors)]
bins = np.linspace(values.min(), values.max(), num_colors)
df['bin'] = np.digitize(values, bins) - 1
df.sort_values('bin', ascending=False).head(10)

mpl.style.use('map')
fig = plt.figure(figsize=(22, 12))
def main():
    fy.setup_fiscal_calendar(start_month=9)
    now = fy.FiscalDateTime.now()
    start_date = now.prev_quarter.start.strftime('%Y-%m-%d')
    end_date = now.prev_quarter.end.strftime('%Y-%m-%d')
    print(start_date)
    print(end_date)

    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="Retrieve Google Analytics data.")
    parser.add_argument("-d", "--debug", help="Enable debugging messages",
                        action="store_true")
    parser.add_argument("output_file", metavar="OUTPUT_FILE", nargs="?",
                        default="sessions.csv", help="Output CSV file")
    parser.add_argument("-s", "--start-date", default=start_date,
                        help="Start date")
    parser.add_argument("-e", "--end-date", default=end_date, help="End date")
    args = parser.parse_args()

    if args.start_date != start_date:
        args.start_date = parse_date(args.start_date)
        print(args.start_date)
    if args.end_date != end_date:
        args.end_date = parse_date(args.end_date)
        print(args.end_date)

    pd.set_option('display.max_columns', None)
    pd.set_option('display.max_rows', None)
    pd.set_option('display.max_colwidth', None)
    # pd.set_option('display.float_format', '{:,.0f}'.format)

    gc = GeonamesCache()
    global countries
    countries = gc.get_countries()
    countries['ZZ'] = {'iso3': 'ZZZ'}
    mapper = country(from_key='iso', to_key='iso3')

    scope = ['https://www.googleapis.com/auth/analytics.readonly']
    # Authenticate and construct service.
    service = get_service('analytics', 'v3', scope, 'client_secrets.json')
    profile_ids = get_profile_ids(service)
    pprint.pprint(profile_ids)

    total = pd.DataFrame()
    for profile_id in profile_ids:
        results = get_results(service, profile_id, args.start_date,
                              args.end_date)
        df = create_dataframe(results)
        with pd.option_context('display.max_rows', None,
                               'display.max_columns', None):
            print(df)
        total = total.add(df, fill_value=0)

    total.index = [conv_iso_2_to_3(i) for i in total.index]
    total.index.name = 'iso3'
    total.columns = [re.sub(r'^ga:', '', col) for col in total.columns]
    set_int(total)
    total.to_csv(args.output_file)
def world(data, area_col, value_col, area_to_code=False, bin_mode='linear',
          log=None, title='', value_col_to_title=True, descripton='',
          num_colors=9, palette='Reds', filter_outlier=False):
    '''WORLD MAP PLOT

    Takes in data where one column is the values and another is either the
    country name or the 3-letter country code according to the ISO standard.

    USE
    ===

    world(data=emission,
          area_col='area',
          value_col=1999,
          area_to_code=True,
          title='Emission Intensity of Food Production (co2/kg)',
          value_col_to_title=False,
          palette='Reds',
          num_colors=9)

    area_col :: the column with either the country name or country code
    value_col :: the column with the values
    area_to_code :: must be True if area is not a 3-letter code
    num_colors :: the number of colors to be used to describe intensity
    value_col_to_title :: useful when the column is a year and you want to
                          have it shown in the title
    log :: if True, will use log values instead; only works when bin_mode
           is linear

    '''

    from geonamescache import GeonamesCache
    from ..utils.country_code import country_to_code

    data = data.copy(deep=True)

    if filter_outlier:
        data = outliers(data, value_col)

    if value_col_to_title:
        title = title + ' {}'.format(value_col)

    descripton = descripton.strip()

    if area_to_code:
        data[area_col] = data[area_col].apply(country_to_code)
        data.set_index(area_col, inplace=True)

    if data.index.name != area_col:
        data.set_index(area_col, inplace=True)

    # Filter data based on geo codes.
    # (reindex tolerates missing labels, unlike .loc in modern pandas.)
    gc = GeonamesCache()
    iso3_codes = list(gc.get_dataset_by_key(gc.get_countries(), 'iso3').keys())
    data = data.reindex(iso3_codes)
    data = data[~data[value_col].isna()]
    data[value_col] = data[value_col].astype(int)

    # Set plot stuff.
    values = data[value_col].dropna()

    if log:
        values = np.log1p(values)
        data[value_col] = np.log(data[value_col])

    if bin_mode == 'linear':
        bins = np.linspace(values.min(), values.max(), num_colors)
    elif bin_mode == 'quantile':
        bins = np.nanpercentile(values, np.arange(0, 100, num_colors))

    cm = plt.get_cmap(palette)
    scheme = [cm(i / num_colors) for i in range(num_colors)]

    # Create the bin column.
    data['temp'] = pd.cut(data[value_col], bins)
    cat_columns = data.select_dtypes(['category']).columns
    data['bin'] = data[cat_columns].apply(lambda x: x.cat.codes)
    data.drop('temp', axis=1, inplace=True)

    p = plt.figure(figsize=(17, 12))
    p.patch.set_facecolor('white')
    ax = p.add_subplot(111, frame_on=False)
    p.suptitle(title, color='grey', weight='bold', fontsize=26, y=.85)

    try:
        from mpl_toolkits.basemap import Basemap
        m = Basemap(lon_0=0, projection='robin')
    except ImportError:
        raise MissingImport(
            "Install Basemap >> pip install git+https://github.com/matplotlib/basemap.git")

    m.drawmapboundary(color='w')
    m.readshapefile(shapefile, 'units', color='#444444', linewidth=.2)

    for info, shape in zip(m.units_info, m.units):
        iso3 = info['ADM0_A3']
        if iso3 not in data.index:
            color = '#dddddd'
        else:
            color = scheme[data.loc[iso3]['bin'].astype(int)]

        patches = [Polygon(np.array(shape), True)]
        pc = PatchCollection(patches)
        pc.set_facecolor(color)
        ax.add_collection(pc)

    # Cover up Antarctica so the legend can be placed over it.
    ax.axhspan(0, 1000 * 1800, facecolor='w', edgecolor='w', zorder=2)

    # Draw the color legend.
    ax_legend = p.add_axes([0.35, 0.24, 0.3, 0.03], zorder=3)
    cmap = mpl.colors.ListedColormap(scheme)
    cb = mpl.colorbar.ColorbarBase(ax_legend, cmap=cmap, ticks=bins,
                                   boundaries=bins, orientation='horizontal')
    cb.ax.set_xticklabels([str(round(i, 1)) for i in bins],
                          rotation=45, ha='right')

    plt.annotate(descripton, xy=(-.8, -3.2), size=14, xycoords='axes fraction')

    _thousand_sep(p, ax)
def plot_us_chloropleth(datafile, dest, colorscale, bins,
                        nodatacolor='#dddddd', scale=1, resolution='l',
                        usecol='Magnitude', inputkwargs={}):
    """Format: CSV with 'Geography', 'Geoid', and 'Magnitude' columns."""
    shapefile = 'cb_2017_us_county_500k/cb_2017_us_county_500k'
    num_colors = len(bins) - 1

    gc = GeonamesCache()
    # iso3_codes = list(gc.get_dataset_by_key(gc.get_countries(), 'iso3').keys())

    df = pd.read_csv(datafile, **inputkwargs)
    geoid_lookup = lookup.geoids
    if 'Geoid' not in df:
        geoids = np.empty(df.shape[0], dtype=np.uint32)
        for index, row in df.iterrows():
            if "Geography" in row:
                geography = row['Geography']
            else:
                state = row['State']
                county = row['County']
                geography = '%s, %s' % (county, state)
            state, county, suffix = parse_geography(geography)
            if suffix and (state, '%s%s' % (county, suffix)) in geoid_lookup:
                geoids[index] = geoid_lookup[(state, '%s%s' % (county, suffix))]
            else:
                geoids[index] = geoid_lookup[(state, county)]
        df.insert(0, 'Geoid', geoids)
    df.set_index('Geoid', inplace=True)
    # df = df.loc[iso3_codes].dropna()  # Filter out non-countries and missing values.
    values = df[usecol]

    # https://matplotlib.org/api/pyplot_summary.html#matplotlib.pyplot.colormaps
    cm = plt.get_cmap(colorscale)
    scheme = [cm(i / num_colors) for i in range(num_colors)]
    df['bin'] = np.digitize(values, bins) - 1
    df.sort_values('bin', ascending=False).head(10)

    # This doesn't work, is it important?
    # mpl.style.use('map')
    fig = plt.figure(figsize=(default_size * scale, default_size * scale))
    grid = gs.GridSpec(nrows=10, ncols=10)

    for lon_0, lat_0, gridpos, llcrnrlon, llcrnrlat, urcrnrlon, urcrnrlat in \
            [(-98.5795, 39.828, grid[:-2, :], -121, 22, -64, 47),  # Contiguous US
             (-160, 63.5, grid[-4:, :6], -185.3, 49, -116, 65.5),  # Alaska
             (-158, 21, grid[-3:, 6:], -161, 18, -154, 23)]:       # Hawaii
        m = Basemap(lon_0=lon_0, lat_0=lat_0, projection='ortho',
                    resolution=resolution)
        ax = fig.add_subplot(gridpos, facecolor='#00000000', frame_on=False)
        m.readshapefile(shapefile, 'units', color='#444444',
                        linewidth=default_border_linewidth * scale)
        for info, shape in zip(m.units_info, m.units):
            geoid = int(info['GEOID'])
            if geoid in equivalencies and geoid not in df.index:
                geoid = equivalencies[geoid]
            if geoid not in df.index:
                color = nodatacolor
            else:
                color = scheme[df.loc[geoid]['bin']]

            patches = [Polygon(np.array(shape), True)]
            pc = PatchCollection(patches)
            pc.set_facecolor(color)
            ax.add_collection(pc)

        xmin, ymin = m(llcrnrlon, llcrnrlat)
        xmax, ymax = m(urcrnrlon, urcrnrlat)
        ax.set_xlim(xmin, xmax)
        ax.set_ylim(ymin, ymax)

    plt.savefig(dest, bbox_inches='tight')
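# A hedged usage sketch for plot_us_chloropleth; the file names and bin edges
# below are invented for illustration:
plot_us_chloropleth('county_unemployment.csv', 'county_unemployment.png',
                    colorscale='Blues', bins=[0, 2, 4, 6, 8, 10], scale=2)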
def __init__(self, parent=None, width=4, height=3, view_option=ViewOption.QUERY):
    # A Figure is one figure; axes are its subplots. A figure can hold several
    # axes, but here there is only one.
    self.fig = Figure(figsize=(width, height), dpi=100)
    FigureCanvas.__init__(self, self.fig)
    self.setParent(parent)
    self.axes = self.fig.add_subplot(111)  # The subplot.
    # Initialize the queried point set to None to simplify later handling.
    self.point = None
    # Class handling top-k queries.
    self.top_k = TopK()
    # The following three lists are only used in the query view; they map a
    # query-subset index to the index in the CSV file.
    self.index_list = []  # CSV indices corresponding to the lon/lat values.
    self.lon_list = []
    self.lat_list = []
    # Class handling distance (radius) queries.
    self.radius = Radius()
    # View option.
    self.view_option = view_option

    # Map for the query view.
    if self.view_option == ViewOption.QUERY:
        # The base map.
        self.m = Basemap(ax=self.axes, projection='mill', area_thresh=10000,
                         llcrnrlat=-65, llcrnrlon=-180, urcrnrlat=80,
                         urcrnrlon=180, resolution='c')
        self.m.fillcontinents(color='#DEDEDE', lake_color='#DEDEDE', zorder=0.1)
        self.m.drawcoastlines(linewidth=0.2, color='k')
        self.m.drawcountries(linewidth=0.5, color='k')
        self.m.drawmapboundary(fill_color='#A0CFDF')

        # The tooltip shown after a point is selected.
        self.annot = self.axes.annotate(
            "", xy=(0, 0), xytext=(-50, 20), textcoords="offset points",
            bbox=dict(boxstyle="round", fc="w"),
            arrowprops=dict(arrowstyle="->"))
        self.annot.set_visible(False)

        # Update the tooltip.
        def update_annot(ind):
            # This yields the CSV index for the selected lon/lat.
            index = self.index_list[ind["ind"][0]]
            pos = self.point.get_offsets()[ind["ind"][0]]
            self.annot.xy = pos
            text = marker_label[index]  # Set the tooltip text.
            self.annot.set_text(text)
            self.annot.get_bbox_patch().set_alpha(0.8)

        # Handler invoked when the mouse moves over a point.
        def hover(event):
            if event.inaxes == self.axes and self.point is not None:
                cont, ind = self.point.contains(event)
                if cont:
                    update_annot(ind)
                    self.annot.set_visible(True)
                    self.fig.canvas.draw_idle()
                else:
                    vis = self.annot.get_visible()
                    if vis:
                        self.annot.set_visible(False)
                        self.fig.canvas.draw_idle()

        # Bind the handler.
        self.fig.canvas.mpl_connect("motion_notify_event", hover)
        self.axes.set_title('店铺查询')  # "Store lookup"
        self.axes.title.set_y(1.05)

    # Map for the time-zone view.
    if self.view_option == ViewOption.TIMEZONE:
        self.m = Basemap(ax=self.axes, projection='mill', area_thresh=10000,
                         llcrnrlat=-65, llcrnrlon=-180, urcrnrlat=80,
                         urcrnrlon=180, resolution='c')
        self.m.fillcontinents(color='#DEDEDE', lake_color='#DEDEDE', zorder=0.1)
        self.m.drawcoastlines(linewidth=0.2, color='k')
        self.m.drawcountries(linewidth=0.5, color='k')
        self.m.drawmapboundary(fill_color='#A0CFDF')

        zone_dict = {}
        label = list()
        # Convert lon/lat to x, y coordinates, which the plot output needs.
        xpt, ypt = self.m(lon, lat)
        for item in timezone:
            tz = item.split()[0]
            if tz not in zone_dict:
                zone_dict[tz] = 1
            else:
                zone_dict[tz] += 1
        for item in timezone:
            tz = item.split()[0]
            count = zone_dict[tz]
            # label holds the colormap index.
            if count < 1500:
                label.append(0)
            elif count < 3000:
                label.append(1)
            elif count < 4500:
                label.append(2)
            else:
                label.append(3)

        def colormap():
            return mpl.colors.LinearSegmentedColormap.from_list(
                'cmap', ['#FB7C5C', '#F6563E', '#E42F28', '#C3161A'], 256)

        self.point = self.m.scatter(xpt, ypt, marker='o', s=3, c=label,
                                    cmap=colormap(), zorder=1)
        self.point.set_visible(True)
        self.annot = self.axes.annotate(
            "", xy=(0, 0), xytext=(-50, 20), textcoords="offset points",
            bbox=dict(boxstyle="round", fc="w"),
            arrowprops=dict(arrowstyle="->"))
        self.annot.set_visible(False)

        def update_annot(ind):
            index = ind['ind'][0]
            pos = self.point.get_offsets()[ind["ind"][0]]
            self.annot.xy = pos
            text = marker_label[index]
            self.annot.set_text(text)
            self.annot.get_bbox_patch().set_alpha(0.8)

        def hover(event):
            vis = self.annot.get_visible()
            if event.inaxes == self.axes:
                cont, ind = self.point.contains(event)
                if cont:
                    update_annot(ind)
                    self.annot.set_visible(True)
                    self.fig.canvas.draw_idle()
                else:
                    if vis:
                        self.annot.set_visible(False)
                        self.fig.canvas.draw_idle()

        self.fig.canvas.mpl_connect("motion_notify_event", hover)

        bounds = [0, 1500, 3000, 4500, 6000]
        norm = mpl.colors.BoundaryNorm(bounds, colormap().N)
        ax_cbar = self.fig.add_axes([0.3, 0.17, 0.4, 0.02])
        cbar = mpl.colorbar.ColorbarBase(ax_cbar, cmap=colormap(), norm=norm,
                                         spacing='uniform', ticks=bounds,
                                         boundaries=bounds,
                                         orientation='horizontal')
        cbar.outline.set_linewidth(0.2)
        cbar.ax.tick_params(labelsize=8, labelcolor='#666666')
        self.axes.set_title('时区分布')  # "Time-zone distribution"
        self.axes.title.set_y(1.05)

    # Map for the density view.
    if self.view_option == ViewOption.DESTINY:
        gnc = GeonamesCache()
        countries = gnc.get_countries()
        counts = stb_file['Country'].value_counts()
        country_dict = {}
        for k, v in counts.items():
            # print(k, round(v / countries[k]['areakm2'] * 1000000))
            country_dict[countries[k]['iso3']] = round(
                v / countries[k]['areakm2'] * 1000000)

        self.m = Basemap(ax=self.axes, projection='mill', llcrnrlat=-65,
                         llcrnrlon=-180, urcrnrlat=80, urcrnrlon=180,
                         resolution='c')
        self.m.fillcontinents(color='#DEDEDE', lake_color='#DEDEDE', zorder=0)
        # m.drawcoastlines(linewidth=0.2, color='k')
        self.m.drawcountries(linewidth=0.5, color='k')
        self.m.drawmapboundary(fill_color='#A0CFDF')

        shapefile = 'ne_110m_admin_0_countries/ne_110m_admin_0_countries'

        # Set up the color bar.
        color_num = 5
        cmap = mpl.cm.get_cmap('Reds')
        color_range = [cmap(i / (color_num + 1)) for i in range(color_num + 1)]
        bounds = [0, 10, 100, 1000, 10000, 100000]
        norm = mpl.colors.BoundaryNorm(bounds, cmap.N)

        def colormap():
            return mpl.colors.LinearSegmentedColormap.from_list(
                'cmap',
                ['#FBA083', '#FB7C5C', '#F6563E', '#E42F28', '#C3161A'], 256)

        # Read the shapefile.
        self.m.readshapefile(shapefile, 'units', color='#DDDDDD',
                             linewidth=0.1)

        # Add patches.
        for info, shape in zip(self.m.units_info, self.m.units):
            code = info['ADM0_A3']
            if code not in country_dict:
                color = '#DDDDDD'
            else:
                if code == 'CHN' or code == 'TWN':
                    color = '#F6563E'
                elif country_dict[code] < 10:
                    color = '#FBA083'
                elif country_dict[code] < 100:
                    color = '#FB7C5C'
                elif country_dict[code] < 1000:
                    color = '#F6563E'
                elif country_dict[code] < 10000:
                    color = '#E42F28'
                else:
                    color = '#C3161A'
            patches = [Polygon(np.array(shape), True)]
            pc = PatchCollection(patches, facecolor=color, edgecolor='None',
                                 linewidth=0)
            self.axes.add_collection(pc)

        ax_cbar = self.fig.add_axes([0.3, 0.15, 0.4, 0.02])
        cbar = mpl.colorbar.ColorbarBase(ax_cbar, cmap=colormap(), norm=norm,
                                         spacing='uniform', ticks=bounds,
                                         boundaries=bounds,
                                         orientation='horizontal')
        cbar.outline.set_linewidth(0.2)
        cbar.ax.tick_params(labelsize=8, labelcolor='#666666')
        self.axes.set_title('密度分布')  # "Density distribution"
        self.axes.title.set_y(1.05)
def plot_us_state_chloropleth(datafile, dest, colorscale, bins,
                              nodatacolor='#dddddd', scale=1, resolution='l',
                              usecol='Magnitude', inputkwargs={}):
    """Format: CSV with 'Geography', 'AFFGEOID', and 'Magnitude' columns."""
    shapefile = 'cb_2017_us_state_500k/cb_2017_us_state_500k'
    num_colors = len(bins) - 1

    gc = GeonamesCache()

    df = pd.read_csv(datafile, **inputkwargs)
    df.set_index('AFFGEOID', inplace=True)
    values = df[usecol]

    # https://matplotlib.org/api/pyplot_summary.html#matplotlib.pyplot.colormaps
    cm = plt.get_cmap(colorscale)
    scheme = [cm(i / num_colors) for i in range(num_colors)]
    df['bin'] = np.digitize(values, bins) - 1
    df.sort_values('bin', ascending=False).head(10)

    fig = plt.figure(figsize=(default_size * scale, default_size * scale))
    grid = gs.GridSpec(nrows=10, ncols=10)

    for lon_0, lat_0, gridpos, llcrnrlon, llcrnrlat, urcrnrlon, urcrnrlat in \
            [(-98.5795, 39.828, grid[:-2, :], -121, 22, -64, 47),  # Contiguous US
             (-160, 63.5, grid[-4:, :6], -185.3, 49, -116, 65.5),  # Alaska
             (-158, 21, grid[-3:, 6:], -161, 18, -154, 23)]:       # Hawaii
        m = Basemap(lon_0=lon_0, lat_0=lat_0, projection='ortho',
                    resolution=resolution)
        ax = fig.add_subplot(gridpos, facecolor='#00000000', frame_on=False)
        m.readshapefile(shapefile, 'units', color='#444444',
                        linewidth=default_border_linewidth * scale)
        for info, shape in zip(m.units_info, m.units):
            geoid = info['AFFGEOID']
            if geoid in equivalencies and geoid not in df.index:
                geoid = equivalencies[geoid]
            if geoid not in df.index:
                color = nodatacolor
            else:
                color = scheme[df.loc[geoid]['bin']]

            patches = [Polygon(np.array(shape), True)]
            pc = PatchCollection(patches)
            pc.set_facecolor(color)
            ax.add_collection(pc)

        xmin, ymin = m(llcrnrlon, llcrnrlat)
        xmax, ymax = m(urcrnrlon, urcrnrlat)
        ax.set_xlim(xmin, xmax)
        ax.set_ylim(ymin, ymax)

    plt.savefig(dest, bbox_inches='tight')