Example #1
0
def country(from_key='name', to_key='iso'):
    """Build a lookup function that translates one country attribute to another.

    The returned mapper takes exactly one argument: a value of the
    ``from_key`` attribute. It answers with the ``to_key`` attribute of the
    matching country record, or ``None`` when no record matches. With the
    defaults you pass a country name and receive the record's ``iso`` value.

    :param from_key: (optional) the country attribute given as input.
        Defaults to ``name``.
    :param to_key: (optional) the country attribute wanted as output.
        Defaults to ``iso``.
    :return: mapper
    :rtype: function
    """
    cache = GeonamesCache()
    records_by_key = cache.get_dataset_by_key(cache.get_countries(), from_key)

    def mapper(input):
        # Name lookups first pass through the alternative-names mapping.
        lookup = input
        if from_key == 'name':
            lookup = mappings.country_names.get(lookup, lookup)
        record = records_by_key.get(lookup)
        # Unknown (or empty) records yield None.
        return record[to_key] if record else None

    return mapper
Example #2
0
    def get_countries_by_continent(cls, continent):
        """Return the entries of the ``cc2`` field for the named continent.

        ``continent`` is compared against each continent record's
        ``toponymName``. The first matching record is used and its
        comma-separated ``cc2`` string is split into a list. An unknown
        name raises ``IndexError`` because there is no first match.
        """
        matches = [
            record for record in GeonamesCache().get_continents().values()
            if record['toponymName'] == continent
        ]
        first_match = matches[0]
        return first_match['cc2'].split(',')
Example #3
0
def plot_world_chloropleth(datafile,
                           dest,
                           colorscale,
                           bins,
                           nodatacolor='#dddddd',
                           scale=1,
                           projection='robin',
                           resolution='l',
                           usecol='Magnitude',
                           inputkwargs=None):
    """Draw a world choropleth from a CSV file and save it to ``dest``.

    Format: CSV with 'Country Name', 'Country Code', and 'Magnitude' columns.

    :param datafile: CSV path (or buffer) handed to ``pd.read_csv``.
    :param dest: output target handed to ``plt.savefig``.
    :param colorscale: matplotlib colormap name.
    :param bins: bin edges for ``np.digitize``; ``len(bins) - 1`` colors
        are sampled from the colormap.
    :param nodatacolor: color for shapes without a usable value.
    :param scale: multiplier applied to figure size and line widths.
    :param projection: Basemap projection name.
    :param resolution: Basemap resolution code.
    :param usecol: name of the CSV column holding the values to plot.
    :param inputkwargs: optional dict of extra keyword arguments for
        ``pd.read_csv``. ``None`` (the default) means no extra arguments.
    """
    # See http://ramiro.org/notebook/basemap-choropleth/
    shapefile = 'ne_10m_admin_0_countries_lakes/ne_10m_admin_0_countries_lakes'
    num_colors = len(bins) - 1
    # Avoid a shared mutable default argument.
    read_kwargs = {} if inputkwargs is None else inputkwargs

    gc = GeonamesCache()
    iso3_codes = list(gc.get_dataset_by_key(gc.get_countries(), 'iso3').keys())

    df = pd.read_csv(datafile, **read_kwargs)
    df.set_index('Country Code', inplace=True)
    # Keep exactly one row per known iso3 code; codes missing from the CSV
    # become NaN rows (they are deliberately not dropped).
    df = df.reindex(iso3_codes)

    values = df[usecol]
    # https://matplotlib.org/api/pyplot_summary.html#matplotlib.pyplot.colormaps
    cm = plt.get_cmap(colorscale)
    scheme = [cm(i / num_colors) for i in range(num_colors)]
    # Extra last slot: np.digitize puts NaN (and values >= bins[-1]) at
    # index len(bins), which after the -1 below lands on this entry. Values
    # below bins[0] give -1, which also indexes this last entry.
    scheme.append(nodatacolor)
    df['bin'] = np.digitize(values, bins) - 1

    # This doesn't work, is it important?
    # mpl.style.use('map')
    fig = plt.figure(figsize=(default_size * scale, default_size * scale))

    ax = fig.add_subplot(111, facecolor='w', frame_on=False)

    m = Basemap(lon_0=0, projection=projection, resolution=resolution)
    m.drawmapboundary(linewidth=default_map_linewidth * scale, color='w')

    m.readshapefile(shapefile,
                    'units',
                    color='#444444',
                    linewidth=default_border_linewidth * scale)
    for info, shape in zip(m.units_info, m.units):
        iso3 = info['ADM0_A3']
        if iso3 not in df.index:
            color = nodatacolor
        else:
            # Scalar lookup; int() guards against the bin being upcast to a
            # float when a whole row is materialised.
            color = scheme[int(df.at[iso3, 'bin'])]

        patches = [Polygon(np.array(shape), True)]
        pc = PatchCollection(patches)
        pc.set_facecolor(color)
        ax.add_collection(pc)

    plt.savefig(dest, bbox_inches='tight')
Example #4
0
    def payment_region(self):
        """Classify ``self.country`` as payment region 'EU', 'LA' or 'US'.

        The decision is based on the ``continentcode`` of the country's
        record: 'EU' maps to 'EU'; a code ending in 'A' maps to 'LA' unless
        the country is 'US' or 'CA'; everything else maps to 'US'.
        """
        country_record = GeonamesCache().get_countries()[self.country]
        continent_code = country_record['continentcode']

        if continent_code == 'EU':
            return 'EU'

        ends_with_a = continent_code[-1] == 'A'
        if ends_with_a and self.country not in ['US', 'CA']:
            return 'LA'

        return 'US'
def country(from_key='name', to_key='iso'):
    """Return a mapper that converts a ``from_key`` value into ``to_key``.

    The mapper looks its argument up in the country dataset keyed by
    ``from_key`` and returns the ``to_key`` attribute of the record found,
    or ``None`` when there is no such record. When ``from_key`` is
    ``'name'`` the argument is first translated through
    ``mappings.country_names``.
    """
    cache = GeonamesCache()
    records = cache.get_dataset_by_key(cache.get_countries(), from_key)

    def mapper(key):
        if from_key == 'name':
            # Fall back to the key itself when no alternative name exists.
            key = mappings.country_names.get(key, key)
        record = records.get(key)
        if not record:
            return None
        return record[to_key]

    return mapper
Example #6
0
class GeonamesCacheTestSuite(unittest.TestCase):
    """GeonamesCache test cases."""

    def setUp(self):
        # Each test gets its own cache object.
        self.geonamescache = GeonamesCache()

    def _assert_names(self, dataset, expected):
        """Check code -> name pairs and that bogus codes are absent."""
        for code, name in expected:
            self.assertIn(code, dataset)
            self.assertEqual(name, dataset[code]['name'])

        for code in ['XX', 'OO']:
            self.assertNotIn(code, dataset)

    def test_continents(self):
        continents = self.geonamescache.get_continents()
        testdata = (('AF', 'Africa'), ('AN', 'Antarctica'), ('AS', 'Asia'),
                    ('EU', 'Europe'), ('NA', 'North America'),
                    ('OC', 'Oceania'), ('SA', 'South America'))
        self._assert_names(continents, testdata)

    def test_get_countries(self):
        countries = self.geonamescache.get_countries()
        testdata = (('ES', 'Spain'), ('FR', 'France'), ('US', 'United States'))
        self._assert_names(countries, testdata)

    def test_us_states(self):
        us_states = self.geonamescache.get_us_states()
        testdata = (('NM', 'New Mexico'), ('CA', 'California'),
                    ('NV', 'Nevada'))
        self._assert_names(us_states, testdata)

    def test_get_countries_by_names(self):
        # Length of get_countries_by_names dict and get_countries dict must be
        # the same, unless country names wouldn't be unique.
        # assertEqual is required here: assertTrue(a, b) treats b as the
        # failure message and passes for any non-zero a.
        self.assertEqual(len(self.geonamescache.get_countries_by_names()),
                         len(self.geonamescache.get_countries()))

    def test_get_cities_by_name(self):
        # Looks cities up by geoname id in the full city dataset.
        cities = self.geonamescache.get_cities()
        for gid, name in (('3191316', 'Samobor'),
                          ('3107112', 'Rivas-Vaciamadrid')):
            self.assertEqual(name, cities[gid]['name'])

    def test_get_cities_by_name_madrid(self):
        self.assertEqual(2,
                         len(self.geonamescache.get_cities_by_name('Madrid')))

    def test_us_counties_len(self):
        # Make sure there are 3235 counties, which includes Puerto Rico etc.
        us_counties = self.geonamescache.get_us_counties()
        self.assertEqual(3235, len(us_counties))
Example #7
0
 def geo_match2(location_names):
     """
     Match US location names with coordinates.

     For every key of ``location_names`` the candidate names are first looked
     up directly in the city coordinate table (two built-in corner cases plus
     the records of ``city_loca.json``). Only when no candidate matches is a
     fallback tried: a candidate that is a state abbreviation known to
     GeonamesCache, or a key of ``capital_dic``, is resolved through
     ``capital_dic`` and that entry's coordinates are used.

     :param location_names: dict mapping an item key to an iterable of
         candidate location names
     :return: dict mapping each resolved item key to ``[latitude, longitude]``
     """
     output = {}
     gc = GeonamesCache()
     state_dic_abbr = gc.get_us_states()
     new_data = {
         'DC': [38.895, -77.0366667],
         'St. Paul': [44.9537, -93.0900]
     }
     with open("city_loca.json", 'r') as f2:
         # One JSON document per line; the first entry for a city name wins.
         for line in f2:
             datum = json.loads(line)
             if datum['CityNameAccented'] not in new_data:
                 new_data[datum['CityNameAccented']] = [
                     datum['Latitude'], datum['Longitude']
                 ]
     for i in location_names:
         for name in location_names[i]:
             if name in new_data:
                 output[i] = new_data[name]
                 break
         # Only fall back when the direct match failed. The guard has to look
         # at ``output``; testing ``new_data`` (keyed by city name) made the
         # fallback overwrite direct city matches.
         if i in output:
             continue
         full_state_name = ''
         for name in location_names[i]:
             if name in state_dic_abbr:
                 full_state_name = state_dic_abbr[name]['name']
             elif name in capital_dic:
                 full_state_name = name
             if full_state_name:
                 # presumably capital_dic maps a state name to its capital
                 # city name -- defined outside this block, TODO confirm
                 capital = capital_dic.get(full_state_name)
                 if capital in new_data:
                     output[i] = new_data[capital]
                     break
     return output
# -*- coding: utf-8 -*-
# This is by far the ugliest Python script I ever wrote
# without cities 14954 location strings remain unresolved
# with unique city names 6345 (number is not up to date) location strings remain unresolved
# with largest city name 4623 location strings remain unresolved
# with manually resolved locations 3333 location strings remain unresolved

import csv, json, re
from geonamescache import GeonamesCache
from loclists import check_unresolved

# Module-level accumulators; nothing in the visible part of the script fills
# them (presumably done further down -- TODO confirm).
unresolved_locations = []
commits_by_countries = {}
countries_by_locstr = {}
# Country and US-state datasets, each in two variants as returned by the
# corresponding GeonamesCache getters.
gc = GeonamesCache()
countries = gc.get_countries()
countries_by_names = gc.get_countries_by_names()
us_states = gc.get_us_states()
us_states_by_names = gc.get_us_states_by_names()

# Matches a single dot, parenthesis, digit or hyphen.
re_ignore = re.compile(r'[\.\(\)\d-]')
# Matches a run of two or more whitespace characters.
re_ws = re.compile(r'\s{2,}')


def test_locs(locs):
    """Return the first normalised entry of ``locs`` found in ``countries_by_names``.

    Each entry is stripped and lower-cased before the membership test, and
    it is this normalised string that is returned. Falls through (returning
    ``None``) when no entry matches.

    NOTE(review): ``loctitle`` and ``locupper`` are computed but never used
    in the visible body -- the function looks truncated; confirm against the
    full script before changing it.
    """
    for loc in locs:
        loc = loc.strip().lower()
        loctitle = loc.title()
        locupper = loc.upper()
        if loc in countries_by_names:
            return loc
Example #9
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import pandas as pd
from geonamescache import GeonamesCache

# Dump the GeonamesCache country dataset to a CSV file.
gc = GeonamesCache()
countries = gc.get_countries()
# orient='index' turns every key of the countries dict into one row.
df = pd.DataFrame.from_dict(countries, orient='index')
# index=False leaves the row labels (the dict keys) out of the CSV.
df.to_csv('geonamescache/countries.csv', index=False)
Example #10
0
 def setUp(self):
     """Create a GeonamesCache object and store it on the test instance."""
     self.geonamescache = GeonamesCache()
Example #11
0
class GeonamesCacheTestSuite(unittest.TestCase):
    """GeonamesCache test cases."""

    def setUp(self):
        # Each test gets its own cache object.
        self.geonamescache = GeonamesCache()

    def _assert_names(self, dataset, expected):
        """Check code -> name pairs and that bogus codes are absent."""
        for code, name in expected:
            self.assertIn(code, dataset)
            self.assertEqual(name, dataset[code]['name'])

        for code in ['XX', 'OO']:
            self.assertNotIn(code, dataset)

    def test_continents(self):
        continents = self.geonamescache.get_continents()
        testdata = (
            ('AF', 'Africa'),
            ('AN', 'Antarctica'),
            ('AS', 'Asia'),
            ('EU', 'Europe'),
            ('NA', 'North America'),
            ('OC', 'Oceania'),
            ('SA', 'South America'),
        )
        self._assert_names(continents, testdata)

    def test_get_countries(self):
        countries = self.geonamescache.get_countries()
        testdata = (('ES', 'Spain'), ('FR', 'France'), ('US', 'United States'))
        self._assert_names(countries, testdata)

    def test_us_states(self):
        us_states = self.geonamescache.get_us_states()
        testdata = (
            ('NM', 'New Mexico'), ('CA', 'California'), ('NV', 'Nevada'))
        self._assert_names(us_states, testdata)

    def test_get_countries_by_names(self):
        # Length of get_countries_by_names dict and get_countries dict must be
        # the same, unless country names wouldn't be unique.
        # assertEqual is required here: assertTrue(a, b) treats b as the
        # failure message and passes for any non-zero a.
        self.assertEqual(len(self.geonamescache.get_countries_by_names()),
                         len(self.geonamescache.get_countries()))

    def test_get_cities_by_name(self):
        # Looks cities up by geoname id in the full city dataset.
        cities = self.geonamescache.get_cities()
        for gid, name in (('3191316', 'Samobor'),
                          ('3107112', 'Rivas-Vaciamadrid')):
            self.assertEqual(name, cities[gid]['name'])

    def test_get_cities_by_name_madrid(self):
        self.assertEqual(
            2, len(self.geonamescache.get_cities_by_name('Madrid')))

    def test_us_counties_len(self):
        # Make sure there are 3234 counties, which includes Puerto Rico etc.
        us_counties = self.geonamescache.get_us_counties()
        self.assertEqual(3234, len(us_counties))
Example #12
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import argparse
import simplemapplot
import pandas as pd
from geonamescache import GeonamesCache

# Command line: a single positional argument naming the CSV data file.
parser = argparse.ArgumentParser(
    description='Create a Choropleth world map without a legend.',
)
parser.add_argument('file', help='CSV data file')
args = parser.parse_args()

# Two-entry palette handed to simplemapplot together with the data below.
colors = ['#ffffff', '#ff0000']
countries = GeonamesCache().get_countries_by_names()

df = pd.read_csv(args.file)
country_data = df['Country']

# Every country named in the CSV is flagged with value 1, keyed by its
# lower-cased ``iso`` attribute. A name unknown to the cache raises KeyError.
colorize = {
    countries[name]['iso'].lower(): 1
    for name in country_data
}

simplemapplot.make_world_country_map(data=colorize, colors=colors)
Example #13
0
def plotChoropleth(filename,imgfile,figNum):
    """Render a world choropleth of the ``DISCON`` column and save it.

    Reads the columns ``CC`` and ``DISCON`` from the CSV ``filename``, keeps
    only rows whose ``CC`` is a known country ``iso`` code, sorts the values
    into 20 fixed bins spanning 0..1 and colors each shape of the shapefile
    by the bin of its ``ISO_A2`` code.

    :param filename: path of the CSV file to read
    :param imgfile: output path handed to ``plt.savefig``
    :param figNum: figure number handed to ``plt.figure`` (also printed)
    """
    shapefile = 'data/ne/ne_10m_admin_0_countries'
    cols = ['CC', 'DISCON']
    num_colors = 20
    nodata_color = '#dddddd'
    gc = GeonamesCache()
    iso_codes = list(gc.get_dataset_by_key(gc.get_countries(), 'iso').keys())
    df = pd.read_csv(filename, skiprows=0, usecols=cols)
    df.set_index('CC', inplace=True)
    # Filter out non-countries and missing values. reindex() replaces the
    # removed DataFrame.ix accessor: unknown codes become NaN rows, which
    # dropna() then discards.
    df = df.reindex(iso_codes).dropna()
    values = df['DISCON']
    cm = plt.get_cmap('Reds')
    scheme = [cm(float(i) / num_colors) for i in range(num_colors)]
    #bins = np.linspace(values.min(), values.max(), num_colors)
    bins = np.linspace(0, 1, num_colors)
    df['bin'] = np.digitize(values, bins) - 1

    mpl.style.use('seaborn-pastel')
    print('Plotting Figure {0}: {1}'.format(figNum,imgfile))
    fig = plt.figure(figNum,figsize=(22, 12))

    # ``facecolor`` is the current name of the removed ``axisbg`` keyword.
    ax = fig.add_subplot(111, facecolor='w', frame_on=False)
    #plt.title('Disco Choropleth', fontsize=20)#, y=.95)

    m = Basemap(lon_0=0, projection='robin')
    m.drawmapboundary(color='w')

    m.readshapefile(shapefile, 'units', color='#444444', linewidth=.2)
    for info, shape in zip(m.units_info, m.units):
        #iso = info['ADM0_A3']
        iso = info['ISO_A2']
        # Start from the no-data color so a failed lookup can never reuse
        # the previous shape's color (or hit an unbound name).
        color = nodata_color
        if iso in df.index:
            try:
                color = scheme[int(df.at[iso, 'bin'])]
            except TypeError:
                print(iso)
                traceback.print_exc()

        patches = [Polygon(np.array(shape), True)]
        pc = PatchCollection(patches)
        pc.set_facecolor(color)
        ax.add_collection(pc)

    # Cover up Antarctica so legend can be placed over it.
    ax.axhspan(0, 1000 * 1800, facecolor='w', edgecolor='w', zorder=2)

    # Draw color legend.
    ax_legend = fig.add_axes([0.35, 0.14, 0.3, 0.03], zorder=3)
    cmap = mpl.colors.ListedColormap(scheme)
    cb = mpl.colorbar.ColorbarBase(ax_legend, cmap=cmap, ticks=bins, boundaries=bins, orientation='horizontal')
    cb.ax.set_xticklabels([str(round(i, 2)) for i in bins], rotation=80)

    # Set the map footer.
    #plt.annotate(descripton, xy=(-.8, -3.2), size=14, xycoords='axes fraction')

    plt.savefig(imgfile, bbox_inches='tight', pad_inches=.2)
Example #14
0
def plotChoropleth(filename, imgfile, figNum):
    """Render a world choropleth of the ``DISCON`` column and save it.

    Reads the columns ``CC`` and ``DISCON`` from the CSV ``filename``, keeps
    only rows whose ``CC`` is a known country ``iso`` code, sorts the values
    into 20 fixed bins spanning 0..1 and colors each shape of the shapefile
    by the bin of its ``ISO_A2`` code.

    :param filename: path of the CSV file to read
    :param imgfile: output path handed to ``plt.savefig``
    :param figNum: figure number handed to ``plt.figure`` (also printed)
    """
    shapefile = 'data/ne/ne_10m_admin_0_countries'
    cols = ['CC', 'DISCON']
    num_colors = 20
    nodata_color = '#dddddd'
    gc = GeonamesCache()
    iso_codes = list(gc.get_dataset_by_key(gc.get_countries(), 'iso').keys())
    df = pd.read_csv(filename, skiprows=0, usecols=cols)
    df.set_index('CC', inplace=True)
    # Filter out non-countries and missing values. reindex() replaces the
    # removed DataFrame.ix accessor: unknown codes become NaN rows, which
    # dropna() then discards.
    df = df.reindex(iso_codes).dropna()
    values = df['DISCON']
    cm = plt.get_cmap('Reds')
    scheme = [cm(float(i) / num_colors) for i in range(num_colors)]
    #bins = np.linspace(values.min(), values.max(), num_colors)
    bins = np.linspace(0, 1, num_colors)
    df['bin'] = np.digitize(values, bins) - 1

    mpl.style.use('seaborn-pastel')
    print('Plotting Figure {0}: {1}'.format(figNum, imgfile))
    fig = plt.figure(figNum, figsize=(22, 12))

    # ``facecolor`` is the current name of the removed ``axisbg`` keyword.
    ax = fig.add_subplot(111, facecolor='w', frame_on=False)
    #plt.title('Disco Choropleth', fontsize=20)#, y=.95)

    m = Basemap(lon_0=0, projection='robin')
    m.drawmapboundary(color='w')

    m.readshapefile(shapefile, 'units', color='#444444', linewidth=.2)
    for info, shape in zip(m.units_info, m.units):
        #iso = info['ADM0_A3']
        iso = info['ISO_A2']
        # Start from the no-data color so a failed lookup can never reuse
        # the previous shape's color (or hit an unbound name).
        color = nodata_color
        if iso in df.index:
            try:
                color = scheme[int(df.at[iso, 'bin'])]
            except TypeError:
                print(iso)
                traceback.print_exc()

        patches = [Polygon(np.array(shape), True)]
        pc = PatchCollection(patches)
        pc.set_facecolor(color)
        ax.add_collection(pc)

    # Cover up Antarctica so legend can be placed over it.
    ax.axhspan(0, 1000 * 1800, facecolor='w', edgecolor='w', zorder=2)

    # Draw color legend.
    ax_legend = fig.add_axes([0.35, 0.14, 0.3, 0.03], zorder=3)
    cmap = mpl.colors.ListedColormap(scheme)
    cb = mpl.colorbar.ColorbarBase(ax_legend,
                                   cmap=cmap,
                                   ticks=bins,
                                   boundaries=bins,
                                   orientation='horizontal')
    cb.ax.set_xticklabels([str(round(i, 2)) for i in bins], rotation=80)

    # Set the map footer.
    #plt.annotate(descripton, xy=(-.8, -3.2), size=14, xycoords='axes fraction')

    plt.savefig(imgfile, bbox_inches='tight', pad_inches=.2)
 def setUp(self):
     """Create a GeonamesCache object and store it on the test instance."""
     self.geonamescache = GeonamesCache()
def geo_match2(location_names):
    """
    Match US location names with coordinates and dump the result to JSON.

    The input is first reshaped by ``data_preprocess``. Every candidate name
    is capitalised word by word and looked up in the coordinate table (a few
    built-in corner cases plus the records of ``city_loca.json``), also
    retrying without a trailing state token or a leading direction word.
    Items that still have no match fall back to a lookup through
    ``capital_dic``. Unresolved items are printed, and the resolved
    coordinates are written to ``coordinates_IstheServicedown_Verizon.json``.

    :param location_names: dict keyed by timestamp strings; see
        ``data_preprocess`` for the value layout that is read
    :return: the string ``'done'``
    """
    def data_preprocess(data):
        """
        this function preprocess the input data, change the format and datatype of timestamp, and add multiple
        location names into a single list
        :param data: json dict
        :return: json dict
        """
        data_formal = {}
        for i in data:
            time = datetime.datetime.strptime(i[:19], "%Y-%m-%dT%H:%M:%S")
            # the pattern of timestamp could vary for different data source
            data_formal[str(time)] = [j[0] for j in data[i][0]]
        return data_formal

    def capitalize_words(raw):
        """Upper-case the first character of every space-separated word."""
        if not raw:
            return ''
        return ' '.join(word[:1].upper() + word[1:] for word in raw.split(' '))

    location_names = data_preprocess(location_names)
    output = {}
    # load the coordinates items into a dictionary called new_data
    bad_items = []
    gc = GeonamesCache()
    state_dic_abbr = gc.get_us_states()
    new_data = {'DC': [38.895, -77.0366667], 'St. Paul': [44.9537, -93.0900], 'Temcula': [33.4936, -117.1484]}  # You can manually input some corner cases
    with open("city_loca.json", 'r') as f2:  # load coordinates from data source
        for line in f2:
            datum = json.loads(line)
            if datum['CityNameAccented'] not in new_data:
                new_data[datum['CityNameAccented']] = [datum['Latitude'], datum['Longitude']]
    # traverse through the extracted location names
    for i in location_names:
        names = [capitalize_words(raw) for raw in location_names[i]]
        for name in names:
            if name in new_data:  # deal common cases
                output[i] = new_data[name]
                break

            words = name.split(' ')
            if words[-1] in state_list:  # deal with situation like "New York NY"
                name_city = ' '.join(words[:-1])
                if name_city in new_data:
                    output[i] = new_data[name_city]
                    break

            if words[0] in directions:  # deal with situation like "South west NY"
                name_city = ' '.join(words[1:])
                if name_city in new_data:
                    output[i] = new_data[name_city]
                    break

        if i not in output:  # if the above method failed to match coordinates
            full_state_name = ''
            for name in names:
                if name in state_dic_abbr:
                    full_state_name = state_dic_abbr[name]['name']  # use the state name instead
                elif name in capital_dic:
                    full_state_name = name
                if full_state_name:
                    # use capital city to match coordinates; an explicit
                    # membership test replaces the former bare ``except``
                    capital = capital_dic.get(full_state_name)
                    if capital in new_data:
                        output[i] = new_data[capital]
                        break
        if i not in output:
            bad_items.append((i, location_names[i]))  # record the location names that can't be converted
    print(bad_items)
    with open('coordinates_IstheServicedown_' + 'Verizon' + '.json', 'w') as outfile:
        json.dump(output, outfile)
    return 'done'
Example #17
0
"""
Introduction to Web Science
Assignment 5
Question 3
Team : golf

Script used to extract data from the article-per-line file, process it
and finally write it to a CSV file
"""
import pandas as pd
from geonamescache import GeonamesCache
from geonamescache.mappers import country

gc = GeonamesCache()  # we use the GeonamesCache to get the names of countries

# mapper that translates a country name into the country's iso3 attribute
mapper = country(from_key='name', to_key='iso3')
# all country names, i.e. the keys of the country dataset keyed by 'name'
countries = list(gc.get_dataset_by_key(
    gc.get_countries(),
    'name',
).keys())
# for the US we are going to use the states
states = list(gc.get_us_states_by_names())
#print(countries)
# any of these key words could indicate that we are reading about a star
key_words = ['movie', 'film', 'TV', 'television', 'actor', 'actress']
articles = []
dataset = {}

# read the whole file; each line becomes one entry of ``articles``
with open('article-per-line.txt', 'r', encoding="utf8") as f:
    articles = f.read().splitlines()
# -*- coding: utf-8 -*-
# This is by far the ugliest Python script I ever wrote
# without cities 14954 location strings remain unresolved
# with unique city names 6345 (number is not up to date) location strings remain unresolved
# with largest city name 4623 location strings remain unresolved
# with manually resolved locations 3333 location strings remain unresolved

import csv, json, re
from geonamescache import GeonamesCache
from loclists import check_unresolved

# Module-level accumulators; nothing in the visible part of the script fills
# them (presumably done further down -- TODO confirm).
unresolved_locations = []
commits_by_countries = {}
countries_by_locstr = {}
# Country and US-state datasets, each in two variants as returned by the
# corresponding GeonamesCache getters.
gc = GeonamesCache()
countries = gc.get_countries()
countries_by_names = gc.get_countries_by_names()
us_states = gc.get_us_states()
us_states_by_names = gc.get_us_states_by_names()

# Matches a single dot, parenthesis, digit or hyphen.
re_ignore = re.compile(r'[\.\(\)\d-]')
# Matches a run of two or more whitespace characters.
re_ws = re.compile(r'\s{2,}')


def test_locs(locs):
    """Return the first normalised entry of ``locs`` found in ``countries_by_names``.

    Each entry is stripped and lower-cased before the membership test, and
    it is this normalised string that is returned. Falls through (returning
    ``None``) when no entry matches.

    NOTE(review): ``loctitle`` and ``locupper`` are computed but never used
    in the visible body -- the function looks truncated; confirm against the
    full script before changing it.
    """
    for loc in locs:
        loc = loc.strip().lower()
        loctitle = loc.title()
        locupper = loc.upper()
        if loc in countries_by_names:
            return loc
# Builtin Python modules
import csv
from pathlib import Path
import re
import string
from typing import Dict, List, Set
import unicodedata

from geonamescache import GeonamesCache
from more_itertools import partitions
import pandas as pd

from helper import Cities, Countries, City, Country

# Cities
# The helper wraps a JSON file path and a GeonamesCache object; per the
# annotations below, load_json() yields names grouped by an int key and
# get_cities() yields City objects keyed by string.
helper_cities = Cities("./data/cities.json", GeonamesCache())
group_cities_by_word_count: Dict[int, List[str]] = helper_cities.load_json()
cities: Dict[str, City] = helper_cities.get_cities()

# Countries
# Same pattern as for the cities, using the Countries helper.
helper_countries = Countries("./data/countries.json", GeonamesCache())
group_countries_by_word_count: Dict[int,
                                    List[str]] = helper_countries.load_json()
countries: Dict[str, Country] = helper_countries.get_countries()

# Input text file; abort early if it does not exist.
# NOTE(review): assert statements are skipped under ``python -O``.
text_file: Path = Path('./data/headlines.txt')
assert text_file.is_file(), f"Wrong file: {text_file}"

# Result containers, empty at this point.
unique_cities: Set[str] = set()
map_country_code_to_city: Dict[str, List[str]] = {}
Example #20
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import subprocess
from geonamescache import GeonamesCache

gc = GeonamesCache()

# Shell command templates; ``{0}`` is replaced by a country's iso3 code.
GEOJSON_CMD = 'ogr2ogr -f GeoJSON -where "ADM0_A3 IN (\'{0}\')" units.json ../shp/ne_10m_admin_1_states_provinces_lakes.shp'
TOPOJSON_CMD = '../node_modules/topojson/bin/topojson --simplify-proportion .08 --id-property fips -p name=name -o {0}.json units.json'

for country in gc.get_countries().values():
    iso3 = country['iso3']

    # Step 1: write the selected shapes to the intermediate units.json.
    subprocess.call(GEOJSON_CMD.format(iso3), shell=True)

    # Step 2: convert units.json into <iso3>.json, then drop the
    # intermediate file again.
    subprocess.call(TOPOJSON_CMD.format(iso3), shell=True)
    os.unlink('units.json')

# Finally move every generated JSON file into the source tree.
subprocess.call('mv *.json ../src/topojson/countries/', shell=True)
Example #21
0
# Input data, shapefile and plot parameters.
filename = 'csv/ag.lnd.frst.zs_Indicator_en_csv_v2/ag.lnd.frst.zs_Indicator_en_csv_v2.csv'
shapefile = 'shp/countries/ne_10m_admin_0_countries_lakes'
num_colors = 9
year = '2012'
cols = ['Country Name', 'Country Code', year]
title = 'Forest area as percentage of land area in {}'.format(year)
imgfile = 'img/{}.png'.format(slug(title))

description = '''
Forest area is land under natural or planted stands of trees of at least 5 meters in situ, whether 
productive or not, and excludes tree stands in agricultural production systems (for example, in 
fruit plantationsand agroforestry systems) and trees in urban parks and gardens. Countries without 
data are shown in grey. Data: World Bank - worldbank.org • Author: Ramiro Gómez - ramiro.org'''.strip()

gc = GeonamesCache()
iso3_codes = list(gc.get_dataset_by_key(gc.get_countries(), 'iso3').keys())

df = pd.read_csv(filename, skiprows=4, usecols=cols)
df.set_index('Country Code', inplace=True)
# Filter out non-countries and missing values. reindex() replaces the removed
# DataFrame.ix accessor: codes absent from the CSV become NaN rows, which
# dropna() then discards.
df = df.reindex(iso3_codes).dropna()

values = df[year]
cm = plt.get_cmap('Greens')
scheme = [cm(i / num_colors) for i in range(num_colors)]
# Equally spaced bin edges between the smallest and largest value.
bins = np.linspace(values.min(), values.max(), num_colors)
df['bin'] = np.digitize(values, bins) - 1
# Preview of the ten highest bins; the result is not stored, so this only
# has a visible effect in an interactive session.
df.sort_values('bin', ascending=False).head(10)

mpl.style.use('map')
fig = plt.figure(figsize=(22, 12))
Example #22
0
def main():
    """Collect Google Analytics results for all profiles and write one CSV.

    The default reporting period is the previous fiscal quarter (the fiscal
    calendar is set up to start in month 9). Dates given on the command
    line are passed through ``parse_date``. The per-profile data frames are
    added up, the index is converted with ``conv_iso_2_to_3``, the ``ga:``
    prefix is stripped from the column names, and the total is written to
    the output file.
    """
    global countries

    fy.setup_fiscal_calendar(start_month=9)
    now = fy.FiscalDateTime.now()
    date_format = '%Y-%m-%d'
    start_date = now.prev_quarter.start.strftime(date_format)
    end_date = now.prev_quarter.end.strftime(date_format)
    print(start_date)
    print(end_date)

    parser = argparse.ArgumentParser(
        description="Retrieve Google Analytics data.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        "-d", "--debug",
        action="store_true",
        help="Enable debugging messages",
    )
    parser.add_argument(
        "output_file",
        metavar="OUTPUT_FILE",
        nargs="?",
        default="sessions.csv",
        help="Output CSV file",
    )
    parser.add_argument(
        "-s", "--start-date",
        default=start_date,
        help="Start date",
    )
    parser.add_argument(
        "-e", "--end-date",
        default=end_date,
        help="End date",
    )
    args = parser.parse_args()

    # Only dates that differ from the computed defaults are parsed.
    for attr, default in (('start_date', start_date), ('end_date', end_date)):
        given = getattr(args, attr)
        if given != default:
            setattr(args, attr, parse_date(given))
            print(getattr(args, attr))

    # Lift all pandas display limits.
    for option in ('display.max_columns', 'display.max_rows',
                   'display.max_colwidth'):
        pd.set_option(option, None)
    # pd.set_option('display.float_format', '{:,.0f}'.format)

    # Publish the country dataset as a module global and add a 'ZZ' entry
    # whose iso3 value is 'ZZZ'.
    countries = GeonamesCache().get_countries()
    countries['ZZ'] = {'iso3': 'ZZZ'}

    mapper = country(from_key='iso', to_key='iso3')

    # Authenticate and construct service.
    scope = ['https://www.googleapis.com/auth/analytics.readonly']
    service = get_service('analytics', 'v3', scope, 'client_secrets.json')

    profile_ids = get_profile_ids(service)
    pprint.pprint(profile_ids)

    # Sum the per-profile frames; cells missing on one side count as 0.
    total = pd.DataFrame()
    for profile_id in profile_ids:
        results = get_results(
            service, profile_id, args.start_date, args.end_date)
        df = create_dataframe(results)
        with pd.option_context('display.max_rows', None,
                               'display.max_columns', None):
            print(df)
        total = total.add(df, fill_value=0)

    total.index = [conv_iso_2_to_3(code) for code in total.index]
    total.index.name = 'iso3'
    total.columns = [re.sub(r'^ga:', '', column) for column in total.columns]
    set_int(total)

    total.to_csv(args.output_file)
Example #23
0
def world(data,
          area_col,
          value_col,
          area_to_code=False,
          bin_mode='linear',
          log=None,
          title='',
          value_col_to_title=True,
          descripton='',
          num_colors=9,
          palette='Reds',
          filter_outlier=False):
    '''WORLD MAP PLOT

    Takes in data where one column is the values and another is either
    country or 3-alphabet country code according to ISO standard.

    USE
    ===

    world(data=emission,
          area_col='area',
          value_col=1999,
          area_to_code=True,
          title='Emission Intensity of Food Production (co2/kg)',
          value_col_to_title=False,
          palette='Reds',
          num_colors=9)


    data :: a pandas DataFrame; it is copied, the caller's frame is not
            modified
    area_col :: the column where is either country name or country code
    value_col :: the column with the values; rows without a value are
                 dropped and the remaining values are cast to int

    area_to_code :: must be True if area is not 3-alphabet code
    bin_mode :: 'linear' (equally spaced bin edges between min and max) or
                'quantile' (bin edges at equally spaced percentiles); any
                other value raises ValueError
    num_colors :: the number of colors to be used to describe intensity

    title :: the figure title
    value_col_to_title :: Useful when the column is a year and you want to
                          have it shown in the title.
    descripton :: footer text; surrounding whitespace is stripped
    palette :: name of the matplotlib colormap
    filter_outlier :: if true the data is first passed through outliers()

    log :: if true will use log values (log1p) instead, for both the bin
           edges and the binned values.

    '''

    from geonamescache import GeonamesCache
    from ..utils.country_code import country_to_code

    # Fail early instead of hitting an unbound ``bins`` further down.
    if bin_mode not in ('linear', 'quantile'):
        raise ValueError(
            "bin_mode must be 'linear' or 'quantile', got {!r}".format(
                bin_mode))

    data = data.copy(deep=True)

    if filter_outlier:
        data = outliers(data, value_col)

    if value_col_to_title:
        title = title + ' {}'.format(value_col)

    # str.strip() returns a new string; the result has to be kept.
    descripton = descripton.strip()

    if area_to_code:
        data[area_col] = data[area_col].apply(country_to_code)
        data.set_index(area_col, inplace=True)

    if data.index.name != area_col:
        data.set_index(area_col, inplace=True)

    # filter data based on geo codes; a boolean mask is used because
    # .loc[list] raises KeyError as soon as one code is missing from the data
    gc = GeonamesCache()
    iso3_codes = list(gc.get_dataset_by_key(gc.get_countries(), 'iso3').keys())
    data = data[data.index.isin(iso3_codes)]
    data = data[~data[value_col].isna()].copy()
    data[value_col] = data[value_col].astype(int)

    # The same transform must feed the bin edges and the binned values,
    # otherwise the values do not line up with the edges.
    if log:
        data[value_col] = np.log1p(data[value_col])

    # set plot stuff
    values = data[value_col]

    # Both modes produce num_colors edges, i.e. num_colors - 1 intervals,
    # so every bin code is a valid index into ``scheme``.
    if bin_mode == 'linear':
        bins = np.linspace(values.min(), values.max(), num_colors)
    else:
        bins = np.nanpercentile(values, np.linspace(0, 100, num_colors))
    cm = plt.get_cmap(palette)
    scheme = [cm(i / num_colors) for i in range(num_colors)]

    # create the bin column; include_lowest keeps the minimum value inside
    # the first interval instead of giving it the code -1
    data['bin'] = pd.cut(data[value_col], bins, include_lowest=True).cat.codes

    p = plt.figure(figsize=(17, 12))
    p.patch.set_facecolor('white')

    ax = p.add_subplot(111, frame_on=False)
    p.suptitle(title, color='grey', weight='bold', fontsize=26, y=.85)

    try:
        from mpl_toolkits.basemap import Basemap
        m = Basemap(lon_0=0, projection='robin')
    except ImportError:
        raise MissingImport(
            "Install Basemap >> pip install git+https://github.com/matplotlib/basemap.git"
        )

    m.drawmapboundary(color='w')

    m.readshapefile(shapefile, 'units', color='#444444', linewidth=.2)
    for info, shape in zip(m.units_info, m.units):
        iso3 = info['ADM0_A3']
        if iso3 not in data.index:
            color = '#dddddd'
        else:
            # scalar lookup; int() works for numpy and plain Python ints
            color = scheme[int(data.at[iso3, 'bin'])]

        patches = [Polygon(np.array(shape), True)]
        pc = PatchCollection(patches)
        pc.set_facecolor(color)
        ax.add_collection(pc)

    # Cover up Antarctica so legend can be placed over it.
    ax.axhspan(0, 1000 * 1800, facecolor='w', edgecolor='w', zorder=2)

    # Draw color legend.
    ax_legend = p.add_axes([0.35, 0.24, 0.3, 0.03], zorder=3)
    cmap = mpl.colors.ListedColormap(scheme)
    cb = mpl.colorbar.ColorbarBase(ax_legend,
                                   cmap=cmap,
                                   ticks=bins,
                                   boundaries=bins,
                                   orientation='horizontal')

    cb.ax.set_xticklabels([str(round(i, 1)) for i in bins],
                          rotation=45,
                          ha='right')

    plt.annotate(descripton, xy=(-.8, -3.2), size=14, xycoords='axes fraction')

    _thousand_sep(p, ax)
Example #24
0
def plot_us_chloropleth(datafile,
                        dest,
                        colorscale,
                        bins,
                        nodatacolor='#dddddd',
                        scale=1,
                        resolution='l',
                        usecol='Magnitude',
                        inputkwargs=None):
    """Draw a county-level choropleth of the United States and save it.

    The CSV must contain the ``usecol`` column and identify each county
    either through a ``Geoid`` column, or through a ``Geography`` column, or
    through ``County`` and ``State`` columns. When ``Geoid`` is absent the
    geography text is split with ``parse_geography`` and resolved through
    ``lookup.geoids``.

    Three panels are drawn on one figure: the contiguous US, Alaska and
    Hawaii, each with its own orthographic projection.

    :param datafile: path or buffer handed to ``pandas.read_csv``.
    :param dest: destination handed to ``savefig``.
    :param colorscale: matplotlib colormap name.
    :param bins: monotonic sequence of bin edges; ``len(bins) - 1`` colours
        are sampled from the colormap.
    :param nodatacolor: fill colour for counties without a usable value.
    :param scale: multiplier applied to the figure size and border width.
    :param resolution: Basemap boundary resolution.
    :param usecol: name of the CSV column holding the plotted values.
    :param inputkwargs: (optional) extra keyword arguments for
        ``pandas.read_csv``.
    :raises ValueError: if ``bins`` has fewer than two edges.
    :raises KeyError: if a geography cannot be found in ``lookup.geoids``.
    """

    shapefile = 'cb_2017_us_county_500k/cb_2017_us_county_500k'
    num_colors = len(bins) - 1
    if num_colors < 1:
        raise ValueError('bins must contain at least two edges')

    df = pd.read_csv(datafile, **(inputkwargs or {}))
    if 'Geoid' not in df:
        geoid_lookup = lookup.geoids
        if 'Geography' in df:
            geographies = df['Geography']
        else:
            geographies = [
                '%s, %s' % (county, state)
                for county, state in zip(df['County'], df['State'])
            ]
        geoids = np.empty(df.shape[0], dtype=np.uint32)
        # Fill by position: the frame's index labels are not guaranteed to be
        # 0..n-1 (e.g. when ``index_col`` is passed through inputkwargs).
        for position, geography in enumerate(geographies):
            state, county, suffix = parse_geography(geography)
            key = (state, county)
            if suffix:
                # Prefer the suffixed county name when the lookup knows it.
                suffixed_key = (state, '%s%s' % (county, suffix))
                if suffixed_key in geoid_lookup:
                    key = suffixed_key
            geoids[position] = geoid_lookup[key]
        df.insert(0, 'Geoid', geoids)
    df.set_index('Geoid', inplace=True)

    values = df[usecol]
    # https://matplotlib.org/api/pyplot_summary.html#matplotlib.pyplot.colormaps
    cm = plt.get_cmap(colorscale)
    scheme = [cm(i / num_colors) for i in range(num_colors)]
    # np.digitize yields 0 below the first edge and len(bins) at or above the
    # last edge (and for NaN); clamp so every index is valid for ``scheme``.
    bin_index = np.clip(np.digitize(values, bins) - 1, 0, num_colors - 1)
    # Resolve each geoid's colour once instead of querying the frame for
    # every shape in every panel. Missing values get the no-data colour.
    color_by_geoid = {
        geoid: scheme[idx] if has_value else nodatacolor
        for geoid, idx, has_value in zip(df.index, bin_index, values.notna())
    }

    fig = plt.figure(figsize=(default_size * scale, default_size * scale))
    grid = gs.GridSpec(nrows=10, ncols=10)

    # (lon_0, lat_0, grid slot, llcrnrlon, llcrnrlat, urcrnrlon, urcrnrlat)
    panels = [
        (-98.5795, 39.828, grid[:-2, :], -121, 22, -64, 47),  # Contiguous US
        (-160, 63.5, grid[-4:, :6], -185.3, 49, -116, 65.5),  # Alaska
        (-158, 21, grid[-3:, 6:], -161, 18, -154, 23),  # Hawaii
    ]
    for lon_0, lat_0, gridpos, llcrnrlon, llcrnrlat, urcrnrlon, urcrnrlat \
            in panels:
        m = Basemap(lon_0=lon_0,
                    lat_0=lat_0,
                    projection='ortho',
                    resolution=resolution)
        ax = fig.add_subplot(gridpos, facecolor='#00000000', frame_on=False)

        m.readshapefile(shapefile,
                        'units',
                        color='#444444',
                        linewidth=default_border_linewidth * scale)
        for info, shape in zip(m.units_info, m.units):
            geoid = int(info['GEOID'])
            # Fall back to the equivalent geoid when the data lacks this one.
            if geoid not in color_by_geoid and geoid in equivalencies:
                geoid = equivalencies[geoid]
            color = color_by_geoid.get(geoid, nodatacolor)

            # ``closed`` is keyword-only in recent matplotlib releases.
            pc = PatchCollection([Polygon(np.array(shape), closed=True)])
            pc.set_facecolor(color)
            ax.add_collection(pc)

        # Crop the panel to its lon/lat window, in projected coordinates.
        xmin, ymin = m(llcrnrlon, llcrnrlat)
        xmax, ymax = m(urcrnrlon, urcrnrlat)

        ax.set_xlim(xmin, xmax)
        ax.set_ylim(ymin, ymax)

    fig.savefig(dest, bbox_inches='tight')
Example #25
0
    def __init__(self,
                 parent=None,
                 width=4,
                 height=3,
                 view_option=ViewOption.QUERY):
        """Create the canvas and draw the world map for ``view_option``.

        The query view draws an empty base map with a hover annotation, the
        time-zone view scatters every point coloured by how many points share
        its time zone, and the density view fills each country by its count
        per area. The data itself (``lon``, ``lat``, ``timezone``,
        ``marker_label``, ``stb_file``) is read from names defined outside
        this method.

        :param parent: (optional) parent widget passed to ``setParent``.
        :param width: figure width, passed to ``Figure(figsize=...)``.
        :param height: figure height, passed to ``Figure(figsize=...)``.
        :param view_option: which ``ViewOption`` to render.
        """
        # A Figure is the whole drawing and axes are its sub-plots; a single
        # sub-plot is used here.
        self.fig = Figure(figsize=(width, height), dpi=100)
        FigureCanvas.__init__(self, self.fig)
        self.setParent(parent)
        self.axes = self.fig.add_subplot(111)
        # Scatter collection of the plotted points; None until one exists.
        self.point = None

        # Helper object for top-k queries.
        self.top_k = TopK()

        # Only meaningful in the query view: they map the queried subset back
        # to row indices of the CSV file.
        self.index_list = []  # CSV index for each stored lon/lat pair
        self.lon_list = []
        self.lat_list = []

        # Helper object for distance (radius) queries.
        self.radius = Radius()

        self.view_option = view_option

        def draw_point_base_map():
            """Draw the Miller world map shared by the query/time-zone views."""
            self.m = Basemap(ax=self.axes,
                             projection='mill',
                             area_thresh=10000,
                             llcrnrlat=-65,
                             llcrnrlon=-180,
                             urcrnrlat=80,
                             urcrnrlon=180,
                             resolution='c')
            self.m.fillcontinents(color='#DEDEDE',
                                  lake_color='#DEDEDE',
                                  zorder=0.1)
            self.m.drawcoastlines(linewidth=0.2, color='k')
            self.m.drawcountries(linewidth=0.5, color='k')
            self.m.drawmapboundary(fill_color='#A0CFDF')

        def add_hidden_annotation():
            """Create the (initially hidden) tooltip shown for a hovered point."""
            self.annot = self.axes.annotate("",
                                            xy=(0, 0),
                                            xytext=(-50, 20),
                                            textcoords="offset points",
                                            bbox=dict(boxstyle="round",
                                                      fc="w"),
                                            arrowprops=dict(arrowstyle="->"))
            self.annot.set_visible(False)

        def connect_hover(label_index_for):
            """Show the tooltip while the mouse is over a plotted point.

            ``label_index_for`` converts the position of the hit point inside
            the scatter collection into an index of ``marker_label``.
            """

            def hover(event):
                if event.inaxes != self.axes or self.point is None:
                    return
                hit, details = self.point.contains(event)
                if hit:
                    point_idx = details["ind"][0]
                    self.annot.xy = self.point.get_offsets()[point_idx]
                    self.annot.set_text(
                        marker_label[label_index_for(point_idx)])
                    self.annot.get_bbox_patch().set_alpha(0.8)
                    self.annot.set_visible(True)
                    self.fig.canvas.draw_idle()
                elif self.annot.get_visible():
                    self.annot.set_visible(False)
                    self.fig.canvas.draw_idle()

            self.fig.canvas.mpl_connect("motion_notify_event", hover)

        # Map for the query view.
        if self.view_option == ViewOption.QUERY:
            draw_point_base_map()
            add_hidden_annotation()
            # Plotted points are a subset; index_list maps them to CSV rows.
            connect_hover(lambda point_idx: self.index_list[point_idx])

            self.axes.set_title('店铺查询')
            self.axes.title.set_y(1.05)

        # Map for the time-zone view.
        if self.view_option == ViewOption.TIMEZONE:
            draw_point_base_map()

            # Convert lon/lat to the map's x/y coordinates for plotting.
            xpt, ypt = self.m(lon, lat)

            # Count how many points fall into each time zone.
            zone_names = [item.split()[0] for item in timezone]
            zone_counts = {}
            for tz in zone_names:
                zone_counts[tz] = zone_counts.get(tz, 0) + 1

            # Each label is the colour class of the point's time-zone count.
            label = []
            for tz in zone_names:
                count = zone_counts[tz]
                if count < 1500:
                    label.append(0)
                elif count < 3000:
                    label.append(1)
                elif count < 4500:
                    label.append(2)
                else:
                    label.append(3)

            zone_cmap = mpl.colors.LinearSegmentedColormap.from_list(
                'cmap', ['#FB7C5C', '#F6563E', '#E42F28', '#C3161A'], 256)

            self.point = self.m.scatter(xpt,
                                        ypt,
                                        marker='o',
                                        s=3,
                                        c=label,
                                        cmap=zone_cmap,
                                        zorder=1)
            self.point.set_visible(True)

            add_hidden_annotation()
            # Every point is plotted, so scatter positions index directly.
            connect_hover(lambda point_idx: point_idx)

            bounds = [0, 1500, 3000, 4500, 6000]
            norm = mpl.colors.BoundaryNorm(bounds, zone_cmap.N)

            ax_cbar = self.fig.add_axes([0.3, 0.17, 0.4, 0.02])
            cbar = mpl.colorbar.ColorbarBase(ax_cbar,
                                             cmap=zone_cmap,
                                             norm=norm,
                                             spacing='uniform',
                                             ticks=bounds,
                                             boundaries=bounds,
                                             orientation='horizontal')
            cbar.outline.set_linewidth(0.2)
            cbar.ax.tick_params(labelsize=8, labelcolor='#666666')
            self.axes.set_title('时区分布')
            self.axes.title.set_y(1.05)

        # Map for the density view.
        if self.view_option == ViewOption.DESTINY:
            gnc = GeonamesCache()
            countries = gnc.get_countries()

            # Rows per country, scaled by the country's area
            # (count / areakm2 * 1,000,000) and keyed by ISO3 code.
            counts = stb_file['Country'].value_counts()
            country_dict = {}
            # Series.items() replaces iteritems(), removed in pandas 2.0.
            for country_code, count in counts.items():
                country = countries[country_code]
                country_dict[country['iso3']] = round(
                    count / country['areakm2'] * 1000000)

            self.m = Basemap(ax=self.axes,
                             projection='mill',
                             llcrnrlat=-65,
                             llcrnrlon=-180,
                             urcrnrlat=80,
                             urcrnrlon=180,
                             resolution='c')
            self.m.fillcontinents(color='#DEDEDE',
                                  lake_color='#DEDEDE',
                                  zorder=0)
            self.m.drawcountries(linewidth=0.5, color='k')
            self.m.drawmapboundary(fill_color='#A0CFDF')

            shapefile = 'ne_110m_admin_0_countries/ne_110m_admin_0_countries'

            # Colour bar setup: one palette entry per density interval.
            palette = ['#FBA083', '#FB7C5C', '#F6563E', '#E42F28', '#C3161A']
            density_cmap = mpl.colors.LinearSegmentedColormap.from_list(
                'cmap', palette, 256)
            bounds = [0, 10, 100, 1000, 10000, 100000]
            # Size the norm from the colormap that is actually drawn.
            norm = mpl.colors.BoundaryNorm(bounds, density_cmap.N)

            def density_color(density):
                """Return the palette colour of the interval holding density."""
                for upper, colour in zip((10, 100, 1000, 10000), palette):
                    if density < upper:
                        return colour
                return palette[-1]

            # Read shapefile
            self.m.readshapefile(shapefile,
                                 'units',
                                 color='#DDDDDD',
                                 linewidth=0.1)

            # Add one filled patch per country shape.
            for info, shape in zip(self.m.units_info, self.m.units):
                code = info['ADM0_A3']
                if code not in country_dict:
                    color = '#DDDDDD'
                elif code == 'CHN' or code == 'TWN':
                    # These two codes are always drawn in the same fixed colour.
                    color = '#F6563E'
                else:
                    color = density_color(country_dict[code])
                # ``closed`` is keyword-only in recent matplotlib releases.
                patches = [Polygon(np.array(shape), closed=True)]
                pc = PatchCollection(patches,
                                     facecolor=color,
                                     edgecolor='None',
                                     linewidth=0)
                self.axes.add_collection(pc)

            ax_cbar = self.fig.add_axes([0.3, 0.15, 0.4, 0.02])
            cbar = mpl.colorbar.ColorbarBase(ax_cbar,
                                             cmap=density_cmap,
                                             norm=norm,
                                             spacing='uniform',
                                             ticks=bounds,
                                             boundaries=bounds,
                                             orientation='horizontal')
            cbar.outline.set_linewidth(0.2)
            cbar.ax.tick_params(labelsize=8, labelcolor='#666666')

            self.axes.set_title('密度分布')
            self.axes.title.set_y(1.05)
Example #26
0
def plot_us_state_chloropleth(datafile,
                              dest,
                              colorscale,
                              bins,
                              nodatacolor='#dddddd',
                              scale=1,
                              resolution='l',
                              usecol='Magnitude',
                              inputkwargs=None):
    """Draw a state-level choropleth of the United States and save it.

    The CSV must contain an ``AFFGEOID`` column, which is matched against
    the ``AFFGEOID`` attribute of each shape, and the ``usecol`` column.

    Three panels are drawn on one figure: the contiguous US, Alaska and
    Hawaii, each with its own orthographic projection.

    :param datafile: path or buffer handed to ``pandas.read_csv``.
    :param dest: destination handed to ``savefig``.
    :param colorscale: matplotlib colormap name.
    :param bins: monotonic sequence of bin edges; ``len(bins) - 1`` colours
        are sampled from the colormap.
    :param nodatacolor: fill colour for states without a usable value.
    :param scale: multiplier applied to the figure size and border width.
    :param resolution: Basemap boundary resolution.
    :param usecol: name of the CSV column holding the plotted values.
    :param inputkwargs: (optional) extra keyword arguments for
        ``pandas.read_csv``.
    :raises ValueError: if ``bins`` has fewer than two edges.
    """

    shapefile = 'cb_2017_us_state_500k/cb_2017_us_state_500k'
    num_colors = len(bins) - 1
    if num_colors < 1:
        raise ValueError('bins must contain at least two edges')

    df = pd.read_csv(datafile, **(inputkwargs or {}))
    df.set_index('AFFGEOID', inplace=True)

    values = df[usecol]
    # https://matplotlib.org/api/pyplot_summary.html#matplotlib.pyplot.colormaps
    cm = plt.get_cmap(colorscale)
    scheme = [cm(i / num_colors) for i in range(num_colors)]
    # np.digitize yields 0 below the first edge and len(bins) at or above the
    # last edge (and for NaN); clamp so every index is valid for ``scheme``.
    bin_index = np.clip(np.digitize(values, bins) - 1, 0, num_colors - 1)
    # Resolve each geoid's colour once instead of querying the frame for
    # every shape in every panel. Missing values get the no-data colour.
    color_by_geoid = {
        geoid: scheme[idx] if has_value else nodatacolor
        for geoid, idx, has_value in zip(df.index, bin_index, values.notna())
    }

    fig = plt.figure(figsize=(default_size * scale, default_size * scale))
    grid = gs.GridSpec(nrows=10, ncols=10)

    # (lon_0, lat_0, grid slot, llcrnrlon, llcrnrlat, urcrnrlon, urcrnrlat)
    panels = [
        (-98.5795, 39.828, grid[:-2, :], -121, 22, -64, 47),  # Contiguous US
        (-160, 63.5, grid[-4:, :6], -185.3, 49, -116, 65.5),  # Alaska
        (-158, 21, grid[-3:, 6:], -161, 18, -154, 23),  # Hawaii
    ]
    for lon_0, lat_0, gridpos, llcrnrlon, llcrnrlat, urcrnrlon, urcrnrlat \
            in panels:
        m = Basemap(lon_0=lon_0,
                    lat_0=lat_0,
                    projection='ortho',
                    resolution=resolution)
        ax = fig.add_subplot(gridpos, facecolor='#00000000', frame_on=False)

        m.readshapefile(shapefile,
                        'units',
                        color='#444444',
                        linewidth=default_border_linewidth * scale)
        for info, shape in zip(m.units_info, m.units):
            geoid = info['AFFGEOID']
            # Fall back to the equivalent geoid when the data lacks this one.
            if geoid not in color_by_geoid and geoid in equivalencies:
                geoid = equivalencies[geoid]
            color = color_by_geoid.get(geoid, nodatacolor)

            # ``closed`` is keyword-only in recent matplotlib releases.
            pc = PatchCollection([Polygon(np.array(shape), closed=True)])
            pc.set_facecolor(color)
            ax.add_collection(pc)

        # Crop the panel to its lon/lat window, in projected coordinates.
        xmin, ymin = m(llcrnrlon, llcrnrlat)
        xmax, ymax = m(urcrnrlon, urcrnrlat)

        ax.set_xlim(xmin, xmax)
        ax.set_ylim(ymin, ymax)

    fig.savefig(dest, bbox_inches='tight')