def create_data_frame():
    """Download GDP and energy-use indicators and merge them into one frame.

    Four World Bank series are fetched with ``wb.download`` for all countries,
    1990-2013, and joined on their shared index.

    Returns:
        DataFrame whose two former index levels have been moved into leading
        columns, followed by the data columns 'GDP', 'ALT', 'COM' and 'FOS'.
    """
    #   Exactly ONE of the following GDP indicators should be active:
    #     'NY.GDP.MKTP.CD'  GDP (current US$)
    #     'NY.GDP.PCAP.CD'  GDP per capita (current US$)
    gdp_indicator = 'NY.GDP.MKTP.CD'

    #   (indicator code, output column name), in join order; the last three
    #   are the energy indicators.
    series = [
        (gdp_indicator, 'GDP'),
        ('EG.USE.COMM.CL.ZS', 'ALT'),
        ('EG.USE.CRNW.ZS', 'COM'),
        ('EG.USE.COMM.FO.ZS', 'FOS'),
    ]

    frames = [
        wb.download(indicator=code, country='all', start=1990, end=2013)
        for code, _ in series
    ]

    #   Combines the datasets into a single data frame
    df = frames[0]
    for frame in frames[1:]:
        df = df.join(frame)
    df.columns = [name for _, name in series]  # replace column names

    #   Move the first index level into a column, then the remaining one.
    #   Once the first reset is done only a single level is left, so asking
    #   for ``level=1`` again is out of range; a plain reset handles it.
    df.reset_index(level=0, inplace=True)
    df.reset_index(inplace=True)
    return df
def GetData():
    """Fetch the out-of-school primary-age series and their per-country means.

    Returns:
        Tuple ``(f, m, fAvg, mAvg)``: the female and male frames (index moved
        into columns) and, for each, the mean per country with missing means
        replaced by 0 and 'country' as a regular column.
    """
    male_code = "SE.PRM.UNER.MA"    # Children out of school, primary, male
    female_code = "SE.PRM.UNER.FE"  # Children out of school, primary, female

    male_raw = wb.download(indicator=male_code, country="all",
                           start=2000, end=2015)
    female_raw = wb.download(indicator=female_code, country="all",
                             start=2000, end=2015)

    # The first 544 rows of each download are discarded.
    # NOTE(review): presumably these are the aggregate/region rows that come
    # before the individual countries - confirm against the API output.
    f = female_raw[544:].reset_index()
    m = male_raw[544:].reset_index()

    fAvg = f.groupby('country').agg({female_code: np.mean})
    mAvg = m.groupby('country').agg({male_code: np.mean})

    fAvg = fAvg.fillna(0).reset_index()
    mAvg = mAvg.fillna(0).reset_index()

    return f, m, fAvg, mAvg
예제 #3
0
    def test_wdi_download_w_retired_indicator(self):
        # GDPPCKD used to be a common symbol.  It still shows up in the
        # search feature and is listed online, but API calls for it work
        # neither in the World Bank query builder nor in the pandas module.
        # The point of this test is that pandas users keep getting a sensible
        # error message rather than some missing-key error caused by a
        # changed JSON format.  It should keep passing even if the World
        # Bank finishes deprecating the symbol.
        query = {
            'country': ['CA', 'MX', 'US'],
            'indicator': ['GDPPCKD'],
            'start': 2003,
            'end': 2004,
            'errors': 'ignore',
        }

        try:
            result = download(**query)
        except ValueError as e:
            # Expected path: no indicator returned any data.
            raise nose.SkipTest("No indicators returned data: {0}".format(e))

        # Reaching this point with data means the indicator was un-retired
        # (find another bad one) or the API changed in an unexpected way.
        if len(result) != 0:
            raise nose.SkipTest("Invalid results")
예제 #4
0
def get_wb_df(wb_name, colname):
    """Download one World Bank indicator and give its column a readable name.

    Args:
        wb_name: indicator code passed to ``wb.download``.
        colname: new label for the first column of the downloaded frame.

    Returns:
        DataFrame covering all countries from the module-level ``start_year``
        to ``today_year`` (it may contain many NaNs), with its first column
        renamed to ``colname``.
    """
    raw = wb.download(indicator=wb_name,
                      country="all",
                      start=start_year,
                      end=today_year)
    downloaded_label = raw.columns[0]
    return raw.rename(columns={downloaded_label: colname})
예제 #5
0
def test_wdi_download():
    # This test is skipped unconditionally; nothing after the raise runs.
    # The remainder is kept as a record of the comparison that was intended.
    raise nose.SkipTest
    gdppckn = {
        ('United States', '2003'): '40800.0735367688',
        ('Canada', '2004'): '37857.1261134552',
        ('United States', '2005'): '42714.8594790102',
        ('Canada', '2003'): '37081.4575704003',
        ('United States', '2004'): '41826.1728310667',
        ('Mexico', '2003'): '72720.0691255285',
        ('Mexico', '2004'): '74751.6003347038',
        ('Mexico', '2005'): '76200.2154469437',
        ('Canada', '2005'): '38617.4563629611',
    }
    gdppckd = {
        ('United States', '2003'): '40800.0735367688',
        ('Canada', '2004'): '34397.055116118',
        ('United States', '2005'): '42714.8594790102',
        ('Canada', '2003'): '33692.2812368928',
        ('United States', '2004'): '41826.1728310667',
        ('Mexico', '2003'): '7608.43848670658',
        ('Mexico', '2004'): '7820.99026814334',
        ('Mexico', '2005'): '7972.55364129367',
        ('Canada', '2005'): '35087.8925933298',
    }
    expected = pandas.DataFrame({'GDPPCKN': gdppckn, 'GDPPCKD': gdppckd})

    countries = ['CA', 'MX', 'US', 'junk']
    indicators = ['GDPPCKD', 'GDPPCKN', 'junk']
    result = download(country=countries, indicator=indicators,
                      start=2003, end=2005)

    # Only the values are compared; the index is taken from the result.
    expected.index = result.index
    assert_frame_equal(result, pandas.DataFrame(expected))
예제 #6
0
파일: test_wb.py 프로젝트: AkiraKane/pandas
    def test_wdi_download(self):
        # numpy is imported locally because the ``pandas.np`` alias is no
        # longer available in current pandas.
        import numpy as np

        # Test a bad indicator with double (US), triple (USA),
        # standard (CA, MX), non standard (KSV),
        # duplicated (US, US, USA), and unknown (BLA) country codes

        # ...but NOT a crash inducing country code (World bank strips pandas
        #    users of the luxury of laziness, because they create their
        #    own exceptions, and don't clean up legacy country codes).
        # ...but NOT a retired indicator (User should want it to error.)

        cntry_codes = ['CA', 'MX', 'USA', 'US', 'US', 'KSV', 'BLA']
        inds = ['NY.GDP.PCAP.CD', 'BAD.INDICATOR']

        expected = {
            'NY.GDP.PCAP.CD': {
                ('Canada', '2003'): 28026.006013044702,
                ('Mexico', '2003'): 6601.0420648056606,
                ('Canada', '2004'): 31829.522562759001,
                ('Kosovo', '2003'): 1969.56271307405,
                ('Mexico', '2004'): 7042.0247834044303,
                ('United States', '2004'): 41928.886136479705,
                ('United States', '2003'): 39682.472247320402,
                ('Kosovo', '2004'): 2135.3328465238301,
            }
        }
        expected = pandas.DataFrame(expected)
        # Round to the nearest thousand, to ignore revisions to data.
        expected = np.round(expected, decimals=-3)
        # Sort by index label (what the old argument-less ``sort()`` did).
        expected.sort_index(inplace=True)
        result = download(country=cntry_codes, indicator=inds,
                          start=2003, end=2004, errors='ignore')
        result.sort_index(inplace=True)
        # Round to the nearest thousand, to ignore revisions to data.
        result = np.round(result, decimals=-3)
        # Only the values are compared; the index is taken from the result.
        expected.index = result.index
        assert_frame_equal(result, expected)
예제 #7
0
파일: test_wb.py 프로젝트: AkiraKane/pandas
    def test_wdi_download_w_retired_indicator(self):
        # GDPPCKD was once a common symbol.  Although the search feature and
        # the online listing still show it, API calls for it fail both in
        # the World Bank's own query builder and in the pandas module.
        # This test guards the error message shown to pandas users: it should
        # stay meaningful instead of degrading into a missing-key error when
        # the JSON message format changes.  If the World Bank ever completes
        # the deprecation, this nose test should still pass.
        codes = ['CA', 'MX', 'US']
        retired = ['GDPPCKD']

        try:
            frame = download(country=codes,
                             indicator=retired,
                             start=2003,
                             end=2004,
                             errors='ignore')
        except ValueError as err:
            # Expected outcome: nothing came back for the retired symbol.
            raise nose.SkipTest(
                "No indicators returned data: {0}".format(err))

        # Getting here with rows means the indicator was un-retired (pick
        # another bad one) or the API changed in a really unexpected way.
        if len(frame) != 0:
            raise nose.SkipTest("Invalid results")
예제 #8
0
def test_wdi_download():
    # Skipped unconditionally; the code after the raise never executes and is
    # kept only as a record of the intended comparison.
    raise nose.SkipTest("skipping for now")
    gdppckn = {
        (u('United States'), u('2003')): u('40800.0735367688'),
        (u('Canada'), u('2004')): u('37857.1261134552'),
        (u('United States'), u('2005')): u('42714.8594790102'),
        (u('Canada'), u('2003')): u('37081.4575704003'),
        (u('United States'), u('2004')): u('41826.1728310667'),
        (u('Mexico'), u('2003')): u('72720.0691255285'),
        (u('Mexico'), u('2004')): u('74751.6003347038'),
        (u('Mexico'), u('2005')): u('76200.2154469437'),
        (u('Canada'), u('2005')): u('38617.4563629611'),
    }
    gdppckd = {
        (u('United States'), u('2003')): u('40800.0735367688'),
        (u('Canada'), u('2004')): u('34397.055116118'),
        (u('United States'), u('2005')): u('42714.8594790102'),
        (u('Canada'), u('2003')): u('33692.2812368928'),
        (u('United States'), u('2004')): u('41826.1728310667'),
        (u('Mexico'), u('2003')): u('7608.43848670658'),
        (u('Mexico'), u('2004')): u('7820.99026814334'),
        (u('Mexico'), u('2005')): u('7972.55364129367'),
        (u('Canada'), u('2005')): u('35087.8925933298'),
    }
    expected = pandas.DataFrame({'GDPPCKN': gdppckn, 'GDPPCKD': gdppckd})

    countries = ['CA', 'MX', 'US', 'junk']
    indicators = ['GDPPCKD', 'GDPPCKN', 'junk']
    result = download(country=countries, indicator=indicators,
                      start=2003, end=2005)

    # Only the values are compared; the index is taken from the result.
    expected.index = result.index
    assert_frame_equal(result, pandas.DataFrame(expected))
def get_wb_data(indicator, start_year, end_year):
    """Download one World Bank indicator for all countries.

    Args:
        indicator: indicator name passed through to ``wb.download``.
        start_year: first year of the requested range.
        end_year: last year of the requested range.

    Returns:
        Whatever ``wb.download`` returns for that query (a pandas dataframe).
    """
    return wb.download(indicator=indicator,
                       country='all',
                       start=start_year,
                       end=end_year)
예제 #10
0
파일: start.py 프로젝트: jnmclarty/trump
def WorldBankST():
    # Downloads a single indicator for a single country and reduces it to a
    # series sorted by an integer index.
    # NOTE(review): ``kwargs`` is not defined inside this function; presumably
    # it is supplied by the context that executes this body - confirm.
    from pandas.io import wb

    ind = str(kwargs['indicator'])
    cc = str(kwargs['country'])

    # Remove the two keys so the rest can be forwarded untouched below.
    del kwargs['indicator']
    del kwargs['country']

    df = wb.download(indicator=ind, country=cc, errors='raise', **kwargs)
    # First label of the outer index level of the download.
    firstlevel = df.index.levels[0][0]
    # Label-based selection; ``.loc`` replaces the removed ``.ix`` indexer.
    data = df.loc[firstlevel][ind]

    data = data.sort_index()
    data.index = data.index.astype(int)
예제 #11
0
파일: start.py 프로젝트: azflin/trump
def WorldBankST():
	# Downloads a single indicator for a single country and reduces it to a
	# series sorted by an integer index.
	# NOTE(review): ``kwargs`` is not defined inside this function; presumably
	# it is supplied by the context that executes this body - confirm.
	from pandas.io import wb

	ind = str(kwargs['indicator'])
	cc = str(kwargs['country'])

	# Remove the two keys so the rest can be forwarded untouched below.
	del kwargs['indicator']
	del kwargs['country']

	df = wb.download(indicator=ind, country=cc, errors='raise', **kwargs)
	# First label of the outer index level of the download.
	firstlevel = df.index.levels[0][0]
	# Label-based selection; ``.loc`` replaces the removed ``.ix`` indexer.
	data = df.loc[firstlevel][ind]

	data = data.sort_index()
	data.index = data.index.astype(int)
예제 #12
0
파일: test_wb.py 프로젝트: AkiraKane/pandas
    def test_wdi_download_w_crash_inducing_countrycode(self):
        # 'XXX' is the crash-inducing country code mixed in with valid ones.
        query = {
            'country': ['CA', 'MX', 'US', 'XXX'],
            'indicator': ['NY.GDP.PCAP.CD'],
            'start': 2003,
            'end': 2004,
            'errors': 'ignore',
        }

        try:
            result = download(**query)
        except ValueError as e:
            raise nose.SkipTest("No indicators returned data: {0}".format(e))

        # Reaching this point with data means the World Bank started using
        # country code XXX, or its API changed in a really unexpected way.
        if len(result) != 0:
            raise nose.SkipTest("Invalid results")
예제 #13
0
    def test_wdi_download_w_crash_inducing_countrycode(self):
        # Valid codes plus 'XXX', the crash-inducing country code.
        codes = ['CA', 'MX', 'US', 'XXX']
        wanted = ['NY.GDP.PCAP.CD']

        try:
            frame = download(country=codes, indicator=wanted,
                             start=2003, end=2004, errors='ignore')
        except ValueError as err:
            raise nose.SkipTest(
                "No indicators returned data: {0}".format(err))

        # If rows come back, either the World Bank now uses country code XXX
        # or its API changed somehow in a really unexpected way.
        if len(frame) != 0:
            raise nose.SkipTest("Invalid results")
예제 #14
0
    def test_wdi_download(self):
        # numpy is imported locally because the ``pandas.np`` alias is no
        # longer available in current pandas.
        import numpy as np

        # Test a bad indicator with double (US), triple (USA),
        # standard (CA, MX), non standard (KSV),
        # duplicated (US, US, USA), and unknown (BLA) country codes

        # ...but NOT a crash inducing country code (World bank strips pandas
        #    users of the luxury of laziness, because they create their
        #    own exceptions, and don't clean up legacy country codes).
        # ...but NOT a retired indicator (User should want it to error.)

        cntry_codes = ['CA', 'MX', 'USA', 'US', 'US', 'KSV', 'BLA']
        inds = ['NY.GDP.PCAP.CD', 'BAD.INDICATOR']

        expected = {
            'NY.GDP.PCAP.CD': {
                ('Canada', '2003'): 28026.006013044702,
                ('Mexico', '2003'): 6601.0420648056606,
                ('Canada', '2004'): 31829.522562759001,
                ('Kosovo', '2003'): 1969.56271307405,
                ('Mexico', '2004'): 7042.0247834044303,
                ('United States', '2004'): 41928.886136479705,
                ('United States', '2003'): 39682.472247320402,
                ('Kosovo', '2004'): 2135.3328465238301
            }
        }
        expected = pandas.DataFrame(expected)
        # Round to the nearest thousand, to ignore revisions to data.
        expected = np.round(expected, decimals=-3)
        # Sort by index label (what the old argument-less ``sort()`` did).
        expected.sort_index(inplace=True)
        result = download(country=cntry_codes,
                          indicator=inds,
                          start=2003,
                          end=2004,
                          errors='ignore')
        result.sort_index(inplace=True)
        # Round to the nearest thousand, to ignore revisions to data.
        result = np.round(result, decimals=-3)
        # Only the values are compared; the index is taken from the result.
        expected.index = result.index
        assert_frame_equal(result, expected)
예제 #15
0
* http://matplotlib.org/examples/shapes_and_collections/scatter_demo.html 
"""
# load packages (ignored if redundant) 
# load package under name wb 
from pandas.io import wb
import numpy as np            
import matplotlib.pyplot as plt

# specify dates, variables, and countries
# a single year is requested: `start` is used as both start and end below
start = 2011
# GDP per capita, population, life expectancy
variable_list = ['NY.GDP.PCAP.KD', 'SP.POP.TOTL', 'SP.DYN.LE00.IN']
country_list  = ['US', 'FR', 'JP', 'CN', 'IN', 'BR', 'MX'] 

# Python understands we need to go to the second line because ( hasn't been closed by )
# rows with any missing value are dropped right after the download
data = wb.download(indicator=variable_list, 
                   country=country_list, start=start, end=start).dropna()
# see what we've got
print(data)

# check the column labels, change to something simpler 
# (the new labels are assigned by position, in the order the columns print)
print(data.columns)
data.columns = ['gdppc', 'pop', 'le']
print(data)

# scatterplot
# life expectancy v GDP per capita 
# size of circles controlled by population 
# (population is scaled by 1e-6 before being used as the marker size)

plt.scatter(data['gdppc'], data['le'], s=0.000001*data['pop'], alpha=0.5)
plt.ylabel('Life Expectancy')
plt.xlabel('GDP Per Capita')
* http://matplotlib.org/examples/shapes_and_collections/scatter_demo.html
"""
# load packages (ignored if redundant)
# load package under name wb
from pandas.io import wb
import numpy as np
import matplotlib.pyplot as plt

# specify dates, variables, and countries
# a single year is requested: `start` is used as both start and end below
start = 2011
# GDP per capita, population, life expectancy
variable_list = ['NY.GDP.PCAP.KD', 'SP.POP.TOTL', 'SP.DYN.LE00.IN']
country_list  = ['US', 'FR', 'JP', 'CN', 'IN', 'BR', 'MX']

# Python understands we need to go to the second line because ( hasn't been closed by )
# rows with any missing value are dropped right after the download
data = wb.download(indicator=variable_list,
                   country=country_list, start=start, end=start).dropna()
# see what we've got
print(data)

# check the column labels, change to something simpler
# (the new labels are assigned by position, in the order the columns print)
print(data.columns)
data.columns = ['gdppc', 'pop', 'le']
print(data)

# scatterplot
# life expectancy v GDP per capita
# size of circles controlled by population
# (population is scaled by 1e-6 before being used as the marker size)

plt.scatter(data['gdppc'], data['le'], s=0.000001*data['pop'], alpha=0.5)
plt.ylabel('Life Expectancy')
plt.xlabel('GDP Per Capita')
예제 #17
0
"""
Bonus material:  "styles" set basic layout parameters
We can set them one at a time, but this is easier
plt.style.available gives options:
['ggplot', 'bmh', 'dark_background', 'fivethirtyeight', 'grayscale']
"""
import pandas as pd                # data management tools
from pandas.io import wb           # World Bank api
import matplotlib.pyplot as plt    # plotting tools

# variable list
var = ['NY.GDP.PCAP.PP.KD', 'NY.GDP.MKTP.PP.KD']
# country list (ISO codes)
iso = ['USA', 'FRA', 'JPN', 'CHN', 'IND', 'BRA', 'MEX']
# a single year: used as both start and end of the request
year = 2014
df = wb.download(indicator=var, country=iso, start=year, end=year)

# massage data
df = df.reset_index(level='year', drop=True)   # only one year, drop it
df.columns = ['gdppc', 'gdp']          # rename variables
df['gdp'] = df['gdp']/10**12           # convert to trillions
df['gdppc'] = df['gdppc']/10**3        # convert to thousands
# NOTE(review): these ranks are assigned by row position, so they rely on
# the order in which the download returns the countries - confirm.
df['order'] = [5, 3, 1, 4, 2, 6, 0]    # reorder countries
# sort_values replaces the removed DataFrame.sort(columns=...)
df = df.sort_values(by='order', ascending=False)

#%%
plt.style.use('fivethirtyeight')

# GDP bar chart
ax = df['gdp'].plot(kind='barh', alpha=0.5)
ax.set_title('GDP', loc='left', fontsize=14)
예제 #18
0
import matplotlib.pylab as plt



"""
1. Read in GDP and expenditure components from World Bank  
"""
# Single country: China.
country_list  = ['CN']
# World Bank indicator codes to download; every code listed here has a short
# nickname in the `nicknames` mapping further down.
variable_list = ['NE.CON.GOVT.CN', 'NE.CON.PETC.CN', 'NE.CON.PRVT.CN', 
                 'NE.CON.TETC.CN', 'NE.CON.TOTL.CN', 
                 'NE.DAB.TOTL.CN',                 
                 'NE.EXP.GNFS.CN', 'NE.GDI.FTOT.CN', 'NE.GDI.STKB.CN',
                 'NE.GDI.TOTL.CN', 'NE.IMP.GNFS.CN', 'NE.RSB.GNFS.CN', 
                 'NY.GDP.MKTP.CN', 'NY.GDP.DISC.CN']
df = wb.download(indicator=variable_list, country=country_list, 
                 start=1990, end=2014)

# simplify variable names 
# http://stackoverflow.com/questions/11346283/renaming-columns-in-pandas
# (renaming by mapping, so the result does not depend on column order)
nicknames = {'NE.CON.GOVT.CN': 'g', 'NE.CON.PETC.CN': 'c1', 
             'NE.CON.PRVT.CN': 'c2', 'NE.CON.TETC.CN': 'c3', 
             'NE.CON.TOTL.CN': 'c4',
             'NE.DAB.TOTL.CN': 'a', 'NE.EXP.GNFS.CN': 'x',  
             'NE.GDI.FTOT.CN': 'i', 'NE.GDI.STKB.CN': 'v', 
             'NE.GDI.TOTL.CN': 'gcf', 'NE.IMP.GNFS.CN': 'm',  
             'NE.RSB.GNFS.CN': 'nx', 
             'NY.GDP.MKTP.CN': 'y', 'NY.GDP.DISC.CN': 'disc'}
df = df.rename(columns=nicknames)

#%%
"""
예제 #19
0
# > This example is largely based on the "World Bank" section of *pandas 0.13.1 documentation* available [here](http://pandas.pydata.org/pandas-docs/stable/remote_data.html) but was expanded to demonstrate more methods and functions.

# First, we download a GDP per capita series and a fertility rate. The search method shows available series.

# In[154]:

from pandas.io import wb

wb.search('fertility').iloc[:, :2]

# Let's choose two series: one fore GDP per capita and another for Total Fertility Rate. We request all the available countries and some years.

# In[155]:

ind = ['NY.GDP.PCAP.KD', 'SP.DYN.TFRT.IN']
df = wb.download(indicator=ind, country='all', start=1950, end=2014)

# Shorten the column labels, then look at the dataframe. It has a MultiIndex (or hierarchical index).

# In[156]:

df.columns = ['gdp', 'tfr']
df.head()

# Before we do anything, let's drop any rows that have missing values, and convert both columns to numbers.

# In[157]:

df = df.dropna()
# astype(float) replaces the removed DataFrame.convert_objects; unlike the
# old call it raises on a value that cannot be parsed instead of silently
# turning it into NaN.
df = df.astype(float)
df.to_pickle('df.pkl')
예제 #20
0
__author__ = 'fabio.lana'
import pandas as pd
from pandas.io import wb
import pycountry
import numpy as np

# World Bank indicator codes; the readable column names assigned below are
# matched to these by position.
indicators = ['NY.GDP.PCAP.KD', 'SP.POP.TOTL', 'SP.POP.0014.TO.ZS', 'SP.POP.65UP.TO.ZS',
              'AG.LND.AGRI.ZS', 'AG.YLD.CREL.KG', 'SP.RUR.TOTL', 'SH.STA.MALN.ZS',
              'GC.BAL.CASH.GD.ZS', 'NE.EXP.GNFS.ZS', 'NE.IMP.GNFS.ZS']
# Look up the country record for 'CMR' and take its two-letter code, which is
# what gets passed to the download.
nazione = pycountry.countries.get(alpha3='CMR')
iso2 = nazione.alpha2
dati_nazionali = wb.download(indicator=indicators, country=[iso2], start=2006, end=2013)
dati_nazionali.columns = ['GDP Capita', 'Total Pop', 'Pop Age 0-14', 'Pop Age 65-up',
                          'Perc Agr Land', 'Cereal Yeld', 'Rural Population', 'Malnutrition Age<5',
                          'Cash Surplus-Deficit', 'Export', 'Import', ]
#print dati['NY.GDP.PCAP.KD'].groupby(level=0).mean()

# Difference between the export and import columns.
dati_nazionali['Importer'] = dati_nazionali['Export'] - dati_nazionali['Import']
# Function-call form works on both Python 2 and Python 3.
print(dati_nazionali)

# sub_indicators = ['SI.POV.NAHC','SI.POV.RUHC', 'SI.POV.URHC']
# dati_sub_national = wb.download(indicator=indicators, country=[iso2], start=2006, end=2013)
# print dati_sub_national
예제 #21
0
def get_data(indi):
    """Download one indicator for 2012 and drop the leading 34 rows.

    Args:
        indi: indicator code passed to ``wb.download``.

    Returns:
        The downloaded frame without its first 34 rows.
    """
    frame = wb.download(indicator=indi, country='all', start=2012, end=2012)
    # NOTE(review): presumably the first 34 rows are aggregate/region entries
    # listed ahead of the individual countries - confirm against the API.
    return frame[34:]
#   Download World Bank Data Indicators
#############################################################################

# SP.DYN.CBRT.IN: Birth rate, crude (per 1,000 people)
#   http://data.worldbank.org/indicator/SP.DYN.CBRT.IN

# NY.GNP.PCAP.CD: GNI per capita, Atlas method (current US$)
#   http://data.worldbank.org/indicator/NY.GNP.PCAP.CD

# GC.REV.SOCL.ZS: Social contributions (% of revenue)
#   http://data.worldbank.org/indicator/GC.REV.SOCL.ZS

# SP.POP.65UP.TO.ZS: Population ages 65 and above (% of total)
#   http://data.worldbank.org/indicator/SP.POP.65UP.TO.ZS

df_source = wb.download(
    indicator=['SP.DYN.CBRT.IN', 'NY.GNP.PCAP.CD', 'GC.REV.SOCL.ZS', 'SP.POP.65UP.TO.ZS'],
    country='all', start=1960, end=2015)

# Reset index to columns
df_source.reset_index(inplace=True)

# Rename columns (positional: two former index levels, then the indicators
# in the order they were requested)
df_source.columns = [
    'country', 'year', 'birth_rate', 'gni', 'social_contr', 'age_65'
]

# Fill missing values: http://stackoverflow.com/questions/30587728/pandas-backfilling-a-dataframegroupby-object
# Back-filling is done per country so values never leak across countries.
df_all = df_source.groupby(df_source.country).apply(lambda g: g.bfill())

# Load country metadata from downloaded file saved in current working directory
wd = os.getcwd()
# os.path.join builds the path with the platform's separator; the former
# hand-written '\' only worked on Windows and relied on an invalid escape.
meta = pd.ExcelFile(os.path.join(wd, 'sp.dyn.cbrt.in_Indicator_en_excel_v2.xls'))
예제 #23
0
# Look up the metadata row of the GDP indicator among the search results.
results = wb.search('GDP*')
results = results[results.id == 'NY.GDP.MKTP.CD']
# First matching row as a plain {column: value} dict; this form works on both
# Python 2 and 3 (dict.values() is not indexable on Python 3).
r = results.iloc[0].to_dict()
# Rename every key containing "source" so that it says "WB" instead.
r = {key.replace("source", "WB"): value for key, value in r.items()}

ctrycodes = ['ABW', 'AFG', 'AGO', 'ALB', 'AND', 'ARE', 'ARG', 'ARM', 'ASM', 'ATG', 'AUS', 'AUT', 'AZE', 'BDI', 'BEL', 'BEN', 'BFA', 'BGD', 'BGR', 'BHR', 'BHS', 'BIH', 'BLR', 'BLZ', 'BMU', 'BOL', 'BRA', 'BRB', 'BRN', 'BTN', 'BWA', 'CAF', 'CAN', 'CHE', 'CHL', 'CHN', 'CIV', 'CMR', 'COD', 'COG', 'COL', 'COM', 'CPV', 'CRI', 'CUB', 'CUW', 'CYM', 'CYP', 'CZE', 'DEU', 'DJI', 'DMA', 'DNK', 'DOM', 'DZA', 'ECU', 'EGY', 'ERI', 'ESP', 'EST', 'ETH', 'FIN', 'FJI', 'FRA', 'FRO', 'FSM', 'GAB', 'GBR', 'GEO', 'GHA', 'GIN', 'GMB', 'GNB', 'GNQ', 'GRC', 'GRD', 'GRL', 'GTM', 'GUM', 'GUY', 'HKG', 'HND', 'HRV', 'HTI', 'HUN', 'IDN', 'IMN', 'IND', 'IRL', 'IRN', 'IRQ', 'ISL', 'ISR', 'ITA', 'JAM', 'JOR', 'JPN', 'KAZ', 'KEN', 'KGZ', 'KHM', 'KIR', 'KNA', 'KOR', 'KWT', 'LAO', 'LBN', 'LBR', 'LBY', 'LCA', 'LIE', 'LKA', 'LSO', 'LTU', 'LUX', 'LVA', 'MAC', 'MAF', 'MAR', 'MCO', 'MDA', 'MDG', 'MDV', 'MEX', 'MHL', 'MKD', 'MLI', 'MLT', 'MMR', 'MNE', 'MNG', 'MNP', 'MOZ', 'MRT', 'MUS', 'MWI', 'MYS', 'NAM', 'NCL', 'NER', 'NGA', 'NIC', 'NLD', 'NOR', 'NPL', 'NZL', 'OMN', 'PAK', 'PAN', 'PER', 'PHL', 'PLW', 'PNG', 'POL', 'PRI', 'PRK', 'PRT', 'PRY', 'PSE', 'PYF', 'QAT', 'ROU', 'RUS', 'RWA', 'SAU', 'SDN', 'SEN', 'SGP', 'SLB', 'SLE', 'SLV', 'SMR', 'SOM', 'SRB', 'SSD', 'STP', 'SUR', 'SVK', 'SVN', 'SWE', 'SWZ', 'SXM', 'SYC', 'SYR', 'TCA', 'TCD', 'TGO', 'THA', 'TJK', 'TKM', 'TLS', 'TON', 'TTO', 'TUN', 'TUR', 'TUV', 'TZA', 'UGA', 'UKR', 'URY', 'USA', 'UZB', 'VCT', 'VEN', 'VIR', 'VNM', 'VUT', 'WSM', 'YEM', 'ZAF', 'ZMB', 'ZWE']
badlist = []
# For every country code: create a "GDP_<code>" entry through `sm`, attach
# tags, description, metadata, units, a World Bank feed and an annual index,
# then cache it.
for cc in ctrycodes:

    # just to make a copy
    # (keys are deleted/added below, and `r` must stay intact for the next
    # iteration)
    meta = dict(r)
    tickr = "GDP_" + cc
    wbi = sm.create(tickr, overwrite=True)
    
    #awkward, that this is the only way to get this from the API
    # (a download is issued only to read the country name from the first
    # label of the outer index level)
    country = wb.download(indicator='NY.GDP.MKTP.CD',country=cc).index.levels[0][0]
    
    wbi.add_tags(["economics", "world bank", "GDP"])
    wbi.set_description(meta['name'] + " for " + country)
    # 'name' has been used for the description; drop it before the remaining
    # keys are passed on as metadata
    del meta['name']
    meta['ISO 3166-1 Country Code'] = cc
    meta['Country'] = country
    wbi.add_meta(**meta)
    wbi.set_units("NoUnits")
    wbi.add_feed(WorldBankFT('NY.GDP.MKTP.CD',cc, start='1950', end='2015'))
    
    # NOTE(review): 'A' presumably selects annual frequency - confirm against
    # FFillIT.
    AnnualIndex = FFillIT('A')
    wbi.set_indexing(AnnualIndex)
    
    wbi.cache()
예제 #24
0
import spss ???

#%%
import pandas.io.data as web
import datetime as dt 
import matplotlib.pylab as plt


OLD PROGRAM FROM HERE 
"""
1. Read in GDP per capita 
"""
from pandas.io import wb

# Search for matching indicators and keep the first two columns; the result
# is not stored (this reads like interactive exploration).
wb.search('gdp.*capita.*const').iloc[:,:2]
dat = wb.download(indicator='NY.GDP.PCAP.KD', country=['US', 'CA', 'MX'], 
                  start=2005, end=2008)
# Mean over the outer index level; the result is not stored either.
dat['NY.GDP.PCAP.KD'].groupby(level=0).mean()

wb.search('cell.*%').iloc[:,:2]
ind = ['NY.GDP.PCAP.KD', 'IT.MOB.COV.ZS']
# All countries for 2011 only, dropping rows with any missing value.
dat = wb.download(indicator=ind, country='all', start=2011, end=2011).dropna()
# Positional rename, in the order the indicators were requested.
dat.columns = ['gdp', 'cellphone']

"""
2. Read in complete csv (see Sargent-Stachurski)  
"""


#%%
# OLD PLOTS FROM ANOTHER PROGRAM
plt.plot(calls_strikes, calls_mid, 'r', lw=2, label='calls')
예제 #25
0
import matplotlib.pylab as plt



"""
1. Read in GDP and expenditure components from World Bank
"""
# Single country: China.
country_list  = ['CN']
# World Bank indicator codes to download; every code listed here has a short
# nickname in the `nicknames` mapping further down.
variable_list = ['NE.CON.GOVT.CN', 'NE.CON.PETC.CN', 'NE.CON.PRVT.CN',
                 'NE.CON.TETC.CN', 'NE.CON.TOTL.CN',
                 'NE.DAB.TOTL.CN',
                 'NE.EXP.GNFS.CN', 'NE.GDI.FTOT.CN', 'NE.GDI.STKB.CN',
                 'NE.GDI.TOTL.CN', 'NE.IMP.GNFS.CN', 'NE.RSB.GNFS.CN',
                 'NY.GDP.MKTP.CN', 'NY.GDP.DISC.CN']
df = wb.download(indicator=variable_list, country=country_list,
                 start=1990, end=2014)

# simplify variable names
# http://stackoverflow.com/questions/11346283/renaming-columns-in-pandas
# (renaming by mapping, so the result does not depend on column order)
nicknames = {'NE.CON.GOVT.CN': 'g', 'NE.CON.PETC.CN': 'c1',
             'NE.CON.PRVT.CN': 'c2', 'NE.CON.TETC.CN': 'c3',
             'NE.CON.TOTL.CN': 'c4',
             'NE.DAB.TOTL.CN': 'a', 'NE.EXP.GNFS.CN': 'x',
             'NE.GDI.FTOT.CN': 'i', 'NE.GDI.STKB.CN': 'v',
             'NE.GDI.TOTL.CN': 'gcf', 'NE.IMP.GNFS.CN': 'm',
             'NE.RSB.GNFS.CN': 'nx',
             'NY.GDP.MKTP.CN': 'y', 'NY.GDP.DISC.CN': 'disc'}
df = df.rename(columns=nicknames)

#%%
"""
예제 #26
0
__author__ = 'fabio.lana'
import pandas as pd
from pandas.io import wb
import pycountry
import numpy as np

# World Bank indicator codes; the readable column names assigned below are
# matched to these by position.
indicators = [
    'NY.GDP.PCAP.KD', 'SP.POP.TOTL', 'SP.POP.0014.TO.ZS', 'SP.POP.65UP.TO.ZS',
    'AG.LND.AGRI.ZS', 'AG.YLD.CREL.KG', 'SP.RUR.TOTL', 'SH.STA.MALN.ZS',
    'GC.BAL.CASH.GD.ZS', 'NE.EXP.GNFS.ZS', 'NE.IMP.GNFS.ZS'
]
# Look up the country record for 'CMR' and take its two-letter code, which is
# what gets passed to the download.
nazione = pycountry.countries.get(alpha3='CMR')
iso2 = nazione.alpha2
dati_nazionali = wb.download(indicator=indicators,
                             country=[iso2],
                             start=2006,
                             end=2013)
# One readable label per requested indicator, in the same order.
dati_nazionali.columns = [
    'GDP Capita',
    'Total Pop',
    'Pop Age 0-14',
    'Pop Age 65-up',
    'Perc Agr Land',
    'Cereal Yeld',
    'Rural Population',
    'Malnutrition Age<5',
    'Cash Surplus-Deficit',
    'Export',
    'Import',
]
#print dati['NY.GDP.PCAP.KD'].groupby(level=0).mean()
#   Download World Bank Data Indicators
#############################################################################

# SP.DYN.CBRT.IN: Birth rate, crude (per 1,000 people) 
#   http://data.worldbank.org/indicator/SP.DYN.CBRT.IN

# NY.GNP.PCAP.CD: GNI per capita, Atlas method (current US$)
#   http://data.worldbank.org/indicator/NY.GNP.PCAP.CD

# GC.REV.SOCL.ZS: Social contributions (% of revenue)
#   http://data.worldbank.org/indicator/GC.REV.SOCL.ZS

# SP.POP.65UP.TO.ZS: Population ages 65 and above (% of total)
#   http://data.worldbank.org/indicator/SP.POP.65UP.TO.ZS

df_source = wb.download(
    indicator=['SP.DYN.CBRT.IN', 'NY.GNP.PCAP.CD', 'GC.REV.SOCL.ZS', 'SP.POP.65UP.TO.ZS'],
    country='all', start=1960, end=2015)

# Reset index to columns
df_source.reset_index(inplace=True)

# Rename columns (positional: two former index levels, then the indicators
# in the order they were requested)
df_source.columns = ['country', 'year', 'birth_rate', 'gni', 'social_contr', 'age_65']

# Fill missing values: http://stackoverflow.com/questions/30587728/pandas-backfilling-a-dataframegroupby-object
# Back-filling is done per country so values never leak across countries.
df_all = df_source.groupby(df_source.country).apply(lambda g: g.bfill())

# Load country metadata from downloaded file saved in current working directory
wd = os.getcwd()
# os.path.join builds the path with the platform's separator; the former
# hand-written '\' only worked on Windows and relied on an invalid escape.
meta = pd.ExcelFile(os.path.join(wd, 'sp.dyn.cbrt.in_Indicator_en_excel_v2.xls'))
meta_df = meta.parse('Metadata - Countries')
예제 #28
0
def get_wb(wb_name):
    """Return one World Bank indicator as a countries-by-years table.

    Args:
        wb_name: indicator code passed to ``wb.download``.

    Returns:
        Unstacked dataframe (countries as rows, years as columns) covering
        the module-level ``start_year`` to ``today_year``, with rows and
        columns that are entirely empty removed.
    """
    downloaded = wb.download(indicator=wb_name,
                             start=start_year,
                             end=today_year,
                             country="all")
    # Pivot the 'year' index level into columns and keep this indicator only.
    by_year = downloaded.unstack("year")[wb_name]
    # First drop the all-empty rows, then the all-empty columns.
    without_empty_rows = by_year.dropna(how="all")
    return without_empty_rows.dropna(how="all", axis=1)
예제 #29
0
  `id` varchar(63) DEFAULT NULL, \
  `name` varchar(500) DEFAULT NULL, \
  `source` varchar(500) DEFAULT NULL, \
  `sourceNote` varchar(4000) DEFAULT NULL, \
  `sourceOrganization` varchar(2000) DEFAULT NULL, \
  `topics` varchar(2000) DEFAULT NULL, \
  KEY `ix_indicatorsMeta_index` (`index`) )"
db.create_table(cnx, "indicatorsMeta", tabledef)

# Fetch the indicator catalogue and append it to the 'indicatorsMeta' table.
wbindicators = wb.get_indicators()
wbindicators.to_sql('indicatorsMeta', cnx, flavor='mysql', index=True, if_exists = 'append')



# get actual indicator data
dat = wb.download(indicator=[ 'NY.GDP.MKTP.CD','NY.GDP.MKTP.KD.ZG', 'GC.BAL.CASH.GD.ZS', 'GC.DOD.TOTL.GD.ZS' ], country='all',start=1960, end=2013)
# move the index levels into ordinary columns
dff = dat.reset_index()

# convert year to a number and create a datatype year field
dff['year']=dff['year'].astype(int)
dff['dateyear'] = pd.to_datetime(dff['year'] , format='%Y')

# unlike the catalogue above, this table is replaced on every run
dff.to_sql('wbindicators',cnx,flavor='mysql',index=True, if_exists = 'replace')

# create a joined table to get country information with the indicators
# (the join matches on the country NAME, not on a country code)
tabledef = " as ( select wbindicators.* , countries.iso3c, countries.region, countries.incomeLevel \
from wbindicators , countries \
where wbindicators.country = countries.name ) " 
db.create_table(cnx, "wbindicatorFull", tabledef ) 

예제 #30
0
# -*- coding: utf-8 -*-
"""
Created on Thu Oct  1 01:36:03 2015

@author: anh
"""
import pandas as pd
from pandas.io import wb

# Load the dyads table from a hard-coded absolute path.
d = pd.read_csv('/home/anh/Dropbox/fdi_network/Data/dyads.csv')

# Download one indicator for three countries, 2005-2008, and print it.
d2 = wb.download(indicator='NY.GDP.PCAP.KD', country=['US', 'CA', 'MX'],
                 start=2005, end=2008)

print(d2)

%matplotlib inline                                                                      
%reset -f
예제 #31
0
# Population must be included
indicators.append(['pop', 'SP.POP.TOTL', 'number', 'Population'])

# Query the World Bank Data API directly to get basic info for all countries
# (element [1] of the JSON response is the list of country records)
request_data = requests.get('http://api.worldbank.org/countries?format=json&per_page=500').json()[1]

# All we want from the response data are the names and regions of all countries
# and we don't need the labels to tell us "(all income levels)"
regions = [(d['name'], re.sub(r'\ \(all income levels\)', '', d['region']['value'])) for d in request_data]

# Turn that list of tuples into a named Pandas DataFrame
regions = pd.DataFrame(regions, columns = ['country', 'region'])

# Use Pandas' World Bank Data API to get our indicators for all countries
data = wb.download(indicator = [i[1] for i in indicators],
                   country = ['all'], start = start_year, end = 2013)

# Make 'country' index a column, select the most recent row from each country
# where NO data are missing: within each country the years are sorted in
# descending order, so first() picks the latest complete row.
# sort_values replaces the removed DataFrame.sort([...]).
data = (data.reset_index()
            .dropna()
            .sort_values(['country', 'year'], ascending = [True, False])
            .groupby('country')
            .first())

# Make country a column again
data = data.reset_index()

# Rename columns (positional: short names in the order of `indicators`)
data.columns = ['country', 'year'] + [i[0] for i in indicators]

# Merge in regions
data = pd.merge(data, regions, left_on = 'country', right_on = 'country', how = 'left')

# Create shortened version of region for use in D3 code
예제 #32
0
def get_data(ind):
    """Download one indicator for 2012 and drop the leading 34 rows.

    Args:
        ind: indicator code passed to ``wb.download``.

    Returns:
        The downloaded frame without its first 34 rows.
    """
    downloaded = wb.download(indicator=ind, country="all",
                             start=2012, end=2012)
    # NOTE(review): presumably the first 34 rows are aggregate/region entries
    # listed ahead of the individual countries - confirm against the API.
    return downloaded[34:]
예제 #33
0
# First, we download a GDP per capita series and a fertility rate. The search method shows available series.

# In[154]:

from pandas.io import wb

wb.search('fertility').iloc[:, :2]


# Let's choose two series: one fore GDP per capita and another for Total Fertility Rate. We request all the available countries and some years.

# In[155]:

ind = ['NY.GDP.PCAP.KD', 'SP.DYN.TFRT.IN']
df = wb.download(indicator=ind, country='all', start=1950, end=2014)


# Shorten the column labels, then look at the dataframe. It has a MultiIndex (or hierarchical index).

# In[156]:

# Positional rename, in the order the indicators were requested.
df.columns = ['gdp', 'tfr']
df.head()


# Before we do anything, let's drop any rows that have missing values, and convert both columns to numbers.

# In[157]:

df = df.dropna()
예제 #34
0
파일: data.py 프로젝트: ecustzhy/test
    def download(self, *args, **kwargs):
        """ Caches the `pandas.io.wb.download()` results.

        All positional and keyword arguments are forwarded unchanged to
        `wb.download()`.

        NOTE(review): no caching is visible in this method body; presumably
        it is added by a decorator or by the enclosing class - confirm.

        :returns: The result of the query from cache or the WWW.
        """
        return wb.download(*args, **kwargs)
예제 #35
0
def get_data(ind):
    """Fetch a single indicator for 2012, skipping the leading 34 rows.

    Args:
        ind: indicator code passed to ``wb.download``.

    Returns:
        The downloaded frame from row 34 onwards.
    """
    # NOTE(review): the 34 skipped rows are presumably aggregate/region
    # entries that precede the individual countries - confirm.
    skipped_rows = 34
    full = wb.download(indicator=ind, country="all", start=2012, end=2012)
    return full[skipped_rows:]