Beispiel #1
0
# [World Bank](http://www.worldbank.org/)
# using the [wbdata](https://github.com/OliverSherouse/wbdata) python package

# %%
import pandas as pd
import wbdata as wb

# Keep notebook output compact: at most 6 rows and 20 columns per frame.
pd.set_option('display.max_rows', 6)
pd.set_option('display.max_columns', 20)

# %% [markdown]
# The matching indicator code can be found with the search method or,
# more directly, on the World Bank site.

# %%
wb.search_indicators('Population, total')  # SP.POP.TOTL
# wb.search_indicators('area')
# => https://data.worldbank.org/indicator is easier to use

# %% [markdown]
# Now we download the population data

# %%
# Maps each World Bank indicator code to the column label passed to
# get_dataframe().
indicators = {
    'SP.POP.TOTL': 'Population, total',
    'AG.SRF.TOTL.K2': 'Surface area (sq. km)',
    'AG.LND.TOTL.K2': 'Land area (sq. km)',
    'AG.LND.ARBL.ZS': 'Arable land (% of land area)',
}
data = wb.get_dataframe(indicators, convert_date=True)
data = data.sort_index()
data
Beispiel #2
0
# NOTE(review): `macro_data`, `np`, `pd` and `datetime` are used below but are
# not defined in the visible part of the file -- presumably set up earlier.

# Change the column names
macro_data.columns = ['realGdp', 'cpi', 'unrate', 'recession', 'tenYearRate']
print(macro_data.tail())
# We can generate a plot for all of our data as follows:
macro_data.plot(grid=True)
# We can apply functions to all columns:
# Average values of macro data during recession
macro_data[macro_data['recession'] == 1].apply(np.mean)
# Average values of macro data during expansion
macro_data[macro_data['recession'] == 0].apply(np.mean)

# Access World Bank Data
# pip install wbdata
import wbdata
# We can search for keys as follows:
wbdata.search_indicators("unemployment")
# (start, end) window handed to wbdata as `data_date`.
data_date = (datetime.datetime(1950, 1, 1), datetime.datetime(2019, 2, 10))
unemployment_data = wbdata.get_data("UNEMPSA_", data_date=data_date)
country_data = pd.DataFrame(unemployment_data)

# Access data from quandl
# pip install quandl
import os
import quandl
# An API key is a credential and must not be committed to source control.
# It is read from the QUANDL_API_KEY environment variable; when that variable
# is unset or empty, quandl is left at its default configuration.
quandl_api_key = os.environ.get("QUANDL_API_KEY")
if quandl_api_key:
    quandl.api_config.ApiConfig.api_key = quandl_api_key
oil_prices = quandl.get("OPEC/ORB")
oil_prices.plot(grid=True, title='OPEC Reference Basket')

## IEX exchange (A simple API)
import requests
import io
# In[ ]:

# Signature reminder copied from the wbdata documentation:
# wbdata.get_data(indicator, country=u'all', data_date=None, convert_date=False, pandas=False,
#                 column_name=u'value', keep_levels=False)
# Fetch indicator IC.BUS.EASE.XQ for the single country code BRA.
wbdata.get_data("IC.BUS.EASE.XQ", country=u'BRA')


# In[ ]:

# Same indicator for two countries, limited to the (start, end) window below.
# NOTE(review): `datetime` and `wbdata` are not imported in the visible part
# of the file -- presumably imported earlier.
data_date = (datetime.datetime(2010, 1, 1), datetime.datetime(2011, 1, 1))
wbdata.get_data("IC.BUS.EASE.XQ", country=("USA", "BRA"), data_date=data_date)


# In[ ]:

# Keyword search for indicator codes.
wbdata.search_indicators("gdp per capita")


# In[ ]:

# List the income-level classifications that get_country() can filter on.
wbdata.get_incomelevel()


# In[ ]:

# Collect the ids of all countries in income level "OEC", then pull both
# indicators for them into one DataFrame whose columns are named by the
# dict values.
countries = [i['id'] for i in wbdata.get_country(incomelevel="OEC", display=False)]
indicators = {"IC.BUS.EASE.XQ": "doing_business", "NY.GDP.PCAP.PP.KD": "gdppc"}
df = wbdata.get_dataframe(indicators, country=countries, convert_date=True)
df.describe()

# NOTE(review): `wbd` and `country1` are not defined in the visible part of
# the file -- presumably `import wbdata as wbd` and a country code (or list of
# codes) are set up earlier; confirm.

# get_dataframe() takes the indicators as a dict of {indicator code: label}.
indicator1 = {
    'SP.POP.TOTL': 'Total Population'
}  # Needs to be a pre-defined variable name in a dict
# Gives data in reverse order by default, hence the sort_index()
data1 = wbd.get_dataframe(indicator1, country1).sort_index()
data1.head()
data1.plot()

# This is fine but what if you need to find different countries?
wbd.get_country()
# Too long a list, easier to search
wbd.search_countries('South')

# What if you want to get different indicators
# wbd.get_indicator()  # Too slow
wbd.search_indicators('GDP')  # Too many!

# Perhaps instead look by source
wbd.get_source()
# or topic
wbd.get_topic()
# Now search, restricted to topic id 19
wbd.search_indicators('CO2', topic=19)

# What about getting multiple countries
country2 = ['IE', 'US', 'CN']  # Ireland, USA, China
indicator2 = {'EN.ATM.CO2E.KT': 'CO2 emissions (kt)'}

# Get the data
data2 = wbd.get_dataframe(indicator2, country2).sort_index()
# Need to unstack to get this into proper order
# NOTE(review): the unstack step itself is not present in the visible part
# of the file.
Beispiel #5
0
import datetime

import wbdata

wbdata.get_source()
wbdata.get_indicator(source=1)

# Look up country codes by name.
wbdata.search_countries('Turkey')  # TUR
wbdata.get_data('IC.REG.COST.PC.MA.ZS', country='TUR')[0]
wbdata.search_countries('united')  # GBR
wbdata.get_data('IC.REG.COST.PC.MA.ZS', country='GBR')

# Query two countries at once, limited to a (start, end) date window.
window_start = datetime.datetime(2010, 1, 1)
window_end = datetime.datetime(2011, 1, 1)
data_date = (window_start, window_end)
wbdata.get_data(
    "IC.REG.COST.PC.MA.ZS", country=("USA", "GBR"), data_date=data_date
)

# Find an indicator by keyword, then fetch it with and without a country.
wbdata.search_indicators("gdp per capita")
wbdata.get_data('NY.GDP.PCAP.KD.ZG')

wbdata.get_data('NY.GDP.PCAP.KD.ZG', country='USA')
wbdata.get_data('NY.GDP.PCAP.KD.ZG', country='OED')

# Income level filter: gather the ids of every country in level "HIC".
wbdata.get_incomelevel()
high_income = wbdata.get_country(incomelevel="HIC", display=False)
countries = [entry['id'] for entry in high_income]

# Indicator code -> column label of the resulting DataFrame.
indicators = {
    "IC.REG.COST.PC.MA.ZS": "doing_business",
    "NY.GDP.PCAP.PP.KD": "gdppc",
}
df = wbdata.get_dataframe(indicators, country=countries, convert_date=True)
Beispiel #6
0
# ease of business

import datetime

import wbdata

wbdata.get_source()
wbdata.get_indicator(source=1)

# Look up country codes by name.
wbdata.search_countries('Turkey')  # TUR
# Look up indicator codes by keyword.
wbdata.search_indicators('ease of doing business')  # IC.BUS.DFRN.XQ
wbdata.get_data('IC.BUS.DFRN.XQ', country='TUR')[0]
wbdata.search_countries('united')  # GBR
wbdata.get_data('IC.BUS.DFRN.XQ', country='GBR')[0]

# Query two countries at once, limited to a (start, end) date window.
window_start = datetime.datetime(2017, 1, 1)
window_end = datetime.datetime(2019, 1, 1)
data_date = (window_start, window_end)
wbdata.get_data(
    "IC.BUS.DFRN.XQ",
    country=("USA", "GBR"),
    data_date=data_date,
)

# Find an indicator by keyword, then fetch it with and without a country.
wbdata.search_indicators("gdp per capita")
wbdata.get_data('NY.GDP.PCAP.KD.ZG')

wbdata.get_data('NY.GDP.PCAP.KD.ZG', country='USA')
wbdata.get_data('NY.GDP.PCAP.KD.ZG', country='OED')

# Income level filter: gather the ids of every country in level "HIC".
wbdata.get_incomelevel()
high_income = wbdata.get_country(incomelevel="HIC", display=False)
countries = [entry['id'] for entry in high_income]

# Indicator code -> column label of the resulting DataFrame.
indicators = {
    "IC.BUS.DFRN.XQ": "doing_business",
    "NY.GDP.PCAP.PP.KD": "gdppc",
}
df = wbdata.get_dataframe(indicators, country=countries, convert_date=True)

# Persist the frame, then show summary statistics.
df.to_csv('KocPython2020/in-classMaterial/day6/econ.csv')
df.describe()
    def indicators_request(self):
        '''Resolve every name in ``self.i_names`` to a World Bank indicator id.

        Each name is passed to ``wbdata.search_indicators`` and the ``'id'``
        of the first hit is stored in ``self.indicators`` as
        ``{indicator id: name}``.
        '''
        for label in self.i_names:
            hits = wbdata.search_indicators(label)
            # Only the first search hit is used.
            first_id = hits[0]['id']
            self.indicators[first_id] = label
Beispiel #8
0
 def testSearchIndicators(self):
     """Smoke test: call ``wbdata.search_indicators("gdp")``; nothing is asserted on the result."""
     wbdata.search_indicators("gdp")
Beispiel #9
0
# -*- coding: utf-8 -*-
"""
Created on Sun Mar  8 02:01:46 2020

@author: udarici19
"""

import datetime

import wbdata

# (start, end) window applied to the download below.
period_start = datetime.datetime(1960, 1, 1)
period_end = datetime.datetime(2019, 1, 1)
data_date = (period_start, period_end)

# Look up the country code.
wbdata.search_countries('United States')  # USA

# Look up the indicator codes used below.
wbdata.search_indicators('Labor force participation rate, female')  # SL.TLF.CACT.FE.NE.ZS
wbdata.search_indicators('Fertility Rate')  # SP.DYN.TFRT.IN
wbdata.search_indicators('GDP per capita')  # NY.GDP.PCAP.CD

# Indicator code -> column label, passed straight to get_dataframe().
df = wbdata.get_dataframe(
    {
        "NY.GDP.PCAP.CD": "GDPPC",
        "SP.DYN.TFRT.IN": "FR",
        "SL.TLF.CACT.FE.NE.ZS": "FLFP",
    },
    country="USA",
    data_date=data_date,
)

# Persist the frame, then show summary statistics.
df.to_csv('data.csv')
df.describe()
Beispiel #10
0
import pandas as pd
import wbdata
import numpy
import datetime
import LinearRegression
wbdata.get_source()
wbdata.get_indicator(source=1)
# Get country codes with a search
wbdata.search_countries('Poland')
# Get indicators with a search
wbdata.search_indicators('population, total')
wbdata.get_data('SP.POP.TOTL', country='NOR')  # total population for Norway
wbdata.search_indicators('GDP')
wbdata.get_data('NY.GDP.MKTP.CD', country='NOR')  # GDP for Norway

# Load the raw records into DataFrames and look at the value/date columns.
a = pd.DataFrame.from_dict(wbdata.get_data('SP.POP.TOTL', country='NOR'))
a[['value', 'date']]
b = pd.DataFrame.from_dict(wbdata.get_data('NY.GDP.MKTP.CD', country='NOR'))
b[['value', 'date']]

# (start, end) window for the combined download.
data_date = (datetime.datetime(1990, 1, 1), datetime.datetime(2018, 1, 1))
# Indicator code -> column label of the resulting DataFrame.
indicators = {"SP.POP.TOTL": "Total Population", "NY.GDP.MKTP.CD": "GDP"}
# Pass the window explicitly: it was previously built but never handed to
# get_dataframe(), so the date restriction was silently ignored.
df = wbdata.get_dataframe(
    indicators,
    country=["NOR"],
    data_date=data_date,
    convert_date=True,
)

### See https://pypi.org/project/wbdata/ for how to install the wbdata library
import wbdata
### Data sources
wbdata.get_source()
### Indicators in source 1
wbdata.get_indicator(source=1)
### Search countries (here with the query string "tun")
wbdata.search_countries("tun")
### Search for indicators
wbdata.search_indicators('gdp per capita')

### Extract some data for a given date range.
### NOTE(review): `time` is imported but not used in the visible code.
import time
from datetime import date
date.today()

### Extracting data for a given indicator
## Date range as a (start, end) tuple
data_date = (date(1960, 1, 1), date(2017, 1, 1))
data_date

### Extracting Data
### NY.GDP.PCAP.PP.KD.ZG        GDP per capita, PPP annual growth (%)
### NOTE(review): the call below requests 'NY.GDP.PCAP.KD.ZG' (no ".PP"),
### which does not match the code named in the line above -- confirm which
### indicator is intended.
dd=wbdata.get_data(indicator='NY.GDP.PCAP.KD.ZG' , data_date=data_date,country='tun')
dd
### Import the pandas package
import pandas as pd
### Convert dd into a DataFrame
dd=pd.DataFrame(dd)
dd.head()
Beispiel #12
0
#!/usr/bin/env python

import wbdata
import pandas

# Look up the indicator codes used below.
wbdata.search_indicators("current account")
wbdata.search_indicators("unemployment")

# Ids of every country in income level "LMY". This must be built before the
# first get_dataframe() call: the script previously referenced `countries`
# two requests before assigning it, which raises NameError on a fresh run.
countries = [i['id'] for i in wbdata.get_country(incomelevel="LMY", display=False)]

# Each indicator on its own, with dates left unconverted.
indicators = {'BN.CAB.XOKA.GD.ZS': 'current account balance'}
df = wbdata.get_dataframe(indicators, country=countries, convert_date=False)
df.describe()

indicators = {'SL.UEM.TOTL.NE.ZS': 'unemployment'}
df = wbdata.get_dataframe(indicators, country=countries, convert_date=False)
df.describe()

# Both indicators together for the same country list; rows with a missing
# value are dropped before computing the correlation.
indicators = {"SL.UEM.TOTL.NE.ZS": "unemployment", "BN.CAB.XOKA.GD.ZS": "cab"}
df = wbdata.get_dataframe(indicators, country=countries, convert_date=True)
df.describe()
df = df.dropna()

df.cab.corr(df.unemployment)

import numpy as np
import statsmodels.api as sm
import statsmodels.formula.api as smf

# Narrow down to a hand-picked set of countries and add a third indicator.
countries = ["BEL", "EST", "HUN", "IRL", "VNM", "MLT"]
indicators = {"BN.CAB.XOKA.GD.ZS": "cab", "SL.UEM.TOTL.NE.ZS": "unemployment", "FI.RES.XGLD.CD": "reserve"}
df = wbdata.get_dataframe(indicators, country=countries, convert_date=True)
df.describe()
Beispiel #13
0
 def testSearchIndicators(self):
     """Smoke test: call ``wbdata.search_indicators("gdp")``; nothing is asserted on the result."""
     wbdata.search_indicators("gdp")
Beispiel #14
0
# Fetch one indicator for a single country.
# SE.ADT.1524.LT.FM.ZS  Literacy rate, youth (ages 15-24), gender parity index (GPI)
# The result is a list of dictionaries, one per year.
wb.get_data("SE.ADT.1524.LT.FM.ZS", country="USA")

# Restrict later requests to a (start, end) date window.
range_start = datetime.datetime(2008, 1, 1)
range_end = datetime.datetime(2019, 1, 1)
date_range = (range_start, range_end)
# SH.CON.1524.FE.ZS     Condom use, population ages 15-24, female (% of females ages 15-24)
# SH.CON.1524.MA.ZS     Condom use, population ages 15-24, male (% of males ages 15-24)
wb.get_data(
    "SH.CON.1524.MA.ZS", country=["USA", "GBR", "NGA"], data_date=date_range
)

# Search for indicators of interest.
wb.search_indicators("gdp per capita")
wb.search_indicators("condom use")

# List the income level classes.
wb.get_incomelevel()

# Load the data into pandas: ids of every country in level 'HIC' ...
high_income = wb.get_country(incomelevel='HIC')
countries = [entry['id'] for entry in high_income]

# ... and an indicator code -> column label mapping.
indicators = {
    "IC.BUS.EASE.XQ": "doing_business",
    "NY.GDP.PCAP.PP.KD": "gdppc",
}

df = wb.get_dataframe(
    indicators, country=countries, convert_date=True, data_date=date_range
)
Beispiel #15
0
# To test the hypotheses mentioned in the report, I first imported the relevant
# World Bank data using the World Bank API.
import wbdata
import datetime
import pandas as pd

# Due to data availability, a cross-sectional study containing 36 OECD countries
# and 10 partner countries for the year 2018 has been designed.
# (start, end) window covering calendar year 2018.
data_date = (datetime.datetime(2018, 1, 1), datetime.datetime(2018, 12, 31))

# Keyword searches; the trailing comment on each line records the indicator
# code that was picked from the results.
wbdata.search_indicators('Unemployment')  # SL.UEM.TOTL.ZS
wbdata.search_indicators('Tariff Rate')  # TM.TAX.MRCH.WM.AR.ZS
wbdata.search_indicators('GDP Growth')  # NY.GDP.MKTP.KD.ZG
wbdata.search_indicators('Fdi Inflows')  # BX.KLT.DINV.WD.GD.ZS
wbdata.search_indicators('Fdi Outflows')  # BM.KLT.DINV.WD.GD.ZS
wbdata.search_indicators('Exports of goods')  # BX.GSR.GNFS.CD
wbdata.search_indicators('Imports of goods')  # BM.GSR.GNFS.CD

df = wbdata.get_dataframe({"SL.UEM.TOTL.ZS" : "Unemployment",
                           "TM.TAX.MRCH.WM.AR.ZS" : "Tariff_rate",
                           "NY.GDP.MKTP.KD.ZG" : "Gdp_Growth",
                           "BX.KLT.DINV.WD.GD.ZS" : "Fdi_Inflows",
                           "BM.KLT.DINV.WD.GD.ZS" : "Fdi_Outflows",
                           "BX.GSR.GNFS.CD" : "Export",
                           "BM.GSR.GNFS.CD" : "Import" },
                           country={"AUS","AUT","BRA","BEL","CAN","CHE",
                           "CHL","CHN","COL","CRI","CZE","DEU","DNK",
                           "ESP","EST","FIN","FRA","GBR","GRC","HUN",
                           "IDN","IND","IRL","ISL","ISR","ITA","JPN",
                           "KOR","LTU","LUX","LVA","MEX","MYS","NLD",
                           "NOR","NZL","POL","PRT","RUS","SVK","SVN",
Beispiel #16
0
 def search_indicators(self, search):
     """Pass ``search`` to ``wb.search_indicators`` and return its result unchanged."""
     return wb.search_indicators(search)