# [World Bank](http://www.worldbank.org/)
# using the [wbdata](https://github.com/OliverSherouse/wbdata) python package

# %%
import pandas as pd
import wbdata as wb

# Keep notebook output compact: few rows, moderately wide tables.
pd.options.display.max_rows = 6
pd.options.display.max_columns = 20

# %% [markdown]
# The matching indicator can be found with the search method or, more
# directly, on the World Bank site.

# %%
wb.search_indicators("Population, total")  # SP.POP.TOTL
# wb.search_indicators('area')
# => https://data.worldbank.org/indicator is easier to use

# %% [markdown]
# Now we download the population data

# %%
# Maps World Bank indicator ids to the column labels of the result frame.
indicators = {
    "SP.POP.TOTL": "Population, total",
    "AG.SRF.TOTL.K2": "Surface area (sq. km)",
    "AG.LND.TOTL.K2": "Land area (sq. km)",
    "AG.LND.ARBL.ZS": "Arable land (% of land area)",
}
data = wb.get_dataframe(indicators, convert_date=True)
data = data.sort_index()
data
# Change the column names.
# NOTE: `macro_data`, `np`, `pd` and `datetime` are not defined in this
# section; they are presumably set up earlier in the file.
macro_data.columns = ['realGdp', 'cpi', 'unrate', 'recession', 'tenYearRate']
print(macro_data.tail())

# We can generate a plot for all of our data as follows:
macro_data.plot(grid=True)

# We can apply functions to all columns:
# Average values of macro data during recession
macro_data[macro_data['recession'] == 1].apply(np.mean)
# Average values of macro data during expansion
macro_data[macro_data['recession'] == 0].apply(np.mean)

# Access World Bank Data
# pip install wbdata
import wbdata

# We can search for keys as follows:
wbdata.search_indicators("unemployment")
data_date = (datetime.datetime(1950, 1, 1), datetime.datetime(2019, 2, 10))
unemployment_data = wbdata.get_data("UNEMPSA_", data_date=data_date)
country_data = pd.DataFrame(unemployment_data)

# Access data from quandl
# pip install quandl
import os

import quandl

# Prefer a key supplied through the QUANDL_API_KEY environment variable so
# that personal credentials do not have to be edited into the source.
# The literal below is the original author's key, kept only as a fallback
# so existing runs behave as before.
# NOTE(review): a credential committed to source should be revoked and
# removed once every user supplies their own key.
quandl.api_config.ApiConfig.api_key = (
    os.environ.get("QUANDL_API_KEY") or '1zm1xSnnoqFeAGksg3S1'
)
oil_prices = quandl.get("OPEC/ORB")
oil_prices.plot(grid=True, title='OPEC Reference Basket')

## IEX exchange (A simple API)
import requests
import io
# In[ ]:

# Signature reminder:
# wbdata.get_data(indicator, country=u'all', data_date=None, convert_date=False,
#                 pandas=False, column_name=u'value', keep_levels=False)
wbdata.get_data("IC.BUS.EASE.XQ", country=u'BRA')

# In[ ]:

# Same indicator for two countries, limited to a date window.
# NOTE: `datetime` is not imported in this section; presumably imported earlier.
window_start = datetime.datetime(2010, 1, 1)
window_end = datetime.datetime(2011, 1, 1)
data_date = (window_start, window_end)
wbdata.get_data("IC.BUS.EASE.XQ", country=("USA", "BRA"), data_date=data_date)

# In[ ]:

wbdata.search_indicators("gdp per capita")

# In[ ]:

wbdata.get_incomelevel()

# In[ ]:

# Collect the ids of every country in the "OEC" income level, then pull two
# indicators for them into one DataFrame.
oec_countries = wbdata.get_country(incomelevel="OEC", display=False)
countries = [entry['id'] for entry in oec_countries]
indicators = {
    "IC.BUS.EASE.XQ": "doing_business",
    "NY.GDP.PCAP.PP.KD": "gdppc",
}
df = wbdata.get_dataframe(indicators, country=countries, convert_date=True)
df.describe()
# The indicator mapping has to be built as a dict ahead of the call.
indicator1 = {'SP.POP.TOTL': 'Total Population'}

# The data comes back in reverse order by default (per the original author),
# hence the sort.
# NOTE: `country1` and `wbd` are defined outside this section.
data1 = wbd.get_dataframe(indicator1, country1)
data1 = data1.sort_index()
data1.head()
data1.plot()

# That works, but how do you find other countries?
wbd.get_country()  # Too long a list, easier to search
wbd.search_countries('South')

# And other indicators?
# wbd.get_indicator()  # Too slow
wbd.search_indicators('GDP')  # Too many!

# Browsing by source ...
wbd.get_source()
# ... or by topic narrows things down.
wbd.get_topic()

# Now search within a topic
wbd.search_indicators('CO2', topic=19)

# Several countries at once
country2 = ['IE', 'US', 'CN']  # Ireland, USA, China
indicator2 = {'EN.ATM.CO2E.KT': 'CO2 emissions (kt)'}

# Get the data
data2 = wbd.get_dataframe(indicator2, country2)
data2 = data2.sort_index()
# Need to unstack to get this into proper order
import datetime

import wbdata

wbdata.get_source()
wbdata.get_indicator(source=1)

# Country codes are found with a name search.
wbdata.search_countries('Turkey')  # TUR
wbdata.get_data('IC.REG.COST.PC.MA.ZS', country='TUR')[0]
wbdata.search_countries('united')  # GBR
wbdata.get_data('IC.REG.COST.PC.MA.ZS', country='GBR')

# Restrict a two-country query to a date window.
range_start = datetime.datetime(2010, 1, 1)
range_end = datetime.datetime(2011, 1, 1)
data_date = (range_start, range_end)
wbdata.get_data(
    "IC.REG.COST.PC.MA.ZS",
    country=("USA", "GBR"),
    data_date=data_date,
)

wbdata.search_indicators("gdp per capita")
wbdata.get_data('NY.GDP.PCAP.KD.ZG')
wbdata.get_data('NY.GDP.PCAP.KD.ZG', country='USA')
wbdata.get_data('NY.GDP.PCAP.KD.ZG', country='OED')

# Income level filter: take the ids of all "HIC" countries.
wbdata.get_incomelevel()
hic_countries = wbdata.get_country(incomelevel="HIC", display=False)
countries = [entry['id'] for entry in hic_countries]
indicators = {
    "IC.REG.COST.PC.MA.ZS": "doing_business",
    "NY.GDP.PCAP.PP.KD": "gdppc",
}
df = wbdata.get_dataframe(indicators, country=countries, convert_date=True)
# Ease of doing business
import datetime

import wbdata

wbdata.get_source()
wbdata.get_indicator(source=1)

# Country codes are found with a name search.
wbdata.search_countries('Turkey')  # TUR

# Indicator ids are found with a keyword search.
wbdata.search_indicators('ease of doing business')  # IC.BUS.DFRN.XQ

wbdata.get_data('IC.BUS.DFRN.XQ', country='TUR')[0]
wbdata.search_countries('united')  # GBR
wbdata.get_data('IC.BUS.DFRN.XQ', country='GBR')[0]

# Restrict a two-country query to a date window.
range_start = datetime.datetime(2017, 1, 1)
range_end = datetime.datetime(2019, 1, 1)
data_date = (range_start, range_end)
wbdata.get_data(
    "IC.BUS.DFRN.XQ",
    country=("USA", "GBR"),
    data_date=data_date,
)

wbdata.search_indicators("gdp per capita")
wbdata.get_data('NY.GDP.PCAP.KD.ZG')
wbdata.get_data('NY.GDP.PCAP.KD.ZG', country='USA')
wbdata.get_data('NY.GDP.PCAP.KD.ZG', country='OED')

# Income level filter: take the ids of all "HIC" countries.
wbdata.get_incomelevel()
hic_countries = wbdata.get_country(incomelevel="HIC", display=False)
countries = [entry['id'] for entry in hic_countries]
indicators = {
    "IC.BUS.DFRN.XQ": "doing_business",
    "NY.GDP.PCAP.PP.KD": "gdppc",
}
df = wbdata.get_dataframe(indicators, country=countries, convert_date=True)

# Persist the combined frame, then summarise it.
df.to_csv('KocPython2020/in-classMaterial/day6/econ.csv')
df.describe()
def indicators_request(self):
    """Request all necessary indicators from World Bank.

    For each name in ``self.i_names`` the first result of
    ``wbdata.search_indicators`` is taken, and its ``'id'`` is stored in
    ``self.indicators`` as ``self.indicators[<id>] = <name>``.

    Raises:
        IndexError: if a search returns no results. The message names the
            search term that failed.
    """
    for i_name in self.i_names:
        try:
            indicator = wbdata.search_indicators(i_name)[0]['id']
        except IndexError as err:
            # Same exception type as before, but say which term had no
            # match instead of a bare "list index out of range".
            raise IndexError(
                "no World Bank indicator found for {!r}".format(i_name)
            ) from err
        self.indicators[indicator] = i_name
def testSearchIndicators(self):
    """Smoke test: an indicator search for "gdp" must run without raising."""
    query = "gdp"
    wbdata.search_indicators(query)
# -*- coding: utf-8 -*-
"""
Created on Sun Mar 8 02:01:46 2020

@author: udarici19
"""

import datetime

import wbdata

# Date window for the download.
range_start = datetime.datetime(1960, 1, 1)
range_end = datetime.datetime(2019, 1, 1)
data_date = (range_start, range_end)

# Searches used to find the country code and indicator ids noted alongside.
wbdata.search_countries('United States')  # USA
wbdata.search_indicators('Labor force participation rate, female')  # SL.TLF.CACT.FE.NE.ZS
wbdata.search_indicators('Fertility Rate')  # SP.DYN.TFRT.IN
wbdata.search_indicators('GDP per capita')  # NY.GDP.PCAP.CD

# Indicator id -> column label in the resulting frame.
indicator_labels = {
    "NY.GDP.PCAP.CD": "GDPPC",
    "SP.DYN.TFRT.IN": "FR",
    "SL.TLF.CACT.FE.NE.ZS": "FLFP",
}
df = wbdata.get_dataframe(indicator_labels, country="USA", data_date=data_date)

# Save the frame, then summarise it.
df.to_csv('data.csv')
df.describe()
import datetime

import numpy
import pandas as pd
import wbdata

import LinearRegression

wbdata.get_source()
wbdata.get_indicator(source=1)

# Country codes are found with a name search.
wbdata.search_countries('Poland')

# Indicator ids are found with a keyword search.
wbdata.search_indicators('population, total')
wbdata.get_data('SP.POP.TOTL', country='NOR')  # total population for Norway
wbdata.search_indicators('GDP')
wbdata.get_data('NY.GDP.MKTP.CD', country='NOR')  # GDP for Norway

# Load each raw result into a DataFrame and look at the value/date columns.
population_records = wbdata.get_data('SP.POP.TOTL', country='NOR')
a = pd.DataFrame.from_dict(population_records)
a[['value', 'date']]

gdp_records = wbdata.get_data('NY.GDP.MKTP.CD', country='NOR')
b = pd.DataFrame.from_dict(gdp_records)
b[['value', 'date']]

# NOTE(review): data_date is built here but not passed to get_dataframe
# below; confirm whether the date filter was meant to apply.
range_start = datetime.datetime(1990, 1, 1)
range_end = datetime.datetime(2018, 1, 1)
data_date = (range_start, range_end)

indicators = {
    "SP.POP.TOTL": "Total Population",
    "NY.GDP.MKTP.CD": "GDP",
}
df = wbdata.get_dataframe(indicators, country=["NOR"], convert_date=True)
### Visit https://pypi.org/project/wbdata/ to install the wbdata library
import time
from datetime import date

import pandas as pd
import wbdata

### Data sources
wbdata.get_source()

### Indicators in source 1
wbdata.get_indicator(source=1)

### Search countries with the query "tun"
wbdata.search_countries("tun")

### Search for indicators
wbdata.search_indicators('gdp per capita')

### Extract some data for a given date range
date.today()

## Date range
range_start = date(1960, 1, 1)
range_end = date(2017, 1, 1)
data_date = (range_start, range_end)
data_date

### Extracting data
### The indicator requested is NY.GDP.PCAP.KD.ZG.
### NOTE(review): the original comment named NY.GDP.PCAP.PP.KD.ZG
### ("GDP per capita, PPP annual growth (%)"), which is not the code
### used in the call -- confirm which indicator is intended.
dd = wbdata.get_data(
    indicator='NY.GDP.PCAP.KD.ZG',
    data_date=data_date,
    country='tun',
)
dd

### Turn the raw result into a pandas DataFrame
dd = pd.DataFrame(dd)
dd.head()
#!/usr/bin/env python
import numpy as np
import pandas
import statsmodels.api as sm
import statsmodels.formula.api as smf
import wbdata

wbdata.search_indicators("current account")
wbdata.search_indicators("unemployment")

# The country list must exist before the first get_dataframe call below.
# It was previously assigned only after two calls that already used it,
# which raises NameError when the script runs top to bottom.
countries = [
    i['id'] for i in wbdata.get_country(incomelevel="LMY", display=False)
]

# Current account balance on its own.
indicators = {'BN.CAB.XOKA.GD.ZS': 'current account balance'}
df = wbdata.get_dataframe(indicators, country=countries, convert_date=False)
df.describe()

# Unemployment on its own.
indicators = {'SL.UEM.TOTL.NE.ZS': 'unemployment'}
df = wbdata.get_dataframe(indicators, country=countries, convert_date=False)
df.describe()

# Both indicators together, with dates converted, for the same countries.
indicators = {
    "SL.UEM.TOTL.NE.ZS": "unemployment",
    "BN.CAB.XOKA.GD.ZS": "cab",
}
df = wbdata.get_dataframe(indicators, country=countries, convert_date=True)
df.describe()

# Drop rows with missing values before correlating the two columns.
df = df.dropna()
df.cab.corr(df.unemployment)

# Hand-picked set of countries, with a third indicator added.
countries = ["BEL", "EST", "HUN", "IRL", "VNM", "MLT"]
indicators = {
    "BN.CAB.XOKA.GD.ZS": "cab",
    "SL.UEM.TOTL.NE.ZS": "unemployment",
    "FI.RES.XGLD.CD": "reserve",
}
df = wbdata.get_dataframe(indicators, country=countries, convert_date=True)
df.describe()
def testSearchIndicators(self):
    """Smoke test: an indicator search for "gdp" must run without raising."""
    query = "gdp"
    wbdata.search_indicators(query)
# Fetch data for a single country.
# SE.ADT.1524.LT.FM.ZS  Literacy rate, youth (ages 15-24), gender parity index (GPI)
# Per the original author, the result is a list of dictionaries keyed by year.
# NOTE: `wb` and `datetime` are not imported in this section; presumably
# imported earlier in the file.
wb.get_data("SE.ADT.1524.LT.FM.ZS", country="USA")

# Date window used for the queries below.
range_start = datetime.datetime(2008, 1, 1)
range_end = datetime.datetime(2019, 1, 1)
date_range = (range_start, range_end)

# SH.CON.1524.FE.ZS  Condom use, population ages 15-24, female (% of females ages 15-24)
# SH.CON.1524.MA.ZS  Condom use, population ages 15-24, male (% of males ages 15-24)
wb.get_data(
    "SH.CON.1524.MA.ZS",
    country=["USA", "GBR", "NGA"],
    data_date=date_range,
)

# Search for indicators of interest.
wb.search_indicators("gdp per capita")
wb.search_indicators("condom use")

# List the income level classes.
wb.get_incomelevel()

# Pull two indicators for all "HIC" countries into a pandas DataFrame.
hic_countries = wb.get_country(incomelevel='HIC')
countries = [entry['id'] for entry in hic_countries]
indicators = {
    "IC.BUS.EASE.XQ": "doing_business",
    "NY.GDP.PCAP.PP.KD": "gdppc",
}
df = wb.get_dataframe(
    indicators,
    country=countries,
    convert_date=True,
    data_date=date_range,
)
#To test my hypotheses mentioned in the report, firstly, I imported relavant #World Bank data using World Bank API import wbdata import datetime import pandas as pd #Due to data availability, a cross-sectional study containing 36 OECD countries #and 10 partnering countries for the year of 2018 has been designed. data_date = (datetime.datetime(2018, 1, 1), datetime.datetime(2018, 12, 31)) wbdata.search_indicators('Unemployment') #SL.UEM.TOTL.ZS wbdata.search_indicators('Tariff Rate') #TM.TAX.MRCH.WM.AR.ZS wbdata.search_indicators('GDP Growth') #NY.GDP.MKTP.KD.ZG wbdata.search_indicators('Fdi Inflows') #BX.KLT.DINV.WD.GD.ZS wbdata.search_indicators('Fdi Outflows') #BM.KLT.DINV.WD.GD.ZS wbdata.search_indicators('Exports of goods') #BX.GSR.GNFS.CD wbdata.search_indicators('Imports of goods') #BM.GSR.GNFS.CD df = wbdata.get_dataframe({"SL.UEM.TOTL.ZS" : "Unemployment", "TM.TAX.MRCH.WM.AR.ZS" : "Tariff_rate", "NY.GDP.MKTP.KD.ZG" : "Gdp_Growth", "BX.KLT.DINV.WD.GD.ZS" : "Fdi_Inflows", "BM.KLT.DINV.WD.GD.ZS" : "Fdi_Outflows", "BX.GSR.GNFS.CD" : "Export", "BM.GSR.GNFS.CD" : "Import" }, country={"AUS","AUT","BRA","BEL","CAN","CHE", "CHL","CHN","COL","CRI","CZE","DEU","DNK", "ESP","EST","FIN","FRA","GBR","GRC","HUN", "IDN","IND","IRL","ISL","ISR","ITA","JPN", "KOR","LTU","LUX","LVA","MEX","MYS","NLD", "NOR","NZL","POL","PRT","RUS","SVK","SVN",
def search_indicators(self, search):
    """Return whatever ``wb.search_indicators`` yields for ``search``."""
    matches = wb.search_indicators(search)
    return matches