def get_data():
    """Build a per-country panel of Gini index, GDP per capita and population.

    The three World Bank indicators are downloaded with ``wbdata``.  Only
    countries with at least one Gini and one GDP-per-capita observation are
    kept.  Within each kept country, gaps are back-filled, rows that are still
    incomplete are dropped, the country's region is attached and the rows are
    sorted by date.

    Returns:
        pandas.DataFrame: columns ``country``, ``date`` (cast to ``int64``),
        the three indicator columns and ``Region``.  Rows of a fixed set of
        highlighted countries come first.  An empty frame is returned when no
        country qualifies.

    Raises:
        KeyError: if a country in the indicator data has no entry in the
            country metadata.
    """
    indicators = {
        'SI.POV.GINI': 'Gini Index',
        'NY.GDP.PCAP.PP.KD': 'GDP per capita (constant 2010 US$)',
        'SP.POP.TOTL': 'Population',
    }
    gini_col = indicators['SI.POV.GINI']
    gdp_col = indicators['NY.GDP.PCAP.PP.KD']

    data = wbdata.get_dataframe(indicators=indicators).reset_index()

    # Fetch the country metadata once and keep a plain name -> region lookup.
    region_by_country = {
        row['name']: row['region']['value'] for row in wbdata.get_country("")
    }

    # Collect the per-country frames and concatenate once at the end.
    frames = []
    for country in data["country"].unique():
        rows = data[data["country"] == country]
        if not (rows[gini_col].notna().any() and rows[gdp_col].notna().any()):
            continue
        frames.append(
            rows.bfill()
            .dropna()
            .assign(Region=region_by_country[country])
            .sort_values(by="date")
        )

    if not frames:
        return pd.DataFrame()

    df = pd.concat(frames, ignore_index=True)
    df["date"] = df["date"].astype('int64')

    # Each country is moved to the front in turn, so the LAST name listed
    # ends up first in the result.
    front_countries = (
        "Austria",
        "Algeria",
        "Botswana",
        "Australia",
        "India",
        "United States",
        "Chile",
    )
    for name in front_countries:
        is_target = df["country"] == name
        df = pd.concat([df[is_target], df[~is_target]], ignore_index=True)
    return df
def get_capitals_data():
    """Return the World Bank country records that have a capital city.

    The full country list from ``wbdata.get_country()`` is loaded into a
    DataFrame and rows whose ``capitalCity`` is the empty string are removed.

    Returns:
        pandas.DataFrame: the remaining country rows.
    """
    all_countries = pd.DataFrame(wbdata.get_country())
    has_capital = all_countries["capitalCity"] != ''
    return all_countries[has_capital]
def getRawData(self, sSaveFile):
    """Download three indicators for the ``"LMY"`` income level and save as CSV.

    Args:
        sSaveFile: destination handed to ``DataFrame.to_csv``.
    """
    mIndicators = {
        "NY.GDP.PCAP.PP.CD": "GDP per capita (current US$)",
        "SH.DYN.MORT": "Mortality rate, under-5 (per 1,000 live births)",
        "SG.GEN.PARL.ZS": "Proportion of seats held by women in national parliaments (%)",
    }

    # Country ids of every entry wbdata returns for income level "LMY".
    aCountries = []
    for oCountry in wbdata.get_country(incomelevel="LMY", display=False):
        aCountries.append(oCountry["id"])

    wbdata.get_dataframe(
        mIndicators, country=aCountries, convert_date=True
    ).to_csv(sSaveFile)
def get_regions():
    """Return a table of World Bank country ids and regions, indexed by name.

    Returns:
        pandas.DataFrame: index ``Countries`` (country name) with columns
        ``ID`` (country id) and ``Group`` (the region's ``value`` entry, or
        ``None`` when that key is absent).
    """
    rows = []
    for entry in wbdata.get_country(display=False):
        rows.append((entry['id'], entry['name'], entry['region'].get('value')))

    table = pandas.DataFrame(rows, columns=['ID', 'Countries', 'Group'])
    table.index = table.Countries
    return table.drop('Countries', axis=1)
def get_incomeLevel(countries):
    """Return the income-level label for each requested country.

    Args:
        countries: country selector forwarded to ``wbdata.get_country``.

    Returns:
        list[str]: one label per returned country record.  Anything from the
        first ``':'`` onward is cut off; a label that starts with ``':'`` is
        left untouched.
    """
    labels = []
    for record in wbdata.get_country(countries):
        label = record['incomeLevel']['value']
        head, colon, _ = label.partition(':')
        # Only trim when a ':' exists and there is text in front of it.
        if colon and head:
            label = head
        labels.append(label)
    return labels
def get_region(countries):
    """Return the region name for each requested country.

    Args:
        countries: country selector forwarded to ``wbdata.get_country``.

    Returns:
        list[str]: one region name per returned country record.  When the name
        contains ``'('`` after its first character, the parenthesised suffix
        is dropped together with the single character before it (presumably
        the separating space).
    """
    names = []
    for record in wbdata.get_country(countries):
        name = record['region']['value']
        head, paren, _ = name.partition('(')
        # Only trim when a '(' exists and there is text in front of it.
        if paren and head:
            name = head[:-1]
        names.append(name)
    return names
def get_region(countries):
    """Look up the region of every requested country, without the suffix.

    Args:
        countries: country selector forwarded to ``wbdata.get_country``.

    Returns:
        list[str]: region names in the order the records are returned.  A
        trailing ``(...)`` part is cut off along with the one character that
        precedes the ``'('``.  Names that start with ``'('`` or contain none
        are returned unchanged.
    """
    def _without_suffix(text):
        cut = text.find('(')
        return text[:cut - 1] if cut > 0 else text

    return [
        _without_suffix(entry['region']['value'])
        for entry in wbdata.get_country(countries)
    ]
def countries_incomelevel(inc):
    """Get country codes for any given income level.

    Args:
        inc: income-level code.  Codes by income:

            * ``LIC`` - Low-income economies
            * ``LMC`` - Lower-middle-income economies
            * ``UMC`` - Upper-middle-income economies
            * ``HIC`` - High-income economies

    Returns:
        list: the ``id`` of every country record wbdata returns for ``inc``.
    """
    matches = wbdata.get_country(incomelevel=[inc], display=False)
    return [entry['id'] for entry in matches]
def get_incomeLevel(countries):
    """Look up the income level of every requested country.

    Args:
        countries: country selector forwarded to ``wbdata.get_country``.

    Returns:
        list[str]: income-level labels in the order the records are returned,
        each truncated just before its first ``':'``.  Labels without a
        ``':'`` or starting with one are returned unchanged.
    """
    def _short_label(text):
        cut = text.find(':')
        return text[:cut] if cut > 0 else text

    return [
        _short_label(entry['incomeLevel']['value'])
        for entry in wbdata.get_country(countries)
    ]
def all_countries(db):
    """Upsert every World Bank country into ``db.world_bank_countries``.

    Args:
        db: object exposing a ``world_bank_countries`` collection with an
            ``update_one`` method.

    Returns:
        dict: country name -> the document written for that country
        (``country_code``, ``name``, ``last_updated``).
    """
    by_name = {}
    for row in wb.get_country():
        assert isinstance(row, dict)
        code = row['id']
        doc = {
            'country_code': code,
            'name': row['name'],
            'last_updated': now(),
        }
        # Other fields of the row (e.g. incomeLevel/region) are not stored
        # in the model for now.
        db.world_bank_countries.update_one(
            {'country_code': code}, {"$set": doc}, upsert=True
        )
        by_name[doc['name']] = doc
    return by_name
def run(self):
    """Load indicator ``EG.FEC.RNEW.ZS`` for every country into the database.

    Creates the tables, then stores one ``RenewableEnergyCountry`` row per
    (country, year) observation that has a value.  Countries whose download
    fails with ``TypeError`` are reported on stdout and skipped.
    """
    db.create_all()

    # get list of all country codes
    for country in wbdata.get_country():
        try:
            country_data = wbdata.get_data("EG.FEC.RNEW.ZS", country=country["id"])
        except TypeError:
            # Throws a NoneType error if country doesn't exist in world bank db
            print(f"Could not get data for country {country['name']}")
            continue

        for data_entry in country_data:
            value = data_entry["value"]
            # Skip only missing observations; a reading of 0 is real data.
            if value is None:
                continue
            db.session.add(
                RenewableEnergyCountry(
                    country=data_entry["country"]["value"],
                    year=data_entry["date"],
                    renewable_energy_usage=value,
                )
            )

        # Commit per country so a later failure keeps what was already loaded.
        db.session.commit()
import pandas as pd
import numpy as np
import bokeh.io as bio
import bokeh.models as bm
import bokeh.plotting as bp
import geopandas as gpd
import colorcet

# --- Plot configuration ---
year_from = 1990
year_to = 2021
value_to_draw = 'natural_grow'  # natural_grow net_migration total_grow
region_name = 'Europe'

# Map each World Bank country name to its id.
countries = wbd.get_country()
country_to_id = {entry["name"]: entry["id"] for entry in countries}

# World Bank indicator code -> short column name.
indicators = {
    "SP.POP.TOTL": "pop",
    "SP.DYN.CDRT.IN": "dr",
    "SP.DYN.CBRT.IN": "br",
}

# Reuse the pickled frame from a previous run when it exists.
CACHE_FILE = "data.gz"
if os.path.isfile(CACHE_FILE):
    print("Load from cache..")
    df = pd.read_pickle(CACHE_FILE)
else:
    print("Download..")
def testBadIncomeLevel(self):
    """Query countries with the made-up income level ``"Foobar"``."""
    bogus_level = "Foobar"
    wbdata.get_country(incomelevel=bogus_level)
def testBadLendingType(self):
    """Query countries with the made-up lending type ``"Foobar"``.

    The bogus value goes through the ``lendingtype`` filter so that this test
    exercises lending-type handling rather than the income-level path.
    """
    wbdata.get_country(lendingtype="Foobar")
def testBadCountry(self):
    """Query the made-up country id ``"Foobar"``."""
    bogus_id = "Foobar"
    wbdata.get_country(country_id=bogus_id)
'Upper middle income': 'UMC', 'Uruguay': 'URY', 'Uzbekistan': 'UZB', 'Vanuatu': 'VUT', 'Venezuela, RB': 'VEN', 'Vietnam': 'VNM', 'Virgin Islands (U.S.)': 'VIR', 'West Bank and Gaza': 'PSE', 'World': 'WLD', 'Yemen, Rep.': 'YEM', 'Zambia': 'ZMB', 'Zimbabwe': 'ZWE'} #1.2 Use wbdata to get lists of country codes by income groups countriesIncomeAll = [i['id'] for i in wb.get_country(incomelevel=['LIC','MIC','HIC'],display=False)] countriesIncomeH = [i['id'] for i in wb.get_country(incomelevel=['HIC'],display=False)] countriesIncomeM = [i['id'] for i in wb.get_country(incomelevel=['MIC'],display=False)] countriesIncomeL = [i['id'] for i in wb.get_country(incomelevel=['LIC'],display=False)] countriesIncomeOecd = ['AUS','CAN','CHL','CZE','DNK','EST','HUN','ISL','ISR','JPN' ,'KOR','NZL','NOR''POL','SVK','SVN','SWE','CHE','USA'] # # Figures: money, prices, and output # In[4]: # 4. Graph of quantity theory data without exchange or interest rates
# First up the world bank - two packages wbpy and wbdata - we'll use wbdata
# More docs at http://wbdata.readthedocs.org/en/latest/
import wbdata as wbd

# Example 1: Get total population for Ireland (no date filter is applied)
country1 = ['IE']  # Needs to be a proper country code in a list
indicator1 = {'SP.POP.TOTL': 'Total Population'}  # Needs to be a pre-defined variable name in a dict

# Data comes back in reverse order by default, so sort on the index
data1 = wbd.get_dataframe(indicator1, country1)
data1 = data1.sort_index()
data1.head()
data1.plot()

# This is fine but what if you need to find different countries?
wbd.get_country()  # Too long a list, easier to search
wbd.search_countries('South')

# What if you want to get different indicators
#wbd.get_indicator() # Too slow
wbd.search_indicators('GDP')  # Too many!

# Perhaps instead look by source
wbd.get_source()
# or topic
wbd.get_topic()
# Now search
wbd.search_indicators('CO2', topic=19)

# What about getting multiple countries
# !pip install wbdata
import pandas as pd
import wbdata as wb  # World Bank data source

## http://blogs.worldbank.org/opendata/new-country-classifications-income-level-2017-2018
## Country metadata, including income level
countries = wb.get_country(display=False)
df_country = pd.DataFrame(countries)


def get_income(x):
    """Return the ``'value'`` entry of an income-level record."""
    return x['value']


# Replace each income-level record by its plain text value.
df_country['incomeLevel'] = df_country.incomeLevel.apply(get_income)
df_country[df_country.id == 'TWN']

## GDP (current US$)
## https://data.worldbank.org/indicator/NY.GDP.MKTP.CD
gdp = wb.get_data("NY.GDP.MKTP.CD")

# Keep only the 2018 observations; append in place rather than rebuilding
# the list on every iteration.
list_gdp = []
for i in gdp:
    if i['date'] == '2018':
        list_gdp.append([i['country']['id'], i['country']['value'], i['value']])
df_gdp = pd.DataFrame(list_gdp, columns=['iso2Code', 'name', 'value'])

## GDP growth (annual %)
## https://data.worldbank.org/indicator/NY.GDP.MKTP.KD.ZG
def testOEC(self):
    """Query countries filtered by income level ``"OEC"``."""
    level = "OEC"
    wbdata.get_country(incomelevel=level)
def testIDB(self):
    """Query countries filtered by lending type ``"IDB"``."""
    lending = "IDB"
    wbdata.get_country(lendingtype=lending)
def get_countries():
    """Return the World Bank country list, cached in ``session``.

    The list is downloaded with ``wbdata.get_country(display=False)`` only
    when ``session`` has no ``'countries'`` entry yet; the stored value is
    returned in both cases.
    """
    cache_key = 'countries'
    if cache_key in session:
        return session[cache_key]
    session[cache_key] = wbdata.get_country(display=False)
    return session[cache_key]
'Uruguay': 'URY', 'Uzbekistan': 'UZB', 'Vanuatu': 'VUT', 'Venezuela, RB': 'VEN', 'Vietnam': 'VNM', 'Virgin Islands (U.S.)': 'VIR', 'West Bank and Gaza': 'PSE', 'World': 'WLD', 'Yemen, Rep.': 'YEM', 'Zambia': 'ZMB', 'Zimbabwe': 'ZWE' } #1.2 Use wbdata to get lists of country codes by income groups countriesIncomeAll = [ i['id'] for i in wb.get_country(incomelevel=['LIC', 'MIC', 'HIC']) ] countriesIncomeH = [i['id'] for i in wb.get_country(incomelevel=['HIC'])] countriesIncomeM = [i['id'] for i in wb.get_country(incomelevel=['MIC'])] countriesIncomeL = [i['id'] for i in wb.get_country(incomelevel=['LIC'])] countriesIncomeOecd = [ 'AUS', 'CAN', 'CHL', 'CZE', 'DNK', 'EST', 'HUN', 'ISL', 'ISR', 'JPN', 'KOR', 'NZL', 'NOR' 'POL', 'SVK', 'SVN', 'SWE', 'CHE', 'USA' ] # # Figures: money, prices, and output # In[4]:
import wbdata
import numpy as np
import pandas as pd
from numpy.linalg import inv
from matplotlib import pyplot
import matplotlib.pyplot as plt
import seaborn
seaborn.set()

# Exploratory calls; their return values are not used below.
wbdata.get_source()
wbdata.get_indicator(source = 25)
wbdata.get_data('NY.GDP.PCAP.PP.KD', country = 'USA')
wbdata.get_data('SL.TLF.BASC.ZS', country = 'USA')

# Country id(s) that wbdata returns for 'USA'.
country = [i['id'] for i in wbdata.get_country('USA', display=False)]

# indicators are "GDP per capita, PPP (constant 2011 international $)"
# and "Labor force with basic education (% of total working-age population with basic education)"
indicators = {
    "NY.GDP.PCAP.PP.KD": "gdppc_ppp",
    "SL.TLF.BASC.ZS": "laborforce_basic_educ",
}

# Download, save to disk, then work from the CSV copy.
df = wbdata.get_dataframe(indicators, country, convert_date = False)
df.to_csv('hw2.csv')
df.describe()

dataset = pd.read_csv('hw2.csv')
print(dataset)

# Rows with any missing value are dropped before further use.
data = dataset.dropna()
print(data)

X = data.iloc[:, 2].copy()
def testAllCountries(self):
    """Query the country list without any filter arguments."""
    wbdata.get_country()
'Uzbekistan': 'UZB', 'Vanuatu': 'VUT', 'Venezuela, RB': 'VEN', 'Vietnam': 'VNM', 'Virgin Islands (U.S.)': 'VIR', 'West Bank and Gaza': 'PSE', 'World': 'WLD', 'Yemen, Rep.': 'YEM', 'Zambia': 'ZMB', 'Zimbabwe': 'ZWE' } #1.2 Use wbdata to get lists of country codes by income groups countriesIncomeAll = [ i['id'] for i in wb.get_country(incomelevel=['LIC', 'MIC', 'HIC'], display=False) ] countriesIncomeH = [ i['id'] for i in wb.get_country(incomelevel=['HIC'], display=False) ] countriesIncomeM = [ i['id'] for i in wb.get_country(incomelevel=['MIC'], display=False) ] countriesIncomeL = [ i['id'] for i in wb.get_country(incomelevel=['LIC'], display=False) ] # countriesIncomeOecd = [i['id'] for i in wb.get_country(incomelevel="OECx", display=False)] countriesIncomeOecd = [ 'AUS', 'CAN', 'CHL', 'CZE', 'DNK', 'EST', 'HUN', 'ISL', 'ISR', 'JPN', 'KOR', 'NZL', 'NOR'
import pandas as pd import matplotlib.pyplot as plt import seaborn as sns import wbdata import eurostat import country_converter as coco ''' AT THE WORLD BANK UK = GB ''' rich_countries_json = wbdata.get_country(incomelevel="HIC") # Selected by countries who have environmental tax data # Or country names are different country_tuple=("AT","BE","DE","DK","ES","FI","FR","IE","IS","IT","NL","NO","PL","PT","SE","GB") euro_country_tuple=("AT","BE","DE","DK","ES","FI","FR","IE","IS","IT","NL","NO","PL","PT","SE","UK") #CO2 emissions (metric tons per capita) EN.ATM.CO2E.PC #"NE.CON.PRVT.ZS": "Households and NPISHs Final consumption expenditure (% of GDP)", #"BX.KLT.DINV.WD.GD.ZS": "Foreign direct investment, net inflows (% of GDP)", #Underscores make datanames complient with Stata indicators = { "EN.ATM.GHGT.KT.CE":"Greenhouse", #Total greenhouse gas emissions (kt of CO2 equivalent) "NY.GDP.PCAP.KD": "GDP.PCAP",#GDP per capita (constant 2010 US$) "NY.GDP.PCAP.PP.KD":"GDP.PCAP.PP", #GDP per capita, PPP (constant 2011 international $) "NY.GDP.PCAP.KD.ZG": "GDP.PCAP.GRO", #GDP per capita growth (annual %)
import numpy as np
import pandas as pd
import wbdata as wb
import matplotlib.pyplot as plt
import matplotlib as mpl

from pyeconomics.io import pwt

# Load the PWT data
data = pwt.load_pwt_data()

##### Divide countries into income groups #####


def _ids_for_income_level(level):
    """Return the ``id`` of every country wbdata lists for ``level``."""
    return [country['id'] for country in wb.get_country(incomelevel=level, display=False)]


# Low income countries
LIC_countries = _ids_for_income_level("LIC")

# Lower Middle income countries
LMC_countries = _ids_for_income_level("LMC")

# Upper Middle income countries
UMC_countries = _ids_for_income_level("UMC")

# High income countries
HIC_countries = _ids_for_income_level("HIC")

##### Plot GDP per worker #####
'Upper middle income': 'UMC', 'Uruguay': 'URY', 'Uzbekistan': 'UZB', 'Vanuatu': 'VUT', 'Venezuela, RB': 'VEN', 'Vietnam': 'VNM', 'Virgin Islands (U.S.)': 'VIR', 'West Bank and Gaza': 'PSE', 'World': 'WLD', 'Yemen, Rep.': 'YEM', 'Zambia': 'ZMB', 'Zimbabwe': 'ZWE'} # 1.2 Use wbdata to get lists of country codes by income groups countries_income_all = [i['id'] for i in wb.get_country(incomelevel=['LIC','MIC','HIC'],display=False)] countries_income_h = [i['id'] for i in wb.get_country(incomelevel=['HIC'],display=False)] countries_income_m = [i['id'] for i in wb.get_country(incomelevel=['MIC'],display=False)] countries_income_l = [i['id'] for i in wb.get_country(incomelevel=['LIC'],display=False)] countries_income_oecd = ['AUS','CAN','CHL','CZE','DNK','EST','HUN','ISL','ISR','JPN' ,'KOR','NZL','NOR''POL','SVK','SVN','SWE','CHE','USA'] # 2. Import data from Quandl # 2.1 Money supply (LCU) moneyDf = pd.DataFrame({}) for name,key in country_codes.items(): try:
def testUSA(self):
    """Query the single country id ``"USA"``."""
    usa_id = "USA"
    wbdata.get_country(country_id=usa_id)
# finally, compute the growth in technology pwt['technologyGrowth'] = pwt['technology'].pct_change() return pwt # Use the above function to grab the PWT data g0, delta, alpha = 0.02, 0.05, 0.33 data = get_SolowResiduals(rgdppc='rgdpl', rgdppw='rgdpwok', g0=g0, delta=delta, alpha=alpha) ##### Divide countries into income groups ##### # Low income countries LIC_countries = [country['id'] for country in \ wb.get_country(incomelevel="LIC", display=False)] # Lower Middle income countries LMC_countries = [country['id'] for country in \ wb.get_country(incomelevel="LMC", display=False)] # Upper Middle income countries UMC_countries = [country['id'] for country in \ wb.get_country(incomelevel="UMC", display=False)] # High income countries HIC_countries = [country['id'] for country in \ wb.get_country(incomelevel="HIC", display=False)] ##### Plot technology (i.e., the Solow resiual) ##### # plot the level of technology
# Date window handed to get_data below.
data_date = (
    datetime.datetime(2010, 1, 1),
    datetime.datetime(2011, 1, 1),
)
wbdata.get_data("IC.BUS.EASE.XQ", country=("USA", "BRA"), data_date=data_date)

# In[ ]:

wbdata.search_indicators("gdp per capita")

# In[ ]:

wbdata.get_incomelevel()

# In[ ]:

# Ids of every country returned for income level "OEC".
countries = [
    entry['id'] for entry in wbdata.get_country(incomelevel="OEC", display=False)
]

# Indicator code -> column name in the resulting frame.
indicators = {
    "IC.BUS.EASE.XQ": "doing_business",
    "NY.GDP.PCAP.PP.KD": "gdppc",
}
df = wbdata.get_dataframe(indicators, country=countries, convert_date=True)
df.describe()

# In[ ]:

# Correlate the two columns on complete rows only.
df = df.dropna()
df.gdppc.corr(df.doing_business)

# In[ ]:

# In[ ]:

# In[ ]:
import wbdata
import pandas as pd
import numpy as np
import datetime
import linear_regress

# The comprehension is kept on purpose: per the original author's note,
# taking get_country() out of the loop and deleting the loop raised a
# non-iterable-object error.
turkey = [i['id'] for i in wbdata.get_country(country_id="TUR")]

# Indicator code -> column label.
variables = {
    "SP.DYN.LE00.IN": "Life expectancy at birth, total (years)",
    "per_sa_allsa.avt_pop_tot": "Average per capita transfer - All Social Assistance",
}

dataframe = wbdata.get_dataframe(variables, country=turkey, convert_date=True)

# Rows with missing data are removed in place with DataFrame.dropna().
dataframe.dropna(inplace=True)
dataframe.to_csv('RegressionData_HW2.csv')

# .iloc splits the frame for linear_regress: the first column is passed as
# ind_var and all remaining columns as dep_var.
ind_var = dataframe.iloc[:, 0]
dep_var = dataframe.iloc[:, 1:]
linear_regress.linear_regress(ind_var, dep_var)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import wbdata

##### Extract data from World Bank API #####

# Want to grab measure of inflation (for comparison purposes)
indicators = {"FP.CPI.TOTL.ZG": "value"}


def _fetch_income_group(level):
    """Return ``(country ids, indicator frame)`` for one income level."""
    ids = [country['id'] for country in wbdata.get_country(incomelevel=level, display=False)]
    frame = wbdata.get_dataframe(indicators, country=ids, convert_date=False)
    return ids, frame


# Low income countries
LIC_countries, LIC_df = _fetch_income_group("LIC")

# Lower Middle income countries
LMC_countries, LMC_df = _fetch_income_group("LMC")

# Upper Middle income countries
UMC_countries, UMC_df = _fetch_income_group("UMC")

# High income countries
HIC_countries, HIC_df = _fetch_income_group("HIC")

##### plot FP.CPI.TOTL.ZG ####
fig = plt.figure(figsize=(12,8))
ax = fig.add_subplot(111)
import wbdata

# Print the country listing with every filter explicitly passed as None.
print(
    wbdata.get_country(
        country_id=None,
        incomelevel=None,
        lendingtype=None,
        display=None,
    )
)

# The string below is an unused expression holding disabled example calls.
'''wbdata.get_source("Doing Business") #print("Doing Business") wbdata.get_indicator(“IC.BUS.EASE.XQ”) wbdata.search_countries("us") wbdata.get_data("IC.BUS.EASE.XQ", country=USA)'''
std = math.sqrt(beta_Variance)  # standard deviation
# Interval of +/- 1.96 standard deviations around beta (presumably a 95% band).
lower_bound = beta - 1.96 * std
upper_bound = beta + 1.96 * std

# Solution 1: Scatter Plot Graph
pyplot.scatter(x,y, color = "b", marker = "o", s = 30)
pyplot.ylabel('GDP Per Capita (Current US$)')
pyplot.xlabel('Unemployment (% of total)')
pyplot.show()

# Solution 2 via Linear Regression function
model = LinearRegression()
model.fit(x,y)
r_sq = model.score(x,y)
print('coefficient of determination:',r_sq)
print('intercept:', model.intercept_)
print('slope:', model.coef_)
y_pred = model.predict(x)
# Print the label and the predictions on separate lines.
print('predicted response:', y_pred, sep='\n')

# csv file writing
countries = [i['id'] for i in wbdata.get_country(incomelevel="HIC", display=False)]
indicators = {"SL.UEM.TOTL.ZS": "unemployment_rate", "NY.GDP.PCAP.PP.CD": "gdppc"}
df = wbdata.get_dataframe(indicators, country=countries, convert_date=True)
df.to_csv('hw2_csv')
df.describe()
wbdata.get_data("IC.BUS.EASE.XQ", country=("USA", "BRA"), data_date=data_date)

# In[ ]:

wbdata.search_indicators("gdp per capita")

# In[ ]:

wbdata.get_incomelevel()

# In[ ]:

# Ids of every country returned for income level "OEC".
countries = [
    entry['id'] for entry in wbdata.get_country(incomelevel="OEC", display=False)
]

# Indicator code -> column name in the resulting frame.
indicators = {
    "IC.BUS.EASE.XQ": "doing_business",
    "NY.GDP.PCAP.PP.KD": "gdppc",
}
df = wbdata.get_dataframe(indicators, country=countries, convert_date=True)
df.describe()

# In[ ]:

# Correlate the two columns on complete rows only.
df = df.dropna()
df.gdppc.corr(df.doing_business)

# In[ ]:
import pandas as pd
import wbdata
import datetime
import sys

# Make the sibling helper directory importable before importing from it.
sys.path.append('../other/')
import connect

# Connect to database
con = connect.connector()

# Get countries, indicators
countries = wbdata.get_country(display=False)
indicators = wbdata.get_indicator(display=False)

# Collect CountryID
countryID = [entry['id'] for entry in countries]

# Collect indicatorID, starting at position 1000 of the indicator list
indicatorID = [entry['id'] for entry in indicators[1000:]]

# Make indDict for fetching data: indicator id -> same id with '.' replaced by '_'
indDict = {indStr: indStr.replace('.', '_') for indStr in indicatorID}

# fetch data
import wbdata import matplotlib.pyplot as plt import numpy as np import wbdata import datetime pisa2012_file = 'PISA_2012_clean.csv' df2 = pd.read_csv(pisa2012_file) pisa2015_file = 'PISA_2015_clean.csv' df2015 = pd.read_csv(pisa2015_file) pisa2008_file = 'PISA_2008_clean.csv' df2008 = pd.read_csv(pisa2008_file) countries = wbdata.get_country(display=False) d = dict((x['name'].strip(), x['iso2Code']) for x in countries) df3 = df2.assign(iso_code=df2.country.map(lambda x: d[x])) df2015_1 = df2015.assign(iso_code=df2015.country.map(lambda x: d[x])) df2008_1 = df2008.assign(iso_code=df2008.country.map(lambda x: d[x])) df1 = pd.read_csv('wb_data.csv', sep='\t') df_pisa_all = pd.concat([df2008_1, df3, df2015_1]) df_pisa_all.sort(['country', 'iso_code'], inplace=True) #print(df_pisa_all) #exit(0) df4 = pd.merge(df1, df_pisa_all,