def getWBDataFromWeb(self, pStockCode, pStart, pEnd):
    """Download the IC.BUS.EASE.XQ indicator for the USA and GBR.

    Reference: https://wbdata.readthedocs.io/en/latest/

    ``pStockCode``, ``pStart`` and ``pEnd`` are accepted but never read:
    the indicator, the two countries and the 2010-2011 window are
    hard-coded below.

    Returns:
        The rows returned by ``wbdata.get_data``; the same object is also
        stored on ``self.data``.
    """
    # Catalogue/search calls kept in their original order.
    wbdata.get_source()
    wbdata.get_indicator(source=1)
    wbdata.search_countries("united")

    window_start = datetime.datetime(2010, 1, 1)
    window_end = datetime.datetime(2011, 1, 1)
    self.data = wbdata.get_data(
        "IC.BUS.EASE.XQ",
        country=("USA", "GBR"),
        data_date=(window_start, window_end),
    )

    # Echo every record prefixed by its country id.
    for record in self.data:
        country_id = record['country']['id']
        print(country_id, record)

    # Disabled dataframe variant, kept for reference:
    # indicators = {"IC.BUS.EASE.XQ": "doing_business", "NY.GDP.PCAP.PP.KD": "gdppc"}
    # df = wbdata.get_dataframe(indicators, country=countries, convert_date=True)
    # df.describe()
    return self.data
def retrieve_country_information_from_wb_api():
    """
    Fetch the World Bank country records, which carry fields such as the
    income level, by running a country search with an empty query.

    >>> [x['incomeLevel'] for x in retrieve_country_information_from_wb_api() if x.get('id') =='ABW']
    [{'id': 'HIC', 'value': 'High income'}]
    """
    empty_query = ""
    country_records = wbdata.search_countries(empty_query, display=False)
    return country_records
def get_countries_as_iso_codes():
    """
    Return the World Bank ids of the European countries as a list.

    Every name produced by ``get_european_countries()`` is looked up with
    ``wbdata.search_countries``. A name is kept only when the search yields
    exactly one match, so unknown or ambiguous names are skipped silently.

    Returns:
        list: the ``id`` field of each uniquely matched country.
    """
    iso_codes = {}
    for country in get_european_countries():
        matches = wbdata.search_countries(country, display=False)
        if len(matches) == 1:
            iso_codes[country] = matches[0]['id']
    # Materialise the dict view: the documented contract is a list, and a
    # view can be neither indexed nor serialised like one.
    return list(iso_codes.values())
# Ease of business: exploratory walk through the wbdata API.
import wbdata

# Indicator codes that several calls below share.
_BUSINESS_INDICATOR = "IC.REG.COST.PC.MA.ZS"
_GDP_INDICATOR = "NY.GDP.PCAP.KD.ZG"

wbdata.get_source()
wbdata.get_indicator(source=1)

# Country codes are discovered by searching on (part of) the name.
wbdata.search_countries("Turkey")  # -> TUR
wbdata.get_data(_BUSINESS_INDICATOR, country="TUR")[0]
wbdata.search_countries("united")  # -> GBR
wbdata.get_data(_BUSINESS_INDICATOR, country="GBR")

import datetime

# Same indicator, two countries, limited to a date window.
data_date = (
    datetime.datetime(2010, 1, 1),
    datetime.datetime(2011, 1, 1),
)
wbdata.get_data(
    _BUSINESS_INDICATOR,
    country=("USA", "GBR"),
    data_date=data_date,
)

# Indicator found through a free-text search.
wbdata.search_indicators("gdp per capita")
wbdata.get_data(_GDP_INDICATOR)
wbdata.get_data(_GDP_INDICATOR, country="USA")
wbdata.get_data(_GDP_INDICATOR, country="OED")

# Filter countries on income level.
wbdata.get_incomelevel()
countries = [
    entry["id"]
    for entry in wbdata.get_country(incomelevel="HIC", display=False)
]
indicators = {
    _BUSINESS_INDICATOR: "doing_business",
    "NY.GDP.PCAP.PP.KD": "gdppc",
}
import wbdata as wbd

# Example 1: total population of Ireland from 1960 to 2012.
country1 = ["IE"]  # must be a valid country code, wrapped in a list
indicator1 = {"SP.POP.TOTL": "Total Population"}  # pre-defined indicator code -> label

# The data arrives in reverse order by default, hence the sort on the index.
_unsorted1 = wbd.get_dataframe(indicator1, country1)
data1 = _unsorted1.sort_index()
data1.head()
data1.plot()

# That works, but how do you find other countries?
wbd.get_country()  # the full list is too long; searching is easier
wbd.search_countries("South")

# And how do you find other indicators?
# wbd.get_indicator()  # too slow
wbd.search_indicators("GDP")  # too many hits!

# Narrow things down by source ...
wbd.get_source()
# ... or by topic ...
wbd.get_topic()
# ... and then search within it.
wbd.search_indicators("CO2", topic=19)

# Several countries at once.
country2 = ["IE", "US", "CN"]  # Ireland, USA, China
indicator2 = {"EN.ATM.CO2E.KT": "CO2 emissions (kt)"}
def scrape():
    """Build the Africa disaster/population dataset and read it back from Postgres.

    This is a notebook export wrapped in a function; the ``# In[n]:`` markers
    are the original cell boundaries. Steps, in order:

    1. Load the IDMC disaster CSV and tag each row with a two-letter country
       code and a continent code via ``pycountry_convert``.
    2. Keep the rows whose continent code is ``"AF"``.
    3. Download the ``SP.POP.TOTL`` indicator for a fixed list of codes and
       the 2008-2019 window through ``wbdata``.
    4. Left-merge the two frames on country code, year and country, rename
       the columns, drop incomplete rows and write ``merged.csv``.
    5. Query the ``merged_data`` table through SQLAlchemy.

    ``pd`` and ``create_engine`` must be provided by the enclosing module.

    Returns:
        The DataFrame produced by ``select * from merged_data``.
    """
    df = pd.read_csv(
        "../SecondProject2/Resources/Project2_idmc_disaster_all_dataset.csv")
    df.head()

    # In[4]:

    import pycountry_convert as pc

    # In[5]:

    # One-off sanity check of the two conversions.
    country_code = pc.country_name_to_country_alpha2("China",
                                                     cn_name_format="default")
    print(country_code)
    continent_name = pc.country_alpha2_to_continent_code(country_code)
    print(continent_name)

    # In[6]:

    # From here on the two names above are rebound to the helper functions.
    def country_code(name):
        """Return the alpha-2 code for *name*, or "N/A" if conversion fails."""
        try:
            code = pc.country_name_to_country_alpha2(name,
                                                     cn_name_format="default")
            return code
        except Exception:
            # Best effort: any conversion failure maps to the placeholder,
            # but KeyboardInterrupt/SystemExit are no longer swallowed.
            return "N/A"

    def continent_name(name):
        """Return the continent code for alpha-2 code *name*, or "N/A"."""
        try:
            continent = pc.country_alpha2_to_continent_code(name)
            return continent
        except Exception:
            return "N/A"

    df["Country_2D"] = df["Country Name"].apply(country_code)
    df["Continent"] = df["Country_2D"].apply(continent_name)

    # In[7]:

    df.head()

    # In[8]:

    df_Africa = df[df["Continent"] == "AF"]
    df_Africa.head()

    # In[9]:

    df_africa = df_Africa.rename(columns={
        'Country Name': 'Country',
    })
    df_africa.head()

    # In[10]:

    df_africa.columns

    # In[11]:

    df.groupby("Continent").count()

    # In[12]:

    #Import Dependencies
    import os
    import requests
    import json
    import pprint
    import numpy as np
    import flask
    import wbdata
    import datetime

    # In[13]:

    # NOTE(review): this list is not read anywhere in the function.
    countries = [
        'algeria', 'angola', 'benin', 'botswana', 'burkina faso', 'burundi',
        'cabo verde', 'cameroon', 'central african republic', 'chad',
        'comoros', 'congo', "cote d'ivoire", 'djibouti', 'egypt',
        'equatorial guinea', 'esqtini', 'ethiopia', 'gabon', 'gambia',
        'ghana', 'guinea', 'guinea-bissau', 'kenya', 'lesotho', 'liberia',
        'libya', 'madagascar', 'malawi', 'mali', 'mauritania', 'mauritius',
        'morocco', 'mozambique', 'namibia', 'niger', 'nigeria', 'rwanda',
        'sao tome and principe', 'senegal', 'seychelles', 'sirre leone',
        'somalia', 'south africa', 'south sudan', 'sudan', 'tanzania', 'togo',
        'tunisia', 'uganda', 'zambia', 'zimbabwe'
    ]
    # Codes actually sent to the World Bank API.
    country_codes = [
        'AGO', 'ALB', 'ARB', 'BDI', 'BEN', 'BFA', 'BMN', 'BSS', 'BWA', 'CAA',
        'CAF', 'CIV', 'CME', 'CMR', 'COG', 'COM', 'CPV', 'DJI', 'DMN', 'DSF',
        'DSS', 'DZA', 'EGY', 'ETH', 'GAB', 'GHA', 'GMB', 'GNB', 'GNQ', 'KEN',
        'LBR', 'LSO', 'MAR', 'MDG', 'MEA', 'MLI', 'MNA', 'MOZ', 'MRT', 'MUS',
        'MWI', 'NAF', 'NAM', 'NER', 'NGA', 'NLS', 'NRS', 'RRS', 'RSO', 'RWA',
        'SDN', 'SLE', 'SOM', 'SSA', 'SSD', 'SSF', 'SWZ', 'SXZ', 'SYC', 'TCD',
        'TGO', 'TMN', 'TSS', 'TUN', 'TZA', 'UGA', 'XZN', 'ZAF', 'ZMB', 'ZWE'
    ]
    indicators = "SP.POP.TOTL"
    data_date = datetime.datetime(2008, 1, 1), datetime.datetime(2019, 1, 1)
    wbdata.get_indicator(source=50)

    # In[14]:

    wbdata.search_countries('')

    # In[15]:

    data = wbdata.get_data(indicators,
                           country=country_codes,
                           data_date=data_date)
    df_wbdata = pd.DataFrame(data)
    df_wbdata = df_wbdata.rename(
        columns={
            "indicator": "Indicator",
            "country": "Country",
            "countryiso3code": "Country code",
            "date": "Year",
            "value": "Population",
        })
    df_wbdata = df_wbdata.filter(
        items=['Country', 'Country code', 'Year', 'Population'])
    df_wbdata.dropna(inplace=True)
    df_wbdata['Country'] = df_wbdata['Country'].astype(str)
    df_wbdata['Country code'] = df_wbdata['Country code'].astype(str)
    df_wbdata['Year'] = df_wbdata['Year'].astype(str)
    df_wbdata['Population'] = df_wbdata['Population'].astype(str)
    # The country cell was stringified above; cut a fixed-width prefix and
    # the two trailing characters to leave the name.
    # NOTE(review): assumes the text looks like {'id': 'XX', 'value': '<name>'}
    # with a two-character id - confirm against the API payload.
    df_wbdata['Country'] = df_wbdata['Country'].str.slice(23, -2)
    df_wbdata

    # In[16]:

    # Align the dtypes of the merge key.
    df_africa['Year'] = df_africa['Year'].astype('int64')
    df_wbdata['Year'] = df_wbdata['Year'].astype('int64')

    # In[17]:

    merged_df = pd.merge(left=df_africa,
                         right=df_wbdata,
                         how="left",
                         on=['Country code', 'Year', 'Country'])
    merged_df.head()

    # In[18]:

    merged_df = merged_df.rename(
        columns={
            'Country code': 'Country_Code',
            'Start Date': 'Start_Date',
            'Event Name': 'Event_Name',
            'Hazard Category': 'Hazard_Category',
            'Hazard Type': 'Hazard_Type',
            'New Displacements': 'New_Displacements',
        })
    merged_df.head()
    merged_df = merged_df.dropna()
    merged_df
    merged_df[merged_df['Population'].isna()].count()
    merged_df.to_csv('merged.csv', index=False)

    #Create the engine and pass in Postgresql
    engine = create_engine(
        'postgresql://*****:*****@localhost/project2_db')
    engine.table_names()
    query = pd.read_sql_query('select * from merged_data', con=engine)
    return (query)
# -*- coding: utf-8 -*-
"""
Created on Sun Mar 8 02:01:46 2020

@author: udarici19
"""

import wbdata
import datetime

# Observation window passed to the download below.
data_date = (
    datetime.datetime(1960, 1, 1),
    datetime.datetime(2019, 1, 1),
)

# Searches used to find the codes; the code that was picked follows each call.
wbdata.search_countries("United States")  # USA
wbdata.search_indicators("Labor force participation rate, female")  # SL.TLF.CACT.FE.NE.ZS
wbdata.search_indicators("Fertility Rate")  # SP.DYN.TFRT.IN
wbdata.search_indicators("GDP per capita")  # NY.GDP.PCAP.CD

# Indicator code -> column label of the resulting frame.
_COLUMN_LABELS = {
    "NY.GDP.PCAP.CD": "GDPPC",
    "SP.DYN.TFRT.IN": "FR",
    "SL.TLF.CACT.FE.NE.ZS": "FLFP",
}
df = wbdata.get_dataframe(_COLUMN_LABELS, country="USA", data_date=data_date)

df.to_csv("data.csv")
df.describe()
def testSearchCountry(self):
    """Run a country search for "United"; the result is not inspected."""
    query = "United"
    wbdata.search_countries(query)
# Cache # use the cache # Returns # a WBDataFrame import pandas as pd import wbdata as wb import datetime # search for data sources in world bank data wb.get_source() wb.get_indicator(source=16) # do country search wb.search_countries('united') # do wild search wb.search_countries('niger*') # get data for country # SE.ADT.1524.LT.FM.ZS Literacy rate, youth (ages 15-24), gender parity index (GPI) # return a multi-dictionary(based on year) list wb.get_data("SE.ADT.1524.LT.FM.ZS", country="USA") # selecting data range date_range = datetime.datetime(2008, 1, 1), datetime.datetime(2019, 1, 1) # SH.CON.1524.FE.ZS Condom use, population ages 15-24, female (% of females ages 15-24) # SH.CON.1524.MA.ZS Condom use, population ages 15-24, male (% of males ages 15-24) wb.get_data("SH.CON.1524.MA.ZS", country=["USA", "GBR", "NGA"],
# In[ ]:

wbdata.get_source()

# In[ ]:

# Source 1: Doing Business
wbdata.get_indicator(source=1)

# In[ ]:

wbdata.search_countries("Brazil")

# In[ ]:

# Signature reminder:
# wbdata.get_data(indicator, country=u'all', data_date=None, convert_date=False, pandas=False,
#                 column_name=u'value', keep_levels=False)
_EASE_INDICATOR = "IC.BUS.EASE.XQ"
wbdata.get_data(_EASE_INDICATOR, country=u"BRA")

# In[ ]:

# Same indicator for two countries, restricted to a date window.
data_date = (
    datetime.datetime(2010, 1, 1),
    datetime.datetime(2011, 1, 1),
)
wbdata.get_data(
    _EASE_INDICATOR,
    country=("USA", "BRA"),
    data_date=data_date,
)
import pandas as pd
import wbdata
import numpy
import datetime
import LinearRegression

# Indicator codes and the country used throughout.
_POPULATION = "SP.POP.TOTL"
_GDP = "NY.GDP.MKTP.CD"
_NORWAY = "NOR"

wbdata.get_source()
wbdata.get_indicator(source=1)

# Look up country codes with a search.
wbdata.search_countries("Poland")

# Look up indicators with a search.
wbdata.search_indicators("population, total")
wbdata.get_data(_POPULATION, country=_NORWAY)  # total population of Norway
wbdata.search_indicators("GDP")
wbdata.get_data(_GDP, country=_NORWAY)  # GDP of Norway

# The same two downloads, wrapped in DataFrames.
_population_rows = wbdata.get_data(_POPULATION, country=_NORWAY)
a = pd.DataFrame.from_dict(_population_rows)
a[["value", "date"]]
_gdp_rows = wbdata.get_data(_GDP, country=_NORWAY)
b = pd.DataFrame.from_dict(_gdp_rows)
b[["value", "date"]]

# NOTE(review): data_date is defined here but not passed to get_dataframe
# below - confirm whether the window was meant to apply.
data_date = (
    datetime.datetime(1990, 1, 1),
    datetime.datetime(2018, 1, 1),
)
indicators = {
    _POPULATION: "Total Population",
    _GDP: "GDP",
}
df = wbdata.get_dataframe(indicators, country=[_NORWAY], convert_date=True)
# the World Bank for a particular indicator, country and year
import wbdata as wbd
from datetime import datetime


# Q3
def IQR(x):
    """Return the interquartile range of *x*: 75th minus 25th percentile."""
    return x.quantile(0.75) - x.quantile(0.25)


# Rounded IQR of the ages of the female passengers who survived.
# `titanic` is expected to be defined earlier in the file.
age_female_sur_quan = round(
    titanic.age[titanic.sex == 'female'][titanic.survived == 1].agg(IQR))
print('age_female_survived_quartile:', age_female_sur_quan)

# Q4
# The id of the first search hit is taken as the country code.
country_code = wbd.search_countries('Indonesia')[0]['id']
print('country code:', country_code)

# Q5
indicator = {'SH.STA.OWAD.ZS': 'prevalence of overweight'}
# Read the single cell with one positional lookup: `.iloc[0][0]` applied an
# integer to a label-indexed row, which pandas deprecates.
per = round(
    wbd.get_dataframe(indicator,
                      wbd.search_countries('Ireland')[0]['id'],
                      datetime(2010, 1, 1)).iloc[0, 0])
print('the prevalence of overweight adults:', per)

# Q6 to Q8 relate to webscraping and uses the Spotify weekly charts. You will need
# to import BeautifulSoup and the requests package
from bs4 import BeautifulSoup
import requests
import re
# Save countries in a Dictionary and checking that use must give at least one Country countries = [] if (len(sys.argv) >= 2): for i in range(1, len(sys.argv)): countries.append(str(sys.argv[i])) else: print "Error you passed", len(sys.argv) - 1, 'countries' quit() # Save the details about the country in a Dictionary country_dict = {} for c in countries: # Get data about the countries using World bank Data API, if the country name is not in data set tell user that that perticuler country is not in dataset dat = wbdata.search_countries(c, incomelevel=False, lendingtype=False, display=False) if len(dat) == 0: raise SystemExit('The country ' + c + ' is not in dataset.') else: name = dat[0][u'name'] latitude = dat[0][u'latitude'] longitude = dat[0][u'longitude'] region = dat[0][u'region'][u'value'] capital = dat[0][u'capitalCity'] income_level = dat[0][u'incomeLevel'][u'value'] country_code = dat[0][u'iso2Code'] country_dict[name] = { "latitude": latitude, "longitude": longitude, "country_code": country_code,
s_yr = int(a) elif o == "-e": global e_yr e_yr = int(a) else: assert False, "unhandled option" except Usage, err: print >> sys.stderr, err.msg print >> sys.stderr, "for help use --help" return 2 # start timing the script time0 = datetime.datetime.utcnow() # get all entities in DB all_entities = wbdata.search_countries('', display=False) topics = wbdata.get_topic(display=False) indicators = wbdata.get_indicator(display=False) if None in (all_entities, topics, indicators): return 2 # declare some blank lists to hold parsed data cnames = [] anames = [] countries = [] aggregates = [] # separate countries from aggregates for c in all_entities: if not c['incomeLevel']['value'] == 'Aggregates': cnames.append(c['name'])
#!/usr/bin/env python # coding: utf-8 # In[1]: # Step 1. Import necessary libraries import wbdata import datetime import pandas as pd # Step 2. Retrieve abbreviations of South American countries south_american_countries=['Venezuela', 'Uruguay', 'Suriname', 'Peru', 'Paraguay', 'Guyana', 'Ecuador', 'Colombia', 'Chile', 'Brazil', 'Bolivia', 'Argentina'] south_american_country_codes=[] error_log=[] for country in south_american_countries: temp = wbdata.search_countries(country,display=False) if len(temp)==1: south_american_country_codes.append(temp[0]['id']) else: error_log.append(country+': Either I could not find the country or there are more than one country with similar names.') if (len(south_american_countries)==len(south_american_country_codes)): print('All of the countries parsed successfully.') else: print('There were some errors. Please run print(error_log) to see errors.') # Step 3. Specify indicators indicators = {'FP.CPI.TOTL.ZG':'3_inflation', 'SP.POP.0014.TO.ZS': '0_control_children_population_percentage', 'ccx_lf_pop_tot':'2_labor_force_participation_rate', 'ccx_wka_pop_tot':'1_working_age_population_percentage', 'per_nprog.overlap_pop_tot':'4_population_not_rec_social_protection',
# Labels for the plot built before this point.
plt.title("GNI Per Capita ($USD, Atlas Method)")
plt.xlabel('Date')
# Closing parenthesis restored so the axis label matches the title.
plt.ylabel('GNI Per Capita ($USD, Atlas Method)')

# In[ ]:

wbdata.get_source()

# In[ ]:

# Source 1: Doing Business
wbdata.get_indicator(source=1)

# In[ ]:

wbdata.search_countries("Brazil")

# In[ ]:

# Signature reminder:
#wbdata.get_data(indicator, country=u'all', data_date=None, convert_date=False, pandas=False,
#column_name=u'value', keep_levels=False)
wbdata.get_data("IC.BUS.EASE.XQ", country=u'BRA')

# In[ ]:

# Same indicator for two countries, restricted to a date window.
data_date = (datetime.datetime(2010, 1, 1), datetime.datetime(2011, 1, 1))
wbdata.get_data("IC.BUS.EASE.XQ", country=("USA", "BRA"), data_date=data_date)

# In[ ]:

wbdata.search_indicators("gdp per capita")
### Visit https://pypi.org/project/wbdata/ for instructions on installing the wbdata library
import wbdata

### Data sources
wbdata.get_source()

### Indicators available in source 1
wbdata.get_indicator(source=1)

### Search countries by their ISO3C code
wbdata.search_countries("tun")

### Search for indicators
wbdata.search_indicators("gdp per capita")

### Extract some data for given dates.
### Import the time library.
import time
from datetime import date

date.today()

### Extracting data for a given indicator
## Date range
_first_day = date(1960, 1, 1)
_last_day = date(2017, 1, 1)
data_date = (_first_day, _last_day)
data_date

### Extracting the data for indicator NY.GDP.PCAP.KD.ZG
dd = wbdata.get_data(
    indicator="NY.GDP.PCAP.KD.ZG",
    data_date=data_date,
    country="tun",
)
dd

### Importing the pandas package
import pandas as pd

### Turning dd into a DataFrame
dd = pd.DataFrame(dd)
dd.head()
# Replace 'South Korea' entries with 'Korea, Rep.' before the lookups below.
# NOTE(review): `ind` is defined outside this span; if it is a DataFrame,
# this chained assignment has the same pitfall as the one fixed for `data`
# at the end of this block - confirm.
for idx, item in enumerate(ind['country']):
    if 'South Korea' in item:
        ind['country'][idx] = 'Korea, Rep.'

# Unique country names, sorted, so each is searched once.
country_list = list(ind['country'])
new_country_set = sorted(set(country_list))

country_name = []
region = []
income = []
for country in new_country_set:
    hoop = wbdata.search_countries(country)
    # Only the first search hit is used.
    for key, val in hoop[0].items():
        if key == 'name':
            country_name.append(val)
        if key == 'region':
            region.append(val['value'])
        if key == 'incomeLevel':
            income.append(val['value'])

data = pd.DataFrame()
data['country'] = country_name
data['region'] = region
data['income'] = income

# Manual override of row 59. Written through .loc because the chained form
# data['country'][59] = ... may assign into a temporary and leave `data`
# untouched.
data.loc[59, 'country'] = 'South Africa'  ### Don't forget about me!!!
data.loc[59, 'region'] = 'Sub-Saharan Africa '
def search_countries(self, search='United States'):
    """Delegate a country search for *search* to ``wb.search_countries``.

    Returns whatever ``wb.search_countries`` returns, unchanged.
    """
    matches = wb.search_countries(search)
    return matches