def getWBDataFromWeb(self, pStockCode, pStart, pEnd):
    """Fetch a fixed World Bank sample series and keep it on ``self.data``.

    ``pStockCode``, ``pStart`` and ``pEnd`` are accepted but never read:
    the indicator ("IC.BUS.EASE.XQ"), the countries (USA, GBR) and the
    2010-2011 date window are all hard-coded in the body.

    API reference: https://wbdata.readthedocs.io/en/latest/

    Returns:
        The value returned by ``wbdata.get_data``, which is also stored in
        ``self.data``.
    """
    # Exploratory look-ups; their return values are discarded.
    wbdata.get_source()
    wbdata.get_indicator(source=1)
    wbdata.search_countries("united")

    window_start = datetime.datetime(2010, 1, 1)
    window_end = datetime.datetime(2011, 1, 1)
    self.data = wbdata.get_data(
        "IC.BUS.EASE.XQ",
        country=("USA", "GBR"),
        data_date=(window_start, window_end),
    )

    for record in self.data:
        print(record['country']['id'], record)

    # A DataFrame-based variant was sketched here and left disabled:
    #   indicators = {"IC.BUS.EASE.XQ": "doing_business", "NY.GDP.PCAP.PP.KD": "gdppc"}
    #   df = wbdata.get_dataframe(indicators, country=countries, convert_date=True)
    #   df.describe()
    return self.data
def testOneTopic(self):
    """Smoke-test fetching a single topic by id.

    The call is made through ``get_topic`` so that this test exercises the
    topic endpoint its name refers to; it passes when no exception is raised.
    """
    wbdata.get_topic("3")
def testGetAllTopics(self):
    """Smoke-test listing every topic.

    Uses ``get_topic`` with no id so the topic listing is actually covered;
    the test passes when no exception is raised.
    """
    wbdata.get_topic()
def testOneSource(self):
    """Smoke-test fetching the single source with id 31.

    There is no assertion; the test passes when the call does not raise.
    """
    wbdata.get_source(source_id=31)
def testGetAllSources(self):
    """Smoke-test listing every source.

    There is no assertion; the test passes when the call does not raise.
    """
    wbdata.get_source()
def _consecutive_runs(values, stepsize=1):
    """Split *values* into sub-arrays of entries that differ by *stepsize*."""
    return np.split(values, np.where(np.diff(values) != stepsize)[0] + 1)


def plotNANperindicator(data, background=False):
    """Plot the share of NaN cells per indicator, sorted from most to least.

    Args:
        data: DataFrame whose columns are a MultiIndex with the indicator
            code on level 1 (level 0 is presumably the country -- confirm
            against the caller). It must contain the 'SP.POP.TOTL'
            indicator, whose cross-section size is the denominator of the
            percentage.
        background: when True, shade the plot background by the World Bank
            source each indicator belongs to and add a legend naming the
            sources. This queries ``wbdata`` for sources and indicators.

    Returns:
        None. The figure is displayed with ``plt.show()``.

    Raises:
        KeyError: if 'SP.POP.TOTL' is not among the level-1 column labels.
    """
    indicators = list(set(data.columns.levels[1]))

    # NaN count for each indicator across its whole cross-section.
    how_many_nans = [
        data.xs(code, level=1, axis=1).isnull().astype(int).values.sum()
        for code in indicators
    ]
    # Positions of the indicators ordered from most NaN to fewest.
    order = np.argsort(how_many_nans)[::-1]

    if background:
        sources = wbdata.get_source(display=False)
        id_to_source_name = {src['id']: src['name'] for src in sources}
        indicator_to_id = {
            item['id']: item['source']['id']
            for item in wbdata.get_indicator(display=False)
        }
        # Indicators unknown to the API are grouped under a placeholder source.
        for code in indicators:
            indicator_to_id.setdefault(code, -1)
        id_to_source_name[-1] = 'NaN'

        # Source id of every indicator, in plotting order.
        sorted_source_ids = [indicator_to_id[indicators[k]] for k in order]
        # First-appearance order keeps the colour assignment deterministic
        # (ids mix str and int, so they cannot simply be sorted).
        distinct_source_ids = list(dict.fromkeys(sorted_source_ids))
        colors = plt.get_cmap('jet_r')(
            np.linspace(0, 1.0, len(distinct_source_ids)))

    # Loop-invariant denominator: number of cells in one indicator's slice.
    total_cells = data.xs('SP.POP.TOTL', level=1, axis=1).size

    plt.figure(figsize=(10, 5))
    plt.plot(range(len(indicators)),
             [100 * (how_many_nans[k] / total_cells) for k in order])
    if background:
        plt.title(
            'Number of NaN values per indicator, sorted (background = indicator label)',
            fontsize=15)
    else:
        plt.title('Number of NaN values per indicator, sorted', fontsize=15)
    plt.xlabel('Indicators', fontsize=15)
    plt.ylabel('# NaN values (% of total)', fontsize=15)
    plt.xticks([])

    if background:
        legend_handles = []
        for shade, source_id in zip(colors, distinct_source_ids):
            positions = [
                pos for pos, sid in enumerate(sorted_source_ids)
                if sid == source_id
            ]
            # Label the patch with the source name so the legend is readable.
            legend_handles.append(
                mpatches.Patch(color=shade, alpha=0.3,
                               label=id_to_source_name[source_id]))
            # One shaded span per run of adjacent x positions, e.g.
            # [1, 2, 3, 5, 7, 8, 9] -> spans 1-3, 5-5 and 7-9.
            for run in _consecutive_runs(positions):
                plt.axvspan(run[0], run[-1], color=shade, alpha=0.3, lw=2.0)
        plt.legend(handles=legend_handles,
                   loc='upper center',
                   bbox_to_anchor=(0.5, -0.06),
                   fancybox=True,
                   shadow=True,
                   # At least one column: fewer than five sources used to
                   # yield ncol=0, which matplotlib cannot lay out.
                   ncol=max(1, len(distinct_source_ids) // 5),
                   fontsize='small')
    plt.show()
def get_categories():
    """Return a list of ``(id, name)`` tuples, one per wbdata source."""
    categories = []
    for source in wbdata.get_source(display=False):
        categories.append((source['id'], source['name']))
    return categories
# Notes on keyword arguments (fragment):
#   keep_levels - if True, don't reduce the number of index levels returned
#                 when only getting one date or country
#   cache       - use the cache
#   returns     - a WBDataFrame
import datetime

import pandas as pd
import wbdata as wb

# Search the World Bank data sources, then the indicators of source 16.
wb.get_source()
wb.get_indicator(source=16)

# Country search: plain text first, then a wildcard pattern.
wb.search_countries('united')
wb.search_countries('niger*')

# Data for one country.
# SE.ADT.1524.LT.FM.ZS = Literacy rate, youth (ages 15-24), gender parity
# index (GPI). The result is a list of dictionaries keyed by year.
wb.get_data("SE.ADT.1524.LT.FM.ZS", country="USA")

# Date range to select.
date_range = (datetime.datetime(2008, 1, 1), datetime.datetime(2019, 1, 1))
# Pulls information from the World Bank API.
import wbdata

# Every available source, printed as "<id> <name>".
available_info = wbdata.get_source()
for entry in available_info:
    print(entry['id'], entry['name'])

# G20 Financial Inclusion Indicators (source 33).
inclusion_indicators = wbdata.get_indicator(source=33)
print(inclusion_indicators)

# CPTOTSAXN for the USA, all years: 'The consumer price index reflects the
# change in prices for the average consumer of a constant basket of consumer
# goods. Data is in nominal terms and seasonally adjusted.'
cpi = wbdata.get_data('CPTOTSAXN', country='USA')
print(cpi)
import wbdata as wb

# Keep the source listing, then query the indicators of source 1
# (that second result is not stored).
results = wb.get_source()
wb.get_indicator(source=1)
def search_sources(self):
    """Call ``wb.get_source()``; the result is not returned or stored."""
    wb.get_source()
from pprint import pprint

import wbdata

# Fetch the source listing and pretty-print it.
news = wbdata.get_source()
pprint(news)
# Gives data in reverse order by default data1 = wbd.get_dataframe(indicator1, country1).sort_index() data1.head() data1.plot() # This is fine but what if you need to find different countries? wbd.get_country() # Too long a list, easier to search wbd.search_countries('South') # What if you want to get different indicators #wbd.get_indicator() # Too slow wbd.search_indicators('GDP') # Too many! # Perhaps instead look by source wbd.get_source() # or topic wbd.get_topic() # Now search wbd.search_indicators('CO2', topic=19) # What about getting multiple countries country2 = ['IE', 'US', 'CN'] # Ireland, USA, China indicator2 = {'EN.ATM.CO2E.KT': 'CO2 emissions (kt)'} # Get the data data2 = wbd.get_dataframe(indicator2, country2).sort_index() # Need to unstack to get this into proper order data2_u = data2.unstack(level=0) data2_u.head() data2_u.plot()
# Ease of business
import datetime

import wbdata

wbdata.get_source()
wbdata.get_indicator(source=1)

# Find country codes with a search, then pull one indicator for each.
wbdata.search_countries('Turkey')  # TUR
wbdata.get_data('IC.REG.COST.PC.MA.ZS', country='TUR')[0]
wbdata.search_countries('united')  # GBR
wbdata.get_data('IC.REG.COST.PC.MA.ZS', country='GBR')

# Same indicator, two countries, restricted to a date window.
data_date = (datetime.datetime(2010, 1, 1), datetime.datetime(2011, 1, 1))
wbdata.get_data(
    "IC.REG.COST.PC.MA.ZS",
    country=("USA", "GBR"),
    data_date=data_date,
)

# GDP per capita: all countries, then single country codes.
wbdata.search_indicators("gdp per capita")
wbdata.get_data('NY.GDP.PCAP.KD.ZG')
wbdata.get_data('NY.GDP.PCAP.KD.ZG', country='USA')
wbdata.get_data('NY.GDP.PCAP.KD.ZG', country='OED')

# Income level filter: ids of every country in the "HIC" income level.
wbdata.get_incomelevel()
countries = [
    entry['id']
    for entry in wbdata.get_country(incomelevel="HIC", display=False)
]
indicators = {
    "IC.REG.COST.PC.MA.ZS": "doing_business",
    "NY.GDP.PCAP.PP.KD": "gdppc",
}
import datetime as dt

import wbdata

# Indicator code -> column name. (Another code noted here: NY.GDP.DEFL.KD.ZG)
indicatorSelection = {"DT.DOD.DLXF.CD": "ExternalDebtStock"}
locationSelection = ["US", "SSA", "SAS", "LAC", "MNA", "EAP"]
timeSelection = (
    dt.datetime(2009, 1, 1),
    dt.datetime(2018, 12, 31),
)

# Look up source 6 and print what comes back.
IDS = wbdata.get_source(source_id=6)
print(IDS)
# NOTE(review): `indicators`, `countries` and `plt` are not defined in this
# fragment; presumably they come from earlier cells -- confirm.

# Grab the indicators above for the countries above and load them into a
# data frame.
df = wbdata.get_dataframe(indicators, country=countries, convert_date=False)

# df is "pivoted"; pandas' unstack function reshapes it into something
# plottable.
dfu = df.unstack(level=0)

# A simple matplotlib plot with legend, labels and a title.
dfu.plot()
plt.legend(loc='best')
plt.title("GNI Per Capita ($USD, Atlas Method)")
plt.xlabel('Date')
# The closing parenthesis was missing from this label.
plt.ylabel('GNI Per Capita ($USD, Atlas Method)')

# In[ ]:
wbdata.get_source()

# In[ ]:
# 1 Doing Business
wbdata.get_indicator(source=1)

# In[ ]:
wbdata.search_countries("Brazil")

# In[ ]:
def print_wb_sources():
    """Call ``wbdata.get_source()``; the result is not returned.

    NOTE(review): any printing is left to ``wbdata`` itself -- confirm that
    the installed version displays the listing when called this way.
    """
    wbdata.get_source()