Beispiel #1
0
    def getWBDataFromWeb(self, pStockCode, pStart, pEnd):
        """Fetch the "IC.BUS.EASE.XQ" indicator for USA and GBR and store it.

        Reference: https://wbdata.readthedocs.io/en/latest/

        The result of ``wbdata.get_data`` is saved on ``self.data``, every row
        is printed together with its country id, and the data is returned.

        NOTE(review): ``pStockCode``, ``pStart`` and ``pEnd`` are accepted but
        never read; the date range is fixed to 2010-01-01 .. 2011-01-01.
        """
        # Exploratory calls; their return values are discarded.
        wbdata.get_source()
        wbdata.get_indicator(source=1)
        wbdata.search_countries("united")

        period_start = datetime.datetime(2010, 1, 1)
        period_end = datetime.datetime(2011, 1, 1)
        self.data = wbdata.get_data(
            "IC.BUS.EASE.XQ",
            country=("USA", "GBR"),
            data_date=(period_start, period_end),
        )

        for record in self.data:
            print(record['country']['id'], record)

        # Disabled dataframe variant kept for reference:
        #indicators = {"IC.BUS.EASE.XQ": "doing_business", "NY.GDP.PCAP.PP.KD": "gdppc"}
        #df = wbdata.get_dataframe(indicators, country=countries, convert_date=True)
        #df.describe()
        return self.data
Beispiel #2
0
 def testOneTopic(self):
     """Call ``wbdata.get_source("3")``; no assertion is made on the result.

     NOTE(review): the name mentions a topic but the call queries a source;
     confirm which API was intended.
     """
     wbdata.get_source("3")
Beispiel #3
0
 def testGetAllTopics(self):
     """Call ``wbdata.get_source()`` with no arguments; no assertion is made.

     NOTE(review): the name mentions topics but the call queries sources;
     confirm which API was intended.
     """
     wbdata.get_source()
Beispiel #4
0
 def testOneSource(self):
     """Call ``wbdata.get_source(31)``; no assertion is made on the result."""
     wbdata.get_source(31)
Beispiel #5
0
 def testGetAllSources(self):
     """Call ``wbdata.get_source()`` with no arguments; no assertion is made."""
     wbdata.get_source()
Beispiel #6
0
def plotNANperindicator(data, background=False):
    """Plot the percentage of NaN values per indicator, sorted in decreasing order.

    Args:
        data: DataFrame whose columns are a MultiIndex carrying the indicator
            code on level 1. It must contain the 'SP.POP.TOTL' indicator: the
            size of that slice is the denominator of the plotted percentage.
        background: when true, sources and indicators are fetched through
            ``wbdata`` and the plot background is shaded according to the
            source of each indicator, with a legend naming the sources.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    indicators = list(set(data.columns.levels[1]))
    # Total NaN count per indicator, over every column carrying that indicator.
    how_many_nans = [
        data.xs(code, level=1, axis=1).isnull().astype(int).values.sum()
        for code in indicators
    ]
    # Positions of the indicators ordered from most NaN to fewest NaN.
    ind = np.argsort(how_many_nans)[::-1]

    if background:
        # Map source id -> source name, and indicator code -> source id.
        sources = wbdata.get_source(display=False)
        id_to_sourceName = {src['id']: src['name'] for src in sources}

        all_indics = wbdata.get_indicator(display=False)
        indicator_to_id = {k['id']: k['source']['id'] for k in all_indics}
        # Indicators unknown to wbdata are grouped under a placeholder source.
        for code in set(indicators) - set(indicator_to_id):
            indicator_to_id[code] = -1
        id_to_sourceName[-1] = 'NaN'

        # Source id of each indicator, in plotting (sorted) order.
        range_labels_int = [indicator_to_id[indicators[k]] for k in ind]
        unique_labels = list(set(range_labels_int))
        # One colour per distinct source.
        cmap = plt.get_cmap('jet_r')
        color = cmap(np.linspace(0, 1.0, len(unique_labels)))

    def consecutive(values, stepsize=1):
        # Split into runs of consecutive values, e.g.
        # consecutive([1, 2, 3, 5, 7, 8, 9]) -> [[1, 2, 3], [5], [7, 8, 9]]
        return np.split(values, np.where(np.diff(values) != stepsize)[0] + 1)

    # Denominator is loop-invariant, so compute it once.
    total_cells = data.xs('SP.POP.TOTL', level=1, axis=1).size

    # Line plot of the NaN percentage per indicator.
    plt.figure(figsize=(10, 5))
    plt.plot(range(len(indicators)),
             [100 * (how_many_nans[k] / total_cells) for k in ind])
    if background:
        plt.title(
            'Number of NaN values per indicator, sorted (background = indicator label)',
            fontsize=15)
    else:
        plt.title('Number of NaN values per indicator, sorted', fontsize=15)
    plt.xlabel('Indicators', fontsize=15)
    plt.ylabel('# NaN values (% of total)', fontsize=15)
    plt.xticks([])

    if background:
        legends = []
        # The colour index advances once per source (not once per run), so
        # every span of a source matches that source's legend patch.
        for a, label_id in enumerate(unique_labels):
            positions = [
                k for k, current in enumerate(range_labels_int)
                if current == label_id
            ]
            legends.append(
                mpatches.Patch(color=color[a],
                               alpha=0.3,
                               label=id_to_sourceName.get(
                                   label_id, str(label_id))))
            # Shade each run of adjacent positions belonging to this source.
            for run in consecutive(positions):
                plt.axvspan(run[0],
                            run[-1],
                            color=color[a],
                            alpha=0.3,
                            lw=2.0)

        # Build the legend once; keep at least one column so that fewer than
        # five sources does not produce ncol=0.
        plt.legend(handles=legends,
                   loc='upper center',
                   bbox_to_anchor=(0.5, -0.06),
                   fancybox=True,
                   shadow=True,
                   ncol=max(1, len(unique_labels) // 5),
                   fontsize='small')

    plt.show()
Beispiel #7
0
 def testOneSource(self):
     """Call ``wbdata.get_source(31)``; no assertion is made on the result."""
     wbdata.get_source(31)
Beispiel #8
0
def get_categories():
    """Return an ``(id, name)`` tuple for every entry of ``wbdata.get_source``."""
    sources = wbdata.get_source(display=False)
    return [(source['id'], source['name']) for source in sources]
Beispiel #9
0
# Keep_levels
# if True don’t reduce the number of index levels returned if only getting one date or country

# Cache
# use the cache

# Returns
# a WBDataFrame

import pandas as pd
import wbdata as wb
import datetime

# List the available World Bank data sources, then the indicators of source 16
wb.get_source()
wb.get_indicator(source=16)

# Search countries matching 'united'
wb.search_countries('united')

# Search countries with a pattern ('niger*')
wb.search_countries('niger*')

# Get data for one country
# SE.ADT.1524.LT.FM.ZS  Literacy rate, youth (ages 15-24), gender parity index (GPI)
# NOTE(review): described as returning a list of per-year dictionaries;
# confirm against the wbdata version in use
wb.get_data("SE.ADT.1524.LT.FM.ZS", country="USA")

# Build a (start, end) pair of datetimes to use as a date range
date_range = datetime.datetime(2008, 1, 1), datetime.datetime(2019, 1, 1)
Beispiel #10
0
 def testGetAllSources(self):
     """Call ``wbdata.get_source()`` with no arguments; no assertion is made."""
     wbdata.get_source()
# Created to get information from World Bank API

import wbdata


# All available sources: print the id and name of each entry from get_source()

available_info = wbdata.get_source()
for x in available_info:
    print(x['id'], x['name'])

# Information about G20 Financial Inclusion Indicators (source 33)

inclusion_indicators = wbdata.get_indicator(source=33)
print(inclusion_indicators)

# CPTOTSAXN for the USA, all years. Indicator description: 'The consumer
# price index reflects the change in prices for the average consumer of a
# constant basket of consumer goods. Data is in nominal terms and seasonally
# adjusted.'

cpi = wbdata.get_data('CPTOTSAXN',  country='USA')
print(cpi)

Beispiel #12
0
import wbdata as wb

# Keep the result of get_source() in `results`, then query the indicators of source 1
results = wb.get_source()
wb.get_indicator(source=1)
Beispiel #13
0
 def search_sources(self):
     """Call ``wb.get_source()`` with no arguments; the result is discarded."""
     wb.get_source()
import wbdata
from pprint import pprint

# Fetch the World Bank sources and pretty-print them
news = wbdata.get_source()

pprint(news)
Beispiel #15
0
 def testGetAllTopics(self):
     """Call ``wbdata.get_source()`` with no arguments; no assertion is made.

     NOTE(review): the name mentions topics but the call queries sources;
     confirm which API was intended.
     """
     wbdata.get_source()
# NOTE(review): `wbd`, `indicator1` and `country1` are not defined in the
# lines visible here; they are expected to come from earlier in the script.
# Data comes back in reverse order by default, hence the sort_index()
data1 = wbd.get_dataframe(indicator1, country1).sort_index()
data1.head()
data1.plot()

# This is fine, but what if you need to find different countries?
wbd.get_country()
# The full list is too long; searching is easier
wbd.search_countries('South')

# What if you want to get different indicators?
#wbd.get_indicator() # Too slow
wbd.search_indicators('GDP')  # Too many!

# Perhaps look by source instead
wbd.get_source()
# or by topic
wbd.get_topic()
# Now search within topic 19
wbd.search_indicators('CO2', topic=19)

# What about getting multiple countries?
country2 = ['IE', 'US', 'CN']  # Ireland, USA, China
indicator2 = {'EN.ATM.CO2E.KT': 'CO2 emissions (kt)'}

# Get the data
data2 = wbd.get_dataframe(indicator2, country2).sort_index()
# Unstack index level 0 into columns before plotting
data2_u = data2.unstack(level=0)
data2_u.head()
data2_u.plot()
Beispiel #17
0
 def testOneTopic(self):
     """Call ``wbdata.get_source("3")``; no assertion is made on the result.

     NOTE(review): the name mentions a topic but the call queries a source;
     confirm which API was intended.
     """
     wbdata.get_source("3")
Beispiel #18
0
# Ease of doing business

import wbdata
# List the sources, then the indicators of source 1
wbdata.get_source()
wbdata.get_indicator(source=1)
# Get country codes with a search
wbdata.search_countries('Turkey')  #TUR
# First entry of the data returned for Turkey
wbdata.get_data('IC.REG.COST.PC.MA.ZS', country='TUR')[0]
wbdata.search_countries('united')  #GBR
wbdata.get_data('IC.REG.COST.PC.MA.ZS', country='GBR')

import datetime
# Restrict the query to a (start, end) date pair and to two countries
data_date = (datetime.datetime(2010, 1, 1), datetime.datetime(2011, 1, 1))
wbdata.get_data("IC.REG.COST.PC.MA.ZS",
                country=("USA", "GBR"),
                data_date=data_date)
# Search indicators by text, then fetch one without a country filter
wbdata.search_indicators("gdp per capita")
wbdata.get_data('NY.GDP.PCAP.KD.ZG')

# Same indicator restricted to one country code at a time
wbdata.get_data('NY.GDP.PCAP.KD.ZG', country='USA')
wbdata.get_data('NY.GDP.PCAP.KD.ZG', country='OED')

# Income level filter: collect the ids of the countries with incomelevel "HIC"
wbdata.get_incomelevel()
countries = [
    i['id'] for i in wbdata.get_country(incomelevel="HIC", display=False)
]
# Indicator code mapped to a short name
indicators = {
    "IC.REG.COST.PC.MA.ZS": "doing_business",
    "NY.GDP.PCAP.PP.KD": "gdppc"
}
Beispiel #19
0
import wbdata
import datetime as dt

# Indicator code mapped to a short name
indicatorSelection = {"DT.DOD.DLXF.CD": "ExternalDebtStock"}

# NY.GDP.DEFL.KD.ZG

# NOTE(review): presumably World Bank country/region codes; confirm
locationSelection = ["US", "SSA", "SAS", "LAC", "MNA", "EAP"]

# (start, end) pair of datetimes; not used in the lines visible here
timeSelection = (dt.datetime(2009, 1, 1), dt.datetime(2018, 12, 31))

# Fetch source 6 and print it
IDS = wbdata.get_source(source_id=6)

print(IDS)
# Grab the selected indicators for the selected countries and load them into
# a data frame.
# NOTE(review): `indicators`, `countries` and `plt` are not defined in the
# lines visible here; they are expected to come from earlier in the script.
df = wbdata.get_dataframe(indicators, country=countries, convert_date=False)

# df is "pivoted"; pandas' unstack function reshapes it into something plottable
dfu = df.unstack(level=0)

# A simple matplotlib plot with legend, labels and a title
dfu.plot()
plt.legend(loc='best')
plt.title("GNI Per Capita ($USD, Atlas Method)")
plt.xlabel('Date')
plt.ylabel('GNI Per Capita ($USD, Atlas Method)')


# In[ ]:

wbdata.get_source()


# In[ ]:

# Source 1: Doing Business
wbdata.get_indicator(source=1)


# In[ ]:

wbdata.search_countries("Brazil")


# In[ ]:
Beispiel #21
0
def print_wb_sources():
    """Call ``wbdata.get_source()`` with default arguments; the result is discarded.

    NOTE(review): the name implies the sources get printed; that relies on
    ``get_source`` displaying its result by default. Confirm for the wbdata
    version in use.
    """
    wbdata.get_source()