Esempio n. 1
0
    def getWBDataFromWeb(self, pStockCode, pStart, pEnd):
        """Download the "IC.BUS.EASE.XQ" indicator for USA and GBR.

        The three parameters are accepted but never read by the body: the
        indicator code, the two countries and the 2010-2011 date window are
        all hard-coded.  The value returned by ``wbdata.get_data`` is stored
        on ``self.data``, every row is printed prefixed by its country id,
        and ``self.data`` is returned.

        API reference: https://wbdata.readthedocs.io/en/latest/
        """
        # Exploratory lookups; their return values are discarded.
        wbdata.get_source()
        wbdata.get_indicator(source=1)
        wbdata.search_countries("united")

        window_start = datetime.datetime(2010, 1, 1)
        window_end = datetime.datetime(2011, 1, 1)
        self.data = wbdata.get_data(
            "IC.BUS.EASE.XQ",
            country=("USA", "GBR"),
            data_date=(window_start, window_end),
        )

        for record in self.data:
            print(record['country']['id'], record)

        return self.data
Esempio n. 2
0
def retrieve_country_information_from_wb_api():
    """
    Fetch country records (income level included) from the World Bank API.

    The lookup is a ``wbdata.search_countries`` call with an empty query and
    ``display=False``; whatever that call returns is handed back unchanged.

    >>> [x['incomeLevel'] for x in retrieve_country_information_from_wb_api() if x.get('id') =='ABW']
    [{'id': 'HIC', 'value': 'High income'}]
    """
    country_records = wbdata.search_countries("", display=False)
    return country_records
Esempio n. 3
0
def get_countries_as_iso_codes():
    """
    Return the World Bank ids of the European countries as a list.

    Every name produced by ``get_european_countries()`` is looked up with
    ``wbdata.search_countries``.  A name is kept only when the search yields
    exactly one match; names with zero or several matches are skipped
    silently.

    Returns:
        list: the ``'id'`` field of each unambiguous match, one entry per
        distinct country name.
    """
    iso_codes = {}
    for country in get_european_countries():
        matches = wbdata.search_countries(country, display=False)
        if len(matches) == 1:
            iso_codes[country] = matches[0]['id']
    # dict.values() is only a view on Python 3; materialise it so callers
    # really receive the list the contract promises (indexable, reusable).
    return list(iso_codes.values())
Esempio n. 4
0
# Exploration script: ease-of-business and GDP-per-capita indicators.
# Every call below hits the World Bank API through wbdata; except for the
# three assignments (data_date, countries, indicators) the return values are
# discarded.

import wbdata
# Data sources, then the indicators attached to source 1.
wbdata.get_source()
wbdata.get_indicator(source=1)
# Get country codes with a search, then pull one indicator for that code.
wbdata.search_countries('Turkey')  # code used below: TUR
wbdata.get_data('IC.REG.COST.PC.MA.ZS', country='TUR')[0]
wbdata.search_countries('united')  # code used below: GBR
wbdata.get_data('IC.REG.COST.PC.MA.ZS', country='GBR')

import datetime
# (start, end) pair restricting the request to 2010-2011.
data_date = (datetime.datetime(2010, 1, 1), datetime.datetime(2011, 1, 1))
wbdata.get_data("IC.REG.COST.PC.MA.ZS",
                country=("USA", "GBR"),
                data_date=data_date)
# Find GDP-per-capita indicators, then request one without a country filter.
wbdata.search_indicators("gdp per capita")
wbdata.get_data('NY.GDP.PCAP.KD.ZG')

# Same indicator restricted to a single code.
wbdata.get_data('NY.GDP.PCAP.KD.ZG', country='USA')
wbdata.get_data('NY.GDP.PCAP.KD.ZG', country='OED')

# Income level filter: ids of the entries whose income level is "HIC".
wbdata.get_incomelevel()
countries = [
    i['id'] for i in wbdata.get_country(incomelevel="HIC", display=False)
]
# Indicator code -> column label.  Not used further in the visible code.
indicators = {
    "IC.REG.COST.PC.MA.ZS": "doing_business",
    "NY.GDP.PCAP.PP.KD": "gdppc"
}
import wbdata as wbd

# Example 1: total population of Ireland.
# NOTE(review): no date range is passed to get_dataframe, so the years
# returned are whatever the API provides by default.
country1 = ['IE']  # Country code, wrapped in a list
indicator1 = {
    'SP.POP.TOTL': 'Total Population'
}  # Indicator code -> column label, wrapped in a dict
# sort_index() puts the rows in ascending index order.
data1 = wbd.get_dataframe(indicator1, country1).sort_index()
data1.head()
data1.plot()

# This is fine, but what if you need to find different countries?
wbd.get_country()
# The full list is long; searching by name is easier.
wbd.search_countries('South')

# What if you want to get different indicators?
#wbd.get_indicator() # Too slow
wbd.search_indicators('GDP')  # Too many results!

# Perhaps look by source instead
wbd.get_source()
# or by topic
wbd.get_topic()
# Now search within one topic
wbd.search_indicators('CO2', topic=19)

# Setup for a multi-country request (not issued in the visible code).
country2 = ['IE', 'US', 'CN']  # Ireland, USA, China
indicator2 = {'EN.ATM.CO2E.KT': 'CO2 emissions (kt)'}
Esempio n. 6
0
def scrape():
    """Merge the IDMC disaster CSV with World Bank population figures.

    Steps, in order:

    1. Read ``Project2_idmc_disaster_all_dataset.csv`` and tag every row with
       a two-letter country code and a continent code (``"N/A"`` whenever a
       lookup fails).
    2. Keep the rows whose continent code is ``"AF"``.
    3. Download the ``SP.POP.TOTL`` indicator for ``country_codes`` with a
       2008-2019 date window through ``wbdata``.
    4. Left-merge both tables on country code, year and country name, drop
       incomplete rows and write the result to ``merged.csv``.
    5. Read the ``merged_data`` table of the local PostgreSQL database and
       return it.

    ``pd`` and ``create_engine`` are expected to exist at module level.

    Returns:
        The result of ``pd.read_sql_query('select * from merged_data')``.
    """
    # Only the modules the body really uses are imported.  The notebook export
    # also imported os, requests, json, pprint, numpy and flask without ever
    # referencing them, which made the function fail when one was missing.
    import datetime

    import pycountry_convert as pc
    import wbdata

    df = pd.read_csv(
        "../SecondProject2/Resources/Project2_idmc_disaster_all_dataset.csv")

    # Converter smoke check on one known country; both values are printed.
    # (Named sample_* so they are not shadowed by the helpers defined below.)
    sample_code = pc.country_name_to_country_alpha2("China",
                                                    cn_name_format="default")
    print(sample_code)
    sample_continent = pc.country_alpha2_to_continent_code(sample_code)
    print(sample_continent)

    def country_code(name):
        """Return the alpha-2 code of country *name*, or "N/A" on failure."""
        try:
            return pc.country_name_to_country_alpha2(name,
                                                     cn_name_format="default")
        except Exception:
            # Deliberate best effort: any lookup failure becomes "N/A".
            # Exception (instead of a bare except) still lets
            # KeyboardInterrupt and SystemExit propagate.
            return "N/A"

    def continent_name(name):
        """Return the continent code of alpha-2 code *name*, or "N/A"."""
        try:
            return pc.country_alpha2_to_continent_code(name)
        except Exception:
            # Also reached for the "N/A" placeholders produced above.
            return "N/A"

    df["Country_2D"] = df["Country Name"].apply(country_code)
    df["Continent"] = df["Country_2D"].apply(continent_name)

    # African rows only, with the name column renamed so it can take part in
    # the merge below.
    df_africa = df[df["Continent"] == "AF"].rename(columns={
        'Country Name': 'Country',
    })

    country_codes = [
        'AGO', 'ALB', 'ARB', 'BDI', 'BEN', 'BFA', 'BMN', 'BSS', 'BWA', 'CAA',
        'CAF', 'CIV', 'CME', 'CMR', 'COG', 'COM', 'CPV', 'DJI', 'DMN', 'DSF',
        'DSS', 'DZA', 'EGY', 'ETH', 'GAB', 'GHA', 'GMB', 'GNB', 'GNQ', 'KEN',
        'LBR', 'LSO', 'MAR', 'MDG', 'MEA', 'MLI', 'MNA', 'MOZ', 'MRT', 'MUS',
        'MWI', 'NAF', 'NAM', 'NER', 'NGA', 'NLS', 'NRS', 'RRS', 'RSO', 'RWA',
        'SDN', 'SLE', 'SOM', 'SSA', 'SSD', 'SSF', 'SWZ', 'SXZ', 'SYC', 'TCD',
        'TGO', 'TMN', 'TSS', 'TUN', 'TZA', 'UGA', 'XZN', 'ZAF', 'ZMB', 'ZWE'
    ]
    indicators = "SP.POP.TOTL"
    data_date = datetime.datetime(2008, 1, 1), datetime.datetime(2019, 1, 1)

    # Exploratory lookups kept from the notebook; their results are not used.
    wbdata.get_indicator(source=50)
    wbdata.search_countries('')

    data = wbdata.get_data(indicators,
                           country=country_codes,
                           data_date=data_date)
    df_wbdata = pd.DataFrame(data)
    df_wbdata = df_wbdata.rename(
        columns={
            "indicator": "Indicator",
            "country": "Country",
            "countryiso3code": "Country code",
            "date": "Year",
            "value": "Population",
        })
    df_wbdata = df_wbdata.filter(
        items=['Country', 'Country code', 'Year', 'Population'])
    df_wbdata = df_wbdata.dropna()

    # Each "country" cell is a mapping of the form {'id': ..., 'value': name}.
    # Read the name from its key instead of slicing the mapping's string
    # representation at fixed character offsets, which only works while the
    # id is exactly two characters long.
    df_wbdata['Country'] = df_wbdata['Country'].map(
        lambda entry: entry['value'])
    for column in ('Country', 'Country code', 'Year', 'Population'):
        df_wbdata[column] = df_wbdata[column].astype(str)

    # Both sides need the same dtype for the merge key.
    df_africa['Year'] = df_africa['Year'].astype('int64')
    df_wbdata['Year'] = df_wbdata['Year'].astype('int64')

    merged_df = pd.merge(left=df_africa,
                         right=df_wbdata,
                         how="left",
                         on=['Country code', 'Year', 'Country'])
    merged_df = merged_df.rename(
        columns={
            'Country code': 'Country_Code',
            'Start Date': 'Start_Date',
            'Event Name': 'Event_Name',
            'Hazard Category': 'Hazard_Category',
            'Hazard Type': 'Hazard_Type',
            'New Displacements': 'New_Displacements',
        })
    # The left merge leaves NaN where no population figure matched.
    merged_df = merged_df.dropna()
    merged_df.to_csv('merged.csv', index=False)

    # NOTE(review): merged_df is only written to merged.csv; nothing in this
    # function loads it into the merged_data table queried below -- confirm
    # that the table is populated elsewhere.
    engine = create_engine(
        'postgresql://*****:*****@localhost/project2_db')
    # The former engine.table_names() call was dropped: its result was
    # discarded and the method is no longer available in SQLAlchemy 2.0.
    query = pd.read_sql_query('select * from merged_data', con=engine)

    return query
Esempio n. 7
0
# -*- coding: utf-8 -*-
"""
Created on Sun Mar  8 02:01:46 2020

@author: udarici19
"""

import wbdata
import datetime

# (start, end) pair passed to get_dataframe below: 1960 through 2019.
data_date = (datetime.datetime(1960, 1, 1), datetime.datetime(2019, 1, 1))
wbdata.search_countries('United States')  # code used below: USA

# Searches used to locate the three indicator codes requested below.
wbdata.search_indicators('Labor force participation rate, female') #SL.TLF.CACT.FE.NE.ZS
wbdata.search_indicators('Fertility Rate') #SP.DYN.TFRT.IN
wbdata.search_indicators('GDP per capita') #NY.GDP.PCAP.CD

# One column per indicator, labelled GDPPC / FR / FLFP, for the USA only.
df = wbdata.get_dataframe({"NY.GDP.PCAP.CD":"GDPPC","SP.DYN.TFRT.IN":"FR","SL.TLF.CACT.FE.NE.ZS":"FLFP"},
                          country="USA", data_date=data_date)

# Persist the frame; describe() is evaluated but its result is discarded.
df.to_csv('data.csv')
df.describe()
Esempio n. 8
0
 def testSearchCountry(self):
     # Smoke test: there is no assertion, so the test passes as long as the
     # country search for "United" does not raise.
     wbdata.search_countries("United")
Esempio n. 9
0
# Cache
# use the cache

# Returns
# a WBDataFrame

import pandas as pd
import wbdata as wb
import datetime

# List the World Bank data sources, then the indicators of source 16.
wb.get_source()
wb.get_indicator(source=16)

# Search countries by name.
wb.search_countries('united')

# Search with a pattern.
# NOTE(review): presumably the query is handled as a regular expression, in
# which case 'niger*' means "nige" followed by any number of "r" -- confirm
# against the wbdata documentation.
wb.search_countries('niger*')

# Get the data of one indicator for one country.
# SE.ADT.1524.LT.FM.ZS  Literacy rate, youth (ages 15-24), gender parity index (GPI)
# NOTE(review): the result is presumably a list with one dictionary per
# year -- confirm.
wb.get_data("SE.ADT.1524.LT.FM.ZS", country="USA")

# Date range (start, end) for the request that follows.
date_range = datetime.datetime(2008, 1, 1), datetime.datetime(2019, 1, 1)
# SH.CON.1524.FE.ZS     Condom use, population ages 15-24, female (% of females ages 15-24)
# SH.CON.1524.MA.ZS     Condom use, population ages 15-24, male (% of males ages 15-24)
wb.get_data("SH.CON.1524.MA.ZS",
            country=["USA", "GBR", "NGA"],
Esempio n. 10
0

# In[ ]:

# List the available data sources.
wbdata.get_source()


# In[ ]:

# Indicators of source 1 (Doing Business)
wbdata.get_indicator(source=1)


# In[ ]:

# Look up Brazil to find the country code used below.
wbdata.search_countries("Brazil")


# In[ ]:

# Signature reminder:
#wbdata.get_data(indicator, country=u'all', data_date=None, convert_date=False, pandas=False,
#column_name=u'value', keep_levels=False)
wbdata.get_data("IC.BUS.EASE.XQ", country=u'BRA')


# In[ ]:

# Same indicator for two countries, restricted to the 2010-2011 window.
data_date = (datetime.datetime(2010, 1, 1), datetime.datetime(2011, 1, 1))
wbdata.get_data("IC.BUS.EASE.XQ", country=("USA", "BRA"), data_date=data_date)

Esempio n. 11
0
import pandas as pd
import wbdata
import numpy
import datetime
import LinearRegression
# Exploration: population and GDP of Norway.
wbdata.get_source()
wbdata.get_indicator(source=1)
# Get country codes with a search
wbdata.search_countries('Poland')
# Get indicators with a search
wbdata.search_indicators('population, total')
wbdata.get_data('SP.POP.TOTL', country='NOR')  # total population, Norway
wbdata.search_indicators('GDP')
wbdata.get_data('NY.GDP.MKTP.CD', country='NOR')  # GDP, Norway

# Same two requests loaded into frames; the column selections are evaluated
# but their results are discarded.
a = pd.DataFrame.from_dict(wbdata.get_data('SP.POP.TOTL', country='NOR'))
a[['value', 'date']]
b = pd.DataFrame.from_dict(wbdata.get_data('NY.GDP.MKTP.CD', country='NOR'))
b[['value', 'date']]

data_date = (datetime.datetime(1990, 1, 1), datetime.datetime(2018, 1, 1))
indicators = {"SP.POP.TOTL": "Total Population", "NY.GDP.MKTP.CD": "GDP"}
# The 1990-2018 window was built above but never handed to the request, so
# the frame covered every available year; pass it explicitly.
df = wbdata.get_dataframe(indicators,
                          country=["NOR"],
                          data_date=data_date,
                          convert_date=True)

# the World Bank for a particular indicator, country and year
import wbdata as wbd
from datetime import datetime


# Q3
def IQR(x):
    """Return the interquartile range of *x*: 0.75 quantile minus 0.25 quantile."""
    upper_quartile = x.quantile(0.75)
    lower_quartile = x.quantile(0.25)
    return upper_quartile - lower_quartile


# Q3: interquartile range of the age of the female survivors, rounded.
# A single combined mask replaces the former chained masks, whose second
# mask was longer than the already-filtered series it was applied to.
female_survivors = (titanic.sex == 'female') & (titanic.survived == 1)
age_female_sur_quan = round(titanic.age[female_survivors].agg(IQR))
print('age_female_survived_quartile:', age_female_sur_quan)

# Q4: id of the first search hit for Indonesia.
country_code = wbd.search_countries('Indonesia')[0]['id']
print('country code:', country_code)

# Q5: first value of the overweight indicator for Ireland, requested with
# the date 2010-01-01.
indicator = {'SH.STA.OWAD.ZS': 'prevalence of overweight'}
overweight_frame = wbd.get_dataframe(indicator,
                                     wbd.search_countries('Ireland')[0]['id'],
                                     datetime(2010, 1, 1))
# .iloc[0, 0] addresses the cell purely by position.  The former .iloc[0][0]
# indexed the row with an integer although it is labelled by column name,
# which pandas only supports as a deprecated positional fallback.
per = round(overweight_frame.iloc[0, 0])
print('the prevalence of overweight adults:', per)

# Q6 to Q8 relate to webscraping and uses the Spotify weekly charts. You will need
# to import BeautifulSoup and the requests package
from bs4 import BeautifulSoup
import requests
import re
Esempio n. 13
0
# Collect the country names given on the command line into a list; at least
# one country is required.
countries = [str(arg) for arg in sys.argv[1:]]
if not countries:
    # A single pre-formatted string prints the same text on Python 2 and
    # Python 3; the former `print "..."` statement is a syntax error on 3.
    print("Error you passed %d countries" % (len(sys.argv) - 1))
    # Exit with a non-zero status so callers can detect the failure, like the
    # SystemExit raised further down for an unknown country.
    sys.exit(1)

# Save the details about the country in a Dictionary
country_dict = {}
for c in countries:
    # Get data about the countries using World bank Data API, if the country name is not in data set tell user that that perticuler country is not in dataset
    dat = wbdata.search_countries(c,
                                  incomelevel=False,
                                  lendingtype=False,
                                  display=False)
    if len(dat) == 0:
        raise SystemExit('The country ' + c + ' is not in dataset.')
    else:
        name = dat[0][u'name']
        latitude = dat[0][u'latitude']
        longitude = dat[0][u'longitude']
        region = dat[0][u'region'][u'value']
        capital = dat[0][u'capitalCity']
        income_level = dat[0][u'incomeLevel'][u'value']
        country_code = dat[0][u'iso2Code']
        country_dict[name] = {
            "latitude": latitude,
            "longitude": longitude,
            "country_code": country_code,
Esempio n. 14
0
 def testSearchCountry(self):
     # Smoke test: there is no assertion, so the test passes as long as the
     # country search for "United" does not raise.
     wbdata.search_countries("United")
Esempio n. 15
0
                s_yr = int(a)
            elif o == "-e":
                global e_yr
                e_yr = int(a)
            else:
                assert False, "unhandled option"
    except Usage, err:
        print >> sys.stderr, err.msg
        print >> sys.stderr, "for help use --help"
        return 2

    # start timing the script
    time0 = datetime.datetime.utcnow()

    # get all entities in DB
    all_entities = wbdata.search_countries('', display=False)
    topics = wbdata.get_topic(display=False)
    indicators = wbdata.get_indicator(display=False)
    if None in (all_entities, topics, indicators):
        return 2

    # declare some blank lists to hold parsed data
    cnames = []
    anames = []
    countries = []
    aggregates = []

    # separate countries from aggregates
    for c in all_entities:
        if not c['incomeLevel']['value'] == 'Aggregates':
            cnames.append(c['name'])
#!/usr/bin/env python
# coding: utf-8

# In[1]:

# Step 1. Import necessary libraries
import wbdata
import datetime
import pandas as pd

# Step 2. Resolve each South American country name to its World Bank id.
# A name is accepted only when the search returns exactly one match; every
# other name is reported in error_log.
south_american_countries = [
    'Venezuela', 'Uruguay', 'Suriname', 'Peru', 'Paraguay', 'Guyana',
    'Ecuador', 'Colombia', 'Chile', 'Brazil', 'Bolivia', 'Argentina',
]
south_american_country_codes = []
error_log = []
for country in south_american_countries:
    temp = wbdata.search_countries(country, display=False)
    if len(temp) != 1:
        error_log.append(country+': Either I could not find the country or there are more than one country with similar names.')
        continue
    south_american_country_codes.append(temp[0]['id'])

# Every name lands in exactly one of the two lists, so an empty error_log
# means the code list is as long as the name list.
if error_log:
    print('There were some errors. Please run print(error_log) to see errors.')
else:
    print('All of the countries parsed successfully.')

# Step 3. Specify indicators
indicators = {'FP.CPI.TOTL.ZG':'3_inflation',
              'SP.POP.0014.TO.ZS': '0_control_children_population_percentage',
              'ccx_lf_pop_tot':'2_labor_force_participation_rate',
              'ccx_wka_pop_tot':'1_working_age_population_percentage',
              'per_nprog.overlap_pop_tot':'4_population_not_rec_social_protection',
Esempio n. 17
0
# Title and axis labels of the GNI-per-capita plot.
plt.title("GNI Per Capita ($USD, Atlas Method)")
plt.xlabel('Date')
# The y label was missing its closing parenthesis, unlike the title.
plt.ylabel('GNI Per Capita ($USD, Atlas Method)')

# In[ ]:

# List the available data sources.
wbdata.get_source()

# In[ ]:

# Indicators of source 1 (Doing Business)
wbdata.get_indicator(source=1)

# In[ ]:

# Look up Brazil to find the country code used below.
wbdata.search_countries("Brazil")

# In[ ]:

# Signature reminder:
#wbdata.get_data(indicator, country=u'all', data_date=None, convert_date=False, pandas=False,
#column_name=u'value', keep_levels=False)
wbdata.get_data("IC.BUS.EASE.XQ", country=u'BRA')

# In[ ]:

# Same indicator for two countries, restricted to the 2010-2011 window.
data_date = (datetime.datetime(2010, 1, 1), datetime.datetime(2011, 1, 1))
wbdata.get_data("IC.BUS.EASE.XQ", country=("USA", "BRA"), data_date=data_date)

# In[ ]:

# Find indicators whose description mentions GDP per capita.
wbdata.search_indicators("gdp per capita")
### Visit https://pypi.org/project/wbdata/ to install the wbdata library
import wbdata
### Data sources
wbdata.get_source()
### Indicators in source 1
wbdata.get_indicator(source=1)
### Search countries with the query "tun".
### NOTE(review): the search presumably matches on the country name (so
### "tun" finds e.g. Tunisia) rather than on the ISO3 code -- confirm
### against the wbdata documentation.
wbdata.search_countries("tun")
### Search for indicators
wbdata.search_indicators('gdp per capita')

### Extract some data with given dates.
### NOTE(review): `time` is imported but not used in the visible code.
import time
from datetime import date
date.today()

### Extracting data for a given indicator
## Date range (start, end)
data_date = (date(1960, 1, 1), date(2017, 1, 1))
data_date

### Extracting data
### NY.GDP.PCAP.KD.ZG        GDP per capita growth (annual %)
### (this is the code actually requested below; it is not the PPP variant
### NY.GDP.PCAP.PP.KD.ZG)
dd=wbdata.get_data(indicator='NY.GDP.PCAP.KD.ZG' , data_date=data_date,country='tun')
dd
### Importing the pandas package
import pandas as pd
### Turning dd into a DataFrame
dd=pd.DataFrame(dd)
dd.head()
Esempio n. 19
0
# Replace every entry containing 'South Korea' by 'Korea, Rep.' (presumably
# the spelling the World Bank search expects -- confirm).
# NOTE(review): `ind` is defined outside the visible code, so this in-place
# item assignment is left as it was.
for idx, item in enumerate(ind['country']):
    if 'South Korea' in item:
        ind['country'][idx] = 'Korea, Rep.'

# Distinct country names, sorted.
country_list = list(ind['country'])
new_country_set = sorted(set(country_list))

country_name = []
region = []
income = []

for country in new_country_set:
    # display=False makes the call return its matches, as the other lookups
    # in this file do.
    hoop = wbdata.search_countries(country, display=False)
    best_match = hoop[0]
    # Read the three fields by key.  The former scan over all items appended
    # only the keys it happened to find, so one missing field would silently
    # leave the three lists with different lengths.
    country_name.append(best_match['name'])
    region.append(best_match['region']['value'])
    income.append(best_match['incomeLevel']['value'])

data = pd.DataFrame()
data['country'] = country_name
data['region'] = region
data['income'] = income

# Manual correction of row 59.  .loc writes into the frame itself; the former
# chained form data['country'][59] = ... assigns through an intermediate
# Series and is not guaranteed to update `data`.
data.loc[59, 'country'] = 'South Africa'
data.loc[59, 'region'] = 'Sub-Saharan Africa '
Esempio n. 20
0
 def search_countries(self, search = 'United States'):
     # Thin pass-through: forwards `search` (default 'United States') to
     # wb.search_countries and returns its result unchanged.
     return wb.search_countries(search)