コード例 #1
0
    def test_wdi_get_countries(self):
        """Sanity-check ``get_countries`` through every entry point.

        The country table is fetched four ways: through the module-level
        function and through ``WorldBankReader``, each once without a
        session and once with an explicitly supplied ``requests.Session``.
        Every result must pass the same checks.
        """
        session = requests.Session()
        results = [
            get_countries(),
            WorldBankReader().get_countries(),
            get_countries(session=session),
            WorldBankReader(session=session).get_countries(),
        ]

        for result in results:
            # Dedicated assertion helpers report the offending values on
            # failure instead of a bare "False is not true".
            self.assertIn('Zimbabwe', list(result['name']))
            self.assertGreater(len(result), 100)
            self.assertTrue(pd.notnull(result.latitude.mean()))
            self.assertTrue(pd.notnull(result.longitude.mean()))
コード例 #2
0
ファイル: test_wb.py プロジェクト: anshikka/Soda
    def test_wdi_get_countries(self):
        """Verify the country table looks sane for all four call styles.

        Covers the module-level ``get_countries`` and the
        ``WorldBankReader`` method, each without and with an explicit
        ``requests.Session``.
        """
        frames = [get_countries(), WorldBankReader().get_countries()]

        shared_session = requests.Session()
        frames.append(get_countries(session=shared_session))
        frames.append(
            WorldBankReader(session=shared_session).get_countries())

        for frame in frames:
            country_names = list(frame['name'])
            assert 'Zimbabwe' in country_names
            assert len(frame) > 100
            # The mean is only non-null when coordinate values are present.
            assert pd.notnull(frame.latitude.mean())
            assert pd.notnull(frame.longitude.mean())
コード例 #3
0
ファイル: utils.py プロジェクト: fagan2888/PISA2012
def getGDP(codes):
    """
    Fetch 2012 GDP figures (indicator NY.GDP.PCAP.PP.CD) from the World Bank.

    Only the entries of ``codes`` that appear in ``wb.country_codes`` are
    requested. The result is a data frame with the columns
    country, GDP, iso3c, region, where iso3c is the ISO-3 country code
    that can be used later for joins.
    """
    known_codes = [code for code in codes if code in wb.country_codes]

    downloaded = wb.download(indicator='NY.GDP.PCAP.PP.CD',
                             country=known_codes,
                             errors="warn",
                             start=2012,
                             end=2012)

    # Flatten the index into columns, give the indicator a readable name
    # and discard the year, which is constant for this single-year query.
    gdp = (downloaded.reset_index()
           .rename(columns={"NY.GDP.PCAP.PP.CD": "GDP"})
           .drop("year", axis=1))

    # Attach ISO code and region by matching on the country name.
    country_info = wb.get_countries()[["iso3c", "name", "region"]]
    merged = pd.merge(gdp,
                      country_info,
                      left_on="country",
                      right_on="name",
                      how="left")

    # "name" duplicates "country" after the join.
    return merged.drop("name", axis=1)
コード例 #4
0
def _load_dataset(start=2010, end=2020, extra_indicators=None):
    """Download one value per country for every configured indicator.

    Args:
        start: First year passed to ``wb.download``.
        end: Last year passed to ``wb.download``.
        extra_indicators: Optional mapping of column name -> World Bank
            series code, merged on top of ``WORLD_BANK_INDICATORS``.
            ``None`` (the default) adds nothing.

    Returns:
        pd.DataFrame: One row per country that could be matched by name
        against ``wb.get_countries()``, with a ``country`` column, one
        column per indicator name and an ``ISO`` column holding the
        ``iso3c`` code.
    """
    _log.info("Loading dataset")
    # A None sentinel replaces the former mutable ``{}`` default so that no
    # dict object is shared between calls.
    indicators = dict(WORLD_BANK_INDICATORS, **(extra_indicators or {}))

    all_data = []
    for name, series in indicators.items():
        data = wb.download(indicator=series,
                           country='all',
                           start=start,
                           end=end)
        # After sorting the index, take the last entry of each level-0
        # group (presumably the country level - confirm against
        # wb.download) and label the column with the indicator's name.
        all_data.append(data.sort_index().groupby(level=0).last().rename(
            columns={series: name}))

    country_data = pd.concat(all_data, axis=1)
    country_data.index = country_data.index.set_names(['country'])

    # Add ISO codes; the inner join drops rows whose name has no match in
    # the World Bank country table.
    mapping = wb.get_countries()[['name',
                                  'iso3c']].rename(columns={'iso3c': 'ISO'})
    country_data = (country_data.reset_index().merge(mapping,
                                                     left_on='country',
                                                     right_on='name',
                                                     how='inner').drop(
                                                         'name',
                                                         axis='columns'))

    return country_data
コード例 #5
0
def download_population_all_countries(start_year, end_year):
    """Fetch total population (SP.POP.TOTL) for all countries as a long
    DataFrame.

    The World Bank download is joined with the country table by name.
    Entries with an empty capital city are excluded, which removes
    regions. A ``year_start`` datetime column is derived from ``year``.
    """
    raw = wb.download(indicator='SP.POP.TOTL',
                      country='all',
                      start=start_year,
                      end=end_year)
    population = raw.reset_index().rename(
        columns={'SP.POP.TOTL': 'population', 'country': 'country_name'})

    # Regions have no capital city, so a nonempty capital marks a country.
    all_entries = wb.get_countries()
    has_capital = all_entries.capitalCity != ''
    real_countries = all_entries[has_capital]

    # The inner join maps country names to country codes and discards
    # everything that is not in the filtered country table.
    merged = pd.merge(population,
                      real_countries,
                      how='inner',
                      left_on='country_name',
                      right_on='name')
    merged = merged.reset_index(drop=True)

    merged['year_start'] = pd.to_datetime(merged.year, format="%Y")
    return merged
def download_gdp_per_capita_data(start_year, end_year, countries='all'):
    """Download three GDP-per-capita indicators and join country metadata.

    The indicator columns are renamed to descriptive names and the result
    is inner-joined by country name with ``wb.get_countries()``.
    """
    indicator_codes = [
        'NY.GDP.PCAP.KD', 'NY.GDP.PCAP.PP.CD', 'NY.GDP.PCAP.PP.KD'
    ]
    readable_names = {
        'NY.GDP.PCAP.KD': 'gdp_per_capita_constant2010USD',
        'NY.GDP.PCAP.PP.CD': 'gdp_per_capita_PPP_current_international_dollar',
        'NY.GDP.PCAP.PP.KD': 'gdp_per_capita_PPP_constant_2011_international_dollar',
    }

    downloaded = wb.download(indicator=indicator_codes,
                             country=countries,
                             start=start_year,
                             end=end_year)
    # Turn the index into regular columns before renaming the indicators.
    flat = downloaded.reset_index().rename(columns=readable_names)

    country_table = wb.get_countries()
    return pd.merge(flat,
                    country_table,
                    left_on='country',
                    right_on='name',
                    how='inner')
コード例 #7
0
def to_flourish(indicator, start_yr, end_yr, country='all', save_csv=True):
    """
    Downloads data from the World Bank and converts it to the format for making bar chart races in Flourish.
    Parameters:
    * indicator: the world bank code available on the World Bank Page.
    * country: a string if single or a list if multiple of the ISO3 codes of the locations.
    * start_yr: the first year of data that you want to get.
    * end_yr: the final year of data that you want to collect.
    * save_csv: when true, also writes the result to a timestamped csv
      inside the ``flourish_data`` folder of your working directory
      (the folder is created if it does not exist yet).
    Returns:
    * the wide data frame: name, region, Image URL, then one column per year.
    """
    df = wb.download(indicator=indicator,
                     country=country,
                     start=start_yr,
                     end=end_yr)
    df = df.reset_index()
    # One row per country, one column per year.
    df = pd.pivot_table(df, values=indicator, columns='year',
                        index='country').reset_index()
    print("Processed the Indicator Data")

    # Drop the entries whose region is 'Aggregates' from the country table.
    country_info = wb.get_countries()
    country_info = country_info[country_info.region != 'Aggregates']

    df_merged = pd.merge(country_info[['iso2c', 'name', 'region']],
                         df,
                         left_on='name',
                         right_on='country')
    # The flag image URL is built from the ISO2 code.
    df_merged.insert(
        3, 'Image URL', df_merged['iso2c'].apply(
            lambda i: f"https://www.countryflags.io/{i}/flat/64.png"))
    df_merged = df_merged.drop(columns=['iso2c', 'country'])

    if save_csv:
        # Local import keeps this block self-contained.
        import os

        # to_csv does not create missing parent folders, so make sure the
        # output directory exists before writing.
        os.makedirs("flourish_data", exist_ok=True)
        df_merged.to_csv(
            f"flourish_data/flourish_{indicator}_{datetime.now().strftime('%d-%m-%Y %H-%M')}.csv",
            index=False)

    return df_merged
コード例 #8
0
ファイル: test_wb.py プロジェクト: hayd/pandas_datareader_old
 def test_wdi_get_countries(self):
     """The country table must list Zimbabwe and have more than 100 rows."""
     result = get_countries()
     # assertIn / assertGreater show the compared values when they fail,
     # unlike assertTrue on a pre-evaluated boolean.
     self.assertIn('Zimbabwe', list(result['name']))
     self.assertGreater(len(result), 100)
コード例 #9
0
 def test_wdi_get_countries(self):
     """Check that get_countries() returns a plausible country table."""
     result = get_countries()
     country_names = list(result['name'])
     # Specific assertion helpers give a meaningful message on failure
     # (the missing member / the actual length) instead of "False is not
     # true".
     self.assertIn('Zimbabwe', country_names)
     self.assertGreater(len(result), 100)
コード例 #10
0
import plotly.express as px
import dash_bootstrap_components as dbc
import pandas as pd
from pandas_datareader import wb

app = Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

# World Bank indicator code -> human-readable label.
indicators = {
    "IT.NET.USER.ZS": "Individuals using the Internet (% of population)",
    "SG.GEN.PARL.ZS":
    "Proportion of seats held by women in national parliaments (%)",
    "EN.ATM.CO2E.KT": "CO2 emissions (kt)",
}

# get country name and ISO id for mapping on choropleth
countries = wb.get_countries()
# Keep only entries that have a capital city (neither missing nor empty).
# A boolean mask replaces the former chained in-place
# ``countries["capitalCity"].replace(..., inplace=True)``, which acts on a
# temporary Series and has no effect under pandas Copy-on-Write.
countries = countries[countries["capitalCity"].notna()
                      & (countries["capitalCity"] != "")]
countries = countries[["name", "iso3c"]]
countries = countries[countries["name"] != "Kosovo"]
countries = countries.rename(columns={"name": "country"})


def update_wb_data():
    """Download the configured World Bank indicators for 2005-2016.

    Requests every code in the module-level ``indicators`` mapping for the
    ISO3 codes held in ``countries["iso3c"]``.

    NOTE(review): the visible body ends without a return statement; the
    remainder of the function may be missing from this excerpt - confirm.
    """
    # Retrieve specific world bank data from API
    df = wb.download(indicator=(list(indicators)),
                     country=countries["iso3c"],
                     start=2005,
                     end=2016)
    # Move the index levels into ordinary columns.
    df = df.reset_index()
    # Store the year as an integer column.
    df.year = df.year.astype(int)
コード例 #11
0
        # file does not exist yet
		pass

	if not startrow:
		startrow = 0

	df.to_excel(writer, sheet_name, startrow=startrow, **to_excel_kwargs)
	writer.save()

myKey = #input your Scopus Key
headers = {'accept':'application/json', 'x-els-apikey':myKey}
url = 'http://api.elsevier.com/content/author/author_id/'
dataFile = #input path to file you want to write data to

# Get country data from pandas_datareader (World Bank data) and export it
# to an Excel workbook, with missing values replaced by the string 'none'.
country_data = wb.get_countries().fillna('none')
# The context manager saves and closes the workbook on exit;
# ExcelWriter.save() was removed in pandas 2.0, and sheet_name is passed by
# keyword because positional use is deprecated.
with pd.ExcelWriter('country_data.xlsx') as writer:
    country_data.to_excel(writer, sheet_name='Country Data')

#get affiliation data from Scopus, assumes you have a list of eids in a file with an 'eid' column header
df = pd.read_excel('scopus_author_data.xlsx', encoding = 'ISO-8859-1')

for row in df.itertuples():
	eid = str(row.eid)
	resp = requests.get(url + eid, headers = headers)
	auDat = resp.json()	
	columns = ['EID', 'First Name', 'Last Name', 'Initials', 'Whole Name', 'Current Affiliation', 'Affiliation History']
	if resp.status_code == 200:
		try:
			fName = auDat['author-retrieval-response'][0]['author-profile']['preferred-name']['given-name']
コード例 #12
0
ファイル: get_data.py プロジェクト: GeorgyGol/SolnDB
 def get_countries(ret):
     """Return country names indexed by their two-letter ISO code.

     Takes the ``iso2c`` and ``name`` columns of ``pddr.get_countries()``,
     renames them to ``id`` and ``Country`` and uses ``id`` as the index.
     The ``ret`` argument is not used.
     """
     column_names = {'iso2c': 'id', 'name': 'Country'}
     selected = pddr.get_countries()[['iso2c', 'name']]
     renamed = selected.rename(columns=column_names)
     return renamed.set_index('id')
コード例 #13
0
 def test_wdi_get_countries(self):
     """The country table must be populated and carry coordinates."""
     result = get_countries()
     # assertIn / assertGreater report the actual values on failure,
     # which assertTrue on a boolean expression cannot do.
     self.assertIn('Zimbabwe', list(result['name']))
     self.assertGreater(len(result), 100)
     # The means are only non-null when coordinate values are present.
     self.assertTrue(pandas.notnull(result.latitude.mean()))
     self.assertTrue(pandas.notnull(result.longitude.mean()))
コード例 #14
0
def get_wb_indicators(country_codes=None,
                      indicator_codes=None,
                      start_year=2007,
                      end_year=2017):
    """
    Download World Bank indicators and return them in long format,
    enriched with country codes and indicator metadata.

    Args:
        country_codes ([str]): List of ISO3 country codes
        indicator_codes ([str]): List of World Bank Indicator Codes
        start_year (int): First year passed to ``wb.download``
        end_year (int): Last year passed to ``wb.download``

    Returns:
        pd.DataFrame: One row per country, year and indicator, with the
        columns
          -iso3c
          -country
          -year
          -indicatorID
          -indicatorName
          -indicatorCategory (the indicator's ``topics``)
          -unit
          -value
          -source (the indicator's ``sourceOrganization``)
    """
    # Raw download with the index flattened into regular columns.
    downloaded = wb.download(indicator=indicator_codes,
                             country=country_codes,
                             start=start_year,
                             end=end_year).reset_index()

    # Attach the ISO3 code by matching on the country name; the joined
    # "name" column only repeats "country", so it is dropped again.
    country_lookup = wb.get_countries()[["iso3c", "name"]]
    with_iso = pd.merge(downloaded,
                        country_lookup,
                        left_on='country',
                        right_on="name").drop(columns=["name"])

    # Wide -> long: every indicator column becomes (indicatorID, value).
    long_form = with_iso.melt(id_vars=['country', 'year', 'iso3c'],
                              var_name='indicatorID',
                              value_name='value')

    # Join the indicator metadata onto each row.
    indicator_info = wb.get_indicators()
    described = pd.merge(long_form,
                         indicator_info,
                         left_on='indicatorID',
                         right_on="id")

    # Drop the metadata columns that are not reported, then rename the
    # rest. "source" is removed before "sourceOrganization" takes over
    # that name. index=str turns the row labels into strings.
    described = described.drop(columns=["id", "source", "sourceNote"])
    column_names = {
        "name": "indicatorName",
        "topics": "indicatorCategory",
        "sourceOrganization": "source"
    }
    described = described.rename(index=str, columns=column_names)

    # Final column order.
    output_columns = [
        'iso3c', 'country', 'year', 'indicatorID', 'indicatorName',
        'indicatorCategory', 'unit', 'value', 'source'
    ]
    return described[output_columns]
コード例 #15
0
def get_wb_countries():
    """Return the World Bank country table indexed by name, without the
    rows whose ``lendingType`` is "Aggregates" (country groupings)."""
    by_name = wb.get_countries().set_index("name")

    # Country groupings are marked with the lending type "Aggregates".
    is_single_country = by_name["lendingType"] != "Aggregates"
    return by_name[is_single_country]