def test_wdi_get_countries(self):
    """Country metadata is retrievable via all four call styles.

    Covers the module-level function and the reader class, each with and
    without an explicitly supplied requests session.
    """
    session = requests.Session()
    frames = (
        get_countries(),
        WorldBankReader().get_countries(),
        get_countries(session=session),
        WorldBankReader(session=session).get_countries(),
    )
    for frame in frames:
        # Sanity checks: a known country, a plausible row count, and
        # usable coordinate columns.
        self.assertTrue('Zimbabwe' in list(frame['name']))
        self.assertTrue(len(frame) > 100)
        self.assertTrue(pd.notnull(frame.latitude.mean()))
        self.assertTrue(pd.notnull(frame.longitude.mean()))
def test_wdi_get_countries(self):
    """Country metadata is retrievable via all four call styles (pytest).

    Same coverage as the unittest variant, expressed with bare asserts.
    """
    session = requests.Session()
    frames = [
        get_countries(),
        WorldBankReader().get_countries(),
        get_countries(session=session),
        WorldBankReader(session=session).get_countries(),
    ]
    for frame in frames:
        # A known country, a plausible row count, and usable coordinates.
        assert 'Zimbabwe' in list(frame['name'])
        assert len(frame) > 100
        assert pd.notnull(frame.latitude.mean())
        assert pd.notnull(frame.longitude.mean())
def getGDP(codes):
    """
    Download GDP data from World Bank.
    Returns data frame in a format: country, GDP, iso3c, region
    iso3c is a ISO-3 country code which can be used later for join
    """
    # Only request codes the World Bank API actually knows about.
    known_codes = [code for code in list(codes) if code in wb.country_codes]
    gdp = wb.download(
        indicator='NY.GDP.PCAP.PP.CD',
        country=known_codes,
        errors="warn",
        start=2012,
        end=2012,
    ).reset_index()
    gdp = gdp.rename(columns={"NY.GDP.PCAP.PP.CD": "GDP"})
    gdp = gdp.drop("year", axis=1)
    # Attach ISO3 code and region by joining on the country name.
    meta = wb.get_countries()[["iso3c", "name", "region"]]
    gdp = gdp.merge(meta, left_on="country", right_on="name", how="left")
    return gdp.drop("name", axis=1)
def _load_dataset(start=2010, end=2020, extra_indicators=None):
    """Download the World Bank indicator panel and attach ISO3 codes.

    Parameters
    ----------
    start, end : int
        First and last year requested from the World Bank API.
    extra_indicators : dict, optional
        Mapping of column name -> World Bank series code, layered on top
        of ``WORLD_BANK_INDICATORS``. Defaults to no extra indicators.

    Returns
    -------
    pandas.DataFrame
        One row per country with one column per indicator plus ``ISO``.
    """
    _log.info("Loading dataset")
    # FIX: the old signature used ``extra_indicators={}`` — a mutable
    # default argument shared across calls. Use a None sentinel instead.
    if extra_indicators is None:
        extra_indicators = {}
    indicators = dict(WORLD_BANK_INDICATORS, **extra_indicators)
    all_data = []
    for name, series in indicators.items():
        data = wb.download(indicator=series, country='all',
                           start=start, end=end)
        # Keep the last observation per country (index level 0) in the
        # requested window and expose it under the friendly column name.
        all_data.append(data.sort_index().groupby(level=0).last()
                        .rename(columns={series: name}))
    country_data = pd.concat(all_data, axis=1)
    country_data.index = country_data.index.set_names(['country'])
    # Add ISO3 codes by joining on the country name; inner join drops
    # rows with no match in the country table.
    mapping = wb.get_countries()[['name', 'iso3c']].rename(
        columns={'iso3c': 'ISO'})
    country_data = (country_data.reset_index()
                    .merge(mapping, left_on='country', right_on='name',
                           how='inner')
                    .drop('name', axis='columns'))
    return country_data
def download_population_all_countries(start_year, end_year):
    """Download population data of all countries from the World Bank,
    and return it as a long DataFrame.
    """
    # Total-population series for every reporting entity.
    raw = wb.download(
        indicator='SP.POP.TOTL',
        country='all',
        start=start_year,
        end=end_year)
    pop = raw.reset_index().rename(
        columns={'SP.POP.TOTL': 'population', 'country': 'country_name'})
    # The country table also contains regional aggregates; genuine
    # countries are the rows with a nonempty capital city.
    meta = wb.get_countries()
    meta = meta[meta.capitalCity != '']
    # Map country names to country codes via an inner join.
    merged = pd.merge(pop, meta, how='inner',
                      left_on='country_name', right_on='name')
    merged = merged.reset_index(drop=True)
    merged['year_start'] = pd.to_datetime(merged.year, format="%Y")
    return merged
def download_gdp_per_capita_data(start_year, end_year, countries='all'):
    """Download three GDP-per-capita series from the World Bank and join
    country metadata onto the result.
    """
    series = ['NY.GDP.PCAP.KD', 'NY.GDP.PCAP.PP.CD', 'NY.GDP.PCAP.PP.KD']
    # World Bank series code -> readable column name.
    readable = {
        'NY.GDP.PCAP.KD': 'gdp_per_capita_constant2010USD',
        'NY.GDP.PCAP.PP.CD':
            'gdp_per_capita_PPP_current_international_dollar',
        'NY.GDP.PCAP.PP.KD':
            'gdp_per_capita_PPP_constant_2011_international_dollar',
    }
    raw = wb.download(indicator=series, country=countries,
                      start=start_year, end=end_year)
    flat = raw.reset_index().rename(columns=readable)
    # Inner join keeps only rows with a matching country record.
    return pd.merge(flat, wb.get_countries(),
                    left_on='country', right_on='name', how='inner')
def to_flourish(indicator, start_yr, end_yr, country='all', save_csv=True):
    """
    Downloads data from the World Bank and converts it to the format for
    making bar chart races in Flourish.

    Parameters:
    * indicator: the world bank code available on the World Bank Page.
    * country: a string if single or a list if multiple of the ISO3 codes
      of the locations.
    * start_yr: the first year of data that you want to get.
    * end_yr: the final year of data that you want to collect.
    * save_csv: saves the file as a csv in your working directory.
    """
    raw = wb.download(indicator=indicator, country=country,
                      start=start_yr, end=end_yr).reset_index()
    # Flourish wants one row per country with a column per year.
    wide = pd.pivot_table(raw, values=indicator, columns='year',
                          index='country').reset_index()
    print("Processed the Indicator Data")
    # Drop World Bank regional aggregates, keep real countries only.
    info = wb.get_countries()
    info = info[info.region != 'Aggregates']
    merged = pd.merge(info[['iso2c', 'name', 'region']], wide,
                      left_on='name', right_on='country')
    # Flag images keyed by ISO2 code, inserted where Flourish expects them.
    merged.insert(
        3, 'Image URL',
        merged['iso2c'].apply(
            lambda i: f"https://www.countryflags.io/{i}/flat/64.png"))
    merged = merged.drop(columns=['iso2c', 'country'])
    if save_csv:
        merged.to_csv(
            f"flourish_data/flourish_{indicator}_{datetime.now().strftime('%d-%m-%Y %H-%M')}.csv",
            index=False)
    return merged
def test_wdi_get_countries(self):
    """The country table should list Zimbabwe among 100+ rows."""
    countries = get_countries()
    names = list(countries['name'])
    self.assertTrue('Zimbabwe' in names)
    self.assertTrue(len(countries) > 100)
import plotly.express as px
import dash_bootstrap_components as dbc
import pandas as pd
from pandas_datareader import wb

# NOTE(review): `Dash` is used below but not imported in this chunk —
# presumably `from dash import Dash` appears elsewhere in the file; confirm.
app = Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

# World Bank series code -> human-readable label shown in the app.
indicators = {
    "IT.NET.USER.ZS": "Individuals using the Internet (% of population)",
    "SG.GEN.PARL.ZS": "Proportion of seats held by women in national parliaments (%)",
    "EN.ATM.CO2E.KT": "CO2 emissions (kt)",
}

# get country name and ISO id for mapping on choropleth
countries = wb.get_countries()
# Rows without a capital city are regional aggregates, not countries;
# blank them to NaN so dropna removes them.
countries["capitalCity"].replace({"": None}, inplace=True)
countries.dropna(subset=["capitalCity"], inplace=True)
countries = countries[["name", "iso3c"]]
# NOTE(review): Kosovo is excluded here — presumably the choropleth layer
# has no matching geometry for it; verify before changing.
countries = countries[countries["name"] != "Kosovo"]
countries = countries.rename(columns={"name": "country"})


def update_wb_data():
    # Retrieve specific world bank data from API
    # (all indicator codes, restricted to the ISO3 codes selected above)
    df = wb.download(indicator=(list(indicators)),
                     country=countries["iso3c"],
                     start=2005, end=2016)
    df = df.reset_index()
    # Year comes back as a string index level; make it numeric for plotting.
    df.year = df.year.astype(int)
    # NOTE(review): no return statement is visible here — the function
    # likely continues beyond this chunk.
# file does not exist yet pass if not startrow: startrow = 0 df.to_excel(writer, sheet_name, startrow=startrow, **to_excel_kwargs) writer.save() myKey = #input your Scopus Key headers = {'accept':'application/json', 'x-els-apikey':myKey} url = 'http://api.elsevier.com/content/author/author_id/' dataFile = #input path to file you want to write data to #get country data from pandas_datareader (World Bank data) country_data = wb.get_countries().fillna('none') writer = pd.ExcelWriter('country_data.xlsx') country_data.to_excel(writer, 'Country Data') writer.save() #get affiliation data from Scopus, assumes you have a list of eids in a file with an 'eid' column header df = pd.read_excel('scopus_author_data.xlsx', encoding = 'ISO-8859-1') for row in df.itertuples(): eid = str(row.eid) resp = requests.get(url + eid, headers = headers) auDat = resp.json() columns = ['EID', 'First Name', 'Last Name', 'Initials', 'Whole Name', 'Current Affiliation', 'Affiliation History'] if resp.status_code == 200: try: fName = auDat['author-retrieval-response'][0]['author-profile']['preferred-name']['given-name']
def get_countries(ret):
    """Return the World Bank country table indexed by ISO-2 code.

    ``ret`` is accepted for signature compatibility with the caller but
    is not used by this function.
    """
    table = pddr.get_countries()
    table = table[['iso2c', 'name']]
    table = table.rename(columns={'iso2c': 'id', 'name': 'Country'})
    return table.set_index('id')
def test_wdi_get_countries(self):
    """Country table has Zimbabwe, 100+ rows, and usable coordinates."""
    countries = get_countries()
    self.assertTrue('Zimbabwe' in list(countries['name']))
    self.assertTrue(len(countries) > 100)
    # Coordinate columns must contain enough numeric data to average.
    self.assertTrue(pandas.notnull(countries.latitude.mean()))
    self.assertTrue(pandas.notnull(countries.longitude.mean()))
def get_wb_indicators(country_codes=None, indicator_codes=None,
                      start_year=2007, end_year=2017):
    """Gets the World Bank indicators.

    Args:
        country_codes ([str]): List of ISO3 country codes
        indicator_codes ([str]): List of World Bank Indicator Codes

    Returns:
        pd.DataFrame: one row per (country, year, indicator) with the
        columns iso3c, country, year, indicatorID, indicatorName,
        indicatorCategory, unit, value and source.
    """
    # Pull the raw values and flatten the (country, year) MultiIndex.
    values = wb.download(indicator=indicator_codes, country=country_codes,
                         start=start_year, end=end_year).reset_index()

    # Attach ISO3 codes by joining on the country name, then drop the
    # duplicate name column.
    iso_lookup = wb.get_countries()[["iso3c", "name"]]
    values = pd.merge(values, iso_lookup,
                      left_on='country', right_on="name")
    values = values.drop(columns=["name"])

    # Long format: one row per indicator observation.
    long_values = values.melt(id_vars=['country', 'year', 'iso3c'],
                              var_name='indicatorID', value_name='value')

    # Enrich each observation with indicator metadata.
    enriched = pd.merge(long_values, wb.get_indicators(),
                        left_on='indicatorID', right_on="id")
    enriched = enriched.drop(columns=["id", "source", "sourceNote"])
    enriched = enriched.rename(index=str, columns={
        "name": "indicatorName",
        "topics": "indicatorCategory",
        "sourceOrganization": "source",
    })

    # Fixed column order expected by downstream consumers.
    return enriched[[
        'iso3c', 'country', 'year', 'indicatorID', 'indicatorName',
        'indicatorCategory', 'unit', 'value', 'source'
    ]]
def get_wb_countries():
    """Return the World Bank country table indexed by name, with the
    aggregate groupings removed."""
    table = wb.get_countries().set_index("name")
    # Aggregates (regional/income groupings) carry lendingType
    # "Aggregates"; keep only real countries.
    return table[table.lendingType != "Aggregates"]