def makeJsonPieAgroecosystems(year):
    """Build the EU-27 agroecosystem-inputs pie chart data for one year.

    Downloads Eurostat dataset 'aei_ps_inp', keeps the percentage-of-area
    rows for the EU27_2020 aggregate, writes them to CSV, then emits a
    Highcharts-style pie JSON ({'data': [{'name': ..., 'y': ...}, ...]}).
    """
    raw = eurostat.get_data_df('aei_ps_inp', flags=False)
    raw = raw.query('unit == "PC_AREA"')
    raw.rename(columns={r'geo\time': 'geo'}, inplace=True)

    # Keep only the EU-27 aggregate rows for the requested year
    rows = [
        [raw['geo'][idx], year, raw[year][idx], raw['indic_ag'][idx]]
        for idx in raw.index
        if raw['geo'][idx] == 'EU27_2020'
    ]

    out = pd.DataFrame(rows, columns=['geo_code', 'time', 'value', 'type'])
    out.to_csv('../output_data/headline_indicators/agroecosystems/agroecosystems.csv', index=False)

    # One pie slice per indicator type; getType maps the code to a label
    payload = {'data': [
        {'name': getType(out['type'][idx]), 'y': out['value'][idx]}
        for idx in out.index
    ]}

    with open('../output_data/headline_indicators/agroecosystems/agroecosystems.json', 'w', encoding='utf-8') as f:
        json.dump(payload, f, ensure_ascii=False, indent=4)
Esempio n. 2
0
def fetch_health_exp(indicators):
	"""Merge Eurostat health-expenditure indicators into one frame per EU state.

	Args:
		indicators: mapping of Eurostat dataset code -> config dict with keys
			'units', 'providers', 'indicator', 'year' and 'name'.

	Returns:
		DataFrame keyed by 'ISO3166_a2' with one column per (code, unit),
		named ``EUS_<name>_<unit>_<year>``.
	"""
	# Initialise the accumulator ONCE, before the loop. The previous code
	# re-created final_df inside the per-code loop, which silently discarded
	# every dataset's columns except the last one's.
	final_df = fetch_eu_states()['ISO3166_a2']

	for code in indicators.keys():
		full_df = eurostat.get_data_df(code, flags=False)
		# Drop the trailing 7 geo entries — presumably EU/EA aggregates
		# appended by Eurostat; TODO confirm against the raw dataset.
		geo_entities = full_df['geo\\time'].unique()[:-7]
		units = indicators[code]['units']
		providers = indicators[code]['providers']
		indicator = indicators[code]['indicator']

		# .copy() so the column assignments below don't hit a view
		indicator_df = full_df.loc[(full_df['unit'].isin(units)) &
								  (full_df[indicator].isin(providers)) &
								  (full_df['geo\\time'].isin(geo_entities))].copy()

		year = indicators[code]['year']

		for unit in units:
			indicator_name = f"EUS_{indicators[code]['name']}_{str.lower(unit)}_{year}"
			subset_df = indicator_df.loc[indicator_df['unit']==unit, ['geo\\time', year]]
			subset_df.columns = ['ISO3166_a2', indicator_name]
			# Eurostat uses 'EL' (Greece) and 'UK'; remap to ISO 3166 codes
			subset_df.loc[subset_df['ISO3166_a2'] == 'EL', 'ISO3166_a2'] = 'GR'
			subset_df.loc[subset_df['ISO3166_a2'] == 'UK', 'ISO3166_a2'] = 'GB'
			final_df = pd.merge(final_df, subset_df, how='left', on='ISO3166_a2')

	return final_df
Esempio n. 3
0
def fetch_tax():
    """Share of environmental taxes in total tax revenues (Eurostat sdg_17_50).

    Returns a long-format DataFrame with columns country / year / env_tax,
    sorted by country and year, restricted to the years 2000-2017.
    """
    dataset = eurostat.get_data_df("sdg_17_50", flags=False)
    dataset = dataset.rename(columns={dataset.columns[0]: "country"})
    # Individual countries only (module-level `countries` collection)
    dataset = dataset[dataset['country'].isin(list(countries))]
    # Wide -> long: the year column headers become a 'year' column
    long_df = dataset.melt(id_vars=["country"],
                           var_name="year",
                           value_name="env_tax")
    long_df = long_df.sort_values(by=['country', 'year'])
    return long_df.query('year > 1999 and year < 2018')
def load_percent_ict_of_gdp(db_connection: sqlite3.Connection) -> None:
    """
    Request the json-stat data from eurostat for the percentage of GDP that is created by the ICT sector.
    Load dataframe into a db table ("percent_ict_of_gdp", replaced on each run).
    """
    df = eurostat.get_data_df("tin00074")
    # Keep only the ICT-sector rows
    df = df[df["nace_r2"].isin(["ICT"])]
    # Ignore unnecessary data
    df.drop(columns="nace_r2", inplace=True)
    # Raw string: the column name contains a literal backslash and "\g" is an
    # invalid escape sequence (SyntaxWarning from Python 3.12).
    df.rename(columns={r"time\geo": "country_label"}, inplace=True)
    df.to_sql("percent_ict_of_gdp",
              db_connection,
              if_exists="replace",
              index=False)
Esempio n. 5
0
def make_eurostat_table(code, toc_df, path=target_dir):
    '''

    This function extracts and saves a eurostat table with a readable name together with 
    a yaml with its data dictionary
    
    Args:
        code (str) is the code for the table
        toc_df (df) is a dataframe with a table of contents (we use it to create the schema)
        path (str) is the destination for storing data and schema
    
    '''
    try:
        table = eurostat.get_data_df(code)

        # Eurostat tables come in two orientations: 'time\geo' (years in the
        # id column, countries as headers) or 'geo\time' (the reverse).
        # Raw strings throughout: "\g" / "\t" must stay literal backslashes.
        if r'time\geo' in table.columns:

            struct_name = r'time\geo'
            melt_var_name = 'geo'

            # Metadata columns: anything longer than a 2-char country code
            # that is not an EU/EA aggregate header
            meta_cols = [
                x for x in table.columns
                if ((len(x) > 2) & (not any(name in x
                                            for name in ['EU', 'EA'])))
            ]

        else:
            struct_name = 'geo\\time'
            melt_var_name = 'time'

            # Year headers are ints; everything else is metadata
            meta_cols = [x for x in table.columns if type(x) != int]

        # Wide -> long on the non-metadata columns
        table_long = table.melt(id_vars=meta_cols,
                                var_name=melt_var_name,
                                value_name=code)

        #Create schema

        sch = make_schema(table_long, code, toc_df, struct_name, melt_var_name)

        #Save table and schema
        table_long.to_csv(f'{path}/{code}.csv', index=False)

        with open(f'{path}/{code}.yaml', 'w') as outfile:
            yaml.dump(sch, outfile)

    # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit still
    # propagate; any other failure is treated as an API incompatibility.
    except Exception:
        #A small number of eurostat tables don't work with this package.
        print('   API failure')
Esempio n. 6
0
def fetch_economic_indicator(code, name, year, unit=None, unit_name='unit'):
	"""Fetch one Eurostat economic indicator as a two-column frame.

	Args:
		code: Eurostat dataset code.
		name: short name used to build the output column ``EUS_<name>_<year>``.
		year: the year column to extract from the dataset.
		unit: optional value to filter the *unit_name* column on.
		unit_name: column holding the unit codes (default 'unit').

	Returns:
		DataFrame with columns ['ISO3166_a2', 'EUS_<name>_<year>'].
	"""
	full_df = eurostat.get_data_df(code, flags=False)
	# Drop the trailing 7 geo entries — presumably EU/EA aggregates
	# appended by Eurostat; TODO confirm against the raw dataset.
	geo_entities = full_df['geo\\time'].unique()[:-7]
	if unit is not None:
		full_df = full_df.loc[(full_df[unit_name] == unit) &
				(full_df['geo\\time'].isin(geo_entities))]
	else:
		full_df = full_df.loc[(full_df['geo\\time'].isin(geo_entities))]
	indicator_name = f"EUS_{name}_{year}"

	# .copy() so the .loc assignments below operate on an independent frame
	# instead of a view (avoids SettingWithCopyWarning / lost writes);
	# consistent with fetch_health_exp.
	full_df = full_df[['geo\\time', year]].copy()
	full_df.columns = ['ISO3166_a2', indicator_name]
	# Eurostat uses 'EL' (Greece) and 'UK'; remap to ISO 3166 codes
	full_df.loc[full_df['ISO3166_a2'] == 'EL', 'ISO3166_a2'] = 'GR'
	full_df.loc[full_df['ISO3166_a2'] == 'UK', 'ISO3166_a2'] = 'GB'
	return full_df
Esempio n. 7
0
def fetch_ghg_pc():
    """Greenhouse gas emissions per capita, long format.

    Eurostat dataset t2020_rd300; codes can be found at the Eurostat data
    browser (https://ec.europa.eu/eurostat/data/database).

    Returns a DataFrame with columns country / year / ghg_e_pc, sorted by
    country then year, restricted to individual countries.
    """
    raw = eurostat.get_data_df("t2020_rd300", flags=False)
    raw = raw.rename(columns={raw.columns[0]: "country"})
    # Individual countries only (module-level `countries` collection)
    national = raw[raw['country'].isin(list(countries))]
    # Wide -> long: year column headers become a 'year' column
    long_df = national.melt(id_vars=["country"],
                            var_name="year",
                            value_name="ghg_e_pc")
    return long_df.sort_values(by=['country', 'year'])
def load_percent_ent_using_cloud_computing(
        db_connection: sqlite3.Connection) -> None:
    """
    Request the json-stat data from eurostat for the percentage of cloud computing services used by enterprise companies.
    Load dataframe into a db table ("percent_ent_using_cc", replaced on each run).
    """
    df = eurostat.get_data_df("isoc_cicce_use")
    # Filter the data to match the custom query
    df = df[df["unit"].isin(["PC_ENT"])
            & df["indic_is"].isin(["E_CC"])
            & df["sizen_r2"].isin(["10_C10_S951_XK"])]
    # Ignore unnecessary data
    df.drop(columns=["unit", "sizen_r2", "indic_is"], inplace=True)
    # Raw string: the column name contains a literal backslash and "\g" is an
    # invalid escape sequence (SyntaxWarning from Python 3.12).
    df.rename(columns={r"time\geo": "country_label"}, inplace=True)
    df.to_sql("percent_ent_using_cc",
              db_connection,
              if_exists="replace",
              index=False)
def eurostat_requests(dict_codes, filepath):
    '''
    Request data from Eurostat for each variable (key) in *dict_codes* using
    the associated Eurostat code (value), slice the known large datasets down
    to the series of interest, log whether each result differs from the
    previously saved copy, and write the data to ``<filepath>/<variable>.csv``.
    '''
    # Per-dataset slicing rules: a row is kept only when EVERY listed column
    # equals its required value. Equivalent to the former if/elif chain that
    # dropped rows where any column mismatched.
    slice_filters = {
        'Gross_value_added_and_income_by_industry': {
            "s_adj": "NSA", "na_item": "B1G",
            "unit": "CLV_I05", "nace_r2": "TOTAL"},
        'GDP_and_main_components': {
            "na_item": "B1GQ", "unit": "CLV_I05"},
        'Harmonized_index_of_consumer_prices_monthly_data': {
            "s_adj": "NSA", "indic": "CP-HI00", "unit": "HICP2015"},
        'Employmentby_industry': {
            "s_adj": "SCA", "na_item": "EMP_DC",
            "unit": "PCH_PRE_PER", "nace_r2": "TOTAL"},
        'Unemployment_by_sex_and_age_monthly_data': {
            "s_adj": "SA", "sex": "T",
            "unit": "PC_ACT", "age": "TOTAL"},
    }

    write_log('--- FILE CHECKS')
    existing_files = list_directory(
        filepath)  # Check existing files in the output folder
    for value in dict_codes:
        # Fetch the dataframe for this variable's Eurostat code
        df = eurostat.get_data_df(dict_codes[value])
        # Slice large datasets down to the configured series
        for column, required in slice_filters.get(value, {}).items():
            df = df[df[column] == required]
        df.reset_index(drop=True, inplace=True)
        # Log whether the fresh download differs from the saved copy
        if value in existing_files:
            old_df = pd.read_csv(filepath / f"{value}.csv")
            if df.equals(old_df):
                write_log('same ' + value)
            else:
                write_log('diff ' + value)

        path_file = filepath / f"{value}.csv"  # Define the path to th file
        df.to_csv(path_file, index=False)  # Write the dataframe to csv
Esempio n. 10
0
def fetch_gdp():
    """Real GDP per capita (Eurostat sdg_08_10), long format, split by unit.

    Returns:
        tuple of two DataFrames (country / unit / year / temp_value):
        rows with unit 'CLV10_EUR_HAB' and rows with unit 'CLV_PCH_PRE_HAB'.
    """
    # Eurostat Code - always lower case
    # Codes can be found at Eurostat data  browser 
    # https://ec.europa.eu/eurostat/data/database
    code = "sdg_08_10" #
    df = eurostat.get_data_df(code, flags=False)
    df.rename(columns={ df.columns[2]: "country" }, inplace = True)
    # Only individual countries; .copy() so the `del` below mutates an
    # independent frame rather than a view of df (SettingWithCopyWarning).
    df2 = df[df['country'].isin(list(countries))].copy()
    # Drop 2019 columm before melt
    del df2[2019]

    # This dataset contains two variables (units), so after melting it is
    # split into two data sets by unit.
    df3 = df2.melt(id_vars=["country", "unit"],
                   var_name="year",
                   value_name="temp_value")
    df3.sort_values(by=['country', 'year'], inplace=True)
    return (df3[df3['unit']=='CLV10_EUR_HAB'], df3[df3['unit']=='CLV_PCH_PRE_HAB'])
def createFilesFromEurostat(eurostat_id, indicator_id, type_column_id, years, calculate_share):
    """Download one Eurostat dataset, reshape it, optionally compute each
    row's share of its geo/time TOTAL, then write CSV and visualization files.

    Args:
        eurostat_id: Eurostat dataset code.
        indicator_id: internal indicator identifier.
        type_column_id: dataset column holding the type/category codes.
        years: years to keep.
        calculate_share: when truthy, add a 'value_share_calculated' column.
    """
    df = eurostat.get_data_df(eurostat_id, flags=False)
    df.rename(columns={r'geo\time': 'geo'}, inplace=True)
    df = df.loc[df['geo'].isin(geo_category_codes_list)]

    df = filterDataFramesEurostat(eurostat_id, indicator_id, years, df)

    df = reorganizeDataFrames(indicator_id, 'geo', None, None, type_column_id, years, df)

    if calculate_share:
        df['value_share_calculated'] = ''
        for i in df.index:
            geo_category_code = df['geo_code'][i]
            time = df['time'][i]
            # Share of this row's value in the matching TOTAL row
            denominator = df.query('geo_code == @geo_category_code & time == @time & type == "TOTAL"')['value'].values[0]
            # .loc assignment instead of chained df[...][i] = ...: chained
            # indexing may write to a temporary copy (SettingWithCopyWarning)
            df.loc[i, 'value_share_calculated'] = df['value'][i] / denominator

    df = createCsvFiles(eurostat_id, indicator_id, df)

    # This indicator/dataset pair has no visualization output
    if indicator_id == '2.1.d.1' and eurostat_id == 'sdg_15_60':
        return

    createVisualizationFiles(indicator_id, years, df)
Esempio n. 12
0
                return pd.Series({"Latitude": location.lat, "Longitude": location.lon})
            except:
                return pd.Series({"Latitude": np.nan, "Longitude": np.nan})

def get_NutsCode(row: pd.Series) -> pd.Series:
    """Resolve a row's (Latitude, Longitude) to its NUTS level-2 region.

    Returns a Series with 'NUTS_ID' and 'NUTS_NAME'; both NaN when the
    lookup fails for any reason (no match, no level-2 entry, bad coords).
    """
    try:
        result = nf.find(lat=row["Latitude"], lon=row["Longitude"])
        levels = [r['LEVL_CODE'] for r in result]
        # Keep only the level-2 (NUTS2) match
        result = result[levels.index(2)]
        return pd.Series({'NUTS_ID': result['NUTS_ID'], 'NUTS_NAME': result['NUTS_NAME']})
    # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit still
    # propagate; all ordinary failures fall through to the NaN row.
    except Exception:
        return pd.Series({'NUTS_ID': np.nan, 'NUTS_NAME': np.nan})

# UN/LOCODE reference list (Country + Location codes -> Name, Coordinates)
locodes = pd.read_csv("D:\Programming\Python\SustainabilityReport\data\general\locode-list.csv")

# Eurostat maritime dataset; keep only reporting entries whose code ends in
# a full 5-letter code (2-letter country + 3-letter location), i.e. ports
# rather than aggregate rows.
ports = eurostat.get_data_df('mar_go_aa')
ports = pd.DataFrame(ports.loc[ports[r"rep_mar\time"].str.contains(r'([a-zA-Z]{5})$'), r"rep_mar\time"])
ports = ports.reset_index(drop=True)
# Split the trailing 5 letters into country (2) and location (3) codes
ports["Country"] = ports[r"rep_mar\time"].map(lambda x: x[-5:-3])
ports["Location"] = ports[r"rep_mar\time"].map(lambda x: x[-3:])

# Attach port name/coordinates from the LOCODE list
ports = ports.merge(locodes, on=["Country", "Location"], how="left")
ports = ports.loc[:, [r"rep_mar\time", "Country", "Location", "Name", "Coordinates"]]

# Geocode each port (getLatAndLon defined above), then resolve NUTS2 regions
ports = ports.merge(ports.apply(getLatAndLon, axis=1), left_index=True, right_index=True)

ports = ports.merge(ports.apply(get_NutsCode, axis=1), left_index=True, right_index=True)

# Drop ports that could not be geocoded or matched to a NUTS region
ports = ports.dropna()

# NOTE(review): the Windows paths use unescaped backslashes ("\P", "\d", …);
# they work today but emit SyntaxWarning on Python 3.12+ — prefer raw strings.
ports.loc[:, [r"rep_mar\time", "NUTS_ID", "NUTS_NAME"]].to_excel("D:\Programming\Python\SustainabilityReport\data\general\PortsDict_NUTS2_2016.xlsx", index= False)
def data_Eurostat(url, container):
    """Download a list of Eurostat datasets.

    For each (label, dataset_code) pair in *url*, appends
    [label, DataFrame] to *container*.

    NOTE(review): the pivot/fillna code below references names that are never
    defined in this function (`df`, `keep_column`,
    `option_replace_nan_with_zero`, `replace_nan_with_zero`, `prefix`) — it
    looks like the body of a different function was pasted in here. As
    written, the pivot line raises NameError on the first loop iteration.
    """
    ind = 0
    for i in url:
        # Guard is redundant: ind <= len(url) is always true inside this loop
        if ind <= len(url):
            container.append([url[ind][0], eurostat.get_data_df(url[ind][1])])
        ind += 1
        df = df.pivot(index=[r'geo\time', 'year'],
                      columns=keep_column)['value'].reset_index()
    if option_replace_nan_with_zero == 1:
        df = df.fillna(0)
    elif option_replace_nan_with_zero == 2:
        df = df.apply(replace_nan_with_zero, axis=1, args=(1 / 2, ))
    elif option_replace_nan_with_zero == 3:
        df = df.apply(replace_nan_with_zero, axis=1, args=(1 / 100, ))
    if prefix:
        # Namespace all column names with the given prefix
        df = df.add_prefix(prefix)
    return df


## size
# Region size: land-use area per NUTS2 region (Eurostat tgs00002), TOTAL only
filter_landuse = ["TOTAL"]
df_size = eurostat.get_data_df('tgs00002')
# Years present in the dataset vs. years from `time_frame` that are missing
columns_avail = list(set(time_frame).intersection(set(df_size.columns)))
columns_needed = list(set(time_frame).difference(set(df_size.columns)))
df_size = df_size.loc[df_size["landuse"].isin(filter_landuse)
                      & df_size[r"geo\time"].isin(nuts2_regions),
                      [r"geo\time"] + columns_avail]
# Fill missing years via create_missing_years_mean (row-wise), merged back in
df_size = df_size.merge(df_size.apply(create_missing_years_mean,
                                      axis=1,
                                      args=(columns_needed, )),
                        left_index=True,
                        right_index=True)
# Wide -> long: one (region, year, size) row per cell
df_size = df_size.melt(id_vars=[r"geo\time"],
                       var_name="year",
                       value_name="size")
# Scale down by 1000 — presumably a unit conversion; TODO confirm source unit
df_size["size"] = df_size["size"] / 1000
Esempio n. 15
0
Created on Fri Dec  4 12:01:54 2020

@author: joana
"""

#!pip install eurostat
import eurostat
import pandas as pd

# Table of contents of all Eurostat datasets
toc = eurostat.get_toc()
df = eurostat.get_toc_df()

# Search the table of contents for crime-related datasets
crime = eurostat.subset_toc_df(df, "crime")
#code: [ilc_mddw03]

crime = eurostat.get_data_df('ilc_mddw03', flags=False)
#'PC': 'Percentage'
#'hhtyp'(TYPE OF HOUSEHOLD): TOTAL
#'incgrp'(INCOME SITUATION) : TOTAL

pd.set_option("display.max_rows", 2193)

# Filter for TOTAL in TYPE OF HOUSEHOLD and INCOME SITUATION
crimedf = crime[(crime['hhtyp'] == 'TOTAL') & (crime['incgrp'] == 'TOTAL')]

# Drop the filter columns and the unit column, now constant
crimedf = crimedf.drop(['hhtyp', 'incgrp', 'unit'], axis=1)
#crimedf = crimedf.reset_index(drop=True)

# Create a 'Country' column from the first column (the geo/time codes)
crimedf['Country'] = crimedf.iloc[:, 0]
Esempio n. 16
0
# World Bank data window: 1960-01-01 through 2017-01-01
data_dates=(datetime.datetime(1960, 1, 1), datetime.datetime(2017, 1, 1))
#Create dataframe
df = wbdata.get_dataframe(indicators, 
                            #country=country_tuple, 
                            data_date=data_dates, 
                            convert_date=False, keep_levels=True)

df.Population = df.Population.astype(int)
df.index.names = ['country','year']

#conda install -c konstantinstadler country_converter

#df.to_csv('data/wb.csv',convert_dates='ty')

code = "sdg_17_50" #Share of environmental taxes in total tax revenues
tdf = eurostat.get_data_df(code, flags=False)
tdf = tdf.rename(columns={ tdf.columns[0]: "country_code" })
tdf= tdf[tdf['country_code'].isin(list(euro_country_tuple))]
# Drop the 2018 column before melting — TODO confirm why it is excluded
del tdf[2018]
iso2_codes =tdf['country_code'].tolist()
# Change back to standard - scary!!
# NOTE(review): position 15 is assumed to be the UK entry ('UK' -> 'GB');
# this silently corrupts data if the country filter or row order changes.
iso2_codes[15]='GB'
del tdf['country_code']
# ISO2 codes -> short country names via country_converter
tdf['country']=coco.convert(names=iso2_codes, to='name_short', not_found=None)


# Wide -> long: year column headers become a 'year' column
tax=tdf.melt(id_vars=["country"], 
            var_name="year", 
            value_name="env_tax")
tax['year'] = pd.to_numeric(tax['year'], errors='coerce')
tax.sort_values(by=['country','year'],inplace = True)
Esempio n. 17
0
df_JH_confirmed.head()

# Data from UN world population 
url_WPP_tot_pop     = 'https://population.un.org/wpp/Download/Files/1_Indicators%20(Standard)/CSV_FILES/WPP2019_TotalPopulationBySex.csv'
url_WPP_pop_age_sex = 'https://population.un.org/wpp/Download/Files/1_Indicators%20(Standard)/CSV_FILES/WPP2019_PopulationByAgeSex_Medium.csv'
url_WPP_feritlity   = 'https://population.un.org/wpp/Download/Files/1_Indicators%20(Standard)/CSV_FILES/WPP2019_Fertility_by_Age.csv'
df_WPP_tot_pop      = pd.read_csv(url_WPP_tot_pop,index_col=0,parse_dates=[0])
df_WPP_pop_age_sex  = pd.read_csv(url_WPP_pop_age_sex,index_col=0,parse_dates=[0])
df_WPP_feritlity    = pd.read_csv(url_WPP_feritlity,index_col=0,parse_dates=[0])

df_WPP_tot_pop.head()


# Data from Eurostat
data_EU_pop        = eurostat.get_data_df('demo_r_gind3')   # Population
data_EU_gdp        = eurostat.get_data_df('nama_10_gdp')    # GDP
data_EU_cons       = eurostat.get_data_df('nama_10_fcs')    # Consumption
data_EU_trade      = eurostat.get_data_df('nama_10_exi')    # Import export
data_EU_short_rate = eurostat.get_data_df('irt_st_a')       # Short term int rates on govt bonds
data_EU_long_rate  = eurostat.get_data_df('irt_lt_gby10_a') # 10 y int rates on govt bonds
data_EU_unemp      = eurostat.get_data_df('une_rt_a')       # Unemployment by sex and age
data_EU_inv        = eurostat.get_data_df('nama_10_an6')    # Gross capital formation (gross investment)

data_EU_gdp.head()

# Save all data
print(os.getcwd())        # Current directory
print(os.chdir(data_dir)) # New directory (os.chdir returns None, so this prints None)

# NOTE(review): DataFrame.to_csv returns None — this rebinding discards the
# DataFrame; drop the assignment if df_JH_confirmed is needed afterwards.
df_JH_confirmed    = df_JH_confirmed.to_csv("df_JH_confirmed.csv", index=False)
Esempio n. 18
0
#
#EUROSTAT import
#

# https://ec.europa.eu/eurostat/cache/metadata/en/demomwk_esms.htm
# https://ec.europa.eu/eurostat/web/products-datasets/-/demo_r_mweek3

import eurostat

#Import the data from Eurostat (dataset demo_r_mweek3, see links above)
eurost_df = eurostat.get_data_df('demo_r_mweek3', flags=False)
#Rename column 'geo\time' to 'geo'
eurost_df.rename(columns={r'geo\time': 'geo'}, inplace=True)
#Select for totals sex and age groups (sex == "T", age == "TOTAL")
trimmed_df = eurost_df[(eurost_df['sex'] == "T")
                       & (eurost_df['age'] == "TOTAL")]
#Trim to the country totals data
ccodes = [
    'AT',
    'BE',
    'BG',
    'CH',
    'CZ',
    'DE',
    'DK',
    'EE',
    'ES',
    'FI',
    'FR',
    'HU',
    'IS',