def get_primary_energy_consumption() -> Tuple[pd.DataFrame, List[dict]]:
    """Retrieve the primary energy consumption variables as one table.

    Calls ``get_owid_variable`` once per variable id, renames each frame's
    ``entity``/``year``/``value`` columns to ``Country``/``Year``/<variable
    name>, drops the ``variable`` column, and outer-merges the per-variable
    frames on ``Country`` and ``Year``.

    Returns:
        A ``(df, codebook)`` tuple. ``df`` is the merged frame; ``codebook``
        holds one dict per variable with ``name``, ``description`` and
        ``source`` keys taken from the variable's metadata.

    Raises:
        AssertionError: If a variable's name does not contain "energy"
            (case-insensitive).
    """
    print("retrieving primary energy consumption data...")
    variable_ids = [143360, 143363, 143364]
    frames = []
    codebook = []

    for variable_id in variable_ids:
        frame, meta = get_owid_variable(variable_id, to_frame=True)

        # Sanity check that the id still points at an energy variable.
        assert re.search(
            r"energy", meta["name"], re.I
        ), "'energy' does not appear in energy consumption variable"

        column_names = {
            "entity": "Country",
            "year": "Year",
            "value": meta["name"],
        }
        frame = frame.rename(columns=column_names)
        frame = frame.drop(columns=["variable"])
        frames.append(frame)

        entry = {
            "name": meta["name"],
            "description": meta["description"],
            "source": meta["source"]["name"],
        }
        codebook.append(entry)

    # Fold the frames together left to right; validate="1:1" makes pandas
    # reject duplicate (Country, Year) keys on either side of a merge.
    merged = frames[0]
    for frame in frames[1:]:
        merged = pd.merge(
            merged,
            frame,
            on=["Country", "Year"],
            how="outer",
            validate="1:1",
        )
    return merged, codebook
def get_n2o_emissions() -> Tuple[pd.DataFrame, List[dict]]:
    """Retrieve the N2O emissions variables as one country-year table.

    Calls ``get_owid_variable`` once per variable id, renames each frame's
    ``entity``/``year``/``value`` columns to ``Country``/``Year``/<variable
    name>, drops the ``variable`` column, and outer-merges the per-variable
    frames on ``Country`` and ``Year``. Finally, the entity
    "European Union (27)" is renamed to "EU-27".

    Returns:
        A ``(df, codebook)`` tuple. ``df`` is the merged frame; ``codebook``
        holds one dict per variable with ``name``, ``description`` and
        ``source`` keys.

    Raises:
        AssertionError: If a description that needs rewriting to
            "million tonnes" contains no "tonnes" to rewrite.
    """
    print("retrieving N2O emissions data...")
    variables = [142812, 142848]
    dataframes = []
    codebook = []
    for var_id in variables:
        df, meta = get_owid_variable(var_id, to_frame=True)

        # Appends ' (N2O)' to the variable name if it is not already there,
        # because the name for 142848 ("Total including LUCF (per capita)")
        # does not contain N2O (for disambiguation from other gases).
        # A local name is used so the metadata dict returned by the helper
        # is not mutated.
        name = meta["name"]
        if not re.search(r"n2o", name, re.I):
            name += " (N2O)"

        # fix: if conversionFactor==1e6, then the variable is actually stored
        # in million tonnes. Updates the description accordingly.
        # "conversionFactor" is looked up with .get() because the key is not
        # guaranteed to be present in the display metadata.
        description = meta["description"]
        display = meta["display"]
        if (
            display["unit"] == "tonnes CO₂e"
            and display.get("conversionFactor") == 1e6
            and "million tonnes" not in description
        ):
            new_description = description.replace("tonnes", "million tonnes")
            assert (
                new_description != description
                and "million tonnes" in new_description
            ), 'Expected "million tonnes" to be present in modified description.'
            description = new_description

        df = df.rename(columns={
            "entity": "Country",
            "year": "Year",
            "value": name,
        }).drop(columns=["variable"])
        dataframes.append(df)
        codebook.append({
            "name": name,
            "description": description,
            "source": meta["source"]["name"],
        })

    # validate="1:1" makes pandas reject duplicate (Country, Year) keys.
    df = reduce(
        lambda left, right: pd.merge(
            left, right, on=["Country", "Year"], how="outer", validate="1:1"),
        dataframes,
    )

    # fix: replaces "European Union (27)" with "EU-27" for consistency with
    # the CO2 emissions dataset. Assigned back explicitly: an inplace replace
    # on the selected column is a chained operation that pandas warns about
    # and that does not update ``df`` under Copy-on-Write.
    df["Country"] = df["Country"].replace("European Union (27)", "EU-27")
    return df, codebook
def get_gdp() -> Tuple[pd.DataFrame, List[dict]]:
    """Retrieve the GDP variable as a country-year table.

    Calls ``get_owid_variable`` for variable id 146201, renames the
    ``entity``/``year``/``value`` columns to ``Country``/``Year``/<variable
    name>, and drops the ``variable`` column.

    Returns:
        A ``(df, codebook)`` tuple. ``codebook`` is a one-element list whose
        dict has ``name``, ``description`` and ``source`` keys taken from the
        variable's metadata.
    """
    print("retrieving gdp data...")
    frame, meta = get_owid_variable(146201, to_frame=True)

    column_names = {
        "entity": "Country",
        "year": "Year",
        "value": meta["name"],
    }
    frame = frame.rename(columns=column_names)
    frame = frame.drop(columns=["variable"])

    entry = {
        "name": meta["name"],
        "description": meta["description"],
        "source": meta["source"]["name"],
    }
    return frame, [entry]
def get_ch4_emissions() -> Tuple[pd.DataFrame, List[dict]]:
    """Retrieve the CH4 emissions variables as one country-year table.

    Calls ``get_owid_variable`` once per variable id, renames each frame's
    ``entity``/``year``/``value`` columns to ``Country``/``Year``/<variable
    name>, drops the ``variable`` column, and outer-merges the per-variable
    frames on ``Country`` and ``Year``. Finally, the entity
    "European Union (27)" is renamed to "EU-27".

    Returns:
        A ``(df, codebook)`` tuple. ``df`` is the merged frame; ``codebook``
        holds one dict per variable with ``name``, ``description`` and
        ``source`` keys.

    Raises:
        AssertionError: If a variable's name does not contain "ch4"
            (case-insensitive), or if a description that needs rewriting to
            "million tonnes" contains no "tonnes" to rewrite.
    """
    print("retrieving CH4 emissions data...")
    variables = [142803, 142841]
    dataframes = []
    codebook = []
    for var_id in variables:
        df, meta = get_owid_variable(var_id, to_frame=True)
        name = meta["name"]
        assert re.search(
            r"ch4", name, re.I
        ), "'ch4' does not appear in ch4 emissions variable"

        # fix: if conversionFactor==1e6, then the variable is actually stored
        # in million tonnes. Updates the description accordingly.
        # "conversionFactor" is looked up with .get() because the key is not
        # guaranteed to be present in the display metadata.
        description = meta["description"]
        display = meta["display"]
        if (
            display["unit"] == "tonnes CO₂e"
            and display.get("conversionFactor") == 1e6
            and "million tonnes" not in description
        ):
            new_description = description.replace("tonnes", "million tonnes")
            assert (
                new_description != description
                and "million tonnes" in new_description
            ), 'Expected "million tonnes" to be present in modified description.'
            description = new_description

        df = df.rename(columns={
            "entity": "Country",
            "year": "Year",
            "value": name,
        }).drop(columns=["variable"])
        dataframes.append(df)
        codebook.append({
            "name": name,
            "description": description,
            "source": meta["source"]["name"],
        })

    # validate="1:1" makes pandas reject duplicate (Country, Year) keys.
    df = reduce(
        lambda left, right: pd.merge(
            left, right, on=["Country", "Year"], how="outer", validate="1:1"),
        dataframes,
    )

    # fix: replaces "European Union (27)" with "EU-27" for consistency with
    # the CO2 emissions dataset. Assigned back explicitly: an inplace replace
    # on the selected column is a chained operation that pandas warns about
    # and that does not update ``df`` under Copy-on-Write.
    df["Country"] = df["Country"].replace("European Union (27)", "EU-27")
    return df, codebook
def get_co2_emissions() -> Tuple[pd.DataFrame, List[dict]]:
    """Retrieve the CO2 emissions variables as one country-year table.

    Calls ``get_owid_variable`` once per variable id (with a ``tqdm``
    progress bar). Variables whose unit is exactly "tonnes" have their values
    divided by 1e6 and their description rewritten to say "million tonnes".
    Each frame's ``entity``/``year``/``value`` columns are renamed to
    ``Country``/``Year``/<variable name>, the ``variable`` column is dropped,
    and the per-variable frames are outer-merged on ``Country`` and ``Year``.

    Returns:
        A ``(df, codebook)`` tuple. ``df`` is the merged frame; ``codebook``
        holds one dict per variable with ``name``, ``description`` and
        ``source`` keys.

    Raises:
        AssertionError: If a variable's name does not contain "co2"
            (case-insensitive), its unit is not one of the expected units, a
            "tonnes" variable carries a ``conversionFactor`` in its display
            metadata, or its description contains no "tonnes" to rewrite.
    """
    print("retrieving CO2 emissions data...")
    variable_ids = [
        179841, 179842, 179845, 179847, 179848, 179851, 179852, 179855,
        179856, 179859, 179860, 179863, 179864, 179867, 179868, 179871,
        179872, 179873, 179874, 179875, 179876, 179877, 179878, 179880,
        179882, 179884, 179886, 179888, 179890, 179892, 179893, 179894,
        179895, 179896, 179897, 179898, 179899, 179900, 179901, 179902,
        179903, 179904, 179905, 179906,
    ]
    expected_units = [
        "tonnes",
        "tonnes per capita",
        "%",
        "kilograms per $PPP",
        "kilograms per kilowatt-hour",
    ]
    frames = []
    codebook = []

    for variable_id in tqdm(variable_ids):
        frame, meta = get_owid_variable(variable_id, to_frame=True)
        assert re.search(
            r"co2", meta["name"], re.I
        ), "'co2' does not appear in co2 emissions variable"

        description = meta["description"]
        unit = meta["unit"]
        assert unit in expected_units, (
            f"Encountered an unexpected unit value for variable {variable_id}: "
            f'"{unit}". get_co2_emissions() may not work as '
            "expected.")

        # Converts tonnes to million tonnes for CO2 emissions variables with
        # unit == "tonnes".
        if unit == "tonnes":
            assert "conversionFactor" not in meta["display"], (
                f"variable {variable_id} has a non-null conversion factor "
                f"({meta['display']['conversionFactor']}). Variable may not "
                "actually be stored in tonnes.")
            frame["value"] /= 1e6
            rewritten = re.sub("tonnes", "million tonnes", description)
            assert (
                rewritten != description and "million tonnes" in rewritten
            ), 'Expected "million tonnes" to be present in modified description.'
            description = rewritten

        column_names = {
            "entity": "Country",
            "year": "Year",
            "value": meta["name"],
        }
        frame = frame.rename(columns=column_names)
        frame = frame.drop(columns=["variable"])
        frames.append(frame)

        entry = {
            "name": meta["name"],
            "description": description,
            "source": meta["source"]["name"],
        }
        codebook.append(entry)

    # Fold the frames together left to right; validate="1:1" makes pandas
    # reject duplicate (Country, Year) keys on either side of a merge.
    merged = frames[0]
    for frame in frames[1:]:
        merged = pd.merge(
            merged,
            frame,
            on=["Country", "Year"],
            how="outer",
            validate="1:1",
        )
    return merged, codebook