def test_convert_magicc7_to_magicc6_variables(magicc7, magicc6):
    """Check the inverse (MAGICC7 -> MAGICC6) conversion for three spellings.

    The name as given, its upper-cased form and its lower-cased form must
    all convert to the same ``magicc6`` value.
    """
    for spelling in (magicc7, magicc7.upper(), magicc7.lower()):
        converted = convert_magicc6_to_magicc7_variables(spelling, inverse=True)
        assert converted == magicc6
def _get_column_headers_and_update_metadata(self, stream, metadata):
    """
    Build the column headers from the ``UNITS:`` and ``YEARS:`` header lines.

    Parameters
    ----------
    stream
        Stream handed to ``self._read_data_header_line`` to read the two
        header lines (units first, then variables).

    metadata : dict
        Metadata gathered so far. Any ``"unit"``, ``"units"`` or ``"gas"``
        entries are removed in place.

    Returns
    -------
    (dict, dict)
        The column headers (after ``super()._read_units`` has processed
        them) and the updated ``metadata``.
    """
    unit_row = self._read_data_header_line(stream, "UNITS:")
    years_row = self._read_data_header_line(stream, "YEARS:")
    openscm_variables = convert_magicc7_to_openscm_variables(
        convert_magicc6_to_magicc7_variables(years_row)
    )
    n_columns = len(openscm_variables)

    headers = {
        "variable": openscm_variables,
        "todo": [self._default_todo_fill_value] * n_columns,
        "unit": unit_row,
        "region": ["World"] * n_columns,
    }

    # these keys are dropped from the metadata; a missing key is fine
    for stale_key in ("unit", "units", "gas"):
        metadata.pop(stale_key, None)

    # get rid of confusing units before passing to read_units
    headers["unit"] = [entry.replace("kt/yr", "kt") for entry in headers["unit"]]

    return super()._read_units(headers), metadata
def _read_magicc6_style_header(self, stream, metadata):
    """
    Read the header of a file written in MAGICC6 style.

    Such files have only one header line, so the rest of the column
    information must be inferred. The regions header line is assumed to
    start with 'COLCODE' or 'YEARS' instead of 'REGIONS'.

    Parameters
    ----------
    stream
        Stream handed to ``self._read_data_header_line``.

    metadata : dict
        Metadata gathered so far. Must hold the unit under ``"unit"`` or
        ``"units"``; ``"unit"``, ``"units"`` and ``"gas"`` are removed in
        place.

    Returns
    -------
    (dict, dict)
        The inferred column headers and the updated ``metadata``.

    Raises
    ------
    KeyError
        If ``metadata`` has neither ``"unit"`` nor ``"units"``.
    """
    regions = self._read_data_header_line(stream, ["COLCODE", "YEARS"])
    n_regions = len(regions)

    # prefer "unit", fall back to "units"
    try:
        unit = metadata.pop("unit")
    except KeyError:
        unit = metadata.pop("units")

    # when the unit contains brackets, keep only the content of the last
    # bracketed group at the end of the string
    if "(" in unit:
        unit = re.search(r".*\((.*)\)\s*$", unit).group(1)

    variable = convert_magicc6_to_magicc7_variables(
        self._get_variable_from_filepath()
    )

    headers = {
        "variable": [variable] * n_regions,
        "todo": [self._default_todo_fill_value] * n_regions,
        "unit": [unit] * n_regions,
        "region": regions,
    }

    # drop whatever unit-like entries are still left
    for stale_key in ("unit", "units", "gas"):
        metadata.pop(stale_key, None)

    return headers, metadata
def _convert_variables_to_openscm_variables(self, rcp_variables):
    """
    Convert RCP file variable names to OpenSCM variable names.

    The kind of data (emissions, concentrations, radiative forcing or
    effective radiative forcing) is worked out from the first converted
    variable name, relying on the stability of the format.

    Parameters
    ----------
    rcp_variables : list of str
        Variable names as read from the file.

    Returns
    -------
    The result of ``convert_magicc7_to_openscm_variables`` applied to the
    suffixed MAGICC7 names.

    Raises
    ------
    ValueError
        If the first variable is not one of the recognised markers.
    """
    magicc7_vars = convert_magicc6_to_magicc7_variables(rcp_variables)
    marker = magicc7_vars[0]

    # suffix is added to every name, unconditionally
    always_suffixed = {"CO2I": "_EMIS", "CO2EQ": "_CONC"}
    # suffix is added only to names which don't already end with it
    suffixed_if_missing = {
        "TOTAL_INCLVOLCANIC_RF": "_RF",
        "TOTAL_INCLVOLCANIC_ERF": "_ERF",
    }

    if marker in always_suffixed:
        suffix = always_suffixed[marker]
        with_suffix = [name + suffix for name in magicc7_vars]
    elif marker in suffixed_if_missing:
        suffix = suffixed_if_missing[marker]
        with_suffix = [
            name if name.endswith(suffix) else name + suffix
            for name in magicc7_vars
        ]
    else:
        raise ValueError(
            "I don't know how you got this file, but the format is not recognised by pymagicc"
        )

    return convert_magicc7_to_openscm_variables(with_suffix)
def process_data(cls, reader, stream, metadata):
    """
    Read every data column from ``stream`` and assemble data plus headers.

    Parameters
    ----------
    reader
        Not used by this implementation.

    stream
        Stream handed to ``cls.process_variable`` once per data column.

    metadata : dict
        Must provide ``"firstyear"``, ``"lastyear"`` and ``"datacolumns"``.

    Returns
    -------
    (pandas.DataFrame, dict, dict)
        The data (one column per variable read, indexed by ``"time"``),
        the unchanged ``metadata`` and the column headers with regions and
        variables converted to OpenSCM conventions.
    """
    index = np.arange(metadata["firstyear"], metadata["lastyear"] + 1)

    columns = {}
    data = []
    for _ in range(metadata["datacolumns"]):
        column_header, column_data = cls.process_variable(stream)
        # collect each header field into a list, one entry per column
        for key, value in column_header.items():
            columns.setdefault(key, []).append(value)
        data.append(column_data)

    df = pd.DataFrame(np.asarray(data).T, index=index)
    # ``pd.core.indexes.numeric.Float64Index`` no longer exists in
    # pandas >= 2.0, so check the index dtype instead of its class
    if pd.api.types.is_float_dtype(df.index.dtype):
        df.index = df.index.to_series().round(3)
    df.index.name = "time"

    # Convert the regions to openscm regions
    columns["region"] = convert_magicc_to_openscm_regions(columns["region"])

    # Convert the variable names to openscm variables, dropping any
    # leading "DAT_" first
    stripped = [
        name[4:] if name.startswith("DAT_") else name
        for name in columns["variable"]
    ]
    columns["variable"] = convert_magicc7_to_openscm_variables(
        convert_magicc6_to_magicc7_variables(stripped)
    )

    return df, metadata, columns
def read_data_block(self):
    """
    Read all per-region data blocks from ``self._stream``.

    Each block consists of a region line, a variable header line, a units
    header line and ``number_years`` data lines, where ``number_years`` is
    taken from ``self.lines[0]``. Blocks are read until a variable header
    line cannot be parsed; the stream is then rewound to the start of that
    unparsed block.

    Returns
    -------
    (pandas.DataFrame, dict)
        The data of all blocks concatenated column-wise (most recently read
        block first) and the matching column headers.

    Raises
    ------
    Exception
        If not a single data block could be read.
    """
    number_years = int(self.lines[0].strip())

    # explicit "nothing read yet" markers, rather than relying on a
    # NameError from unassigned locals
    df = None
    columns = None

    # go through datablocks until there are none left
    while True:
        ch = {}
        pos_block = self._stream.tell()
        region = convert_magicc_to_openscm_regions(self._stream.readline().strip())

        try:
            variables = self._read_data_header_line(
                self._stream, ["Years", "Year", "YEARS", "YEAR"]
            )
        except IndexError:  # tried to get variables from empty string
            break
        except AssertionError:  # tried to get variables from a notes line
            break

        variables = convert_magicc6_to_magicc7_variables(variables)
        ch["variable"] = convert_magicc7_to_openscm_variables(
            [v + "_EMIS" for v in variables]
        )
        ch["unit"] = self._read_data_header_line(self._stream, ["Yrs", "YEARS"])
        ch = self._read_units(ch)
        ch["todo"] = ["SET"] * len(variables)
        ch["region"] = [region] * len(variables)

        # copy exactly this region's data lines into their own buffer
        region_block = StringIO()
        for _ in range(number_years):
            region_block.write(self._stream.readline())
        region_block.seek(0)

        region_df = self._convert_data_block_to_df(region_block)

        if columns is None:
            # first block
            df = region_df
            columns = ch
        else:
            # later blocks are prepended to what has been read so far
            df = pd.concat([region_df, df], axis="columns")
            columns = {key: ch[key] + columns[key] for key in columns}

    # rewind to the start of the block that could not be parsed
    self._stream.seek(pos_block)

    if columns is None:
        error_msg = (
            "This is unexpected, please raise an issue on "
            "https://github.com/openscm/pymagicc/issues"
        )
        raise Exception(error_msg)

    return df, columns
def _get_openscm_var_from_filepath(filepath):
    """
    Determine the OpenSCM variable from a filepath.

    Uses MAGICC's internal, implicit, filenaming conventions.

    Parameters
    ----------
    filepath : str
        Filepath from which to determine the OpenSCM variable.

    Returns
    -------
    str
        The OpenSCM variable implied by the filepath.
    """
    from pymagicc.io import determine_tool

    # pick the reader class for this file and let it extract the variable
    reader_cls = determine_tool(filepath, "reader")
    file_variable = reader_cls(filepath)._get_variable_from_filepath()

    magicc7_variable = convert_magicc6_to_magicc7_variables(file_variable)
    return convert_magicc7_to_openscm_variables(magicc7_variable)
def _get_variable_from_filepath(self):
    """Return the parent class's filepath variable converted with
    ``convert_magicc6_to_magicc7_variables``."""
    return convert_magicc6_to_magicc7_variables(
        super()._get_variable_from_filepath()
    )
def test_convert_magicc6_to_magicc7_variables_hfc245ca_warning(magicc6):
    """Converting ``magicc6`` must emit the HFC245ca renaming ``UserWarning``."""
    # the message contains regex metacharacters ("."), so escape it before
    # using it as a match pattern
    expected_pattern = re.escape(
        "HFC245ca wasn't meant to be included in MAGICC6. Renaming to HFC245fa."
    )

    with pytest.warns(UserWarning, match=expected_pattern):
        convert_magicc6_to_magicc7_variables(magicc6)
def test_convert_magicc6_to_magicc7_variables(magicc6, magicc7):
    """Check the MAGICC6 -> MAGICC7 conversion for three spellings.

    The name as given, its upper-cased form and its lower-cased form must
    all convert to the same ``magicc7`` value.
    """
    for spelling in (magicc6, magicc6.upper(), magicc6.lower()):
        converted = convert_magicc6_to_magicc7_variables(spelling)
        assert converted == magicc7
def _write_datablock(self, output):
    """
    Insert one fixed-width data block per region into ``output``.

    The text already in ``output`` is split into lines, a formatted block
    is inserted for each region, and the joined result is written back
    from the start of ``output``.

    Parameters
    ----------
    output
        Buffer holding what has been written so far; must support
        ``getvalue``, ``seek`` and ``write``.

    Returns
    -------
    The same ``output`` object, with the data blocks inserted.

    Raises
    ------
    AssertionError
        If, after dropping the "todo" and "region" levels, a region's
        column levels are not exactly ``["variable", "unit"]``.
    """
    # for SCEN files, the data format is vitally important for the source code
    # we have to work out a better way of matching up all these conventions/testing them, tight coupling between pymagicc and MAGICC may solve it for us...
    lines = output.getvalue().split(self._newline_char)
    # notes are everything except the first 6 lines
    number_notes_lines = len(lines) - 6

    def _gip(lines, number_notes_lines):
        """
        Get the point where we should insert the data block.

        The index is counted back from the end of ``lines``, so the last
        ``number_notes_lines`` entries always stay after the inserted
        block and every new block lands after the previously inserted one.
        """
        return len(lines) - number_notes_lines

    region_order_db = get_region_order(
        self._get_df_header_row("region"), scen7=self._scen_7
    )
    # NOTE(review): presumably maps the region names to the spelling used
    # in the file - confirm against _ensure_file_region_type_consistency
    region_order_magicc = self._ensure_file_region_type_consistency(region_order_db)
    # format is vitally important for SCEN files as far as I can tell
    time_col_length = 11
    # years column: integer, 11 characters wide; other columns: width 10
    # with 4 decimal places
    first_col_format_str = ("{" + ":{}d".format(time_col_length) + "}").format
    other_col_format_str = "{:10.4f}".format

    # TODO: doing it this way, out of the loop, should ensure things
    # explode if your regions don't all have the same number of emissions
    # timeseries or does extra timeseries in there (that probably
    # shouldn't raise an error, another one for the future), although the
    # explosion will be cryptic so should add a test for good error
    # message at some point
    formatters = [other_col_format_str] * (
        int(len(self.data_block.columns) / len(region_order_db))
        + 1  # for the years column
    )
    formatters[0] = first_col_format_str

    # variable names across the whole data block, used only to work out
    # the special SCEN code
    variables = convert_magicc7_to_openscm_variables(
        self._get_df_header_row("variable"), inverse=True
    )
    variables = _strip_emis_variables(variables)

    special_scen_code = get_special_scen_code(
        regions=region_order_magicc, emissions=variables
    )
    # the last digit of the code selects which variable ordering to write
    if special_scen_code % 10 == 0:
        variable_order = PART_OF_SCENFILE_WITH_EMISSIONS_CODE_0
    else:
        variable_order = PART_OF_SCENFILE_WITH_EMISSIONS_CODE_1

    for region_db, region_magicc in zip(region_order_db, region_order_magicc):
        # select this region's columns, then drop the levels which are
        # not written to the file
        region_block_region = convert_magicc_to_openscm_regions(region_db)
        region_block = self.data_block.xs(
            region_block_region, axis=1, level="region", drop_level=False
        )
        region_block.columns = region_block.columns.droplevel("todo")
        region_block.columns = region_block.columns.droplevel("region")

        # relabel the variable level with the inverse-converted, stripped
        # names so it can be reindexed against ``variable_order``
        variables = region_block.columns.levels[0]
        variables = convert_magicc7_to_openscm_variables(variables, inverse=True)
        region_block.columns = region_block.columns.set_levels(
            levels=_strip_emis_variables(variables), level="variable",
        )

        region_block = region_block.reindex(
            variable_order, axis=1, level="variable"
        )

        # header row of variable names, in the order of the reindexed block
        variables = region_block.columns.get_level_values("variable").tolist()
        variables = convert_magicc6_to_magicc7_variables(
            [v.replace("_EMIS", "") for v in variables], inverse=True
        )

        units = convert_pint_to_fortran_safe_units(
            region_block.columns.get_level_values("unit").tolist()
        )
        # column widths don't work with expressive units
        units = [u.replace("_", "").replace("peryr", "") for u in units]

        if not (region_block.columns.names == ["variable", "unit"]):
            raise AssertionError(
                "Unexpected region block columns: "
                "{}".format(region_block.columns.names)
            )

        # move the index into a regular column and replace the headers
        # with the two rows written to the file: names and units
        region_block = region_block.rename(columns=str).reset_index()
        region_block.columns = [["YEARS"] + variables, ["Yrs"] + units]

        # block layout: region name line, formatted table, two newlines
        region_block_str = region_magicc + self._newline_char
        region_block_str += region_block.to_string(
            index=False, formatters=formatters, sparsify=False
        )
        region_block_str += self._newline_char * 2

        lines.insert(_gip(lines, number_notes_lines), region_block_str)

    # write the result back from the start of the buffer
    output.seek(0)
    output.write(self._newline_char.join(lines))
    return output
def process_data(reader, stream, metadata):
    """
    Extract the tabulated data from the input file.

    Parameters
    ----------
    reader
        Object providing ``_get_variable_from_filepath()``, used to work
        out which variable the data belongs to.

    stream
        Streamlike object providing ``read_chunk``; the first chunk read
        holds the global values and, when there is more than one data
        column, the second chunk holds the values of the remaining boxes.

    metadata : dict
        Metadata read in from the header and the namelist. Must provide
        ``"firstyear"``, ``"lastyear"`` and ``"datacolumns"``.

    Returns
    -------
    (pandas.DataFrame, dict, dict)
        The data indexed by ``"time"``, the unchanged ``metadata`` and the
        column headers (``"variable"``, ``"region"`` and ``"unit"``).

    Raises
    ------
    AssertionError
        If the number of global values differs from the number of years.
    """
    index = np.arange(metadata["firstyear"], metadata["lastyear"] + 1)

    # The first variable is the global values
    globe = stream.read_chunk("d")
    if len(globe) != len(index):
        raise AssertionError(
            "Length of data doesn't match length of index: "
            "{} != {}".format(len(globe), len(index))
        )

    if metadata["datacolumns"] == 1:
        num_boxes = 0
        data = globe[:, np.newaxis]
        regions = ["World"]
    else:
        box_values = stream.read_chunk("d")
        num_boxes = int(len(box_values) / len(index))
        # the flat chunk is laid out box after box, hence the
        # column-major ("F") reshape into one column per box
        box_values = box_values.reshape((-1, num_boxes), order="F")
        data = np.concatenate((globe[:, np.newaxis], box_values), axis=1)
        regions = [
            "World",
            "World|Northern Hemisphere|Ocean",
            "World|Northern Hemisphere|Land",
            "World|Southern Hemisphere|Ocean",
            "World|Southern Hemisphere|Land",
        ]

    df = pd.DataFrame(data, index=index)
    # ``pd.core.indexes.numeric.Float64Index`` no longer exists in
    # pandas >= 2.0, so check the index dtype instead of its class
    if pd.api.types.is_float_dtype(df.index.dtype):
        df.index = df.index.to_series().round(3)
    df.index.name = "time"

    variable = convert_magicc6_to_magicc7_variables(
        reader._get_variable_from_filepath()
    )
    variable = convert_magicc7_to_openscm_variables(variable)

    column_headers = {
        "variable": [variable] * (num_boxes + 1),
        "region": regions,
        "unit": "unknown",
    }

    return df, metadata, column_headers