Esempio n. 1
0
def test_convert_magicc7_to_magicc6_variables(magicc7, magicc6):
    """
    Check the inverse conversion for as-given, upper-case and lower-case input.

    Each variant of ``magicc7`` must convert (with ``inverse=True``) to
    ``magicc6``.
    """
    for candidate in (magicc7, magicc7.upper(), magicc7.lower()):
        converted = convert_magicc6_to_magicc7_variables(candidate, inverse=True)
        assert converted == magicc6
Esempio n. 2
0
    def _get_column_headers_and_update_metadata(self, stream, metadata):
        """
        Assemble the column headers from the data header lines.

        Parameters
        ----------
        stream
            Stream passed to ``_read_data_header_line``; the "UNITS:" line is
            requested first, then the "YEARS:" line (which holds the
            variable names).

        metadata : dict
            Metadata read so far. The "unit", "units" and "gas" entries are
            removed in place when present.

        Returns
        -------
        tuple
            The column headers, as returned by the parent class'
            ``_read_units``, and the updated metadata.
        """
        raw_units = self._read_data_header_line(stream, "UNITS:")
        raw_variables = self._read_data_header_line(stream, "YEARS:")
        openscm_variables = convert_magicc7_to_openscm_variables(
            convert_magicc6_to_magicc7_variables(raw_variables)
        )
        n_columns = len(openscm_variables)
        todo_fill = self._default_todo_fill_value

        # drop the unit/gas entries from the metadata, ignoring absent ones
        for key in ("unit", "units", "gas"):
            metadata.pop(key, None)

        headers = {
            "variable": openscm_variables,
            "todo": [todo_fill] * n_columns,
            # get rid of confusing units before passing to _read_units
            "unit": [raw_unit.replace("kt/yr", "kt") for raw_unit in raw_units],
            "region": ["World"] * n_columns,
        }

        return super()._read_units(headers), metadata
Esempio n. 3
0
    def _read_magicc6_style_header(self, stream, metadata):
        """
        Build column headers for a file with a single MAGICC6 style header line.

        Parameters
        ----------
        stream
            Stream passed to ``_read_data_header_line`` to obtain the regions.

        metadata : dict
            Metadata read so far. Must contain "unit" or "units"; the
            "unit", "units" and "gas" entries are removed in place.

        Returns
        -------
        tuple
            The column headers dictionary and the updated metadata.
        """
        # The file is written in MAGICC6 style with only one header line, so
        # the rest of the header data must be inferred.
        # Note that the regions header line is assumed to start with
        # 'COLCODE' or 'YEARS' instead of 'REGIONS'
        regions = self._read_data_header_line(stream, ["COLCODE", "YEARS"])

        # prefer "unit", fall back to "units" (KeyError if neither is there)
        unit_key = "unit" if "unit" in metadata else "units"
        unit = metadata.pop(unit_key)

        if "(" in unit:
            # keep only what is inside the parentheses at the end of the string
            unit = re.search(r".*\((.*)\)\s*$", unit).group(1)

        variable = convert_magicc6_to_magicc7_variables(
            self._get_variable_from_filepath()
        )
        n_regions = len(regions)
        headers = {
            "variable": [variable] * n_regions,
            "todo": [self._default_todo_fill_value] * n_regions,
            "unit": [unit] * n_regions,
            "region": regions,
        }

        # remove whatever unit/gas entries are still left in the metadata
        for key in ("unit", "units", "gas"):
            metadata.pop(key, None)

        return headers, metadata
Esempio n. 4
0
    def _convert_variables_to_openscm_variables(self, rcp_variables):
        """
        Convert the variables of an RCP file to OpenSCM variables.

        The kind of data in the file is inferred from the first (converted)
        variable, and the matching suffix is added to the variable names
        before the final conversion.

        Parameters
        ----------
        rcp_variables
            Variables to convert, passed to
            ``convert_magicc6_to_magicc7_variables``.

        Returns
        -------
        The result of ``convert_magicc7_to_openscm_variables``.

        Raises
        ------
        ValueError
            The first variable does not match any of the known file kinds.
        """
        magicc7_vars = convert_magicc6_to_magicc7_variables(rcp_variables)

        # Work out whether we have emissions, concentrations or radiative
        # forcing from the first variable; given the stability of the format
        # this is judged to be the best way to do it.
        leading = magicc7_vars[0]
        if leading == "CO2I":
            suffix, skip_if_present = "_EMIS", False
        elif leading == "CO2EQ":
            suffix, skip_if_present = "_CONC", False
        elif leading == "TOTAL_INCLVOLCANIC_RF":
            suffix, skip_if_present = "_RF", True
        elif leading == "TOTAL_INCLVOLCANIC_ERF":
            suffix, skip_if_present = "_ERF", True
        else:
            raise ValueError(
                "I don't know how you got this file, but the format is not recognised by pymagicc"
            )

        # forcing names may already carry their suffix, the others never do
        suffixed = [
            name if (skip_if_present and name.endswith(suffix)) else name + suffix
            for name in magicc7_vars
        ]

        return convert_magicc7_to_openscm_variables(suffixed)
Esempio n. 5
0
    def process_data(cls, reader, stream, metadata):
        """
        Read every data column from ``stream`` into a dataframe.

        Parameters
        ----------
        reader
            Not used by this implementation.

        stream
            Stream handed to ``cls.process_variable`` once per data column.

        metadata : dict
            Must provide "firstyear", "lastyear" and "datacolumns".

        Returns
        -------
        tuple
            The dataframe (one column per variable read, indexed by "time"),
            the metadata (unchanged) and the column headers with regions and
            variables converted to their OpenSCM names.
        """
        index = np.arange(metadata["firstyear"], metadata["lastyear"] + 1)

        columns = {}
        data = []
        for _ in range(metadata["datacolumns"]):
            column_header, column_data = cls.process_variable(stream)

            # collect each header field into a list, one entry per column
            for key, value in column_header.items():
                columns.setdefault(key, []).append(value)
            data.append(column_data)

        df = pd.DataFrame(np.asarray(data).T, index=index)

        # ``pd.core.indexes.numeric.Float64Index`` no longer exists in
        # pandas >= 2.0, so detect a float index via its dtype instead.
        if pd.api.types.is_float_dtype(df.index.dtype):
            df.index = df.index.to_series().round(3)

        df.index.name = "time"

        # Convert the regions to openscm regions
        columns["region"] = convert_magicc_to_openscm_regions(columns["region"])

        # Convert the variable names to openscm variables, dropping any
        # leading "DAT_" first
        stripped = [
            name[4:] if name.startswith("DAT_") else name
            for name in columns["variable"]
        ]
        columns["variable"] = convert_magicc7_to_openscm_variables(
            convert_magicc6_to_magicc7_variables(stripped)
        )

        return df, metadata, columns
Esempio n. 6
0
    def read_data_block(self):
        """
        Read all the regional data blocks from ``self._stream``.

        Blocks are read one after another until a block's variable header
        line cannot be read. The stream is then rewound to the start of that
        unread block.

        Returns
        -------
        tuple
            The dataframe holding every block (later blocks are placed
            before earlier ones, column-wise) and the matching column
            headers dictionary.

        Raises
        ------
        Exception
            No data block could be read at all.
        """
        number_years = int(self.lines[0].strip())

        # Explicit sentinels rather than relying on NameError from unbound
        # locals, which would also mask any genuine NameError.
        df = None
        columns = None

        # go through datablocks until there are none left
        while True:
            pos_block = self._stream.tell()
            region = convert_magicc_to_openscm_regions(self._stream.readline().strip())

            try:
                variables = self._read_data_header_line(
                    self._stream, ["Years", "Year", "YEARS", "YEAR"]
                )
            except (IndexError, AssertionError):
                # IndexError: tried to get variables from empty string
                # AssertionError: tried to get variables from a notes line
                break

            variables = convert_magicc6_to_magicc7_variables(variables)
            ch = {
                "variable": convert_magicc7_to_openscm_variables(
                    [v + "_EMIS" for v in variables]
                ),
                "unit": self._read_data_header_line(self._stream, ["Yrs", "YEARS"]),
            }

            ch = self._read_units(ch)
            ch["todo"] = ["SET"] * len(variables)
            ch["region"] = [region] * len(variables)

            # copy this region's rows into their own buffer for parsing
            region_block = StringIO()
            for _ in range(number_years):
                region_block.write(self._stream.readline())
            region_block.seek(0)

            region_df = self._convert_data_block_to_df(region_block)

            if df is None:
                df = region_df
                columns = ch
            else:
                # the newest block goes first, for data and headers alike
                df = pd.concat([region_df, df], axis="columns")
                columns = {key: ch[key] + columns[key] for key in columns}

        # rewind to the start of the block which could not be read
        self._stream.seek(pos_block)

        if df is None:
            error_msg = (
                "This is unexpected, please raise an issue on "
                "https://github.com/openscm/pymagicc/issues"
            )
            raise Exception(error_msg)

        return df, columns
Esempio n. 7
0
def _get_openscm_var_from_filepath(filepath):
    """
    Work out which OpenSCM variable a filepath refers to.

    Relies on MAGICC's internal, implicit, filenaming conventions: the
    reader selected for the file supplies the variable, which is then
    converted to its OpenSCM name.

    Parameters
    ----------
    filepath : str
        Filepath from which to determine the OpenSCM variable.

    Returns
    -------
    str
        The OpenSCM variable implied by the filepath.
    """
    from pymagicc.io import determine_tool

    reader_cls = determine_tool(filepath, "reader")
    magicc_var = reader_cls(filepath)._get_variable_from_filepath()
    magicc7_var = convert_magicc6_to_magicc7_variables(magicc_var)

    return convert_magicc7_to_openscm_variables(magicc7_var)
Esempio n. 8
0
 def _get_variable_from_filepath(self):
     """
     Return the parent class' filepath variable after passing it through
     ``convert_magicc6_to_magicc7_variables``.
     """
     return convert_magicc6_to_magicc7_variables(
         super()._get_variable_from_filepath()
     )
Esempio n. 9
0
def test_convert_magicc6_to_magicc7_variables_hfc245ca_warning(magicc6):
    """Check converting ``magicc6`` emits the HFC245ca renaming ``UserWarning``."""
    raw_message = (
        "HFC245ca wasn't meant to be included in MAGICC6. Renaming to HFC245fa."
    )
    # the message contains regex metacharacters, so escape it for ``match``
    expected_pattern = re.escape(raw_message)

    with pytest.warns(UserWarning, match=expected_pattern):
        convert_magicc6_to_magicc7_variables(magicc6)
Esempio n. 10
0
def test_convert_magicc6_to_magicc7_variables(magicc6, magicc7):
    """
    Check the conversion for as-given, upper-case and lower-case input.

    Each variant of ``magicc6`` must convert to ``magicc7``.
    """
    for candidate in (magicc6, magicc6.upper(), magicc6.lower()):
        assert convert_magicc6_to_magicc7_variables(candidate) == magicc7
Esempio n. 11
0
    def _write_datablock(self, output):
        """
        Insert one formatted data block per region into ``output``.

        The text already held by ``output`` is split into lines, a block is
        inserted for each region after the first six lines (everything
        beyond those is treated as notes), and the result is written back
        from the start of ``output``.

        Parameters
        ----------
        output
            Buffer providing ``getvalue``, ``seek`` and ``write``
            (presumably a ``StringIO`` - confirm against the caller).

        Returns
        -------
        The same ``output`` object, after writing.

        Raises
        ------
        AssertionError
            A region's columns are not exactly the "variable" and "unit"
            levels once "todo" and "region" have been dropped.
        """
        # for SCEN files, the data format is vitally important for the source code
        # we have to work out a better way of matching up all these
        # conventions/testing them, tight coupling between pymagicc and MAGICC
        # may solve it for us...
        lines = output.getvalue().split(self._newline_char)
        # notes are everything except the first 6 lines
        number_notes_lines = len(lines) - 6

        def _gip(lines, number_notes_lines):
            """
            Get the point where we should insert the data block.

            ``lines`` grows by one entry per inserted block while the number
            of notes lines stays fixed, so each block lands after the
            previously inserted ones and before the notes.
            """
            return len(lines) - number_notes_lines

        region_order_db = get_region_order(
            self._get_df_header_row("region"), scen7=self._scen_7
        )
        region_order_magicc = self._ensure_file_region_type_consistency(region_order_db)
        # format is vitally important for SCEN files as far as I can tell
        time_col_length = 11
        # integer format for the years column, fixed-point for the data columns
        first_col_format_str = ("{" + ":{}d".format(time_col_length) + "}").format
        other_col_format_str = "{:10.4f}".format

        # TODO: doing it this way, out of the loop,  should ensure things
        # explode if your regions don't all have the same number of emissions
        # timeseries or does extra timeseries in there (that probably
        # shouldn't raise an error, another one for the future), although the
        # explosion will be cryptic so should add a test for good error
        # message at some point
        formatters = [other_col_format_str] * (
            int(len(self.data_block.columns) / len(region_order_db))
            + 1  # for the years column
        )
        formatters[0] = first_col_format_str

        variables = convert_magicc7_to_openscm_variables(
            self._get_df_header_row("variable"), inverse=True
        )
        variables = _strip_emis_variables(variables)

        special_scen_code = get_special_scen_code(
            regions=region_order_magicc, emissions=variables
        )
        # the last digit of the code selects which variable ordering is used
        if special_scen_code % 10 == 0:
            variable_order = PART_OF_SCENFILE_WITH_EMISSIONS_CODE_0
        else:
            variable_order = PART_OF_SCENFILE_WITH_EMISSIONS_CODE_1

        for region_db, region_magicc in zip(region_order_db, region_order_magicc):
            # select this region's columns, then drop the levels which are
            # not written to the block
            region_block_region = convert_magicc_to_openscm_regions(region_db)
            region_block = self.data_block.xs(
                region_block_region, axis=1, level="region", drop_level=False
            )
            region_block.columns = region_block.columns.droplevel("todo")
            region_block.columns = region_block.columns.droplevel("region")

            # relabel the variable level (inverse conversion, then
            # ``_strip_emis_variables``) so that it can be reindexed with
            # ``variable_order``
            variables = region_block.columns.levels[0]
            variables = convert_magicc7_to_openscm_variables(variables, inverse=True)
            region_block.columns = region_block.columns.set_levels(
                levels=_strip_emis_variables(variables), level="variable",
            )

            region_block = region_block.reindex(
                variable_order, axis=1, level="variable"
            )

            # column labels as written to the file: inverse (MAGICC6) names
            # without any "_EMIS"
            variables = region_block.columns.get_level_values("variable").tolist()
            variables = convert_magicc6_to_magicc7_variables(
                [v.replace("_EMIS", "") for v in variables], inverse=True
            )

            units = convert_pint_to_fortran_safe_units(
                region_block.columns.get_level_values("unit").tolist()
            )
            # column widths don't work with expressive units
            units = [u.replace("_", "").replace("peryr", "") for u in units]

            if not (region_block.columns.names == ["variable", "unit"]):
                raise AssertionError(
                    "Unexpected region block columns: "
                    "{}".format(region_block.columns.names)
                )

            # move the index into a column and install the two header rows
            # (variable names above units)
            region_block = region_block.rename(columns=str).reset_index()
            region_block.columns = [["YEARS"] + variables, ["Yrs"] + units]

            # block layout: region name line, table, then two newline
            # characters
            region_block_str = region_magicc + self._newline_char
            region_block_str += region_block.to_string(
                index=False, formatters=formatters, sparsify=False
            )
            region_block_str += self._newline_char * 2

            lines.insert(_gip(lines, number_notes_lines), region_block_str)

        # write the result back from the start of the buffer
        output.seek(0)
        output.write(self._newline_char.join(lines))
        return output
Esempio n. 12
0
    def process_data(reader, stream, metadata):
        """
        Extract the tabulated data from the input file

        # Arguments
        reader: object whose ``_get_variable_from_filepath`` method supplies
            the variable name for the data
        stream (Streamlike object): A Streamlike object (nominally StringIO)
            containing the table to be extracted; its ``read_chunk("d")``
            method is used to read the values
        metadata (dict): metadata read in from the header and the namelist,
            must provide "firstyear", "lastyear" and "datacolumns"

        # Returns
        df (pandas.DataFrame): contains the data, processed to the standard
            MAGICCData format
        metadata (dict): the metadata which was passed in
        column_headers (dict): the "variable", "region" and "unit" headers
            for the columns of ``df``

        # Raises
        AssertionError: the number of global values read differs from the
            number of years implied by the metadata
        """
        index = np.arange(metadata["firstyear"], metadata["lastyear"] + 1)

        # The first variable is the global values
        globe = stream.read_chunk("d")

        if len(globe) != len(index):
            raise AssertionError(
                "Length of data doesn't match length of index: "
                "{} != {}".format(len(globe), len(index))
            )

        if metadata["datacolumns"] == 1:
            num_boxes = 0
            data = globe[:, np.newaxis]
            regions = ["World"]
        else:
            # the second chunk holds every box's timeseries, one after another
            boxes = stream.read_chunk("d")
            num_boxes = len(boxes) // len(index)
            boxes = boxes.reshape((-1, num_boxes), order="F")

            data = np.concatenate((globe[:, np.newaxis], boxes), axis=1)

            regions = [
                "World",
                "World|Northern Hemisphere|Ocean",
                "World|Northern Hemisphere|Land",
                "World|Southern Hemisphere|Ocean",
                "World|Southern Hemisphere|Land",
            ]

        df = pd.DataFrame(data, index=index)

        # ``pd.core.indexes.numeric.Float64Index`` no longer exists in
        # pandas >= 2.0, so detect a float index via its dtype instead.
        if pd.api.types.is_float_dtype(df.index.dtype):
            df.index = df.index.to_series().round(3)

        df.index.name = "time"

        variable = convert_magicc6_to_magicc7_variables(
            reader._get_variable_from_filepath()
        )
        variable = convert_magicc7_to_openscm_variables(variable)
        column_headers = {
            "variable": [variable] * (num_boxes + 1),
            "region": regions,
            "unit": "unknown",
        }

        return df, metadata, column_headers