Ejemplo n.º 1
0
    def test_no_unit(
        self, coords_cols, coords_defaults, coords_terminologies, coords_value_mapping
    ):
        """Expect a ``ValueError`` when ``unit`` is removed from ``coords_cols``.

        The ``unit`` entry is deleted from the ``coords_cols`` fixture and the
        wide CSV reader is then called inside ``pytest.raises``.
        """
        file_input = DATA_PATH / "test_csv_data_sec_cat.csv"

        del coords_cols["unit"]

        # ``match`` is a regular expression (applied with ``re.search``); the
        # trailing dot is escaped so that only a literal "." satisfies it.
        with pytest.raises(
            ValueError, match=r"Mandatory dimension 'unit' not defined\."
        ):
            pm2io.read_wide_csv_file_if(
                file_input,
                coords_cols=coords_cols,
                coords_defaults=coords_defaults,
                coords_terminologies=coords_terminologies,
                coords_value_mapping=coords_value_mapping,
            )
Ejemplo n.º 2
0
    def test_function_mapping(
        self,
        tmp_path,
        coords_cols,
        coords_defaults,
        coords_terminologies,
        coords_value_mapping,
        filter_keep,
        filter_remove,
    ):
        """Read a wide CSV using a function as the category value mapping.

        The ``unit`` column specification is replaced by the default ``"Gg"``.
        The result is written to a temporary CSV, read back, and compared with
        the stored expected frame.
        """
        source_csv = DATA_PATH / "test_csv_data_sec_cat.csv"
        expected_csv = DATA_PATH / "test_read_wide_csv_file_output_unit_def.csv"
        expected = pd.read_csv(expected_csv, index_col=0)

        # Swap the unit column for a fixed default value.
        del coords_cols["unit"]
        coords_defaults["unit"] = "Gg"
        # Use a conversion function as the category mapping.
        category_converter = pm2io._conversion.convert_ipcc_code_primap_to_primap2
        coords_value_mapping["category"] = category_converter
        filter_remove["f1"] = {"gas": "KYOTOGHG"}

        reader_kwargs = {
            "coords_cols": coords_cols,
            "coords_defaults": coords_defaults,
            "coords_terminologies": coords_terminologies,
            "coords_value_mapping": coords_value_mapping,
            "filter_keep": filter_keep,
            "filter_remove": filter_remove,
        }
        result = pm2io.read_wide_csv_file_if(source_csv, **reader_kwargs)

        # Round-trip through CSV before comparing with the expected frame.
        roundtrip_csv = tmp_path / "test.csv"
        result.to_csv(roundtrip_csv)
        reloaded = pd.read_csv(roundtrip_csv, index_col=0)
        pd.testing.assert_frame_equal(reloaded, expected, check_column_type=False)
Ejemplo n.º 3
0
    def test_entity_default(
        self,
        tmp_path,
        coords_cols,
        coords_defaults,
        coords_terminologies,
        coords_value_mapping,
        filter_keep,
        filter_remove,
    ):
        """Read a wide CSV with the entity given as the default ``"CO2"``.

        The ``entity`` entries are removed from ``coords_cols`` and
        ``coords_value_mapping``, and the ``"f1"`` removal filter is dropped.
        The result is round-tripped through a temporary CSV before comparison.
        """
        source_csv = DATA_PATH / "test_csv_data_sec_cat.csv"
        expected_csv = DATA_PATH / "test_read_wide_csv_file_output_entity_def.csv"
        expected = pd.read_csv(expected_csv, index_col=0)

        # Move the entity from a column specification to a default value.
        for mapping in (coords_cols, coords_value_mapping):
            del mapping["entity"]
        coords_defaults["entity"] = "CO2"
        del filter_remove["f1"]

        reader_kwargs = {
            "coords_cols": coords_cols,
            "coords_defaults": coords_defaults,
            "coords_terminologies": coords_terminologies,
            "coords_value_mapping": coords_value_mapping,
            "filter_keep": filter_keep,
            "filter_remove": filter_remove,
        }
        result = pm2io.read_wide_csv_file_if(source_csv, **reader_kwargs)

        roundtrip_csv = tmp_path / "temp.csv"
        result.to_csv(roundtrip_csv)
        reloaded = pd.read_csv(roundtrip_csv, index_col=0)
        pd.testing.assert_frame_equal(reloaded, expected, check_column_type=False)
Ejemplo n.º 4
0
    def test_coords_value_mapping_dict(
        self,
        tmp_path,
        coords_cols,
        coords_defaults,
        coords_terminologies,
        filter_keep,
        filter_remove,
    ):
        """Read a wide CSV with a locally built ``coords_value_mapping``.

        The mapping uses dicts for ``category`` and ``entity`` and the string
        ``"PRIMAP1"`` for ``unit``. The result is round-tripped through a
        temporary CSV and compared with the stored expected frame.
        """
        source_csv = DATA_PATH / "test_csv_data_sec_cat.csv"
        expected_csv = DATA_PATH / "test_read_wide_csv_file_output.csv"
        expected = pd.read_csv(expected_csv, index_col=0)

        category_map = {"IPC1": "1", "IPC2": "2", "IPC3": "3", "IPC0": "0"}
        entity_map = {"KYOTOGHG": "KYOTOGHG (SARGWP100)"}
        coords_value_mapping = {
            "category": category_map,
            "entity": entity_map,
            "unit": "PRIMAP1",
        }

        reader_kwargs = {
            "coords_cols": coords_cols,
            "coords_defaults": coords_defaults,
            "coords_terminologies": coords_terminologies,
            "coords_value_mapping": coords_value_mapping,
            "filter_keep": filter_keep,
            "filter_remove": filter_remove,
        }
        result = pm2io.read_wide_csv_file_if(source_csv, **reader_kwargs)

        roundtrip_csv = tmp_path / "temp.csv"
        result.to_csv(roundtrip_csv)
        reloaded = pd.read_csv(roundtrip_csv, index_col=0)
        pd.testing.assert_frame_equal(reloaded, expected, check_column_type=False)
Ejemplo n.º 5
0
    def test_unknown_entity_mapping(
        self, coords_cols, coords_defaults, coords_terminologies, coords_value_mapping
    ):
        """Expect a ``ValueError`` for the entity mapping name ``"TESTTEST"``.

        The ``entity`` entry of ``coords_value_mapping`` is set to the string
        ``"TESTTEST"`` and the wide CSV reader is called inside
        ``pytest.raises``.
        """
        file_input = DATA_PATH / "test_csv_data_sec_cat.csv"

        coords_value_mapping["entity"] = "TESTTEST"

        # ``match`` is a regular expression (applied with ``re.search``); the
        # trailing dot is escaped so that only a literal "." satisfies it.
        with pytest.raises(
            ValueError,
            match=r"Unknown metadata mapping 'TESTTEST' for column 'entity'\.",
        ):
            pm2io.read_wide_csv_file_if(
                file_input,
                coords_cols=coords_cols,
                coords_defaults=coords_defaults,
                coords_terminologies=coords_terminologies,
                coords_value_mapping=coords_value_mapping,
            )
Ejemplo n.º 6
0
    def test_overlapping_specification(
        self, coords_cols, coords_defaults, coords_terminologies, coords_value_mapping
    ):
        """Expect a ``ValueError`` when ``entity`` also gets a default value.

        ``coords_defaults["entity"]`` is set to ``"CO2"`` while ``coords_cols``
        is passed unchanged; the expected message names both arguments.
        """
        file_input = DATA_PATH / "test_csv_data_sec_cat.csv"

        coords_defaults["entity"] = "CO2"

        # ``match`` is a regular expression (applied with ``re.search``); braces
        # and the trailing dot are escaped so the pattern is matched literally.
        with pytest.raises(
            ValueError,
            match=r"\{'entity'\} given in coords_cols and coords_defaults\.",
        ):
            pm2io.read_wide_csv_file_if(
                file_input,
                coords_cols=coords_cols,
                coords_defaults=coords_defaults,
                coords_terminologies=coords_terminologies,
                coords_value_mapping=coords_value_mapping,
            )
Ejemplo n.º 7
0
    def test_overlapping_specification_add_coords(
        self, coords_cols, coords_defaults, coords_terminologies, coords_value_mapping
    ):
        """Expect a ``ValueError`` when ``add_coords_cols`` reuses ``gas``.

        A local ``add_coords_cols`` of ``{"test": ["gas", "category"]}`` is
        passed together with the ``coords_cols`` fixture; the expected message
        names both arguments.
        """
        file_input = DATA_PATH / "test_csv_data_sec_cat.csv"

        add_coords_cols = {"test": ["gas", "category"]}

        # ``match`` is a regular expression (applied with ``re.search``); braces
        # and the trailing dot are escaped so the pattern is matched literally.
        with pytest.raises(
            ValueError,
            match=r"\{'gas'\} given in coords_cols and add_coords_cols\.",
        ):
            pm2io.read_wide_csv_file_if(
                file_input,
                coords_cols=coords_cols,
                add_coords_cols=add_coords_cols,
                coords_defaults=coords_defaults,
                coords_terminologies=coords_terminologies,
                coords_value_mapping=coords_value_mapping,
            )
Ejemplo n.º 8
0
    def test_unknown_mapping(
        self, coords_cols, coords_defaults, coords_terminologies, coords_value_mapping
    ):
        """Expect a ``ValueError`` for the category mapping ``"non-existing"``.

        The ``category`` entry of ``coords_value_mapping`` is set to the string
        ``"non-existing"`` and the wide CSV reader is called inside
        ``pytest.raises``.
        """
        file_input = DATA_PATH / "test_csv_data_sec_cat.csv"

        coords_value_mapping["category"] = "non-existing"

        # ``match`` is a regular expression (applied with ``re.search``); the
        # trailing dot is escaped so that only a literal "." satisfies it.
        with pytest.raises(
            ValueError,
            match=r"Unknown metadata mapping 'non-existing' for column 'category'\.",
        ):
            pm2io.read_wide_csv_file_if(
                file_input,
                coords_cols=coords_cols,
                coords_defaults=coords_defaults,
                coords_terminologies=coords_terminologies,
                coords_value_mapping=coords_value_mapping,
            )
Ejemplo n.º 9
0
    def test_unknown_coordinate(
        self, coords_cols, coords_defaults, coords_terminologies, coords_value_mapping
    ):
        """Expect a ``ValueError`` for the unknown default key ``citation``.

        ``coords_defaults["citation"]`` is set before the read; the expected
        message suggests the ``sec_cats__`` prefix.
        """
        file_input = DATA_PATH / "test_csv_data_sec_cat.csv"

        coords_defaults["citation"] = "this should go to attrs"

        # ``match`` is a regular expression (applied with ``re.search``); the
        # trailing dot is escaped so that only a literal "." satisfies it.
        with pytest.raises(
            ValueError,
            match=r"'citation' given in coords_defaults is unknown - prefix with "
            r"'sec_cats__' to add a secondary category\.",
        ):
            pm2io.read_wide_csv_file_if(
                file_input,
                coords_cols=coords_cols,
                coords_defaults=coords_defaults,
                coords_terminologies=coords_terminologies,
                coords_value_mapping=coords_value_mapping,
            )
Ejemplo n.º 10
0
    def test_output(
        self,
        tmp_path,
        coords_cols,
        coords_defaults,
        coords_terminologies,
        coords_value_mapping,
        filter_keep,
        filter_remove,
    ):
        """Check both the data and the ``attrs`` produced by the wide reader.

        The frame is round-tripped through a temporary CSV and compared with
        the stored expected output; the ``attrs`` captured before the
        round-trip are compared with a literal expected dictionary.
        """
        source_csv = DATA_PATH / "test_csv_data_sec_cat.csv"
        expected_csv = DATA_PATH / "test_read_wide_csv_file_output.csv"
        expected = pd.read_csv(expected_csv, index_col=0)

        reader_kwargs = {
            "coords_cols": coords_cols,
            "coords_defaults": coords_defaults,
            "coords_terminologies": coords_terminologies,
            "coords_value_mapping": coords_value_mapping,
            "filter_keep": filter_keep,
            "filter_remove": filter_remove,
            "meta_data": {"references": "Just ask around."},
        }
        result = pm2io.read_wide_csv_file_if(source_csv, **reader_kwargs)

        # Keep the attrs of the reader's result; the frame re-read from CSV
        # below is a separate object.
        attrs_result = result.attrs
        roundtrip_csv = tmp_path / "temp.csv"
        result.to_csv(roundtrip_csv)
        reloaded = pd.read_csv(roundtrip_csv, index_col=0)
        pd.testing.assert_frame_equal(reloaded, expected, check_column_type=False)

        expected_dimensions = [
            "entity",
            "source",
            "area (ISO3)",
            "Type (type)",
            "unit",
            "scenario (general)",
            "Class (class)",
            "category (IPCC2006)",
        ]
        attrs_expected = {
            "attrs": {
                "references": "Just ask around.",
                "sec_cats": ["Class (class)", "Type (type)"],
                "scen": "scenario (general)",
                "area": "area (ISO3)",
                "cat": "category (IPCC2006)",
            },
            "time_format": "%Y",
            "dimensions": {"*": expected_dimensions},
        }

        assert_attrs_equal(attrs_result, attrs_expected)
Ejemplo n.º 11
0
    def test_unprocessed_strs(
        self,
        coords_cols,
        coords_defaults,
        coords_terminologies,
        coords_value_mapping,
    ):
        """Expect a ``ValueError`` when reading with ``convert_str=False``.

        The input is ``test_csv_data_sec_cat_strings.csv`` and both filters are
        passed as empty dicts; the error must match ``"String values"``.
        """
        source_csv = DATA_PATH / "test_csv_data_sec_cat_strings.csv"

        reader_kwargs = {
            "coords_cols": coords_cols,
            "coords_defaults": coords_defaults,
            "coords_terminologies": coords_terminologies,
            "coords_value_mapping": coords_value_mapping,
            "filter_keep": {},
            "filter_remove": {},
            "convert_str": False,
        }

        with pytest.raises(ValueError, match="String values"):
            pm2io.read_wide_csv_file_if(source_csv, **reader_kwargs)
Ejemplo n.º 12
0
    def test_read_wide_fill_col(
        self,
        tmp_path,
        coords_cols,
        add_coords_cols,
        coords_defaults,
        coords_terminologies,
        coords_value_mapping,
        coords_value_filling,
    ):
        """Read a wide CSV with ``coords_value_filling`` and no secondary categories.

        The ``sec_cats__*`` entries are removed from the fixtures first. Data
        are round-tripped through a temporary CSV and compared with the stored
        expected frame; the ``attrs`` are compared with a literal dictionary.
        """
        source_csv = DATA_PATH / "test_csv_data_category_name_fill_cat_code.csv"
        expected_csv = DATA_PATH / "test_read_wide_csv_file_no_sec_cats_cat_name.csv"
        expected = pd.read_csv(expected_csv, index_col=0)

        # Drop every secondary-category entry from the fixtures.
        del coords_cols["sec_cats__Class"]
        del coords_defaults["sec_cats__Type"]
        for sec_cat in ("sec_cats__Class", "sec_cats__Type"):
            del coords_terminologies[sec_cat]

        reader_kwargs = {
            "coords_cols": coords_cols,
            "add_coords_cols": add_coords_cols,
            "coords_defaults": coords_defaults,
            "coords_terminologies": coords_terminologies,
            "coords_value_mapping": coords_value_mapping,
            "coords_value_filling": coords_value_filling,
        }
        result = pm2io.read_wide_csv_file_if(source_csv, **reader_kwargs)

        # Keep the attrs of the reader's result; the frame re-read from CSV
        # below is a separate object.
        attrs_result = result.attrs
        roundtrip_csv = tmp_path / "temp.csv"
        result.to_csv(roundtrip_csv)
        reloaded = pd.read_csv(roundtrip_csv, index_col=0)
        pd.testing.assert_frame_equal(reloaded, expected, check_column_type=False)

        expected_dimensions = [
            "entity",
            "source",
            "area (ISO3)",
            "unit",
            "scenario (general)",
            "category (IPCC2006)",
        ]
        attrs_expected = {
            "attrs": {
                "scen": "scenario (general)",
                "area": "area (ISO3)",
                "cat": "category (IPCC2006)",
            },
            "time_format": "%Y",
            "dimensions": {"*": expected_dimensions},
            "additional_coordinates": {"category_name": "category (IPCC2006)"},
        }

        assert_attrs_equal(attrs_result, attrs_expected)
Ejemplo n.º 13
0
    def test_entity_terminology(
        self,
        tmp_path,
        coords_cols,
        coords_defaults,
        coords_terminologies,
        coords_value_mapping,
    ):
        """Read a wide CSV with ``"PRIMAP1"`` set as the entity terminology.

        The expected frame is the stored output with its ``entity`` column
        renamed to ``entity (PRIMAP1)``. The ``sec_cats__*`` entries are removed
        from the fixtures first. Both data and ``attrs`` are checked.
        """
        source_csv = DATA_PATH / "test_csv_data.csv"
        expected_csv = DATA_PATH / "test_read_wide_csv_file_no_sec_cats.csv"
        expected: pd.DataFrame = pd.read_csv(expected_csv, index_col=0).rename(
            columns={"entity": "entity (PRIMAP1)"}
        )

        # Drop every secondary-category entry from the fixtures.
        del coords_cols["sec_cats__Class"]
        del coords_defaults["sec_cats__Type"]
        for sec_cat in ("sec_cats__Class", "sec_cats__Type"):
            del coords_terminologies[sec_cat]

        coords_terminologies["entity"] = "PRIMAP1"

        reader_kwargs = {
            "coords_cols": coords_cols,
            "coords_defaults": coords_defaults,
            "coords_terminologies": coords_terminologies,
            "coords_value_mapping": coords_value_mapping,
        }
        result = pm2io.read_wide_csv_file_if(source_csv, **reader_kwargs)

        # Keep the attrs of the reader's result; the frame re-read from CSV
        # below is a separate object.
        attrs_result = result.attrs
        roundtrip_csv = tmp_path / "temp.csv"
        result.to_csv(roundtrip_csv)
        reloaded = pd.read_csv(roundtrip_csv, index_col=0)
        pd.testing.assert_frame_equal(reloaded, expected, check_column_type=False)

        expected_dimensions = [
            "entity (PRIMAP1)",
            "source",
            "area (ISO3)",
            "unit",
            "scenario (general)",
            "category (IPCC2006)",
        ]
        attrs_expected = {
            "attrs": {
                "scen": "scenario (general)",
                "area": "area (ISO3)",
                "cat": "category (IPCC2006)",
                "entity_terminology": "PRIMAP1",
            },
            "time_format": "%Y",
            "dimensions": {"*": expected_dimensions},
        }

        assert_attrs_equal(attrs_result, attrs_expected)
Ejemplo n.º 14
0
    def test_col_missing(
        self,
        coords_cols,
        coords_defaults,
        coords_terminologies,
        coords_value_mapping,
        filter_keep,
        filter_remove,
    ):
        """Expect a ``ValueError`` when ``coords_cols`` names the column ``class``.

        ``coords_cols["sec_cats__Class"]`` is set to ``"class"`` before the
        read; the expected message reports that column as not found in the CSV.
        """
        file_input = DATA_PATH / "test_csv_data_sec_cat.csv"

        coords_cols["sec_cats__Class"] = "class"

        # ``match`` is a regular expression (applied with ``re.search``); braces
        # and the trailing dot are escaped so the pattern is matched literally.
        with pytest.raises(
            ValueError, match=r"Columns \{'class'\} not found in CSV\."
        ):
            pm2io.read_wide_csv_file_if(
                file_input,
                coords_cols=coords_cols,
                coords_defaults=coords_defaults,
                coords_terminologies=coords_terminologies,
                coords_value_mapping=coords_value_mapping,
                filter_keep=filter_keep,
                filter_remove=filter_remove,
            )
Ejemplo n.º 15
0
    def test_compare_wide(
        self,
        coords_cols,
        coords_defaults,
        coords_terminologies,
        coords_value_mapping,
    ):
        """Wide and long readers must return equal frames and equal ``attrs``.

        The ``sec_cats__*`` entries are removed from the fixtures first. The
        same specification and metadata are passed to both readers; the long
        read additionally gets ``time`` and ``data`` columns and a time format.
        """
        wide_csv = DATA_PATH / "test_csv_data.csv"
        long_csv = DATA_PATH / "long.csv"

        # Drop every secondary-category entry from the fixtures.
        del coords_cols["sec_cats__Class"]
        del coords_defaults["sec_cats__Type"]
        for sec_cat in ("sec_cats__Class", "sec_cats__Type"):
            del coords_terminologies[sec_cat]

        shared_kwargs = {
            "coords_cols": coords_cols,
            "coords_defaults": coords_defaults,
            "coords_terminologies": coords_terminologies,
            "coords_value_mapping": coords_value_mapping,
            "meta_data": {"references": "Just ask around"},
        }

        frame_wide = pm2io.read_wide_csv_file_if(wide_csv, **shared_kwargs)

        # Added only after the wide read; ``shared_kwargs`` holds the same dict
        # object, so the long read sees these two extra entries.
        coords_cols["time"] = "year"
        coords_cols["data"] = "emissions"
        frame_long = pm2io.read_long_csv_file_if(
            long_csv, time_format="%Y", **shared_kwargs
        )

        pd.testing.assert_frame_equal(frame_wide, frame_long)
        assert frame_wide.attrs == frame_long.attrs
Ejemplo n.º 16
0
    def test_compare_wide_add_cols(
        self,
        coords_cols,
        add_coords_cols,
        coords_defaults,
        coords_terminologies,
        coords_value_mapping,
    ):
        """Wide and long readers must agree when ``add_coords_cols`` is given.

        The ``sec_cats__*`` entries are removed from the fixtures first. The
        same specification is passed to both readers; the long read
        additionally gets ``time`` and ``data`` columns and a time format.
        """
        wide_csv = DATA_PATH / "test_csv_data_category_name.csv"
        long_csv = DATA_PATH / "test_csv_data_category_name_long.csv"

        # Drop every secondary-category entry from the fixtures.
        del coords_cols["sec_cats__Class"]
        del coords_defaults["sec_cats__Type"]
        for sec_cat in ("sec_cats__Class", "sec_cats__Type"):
            del coords_terminologies[sec_cat]

        shared_kwargs = {
            "coords_cols": coords_cols,
            "add_coords_cols": add_coords_cols,
            "coords_defaults": coords_defaults,
            "coords_terminologies": coords_terminologies,
            "coords_value_mapping": coords_value_mapping,
        }

        frame_wide = pm2io.read_wide_csv_file_if(wide_csv, **shared_kwargs)

        # Added only after the wide read; ``shared_kwargs`` holds the same dict
        # object, so the long read sees these two extra entries.
        coords_cols["time"] = "year"
        coords_cols["data"] = "emissions"
        frame_long = pm2io.read_long_csv_file_if(
            long_csv, time_format="%Y", **shared_kwargs
        )

        pd.testing.assert_frame_equal(frame_wide, frame_long)
        assert frame_wide.attrs == frame_long.attrs