def test_no_unit(
    self, coords_cols, coords_defaults, coords_terminologies, coords_value_mapping
):
    """Omitting the mandatory 'unit' column specification must raise a ValueError."""
    input_file = DATA_PATH / "test_csv_data_sec_cat.csv"
    # drop the unit column spec to trigger the error
    del coords_cols["unit"]
    with pytest.raises(ValueError, match="Mandatory dimension 'unit' not defined."):
        pm2io.read_wide_csv_file_if(
            input_file,
            coords_cols=coords_cols,
            coords_defaults=coords_defaults,
            coords_terminologies=coords_terminologies,
            coords_value_mapping=coords_value_mapping,
        )
def test_function_mapping(
    self,
    tmp_path,
    coords_cols,
    coords_defaults,
    coords_terminologies,
    coords_value_mapping,
    filter_keep,
    filter_remove,
):
    """Reading with a default unit and a function-based category mapping must
    reproduce the expected interchange-format output.

    The unit is supplied via ``coords_defaults`` instead of a CSV column, and
    categories are converted by ``convert_ipcc_code_primap_to_primap2`` rather
    than a fixed dict.
    """
    file_input = DATA_PATH / "test_csv_data_sec_cat.csv"
    file_expected = DATA_PATH / "test_read_wide_csv_file_output_unit_def.csv"
    df_expected = pd.read_csv(file_expected, index_col=0)

    # supply the unit as a constant default instead of reading a column
    del coords_cols["unit"]
    coords_defaults["unit"] = "Gg"
    # map category codes through a conversion function, not a dict
    coords_value_mapping["category"] = (
        pm2io._conversion.convert_ipcc_code_primap_to_primap2
    )
    filter_remove["f1"] = {"gas": "KYOTOGHG"}

    df_result = pm2io.read_wide_csv_file_if(
        file_input,
        coords_cols=coords_cols,
        coords_defaults=coords_defaults,
        coords_terminologies=coords_terminologies,
        coords_value_mapping=coords_value_mapping,
        filter_keep=filter_keep,
        filter_remove=filter_remove,
    )
    # round-trip through CSV to normalize dtypes before comparison;
    # scratch file renamed "temp.csv" for consistency with the sibling tests
    df_result.to_csv(tmp_path / "temp.csv")
    df_result = pd.read_csv(tmp_path / "temp.csv", index_col=0)
    pd.testing.assert_frame_equal(df_result, df_expected, check_column_type=False)
def test_entity_default(
    self,
    tmp_path,
    coords_cols,
    coords_defaults,
    coords_terminologies,
    coords_value_mapping,
    filter_keep,
    filter_remove,
):
    """A constant entity supplied via coords_defaults must match the expected output."""
    input_file = DATA_PATH / "test_csv_data_sec_cat.csv"
    expected_file = DATA_PATH / "test_read_wide_csv_file_output_entity_def.csv"
    expected = pd.read_csv(expected_file, index_col=0)

    # use a fixed entity ("CO2") instead of an entity column + mapping
    del coords_cols["entity"]
    del coords_value_mapping["entity"]
    coords_defaults["entity"] = "CO2"
    del filter_remove["f1"]

    result = pm2io.read_wide_csv_file_if(
        input_file,
        coords_cols=coords_cols,
        coords_defaults=coords_defaults,
        coords_terminologies=coords_terminologies,
        coords_value_mapping=coords_value_mapping,
        filter_keep=filter_keep,
        filter_remove=filter_remove,
    )
    # round-trip through CSV to normalize dtypes before comparing
    result.to_csv(tmp_path / "temp.csv")
    result = pd.read_csv(tmp_path / "temp.csv", index_col=0)
    pd.testing.assert_frame_equal(result, expected, check_column_type=False)
def test_coords_value_mapping_dict(
    self,
    tmp_path,
    coords_cols,
    coords_defaults,
    coords_terminologies,
    filter_keep,
    filter_remove,
):
    """Plain dict-based value mappings must produce the expected output."""
    input_file = DATA_PATH / "test_csv_data_sec_cat.csv"
    expected_file = DATA_PATH / "test_read_wide_csv_file_output.csv"
    expected = pd.read_csv(expected_file, index_col=0)

    # explicit dicts for category/entity, built-in "PRIMAP1" mapping for unit
    mapping = {
        "category": {"IPC1": "1", "IPC2": "2", "IPC3": "3", "IPC0": "0"},
        "entity": {"KYOTOGHG": "KYOTOGHG (SARGWP100)"},
        "unit": "PRIMAP1",
    }

    result = pm2io.read_wide_csv_file_if(
        input_file,
        coords_cols=coords_cols,
        coords_defaults=coords_defaults,
        coords_terminologies=coords_terminologies,
        coords_value_mapping=mapping,
        filter_keep=filter_keep,
        filter_remove=filter_remove,
    )
    # round-trip through CSV to normalize dtypes before comparing
    result.to_csv(tmp_path / "temp.csv")
    result = pd.read_csv(tmp_path / "temp.csv", index_col=0)
    pd.testing.assert_frame_equal(result, expected, check_column_type=False)
def test_unknown_entity_mapping(
    self, coords_cols, coords_defaults, coords_terminologies, coords_value_mapping
):
    """An unrecognized named mapping for the entity column must raise a ValueError."""
    input_file = DATA_PATH / "test_csv_data_sec_cat.csv"
    # "TESTTEST" is not a known built-in mapping name
    coords_value_mapping["entity"] = "TESTTEST"
    with pytest.raises(
        ValueError, match="Unknown metadata mapping 'TESTTEST' for column 'entity'."
    ):
        pm2io.read_wide_csv_file_if(
            input_file,
            coords_cols=coords_cols,
            coords_defaults=coords_defaults,
            coords_terminologies=coords_terminologies,
            coords_value_mapping=coords_value_mapping,
        )
def test_overlapping_specification(
    self, coords_cols, coords_defaults, coords_terminologies, coords_value_mapping
):
    """Specifying the same coordinate in both coords_cols and coords_defaults must fail."""
    input_file = DATA_PATH / "test_csv_data_sec_cat.csv"
    # 'entity' is already configured as a column; adding a default conflicts
    coords_defaults["entity"] = "CO2"
    with pytest.raises(
        ValueError, match="{'entity'} given in coords_cols and coords_defaults."
    ):
        pm2io.read_wide_csv_file_if(
            input_file,
            coords_cols=coords_cols,
            coords_defaults=coords_defaults,
            coords_terminologies=coords_terminologies,
            coords_value_mapping=coords_value_mapping,
        )
def test_overlapping_specification_add_coords(
    self, coords_cols, coords_defaults, coords_terminologies, coords_value_mapping
):
    """A column listed in both coords_cols and add_coords_cols must raise a ValueError."""
    input_file = DATA_PATH / "test_csv_data_sec_cat.csv"
    # 'gas' already belongs to coords_cols, so this overlaps
    add_coords_cols = {"test": ["gas", "category"]}
    with pytest.raises(
        ValueError, match="{'gas'} given in coords_cols and add_coords_cols."
    ):
        pm2io.read_wide_csv_file_if(
            input_file,
            coords_cols=coords_cols,
            add_coords_cols=add_coords_cols,
            coords_defaults=coords_defaults,
            coords_terminologies=coords_terminologies,
            coords_value_mapping=coords_value_mapping,
        )
def test_unknown_mapping(
    self, coords_cols, coords_defaults, coords_terminologies, coords_value_mapping
):
    """An unrecognized named mapping for the category column must raise a ValueError."""
    input_file = DATA_PATH / "test_csv_data_sec_cat.csv"
    # "non-existing" is not a known built-in mapping name
    coords_value_mapping["category"] = "non-existing"
    with pytest.raises(
        ValueError,
        match="Unknown metadata mapping 'non-existing' for column 'category'.",
    ):
        pm2io.read_wide_csv_file_if(
            input_file,
            coords_cols=coords_cols,
            coords_defaults=coords_defaults,
            coords_terminologies=coords_terminologies,
            coords_value_mapping=coords_value_mapping,
        )
def test_unknown_coordinate(
    self, coords_cols, coords_defaults, coords_terminologies, coords_value_mapping
):
    """An unknown coordinate name in coords_defaults must raise a helpful ValueError."""
    input_file = DATA_PATH / "test_csv_data_sec_cat.csv"
    # 'citation' is neither a known dimension nor a sec_cats__-prefixed one
    coords_defaults["citation"] = "this should go to attrs"
    with pytest.raises(
        ValueError,
        match="'citation' given in coords_defaults is unknown - prefix with "
        "'sec_cats__' to add a secondary category.",
    ):
        pm2io.read_wide_csv_file_if(
            input_file,
            coords_cols=coords_cols,
            coords_defaults=coords_defaults,
            coords_terminologies=coords_terminologies,
            coords_value_mapping=coords_value_mapping,
        )
def test_output(
    self,
    tmp_path,
    coords_cols,
    coords_defaults,
    coords_terminologies,
    coords_value_mapping,
    filter_keep,
    filter_remove,
):
    """Full read: both the data frame and its attrs must match the expectation."""
    input_file = DATA_PATH / "test_csv_data_sec_cat.csv"
    expected_file = DATA_PATH / "test_read_wide_csv_file_output.csv"
    expected = pd.read_csv(expected_file, index_col=0)
    meta_data = {"references": "Just ask around."}

    result = pm2io.read_wide_csv_file_if(
        input_file,
        coords_cols=coords_cols,
        coords_defaults=coords_defaults,
        coords_terminologies=coords_terminologies,
        coords_value_mapping=coords_value_mapping,
        filter_keep=filter_keep,
        filter_remove=filter_remove,
        meta_data=meta_data,
    )
    # capture attrs before the CSV round-trip (which drops them)
    attrs_result = result.attrs
    result.to_csv(tmp_path / "temp.csv")
    result = pd.read_csv(tmp_path / "temp.csv", index_col=0)
    pd.testing.assert_frame_equal(result, expected, check_column_type=False)

    attrs_expected = {
        "attrs": {
            "references": "Just ask around.",
            "sec_cats": ["Class (class)", "Type (type)"],
            "scen": "scenario (general)",
            "area": "area (ISO3)",
            "cat": "category (IPCC2006)",
        },
        "time_format": "%Y",
        "dimensions": {
            "*": [
                "entity",
                "source",
                "area (ISO3)",
                "Type (type)",
                "unit",
                "scenario (general)",
                "Class (class)",
                "category (IPCC2006)",
            ]
        },
    }
    assert_attrs_equal(attrs_result, attrs_expected)
def test_unprocessed_strs(
    self,
    coords_cols,
    coords_defaults,
    coords_terminologies,
    coords_value_mapping,
):
    """With convert_str=False, leftover string values in the data must raise."""
    input_file = DATA_PATH / "test_csv_data_sec_cat_strings.csv"
    with pytest.raises(ValueError, match="String values"):
        pm2io.read_wide_csv_file_if(
            input_file,
            coords_cols=coords_cols,
            coords_defaults=coords_defaults,
            coords_terminologies=coords_terminologies,
            coords_value_mapping=coords_value_mapping,
            filter_keep={},
            filter_remove={},
            convert_str=False,
        )
def test_read_wide_fill_col(
    self,
    tmp_path,
    coords_cols,
    add_coords_cols,
    coords_defaults,
    coords_terminologies,
    coords_value_mapping,
    coords_value_filling,
):
    """Filling category codes from category names via coords_value_filling must work
    and the additional coordinate must show up in attrs."""
    input_file = DATA_PATH / "test_csv_data_category_name_fill_cat_code.csv"
    expected_file = DATA_PATH / "test_read_wide_csv_file_no_sec_cats_cat_name.csv"
    expected = pd.read_csv(expected_file, index_col=0)

    # this input has no secondary categories
    del coords_cols["sec_cats__Class"]
    del coords_defaults["sec_cats__Type"]
    del coords_terminologies["sec_cats__Class"]
    del coords_terminologies["sec_cats__Type"]

    result = pm2io.read_wide_csv_file_if(
        input_file,
        coords_cols=coords_cols,
        add_coords_cols=add_coords_cols,
        coords_defaults=coords_defaults,
        coords_terminologies=coords_terminologies,
        coords_value_mapping=coords_value_mapping,
        coords_value_filling=coords_value_filling,
    )
    # capture attrs before the CSV round-trip (which drops them)
    attrs_result = result.attrs
    result.to_csv(tmp_path / "temp.csv")
    result = pd.read_csv(tmp_path / "temp.csv", index_col=0)
    pd.testing.assert_frame_equal(result, expected, check_column_type=False)

    attrs_expected = {
        "attrs": {
            "scen": "scenario (general)",
            "area": "area (ISO3)",
            "cat": "category (IPCC2006)",
        },
        "time_format": "%Y",
        "dimensions": {
            "*": [
                "entity",
                "source",
                "area (ISO3)",
                "unit",
                "scenario (general)",
                "category (IPCC2006)",
            ]
        },
        "additional_coordinates": {"category_name": "category (IPCC2006)"},
    }
    assert_attrs_equal(attrs_result, attrs_expected)
def test_entity_terminology(
    self,
    tmp_path,
    coords_cols,
    coords_defaults,
    coords_terminologies,
    coords_value_mapping,
):
    """An entity terminology must rename the entity column and appear in attrs."""
    input_file = DATA_PATH / "test_csv_data.csv"
    expected_file = DATA_PATH / "test_read_wide_csv_file_no_sec_cats.csv"
    expected: pd.DataFrame = pd.read_csv(expected_file, index_col=0)
    # the terminology becomes part of the entity column name
    expected.rename(columns={"entity": "entity (PRIMAP1)"}, inplace=True)

    # this input has no secondary categories
    del coords_cols["sec_cats__Class"]
    del coords_defaults["sec_cats__Type"]
    del coords_terminologies["sec_cats__Class"]
    del coords_terminologies["sec_cats__Type"]
    coords_terminologies["entity"] = "PRIMAP1"

    result = pm2io.read_wide_csv_file_if(
        input_file,
        coords_cols=coords_cols,
        coords_defaults=coords_defaults,
        coords_terminologies=coords_terminologies,
        coords_value_mapping=coords_value_mapping,
    )
    # capture attrs before the CSV round-trip (which drops them)
    attrs_result = result.attrs
    result.to_csv(tmp_path / "temp.csv")
    result = pd.read_csv(tmp_path / "temp.csv", index_col=0)
    pd.testing.assert_frame_equal(result, expected, check_column_type=False)

    attrs_expected = {
        "attrs": {
            "scen": "scenario (general)",
            "area": "area (ISO3)",
            "cat": "category (IPCC2006)",
            "entity_terminology": "PRIMAP1",
        },
        "time_format": "%Y",
        "dimensions": {
            "*": [
                "entity (PRIMAP1)",
                "source",
                "area (ISO3)",
                "unit",
                "scenario (general)",
                "category (IPCC2006)",
            ]
        },
    }
    assert_attrs_equal(attrs_result, attrs_expected)
def test_col_missing(
    self,
    coords_cols,
    coords_defaults,
    coords_terminologies,
    coords_value_mapping,
    filter_keep,
    filter_remove,
):
    """Referencing a CSV column that does not exist must raise a ValueError."""
    input_file = DATA_PATH / "test_csv_data_sec_cat.csv"
    # the input file has no column named "class"
    coords_cols["sec_cats__Class"] = "class"
    with pytest.raises(ValueError, match="Columns {'class'} not found in CSV."):
        pm2io.read_wide_csv_file_if(
            input_file,
            coords_cols=coords_cols,
            coords_defaults=coords_defaults,
            coords_terminologies=coords_terminologies,
            coords_value_mapping=coords_value_mapping,
            filter_keep=filter_keep,
            filter_remove=filter_remove,
        )
def test_compare_wide(
    self,
    coords_cols,
    coords_defaults,
    coords_terminologies,
    coords_value_mapping,
):
    """Reading equivalent wide and long CSVs must yield identical frames and attrs."""
    wide_file = DATA_PATH / "test_csv_data.csv"
    long_file = DATA_PATH / "long.csv"

    # this input has no secondary categories
    del coords_cols["sec_cats__Class"]
    del coords_defaults["sec_cats__Type"]
    del coords_terminologies["sec_cats__Class"]
    del coords_terminologies["sec_cats__Type"]
    meta_data = {"references": "Just ask around"}

    result_wide = pm2io.read_wide_csv_file_if(
        wide_file,
        coords_cols=coords_cols,
        coords_defaults=coords_defaults,
        coords_terminologies=coords_terminologies,
        coords_value_mapping=coords_value_mapping,
        meta_data=meta_data,
    )

    # the long reader additionally needs the time and data columns
    coords_cols["time"] = "year"
    coords_cols["data"] = "emissions"
    result_long = pm2io.read_long_csv_file_if(
        long_file,
        coords_cols=coords_cols,
        coords_defaults=coords_defaults,
        coords_terminologies=coords_terminologies,
        coords_value_mapping=coords_value_mapping,
        meta_data=meta_data,
        time_format="%Y",
    )

    pd.testing.assert_frame_equal(result_wide, result_long)
    assert result_wide.attrs == result_long.attrs
def test_compare_wide_add_cols(
    self,
    coords_cols,
    add_coords_cols,
    coords_defaults,
    coords_terminologies,
    coords_value_mapping,
):
    """Wide and long reads with additional coordinate columns must agree exactly."""
    wide_file = DATA_PATH / "test_csv_data_category_name.csv"
    long_file = DATA_PATH / "test_csv_data_category_name_long.csv"

    # this input has no secondary categories
    del coords_cols["sec_cats__Class"]
    del coords_defaults["sec_cats__Type"]
    del coords_terminologies["sec_cats__Class"]
    del coords_terminologies["sec_cats__Type"]

    result_wide = pm2io.read_wide_csv_file_if(
        wide_file,
        coords_cols=coords_cols,
        add_coords_cols=add_coords_cols,
        coords_defaults=coords_defaults,
        coords_terminologies=coords_terminologies,
        coords_value_mapping=coords_value_mapping,
    )

    # the long reader additionally needs the time and data columns
    coords_cols["time"] = "year"
    coords_cols["data"] = "emissions"
    result_long = pm2io.read_long_csv_file_if(
        long_file,
        coords_cols=coords_cols,
        add_coords_cols=add_coords_cols,
        coords_defaults=coords_defaults,
        coords_terminologies=coords_terminologies,
        coords_value_mapping=coords_value_mapping,
        time_format="%Y",
    )

    pd.testing.assert_frame_equal(result_wide, result_long)
    assert result_wide.attrs == result_long.attrs