def test_single_dim_order(self):
    """Values should line up with the spec's single-dimension coordinates."""
    spec = Spec(
        name='test',
        dims=['technology_type'],
        coords={
            'technology_type': [
                'water_meter', 'electricity_meter', 'other', 'aaa']
        },
        dtype='float')
    frame = pd.DataFrame([
        {'technology_type': 'water_meter', 'test': 5},
        {'technology_type': 'electricity_meter', 'test': 6},
        {'technology_type': 'other', 'test': 7},
        {'technology_type': 'aaa', 'test': 8},
    ])
    expected = DataArray(spec, numpy.array([5., 6., 7., 8.]))
    # Convert the same frame twice — presumably the second conversion guards
    # against from_df mutating its input; confirm intent with the author.
    first = DataArray.from_df(spec, frame)
    second = DataArray.from_df(spec, frame)
    assert expected == first
    assert expected == second
def test_multi_dim_order(self):
    """Data order should follow the spec's coords, not the frame's row order."""
    spec = Spec(
        name='test',
        coords={'lad': ['c', 'a', 'b'], 'interval': [4, 2]},
        dims=['lad', 'interval'],
        dtype='float')
    values = numpy.array(
        [
            # 4 2
            [1, 2],  # c
            [5, 6],  # a
            [9, 0]   # b
        ],
        dtype='float')
    expected = DataArray(spec, values)
    # rows deliberately shuffled relative to the spec's coordinate order
    frame = pd.DataFrame([
        {'test': 6.0, 'lad': 'a', 'interval': 2},
        {'test': 0.0, 'lad': 'b', 'interval': 2},
        {'test': 2.0, 'lad': 'c', 'interval': 2},
        {'test': 5.0, 'lad': 'a', 'interval': 4},
        {'test': 9.0, 'lad': 'b', 'interval': 4},
        {'test': 1.0, 'lad': 'c', 'interval': 4},
    ]).set_index(['lad', 'interval'])
    assert DataArray.from_df(spec, frame) == expected
    # round-trip back to a frame, comparing after a common sort
    pd.testing.assert_frame_equal(
        expected.as_df().sort_index(), frame.sort_index())
def test_from_multiindex(self):
    """A single-level MultiIndex frame should convert cleanly."""
    spec = Spec(name='test', dims=['multi'],
                coords={'multi': ['b', 'a', 'c']}, dtype='float')
    index = pd.MultiIndex.from_product([['b', 'a', 'c']], names=['multi'])
    frame = pd.DataFrame({'test': [1, 2, 3]}, index=index)
    expected = DataArray(spec, numpy.array([1, 2, 3]))
    assert expected == DataArray.from_df(spec, frame)
def test_scalar(self):
    """Zero-dimensional (numpy scalar) data should round-trip via DataFrame."""
    spec = Spec(name='test', dims=[], coords={}, dtype='float')
    scalar = DataArray(spec, numpy.array(2.0))
    frame = pd.DataFrame([{'test': 2.0}])
    assert DataArray.from_df(spec, frame) == scalar
    pd.testing.assert_frame_equal(scalar.as_df(), frame)
def test_df_round_trip_2d(self):
    """Two-dimensional data should survive as_df followed by from_df.

    Also checks that a hand-built, shuffled DataFrame converts to the same
    data (the original test constructed this frame but never asserted
    against it — dead fixture data, now exercised).
    """
    spec = Spec.from_dict({
        'name': 'two_d',
        'dims': ['a', 'z'],
        'coords': {
            'a': ['q', 'p'],
            'z': ['a', 'c', 'b'],
        },
        'dtype': 'float'
    })
    da = DataArray(spec, numpy.array([
        [5., 6., 7.],
        [8., 9., 0.],
    ]))
    # rows listed out of coordinate order on purpose
    df = pd.DataFrame([
        {'z': 'a', 'a': 'p', 'two_d': 8.},
        {'z': 'c', 'a': 'q', 'two_d': 6.},
        {'z': 'a', 'a': 'q', 'two_d': 5.},
        {'z': 'b', 'a': 'q', 'two_d': 7.},
        {'z': 'b', 'a': 'p', 'two_d': 0.},
        {'z': 'c', 'a': 'p', 'two_d': 9.},
    ])
    df = df.set_index(spec.dims)
    # the shuffled frame should produce the same underlying data
    da_from_fixture = DataArray.from_df(spec, df)
    assert_array_equal(da.data, da_from_fixture.data)
    # full round trip: DataArray -> DataFrame -> DataArray
    df_from_da = da.as_df()
    da_from_df = DataArray.from_df(spec, df_from_da)
    assert_array_equal(da.data, da_from_df.data)
def dataframe_to_data_array(dataframe, spec, path):
    """Convert a DataFrame into a DataArray described by ``spec``.

    Parameters
    ----------
    dataframe : pandas.DataFrame
    spec : Spec
    path : str
        Source location, used only for error reporting

    Returns
    -------
    DataArray

    Raises
    ------
    SmifDataMismatchError
        If ``spec`` is zero-dimensional and the frame does not hold exactly
        one value.
    """
    if not spec.dims:
        # zero-dimensional case (scalar): expect exactly one value
        column = dataframe[spec.name]
        if column.shape != (1, ):
            msg = "Data for '{}' should contain a single value, instead got {} while " + \
                  "reading from {}"
            raise SmifDataMismatchError(
                msg.format(spec.name, len(column), path))
        return DataArray(spec, column.iloc[0])
    return DataArray.from_df(spec, dataframe)
def test_time_only_conversion(self, months, seasons):
    """Aggregate from months to seasons, summing groups of months
    """
    convertor = IntervalAdaptor('test-month-season')
    from_spec = Spec(name='test-var', dtype='float', dims=['months'],
                     coords={'months': months})
    to_spec = Spec(name='test-var', dtype='float', dims=['seasons'],
                   coords={'seasons': seasons})
    convertor.add_input(from_spec)
    convertor.add_output(to_spec)
    coefficients = convertor.generate_coefficients(from_spec, to_spec)

    data_array = DataArray(from_spec, np.array([1] * 12))
    handle = Mock()
    handle.get_data = Mock(return_value=data_array)
    handle.read_coefficients = Mock(return_value=coefficients)
    convertor.simulate(handle)

    # one unit per month, three months per season
    actual = handle.set_results.call_args[0][1]
    np.testing.assert_array_equal(actual, np.array([3, 3, 3, 3]))
def test_aggregate_from_hour_to_day(self, twenty_four_hours, one_day):
    """Aggregate hours to a single value for a day
    """
    convertor = IntervalAdaptor('test-hourly-day')
    from_spec = Spec(name='test-var', dtype='float', dims=['hourly_day'],
                     coords={'hourly_day': twenty_four_hours})
    to_spec = Spec(name='test-var', dtype='float', dims=['one_day'],
                   coords={'one_day': one_day})
    convertor.add_input(from_spec)
    convertor.add_output(to_spec)
    coefficients = convertor.generate_coefficients(from_spec, to_spec)

    handle = Mock()
    handle.get_data = Mock(return_value=DataArray(from_spec, np.ones((24, ))))
    handle.read_coefficients = Mock(return_value=coefficients)
    convertor.simulate(handle)

    # 24 unit hours sum to one 24-unit day
    actual = handle.set_results.call_args[0][1]
    assert np.allclose(actual, np.array([24]), rtol=1e-05, atol=1e-08)
def test_one_region_convert_from_hour_to_day(self, regions_rect,
                                             twenty_four_hours, one_day):
    """One region, time aggregation required
    """
    data = np.ones((1, 24))      # area a, hours 0-23
    expected = np.array([[24]])  # area a, day 0

    convertor = IntervalAdaptor('test-hours-day')
    from_spec = Spec(name='test-var', dtype='float',
                     dims=['rect', 'twenty_four_hours'],
                     coords={'twenty_four_hours': twenty_four_hours,
                             'rect': regions_rect})
    to_spec = Spec(name='test-var', dtype='float', dims=['rect', 'one_day'],
                   coords={'one_day': one_day, 'rect': regions_rect})
    convertor.add_input(from_spec)
    convertor.add_output(to_spec)

    handle = Mock()
    handle.get_data = Mock(return_value=DataArray(from_spec, data))
    # no stored coefficients — force on-the-fly generation
    handle.read_coefficients = Mock(side_effect=SmifDataNotFoundError)
    convertor.simulate(handle)

    actual = handle.set_results.call_args[0][1]
    assert np.allclose(actual, expected)
def test_aggregate_from_month_to_seasons(self, months, seasons, monthly_data,
                                         monthly_data_as_seasons):
    """Aggregate months to values for each season
    """
    convertor = IntervalAdaptor('test-month-season')
    from_spec = Spec(name='test-var', dtype='float', dims=['months'],
                     coords={'months': months})
    to_spec = Spec(name='test-var', dtype='float', dims=['seasons'],
                   coords={'seasons': seasons})
    convertor.add_input(from_spec)
    convertor.add_output(to_spec)
    coefficients = convertor.generate_coefficients(from_spec, to_spec)

    handle = Mock()
    handle.get_data = Mock(return_value=DataArray(from_spec, monthly_data))
    handle.read_coefficients = Mock(return_value=coefficients)
    convertor.simulate(handle)

    actual = handle.set_results.call_args[0][1]
    assert np.allclose(actual, monthly_data_as_seasons, rtol=1e-05, atol=1e-08)
def test_time_only_conversion_disagg(self, months, seasons):
    """Disaggregate from seasons to months based on duration of each
    month/season
    """
    convertor = IntervalAdaptor('test-season-month')
    from_spec = Spec(name='test-var', dtype='float', dims=['seasons'],
                     coords={'seasons': seasons})
    to_spec = Spec(name='test-var', dtype='float', dims=['months'],
                   coords={'months': months})
    convertor.add_input(from_spec)
    convertor.add_output(to_spec)
    coefficients = convertor.generate_coefficients(from_spec, to_spec)

    handle = Mock()
    handle.get_data = Mock(
        return_value=DataArray(from_spec, np.array([3, 3, 3, 3])))
    handle.read_coefficients = Mock(return_value=coefficients)
    convertor.simulate(handle)

    actual = handle.set_results.call_args[0][1]
    # each season's 3 units spread over its months, weighted by month length
    expected = np.array([
        1.033333, 0.933333, 1.01087, 0.978261, 1.01087, 0.978261, 1.01087,
        1.01087, 0.989011, 1.021978, 0.989011, 1.033333
    ])
    np.testing.assert_allclose(actual, expected, rtol=1e-3)
def sample_narrative_data(sample_narratives, get_sector_model,
                          energy_supply_sector_model,
                          water_supply_sector_model):
    """Build random DataArrays for every narrative variant parameter.

    Returns a dict keyed by
    (sos_model_name, narrative_name, variant_name, param_name).
    """
    sos_model_name = 'energy'
    sector_models = {
        get_sector_model['name']: get_sector_model,
        energy_supply_sector_model['name']: energy_supply_sector_model,
        water_supply_sector_model['name']: water_supply_sector_model,
    }
    narrative_data = {}
    for narrative in sample_narratives:
        for sector_model_name, param_names in narrative['provides'].items():
            parameters = sector_models[sector_model_name]['parameters']
            for param_name in param_names:
                param = _pick_from_list(parameters, param_name)
                for variant in narrative['variants']:
                    spec = Spec.from_dict(param)
                    da = DataArray(spec, np.random.random(spec.shape))
                    key = (sos_model_name, narrative['name'],
                           variant['name'], param_name)
                    narrative_data[key] = da
    return narrative_data
def test_canonical_missing_results(self, store, sample_dimensions,
                                   get_sos_model, get_sector_model,
                                   energy_supply_sector_model, model_run):
    """The missing-results set should shrink as results are written."""
    for dim in sample_dimensions:
        store.write_dimension(dim)
    store.write_sos_model(get_sos_model)
    store.write_model_run(model_run)
    store.write_model(get_sector_model)
    store.write_model(energy_supply_sector_model)

    # before any write, all the results are missing
    missing_results = {
        (2015, 0, 'energy_demand', 'gas_demand'),
        (2020, 0, 'energy_demand', 'gas_demand'),
        (2025, 0, 'energy_demand', 'gas_demand'),
    }
    assert (store.canonical_missing_results(
        model_run['name']) == missing_results)

    fake_data = DataArray(Spec(name='gas_demand', dtype='float'),
                          np.array(1, dtype=float))
    store.write_results(fake_data, model_run['name'], 'energy_demand',
                        2015, 0)

    # the written (timestep, iteration, model, output) is no longer missing
    missing_results.remove((2015, 0, 'energy_demand', 'gas_demand'))
    assert (store.canonical_missing_results(
        model_run['name']) == missing_results)
def set_results(self, output_name, data):
    """Set results values for model outputs

    Parameters
    ----------
    output_name : str
    data : numpy.ndarray

    Raises
    ------
    TypeError
        If ``data`` looks like a DataArray rather than a bare ndarray
    KeyError
        If ``output_name`` is not a registered output of this model
    """
    # guard: callers sometimes pass a DataArray; require the raw ndarray
    if hasattr(data, 'as_ndarray'):
        raise TypeError("Pass in a numpy array")
    if output_name not in self._outputs:
        raise KeyError("'{}' not recognised as output for '{}'".format(
            output_name, self._model_name))
    self.logger.debug("Write %s %s %s", self._model_name, output_name,
                      self._current_timestep)
    da = DataArray(self._outputs[output_name], data)
    self._store.write_results(da, self._modelrun_name, self._model_name,
                              self._current_timestep,
                              self._decision_iteration)
def test_convert_custom():
    """Convert custom units
    """
    from_spec = Spec(name='test_variable', dtype='float', unit='mcm')
    to_spec = Spec(name='test_variable', dtype='float', unit='GW')
    source = DataArray(from_spec, np.array([0.18346346], dtype=float))

    handle = Mock()
    handle.get_data = Mock(return_value=source)
    handle.read_unit_definitions = Mock(
        return_value=['mcm = 10.901353 * GW'])

    convertor = UnitAdaptor('test-mcm-GW')
    convertor.add_input(from_spec)
    convertor.add_output(to_spec)
    # must have run before_model_run to register units
    convertor.before_model_run(handle)
    convertor.simulate(handle)

    actual = handle.set_results.call_args[0][1]
    np.testing.assert_allclose(actual, np.array([2], dtype=float))
def test_aggregate_region(self, regions_rect, regions_half_squares):
    """Two regions aggregated to one, one interval
    """
    convertor = RegionAdaptor('test-square-half')
    from_spec = Spec(name='test-var', dtype='float', dims=['half_squares'],
                     coords={'half_squares': regions_half_squares})
    to_spec = Spec(name='test-var', dtype='float', dims=['rect'],
                   coords={'rect': regions_rect})
    convertor.add_input(from_spec)
    convertor.add_output(to_spec)

    coefficients = convertor.generate_coefficients(from_spec, to_spec)
    # aggregating coefficients
    np.testing.assert_allclose(coefficients, np.ones((2, 1)), rtol=1e-3)

    handle = Mock()
    handle.get_data = Mock(
        return_value=DataArray(from_spec, np.array([24, 24])))  # area a,b
    handle.read_coefficients = Mock(return_value=coefficients)
    convertor.simulate(handle)

    actual = handle.set_results.call_args[0][1]
    assert np.allclose(actual, np.array([48]))  # area zero
def test_narrative_data(self, setup_folder_structure, config_handler,
                        get_narrative):
    """Dump a narrative (csv) data-file, read it back via the datafile
    interface and check the data shows up in the returned DataArray.

    (Docstring fix: the file written is CSV, not yml as previously stated.)
    """
    basefolder = setup_folder_structure
    filepath = os.path.join(str(basefolder), 'data', 'narratives',
                            'central_planning.csv')
    # write a one-row, one-column CSV fixture
    with open(filepath, 'w') as csvfile:
        writer = csv.DictWriter(csvfile,
                                fieldnames=['homogeneity_coefficient'])
        writer.writeheader()
        writer.writerow({'homogeneity_coefficient': 8})
    spec = Spec.from_dict({
        'name': 'homogeneity_coefficient',
        'description': "How homegenous the centralisation process is",
        'absolute_range': [0, 1],
        'expected_range': [0, 1],
        'unit': 'percentage',
        'dtype': 'float'
    })
    actual = config_handler.read_narrative_variant_data(
        'central_planning', spec)
    assert actual == DataArray(spec, np.array(8, dtype=float))
def test_from_df_partial(self, spec):
    """Should create a DataArray that can handle missing data, returning
    nan/null
    """
    frame = pd.DataFrame({
        'a': ['a1'],
        'b': ['b1'],
        'c': ['c2'],
        'test_data': [1]
    }).set_index(['a', 'b', 'c'])
    # every cell is nan except the single one provided by the frame
    filled = numpy.full(spec.shape, numpy.nan)
    filled[0, 0, 1] = 1.0
    expected = DataArray(spec, filled)
    actual = DataArray.from_df(spec, frame)
    assert_array_equal(actual.data, expected.data)
    assert actual == expected
def test_read_write_data_array(self, handler, scenario):
    """Data written with a leading timestep dim should be readable for that
    timestep without the timestep dim."""
    write_config = deepcopy(scenario['provides'][0])
    write_config['dims'] = ['timestep'] + write_config['dims']
    write_config['coords']['timestep'] = [{'name': 2010}]
    write_spec = Spec.from_dict(write_config)
    da = DataArray(write_spec, np.array([[0, 1]], dtype='float'))
    handler.write_scenario_variant_data('mortality.csv', da)

    read_spec = Spec.from_dict(deepcopy(scenario['provides'][0]))
    expected = DataArray(read_spec, np.array([0, 1], dtype='float'))
    actual = handler.read_scenario_variant_data('mortality.csv', read_spec,
                                                2010)
    assert actual == expected
    np.testing.assert_array_equal(actual.as_ndarray(),
                                  expected.as_ndarray())
def test_error_duplicate_rows_multi_index(self):
    """Duplicate (a, b) index entries should raise a mismatch error."""
    spec = Spec(name='test', dims=['a', 'b'],
                coords={'a': [1, 2], 'b': [3, 4]}, dtype='int')
    frame = pd.DataFrame([
        {'a': 1, 'b': 3, 'test': 0},
        {'a': 2, 'b': 3, 'test': 1},
        {'a': 1, 'b': 4, 'test': 2},
        {'a': 2, 'b': 4, 'test': 3},
        {'a': 2, 'b': 4, 'test': 4},  # duplicates the row above
    ])
    with raises(SmifDataMismatchError) as ex:
        DataArray.from_df(spec, frame)
    # key ordering within the reported dict is not guaranteed — accept either
    msg = "Data for 'test' contains duplicate values at [{'a': 2, 'b': 4}]"
    msg_alt = "Data for 'test' contains duplicate values at [{'b': 4, 'a': 2}]"
    assert msg in str(ex.value) or msg_alt in str(ex.value)
def get_sector_model_parameter_defaults(get_sector_model):
    """DataArray for each parameter default
    """
    # raw default values; each is wrapped in a DataArray below when the
    # sector model declares the corresponding parameter
    defaults = {
        'smart_meter_savings': np.array(0.5),
        'homogeneity_coefficient': np.array(0.1)
    }
    for param in get_sector_model['parameters']:
        name = param['name']
        defaults[name] = DataArray(Spec.from_dict(param), defaults[name])
    return defaults
def test_to_from_df(self):
    """A 2-d integer array should round-trip between DataArray and frame."""
    frame = pd.DataFrame([{
        'test': 3,
        'region': 'oxford',
        'interval': 1
    }]).set_index(['region', 'interval'])
    spec = Spec(name='test', dims=['region', 'interval'],
                coords={'region': ['oxford'], 'interval': [1]},
                dtype='int64')
    expected = DataArray(spec, numpy.array([[3.]], dtype='int64'))

    assert DataArray.from_df(spec, frame) == expected
    pd.testing.assert_frame_equal(expected.as_df(), frame)
def sample_gas_demand_results(lad, hourly):
    """Zero-filled gas_demand DataArray over the lad x hourly dimensions."""
    spec = Spec.from_dict({
        'name': 'gas_demand',
        'dtype': 'float',
        'dims': ['lad', 'hourly'],
        'coords': {'lad': lad, 'hourly': hourly}
    })
    return DataArray(spec, np.zeros(spec.shape, dtype=float))
def test_combine(self, small_da, data):
    """Should override values where present (use case: full array of default
    values, overridden by a partial array of specific values). See variously:
    - http://xarray.pydata.org/en/stable/combining.html#merging-with-no-conflicts
    - https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.update.html
    """
    # partial array: everything nan except one overriding cell
    overrides = numpy.full(small_da.shape, numpy.nan)
    overrides[0, 0, 1] = 99
    partial = DataArray(small_da.spec, overrides)

    # update in-place
    small_da.update(partial)

    # match fixture data, with the single overridden cell
    expected_data = numpy.arange(24, dtype='float').reshape((2, 3, 4))
    expected_data[0, 0, 1] = 99
    expected = DataArray(small_da.spec, expected_data)

    assert small_da == expected
    assert_array_equal(small_da.data, expected.data)
def test_df_round_trip(self):
    """One-dimensional data should survive as_df followed by from_df.

    Also checks the hand-built DataFrame converts to the same data (the
    original test constructed this frame but never asserted against it —
    dead fixture data, now exercised).
    """
    spec = Spec.from_dict({
        'name': 'multi_savings',
        'description': 'The savings from various technologies',
        'dims': ['technology_type'],
        'coords': {
            'technology_type': [
                'water_meter', 'electricity_meter', 'other', 'aaa']
        },
        'dtype': 'float',
        'abs_range': (0, 100),
        'exp_range': (3, 10),
        'unit': '%'
    })
    da = DataArray(spec, numpy.array([5., 6., 7., 8.]))
    df = pd.DataFrame([
        {'technology_type': 'water_meter', 'multi_savings': 5.},
        {'technology_type': 'electricity_meter', 'multi_savings': 6.},
        {'technology_type': 'other', 'multi_savings': 7.},
        {'technology_type': 'aaa', 'multi_savings': 8.},
    ])
    df = df.set_index(spec.dims)
    # the fixture frame should produce the same underlying data
    da_from_fixture = DataArray.from_df(spec, df)
    assert_array_equal(da.data, da_from_fixture.data)
    # full round trip: DataArray -> DataFrame -> DataArray
    df_from_da = da.as_df()
    da_from_df = DataArray.from_df(spec, df_from_da)
    assert_array_equal(da.data, da_from_df.data)
def test_read_data_array_missing_timestep(self, handler, scenario):
    """Reading a timestep that was never written should raise not-found."""
    config = deepcopy(scenario['provides'][0])
    config['dims'] = ['timestep'] + config['dims']
    config['coords']['timestep'] = [{'name': 2010}]
    spec = Spec.from_dict(config)
    da = DataArray(spec, np.array([[0, 1]], dtype=float))
    handler.write_scenario_variant_data('mortality.csv', da)

    # only 2010 was written; 2011 must be reported as missing
    with raises(SmifDataNotFoundError) as ex:
        handler.read_scenario_variant_data('mortality.csv', spec, 2011)
    assert "not found for timestep 2011" in str(ex.value)
def test_string_data(self, handler):
    """Object-dtype (including non-ASCII string) data should round-trip."""
    spec = Spec(name='string_data', dims=['timestep', 'zones'],
                coords={'timestep': [2010], 'zones': ['a', 'b', 'c']},
                dtype='object')
    values = np.array([['alpha', 'beta', 'γάμμα']], dtype='object')
    expected = DataArray(spec, values)

    handler.write_scenario_variant_data('key', expected)
    assert handler.read_scenario_variant_data('key', spec) == expected
def read_results(self, modelrun_name, model_name, output_spec, timestep=None,
                 decision_iteration=None):
    """Read stored results as a DataArray.

    Parameters
    ----------
    modelrun_name : str
    model_name : str
    output_spec : Spec
    timestep : int, optional
    decision_iteration : int, optional

    Returns
    -------
    DataArray

    Raises
    ------
    SmifDataNotFoundError
        If no results are stored under the given key
    """
    key = (modelrun_name, model_name, output_spec.name, timestep,
           decision_iteration)
    try:
        stored = self._results[key]
    except KeyError:
        raise SmifDataNotFoundError(
            "Cannot find results for {}".format(key))
    return DataArray(output_spec, stored)
def test_remap_timeslices_to_months(self, regions_rect, seasons, months):
    """One region, time remapping required
    """
    data = np.array(
        [[
            3,  # winter month
            3,  # spring month
            3,  # summer month
            3   # autumn month
        ]],
        dtype=float)
    expected = np.array([[
        1.03333333, 0.93333333, 1.01086957, 0.97826087, 1.01086957,
        0.97826087, 1.01086957, 1.01086957, 0.98901099, 1.02197802,
        0.98901099, 1.03333333
    ]])

    convertor = IntervalAdaptor('test-month-remap')
    from_spec = Spec(name='test-var', dtype='float',
                     dims=['rect', 'seasons'],
                     coords={'seasons': seasons, 'rect': regions_rect})
    to_spec = Spec(name='test-var', dtype='float',
                   dims=['rect', 'months'],
                   coords={'months': months, 'rect': regions_rect})
    convertor.add_input(from_spec)
    convertor.add_output(to_spec)

    handle = Mock()
    handle.get_data = Mock(return_value=DataArray(from_spec, data))
    # no cached coefficients — the adaptor must generate them on the fly
    handle.read_coefficients = Mock(side_effect=SmifDataNotFoundError)
    convertor.simulate(handle)

    actual = handle.set_results.call_args[0][1]
    assert np.allclose(actual, expected)
def test_match_metadata(self):
    """from_df should reject frames whose columns or index disagree with the
    spec, with an informative error message."""
    spec = Spec(name='test', dims=['region'],
                coords={'region': ['oxford']}, dtype='int64')

    # must have a column named the same as the spec.name
    frame = pd.DataFrame([{
        'region': 'oxford',
        'other': 'else'
    }]).set_index(['region'])
    msg = "Data for 'test' expected a data column called 'test' and index names " + \
          "['region'], instead got data columns ['other'] and index names ['region']"
    with raises(SmifDataMismatchError) as ex:
        DataArray.from_df(spec, frame)
    assert msg in str(ex.value)

    # may not be indexed, if columns are otherwise all okay
    DataArray.from_df(spec, pd.DataFrame([{'region': 'oxford', 'test': 1}]))

    # must have an index level for each spec dimension
    frame = pd.DataFrame([{'test': 3.14}])
    msg = "Data for 'test' expected a data column called 'test' and index names " + \
          "['region'], instead got data columns ['test'] and index names [None]"
    with raises(SmifDataMismatchError) as ex:
        DataArray.from_df(spec, frame)
    assert msg in str(ex.value)

    # must not have dimension labels outside of the spec dimension
    frame = pd.DataFrame([{
        'test': 3.14,
        'region': 'oxford'
    }, {
        'test': 3.14,
        'region': 'extra'
    }]).set_index(['region'])
    msg = "Data for 'test' contained unexpected values in the set of coordinates for " + \
          "dimension 'region': ['extra']"
    with raises(SmifDataMismatchError) as ex:
        DataArray.from_df(spec, frame)
    assert msg in str(ex.value)