Example #1
    def test_single_dim_order(self):
        spec = Spec(name='test',
                    dims=['technology_type'],
                    coords={
                        'technology_type':
                        ['water_meter', 'electricity_meter', 'other', 'aaa']
                    },
                    dtype='float')
        df = pd.DataFrame([
            {
                'technology_type': 'water_meter',
                'test': 5
            },
            {
                'technology_type': 'electricity_meter',
                'test': 6
            },
            {
                'technology_type': 'other',
                'test': 7
            },
            {
                'technology_type': 'aaa',
                'test': 8
            },
        ])
        da = DataArray(spec, numpy.array([5., 6., 7., 8.]))
        da_from_df = DataArray.from_df(spec, df)
        da_from_df_2 = DataArray.from_df(spec, df)
        assert da == da_from_df
        assert da == da_from_df_2
Example #2
    def test_multi_dim_order(self):
        spec = Spec(name='test',
                    coords={
                        'lad': ['c', 'a', 'b'],
                        'interval': [4, 2]
                    },
                    dims=['lad', 'interval'],
                    dtype='float')
        data = numpy.array(
            [
                # 4  2
                [1, 2],  # c
                [5, 6],  # a
                [9, 0]  # b
            ],
            dtype='float')
        da = DataArray(spec, data)

        df = pd.DataFrame([
            {
                'test': 6.0,
                'lad': 'a',
                'interval': 2
            },
            {
                'test': 0.0,
                'lad': 'b',
                'interval': 2
            },
            {
                'test': 2.0,
                'lad': 'c',
                'interval': 2
            },
            {
                'test': 5.0,
                'lad': 'a',
                'interval': 4
            },
            {
                'test': 9.0,
                'lad': 'b',
                'interval': 4
            },
            {
                'test': 1.0,
                'lad': 'c',
                'interval': 4
            },
        ]).set_index(['lad', 'interval'])
        da_from_df = DataArray.from_df(spec, df)
        assert da_from_df == da

        da_to_df = da.as_df().sort_index()
        df = df.sort_index()
        pd.testing.assert_frame_equal(da_to_df, df)
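
A rough plain-pandas sketch of the reordering these two tests rely on (illustrative only, not smif's implementation): build the full index from the spec's coords in spec order, reindex the frame onto it, and reshape. Row order in the incoming DataFrame then becomes irrelevant. Here df is assumed to be the indexed frame from the test above.

# Illustrative sketch, assuming df is indexed by ['lad', 'interval'] as above
import pandas as pd

dims = ['lad', 'interval']
coords = {'lad': ['c', 'a', 'b'], 'interval': [4, 2]}
full_index = pd.MultiIndex.from_product([coords[d] for d in dims], names=dims)
values = df['test'].reindex(full_index).to_numpy()
data = values.reshape([len(coords[d]) for d in dims])  # rows follow spec order: c, a, b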
Example #3
    def test_from_multiindex(self):
        spec = Spec(name='test',
                    dims=['multi'],
                    coords={'multi': ['b', 'a', 'c']},
                    dtype='float')
        index = pd.MultiIndex.from_product([['b', 'a', 'c']], names=['multi'])
        df = pd.DataFrame({'test': [1, 2, 3]}, index=index)
        da_from_df = DataArray.from_df(spec, df)
        da = DataArray(spec, numpy.array([1, 2, 3]))
        assert da == da_from_df
Example #4
    def test_scalar(self):
        # should handle zero-dimensional case (numpy array as scalar)
        data = numpy.array(2.0)
        spec = Spec(name='test', dims=[], coords={}, dtype='float')
        da = DataArray(spec, data)
        df = pd.DataFrame([{'test': 2.0}])
        da_from_df = DataArray.from_df(spec, df)
        assert da_from_df == da

        df_from_da = da.as_df()
        pd.testing.assert_frame_equal(df_from_da, df)
Example #5
    def test_df_round_trip_2d(self):
        spec = Spec.from_dict({
            'name': 'two_d',
            'dims': ['a', 'z'],
            'coords': {
                'a': ['q', 'p'],
                'z': ['a', 'c', 'b'],
            },
            'dtype': 'float'
        })
        da = DataArray(spec, numpy.array([
            [5., 6., 7.],
            [8., 9., 0.],
        ]))
        df = pd.DataFrame([
            {
                'z': 'a',
                'a': 'p',
                'two_d': 8.
            },
            {
                'z': 'c',
                'a': 'q',
                'two_d': 6.
            },
            {
                'z': 'a',
                'a': 'q',
                'two_d': 5.
            },
            {
                'z': 'b',
                'a': 'q',
                'two_d': 7.
            },
            {
                'z': 'b',
                'a': 'p',
                'two_d': 0.
            },
            {
                'z': 'c',
                'a': 'p',
                'two_d': 9.
            },
        ])
        df = df.set_index(spec.dims)
        df_from_da = da.as_df()

        da_from_df = DataArray.from_df(spec, df_from_da)
        assert_array_equal(da.data, da_from_df.data)
Example #6
    def dataframe_to_data_array(dataframe, spec, path):
        if spec.dims:
            data_array = DataArray.from_df(spec, dataframe)
        else:
            # zero-dimensional case (scalar)
            data = dataframe[spec.name]
            if data.shape != (1, ):
                msg = "Data for '{}' should contain a single value, instead got {} while " + \
                        "reading from {}"
                raise SmifDataMismatchError(
                    msg.format(spec.name, len(data), path))
            data_array = DataArray(spec, data.iloc[0])

        return data_array
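
A hypothetical usage of the helper above for the zero-dimensional branch; the spec name, value and path below are illustrative, and Spec, pd and dataframe_to_data_array are assumed to be available as in the snippet above.

# Hypothetical usage: a scalar parameter read from a one-row frame
scalar_spec = Spec(name='coefficient', dims=[], coords={}, dtype='float')
frame = pd.DataFrame([{'coefficient': 0.5}])
da = dataframe_to_data_array(frame, scalar_spec, 'coefficient.csv')  # scalar DataArray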
Example #7
    def test_time_only_conversion(self, months, seasons):
        """Aggregate from months to seasons, summing groups of months
        """
        adaptor = IntervalAdaptor('test-month-season')
        from_spec = Spec(name='test-var',
                         dtype='float',
                         dims=['months'],
                         coords={'months': months})
        adaptor.add_input(from_spec)
        to_spec = Spec(name='test-var',
                       dtype='float',
                       dims=['seasons'],
                       coords={'seasons': seasons})
        adaptor.add_output(to_spec)
        actual_coefficients = adaptor.generate_coefficients(from_spec, to_spec)

        data = np.array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

        data_array = DataArray(from_spec, data)

        data_handle = Mock()
        data_handle.get_data = Mock(return_value=data_array)
        data_handle.read_coefficients = Mock(return_value=actual_coefficients)

        adaptor.simulate(data_handle)
        actual = data_handle.set_results.call_args[0][1]
        expected = np.array([3, 3, 3, 3])
        np.testing.assert_array_equal(actual, expected)
Example #8
    def test_aggregate_from_hour_to_day(self, twenty_four_hours, one_day):
        """Aggregate hours to a single value for a day
        """
        data = np.ones((24, ))

        adaptor = IntervalAdaptor('test-hourly-day')
        from_spec = Spec(name='test-var',
                         dtype='float',
                         dims=['hourly_day'],
                         coords={'hourly_day': twenty_four_hours})
        adaptor.add_input(from_spec)
        to_spec = Spec(name='test-var',
                       dtype='float',
                       dims=['one_day'],
                       coords={'one_day': one_day})
        adaptor.add_output(to_spec)
        actual_coefficients = adaptor.generate_coefficients(from_spec, to_spec)

        data_array = DataArray(from_spec, data)

        data_handle = Mock()
        data_handle.get_data = Mock(return_value=data_array)
        data_handle.read_coefficients = Mock(return_value=actual_coefficients)

        adaptor.simulate(data_handle)
        actual = data_handle.set_results.call_args[0][1]
        expected = np.array([24])

        assert np.allclose(actual, expected, rtol=1e-05, atol=1e-08)
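
The aggregation in this test is consistent with applying a coefficients matrix of ones, as the explicit np.ones((2, 1)) expectation in Example #16 suggests for the region case. A rough sketch of the arithmetic only, not the adaptor's actual code:

# Sketch: aggregating 24 hourly values into one daily value via a coefficients matrix
import numpy as np

hourly = np.ones((24,))          # 24 hourly values of 1
coefficients = np.ones((24, 1))  # assumed hours-to-day aggregation weights
daily = hourly @ coefficients    # array([24.]), matching the expected result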
Example #9
    def test_one_region_convert_from_hour_to_day(self, regions_rect,
                                                 twenty_four_hours, one_day):
        """One region, time aggregation required
        """
        data = np.ones((1, 24))  # area a, hours 0-23
        expected = np.array([[24]])  # area a, day 0

        adaptor = IntervalAdaptor('test-hours-day')
        from_spec = Spec(name='test-var',
                         dtype='float',
                         dims=['rect', 'twenty_four_hours'],
                         coords={
                             'twenty_four_hours': twenty_four_hours,
                             'rect': regions_rect
                         })
        adaptor.add_input(from_spec)
        to_spec = Spec(name='test-var',
                       dtype='float',
                       dims=['rect', 'one_day'],
                       coords={
                           'one_day': one_day,
                           'rect': regions_rect
                       })
        adaptor.add_output(to_spec)

        data_array = DataArray(from_spec, data)

        data_handle = Mock()
        data_handle.get_data = Mock(return_value=data_array)
        data_handle.read_coefficients = Mock(side_effect=SmifDataNotFoundError)

        adaptor.simulate(data_handle)
        actual = data_handle.set_results.call_args[0][1]

        assert np.allclose(actual, expected)
Example #10
    def test_aggregate_from_month_to_seasons(self, months, seasons,
                                             monthly_data,
                                             monthly_data_as_seasons):
        """Aggregate months to values for each season
        """
        adaptor = IntervalAdaptor('test-month-season')
        from_spec = Spec(name='test-var',
                         dtype='float',
                         dims=['months'],
                         coords={'months': months})
        adaptor.add_input(from_spec)
        to_spec = Spec(name='test-var',
                       dtype='float',
                       dims=['seasons'],
                       coords={'seasons': seasons})
        adaptor.add_output(to_spec)
        actual_coefficients = adaptor.generate_coefficients(from_spec, to_spec)

        data_array = DataArray(from_spec, monthly_data)

        data_handle = Mock()
        data_handle.get_data = Mock(return_value=data_array)
        data_handle.read_coefficients = Mock(return_value=actual_coefficients)

        adaptor.simulate(data_handle)
        actual = data_handle.set_results.call_args[0][1]
        expected = monthly_data_as_seasons

        assert np.allclose(actual, expected, rtol=1e-05, atol=1e-08)
Example #11
    def test_time_only_conversion_disagg(self, months, seasons):
        """Disaggregate from seasons to months based on duration of each month/season
        """
        adaptor = IntervalAdaptor('test-season-month')
        from_spec = Spec(name='test-var',
                         dtype='float',
                         dims=['seasons'],
                         coords={'seasons': seasons})
        adaptor.add_input(from_spec)
        to_spec = Spec(name='test-var',
                       dtype='float',
                       dims=['months'],
                       coords={'months': months})
        adaptor.add_output(to_spec)
        actual_coefficients = adaptor.generate_coefficients(from_spec, to_spec)

        data = np.array([3, 3, 3, 3])
        data_array = DataArray(from_spec, data)
        data_handle = Mock()
        data_handle.get_data = Mock(return_value=data_array)
        data_handle.read_coefficients = Mock(return_value=actual_coefficients)

        adaptor.simulate(data_handle)
        actual = data_handle.set_results.call_args[0][1]
        expected = np.array([
            1.033333, 0.933333, 1.01087, 0.978261, 1.01087, 0.978261, 1.01087,
            1.01087, 0.989011, 1.021978, 0.989011, 1.033333
        ])
        np.testing.assert_allclose(actual, expected, rtol=1e-3)
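
The expected values follow from weighting each season's value of 3 by the month's share of that season's duration, assuming non-leap-year month lengths and a December-February winter in the months/seasons fixtures:

# Rough check of two of the expected values (fixture assumptions as above)
3 * 31 / 90  # January:  1.0333...
3 * 28 / 90  # February: 0.9333...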
Example #12
def sample_narrative_data(sample_narratives, get_sector_model,
                          energy_supply_sector_model,
                          water_supply_sector_model):
    narrative_data = {}
    sos_model_name = 'energy'
    sector_models = {}
    sector_models[get_sector_model['name']] = get_sector_model
    sector_models[
        energy_supply_sector_model['name']] = energy_supply_sector_model
    sector_models[
        water_supply_sector_model['name']] = water_supply_sector_model

    for narrative in sample_narratives:
        for sector_model_name, param_names in narrative['provides'].items():
            sector_model = sector_models[sector_model_name]
            for param_name in param_names:
                param = _pick_from_list(sector_model['parameters'], param_name)
                for variant in narrative['variants']:
                    spec = Spec.from_dict(param)
                    nda = np.random.random(spec.shape)
                    da = DataArray(spec, nda)
                    key = (sos_model_name, narrative['name'], variant['name'],
                           param_name)
                    narrative_data[key] = da
    return narrative_data
Example #13
    def test_canonical_missing_results(self, store, sample_dimensions,
                                       get_sos_model, get_sector_model,
                                       energy_supply_sector_model, model_run):

        for dim in sample_dimensions:
            store.write_dimension(dim)
        store.write_sos_model(get_sos_model)
        store.write_model_run(model_run)
        store.write_model(get_sector_model)
        store.write_model(energy_supply_sector_model)

        # All the results are missing
        missing_results = set()
        missing_results.add((2015, 0, 'energy_demand', 'gas_demand'))
        missing_results.add((2020, 0, 'energy_demand', 'gas_demand'))
        missing_results.add((2025, 0, 'energy_demand', 'gas_demand'))

        assert (store.canonical_missing_results(
            model_run['name']) == missing_results)

        spec = Spec(name='gas_demand', dtype='float')
        data = np.array(1, dtype=float)
        fake_data = DataArray(spec, data)

        store.write_results(fake_data, model_run['name'], 'energy_demand',
                            2015, 0)
        missing_results.remove((2015, 0, 'energy_demand', 'gas_demand'))

        assert (store.canonical_missing_results(
            model_run['name']) == missing_results)
Example #14
    def set_results(self, output_name, data):
        """Set results values for model outputs

        Parameters
        ----------
        output_name : str
        data : numpy.ndarray
        """
        if hasattr(data, 'as_ndarray'):
            raise TypeError("Pass in a numpy array")

        if output_name not in self._outputs:
            raise KeyError("'{}' not recognised as output for '{}'".format(
                output_name, self._model_name))

        self.logger.debug("Write %s %s %s", self._model_name, output_name,
                          self._current_timestep)

        spec = self._outputs[output_name]

        da = DataArray(spec, data)

        self._store.write_results(da, self._modelrun_name, self._model_name,
                                  self._current_timestep,
                                  self._decision_iteration)
Example #15
def test_convert_custom():
    """Convert custom units
    """
    data_handle = Mock()
    data = np.array([0.18346346], dtype=float)

    from_spec = Spec(name='test_variable', dtype='float', unit='mcm')

    to_spec = Spec(name='test_variable', dtype='float', unit='GW')

    data_array = DataArray(from_spec, data)

    data_handle.get_data = Mock(return_value=data_array)
    data_handle.read_unit_definitions = Mock(
        return_value=['mcm = 10.901353 * GW'])

    adaptor = UnitAdaptor('test-mcm-GW')
    adaptor.add_input(from_spec)
    adaptor.add_output(to_spec)
    adaptor.before_model_run(
        data_handle)  # must have run before_model_run to register units
    adaptor.simulate(data_handle)

    actual = data_handle.set_results.call_args[0][1]
    expected = np.array([2], dtype=float)
    np.testing.assert_allclose(actual, expected)
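
The expected result follows directly from the registered definition: with 1 mcm = 10.901353 GW, the input converts as below (arithmetic check only):

0.18346346 * 10.901353  # ~= 2.0, matching expected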
Example #16
    def test_aggregate_region(self, regions_rect, regions_half_squares):
        """Two regions aggregated to one, one interval
        """
        adaptor = RegionAdaptor('test-square-half')
        from_spec = Spec(name='test-var',
                         dtype='float',
                         dims=['half_squares'],
                         coords={'half_squares': regions_half_squares})
        adaptor.add_input(from_spec)
        to_spec = Spec(name='test-var',
                       dtype='float',
                       dims=['rect'],
                       coords={'rect': regions_rect})
        adaptor.add_output(to_spec)

        actual_coefficients = adaptor.generate_coefficients(from_spec, to_spec)
        expected = np.ones((2, 1))  # aggregating coefficients
        np.testing.assert_allclose(actual_coefficients, expected, rtol=1e-3)

        data = np.array([24, 24])  # area a,b

        data_array = DataArray(from_spec, data)

        data_handle = Mock()
        data_handle.get_data = Mock(return_value=data_array)
        data_handle.read_coefficients = Mock(return_value=actual_coefficients)
        adaptor.simulate(data_handle)

        actual = data_handle.set_results.call_args[0][1]
        expected = np.array([48])  # area zero
        assert np.allclose(actual, expected)
Example #17
    def test_narrative_data(self, setup_folder_structure, config_handler,
                            get_narrative):
        """ Test to dump a narrative (yml) data-file and then read the file
        using the datafile interface. Finally check the data shows up in the
        returned dictionary.
        """
        basefolder = setup_folder_structure
        filepath = os.path.join(str(basefolder), 'data', 'narratives',
                                'central_planning.csv')
        with open(filepath, 'w') as csvfile:
            writer = csv.DictWriter(csvfile,
                                    fieldnames=['homogeneity_coefficient'])
            writer.writeheader()
            writer.writerow({'homogeneity_coefficient': 8})

        spec = Spec.from_dict({
            'name': 'homogeneity_coefficient',
            'description': "How homegenous the centralisation process is",
            'absolute_range': [0, 1],
            'expected_range': [0, 1],
            'unit': 'percentage',
            'dtype': 'float'
        })

        actual = config_handler.read_narrative_variant_data(
            'central_planning', spec)
        assert actual == DataArray(spec, np.array(8, dtype=float))
Example #18
    def test_from_df_partial(self, spec):
        """Should create a DataArray that can handle missing data, returning nan/null
        """
        df = pd.DataFrame({
            'a': ['a1'],
            'b': ['b1'],
            'c': ['c2'],
            'test_data': [1]
        }).set_index(['a', 'b', 'c'])
        expected_data = numpy.full(spec.shape, numpy.nan)
        expected_data[0, 0, 1] = 1.0
        expected = DataArray(spec, expected_data)

        actual = DataArray.from_df(spec, df)

        assert_array_equal(actual.data, expected.data)
        assert actual == expected
Example #19
    def test_read_write_data_array(self, handler, scenario):
        spec_config = deepcopy(scenario['provides'][0])
        spec_config['dims'] = ['timestep'] + spec_config['dims']
        spec_config['coords']['timestep'] = [{'name': 2010}]
        spec = Spec.from_dict(spec_config)
        data = np.array([[0, 1]], dtype='float')
        da = DataArray(spec, data)
        handler.write_scenario_variant_data('mortality.csv', da)

        spec_config = deepcopy(scenario['provides'][0])
        spec = Spec.from_dict(spec_config)
        data = np.array([0, 1], dtype='float')
        expected = DataArray(spec, data)

        actual = handler.read_scenario_variant_data('mortality.csv', spec,
                                                    2010)
        assert actual == expected
        np.testing.assert_array_equal(actual.as_ndarray(),
                                      expected.as_ndarray())
Example #20
    def test_error_duplicate_rows_multi_index(self):
        spec = Spec(name='test',
                    dims=['a', 'b'],
                    coords={
                        'a': [1, 2],
                        'b': [3, 4]
                    },
                    dtype='int')
        df = pd.DataFrame([
            {
                'a': 1,
                'b': 3,
                'test': 0
            },
            {
                'a': 2,
                'b': 3,
                'test': 1
            },
            {
                'a': 1,
                'b': 4,
                'test': 2
            },
            {
                'a': 2,
                'b': 4,
                'test': 3
            },
            {
                'a': 2,
                'b': 4,
                'test': 4
            },
        ])

        with raises(SmifDataMismatchError) as ex:
            DataArray.from_df(spec, df)

        msg = "Data for 'test' contains duplicate values at [{'a': 2, 'b': 4}]"
        msg_alt = "Data for 'test' contains duplicate values at [{'b': 4, 'a': 2}]"
        assert msg in str(ex.value) or msg_alt in str(ex.value)
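
A plain-pandas sketch (illustrative, not smif's implementation) of how the offending rows could be located before conversion, using the df built in the test above:

# Illustrative only: rows sharing the same ('a', 'b') coordinates
dupes = df[df.duplicated(subset=['a', 'b'], keep=False)]
# here dupes holds both rows with a=2, b=4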
Example #21
def get_sector_model_parameter_defaults(get_sector_model):
    """DataArray for each parameter default
    """
    data = {
        'smart_meter_savings': np.array(0.5),
        'homogeneity_coefficient': np.array(0.1)
    }
    for param in get_sector_model['parameters']:
        nda = data[param['name']]
        spec = Spec.from_dict(param)
        data[param['name']] = DataArray(spec, nda)
    return data
Example #22
    def test_to_from_df(self):
        df = pd.DataFrame([{
            'test': 3,
            'region': 'oxford',
            'interval': 1
        }]).set_index(['region', 'interval'])

        spec = Spec(name='test',
                    dims=['region', 'interval'],
                    coords={
                        'region': ['oxford'],
                        'interval': [1]
                    },
                    dtype='int64')

        da = DataArray(spec, numpy.array([[3.]], dtype='int64'))
        da_from_df = DataArray.from_df(spec, df)
        assert da_from_df == da

        da_to_df = da.as_df()
        pd.testing.assert_frame_equal(da_to_df, df)
Example #23
def sample_gas_demand_results(lad, hourly):

    spec = Spec.from_dict({
        'name': 'gas_demand',
        'dtype': 'float',
        'dims': ['lad', 'hourly'],
        'coords': {
            'lad': lad,
            'hourly': hourly
        }
    })
    data = np.zeros(spec.shape, dtype=float)
    return DataArray(spec, data)
Example #24
    def test_combine(self, small_da, data):
        """Should override values where present (use case: full array of default values,
        overridden by a partial array of specific values).

        See variously:
        - http://xarray.pydata.org/en/stable/combining.html#merging-with-no-conflicts
        - https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.update.html
        """
        partial_data = numpy.full(small_da.shape, numpy.nan)
        partial_data[0, 0, 1] = 99
        partial = DataArray(small_da.spec, partial_data)

        # update in-place
        small_da.update(partial)

        # match fixture data
        expected_data = numpy.arange(24, dtype='float').reshape((2, 3, 4))
        expected_data[0, 0, 1] = 99
        expected = DataArray(small_da.spec, expected_data)

        assert small_da == expected
        assert_array_equal(small_da.data, expected.data)
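
A minimal numpy sketch of the overriding behaviour exercised here, assuming missing values in the partial array are marked with NaN (separate from whatever DataArray.update does internally):

import numpy

base = numpy.arange(24, dtype='float').reshape((2, 3, 4))
partial = numpy.full(base.shape, numpy.nan)
partial[0, 0, 1] = 99
combined = numpy.where(numpy.isnan(partial), base, partial)
# combined equals base everywhere except [0, 0, 1], which is now 99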
Example #25
    def test_df_round_trip(self):
        spec = Spec.from_dict({
            'name': 'multi_savings',
            'description': 'The savings from various technologies',
            'dims': ['technology_type'],
            'coords': {
                'technology_type':
                ['water_meter', 'electricity_meter', 'other', 'aaa']
            },
            'dtype': 'float',
            'abs_range': (0, 100),
            'exp_range': (3, 10),
            'unit': '%'
        })
        da = DataArray(spec, numpy.array([5., 6., 7., 8.]))
        df = pd.DataFrame([
            {
                'technology_type': 'water_meter',
                'multi_savings': 5.
            },
            {
                'technology_type': 'electricity_meter',
                'multi_savings': 6.
            },
            {
                'technology_type': 'other',
                'multi_savings': 7.
            },
            {
                'technology_type': 'aaa',
                'multi_savings': 8.
            },
        ])
        df = df.set_index(spec.dims)
        df_from_da = da.as_df()

        da_from_df = DataArray.from_df(spec, df_from_da)
        assert_array_equal(da.data, da_from_df.data)
Example #26
    def test_read_data_array_missing_timestep(self, handler, scenario):
        data = np.array([[0, 1]], dtype=float)
        spec_config = deepcopy(scenario['provides'][0])
        spec_config['dims'] = ['timestep'] + spec_config['dims']
        spec_config['coords']['timestep'] = [{'name': 2010}]
        spec = Spec.from_dict(spec_config)

        da = DataArray(spec, data)

        handler.write_scenario_variant_data('mortality.csv', da)
        msg = "not found for timestep 2011"
        with raises(SmifDataNotFoundError) as ex:
            handler.read_scenario_variant_data('mortality.csv', spec, 2011)
        assert msg in str(ex.value)
Example #27
    def test_string_data(self, handler):
        spec = Spec(name='string_data',
                    dims=['timestep', 'zones'],
                    coords={
                        'timestep': [2010],
                        'zones': ['a', 'b', 'c']
                    },
                    dtype='object')
        data = np.array([['alpha', 'beta', 'γάμμα']], dtype='object')
        expected = DataArray(spec, data)

        handler.write_scenario_variant_data('key', expected)
        actual = handler.read_scenario_variant_data('key', spec)
        assert actual == expected
Example #28
    def read_results(self,
                     modelrun_name,
                     model_name,
                     output_spec,
                     timestep=None,
                     decision_iteration=None):
        key = (modelrun_name, model_name, output_spec.name, timestep,
               decision_iteration)

        try:
            results = self._results[key]
        except KeyError:
            raise SmifDataNotFoundError(
                "Cannot find results for {}".format(key))

        return DataArray(output_spec, results)
Example #29
    def test_remap_timeslices_to_months(self, regions_rect, seasons, months):
        """One region, time remapping required
        """
        data = np.array(
            [[
                3,  # winter month
                3,  # spring month
                3,  # summer month
                3  # autumn month
            ]],
            dtype=float)
        expected = np.array([[
            1.03333333, 0.93333333, 1.01086957, 0.97826087, 1.01086957,
            0.97826087, 1.01086957, 1.01086957, 0.98901099, 1.02197802,
            0.98901099, 1.03333333
        ]])

        adaptor = IntervalAdaptor('test-month-remap')
        from_spec = Spec(name='test-var',
                         dtype='float',
                         dims=['rect', 'seasons'],
                         coords={
                             'seasons': seasons,
                             'rect': regions_rect
                         })
        adaptor.add_input(from_spec)
        to_spec = Spec(name='test-var',
                       dtype='float',
                       dims=['rect', 'months'],
                       coords={
                           'months': months,
                           'rect': regions_rect
                       })
        adaptor.add_output(to_spec)

        data_array = DataArray(from_spec, data)

        data_handle = Mock()
        data_handle.get_data = Mock(return_value=data_array)
        data_handle.read_coefficients = Mock(side_effect=SmifDataNotFoundError)

        adaptor.simulate(data_handle)
        actual = data_handle.set_results.call_args[0][1]

        assert np.allclose(actual, expected)
Example #30
    def test_match_metadata(self):
        spec = Spec(name='test',
                    dims=['region'],
                    coords={'region': ['oxford']},
                    dtype='int64')

        # must have a column named the same as the spec.name
        df = pd.DataFrame([{
            'region': 'oxford',
            'other': 'else'
        }]).set_index(['region'])
        msg = "Data for 'test' expected a data column called 'test' and index names " + \
              "['region'], instead got data columns ['other'] and index names ['region']"
        with raises(SmifDataMismatchError) as ex:
            DataArray.from_df(spec, df)
        assert msg in str(ex.value)

        # may not be indexed, if columns are otherwise all okay
        df = pd.DataFrame([{'region': 'oxford', 'test': 1}])
        DataArray.from_df(spec, df)

        # must have an index level for each spec dimension
        df = pd.DataFrame([{'test': 3.14}])
        msg = "Data for 'test' expected a data column called 'test' and index names " + \
              "['region'], instead got data columns ['test'] and index names [None]"
        with raises(SmifDataMismatchError) as ex:
            DataArray.from_df(spec, df)
        assert msg in str(ex.value)

        # must not have dimension labels outside of the spec dimension
        df = pd.DataFrame([{
            'test': 3.14,
            'region': 'oxford'
        }, {
            'test': 3.14,
            'region': 'extra'
        }]).set_index(['region'])
        msg = "Data for 'test' contained unexpected values in the set of coordinates for " + \
              "dimension 'region': ['extra']"
        with raises(SmifDataMismatchError) as ex:
            DataArray.from_df(spec, df)
        assert msg in str(ex.value)