def test_order_zero_2d_fails_on_extrapolation():
    """Check that order-zero 2D interpolation raises when extrapolation is off.

    A 5x5 grid of (a, b) points shifted by 0.5 is given bin edges, shuffled,
    and wrapped in an ``Interpolation`` built with ``extrapolate=False``.
    Querying it with values starting at 0 is expected to raise ``ValueError``.
    """
    rows, cols = np.mgrid[0:5, 0:5]
    a = rows.reshape(25)
    b = cols.reshape(25)

    table = pd.DataFrame({
        'a': a + 0.5,
        'b': b + 0.5,
        'c': b * 3,
        'garbage': ['test'] * len(a),
    })
    # presumably adds the '<name>_left' / '<name>_right' columns referenced
    # below -- make_bin_edges is defined elsewhere, confirm there.
    for name in ('a', 'b'):
        table = make_bin_edges(table, name)

    # Randomise row order so the interpolation cannot rely on sorted input.
    table = table.sample(frac=1)

    parameter_bins = [(name, f'{name}_left', f'{name}_right') for name in ('a', 'b')]
    interpolation = Interpolation(table, ('garbage', ), parameter_bins,
                                  order=0, extrapolate=False)

    points = np.arange(4, step=0.011)
    query = pd.DataFrame({
        'a': points,
        'b': points,
        'garbage': ['test'] * len(points),
    })

    with pytest.raises(ValueError) as error:
        interpolation(query)

    message = error.value.args[0]
    assert 'Extrapolation' in message
    # NOTE(review): this check is always true once the previous one passes,
    # because the word 'Extrapolation' itself contains an 'a'. It does not
    # actually pin the offending column name.
    assert 'a' in message
def test_order_zero_2d():
    """Check order-zero 2D interpolation against the expected step values.

    The source table holds ``c = b * 3`` on a 5x5 grid whose (a, b) points are
    shifted by 0.5. Queries along the diagonal from 0.5 up to (but excluding)
    4 are expected to yield ``int(b) * 3``.
    """
    rows, cols = np.mgrid[0:5, 0:5]
    a = rows.reshape(25)
    b = cols.reshape(25)

    table = pd.DataFrame({
        'a': a + 0.5,
        'b': b + 0.5,
        'c': b * 3,
        'garbage': ['test'] * len(a),
    })
    for name in ('a', 'b'):
        table = make_bin_edges(table, name)

    # Randomise row order so the interpolation cannot rely on sorted input.
    table = table.sample(frac=1)

    parameter_bins = [(name, f'{name}_left', f'{name}_right') for name in ('a', 'b')]
    interpolation = Interpolation(table, ('garbage', ), parameter_bins,
                                  order=0, extrapolate=True)

    points = np.arange(0.5, 4, step=0.011)
    query = pd.DataFrame({
        'a': points,
        'b': points,
        'garbage': ['test'] * len(points),
    })

    expected = query['b'].astype(int) * 3
    assert np.allclose(expected, interpolation(query).c)
def test_interpolation_with_function():
    """Check that the ``func`` argument is applied to interpolated values.

    Column ``b`` equals ``a`` in the source table, and a doubling function is
    supplied, so the interpolated ``b`` is expected to equal ``2 * a``.
    """
    def double(x):
        return x * 2

    table = pd.DataFrame({
        'a': np.arange(100),
        'b': np.arange(100),
        'c': np.arange(100, 0, -1),
    })
    # Randomise row order so the interpolation cannot rely on sorted input.
    table = table.sample(frac=1)

    interpolation = Interpolation(table, (), ('a',), func=double)

    query = pd.DataFrame({'a': np.arange(100, step=0.01)})
    expected = query['a'] * 2
    assert np.allclose(expected, interpolation(query).b)
def test_order_zero_1d():
    """Check order-zero 1D interpolation at and beyond the two data points.

    The table maps index 0 -> 0 and index 1 -> 1. Queries at those indices
    must return the stored values; queries outside them must return the
    nearest stored value.
    """
    table = pd.Series({0: 0, 1: 1}).reset_index()
    interpolate = Interpolation(table, (), ('index', ), order=0)

    at_zero = interpolate(index=[0])[0]
    assert at_zero == 0, 'should be precise at index values'

    at_one = interpolate(index=[1])[0]
    assert at_one == 1

    above_range = interpolate(index=[2])[0]
    assert above_range == 1, 'should be constant extrapolation outside of input range'

    below_range = interpolate(index=[-1])[0]
    assert below_range == 0
def test_2d_interpolation():
    """Check default-order interpolation over two parameter columns.

    Value column ``c`` mirrors ``b`` and ``d`` mirrors ``a`` on a 5x5 integer
    grid, so interpolating along the diagonal should reproduce the query
    coordinates themselves.
    """
    rows, cols = np.mgrid[0:5, 0:5]
    a = rows.reshape(25)
    b = cols.reshape(25)

    table = pd.DataFrame({'a': a, 'b': b, 'c': b, 'd': a})
    # Randomise row order so the interpolation cannot rely on sorted input.
    table = table.sample(frac=1)

    interpolation = Interpolation(table, (), ('a', 'b'))

    query = pd.DataFrame({
        'a': np.arange(4, step=0.01),
        'b': np.arange(4, step=0.01),
    })

    interpolated_c = interpolation(query).c
    assert np.allclose(query['b'], interpolated_c)

    interpolated_d = interpolation(query).d
    assert np.allclose(query['a'], interpolated_d)
def test_order_zero_1d_constant_extrapolation():
    """Check that binned order-zero interpolation extrapolates as a constant.

    With ``extrapolate=True``, queries above or below the data are expected
    to return the value of the nearest end of the table.
    """
    table = pd.Series({0: 0, 1: 1}).reset_index()
    table = make_bin_edges(table, 'index')
    interpolate = Interpolation(table, (),
                                [['index', 'index_left', 'index_right']],
                                order=0, extrapolate=True)

    def lookup(value):
        # Query a single index value and pull out the single resulting cell.
        return interpolate(pd.DataFrame({'index': [value]}))[0][0]

    assert lookup(1) == 1
    assert lookup(2) == 1, 'should be constant extrapolation outside of input range'
    assert lookup(-1) == 0
def test_interpolation_called_missing_key_col():
    """Check that a query lacking a key column raises ``ValueError``.

    The interpolation is keyed on ``sex``, but the query frame supplies only
    ``year`` and ``age``.
    """
    combinations = itertools.product(range(1990, 1995), range(25, 30), ['Male', 'Female'])
    table = pd.DataFrame(list(combinations), columns=['year', 'age', 'sex'])
    table['pop'] = table['age'] * 11.1

    # Randomise row order so the interpolation cannot rely on sorted input.
    table = table.sample(frac=1)

    interpolation = Interpolation(table, ['sex'], ['year', 'age'], 1, True)

    # Deliberately omits the 'sex' key column.
    query = pd.DataFrame({'year': [1990, 1990], 'age': [35, 35]})

    with pytest.raises(ValueError):
        interpolation(query)
def test_interpolation_with_categorical_parameters():
    """Check that a key column selects between separate interpolations.

    Rows keyed ``'one'`` hold an ascending ``c`` (0..99) and rows keyed
    ``'two'`` hold a descending ``c`` (100..1), both over ``b`` = 0..99.
    Each key must reproduce its own series when queried.
    """
    ascending = np.arange(100)
    descending = np.arange(100, 0, -1)

    table = pd.DataFrame({
        'a': ['one'] * 100 + ['two'] * 100,
        'b': np.append(ascending, np.arange(100)),
        'c': np.append(np.arange(100), descending),
    })
    # Randomise row order so the interpolation cannot rely on sorted input.
    table = table.sample(frac=1)

    interpolation = Interpolation(table, ('a',), ('b',))

    def query_for(category):
        # Same fine-grained 'b' sweep for whichever key value is requested.
        return pd.DataFrame({'a': category, 'b': np.arange(100, step=0.01)})

    query_one = query_for('one')
    query_two = query_for('two')

    expected_one = np.arange(100, step=0.01)
    assert np.allclose(expected_one, interpolation(query_one))

    expected_two = np.arange(100, 0, step=-0.01)
    assert np.allclose(expected_two, interpolation(query_two))
def test_age_year_interpolation():
    """Check interpolation over ``year`` with ``sex`` and ``age`` as keys.

    Population is ``age * 11.1`` for every year and sex, so age 35 is
    expected to yield 388.5 for both sexes.
    """
    years = list(range(1990, 2010))
    ages = list(range(0, 90))
    pops = np.array(ages) * 11.1

    # Same row order as nesting age -> year -> sex loops.
    records = [
        {'age': age, 'sex': sex, 'year': year, 'pop': pop}
        for age, pop in zip(ages, pops)
        for year in years
        for sex in ['Male', 'Female']
    ]
    table = pd.DataFrame(records)
    # Randomise row order so the interpolation cannot rely on sorted input.
    table = table.sample(frac=1)

    interpolation = Interpolation(table, ('sex', 'age'), ('year',))

    result = interpolation(year=[1990, 1990], age=[35, 35], sex=['Male', 'Female'])
    assert np.allclose(result, 388.5)
def test_1d_interpolation():
    """Check 1D interpolation of an ascending and a descending column.

    ``b`` equals ``a`` and ``c`` equals ``100 - a`` in the source table, so
    the interpolated values at fractional ``a`` should follow the same lines.
    """
    table = pd.DataFrame({
        'a': np.arange(100),
        'b': np.arange(100),
        'c': np.arange(100, 0, -1),
    })
    # Randomise row order so the interpolation cannot rely on sorted input.
    table = table.sample(frac=1)

    interpolation = Interpolation(table, (), ('a', ), 1, True)

    query = pd.DataFrame({'a': np.arange(100, step=0.01)})

    interpolated_b = interpolation(query).b
    assert np.allclose(query['a'], interpolated_b)

    interpolated_c = interpolation(query).c
    assert np.allclose(100 - query['a'], interpolated_c)
def test_order_zero_1d_no_extrapolation():
    """Check binned order-zero interpolation with extrapolation disabled.

    Queries at 0 and 0.999 must return the expected stored values, while a
    query at 1 must raise a ``ValueError`` whose message mentions both
    'Extrapolation' and the 'index' column.
    """
    table = pd.Series({0: 0, 1: 1}).reset_index()
    table = make_bin_edges(table, 'index')
    interpolate = Interpolation(table, (),
                                [['index', 'index_left', 'index_right']],
                                order=0, extrapolate=False)

    def lookup(value):
        # Query a single index value and pull out the single resulting cell.
        return interpolate(pd.DataFrame({'index': [value]}))[0][0]

    assert lookup(0) == 0, 'should be precise at index values'
    assert lookup(0.999) == 1

    out_of_range = pd.DataFrame({'index': [1]})
    with pytest.raises(ValueError) as error:
        interpolate(out_of_range)

    message = error.value.args[0]
    assert 'Extrapolation' in message
    assert 'index' in message
def build_table(self, data, key_columns=('sex', ), parameter_columns=('age', 'year'), interpolation_order=1):
    """Build a view over ``data`` backed by an interpolation.

    A plain number is wrapped in a ``ScalarView``. Anything else is turned
    into an ``Interpolation`` (unless it already is one) and wrapped in an
    ``InterpolatedTableView``.

    NOTE(review): earlier documentation for this method stated that an empty
    ``parameter_columns`` delegates to ``MergedTableManager.build_table``.
    No such branch exists in this method body -- confirm whether that
    behaviour lives elsewhere or was removed.

    Parameters
    ----------
    data : Number, Interpolation or pandas.DataFrame
        The source data exposed through the resulting view. An existing
        ``Interpolation`` is used as is.
    key_columns : [str]
        Columns used to select between interpolation functions. These
        should be the non-continuous variables in the data, for example
        'sex' in data about a population.
    parameter_columns : [str]
        Columns holding the parameters of the interpolation functions.
        These should be the continuous variables, for example 'age' in
        data about a population.
    interpolation_order : int
        Order passed to ``Interpolation``. Defaults to 1.

    Returns
    -------
    ScalarView or InterpolatedTableView
    """
    if isinstance(data, Number):
        return ScalarView(data)

    if not isinstance(data, Interpolation):
        data = Interpolation(data, key_columns, parameter_columns,
                             order=interpolation_order)

    # 'year' is left out of the population view columns; when it is a
    # parameter, the clock is handed to the view instead.
    all_columns = set(key_columns).union(parameter_columns)
    view_columns = sorted(all_columns.difference({'year'}))

    population_view = self._pop_view_builder(view_columns)
    clock = self.clock if 'year' in parameter_columns else None
    return InterpolatedTableView(data, population_view, clock)
def __init__(self,
             data: pd.DataFrame,
             population_view: PopulationView,
             key_columns: Union[List[str], Tuple[str]],
             parameter_columns: Union[List[str], Tuple],
             value_columns: Union[List[str], Tuple[str]],
             interpolation_order: int,
             clock: Callable,
             extrapolate: bool):
    """Store the table configuration and build its ``Interpolation``.

    Parameters
    ----------
    data
        The table handed to ``Interpolation``.
    population_view
        Stored on the instance as ``population_view``.
    key_columns
        Key columns passed through to ``Interpolation``.
    parameter_columns
        Parameter names. Each name ``p`` is expanded to the triple
        ``(p, 'p_start', 'p_end')`` before being stored and passed on.
    value_columns
        Stored on the instance as ``value_columns``.
    interpolation_order
        Passed to ``Interpolation`` as ``order``.
    clock
        Stored on the instance as ``clock``.
    extrapolate
        Passed to ``Interpolation`` as ``extrapolate``.
    """
    self.data = data
    self.population_view = population_view
    self.key_columns = key_columns
    # Pair every parameter with the names of its bin-edge columns.
    self.parameter_columns = [
        (name, f'{name}_start', f'{name}_end') for name in parameter_columns
    ]
    self.interpolation_order = interpolation_order
    self.value_columns = value_columns
    self.clock = clock
    self.extrapolate = extrapolate
    self.interpolation = Interpolation(
        data,
        self.key_columns,
        self.parameter_columns,
        order=self.interpolation_order,
        extrapolate=self.extrapolate,
    )
def test_order_zero_diff_bin_sizes():
    """Check order-zero lookup over year bins of unequal width.

    Each query year is expected to return the value of the bin it falls in;
    a year equal to a bin's start (e.g. 1995, 1996) is expected to take that
    bin's value rather than the preceding one.
    """
    bin_starts = [1990, 1995, 1996, 2005, 2005.5]
    bin_ends = [1995, 1996, 2005, 2005.5, 2010]
    bin_values = [1, 5, 2.3, 6, 100]
    data = pd.DataFrame({
        'year_start': bin_starts,
        'year_end': bin_ends,
        'value': bin_values,
    })

    interpolation = Interpolation(data, (), [('year', 'year_start', 'year_end')], 0, False)

    # (query year, expected value) pairs, kept side by side for readability.
    cases = [
        (2007, 100),
        (1990, 1),
        (2005.4, 6),
        (1994, 1),
        (2004, 2.3),
        (1995, 5),
        (2002, 2.3),
        (1995.5, 5),
        (1996, 2.3),
    ]
    query_years, expected_values = zip(*cases)

    query = pd.DataFrame({'year': list(query_years)})
    expected_result = pd.DataFrame({'value': list(expected_values)})

    assert interpolation(query).equals(expected_result)