Ejemplo n.º 1
0
def test_order_zero_3d_with_key_col():
    """Check an order-zero lookup over three binned parameters and one key.

    The table holds one value per (year, age, height) bin and is repeated
    for each ``sex``. The queries use a non-sequential index, and the
    expected frame is built on that same index.
    """
    table = {
        'year_start': [1990, 1990, 1990, 1990, 1995, 1995, 1995, 1995] * 2,
        'year_end': [1995, 1995, 1995, 1995, 2000, 2000, 2000, 2000] * 2,
        'age_start': [15, 10, 10, 15, 10, 10, 15, 15] * 2,
        'age_end': [20, 15, 15, 20, 15, 15, 20, 20] * 2,
        'height_start': [140, 160, 140, 160, 140, 160, 140, 160] * 2,
        'height_end': [160, 180, 160, 180, 160, 180, 160, 180] * 2,
        'sex': ['Male'] * 8 + ['Female'] * 8,
        'value': [5, 3, 1, 7, 8, 6, 4, 2, 6, 4, 2, 8, 9, 7, 5, 3],
    }
    data = pd.DataFrame(table)

    # Each parameter is given as (name, start column, end column).
    parameter_bins = [(name, f'{name}_start', f'{name}_end')
                      for name in ('age', 'year', 'height')]
    # Trailing positional arguments: presumably order=0, extrapolate=True.
    interp = Interpolation(data, ('sex', ), parameter_bins, 0, True)

    query_index = [10, 4, 7, 0, 9]
    interpolants = pd.DataFrame(
        {
            'age': [12, 17, 8, 24, 12],
            'year': [1992, 1998, 1985, 1992, 1992],
            'height': [160, 145, 140, 185, 160],
            'sex': ['Male', 'Female', 'Female', 'Male', 'Male'],
        },
        index=query_index)

    # The third and fourth queries lie outside the tabulated ranges; their
    # expected values are those of the nearest bins for the matching sex.
    expected = pd.DataFrame({'value': [3.0, 5.0, 2.0, 7.0, 3.0]},
                            index=query_index)

    result = interp(interpolants)
    assert result.equals(expected)
Ejemplo n.º 2
0
def test_order_zero_2d():
    """Check an order-zero lookup on a 5x5 grid of two binned parameters.

    Column ``c`` is three times the integer ``b`` coordinate, so the result
    for a query is compared against ``int(b) * 3``.
    """
    grid = np.mgrid[0:5, 0:5]
    a = grid[0].reshape(25)
    b = grid[1].reshape(25)

    df = pd.DataFrame({
        'a': a + 0.5,
        'b': b + 0.5,
        'c': b * 3,
        'garbage': ['test'] * len(a),
    })
    for parameter in ('a', 'b'):
        df = make_bin_edges(df, parameter)

    # Shuffle the rows so the test also covers unsorted input.
    df = df.sample(frac=1)

    parameter_bins = [('a', 'a_left', 'a_right'), ('b', 'b_left', 'b_right')]
    i = Interpolation(df, ('garbage', ), parameter_bins,
                      order=0, extrapolate=True)

    column = np.arange(0.5, 4, step=0.011)
    query = pd.DataFrame({
        'a': column,
        'b': column,
        'garbage': ['test'] * len(column),
    })

    expected_c = query['b'].astype(int) * 3
    result = i(query)
    assert np.allclose(expected_c, result.c)
Ejemplo n.º 3
0
def test_order_zero_1d_with_key_column():
    """Check an order-zero lookup on one binned parameter, keyed by ``sex``.

    Both queried years fall in the 1990-1995 bin, so each query is expected
    to return the two value columns of that bin for its own sex.
    """
    table = {
        'year_start': [1990, 1990, 1995, 1995],
        'year_end': [1995, 1995, 2000, 2000],
        'sex': ['Male', 'Female', 'Male', 'Female'],
        'value_1': [10, 7, 2, 12],
        'value_2': [1200, 1350, 1476, 1046],
    }
    data = pd.DataFrame(table)

    key_columns = ['sex']
    parameter_bins = [('year', 'year_start', 'year_end')]
    # Trailing positional arguments: presumably order=0, extrapolate=True.
    i = Interpolation(data, key_columns, parameter_bins, 0, True)

    query = pd.DataFrame({'year': [1992, 1993], 'sex': ['Male', 'Female']})

    # Expected values are floats even though the table stores integers.
    expected_result = pd.DataFrame({
        'value_1': [10.0, 7.0],
        'value_2': [1200.0, 1350.0],
    })

    result = i(query)
    assert result.equals(expected_result)
Ejemplo n.º 4
0
def test_order_zero_non_numeric_values():
    """Check that an order-zero lookup can return non-numeric values.

    The value column holds strings. Age 24 is both the end of the first bin
    and the start of the second; the expected result assigns it to the
    second bin ('red').
    """
    table = {
        'year_start': [1990, 1990],
        'year_end': [1995, 1995],
        'age_start': [15, 24],
        'age_end': [24, 30],
        'value_1': ['blue', 'red'],
    }
    data = pd.DataFrame(table)

    parameter_bins = [('year', 'year_start', 'year_end'),
                      ('age', 'age_start', 'age_end')]
    # No key columns; trailing arguments presumably order=0, extrapolate=True.
    i = Interpolation(data, tuple(), parameter_bins, 0, True)

    # A reversed index checks that the result follows the query's index.
    query_index = [1, 0]
    query = pd.DataFrame({'year': [1990, 1990], 'age': [15, 24]},
                         index=query_index)

    expected_result = pd.DataFrame({'value_1': ['blue', 'red']},
                                   index=query_index)

    result = i(query)
    assert result.equals(expected_result)
Ejemplo n.º 5
0
def test_order_zero_2d_fails_on_extrapolation():
    """Check that a 2-D order-zero lookup raises when extrapolation is off.

    The interpolation is built with ``extrapolate=False`` and queried over
    ``np.arange(4, step=0.011)``; the call is expected to raise a
    ``ValueError`` whose message mentions 'Extrapolation'.
    """
    grid = np.mgrid[0:5, 0:5]
    a = grid[0].reshape(25)
    b = grid[1].reshape(25)

    df = pd.DataFrame({
        'a': a + 0.5,
        'b': b + 0.5,
        'c': b * 3,
        'garbage': ['test'] * len(a),
    })
    for parameter in ('a', 'b'):
        df = make_bin_edges(df, parameter)

    # Shuffle the rows so the test also covers unsorted input.
    df = df.sample(frac=1)

    parameter_bins = [('a', 'a_left', 'a_right'), ('b', 'b_left', 'b_right')]
    i = Interpolation(df, ('garbage', ), parameter_bins,
                      order=0, extrapolate=False)

    column = np.arange(4, step=0.011)
    query = pd.DataFrame({
        'a': column,
        'b': column,
        'garbage': ['test'] * len(column),
    })

    with pytest.raises(ValueError) as excinfo:
        i(query)

    message = excinfo.value.args[0]

    # NOTE(review): 'a' is also a letter of 'Extrapolation', so the second
    # membership check can never fail once the first one passes.
    assert 'Extrapolation' in message
    assert 'a' in message
Ejemplo n.º 6
0
def test_interpolation_with_function():
    """Check interpolation when a ``func`` callable is supplied.

    Column ``b`` equals the parameter ``a`` in the table, and the callable
    doubles its input; the result's ``b`` is expected to equal ``2 * a``.
    """
    ramp = np.arange(100)
    df = pd.DataFrame({'a': ramp, 'b': np.arange(100), 'c': np.arange(100, 0, -1)})
    # Shuffle the rows so the test also covers unsorted input.
    df = df.sample(frac=1)

    i = Interpolation(df, (), ('a',), func=lambda x: x * 2)

    query = pd.DataFrame({'a': np.arange(100, step=0.01)})

    expected_b = query['a'] * 2
    result = i(query)
    assert np.allclose(expected_b, result.b)
Ejemplo n.º 7
0
def test_order_zero_1d():
    """Check a 1-D order-zero lookup at and beyond the tabulated points.

    The table maps index 0 -> 0 and index 1 -> 1. Queries outside that
    range are expected to return the value of the nearest tabulated point.
    """
    # reset_index() turns the Series index into an 'index' column.
    s = pd.Series({0: 0, 1: 1}).reset_index()
    f = Interpolation(s, tuple(), ('index', ), order=0)

    at_zero = f(index=[0])[0]
    assert at_zero == 0, 'should be precise at index values'

    at_one = f(index=[1])[0]
    assert at_one == 1

    above_range = f(index=[2])[0]
    assert above_range == 1, 'should be constant extrapolation outside of input range'

    below_range = f(index=[-1])[0]
    assert below_range == 0
Ejemplo n.º 8
0
def test_2d_interpolation():
    """Check interpolation over two parameters on a 5x5 integer grid.

    Column ``c`` copies ``b`` and column ``d`` copies ``a``, so along the
    diagonal query the results are expected to reproduce the query values.
    """
    grid = np.mgrid[0:5, 0:5]
    a = grid[0].reshape(25)
    b = grid[1].reshape(25)

    df = pd.DataFrame({'a': a, 'b': b, 'c': b, 'd': a})
    # Shuffle the rows so the test also covers unsorted input.
    df = df.sample(frac=1)

    i = Interpolation(df, (), ('a', 'b'))

    diagonal = {'a': np.arange(4, step=0.01), 'b': np.arange(4, step=0.01)}
    query = pd.DataFrame(diagonal)

    # The interpolation is evaluated once per assertion, as in the original.
    assert np.allclose(query['b'], i(query).c)
    assert np.allclose(query['a'], i(query).d)
Ejemplo n.º 9
0
def test_order_zero_1d_constant_extrapolation():
    """Check that a binned 1-D order-zero lookup extrapolates as a constant.

    With ``extrapolate=True``, queries above and below the table are
    expected to return the values of the last and first rows respectively.
    """
    # reset_index() leaves the values in a column labelled 0.
    s = pd.Series({0: 0, 1: 1}).reset_index()
    s = make_bin_edges(s, 'index')

    parameter_bins = [['index', 'index_left', 'index_right']]
    f = Interpolation(s, tuple(), parameter_bins, order=0, extrapolate=True)

    def lookup(point):
        # Query a single point, then read column 0 at row label 0.
        return f(pd.DataFrame({'index': [point]}))[0][0]

    assert lookup(1) == 1
    assert lookup(2) == 1, 'should be constant extrapolation outside of input range'
    assert lookup(-1) == 0
Ejemplo n.º 10
0
def test_interpolation_called_missing_key_col():
    """Check that a query lacking the key column raises ``ValueError``.

    The interpolation is keyed by ``sex``, but the query frame only carries
    ``year`` and ``age``.
    """
    years = range(1990, 1995)
    ages = range(25, 30)
    sexes = ['Male', 'Female']

    # One row for every (year, age, sex) combination.
    combinations = list(itertools.product(years, ages, sexes))
    df = pd.DataFrame(combinations, columns=['year', 'age', 'sex'])
    df['pop'] = df['age'] * 11.1

    # Shuffle the rows so the test also covers unsorted input.
    df = df.sample(frac=1)

    # Trailing positional arguments: presumably order=1, extrapolate=True.
    i = Interpolation(df, ['sex'], ['year', 'age'], 1, True)

    query = pd.DataFrame({'year': [1990, 1990], 'age': [35, 35]})
    with pytest.raises(ValueError):
        i(query)
Ejemplo n.º 11
0
def test_interpolation_with_categorical_parameters():
    """Check that each key value gets its own interpolation function.

    For key 'one' the value rises with ``b`` (0..99); for key 'two' it
    falls (100..1). Each key is queried separately over a fine grid.
    """
    keys = ['one'] * 100 + ['two'] * 100
    parameter = np.append(np.arange(100), np.arange(100))
    values = np.append(np.arange(100), np.arange(100, 0, -1))

    df = pd.DataFrame({'a': keys, 'b': parameter, 'c': values})
    # Shuffle the rows so the test also covers unsorted input.
    df = df.sample(frac=1)

    i = Interpolation(df, ('a',), ('b',))

    # The scalar key is broadcast across every row of the query frame.
    query_one = pd.DataFrame({'a': 'one', 'b': np.arange(100, step=0.01)})
    query_two = pd.DataFrame({'a': 'two', 'b': np.arange(100, step=0.01)})

    rising = np.arange(100, step=0.01)
    assert np.allclose(rising, i(query_one))

    falling = np.arange(100, 0, step=-0.01)
    assert np.allclose(falling, i(query_two))
Ejemplo n.º 12
0
def test_age_year_interpolation():
    """Check interpolation over ``year`` with ``sex`` and ``age`` as keys.

    ``pop`` is 11.1 times the age for every year and sex, so a query at
    age 35 is expected to return 388.5 for both sexes.
    """
    years = list(range(1990, 2010))
    ages = list(range(0, 90))
    pops = np.array(ages) * 11.1

    # Same row order as the nested loops: age outermost, then year, then sex.
    records = [
        {'age': age, 'sex': sex, 'year': year, 'pop': pop}
        for age, pop in zip(ages, pops)
        for year in years
        for sex in ['Male', 'Female']
    ]
    df = pd.DataFrame(records)
    # Shuffle the rows so the test also covers unsorted input.
    df = df.sample(frac=1)

    i = Interpolation(df, ('sex', 'age'), ('year',))

    result = i(year=[1990, 1990], age=[35, 35], sex=['Male', 'Female'])
    assert np.allclose(result, 388.5)
Ejemplo n.º 13
0
def test_1d_interpolation():
    """Check 1-D interpolation of two value columns over one parameter.

    Column ``b`` equals the parameter ``a`` and column ``c`` equals
    ``100 - a`` at the tabulated points; the results on a fine query grid
    are expected to follow the same relations.
    """
    table = {
        'a': np.arange(100),
        'b': np.arange(100),
        'c': np.arange(100, 0, -1),
    }
    df = pd.DataFrame(table)
    # Shuffle the rows so the test also covers unsorted input.
    df = df.sample(frac=1)

    # Trailing positional arguments: presumably order=1, extrapolate=True.
    i = Interpolation(df, (), ('a', ), 1, True)

    query = pd.DataFrame({'a': np.arange(100, step=0.01)})

    # The interpolation is evaluated once per assertion, as in the original.
    assert np.allclose(query['a'], i(query).b)
    assert np.allclose(100 - query['a'], i(query).c)
Ejemplo n.º 14
0
def test_order_zero_1d_no_extrapolation():
    """Check that a binned 1-D order-zero lookup refuses to extrapolate.

    With ``extrapolate=False``, the queries at 0 and 0.999 are expected to
    succeed, while the query at 1 is expected to raise a ``ValueError``
    whose message names 'Extrapolation' and the 'index' parameter.
    """
    # reset_index() leaves the values in a column labelled 0.
    s = pd.Series({0: 0, 1: 1}).reset_index()
    s = make_bin_edges(s, 'index')

    parameter_bins = [['index', 'index_left', 'index_right']]
    f = Interpolation(s, tuple(), parameter_bins, order=0, extrapolate=False)

    def lookup(point):
        # Query a single point, then read column 0 at row label 0.
        return f(pd.DataFrame({'index': [point]}))[0][0]

    assert lookup(0) == 0, 'should be precise at index values'
    assert lookup(0.999) == 1

    with pytest.raises(ValueError) as excinfo:
        f(pd.DataFrame({'index': [1]}))

    message = excinfo.value.args[0]
    assert 'Extrapolation' in message
    assert 'index' in message
Ejemplo n.º 15
0
    def build_table(self,
                    data,
                    key_columns=('sex', ),
                    parameter_columns=('age', 'year'),
                    interpolation_order=1):
        """Build a lookup view over ``data``.

        A numeric scalar is wrapped directly in a ``ScalarView``. Any other
        input is turned into an ``Interpolation`` (unless it already is one)
        and wrapped in an ``InterpolatedTableView``.

        Parameters
        ----------
        data : Number, pandas.DataFrame or Interpolation
                      The source data which will be accessible through the
                      resulting view. An existing ``Interpolation`` is used
                      as-is.
        key_columns : [str]
                      Columns used to select between interpolation functions,
                      e.g. 'sex' in data about a population.
        parameter_columns : [str]
                      Columns holding the parameters of the interpolation
                      functions, e.g. 'age' in data about a population.
        interpolation_order : int
                      Passed to ``Interpolation`` as ``order``. Defaults to 1.

        Returns
        -------
        ScalarView or InterpolatedTableView
        """
        if isinstance(data, Number):
            return ScalarView(data)

        if not isinstance(data, Interpolation):
            data = Interpolation(data,
                                 key_columns,
                                 parameter_columns,
                                 order=interpolation_order)

        # 'year' is never requested from the population view; when it is a
        # parameter the clock is handed to the view instead (presumably so
        # the view derives the year from simulation time -- confirm).
        lookup_columns = set(key_columns).union(parameter_columns)
        lookup_columns.discard('year')
        view_columns = sorted(lookup_columns)

        population_view = self._pop_view_builder(view_columns)
        clock = self.clock if 'year' in parameter_columns else None
        return InterpolatedTableView(data, population_view, clock)
Ejemplo n.º 16
0
    def __init__(self, data: pd.DataFrame, population_view: PopulationView,
                 key_columns: Union[List[str], Tuple[str]],
                 parameter_columns: Union[List[str], Tuple],
                 value_columns: Union[List[str], Tuple[str]],
                 interpolation_order: int, clock: Callable, extrapolate: bool):
        """Store the table configuration and build its ``Interpolation``.

        Each name ``p`` in ``parameter_columns`` is expanded to the triple
        ``(p, 'p_start', 'p_end')`` before being stored and passed on, so
        ``data`` is presumably expected to carry those edge columns.
        """
        self.data = data
        self.population_view = population_view
        self.key_columns = key_columns
        self.value_columns = value_columns
        self.interpolation_order = interpolation_order
        self.clock = clock
        self.extrapolate = extrapolate

        # Expand each parameter name into (name, start column, end column).
        self.parameter_columns = [(name, f'{name}_start', f'{name}_end')
                                  for name in parameter_columns]

        self.interpolation = Interpolation(data,
                                           self.key_columns,
                                           self.parameter_columns,
                                           order=self.interpolation_order,
                                           extrapolate=self.extrapolate)
Ejemplo n.º 17
0
def test_order_zero_diff_bin_sizes():
    """Check an order-zero lookup when the bins have unequal widths.

    Queries that land exactly on a bin start (1990, 1995, 1996) are
    expected to return the value of the bin that starts there.
    """
    table = {
        'year_start': [1990, 1995, 1996, 2005, 2005.5],
        'year_end': [1995, 1996, 2005, 2005.5, 2010],
        'value': [1, 5, 2.3, 6, 100],
    }
    data = pd.DataFrame(table)

    parameter_bins = [('year', 'year_start', 'year_end')]
    # Trailing positional arguments: presumably order=0, extrapolate=False.
    i = Interpolation(data, tuple(), parameter_bins, 0, False)

    queried_years = [2007, 1990, 2005.4, 1994, 2004, 1995, 2002, 1995.5, 1996]
    query = pd.DataFrame({'year': queried_years})

    expected_values = [100, 1, 6, 1, 2.3, 5, 2.3, 5, 2.3]
    expected_result = pd.DataFrame({'value': expected_values})

    result = i(query)
    assert result.equals(expected_result)