Esempio n. 1
0
def natural_cubic_spline(*,
                         X: pd.Series,
                         y: pd.Series,
                         number_knots: int = None,
                         list_knots: List[int] = None) -> Pipeline:
    """
    Fit a piecewise natural cubic spline regression of y on X.

    A NaturalCubicSpline transformer is chained with a LinearRegression
    (with intercept) in a Pipeline, and the pipeline is fitted on X and y
    before it is returned.

    If list_knots is given and non-empty, those knots are used and
    number_knots is ignored. Otherwise number_knots is required; the
    calculated knots are equally-spaced within min(X) and max(X). The
    endpoints are not included as knots.

    The X series must be in increasing order.
    The y series must not contain missing values.

    Parameters
    ----------
    X : pd.Series
        The data series of the abscissa.
    y : pd.Series
        The data series of the ordinate.
    number_knots : int = None
        The number of knots for the spline. Required unless list_knots
        is given.
    list_knots : List[int] = None
        A list of specific knots.

    Returns
    -------
    p: Pipeline
        The fitted model object.

    Raises
    ------
    ValueError
        If neither number_knots nor a non-empty list_knots is supplied.

    Example
    -------
    >>> import matplotlib.pyplot as plt
    >>> import datasense as ds
    >>> X = ds.random_data(distribution="uniform").sort_values()
    >>> y = ds.random_data(distribution="norm")
    >>> p = ds.natural_cubic_spline(
    ...     X=X,
    ...     y=y,
    ...     number_knots=10
    ... )
    >>> fig, ax = ds.plot_scatter_line_x_y1_y2(
    ...     X=X,
    ...     y1=y,
    ...     y2=p.predict(X)
    ... )
    >>> plt.show()
    """

    # Test against None and use len() instead of bare truthiness: a NumPy
    # array or pandas Series of knots raises ValueError when evaluated as a
    # boolean. An empty sequence still falls back to number_knots.
    has_explicit_knots = list_knots is not None and len(list_knots) > 0
    if has_explicit_knots:
        spline = NaturalCubicSpline(knots=list_knots)
    elif number_knots is not None:
        spline = NaturalCubicSpline(max=max(X),
                                    min=min(X),
                                    n_knots=number_knots)
    else:
        raise ValueError(
            "Provide either number_knots or a non-empty list_knots."
        )
    p = Pipeline([("natural_cubic_spline", spline),
                  ("linear_regression", LinearRegression(fit_intercept=True))])
    p.fit(X, y)
    return p
Esempio n. 2
0
def simple_spline_specification(name, knots):
    """Return a two-step Pipeline: select column `name`, then spline it.

    The steps are named "<name>_select" and "<name>_spline". The first is a
    ColumnSelector for `name`; the second is a NaturalCubicSpline built
    with the given `knots`.
    """
    selector_step = ("{}_select".format(name), ColumnSelector(name=name))
    spline_step = ("{}_spline".format(name), NaturalCubicSpline(knots=knots))
    return Pipeline([selector_step, spline_step])
Esempio n. 3
0
def make_natural_cubic_regression(n_knots, knot_range=(-2, 2)):
    """Assemble a pipeline fitting a one-dimensional regression with a
    natural cubic spline feature.

    The steps are, in order: a StandardScaler, a NaturalCubicSpline built
    from the first two entries of `knot_range` and `n_knots`, and a
    LinearRegression with an intercept. The pipeline is returned unfitted.
    """
    lower_bound = knot_range[0]
    upper_bound = knot_range[1]
    scaling_step = ('standardizer', StandardScaler())
    spline_step = (
        'nat_cubic',
        NaturalCubicSpline(lower_bound, upper_bound, n_knots=n_knots),
    )
    regression_step = ('regression', LinearRegression(fit_intercept=True))
    return Pipeline([scaling_step, spline_step, regression_step])
Esempio n. 4
0
def simple_spline_specification(name, knots):
    """Build the standard select-then-spline Pipeline for one column.

    Many of these simple spline specifications are needed, so this small
    factory makes them. For example, calling it with name="Income" and
    knots=[25, 50, 75, 100, 125] is equivalent to:

        income_spec = Pipeline([
            ('Income_select', ColumnSelector(name="Income")),
            ('Income_spline', NaturalCubicSpline(knots=[25, 50, 75, 100, 125]))
        ])
    """
    pipeline_steps = [
        ("{}_select".format(name), ColumnSelector(name=name)),
        ("{}_spline".format(name), NaturalCubicSpline(knots=knots)),
    ]
    return Pipeline(pipeline_steps)
Esempio n. 5
0
def make_natural_cubic_regression(n_knots, knot_range=(-2, 2)):
    """Build an unfitted pipeline: standardize, spline-expand, then regress.

    Parameters
    ----------
    n_knots
        Forwarded to NaturalCubicSpline as `n_knots` (presumably the number
        of knots).
    knot_range : tuple, optional
        A (lower, upper) pair forwarded as the first two positional
        arguments of NaturalCubicSpline. Defaults to (-2, 2), the range
        that used to be hard-coded, so existing calls behave as before.

    Returns
    -------
    Pipeline
        Steps 'standardizer' (StandardScaler), 'nat_cubic'
        (NaturalCubicSpline) and 'regression' (LinearRegression with an
        intercept).
    """
    # Unpacking also rejects a knot_range that is not exactly a pair.
    knot_min, knot_max = knot_range
    return Pipeline([
        ('standardizer', StandardScaler()),
        ('nat_cubic', NaturalCubicSpline(knot_min, knot_max, n_knots=n_knots)),
        ('regression', LinearRegression(fit_intercept=True))
    ])
Esempio n. 6
0
def make_natural_cubic_regression(n_knots, knot_range=(-2, 2)):
    """Build an unfitted pipeline: standardize, spline-expand, Huber regress.

    Parameters
    ----------
    n_knots
        Forwarded to NaturalCubicSpline as `n_knots` (presumably the number
        of knots).
    knot_range : tuple, optional
        A (lower, upper) pair forwarded as the first two positional
        arguments of NaturalCubicSpline. Defaults to (-2, 2), the range
        that used to be hard-coded, so existing calls behave as before.

    Returns
    -------
    Pipeline
        Steps 'standardizer' (StandardScaler), 'nat_cubic'
        (NaturalCubicSpline) and 'regression' (HuberRegressor with its
        default settings).
    """
    # Unpacking also rejects a knot_range that is not exactly a pair.
    knot_min, knot_max = knot_range
    return Pipeline([
        ('standardizer', StandardScaler()),
        ('nat_cubic', NaturalCubicSpline(knot_min, knot_max, n_knots=n_knots)),
        ('regression', HuberRegressor())
    ])