def natural_cubic_spline(
    *,
    X: pd.Series,
    y: pd.Series,
    number_knots: int,
    list_knots: List[int] = None
) -> Pipeline:
    """
    Fit a piecewise natural cubic spline regression and return the model.

    A two-step pipeline is built (a NaturalCubicSpline transformer followed
    by a LinearRegression with an intercept) and fitted on X and y.

    If list_knots is given and is not empty, those knots are used and
    number_knots is ignored. Otherwise number_knots is used and the
    calculated knots are equally-spaced within min(X) and max(X). The
    endpoints are not included as knots.

    The X series must be in increasing order.
    The y series must not contain missing values.

    Parameters
    ----------
    X : pd.Series
        The data series of the abscissa.
    y : pd.Series
        The data series of the ordinate.
    number_knots : int
        The number of knots for the spline. Only used when list_knots is
        None or empty.
    list_knots : List[int] = None
        A list of specific knots. Any sized sequence (for example a NumPy
        array) is accepted.

    Returns
    -------
    p: Pipeline
        The fitted model object.

    Example
    -------
    >>> import matplotlib.pyplot as plt
    >>> import datasense as ds
    >>> X = ds.random_data(distribution="uniform").sort_values()
    >>> y = ds.random_data(distribution="norm")
    >>> p = ds.natural_cubic_spline(
    ...     X=X,
    ...     y=y,
    ...     number_knots=10
    ... )
    >>> fig, ax = ds.plot_scatter_line_x_y1_y2(
    ...     X=X,
    ...     y1=y,
    ...     y2=p.predict(X)
    ... )
    >>> plt.show()
    """
    # An explicit None/length test is used instead of bare truthiness:
    # `if list_knots:` raises ValueError for array-like knots (NumPy arrays,
    # pandas Series) because their truth value is ambiguous. For None, an
    # empty list, and a non-empty list the outcome is the same as before.
    has_explicit_knots = list_knots is not None and len(list_knots) > 0
    if has_explicit_knots:
        spline = NaturalCubicSpline(knots=list_knots)
    else:
        spline = NaturalCubicSpline(
            max=max(X),
            min=min(X),
            n_knots=number_knots
        )
    p = Pipeline([
        ("natural_cubic_spline", spline),
        ("linear_regression", LinearRegression(fit_intercept=True))
    ])
    p.fit(X, y)
    return p
def simple_spline_specification(name, knots):
    """Return a two-step Pipeline built from a name and a set of knots.

    The first step is ``ColumnSelector(name=name)`` registered under
    ``"<name>_select"``; the second is ``NaturalCubicSpline(knots=knots)``
    registered under ``"<name>_spline"``.

    Parameters
    ----------
    name
        Value forwarded to ColumnSelector and used as the prefix of both
        step names (presumably a column name -- confirm against
        ColumnSelector).
    knots
        Value forwarded unchanged as the ``knots`` argument of
        NaturalCubicSpline.

    Returns
    -------
    Pipeline
        The unfitted pipeline.
    """
    selector_step = (f"{name}_select", ColumnSelector(name=name))
    spline_step = (f"{name}_spline", NaturalCubicSpline(knots=knots))
    return Pipeline([selector_step, spline_step])
def make_natural_cubic_regression(n_knots, knot_range=(-2, 2)):
    """Construct a pipeline fitting a one-dimensional regression with a
    natural cubic spline feature.

    The pipeline has three steps, in order: ``'standardizer'``
    (StandardScaler), ``'nat_cubic'`` (NaturalCubicSpline) and
    ``'regression'`` (LinearRegression with an intercept).

    Parameters
    ----------
    n_knots
        Forwarded as the ``n_knots`` argument of NaturalCubicSpline.
    knot_range
        Indexable pair; items 0 and 1 are passed as the first and second
        positional arguments of NaturalCubicSpline (presumably the lower
        and upper limits for knot placement -- confirm against
        NaturalCubicSpline). Defaults to ``(-2, 2)``.

    Returns
    -------
    Pipeline
        The unfitted pipeline.
    """
    scaler = StandardScaler()
    range_start = knot_range[0]
    range_end = knot_range[1]
    spline_features = NaturalCubicSpline(
        range_start, range_end, n_knots=n_knots
    )
    regressor = LinearRegression(fit_intercept=True)
    steps = [
        ('standardizer', scaler),
        ('nat_cubic', spline_features),
        ('regression', regressor),
    ]
    return Pipeline(steps)
def simple_spline_specification(name, knots):
    '''
    Small factory for the simple spline specifications that are needed
    repeatedly: a ColumnSelector step followed by a NaturalCubicSpline step,
    with step names derived from ``name``.

    For example, ``simple_spline_specification("Income",
    [25, 50, 75, 100, 125])`` builds the equivalent of:

    income_spec = Pipeline([
        ('Income_select', ColumnSelector(name="Income")),
        ('Income_spline', NaturalCubicSpline(knots=[25, 50, 75, 100, 125]))
    ])

    name  : forwarded to ColumnSelector and used as the step-name prefix.
    knots : forwarded as the ``knots`` argument of NaturalCubicSpline.
    '''
    # format(name) yields the same text as "{}".format(name).
    prefix = format(name)
    transformers = [
        (prefix + "_select", ColumnSelector(name=name)),
        (prefix + "_spline", NaturalCubicSpline(knots=knots)),
    ]
    return Pipeline(transformers)
def make_natural_cubic_regression(n_knots, knot_range=(-2, 2)):
    """Construct a pipeline fitting a one-dimensional linear regression on
    a natural cubic spline feature.

    The pipeline has three steps, in order: ``'standardizer'``
    (StandardScaler), ``'nat_cubic'`` (NaturalCubicSpline) and
    ``'regression'`` (LinearRegression with an intercept).

    Parameters
    ----------
    n_knots
        Forwarded as the ``n_knots`` argument of NaturalCubicSpline.
    knot_range
        Indexable pair; items 0 and 1 are passed as the first and second
        positional arguments of NaturalCubicSpline (presumably the lower
        and upper limits for knot placement -- confirm against
        NaturalCubicSpline). The default ``(-2, 2)`` reproduces the values
        that were previously hard-coded, so existing callers are
        unaffected.

    Returns
    -------
    Pipeline
        The unfitted pipeline.
    """
    return Pipeline([
        ('standardizer', StandardScaler()),
        ('nat_cubic', NaturalCubicSpline(
            knot_range[0], knot_range[1], n_knots=n_knots
        )),
        ('regression', LinearRegression(fit_intercept=True))
    ])
def make_natural_cubic_regression(n_knots, knot_range=(-2, 2)):
    """Construct a pipeline fitting a one-dimensional Huber regression on a
    natural cubic spline feature.

    The pipeline has three steps, in order: ``'standardizer'``
    (StandardScaler), ``'nat_cubic'`` (NaturalCubicSpline) and
    ``'regression'`` (HuberRegressor with its default arguments).

    Parameters
    ----------
    n_knots
        Forwarded as the ``n_knots`` argument of NaturalCubicSpline.
    knot_range
        Indexable pair; items 0 and 1 are passed as the first and second
        positional arguments of NaturalCubicSpline (presumably the lower
        and upper limits for knot placement -- confirm against
        NaturalCubicSpline). The default ``(-2, 2)`` reproduces the values
        that were previously hard-coded, so existing callers are
        unaffected.

    Returns
    -------
    Pipeline
        The unfitted pipeline.
    """
    return Pipeline([
        ('standardizer', StandardScaler()),
        ('nat_cubic', NaturalCubicSpline(
            knot_range[0], knot_range[1], n_knots=n_knots
        )),
        ('regression', HuberRegressor())
    ])