Example #1
0
def _fit_theil_sen_one_track(x_coords_metres, y_coords_metres,
                             valid_times_unix_sec):
    """Fits two Theil-Sen lines (x vs. time and y vs. time) for one track.

    P = number of points in track

    :param x_coords_metres: length-P numpy array of x-coordinates.
    :param y_coords_metres: length-P numpy array of y-coordinates.
    :param valid_times_unix_sec: length-P numpy array of times.
    :return: theil_sen_dict: Dictionary with the following keys.
    theil_sen_dict[X_INTERCEPT_KEY]: `intercept_` of the model fit to x.
    theil_sen_dict[X_VELOCITY_KEY]: `coef_` of the model fit to x (slope with
        respect to time, as stored by the fitted regressor).
    theil_sen_dict[Y_INTERCEPT_KEY]: `intercept_` of the model fit to y.
    theil_sen_dict[Y_VELOCITY_KEY]: `coef_` of the model fit to y.
    """

    # Time is the only predictor, so present it as a P-by-1 matrix.
    time_matrix = numpy.reshape(
        valid_times_unix_sec, (len(x_coords_metres), 1)
    )

    fitted_model_objects = []
    for these_coords_metres in (x_coords_metres, y_coords_metres):
        this_model_object = TheilSenRegressor(fit_intercept=True)
        this_model_object.fit(time_matrix, these_coords_metres)
        fitted_model_objects.append(this_model_object)

    x_model_object, y_model_object = fitted_model_objects

    theil_sen_dict = {}
    theil_sen_dict[X_INTERCEPT_KEY] = x_model_object.intercept_
    theil_sen_dict[X_VELOCITY_KEY] = x_model_object.coef_
    theil_sen_dict[Y_INTERCEPT_KEY] = y_model_object.intercept_
    theil_sen_dict[Y_VELOCITY_KEY] = y_model_object.coef_
    return theil_sen_dict
def test_verbosity():
    """Fit verbose Theil-Sen models on the 1-D toy problem with output muted."""
    features, targets, _, _ = gen_toy_problem_1d()
    # The second configuration additionally caps the subpopulation size.
    extra_options = ({}, {"max_subpopulation": 10})
    with no_stdout_stderr():
        for options in extra_options:
            estimator = TheilSenRegressor(verbose=True, random_state=0,
                                          **options)
            estimator.fit(features, targets)
Example #3
0
def createTheilSenRegressor(params):
    """Build a default TheilSenRegressor together with its tuning parameters.

    :param params: parameters handed to ``mergeParams`` along with a fresh
        estimator; the merged result is not applied to the returned estimator.
    :return: dict with key ``"estimator"`` (a default ``TheilSenRegressor``)
        and key ``"params"`` (the value of ``getTheilSenRegressorParams()``).
    """
    info("Creating TheilSen Regressor", ind=4)

    # The call is kept for parity with the original flow; its return value is
    # never read afterwards.
    mergeParams(TheilSenRegressor(), params)
    tuning_grid = getTheilSenRegressorParams()
    info("Without Parameters", ind=4)

    estimator = TheilSenRegressor()
    return {"estimator": estimator, "params": tuning_grid}
Example #4
0
def test_less_samples_than_features():
    """Check Theil-Sen on a problem with fewer samples (10) than features (20)."""
    rng = np.random.RandomState(0)
    n_samples, n_features = 10, 20
    design = rng.normal(size=(n_samples, n_features))
    target = rng.normal(size=n_samples)

    # Without an intercept the coefficients must agree with least squares.
    ts_no_intercept = TheilSenRegressor(fit_intercept=False, random_state=0)
    ts_no_intercept.fit(design, target)
    ols_no_intercept = LinearRegression(fit_intercept=False)
    ols_no_intercept.fit(design, target)
    assert_array_almost_equal(ts_no_intercept.coef_, ols_no_intercept.coef_, 12)

    # With an intercept the solution differs from least squares (the intercept
    # is calculated differently), so only the in-sample predictions are checked.
    ts_with_intercept = TheilSenRegressor(fit_intercept=True, random_state=0)
    ts_with_intercept.fit(design, target)
    assert_array_almost_equal(ts_with_intercept.predict(design), target, 12)
Example #5
0
def compute_quantal_size(scan):
    """ Estimate the unit change in calcium response corresponding to a unit change in
    pixel intensity (dubbed quantal size, lower is better).

    Assumes images are stationary from one timestep to the next. Uses it to calculate a
    measure of noise per bright intensity (which increases linearly given that imaging
    noise is poisson), fits a line to it and uses the slope as the estimate.

    :param np.array scan: 3-dimensional scan (image_height, image_width, num_frames).

    :returns: int minimum pixel value in the scan (that appears a min number of times)
    :returns: int maximum pixel value in the scan (that appears a min number of times)
    :returns: np.array pixel intensities used for the estimation.
    :returns: np.array noise variances used for the estimation.
    :returns: float the estimated quantal size
    :returns: float the estimated zero value

    :raises ValueError: if no pixel value appears more than 10% of num_frames times.
    """
    # Set some params
    num_frames = scan.shape[2]
    min_count = num_frames * 0.1  # pixel values with fewer appearances will be ignored
    max_acceptable_intensity = 3000  # pixel values higher than this will be ignored
    chunk_size = int(1e8)  # number of pixels handed to np.bincount at once

    # Make sure field is at least 32 bits (int16 overflows if summed to itself)
    scan = scan.astype(np.float32, copy=False)

    # Create pixel values at each position in field (mean of consecutive frames)
    eps = 1e-4 # needed for np.round to not be biased towards even numbers (0.5 -> 1, 1.5 -> 2, 2.5 -> 3, etc.)
    pixels = np.round((scan[:, :, :-1] + scan[:, :, 1:]) / 2 + eps)
    pixels = pixels.astype(np.int16 if np.max(abs(pixels)) < 2 ** 15 else np.int32)

    # Compute a good range of pixel values (common, not too bright values)
    unique_pixels, counts = np.unique(pixels, return_counts=True)
    common_pixels = unique_pixels[counts > min_count]
    if common_pixels.size == 0:
        raise ValueError('No pixel value appears more than {} times in the '
                         'scan.'.format(min_count))
    min_intensity = np.min(common_pixels)
    max_intensity = np.max(common_pixels)
    max_acceptable_intensity = min(max_intensity, max_acceptable_intensity)
    pixels_mask = np.logical_and(pixels >= min_intensity, pixels <= max_acceptable_intensity)

    # Select pixels in good range
    pixels = pixels[pixels_mask]
    unique_pixels, counts = np.unique(pixels, return_counts=True)

    # Compute noise variance (half the squared difference of consecutive frames)
    variances = ((scan[:, :, :-1] - scan[:, :, 1:]) ** 2 / 2)[pixels_mask]
    pixels -= min_intensity  # shift so that the smallest kept intensity maps to bin 0
    bin_indices = unique_pixels - min_intensity
    # The histogram must reach the largest shifted intensity in EVERY chunk. Using
    # len(unique_pixels) is too short when intensity values have gaps, and a chunk
    # that lacks the brightest pixels would then fail on the fancy index below.
    num_bins = int(unique_pixels[-1]) - int(min_intensity) + 1
    variance_sum = np.zeros(len(unique_pixels)) # sum of variances per pixel value
    for start in range(0, len(pixels), chunk_size):  # chunk it for memory efficiency
        stop = start + chunk_size
        variance_sum += np.bincount(pixels[start:stop], weights=variances[start:stop],
                                    minlength=num_bins)[bin_indices]
    unique_variances = variance_sum / counts # average variance per intensity

    # Compute quantal size (by fitting a linear regressor to predict the variance from intensity)
    X = unique_pixels.reshape(-1, 1)
    y = unique_variances
    model = TheilSenRegressor() # robust regression
    model.fit(X, y)
    quantal_size = model.coef_[0]
    zero_level = - model.intercept_ / model.coef_[0]  # intensity at which the fitted variance is zero

    return (min_intensity, max_intensity, unique_pixels, unique_variances,
           quantal_size, zero_level)
def show():
    """Plot quadratic fits of four regressors on a small hard-coded data set.

    Each regressor is wrapped in a degree-2 polynomial pipeline, fitted,
    scored on the training data (name and score are printed) and drawn over
    the scatter plot of the data.
    """
    raw_x = [1, 2, 3, 4, 5, 6, 7, 8, 9, 4, 5, 0]
    y = [0, 5, 9, 12, 13, 12, 9, 5, 0, 1, 0, 7]
    # One single-element row per sample, as the estimators expect 2-D input.
    X = [[value] for value in raw_x]

    import pylab

    pylab.scatter(X, y)

    from sklearn.preprocessing import PolynomialFeatures
    from sklearn.pipeline import make_pipeline
    import numpy as np
    from sklearn.linear_model import LinearRegression, TheilSenRegressor, HuberRegressor, RANSACRegressor

    candidates = (
        (LinearRegression(), "linreg"),
        (TheilSenRegressor(), "theil-sen"),
        (HuberRegressor(), "huber"),
        (RANSACRegressor(), "ransac"),
    )
    for estimator, label in candidates:
        model = make_pipeline(PolynomialFeatures(2), estimator)
        model.fit(X, y)

        print("")
        print(label)
        print(model.score(X, y))

        # Evaluate the fitted curve point by point on a regular grid.
        grid = np.linspace(-1, 10, 100)
        curve = [model.predict([[point]])[0] for point in grid]

        pylab.plot(grid, curve, label=label)

    pylab.legend(loc="best")
    pylab.show()
Example #7
0
def robust_cor(x, y):
    """Compute a signed correlation-like value from a Theil-Sen fit of y on x.

    :param x: either a flat sequence of values (one feature) or a list of
        lists, which is transposed before fitting.
    :param y: sequence of target values.
    :return: list ``[corr_val, coef_, intercept_, breakdown_]`` where
        ``corr_val`` is the square root of the adjusted R^2 carrying the sign
        of the first coefficient, or 0 when the adjusted R^2 is not positive.
    """
    if isinstance(x[0], list):
        x = [list(column) for column in zip(*x)]
    else:
        x = np.array(x).reshape(-1, 1)

    X = np.array(x)
    Y = np.array(y)

    model = TheilSenRegressor(random_state=42)
    model.fit(X, Y)

    residuals = model.predict(X) - y
    centered = y - np.mean(y)
    ss_res = np.dot(residuals, residuals)
    ss_tot = np.dot(centered, centered)

    n_samples = X.shape[0]
    n_features = X.shape[1]
    adj_r2 = 1 - (ss_res / ss_tot) * (n_samples - 1) / (n_samples - n_features - 1)

    # Only the first coefficient decides the sign of the result.
    sign = np.sign(model.coef_)[0]
    corr_val = sign * np.sqrt(adj_r2) if adj_r2 > 0 else 0

    return [corr_val, model.coef_, model.intercept_, model.breakdown_]
Example #8
0
def test_checksubparams_n_subsamples_if_less_samples_than_features():
    """Fitting with n_subsamples=9 on 10 samples x 20 features must raise ValueError."""
    rng = np.random.RandomState(0)
    n_samples, n_features = 10, 20
    design = rng.normal(size=(n_samples, n_features))
    target = rng.normal(size=n_samples)

    estimator = TheilSenRegressor(n_subsamples=9, random_state=0)
    assert_raises(ValueError, estimator.fit, design, target)
Example #9
0
def underline_regression(x, y, method="ramp"):
    """Fit a two-parameter line to (x, y) using the chosen method.

    :param x: abscissa values (a numpy array for the "median" method).
    :param y: ordinate values (a numpy array for the "median" method).
    :param method: one of "ramp", "quadratic"/"parabolic", "squashed",
        "median" or "huber".
    :return: for "ramp" and "squashed", the tuple of the first two optimized
        parameters; for "median", a numpy array of the offset-shifted first
        coefficient and the second coefficient; for "quadratic"/"parabolic"
        and "huber", whatever the respective ``regress`` helper returns.
    :raises ValueError: if ``method`` is not one of the supported names.
    """
    supported_methods = ("ramp", "quadratic", "parabolic", "squashed",
                         "median", "huber")
    # Fail early and clearly; previously an unknown method fell through to the
    # final return and raised UnboundLocalError on the undefined `reg`.
    if method not in supported_methods:
        raise ValueError("Unsupported method {!r}; expected one of {}".format(
            method, ", ".join(supported_methods)))

    # Computed for every method (as before), although only the optimizer-based
    # methods use it as their starting point.
    start_params = guess(x, y)

    if method == "ramp":
        reg = minimize(asymmetric_ramp_loss,
                       x0=start_params,
                       args=(x, y),
                       bounds=((None, None), (0, None)),
                       method="Powell")
    elif method in ("quadratic", "parabolic"):
        return ParabolicRegressor.regress(x, y)
    elif method == "squashed":
        reg = minimize(squashed_loss,
                       x0=start_params,
                       jac=squashed_grad,
                       args=(x, y),
                       bounds=((None, None), (0, 1)),
                       method="L-BFGS-B")
    elif method == "median":
        y = y.reshape(-1, 1)
        # Design matrix with a column of ones followed by the x column.
        X = np.vstack((np.ones(y.shape).transpose(),
                       x.reshape(-1, 1).transpose()))
        reg = TheilSenRegressor(random_state=0).fit(X.transpose(), np.ravel(y))
        # Shift the line by the smallest value left after subtracting it.
        offset = np.min(subtract_bg(y, x, [reg.coef_[0], reg.coef_[1]]))
        return np.array([reg.coef_[0] + offset, reg.coef_[1]])
    else:  # method == "huber"
        # NOTE(review): `HubelRegressor` is referenced exactly as in the
        # original; confirm the name against its definition.
        return HubelRegressor.regress(x, y)

    return (reg.x[0], reg.x[1])
Example #10
0
def log_log_robust_regression(cfs, y, kind=0):
    """Fit a robust line to log(y) versus log(cfs).

    :param cfs: 1-D numpy array; it is tiled across the columns of ``y``.
    :param y: numpy array whose first dimension must have length 40.
    :param kind: 0 for RANSAC, 1 for Theil-Sen, 2 for Huber.
    :return: tuple ``(coef_, intercept_, median_r2)`` where ``median_r2`` is
        the median over samples of ``1 - (y - prediction)^2 / (y - mean)^2``
        computed in log space.
    :raises ValueError: if ``kind`` is not 0, 1 or 2.
    """
    assert y.shape[0] == 40
    y = y.reshape(40, -1)
    x = np.tile(cfs[:, np.newaxis], (1, y.shape[1]))
    log_y = np.log(y).ravel()
    log_x = np.log(x).ravel()[:, np.newaxis]

    uses_ransac = kind == 0
    if uses_ransac:
        model = RANSACRegressor()
    elif kind == 1:
        model = TheilSenRegressor(n_jobs=-1)
    elif kind == 2:
        model = HuberRegressor()
    else:
        raise ValueError

    model.fit(log_x, log_y)
    predicted = model.predict(log_x)
    squared_error = np.square(log_y - predicted)
    squared_deviation = np.square(log_y - log_y.mean())
    pointwise_r2 = 1. - squared_error / squared_deviation

    # RANSAC keeps the final linear model on its `estimator_` attribute.
    linear_model = model.estimator_ if uses_ransac else model
    return linear_model.coef_, linear_model.intercept_, np.median(pointwise_r2)
def getscore_getnext(df, days_ahead, coin):
    """Train a Theil-Sen model to predict 'close' `days_ahead` rows ahead.

    NOTE: `df` is modified in place: missing values are filled with -99999, a
    'label' column is added and the trailing rows without a label are dropped.

    :param df: pandas DataFrame with at least the columns 'close' and 'date'.
    :param days_ahead: positive number of rows to look ahead.
    :param coin: unused by the current implementation (only referenced by
        commented-out model-loading code in the original).
    :return: tuple ``(confidence, futureval)``: the model's score on a held-out
        15% split and its prediction for the last (scaled) feature row.
    :raises ValueError: if `days_ahead` is smaller than 1.
    """
    # `X[:-0]` would be empty and the label would equal the feature, so reject
    # non-positive horizons before the frame is touched.
    if days_ahead < 1:
        raise ValueError(
            "days_ahead must be at least 1, got {!r}".format(days_ahead))

    forecast_col = 'close'
    df.fillna(value=-99999, inplace=True)
    df['label'] = df[forecast_col].shift(-days_ahead)

    # The axis is passed by keyword: the positional form `drop(labels, 1)` was
    # deprecated in pandas 1.3 and removed in pandas 2.0.
    X = np.array(df.drop(['label', 'date'], axis=1))
    X = preprocessing.scale(X)

    # The most recent row has no label yet; it is the one to forecast from.
    futureX = X[-1:]
    X = X[:-days_ahead]
    df.dropna(inplace=True)

    y = np.array(df['label'])

    X_train, X_test, y_train, y_test = cross_validation.train_test_split(
        X, y, test_size=0.15)

    clf = TheilSenRegressor()
    clf.fit(X_train, y_train)
    confidence = clf.score(X_test, y_test)
    futureval = clf.predict(futureX)
    return (confidence, futureval)
def calculate_scaling_params(events, kmer_mean_levels):
    """Fit a Theil-Sen line mapping observed median levels to model levels.

    Only positions that are neither a jump target (``move > 1``) nor the
    position right before one are used, and positions whose first
    ``model_state`` contains an underscore are skipped.

    :param events: data convertible to a pandas DataFrame with the columns
        'move', 'model_state' and 'mean'.
    :param kmer_mean_levels: mapping from model state to its expected level.
    :return: tuple ``(slope, intercept)`` of the fitted line, or None when
        fewer than MINIMUM_NONJUMP_POSITIONS non-jump positions exist or no
        usable position remains.
    """
    events = pd.DataFrame(events)
    events['pos'] = events['move'].cumsum()
    jump_positions = events[events['move'] > 1]['pos']
    jump_positions = set(jump_positions - 1) | set(jump_positions)
    nonjump_positions = set(events['pos']) - jump_positions
    if len(nonjump_positions) < MINIMUM_NONJUMP_POSITIONS:
        return None

    # Levels at jump positions were previously collected too but never used,
    # so they are no longer gathered.
    statelevels = []
    for pos, posevents in events.groupby('pos'):
        if pos not in nonjump_positions:
            continue
        state = posevents['model_state'].iloc[0]
        if '_' in state:
            continue
        statelevels.append(
            [posevents['mean'].median(), kmer_mean_levels[state]])

    # Every non-jump position may have been skipped; indexing the empty array
    # below would fail, so report "no result" like the early exit above.
    if not statelevels:
        return None

    statelevels = np.array(statelevels)
    regr = TheilSenRegressor(random_state=922)
    regr.fit(statelevels[:, 0][:, np.newaxis], statelevels[:, 1])

    return regr.coef_[0], regr.intercept_
Example #13
0
def test_subsamples():
    """With n_subsamples equal to the sample count, Theil-Sen must match least squares."""
    features, targets, _, _ = gen_toy_problem_4d()

    robust = TheilSenRegressor(n_subsamples=features.shape[0], random_state=0)
    robust.fit(features, targets)
    ordinary = LinearRegression()
    ordinary.fit(features, targets)

    # Coefficients have to agree to 9 decimals.
    assert_array_almost_equal(robust.coef_, ordinary.coef_, 9)
Example #14
0
    def fit(self, smiles_list, logS_list):
        """Fit the configured regression model on ESOL descriptors.

        For every SMILES string the four descriptors returned by
        ``self._calc_esol_descriptors`` are computed and regressed against
        the matching entry of ``logS_list``. The fitted intercept is stored in
        ``self._intercept`` and the four coefficients in ``self._coef`` under
        the keys "MW", "LogP", "RB" and "AP".

        An unrecognised ``self.model`` is reset to 'linear'.
        """
        descriptor_rows = []
        targets = []
        for index, smiles in enumerate(smiles_list):
            molecule = Chem.MolFromSmiles(smiles)
            mw, logp, rotors, ap = self._calc_esol_descriptors(molecule)
            descriptor_rows.append([mw, logp, rotors, ap])
            targets.append(logS_list[index])

        # Anything other than the known model names falls back to 'linear'.
        if self.model not in ('linear', 'pls', 'huber', 'ts'):
            self.model = 'linear'

        if self.model == 'ts':
            logging.debug(f'Model: {self.model}')
            model = TheilSenRegressor()
        elif self.model == 'huber':
            model = HuberRegressor(epsilon=1.5, alpha=2.0)
        elif self.model == 'pls':
            model = PLSRegression(n_components=2)
        else:
            model = LinearRegression()

        logging.debug(f'Model: {self.model}')

        model.fit(descriptor_rows, targets)
        self._intercept = model.intercept_
        for position, key in enumerate(("MW", "LogP", "RB", "AP")):
            self._coef[key] = model.coef_[position]
def get_best_degree(data):
    """Return the polynomial degree (1 to 5) with the smallest squared error.

    The last column of ``data`` is the target and all preceding columns are
    the predictors. For each degree a polynomial Theil-Sen pipeline is fitted
    and its sum of squared residuals on the same data is recorded.
    """
    last_column = np.size(data, 1) - 1
    X = data[:, 0:last_column]  # noqa
    Y = data[:, last_column]

    candidate_degrees = list(range(1, 6))
    squared_errors = []
    for degree in candidate_degrees:
        model = Pipeline([
            ("quad", PolynomialFeatures(degree=degree)),
            ("linear", TheilSenRegressor(max_subpopulation=50, max_iter=300)),
        ])
        model.fit(X, Y)
        fitted = model.predict(X)
        squared_errors.append(np.sum(np.square(Y - fitted)))

    return candidate_degrees[np.argmin(squared_errors)]
def estimate_txty(cluster, k=20):
    """Estimate the slopes of SX and SY against SZ over the lowest-z nodes.

    :param cluster: object whose ``nodes(data=True)`` yields ``(id, attrs)``
        pairs where ``attrs['features']`` holds the keys 'SX', 'SY' and 'SZ'.
    :param k: number of nodes with the smallest 'SZ' used for the fits.
    :return: tuple ``(TX, TY)``: the Theil-Sen slope of SX on SZ and of SY on
        SZ.
    """
    # Only the SX/SY/SZ features feed the fits; the TX/TY features were read
    # before but never used.
    node_features = [attrs['features']
                     for _, attrs in cluster.nodes(data=True)]
    xs = np.array([features['SX'] for features in node_features])
    ys = np.array([features['SY'] for features in node_features])
    zs = np.array([features['SZ'] for features in node_features])

    # Indices of the k nodes with the smallest z, computed once.
    lowest_z = np.argsort(zs)[:k]
    z_column = zs[lowest_z].reshape((-1, 1))

    # The same regressor instance is refitted for the second coordinate.
    lr = TheilSenRegressor()
    lr.fit(z_column, xs[lowest_z])
    TX = lr.coef_[0]

    lr.fit(z_column, ys[lowest_z])
    TY = lr.coef_[0]

    return TX, TY
def regression(
    data,
    theilsen_max_iter=100,
    order="auto",
    threshold_multiplier=2,
):
    """Iteratively fit a polynomial Theil-Sen model while discarding outliers.

    The last column of ``data`` is the target, the others are predictors.
    Up to 10 rounds are run: the model is fitted on the current inliers, the
    threshold is set to the median absolute inlier residual, and every row
    whose absolute residual is below ``threshold_multiplier * threshold``
    becomes an inlier for the next round. Iteration stops early when the
    inlier count no longer changes or no inliers remain.

    :param data: 2-D array, predictors followed by the target column.
    :param theilsen_max_iter: ``max_iter`` passed to the Theil-Sen regressor.
    :param order: polynomial degree; "auto" selects it with
        ``get_best_degree`` and any other non-int value is replaced by 1.
    :param threshold_multiplier: factor applied to the residual threshold.
    :return: tuple ``(reg, inlier_mask, scaled_threshold, order)``.
    """
    if order == "auto":
        order = get_best_degree(data)
    elif not isinstance(order, int):
        order = 1

    reg = Pipeline([
        ("quad", PolynomialFeatures(degree=order)),
        ("linear", TheilSenRegressor(max_subpopulation=50,
                                     max_iter=theilsen_max_iter)),
    ])

    target_column = np.size(data, 1) - 1
    X = data[:, 0:target_column]  # noqa
    Y = data[:, target_column]

    inlier_mask = np.ones(np.size(data, 0), dtype=bool)
    previous_count = 0
    threshold = 0

    for _ in range(10):
        current_count = np.count_nonzero(inlier_mask)
        if current_count == previous_count:
            break
        previous_count = current_count

        selected = inlier_mask.astype(bool)
        inlier_X = X[selected]
        inlier_Y = Y[selected]

        if inlier_X.shape[0] == 0:
            inlier_mask = selected.astype(int)
            break

        reg.fit(inlier_X, inlier_Y)
        all_residuals = abs(reg.predict(X) - Y)
        inlier_residuals = abs(reg.predict(inlier_X) - inlier_Y)

        threshold = np.median(inlier_residuals)
        inlier_mask = (
            all_residuals < (threshold_multiplier * threshold)
        ).astype(int)

    return reg, inlier_mask, threshold_multiplier * threshold, order
Example #18
0
def test_theil_sen_2d():
    """On the 2-D toy problem least squares must miss and Theil-Sen must recover w and c."""
    features, targets, true_coef, true_intercept = gen_toy_problem_2d()

    # Least squares is expected to be far from the true coefficients.
    ordinary = LinearRegression()
    ordinary.fit(features, targets)
    assert norm(ordinary.coef_ - true_coef) > 1.0

    # Theil-Sen is expected to match them to one decimal.
    robust = TheilSenRegressor(max_subpopulation=1e3, random_state=0)
    robust.fit(features, targets)
    assert_array_almost_equal(robust.coef_, true_coef, 1)
    assert_array_almost_equal(robust.intercept_, true_intercept, 1)
 def __init__(self, X, y, tsr_params, nfolds=3, n_jobs=1, scoring=None, verbose=True):
     """Set up a Theil-Sen predictive model identified by the code "tsr".

     All arguments are forwarded to ``AbstractRegressorPredictiveModel``;
     afterwards ``self._model`` is built by ``self.constructRegressor`` from a
     default ``TheilSenRegressor``.
     """
     self._code = "tsr"

     if verbose:
         print("Constructed TheilSenRegressor: {}".format(self._code))

     AbstractRegressorPredictiveModel.__init__(
         self, "regressor", X, y, tsr_params, nfolds, n_jobs, scoring, verbose)
     base_estimator = TheilSenRegressor()
     self._model = self.constructRegressor(base_estimator)
Example #20
0
def get_models():
    """Return the regressors to compare, all configured without an intercept."""
    # RANSACRegressor is left out because it offers no option to skip the
    # intercept.
    # Original author's note on Theil-Sen: its output varies a lot with
    # n_samples (with n_samples=1 it returns the median of the ratio; with
    # n_samples equal to the number of data points it essentially returns the
    # least-squares result).
    return [
        LinearRegression(fit_intercept=False),
        HuberRegressor(fit_intercept=False),
        TheilSenRegressor(fit_intercept=False),
    ]
Example #21
0
def test_theil_sen_1d():
    """On the 1-D toy problem least squares must miss and Theil-Sen must recover w and c."""
    features, targets, true_coef, true_intercept = gen_toy_problem_1d()

    # Least squares is expected to be off by more than 0.9.
    ordinary = LinearRegression()
    ordinary.fit(features, targets)
    assert np.abs(ordinary.coef_ - true_coef) > 0.9

    # Theil-Sen is expected to match to one decimal.
    robust = TheilSenRegressor(random_state=0)
    robust.fit(features, targets)
    assert_array_almost_equal(robust.coef_, true_coef, 1)
    assert_array_almost_equal(robust.intercept_, true_intercept, 1)
Example #22
0
def train(X, Y, selected="Linear", modelName='best_model.sav'):
    """Train one regressor on a 90/10 split, save it and report its quality.

    The features are standardized with a scaler fitted on the training part
    only. The selected model is fitted, pickled to ``modelName``, validated
    on both parts (via ``validate`` and the model's R2 score) and plotted.

    :param X: feature matrix.
    :param Y: target values.
    :param selected: name of the model to train; one of the keys of the
        ``models`` dictionary below.
    :param modelName: path of the file the fitted model is pickled to.
    :return: tuple ``(fig_train, fig_test)`` produced by ``plot_fig``.
    :raises KeyError: if ``selected`` is not a known model name.
    """
    X_train, X_test, Y_train, Y_test = train_test_split(X,
                                                        Y,
                                                        test_size=0.10,
                                                        shuffle=True)

    # Scaling (statistics come from the training part only)
    sc = StandardScaler()
    sc.fit(X_train)
    X_train = sc.transform(X_train)
    X_test = sc.transform(X_test)

    # Candidate models; RANSAC and SGD were disabled in the original.
    models = {
        "Linear": LinearRegression(),
        "Huber": HuberRegressor(max_iter=1000),
        "TheilSen": TheilSenRegressor(),
        "Ridge": Ridge(),
        "Lasso": Lasso(),
        "ElasticNet": ElasticNet(),
        "KNN": KNeighborsRegressor(),
        "DecisionTree": DecisionTreeRegressor(),
        "SVR": SVR(),
        "AdaBoost": AdaBoostRegressor(),
        "GradientBoost": GradientBoostingRegressor(),
        "RandomForest": RandomForestRegressor(),
        "ExtraTrees": ExtraTreesRegressor(),
    }

    best_model = models[selected]
    best_model.fit(X_train, Y_train)

    # Save model; the context manager closes the file even if pickling fails.
    with open(modelName, 'wb') as model_file:
        pickle.dump(best_model, model_file)

    # make predictions using the model (train and test)
    Y_test_pred = best_model.predict(X_test)
    Y_train_pred = best_model.predict(X_train)

    # R2 score, the coefficient of determination (how much the inputs
    # influence the prediction): 0 is bad, 1 is good.
    validate(Y_train, Y_train_pred, name="Training")
    R2 = best_model.score(X_train, Y_train)
    print("[Training] R2 Score: ", round(R2, 3))

    validate(Y_test, Y_test_pred, name="Test")
    R2 = best_model.score(X_test, Y_test)
    print("[Test] R2 Score: ", round(R2, 3))

    fig_train = plot_fig([Y_train, Y_train_pred],
                         ["Train Real", "Train Predicted"])
    fig_test = plot_fig([Y_test, Y_test_pred], ["Test Real", "Test Predicted"])
    return fig_train, fig_test
Example #23
0
def test_theil_sen_1d_no_intercept():
    """Without an intercept, Theil-Sen must recover w + c and report a zero intercept."""
    features, targets, true_coef, true_intercept = gen_toy_problem_1d(
        intercept=False)
    expected_coef = true_coef + true_intercept

    # Least squares is expected to be off by more than 0.5.
    ordinary = LinearRegression(fit_intercept=False)
    ordinary.fit(features, targets)
    assert_greater(np.abs(ordinary.coef_ - true_coef - true_intercept), 0.5)

    # Theil-Sen is expected to match to one decimal.
    robust = TheilSenRegressor(fit_intercept=False, random_state=0)
    robust.fit(features, targets)
    assert_array_almost_equal(robust.coef_, expected_coef, 1)
    assert_almost_equal(robust.intercept_, 0.)
Example #24
0
 def test_model_theilsen(self):
     """Convert a fitted TheilSenRegressor to ONNX and dump data and models."""
     model, X = fit_regression_model(TheilSenRegressor())
     # One float input whose width equals the number of feature columns.
     input_types = [("input", FloatTensorType([None, X.shape[1]]))]
     model_onnx = convert_sklearn(model, "thiel-sen regressor", input_types)
     self.assertIsNotNone(model_onnx)
     dump_data_and_model(X, model, model_onnx,
                         basename="SklearnTheilSen-Dec4")
Example #25
0
def _fit_robust_line(shifts):
    """Fit a Theil-Sen line to `shifts` against their index; return the fitted values."""
    from sklearn.linear_model import TheilSenRegressor

    # The sample index (0, 1, 2, ...) is the only predictor.
    sample_index = np.arange(len(shifts)).reshape(-1, 1)
    robust_model = TheilSenRegressor()
    robust_model.fit(sample_index, shifts)
    return robust_model.predict(sample_index)
def translated_huber_regression(x, y):
    """Fit a Theil-Sen line to (x, y) and translate it down to the lowest residual.

    Despite its name, the function fits a ``TheilSenRegressor``. The design
    matrix has a column of ones followed by x; the line defined by the two
    fitted coefficients is subtracted from y and the minimum of the remainder
    is added to the first coefficient.

    :param x: numpy array of abscissa values.
    :param y: numpy array of ordinate values.
    :return: numpy array ``[shifted first coefficient, second coefficient]``.
    """
    ones_row = np.ones((1, y.size))
    x_row = x.reshape(1, -1)
    design = np.vstack((ones_row, x_row)).transpose()

    reg = TheilSenRegressor(random_state=0)
    reg.fit(design, y.ravel())

    constant_coef = reg.coef_[0]
    slope_coef = reg.coef_[1]
    remainder = y - constant_coef - slope_coef * x
    return np.array([constant_coef + np.min(remainder), slope_coef])
Example #27
0
def test_theil_sen_parallel():
    """Same check as the 2-D test, but fitting Theil-Sen with n_jobs=-1."""
    features, targets, true_coef, true_intercept = gen_toy_problem_2d()

    # Least squares is expected to be far from the true coefficients.
    ordinary = LinearRegression()
    ordinary.fit(features, targets)
    assert_greater(norm(ordinary.coef_ - true_coef), 1.0)

    # Theil-Sen is expected to match them to one decimal.
    robust = TheilSenRegressor(n_jobs=-1, random_state=0,
                               max_subpopulation=2e3)
    robust.fit(features, targets)
    assert_array_almost_equal(robust.coef_, true_coef, 1)
    assert_array_almost_equal(robust.intercept_, true_intercept, 1)
Example #28
0
def get_hyperparameters_model():
    """Return the search configuration for a default TheilSenRegressor.

    :return: dict with the single key 'theil_sen_regressor' mapping to a dict
        holding the estimator ('model') and an empty 'param_distributions'.
    """
    return {
        'theil_sen_regressor': {
            'model': TheilSenRegressor(),
            'param_distributions': {},
        }
    }
Example #29
0
def main():
    """Compare four regressors on B-spline features with 10% outliers and save the plot."""
    np.random.seed(42)
    train_x = np.random.uniform(low=-10, high=10, size=400)
    plot_x = np.linspace(-10, 10, 1000)
    clean_y = (np.sin(2 * np.pi * 0.1 * train_x) + 0.1 * np.abs(train_x)
               + np.abs(np.arctan(train_x)))
    test_x = np.random.uniform(low=-30, high=30, size=200)
    test_y = np.sin(2 * np.pi * 0.1 * test_x)

    # Every tenth training target is replaced by the outlier value 6.
    noisy_y = clean_y.copy()
    noisy_y[::10] = 6

    # Estimators expect 2-D inputs.
    train_x = train_x[:, np.newaxis]
    test_x = test_x[:, np.newaxis]

    # The same feature transformer instance is shared by all pipelines.
    spline_knots = np.linspace(-30, 30, 20)
    bspline_features = BSplineFeatures(spline_knots, degree=3, periodic=False)
    estimators = (
        ('Least-Square', '-', 'C0', LinearRegression(fit_intercept=False)),
        ('Theil-Sen', '-.', 'C1', TheilSenRegressor(random_state=42)),
        ('RANSAC', ':', 'C2', RANSACRegressor(random_state=42)),
        ('HuberRegressor', '--', 'C3', HuberRegressor()),
    )

    fig, ax = plt.subplots(1, 1, figsize=(8, 3))
    fig.suptitle('Robust B-Spline Regression with SKLearn')
    ax.plot(train_x[:, 0], noisy_y, 'o', ms=5, c='black',
            label='data points [10% outliers]')

    for name, line_style, line_color, estimator in estimators:
        pipeline = make_pipeline(bspline_features, estimator)
        pipeline.fit(train_x, noisy_y)
        test_error = mean_squared_error(pipeline.predict(test_x), test_y)
        curve = pipeline.predict(plot_x[:, None])
        legend_text = name + ' E={:2.2g}'.format(test_error)
        ax.plot(plot_x, curve, line_style, lw=2, markevery=8, ms=6,
                color=line_color, label=legend_text)

    ax.legend(loc='upper right', framealpha=0.95, fontsize='xx-small')
    ax.set(ylim=(-2, 8), xlabel='time [s]', ylabel='amplitude')
    fig.tight_layout()
    fig.savefig('../results/fitting_experiments/robust_bspline_regression.png',
                bbox_inches='tight',
                dpi=300)
Example #30
0
def select_regressor(X, y, scoring='neg_mean_squared_error', show=True):
    """Rank a fixed set of regressors by 10-fold cross-validated score.

    Each fold score is negated and square-rooted and the mean over folds is
    used as the ranking value (smallest first).

    :param X: feature matrix.
    :param y: target values.
    :param scoring: scoring name handed to ``cross_val_score``.
    :param show: when true, print the ranking table.
    :return: tuple ``(best_name, best_regressor, table)`` where ``table`` is
        a DataFrame with the columns 'Regressor' and 'Score' sorted by
        ascending score, and ``best_regressor`` is the matching entry of the
        candidate list.
    """
    # ARDRegression, GaussianProcessRegressor, LogisticRegression and
    # RadiusNeighborsRegressor were disabled in the original list.
    regressors = [
        AdaBoostRegressor(),
        BaggingRegressor(),
        DecisionTreeRegressor(),
        ElasticNet(),
        ExtraTreeRegressor(),
        ExtraTreesRegressor(),
        GradientBoostingRegressor(),
        HuberRegressor(),
        KNeighborsRegressor(),
        Lasso(),
        LinearRegression(),
        MLPRegressor(),
        PassiveAggressiveRegressor(),
        PLSRegression(),
        RandomForestRegressor(),
        RANSACRegressor(),
        Ridge(),
        SGDRegressor(),
        TheilSenRegressor(),
    ]

    # First pass: raw cross-validation scores per regressor, in list order.
    raw_scores = []
    for candidate in regressors:
        label = candidate.__class__.__name__
        print('Processing {}...'.format(label))
        fold_scores = cross_val_score(candidate, X, y, scoring=scoring, cv=10)
        raw_scores.append((label, fold_scores))

    # Second pass: collapse the fold scores into one ranking value each.
    rows = [[label, np.sqrt(-fold_scores).mean()]
            for label, fold_scores in raw_scores]
    table = pd.DataFrame(rows, columns=['Regressor', 'Score'])
    table = table.sort_values(by='Score', ascending=True)

    if show:
        print(table)

    # The index label of the top row is the candidate's position in the list.
    best_position = table.index[0]
    return table.iloc[0, 0], regressors[best_position], table