Example #1
class Simple:
    def __init__(self, a, b, c, d):
        self.model = TheilSenRegressor()

    def update_a_b(self, x, y):
        self.model.fit(x.reshape(-1, 1), y)

    def set_c_d(self, c, d):
        pass

    def get_y(self, x):
        return self.model.predict(x.reshape(-1, 1))

    def get_likelihood(self, x, y):
        return np.mean(np.abs(y - self.get_y(x)))  # mean absolute error

    def to_string(self):
        return "a:{}, b:{}".format(self.model.coef_, self.model.intercept_)

    def get_a_b(self):
        return self.model.coef_, self.model.intercept_

    @staticmethod
    def var_to_weight(v):
        return 1

    @staticmethod
    def get_c_d(x, r):
        return None, None
Example #2
def test_verbosity():
    X, y, w, c = gen_toy_problem_1d()
    # Check that Theil-Sen can be verbose
    with no_stdout_stderr():
        TheilSenRegressor(verbose=True, random_state=0).fit(X, y)
        TheilSenRegressor(verbose=True, max_subpopulation=10,
                          random_state=0).fit(X, y)
Example #3
def compute_quantal_size(scan):
    """ Estimate the unit change in calcium response corresponding to a unit change in
    pixel intensity (dubbed quantal size, lower is better).

    Assumes images are stationary from one timestep to the next and uses that to
    calculate a measure of noise per intensity level (which increases linearly with
    intensity when the imaging noise is Poisson); fits a line to it and uses the slope
    as the estimate.

    :param np.array scan: 3-dimensional scan (image_height, image_width, num_frames).

    :returns: int minimum pixel value in the scan (that appears a min number of times)
    :returns: int maximum pixel value in the scan (that appears a min number of times)
    :returns: np.array pixel intensities used for the estimation.
    :returns: np.array noise variances used for the estimation.
    :returns: float the estimated quantal size
    :returns: float the estimated zero value
    """
    # Set some params
    num_frames = scan.shape[2]
    min_count = num_frames * 0.1  # pixel values with fewer appearances will be ignored
    max_acceptable_intensity = 3000  # pixel values higher than this will be ignored

    # Make sure field is at least 32 bits (int16 overflows when summed with itself)
    scan = scan.astype(np.float32, copy=False)

    # Create pixel values at each position in field
    eps = 1e-4  # np.round rounds halves to even; adding eps biases them upward (0.5 -> 1, 1.5 -> 2, 2.5 -> 3)
    pixels = np.round((scan[:, :, :-1] + scan[:, :, 1:]) / 2 + eps)
    pixels = pixels.astype(np.int16 if np.max(abs(pixels)) < 2 ** 15 else np.int32)

    # Compute a good range of pixel values (common, not too bright values)
    unique_pixels, counts = np.unique(pixels, return_counts=True)
    min_intensity = min(unique_pixels[counts > min_count])
    max_intensity = max(unique_pixels[counts > min_count])
    max_acceptable_intensity = min(max_intensity, max_acceptable_intensity)
    pixels_mask = np.logical_and(pixels >= min_intensity, pixels <= max_acceptable_intensity)

    # Select pixels in good range
    pixels = pixels[pixels_mask]
    unique_pixels, counts = np.unique(pixels, return_counts=True)

    # Compute noise variance
    variances = ((scan[:, :, :-1] - scan[:, :, 1:]) ** 2 / 2)[pixels_mask]
    pixels -= min_intensity
    variance_sum = np.zeros(len(unique_pixels)) # sum of variances per pixel value
    for i in range(0, len(pixels), int(1e8)):  # chunk it for memory efficiency
        variance_sum += np.bincount(pixels[i: i + int(1e8)], weights=variances[i: i + int(1e8)],
                                    minlength=len(unique_pixels))[unique_pixels - min_intensity]
    unique_variances = variance_sum / counts # average variance per intensity

    # Compute quantal size (by fitting a linear regressor to predict the variance from intensity)
    X = unique_pixels.reshape(-1, 1)
    y = unique_variances
    model = TheilSenRegressor() # robust regression
    model.fit(X, y)
    quantal_size = model.coef_[0]
    zero_level = - model.intercept_ / model.coef_[0]

    return (min_intensity, max_intensity, unique_pixels, unique_variances,
           quantal_size, zero_level)
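A minimal usage sketch, not part of the original project: it assumes `numpy` (as `np`) and `TheilSenRegressor` are imported at module scope, as the function requires, and invents a synthetic Poisson-noise scan. For photon-limited data the fitted variance-vs-intensity slope approximates the gain, so unscaled Poisson counts should give a quantal size near 1.

import numpy as np
from sklearn.linear_model import TheilSenRegressor

rng = np.random.default_rng(0)
# Hypothetical 64x64 field recorded for 200 frames with Poisson shot noise
scan = rng.poisson(lam=150, size=(64, 64, 200))
(min_int, max_int, intensities, variances,
 quantal_size, zero_level) = compute_quantal_size(scan)
print(quantal_size)  # close to 1 for unscaled Poisson counts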
Example #5
def calculate_scaling_params(events, kmer_mean_levels):
    events = pd.DataFrame(events)
    events['pos'] = events['move'].cumsum()
    jump_positions = events[events['move'] > 1]['pos']
    jump_positions = set(jump_positions - 1) | set(jump_positions)
    nonjump_positions = set(events['pos']) - jump_positions
    if len(nonjump_positions) < MINIMUM_NONJUMP_POSITIONS:
        return

    statelevels = []
    statelevels_jump = []
    for pos, posevents in events.groupby('pos'):
        state = posevents['model_state'].iloc[0]
        if '_' in state:
            continue

        medlevel = posevents['mean'].median()
        if pos in nonjump_positions:
            statelevels.append([medlevel, kmer_mean_levels[state]])
        else:
            statelevels_jump.append([medlevel, kmer_mean_levels[state]])

    statelevels_jump = np.array(statelevels_jump)
    statelevels = np.array(statelevels)
    regr = TheilSenRegressor(random_state=922)
    regr.fit(statelevels[:, 0][:, np.newaxis], statelevels[:, 1])

    return regr.coef_[0], regr.intercept_
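The snippet depends on `pandas`, `numpy`, `TheilSenRegressor`, and a module constant `MINIMUM_NONJUMP_POSITIONS` from the original project. A hypothetical, self-contained sketch of how it might be called; the event table, k-mer levels, and constant value below are all invented for illustration:

import numpy as np
import pandas as pd
from sklearn.linear_model import TheilSenRegressor

MINIMUM_NONJUMP_POSITIONS = 5  # module constant in the original; value assumed here

states = ['AAAAA', 'AAAAT', 'AAATC', 'AATCG', 'ATCGG',
          'TCGGA', 'CGGAC', 'GGACT', 'GACTT', 'ACTTA']
kmer_mean_levels = {s: 60.0 + 5.0 * i for i, s in enumerate(states)}
events = {
    'mean': [90.0 + 7.0 * i for i in range(10)],  # raw levels on a shifted scale
    'model_state': states,
    'move': [0] + [1] * 9,                        # no jumps (move > 1) in this toy track
}
slope, intercept = calculate_scaling_params(events, kmer_mean_levels)
print(slope, intercept)  # linear map from raw levels onto the k-mer model scale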
Example #6
def robust_cor(x, y):
    if isinstance(x[0], list):
        x = list(map(list, zip(*x)))
    else:
        x = np.array(x).reshape(-1, 1)
    X = np.array(x)
    Y = np.array(y)
    theil_regr = TheilSenRegressor(random_state=42)
    theil_regr.fit(X, Y)
    y_pred = theil_regr.predict(X)
    res = y_pred - y
    tot_dev = y - np.mean(y)
    SSres = np.dot(res, res)
    SStot = np.dot(tot_dev, tot_dev)
    adjR2 = 1 - (SSres / SStot) * (X.shape[0] - 1) / (X.shape[0] - X.shape[1] -
                                                      1)
    sgn = np.sign(theil_regr.coef_)[0]
    if adjR2 > 0:
        corr_val = sgn * np.sqrt(adjR2)
    else:
        corr_val = 0
    return [
        corr_val, theil_regr.coef_, theil_regr.intercept_,
        theil_regr.breakdown_
    ]
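A short demonstration with invented data: `robust_cor` expects `np` and `TheilSenRegressor` at module scope, and `y` should be an array (a plain list would fail at `y - np.mean(y)`). The single outlier barely moves the robust correlation.

import numpy as np
from sklearn.linear_model import TheilSenRegressor

x = list(range(10))
y = np.array([0, 2, 4, 6, 8, 15, 12, 14, 16, 18], dtype=float)  # outlier at index 5
corr, coef, intercept, breakdown = robust_cor(x, y)
print(round(corr, 3))  # close to 1 despite the outlier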
Example #7
def test_checksubparams_n_subsamples_if_less_samples_than_features():
    random_state = np.random.RandomState(0)
    n_samples, n_features = 10, 20
    X = random_state.normal(size=(n_samples, n_features))
    y = random_state.normal(size=n_samples)
    theil_sen = TheilSenRegressor(n_subsamples=9, random_state=0)
    with pytest.raises(ValueError):
        theil_sen.fit(X, y)
Example #8
class Regressor(BaseEstimator):
    def __init__(self):

        self.regressorName="linear"
        if self.regressorName=="rf":
            self.clf= RandomForestRegressor(n_estimators=30, max_depth=63,max_features=50, n_jobs=-1)
        elif self.regressorName=="gb":

            self.clf = GradientBoostingRegressor(
                alpha=0.9, init=None, max_depth=3, learning_rate=0.2, loss='ls',
                max_features=None, min_samples_leaf=1, min_samples_split=2,
                min_weight_fraction_leaf=0.0, n_estimators=2500, presort='auto',
                random_state=None, subsample=1.0, verbose=0, warm_start=True)
            #self.clf =GridSearchCV(estimator=gb, param_grid=self.getParamGrid(),scoring='mean_squared_error',cv=3,n_jobs=-1)
            #self.clf=gb
        elif self.regressorName=="ridge":
            self.clf = RidgeCV(alphas=(0.01, 0.1), fit_intercept=True, normalize=False, scoring=None, cv=5, gcv_mode=None, store_cv_values=False)
        elif self.regressorName=="linear":
            self.clf = LinearRegression()
        elif self.regressorName=="lasso":
            self.clf = LassoCV(cv=10)
        elif self.regressorName=="svr":
            self.clf = SVR(kernel='rbf', C=0.2, gamma=0.01)
        elif self.regressorName=="knn":
            self.clf = neighbors.KNeighborsRegressor(1, weights='distance',n_jobs=-1)
        elif self.regressorName=="gauss":
            self.clf = TheilSenRegressor()

    def fit(self, X, y):
        X=csc_matrix(X)
        print "Training Algorithm"
        self.clf.fit(X, y)
        #print self.clf.best_estimator_

    def predict(self, X):
        X=csr_matrix(X)
        print "Testing Algorithm"
        return self.clf.predict(X)

    def getRegressor(self):
        return self.clf

    def getRegressorName(self):
        return self.regressorName

    def getParamGrid(self):
        if self.regressorName=="rf":
            defaultGrid=[None]
            maxDepthGrid=np.arange(10,70,7)
            maxFeaturesGrid=["sqrt","log2",None]
            maxTreesGrid=np.arange(10,100,10)
            param_grid = {'max_features': defaultGrid}
        elif self.regressorName == "gb":
            #maxDepthGrid=np.arange(3,20,5)
            learningRateGrid=np.arange(50,100,10)
            #param_grid = {'max_depth': maxDepthGrid}
            #param_grid={'loss':['ls', 'lad', 'huber', 'quantile']}
            param_grid={'alpha':[0.9]}
        return param_grid
Example #9
class Regressor(BaseEstimator):
    def __init__(self):
 
        self.regressorName="gb"
        if self.regressorName=="rf":
            self.clf= RandomForestRegressor(n_estimators=400, max_depth=63,max_features=50, n_jobs=-1)
        elif self.regressorName=="gb":
 
            self.clf = GradientBoostingRegressor(
                alpha=0.9, init=None, max_depth=3, learning_rate=0.2, loss='ls',
                max_features=None, min_samples_leaf=1, min_samples_split=2,
                min_weight_fraction_leaf=0.0, n_estimators=2500, presort='auto',
                random_state=None, subsample=1.0, verbose=0, warm_start=True)
            #self.clf =GridSearchCV(estimator=gb, param_grid=self.getParamGrid(),scoring='mean_squared_error',cv=3,n_jobs=-1)
            #self.clf=gb
        elif self.regressorName=="ridge":
            self.clf = RidgeCV(alphas=(0.01, 0.1), fit_intercept=True, normalize=False, scoring=None, cv=5, gcv_mode=None, store_cv_values=False)
        elif self.regressorName=="linear":
            self.clf = LinearRegression()  # LinearRegression accepts no alpha/max_iter; those belong to regularized models like Ridge/Lasso
        elif self.regressorName=="lasso":
            self.clf = LassoCV(cv=10)
        elif self.regressorName=="svr":
            self.clf = SVR(kernel='rbf', C=0.2, gamma=0.01)
        elif self.regressorName=="knn":
            self.clf = neighbors.KNeighborsRegressor(1, weights='distance',n_jobs=-1)
        elif self.regressorName=="gauss":
            self.clf = TheilSenRegressor()
 
    def fit(self, X, y):
        #X=csc_matrix(X)
        self.clf.fit(X, y)
        #print self.clf.best_estimator_
 
    def predict(self, X):
        #X=csr_matrix(X)
        return self.clf.predict(X)
 
    def getRegressor(self):
        return self.clf
 
    def getRegressorName(self):
        return self.regressorName
 
    def getParamGrid(self):
        if self.regressorName=="rf":
            defaultGrid=[None]
            maxDepthGrid=np.arange(10,70,7)
            maxFeaturesGrid=["sqrt","log2",None]
            maxTreesGrid=np.arange(10,100,10)
            param_grid = {'max_features': defaultGrid}
        elif self.regressorName == "gb":
            #maxDepthGrid=np.arange(3,20,5)
            learningRateGrid=np.arange(50,100,10)
            #param_grid = {'max_depth': maxDepthGrid}
            #param_grid={'loss':['ls', 'lad', 'huber', 'quantile']}
            param_grid={'alpha':[0.9]}
        return param_grid
Example #10
def _fit_robust_line(shifts):
    """ Use a robust linear regression algorithm to fit a line to the data."""
    from sklearn.linear_model import TheilSenRegressor

    X = np.arange(len(shifts)).reshape(-1, 1)
    y = shifts
    model = TheilSenRegressor() # robust regression
    model.fit(X, y)
    line = model.predict(X)

    return line
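A quick usage sketch with invented shift data; the function needs `numpy` as `np` at module scope and imports the regressor itself.

import numpy as np

rng = np.random.default_rng(0)
shifts = 0.05 * np.arange(100) + rng.normal(0, 0.1, size=100)
shifts[[10, 50]] += 5.0  # two outlier frames
line = _fit_robust_line(shifts)  # robust trend, essentially unaffected by the spikes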
Example #11
 def trainAlgo(self):
     self.model = TheilSenRegressor(
         fit_intercept=self.param['fit_intercept'],
         copy_X=self.param['copy_X'],
         max_subpopulation=self.param['max_subpopulation'],
         n_subsamples=self.param['n_subsamples'],
         max_iter=self.param['max_iter'],
         tol=self.param['tol'],
         random_state=self.param['random_state'],
         verbose=self.param['verbose'],
     )
     self.model.fit(self.inputData['X'], self.outputData['Y'])
Example #12
def test_theil_sen_1d_no_intercept():
    X, y, w, c = gen_toy_problem_1d(intercept=False)
    # Check that Least Squares fails
    lstq = LinearRegression(fit_intercept=False).fit(X, y)
    assert np.abs(lstq.coef_ - w - c) > 0.5
    # Check that Theil-Sen works
    theil_sen = TheilSenRegressor(fit_intercept=False, random_state=0).fit(X, y)
    assert_array_almost_equal(theil_sen.coef_, w + c, 1)
    assert_almost_equal(theil_sen.intercept_, 0.0)

    # non-regression test for #18104
    theil_sen.score(X, y)
Example #13
def createTheilSenRegressor(params):
    info("Creating TheilSen Regressor", ind=4)

    ## Params
    params = mergeParams(TheilSenRegressor(), params)
    tuneParams = getTheilSenRegressorParams()
    info("Without Parameters", ind=4)

    ## estimator
    reg = TheilSenRegressor()

    return {"estimator": reg, "params": tuneParams}
Example #14
def theilsen_regress_predict(var):
    """
    Input:
    var: 1-D array of values

    Output: predictions of the fitted robust linear trend over the array's index
    """
    regressor = TheilSenRegressor()
    y = np.asarray(var).ravel()  # keep y 1-D; a (n, 1) column triggers a DataConversionWarning
    X = np.arange(len(y)).reshape(-1, 1)
    regressor.fit(X, y)
    return regressor.predict(X)
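A small example of detrending with it (invented series; assumes the `np` and `TheilSenRegressor` imports above):

import numpy as np
from sklearn.linear_model import TheilSenRegressor

var = np.array([1.0, 2.1, 2.9, 4.2, 30.0, 6.1, 7.0, 8.2])  # one spike at index 4
trend = theilsen_regress_predict(var)
residual = var - trend  # the spike stands out against the robust trend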
Example #15
def _fit_theil_sen_one_track(x_coords_metres, y_coords_metres,
                             valid_times_unix_sec):
    """Fits Theil-Sen model for one storm track.

    P = number of points in track

    :param x_coords_metres: length-P numpy array of x-coordinates.
    :param y_coords_metres: length-P numpy array of y-coordinates.
    :param valid_times_unix_sec: length-P numpy array of times.
    :return: theil_sen_dict: Dictionary with the following keys.
    theil_sen_dict['x_intercept_metres']: x-intercept.
    theil_sen_dict['x_velocity_m_s01']: x-velocity (metres per second).
    theil_sen_dict['y_intercept_metres']: y-intercept.
    theil_sen_dict['y_velocity_m_s01']: y-velocity (metres per second).
    """

    num_points = len(x_coords_metres)
    valid_times_unix_sec = numpy.reshape(valid_times_unix_sec, (num_points, 1))

    model_object_for_x = TheilSenRegressor(fit_intercept=True)
    model_object_for_x.fit(valid_times_unix_sec, x_coords_metres)
    model_object_for_y = TheilSenRegressor(fit_intercept=True)
    model_object_for_y.fit(valid_times_unix_sec, y_coords_metres)

    return {
        X_INTERCEPT_KEY: model_object_for_x.intercept_,
        X_VELOCITY_KEY: model_object_for_x.coef_,
        Y_INTERCEPT_KEY: model_object_for_y.intercept_,
        Y_VELOCITY_KEY: model_object_for_y.coef_
    }
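A standalone sketch of calling it on a synthetic track. The `*_KEY` names are module constants in the original project; the string values below are assumptions made only so the example runs.

import numpy
from sklearn.linear_model import TheilSenRegressor

X_INTERCEPT_KEY = 'x_intercept_metres'
X_VELOCITY_KEY = 'x_velocity_m_s01'
Y_INTERCEPT_KEY = 'y_intercept_metres'
Y_VELOCITY_KEY = 'y_velocity_m_s01'

times = numpy.arange(0, 600, 60)        # ten samples, one per minute
x = 100.0 + 5.0 * times                 # 5 m/s in x
y = 50.0 - 2.0 * times                  # -2 m/s in y
track = _fit_theil_sen_one_track(x, y, times)
print(track[X_VELOCITY_KEY])            # approximately [5.]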
Example #16
def test_less_samples_than_features():
    random_state = np.random.RandomState(0)
    n_samples, n_features = 10, 20
    X = random_state.normal(size=(n_samples, n_features))
    y = random_state.normal(size=n_samples)
    # Check that Theil-Sen falls back to Least Squares if fit_intercept=False
    theil_sen = TheilSenRegressor(fit_intercept=False, random_state=0).fit(X, y)
    lstq = LinearRegression(fit_intercept=False).fit(X, y)
    assert_array_almost_equal(theil_sen.coef_, lstq.coef_, 12)
    # Check fit_intercept=True case. This will not be equal to the Least
    # Squares solution since the intercept is calculated differently.
    theil_sen = TheilSenRegressor(fit_intercept=True, random_state=0).fit(X, y)
    y_pred = theil_sen.predict(X)
    assert_array_almost_equal(y_pred, y, 12)
Example #17
class _TheilSenRegressorImpl:
    def __init__(self, **hyperparams):
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        if y is not None:
            self._wrapped_model.fit(X, y)
        else:
            self._wrapped_model.fit(X)
        return self

    def predict(self, X):
        return self._wrapped_model.predict(X)
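In the original wrapper library, `Op` is bound elsewhere to the underlying scikit-learn estimator. A standalone sketch under that assumption:

import numpy as np
from sklearn.linear_model import TheilSenRegressor as Op  # assumed binding of Op

X = np.arange(20, dtype=float).reshape(-1, 1)
y = 3.0 * X.ravel() + 1.0
model = _TheilSenRegressorImpl(random_state=0).fit(X, y)
print(model.predict(X[:2]))  # approximately [1., 4.]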
Example #18
 def __init__(self,
              fit_intercept=True,
              copy_X=True,
              max_subpopulation=1e4,
              n_subsamples=None,
              max_iter=300,
              tol=1.e-3,
              random_state=None,
              n_jobs=1,
              verbose=False):
     max_iter = int(max_iter)
     _TheilSenRegressor.__init__(self, fit_intercept, copy_X,
                                 max_subpopulation, n_subsamples, max_iter,
                                 tol, random_state, n_jobs, verbose)
     BaseWrapperReg.__init__(self)
Example #19
def theilsen_regress_coeff(var, a):
    """
    Input:
    var: 1-D array of values
    a: 1-D array index

    Output: regression coefficient
    """
    regressor = TheilSenRegressor()
    y = np.asarray(var).ravel()  # keep y 1-D; a (n, 1) column triggers a DataConversionWarning
    X = a.reshape(-1, 1)
    regressor.fit(X, y)
    return np.array([regressor.coef_])
Example #21
    def _regress_a(X, y, robust, n_jobs):
        """
        Calculates the slope and intercept
        """

        if robust:
            model = TheilSenRegressor(n_jobs=n_jobs)
        else:
            model = LinearRegression(n_jobs=n_jobs)

        model.fit(X, y)

        slope_m = model.coef_[0]
        intercept_b = model.intercept_

        return slope_m, intercept_b
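A sketch of the robust branch, calling it as a plain function on contaminated data (invented example; assumes the `LinearRegression` and `TheilSenRegressor` imports):

import numpy as np
from sklearn.linear_model import LinearRegression, TheilSenRegressor

X = np.linspace(0.0, 1.0, 50).reshape(-1, 1)
y = 4.0 * X.ravel() + 2.0
y[::10] += 3.0  # contaminate every tenth sample
slope, intercept = _regress_a(X, y, robust=True, n_jobs=1)
print(slope, intercept)  # near 4 and 2; the contaminated points barely move the fit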
Example #22
def get_best_degree(data):
    degrees = range(1, 6)

    errors = []

    degrees = list(degrees)

    for deg in degrees:
        reg = Pipeline([
            ("quad", PolynomialFeatures(degree=deg)),
            (
                "linear",
                TheilSenRegressor(max_subpopulation=50, max_iter=300),
            ),
        ])

        numDims = np.size(data, 1)

        X = data[:, 0:numDims - 1]  # noqa
        Y = data[:, numDims - 1]

        reg.fit(X, Y)

        out = reg.predict(X)

        Sr = np.sum(np.square(Y - out))

        errors.append(Sr)

    min_degree = degrees[np.argmin(errors)]

    return min_degree
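A usage sketch on invented cubic data (assumes the `Pipeline`, `PolynomialFeatures`, `TheilSenRegressor`, and `np` imports the snippet relies on). Note the selection uses in-sample SSE, so higher degrees can compete with the true one:

import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import TheilSenRegressor

rng = np.random.default_rng(1)
x = np.linspace(-2.0, 2.0, 80)
y = 1.5 * x ** 3 - x + 0.1 * rng.normal(size=80)
data = np.column_stack([x, y])
print(get_best_degree(data))  # degree with the lowest in-sample SSE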
Example #23
def test_subsamples():
    X, y, w, c = gen_toy_problem_4d()
    theil_sen = TheilSenRegressor(n_subsamples=X.shape[0],
                                  random_state=0).fit(X, y)
    lstq = LinearRegression().fit(X, y)
    # Check for exact the same results as Least Squares
    assert_array_almost_equal(theil_sen.coef_, lstq.coef_, 9)
Example #24
def log_log_robust_regression(cfs, y, kind=0):
    assert y.shape[0] == 40
    y = y.reshape(40, -1)
    x = np.tile(cfs[:, np.newaxis], (1, y.shape[1]))
    y = np.log(y).ravel()
    x = np.log(x).ravel()[:, np.newaxis]
    if kind == 0:
        model = RANSACRegressor()
    elif kind == 1:
        model = TheilSenRegressor(n_jobs=-1)
    elif kind == 2:
        model = HuberRegressor()
    else:
        raise ValueError
    model.fit(x, y)
    yp = model.predict(x)
    u = np.square(y - yp)
    v = np.square(y - y.mean())
    R2 = 1. - u / v
    if kind == 0:
        return model.estimator_.coef_, model.estimator_.intercept_, np.median(
            R2)
    elif kind in [1, 2]:
        return model.coef_, model.intercept_, np.median(R2)
    else:
        raise ValueError
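A sketch with an invented power law, using the Theil-Sen branch (kind=1); the function assumes the three regressors and `np` are imported:

import numpy as np
from sklearn.linear_model import RANSACRegressor, TheilSenRegressor, HuberRegressor

rng = np.random.default_rng(2)
cfs = np.linspace(2.0, 80.0, 40)   # 40 values, as the assert requires
y = 3.0 * cfs[:, np.newaxis] ** -1.5 * (1.0 + 0.05 * rng.normal(size=(40, 5)))
coef, intercept, med_r2 = log_log_robust_regression(cfs, y, kind=1)
print(coef)  # log-log slope, close to [-1.5]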
Example #25
def show():
    X = [1, 2, 3, 4, 5, 6, 7, 8, 9, 4, 5, 0]
    y = [0, 5, 9, 12, 13, 12, 9, 5, 0, 1, 0, 7]
    X = list(map(lambda x: [x], X))

    import pylab

    pylab.scatter(X, y)

    from sklearn.preprocessing import PolynomialFeatures
    from sklearn.pipeline import make_pipeline
    import numpy as np
    from sklearn.linear_model import LinearRegression, TheilSenRegressor, HuberRegressor, RANSACRegressor

    for regressor in [
        [LinearRegression(), "linreg"],
        [TheilSenRegressor(), "theil-sen"],
        [HuberRegressor(), "huber"],
        [RANSACRegressor(), "ransac"],
    ]:
        model = make_pipeline(PolynomialFeatures(2), regressor[0])
        model.fit(X, y)

        print("")
        print(regressor[1])
        print(model.score(X, y))

        test_x = np.linspace(-1, 10, 100)
        test_y = []
        for x in test_x:
            test_y.append(model.predict([[x]])[0])

        pylab.plot(test_x, test_y, label=regressor[1])
    pylab.legend(loc="best")
    pylab.show()
Example #26
def underline_regression(x, y, method="ramp"):
    start_params = guess(x, y)
    if method == "ramp":
        reg = minimize(asymmetric_ramp_loss,
                       x0=start_params,
                       args=(x, y),
                       bounds=((None, None), (0, None)),
                       method="Powell")
    elif method == 'quadratic' or method == "parabolic":
        reg = ParabolicRegressor.regress(x, y)
        return reg
    elif method == "squashed":
        reg = minimize(squashed_loss,
                       x0=start_params,
                       jac=squashed_grad,
                       args=(x, y),
                       bounds=((None, None), (0, 1)),
                       method="L-BFGS-B")
    elif method == "median":
        y = y.reshape(-1, 1)
        X = np.vstack((np.ones(y.shape).transpose(), x.reshape(-1,
                                                               1).transpose()))
        reg = TheilSenRegressor(random_state=0).fit(X.transpose(), np.ravel(y))
        offset = np.min(subtract_bg(y, x, [reg.coef_[0], reg.coef_[1]]))
        return np.array([reg.coef_[0] + offset, reg.coef_[1]])
    elif method == "huber":
        reg = HubelRegressor.regress(x, y)
        return reg
    return (reg.x[0], reg.x[1])
Example #27
    def _cfunc_theilsen(x, y):
        """
        Get Theil-Sen regression score for data set.

        Args:
            x: (list<float>) independent property (x-axis)
            y: (list<float>) dependent property (y-axis)

        Returns: (float) Theil-Sen score

        """
        from sklearn.linear_model import TheilSenRegressor
        r = TheilSenRegressor(random_state=21)
        x_coeff = np.array(x)[:, np.newaxis]
        r.fit(x_coeff, y)
        return r.score(x_coeff, y)
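Called as a plain function on invented data (it needs `np` at module scope and imports the regressor itself):

import numpy as np

x = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]
y = [2.2, 3.9, 6.1, 8.0, 9.9, 12.1]
print(_cfunc_theilsen(x, y))  # R^2 of the Theil-Sen fit, close to 1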
Example #28
    def fit(self, smiles_list, logS_list):
        X = []
        y = []
        for i, smiles in enumerate(smiles_list):
            mol = Chem.MolFromSmiles(smiles)
            (mw, logp, rotors, ap) = self._calc_esol_descriptors(mol)
            X.append([mw, logp, rotors, ap])
            y.append(logS_list[i])

        if self.model == 'linear':
            model = LinearRegression()
        elif self.model == 'pls':
            model = PLSRegression(n_components=2)
        elif self.model == 'huber':
            model = HuberRegressor(epsilon=1.5, alpha=2.0)
        elif self.model == 'ts':
            logging.debug(f'Model: {self.model}')
            model = TheilSenRegressor()
        else:
            self.model = 'linear'
            model = LinearRegression()

        logging.debug(f'Model: {self.model}')

        model.fit(X, y)
        self._intercept = model.intercept_
        self._coef["MW"] = model.coef_[0]
        self._coef["LogP"] = model.coef_[1]
        self._coef["RB"] = model.coef_[2]
        self._coef["AP"] = model.coef_[3]
Example #29
def regression(
    data,
    theilsen_max_iter=100,
    order="auto",
    threshold_multiplier=2,
):
    if order == "auto":
        order = get_best_degree(data)
    elif not isinstance(order, int):
        order = 1

    reg = Pipeline([
        ("quad", PolynomialFeatures(degree=order)),
        (
            "linear",
            TheilSenRegressor(max_subpopulation=50,
                              max_iter=theilsen_max_iter),
        ),
    ])

    numDims = np.size(data, 1)

    X = data[:, 0:numDims - 1]  # noqa
    Y = data[:, numDims - 1]

    inlier_mask = np.ones(np.size(data, 0), dtype=bool)

    mask_length = 0
    threshold = 0

    for _ in range(10):
        if mask_length == sum(inlier_mask):
            break
        else:
            mask_length = sum(inlier_mask)

        inlier_mask = inlier_mask.astype(bool)
        i_X = X[inlier_mask]
        i_Y = Y[inlier_mask]

        if i_X.shape[0] == 0:
            inlier_mask = inlier_mask.astype(int)
            break

        reg.fit(i_X, i_Y)
        ts = reg.predict(X)

        residuals = abs(ts - Y)

        inlier_residuals = abs(reg.predict(i_X) - i_Y)

        threshold = np.median(inlier_residuals)

        within = residuals < (threshold_multiplier * threshold)

        inlier_mask = within.astype(int)

    return reg, inlier_mask, threshold_multiplier * threshold, order
Example #30
    def fit(self, X, y, random_state=None):
        """
        Train ENOLS on the given training set.

        Parameters
        ----------
        X: an input array of shape (n_sample, n_features)
        y: an array of shape (n_sample,) containing the target values for the input examples

        Return
        ------
        self: the fitted model
        """

        # use random instead of np.random to sample random numbers below
        random = check_random_state(random_state)

        estimators = [('lr', LinearRegression())]

        if isinstance(self.sample_size, int):
            self.sample_size = 'reservoir_sampling'

        # add all the trained OLS models to this list
        self.estimators_lr, self.estimators_TSR, self.estimators_enols = [], [], []
        for i in range(self.n_estimators):
            samples = sample_without_replacement(n_population=random.choice([50, 100]),
                                                 n_samples=random.choice([10, 20]),
                                                 random_state=random_state, method=self.sample_size)

            X_train, y_train = [], []
            for i in samples:
                X_train.append(X[i]), y_train.append(y[i])

            reg = LinearRegression()
            reg.fit(np.array(X_train), np.array(y_train))

            tsr = TheilSenRegressor()
            tsr.fit(np.array(X_train), np.array(y_train))

            enol = StackingRegressor(estimators=estimators, final_estimator=LinearRegression())
            enol.fit(np.array(X_train), np.array(y_train))

            self.estimators_lr.append(reg), self.estimators_TSR.append(tsr), self.estimators_enols.append(enol)

        return self
Example #31
def test_theil_sen_1d():
    X, y, w, c = gen_toy_problem_1d()
    # Check that Least Squares fails
    lstq = LinearRegression().fit(X, y)
    assert np.abs(lstq.coef_ - w) > 0.9
    # Check that Theil-Sen works
    theil_sen = TheilSenRegressor(random_state=0).fit(X, y)
    assert_array_almost_equal(theil_sen.coef_, w, 1)
    assert_array_almost_equal(theil_sen.intercept_, c, 1)
Example #32
def get_models():
    models = list()
    models.append(LinearRegression(fit_intercept=False))
    models.append(HuberRegressor(fit_intercept=False))
    #models.append(RANSACRegressor())  # fit_intercept=False is not supported: RANSAC has no option to skip the intercept
    models.append(
        TheilSenRegressor(fit_intercept=False)
    )  # Struggling a bit with this one, as the output varies a lot with n_subsamples (if n_subsamples=1 it returns the median of the ratios; if it equals the number of data points it returns essentially the least-squares fit)
    return models
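A quick check of the three intercept-free models on invented proportional data:

import numpy as np
from sklearn.linear_model import LinearRegression, HuberRegressor, TheilSenRegressor

X = np.linspace(1.0, 10.0, 30).reshape(-1, 1)
y = 2.5 * X.ravel()
for model in get_models():
    print(type(model).__name__, model.fit(X, y).coef_)  # each slope near [2.5]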
Example #33
def test_theil_sen_2d():
    X, y, w, c = gen_toy_problem_2d()
    # Check that Least Squares fails
    lstq = LinearRegression().fit(X, y)
    assert norm(lstq.coef_ - w) > 1.0
    # Check that Theil-Sen works
    theil_sen = TheilSenRegressor(max_subpopulation=1e3, random_state=0).fit(X, y)
    assert_array_almost_equal(theil_sen.coef_, w, 1)
    assert_array_almost_equal(theil_sen.intercept_, c, 1)
Example #34
    def __init__(self):

        self.regressorName="gb"
        if self.regressorName=="rf":
            self.clf= RandomForestRegressor(n_estimators=400, max_depth=63,max_features=50, n_jobs=-1)
        elif self.regressorName=="gb":

            self.clf = GradientBoostingRegressor(
                alpha=0.9, init=None, max_depth=3, learning_rate=0.2, loss='ls',
                max_features=None, min_samples_leaf=1, min_samples_split=2,
                min_weight_fraction_leaf=0.0, n_estimators=2500, presort='auto',
                random_state=None, subsample=1.0, verbose=0, warm_start=True)
            #self.clf =GridSearchCV(estimator=gb, param_grid=self.getParamGrid(),scoring='mean_squared_error',cv=3,n_jobs=-1)
            #self.clf=gb
        elif self.regressorName=="ridge":
            self.clf = RidgeCV(alphas=(0.01, 0.1), fit_intercept=True, normalize=False, scoring=None, cv=5, gcv_mode=None, store_cv_values=False)
        elif self.regressorName=="linear":
            self.clf = LinearRegression()  # LinearRegression accepts no alpha/max_iter; those belong to regularized models like Ridge/Lasso
        elif self.regressorName=="lasso":
            self.clf = LassoCV(cv=10)
        elif self.regressorName=="svr":
            self.clf = SVR(kernel='rbf', C=0.2, gamma=0.01)
        elif self.regressorName=="knn":
            self.clf = neighbors.KNeighborsRegressor(1, weights='distance',n_jobs=-1)
        elif self.regressorName=="gauss":
            self.clf = TheilSenRegressor()
Example #35
def fit_TheilSen(features_train, labels_train, features_pred):
    model = TheilSenRegressor()
    model.fit(features_train, labels_train)
    labels_pred = model.predict(features_pred)
    print("TheilSen - coefficient of determination R^2 of the prediction:",
          model.score(features_train, labels_train))
    return labels_pred
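A quick usage sketch with invented data (assumes `numpy` and the `TheilSenRegressor` import):

import numpy as np
from sklearn.linear_model import TheilSenRegressor

X_train = np.arange(50, dtype=float).reshape(-1, 1)
y_train = 0.7 * X_train.ravel() + 3.0
X_new = np.array([[60.0], [70.0]])
print(fit_TheilSen(X_train, y_train, X_new))  # approximately [45., 52.]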