Example #1
class Simple:
    def __init__(self, a, b, c, d):
        self.model = TheilSenRegressor()

    def update_a_b(self, x, y):
        self.model.fit(x.reshape(-1, 1), y)

    def set_c_d(self, c, d):
        pass

    def get_y(self, x):
        return self.model.predict(x.reshape(-1, 1))

    def get_likelihood(self, x, y):
        return 1 / float(x.shape[0]) * np.sum(np.abs(y - self.get_y(x)))

    def to_string(self):
        return "a:{}, b:{}".format(self.model.coef_, self.model.intercept_)

    def get_a_b(self):
        return self.model.coef_, self.model.intercept_

    @staticmethod
    def var_to_weight(v):
        return 1

    @staticmethod
    def get_c_d(x, r):
        return None, None
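A minimal usage sketch for the wrapper above (the toy data and the placeholder constructor arguments are illustrative assumptions, not part of the original snippet):

import numpy as np
from sklearn.linear_model import TheilSenRegressor

# Toy line y = 2x + 1 with one gross outlier; Theil-Sen largely ignores it.
x = np.arange(20, dtype=float)
y = 2 * x + 1
y[5] = 100.0  # outlier

model = Simple(None, None, None, None)  # a, b, c, d are unused by __init__
model.update_a_b(x, y)
print(model.to_string())           # slope ~2, intercept ~1
print(model.get_likelihood(x, y))  # mean absolute residual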
Example #2
def test_verbosity():
    X, y, w, c = gen_toy_problem_1d()
    # Check that Theil-Sen can be verbose
    with no_stdout_stderr():
        TheilSenRegressor(verbose=True, random_state=0).fit(X, y)
        TheilSenRegressor(verbose=True, max_subpopulation=10,
                          random_state=0).fit(X, y)
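no_stdout_stderr is a helper from the surrounding test module and is not shown here; a plausible sketch (an assumption, not the verbatim helper) is a context manager that silences console output while the verbose fits run:

import os
import sys
from contextlib import contextmanager

@contextmanager
def no_stdout_stderr():
    # Redirect stdout/stderr to os.devnull so verbose fitting output
    # does not pollute the test log, restoring them afterwards.
    old_stdout, old_stderr = sys.stdout, sys.stderr
    try:
        with open(os.devnull, "w") as devnull:
            sys.stdout = devnull
            sys.stderr = devnull
            yield
    finally:
        sys.stdout = old_stdout
        sys.stderr = old_stderr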
Example #3
def compute_quantal_size(scan):
    """ Estimate the unit change in calcium response corresponding to a unit change in
    pixel intensity (dubbed quantal size, lower is better).

    Assumes images are stationary from one timestep to the next. Under that
    assumption it computes a measure of noise variance per intensity level (which
    increases linearly given that imaging noise is Poisson), fits a line to it and
    uses the slope as the estimate.

    :param np.array scan: 3-dimensional scan (image_height, image_width, num_frames).

    :returns: int minimum pixel value in the scan (among values appearing at least a minimum number of times)
    :returns: int maximum pixel value in the scan (among values appearing at least a minimum number of times)
    :returns: np.array pixel intensities used for the estimation.
    :returns: np.array noise variances used for the estimation.
    :returns: float the estimated quantal size
    :returns: float the estimated zero value
    """
    # Set some params
    num_frames = scan.shape[2]
    min_count = num_frames * 0.1  # pixel values with fewer appearances will be ignored
    max_acceptable_intensity = 3000  # pixel values higher than this will be ignored

    # Make sure field is at least 32 bits (int16 would overflow when summed with itself)
    scan = scan.astype(np.float32, copy=False)

    # Create pixel values at each position in field
    eps = 1e-4 # needed for np.round to not be biased towards even numbers (0.5 -> 1, 1.5 -> 2, 2.5 -> 3, etc.)
    pixels = np.round((scan[:, :, :-1] + scan[:, :, 1:]) / 2 + eps)
    pixels = pixels.astype(np.int16 if np.max(abs(pixels)) < 2 ** 15 else np.int32)

    # Compute a good range of pixel values (common, not too bright values)
    unique_pixels, counts = np.unique(pixels, return_counts=True)
    min_intensity = min(unique_pixels[counts > min_count])
    max_intensity = max(unique_pixels[counts > min_count])
    max_acceptable_intensity = min(max_intensity, max_acceptable_intensity)
    pixels_mask = np.logical_and(pixels >= min_intensity, pixels <= max_acceptable_intensity)

    # Select pixels in good range
    pixels = pixels[pixels_mask]
    unique_pixels, counts = np.unique(pixels, return_counts=True)

    # Compute noise variance
    variances = ((scan[:, :, :-1] - scan[:, :, 1:]) ** 2 / 2)[pixels_mask]
    pixels -= min_intensity
    variance_sum = np.zeros(len(unique_pixels)) # sum of variances per pixel value
    for i in range(0, len(pixels), int(1e8)):  # chunk it for memory efficiency
        variance_sum += np.bincount(pixels[i: i + int(1e8)], weights=variances[i: i + int(1e8)],
                                    minlength=len(unique_pixels))[unique_pixels - min_intensity]
    unique_variances = variance_sum / counts # average variance per intensity

    # Compute quantal size (by fitting a linear regressor to predict the variance from intensity)
    X = unique_pixels.reshape(-1, 1)
    y = unique_variances
    model = TheilSenRegressor() # robust regression
    model.fit(X, y)
    quantal_size = model.coef_[0]
    zero_level = - model.intercept_ / model.coef_[0]

    return (min_intensity, max_intensity, unique_pixels, unique_variances,
           quantal_size, zero_level)
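A quick synthetic check of the routine above (all numbers invented for the demo): simulate a stationary scan whose pixel values are a fixed offset plus a gain times Poisson photon counts; the estimated quantal size and zero level should then recover the gain and the offset.

import numpy as np
from sklearn.linear_model import TheilSenRegressor

rng = np.random.default_rng(0)
true_quantal_size, true_zero = 50.0, 100.0
rates = rng.uniform(5, 30, size=(30, 30, 1))      # photon rate per pixel
photons = rng.poisson(rates, size=(30, 30, 500))  # 500 stationary frames
scan = true_zero + true_quantal_size * photons

results = compute_quantal_size(scan)
print('estimated quantal size:', results[4])  # ~50
print('estimated zero level:', results[5])    # ~100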
Example #4
def calculate_scaling_params(events, kmer_mean_levels):
    events = pd.DataFrame(events)
    events['pos'] = events['move'].cumsum()
    jump_positions = events[events['move'] > 1]['pos']
    jump_positions = set(jump_positions - 1) | set(jump_positions)
    nonjump_positions = set(events['pos']) - jump_positions
    if len(nonjump_positions) < MINIMUM_NONJUMP_POSITIONS:
        return

    statelevels = []
    statelevels_jump = []
    for pos, posevents in events.groupby('pos'):
        state = posevents['model_state'].iloc[0]
        if '_' in state:
            continue

        medlevel = posevents['mean'].median()
        if pos in nonjump_positions:
            statelevels.append([medlevel, kmer_mean_levels[state]])
        else:
            statelevels_jump.append([medlevel, kmer_mean_levels[state]])

    statelevels_jump = np.array(statelevels_jump)
    statelevels = np.array(statelevels)
    regr = TheilSenRegressor(random_state=922)
    regr.fit(statelevels[:, 0][:, np.newaxis], statelevels[:, 1])

    return regr.coef_[0], regr.intercept_
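A toy invocation of the function above. MINIMUM_NONJUMP_POSITIONS and the k-mer level table are module-level names in the original project; the values below are invented, with raw event levels constructed to satisfy model_level = 0.5 * raw_level + 10.

import numpy as np
import pandas as pd
from sklearn.linear_model import TheilSenRegressor

MINIMUM_NONJUMP_POSITIONS = 3  # hypothetical threshold

kmer_mean_levels = {'AAAAA': 80.0, 'AAAAT': 95.0, 'AAATT': 110.0, 'AATTC': 70.0}
events = pd.DataFrame({
    'move':        [0, 0, 1, 0, 1, 0, 1, 0],
    'mean':        [139.0, 141.0, 169.0, 171.0, 199.0, 201.0, 119.0, 121.0],
    'model_state': ['AAAAA', 'AAAAA', 'AAAAT', 'AAAAT',
                    'AAATT', 'AAATT', 'AATTC', 'AATTC'],
})
# 'pos' becomes 0,0,1,1,2,2,3,3; no move exceeds 1, so all positions are non-jump.
print(calculate_scaling_params(events, kmer_mean_levels))  # ~ (0.5, 10.0)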
Example #6
def robust_cor(x, y):
    if isinstance(x[0], list):
        x = list(map(list, zip(*x)))
    else:
        x = np.array(x).reshape(-1, 1)
    X = np.array(x)
    Y = np.array(y)
    theil_regr = TheilSenRegressor(random_state=42)
    theil_regr.fit(X, Y)
    y_pred = theil_regr.predict(X)
    res = y_pred - y
    tot_dev = y - np.mean(y)
    SSres = np.dot(res, res)
    SStot = np.dot(tot_dev, tot_dev)
    adjR2 = 1 - (SSres / SStot) * (X.shape[0] - 1) / (X.shape[0] - X.shape[1] - 1)
    sgn = np.sign(theil_regr.coef_)[0]
    if adjR2 > 0:
        corr_val = sgn * np.sqrt(adjR2)
    else:
        corr_val = 0
    return [
        corr_val, theil_regr.coef_, theil_regr.intercept_,
        theil_regr.breakdown_
    ]
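A usage sketch with invented data, showing the signed robust correlation on a noisy linear relationship:

import numpy as np
from sklearn.linear_model import TheilSenRegressor

rng = np.random.RandomState(0)
x = list(rng.uniform(0, 10, 50))
y = list(2.0 * np.array(x) + rng.normal(0, 0.5, 50))

corr_val, coef, intercept, breakdown = robust_cor(x, y)
print(corr_val)  # close to 1.0 for this strongly increasing relationship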
Example #7
def test_checksubparams_n_subsamples_if_less_samples_than_features():
    random_state = np.random.RandomState(0)
    n_samples, n_features = 10, 20
    X = random_state.normal(size=(n_samples, n_features))
    y = random_state.normal(size=n_samples)
    theil_sen = TheilSenRegressor(n_subsamples=9, random_state=0)
    with pytest.raises(ValueError):
        theil_sen.fit(X, y)
Example #8
class Regressor(BaseEstimator):
    def __init__(self):

        self.regressorName="linear"
        if self.regressorName=="rf":
            self.clf= RandomForestRegressor(n_estimators=30, max_depth=63,max_features=50, n_jobs=-1)
        elif self.regressorName=="gb":

            self.clf= GradientBoostingRegressor(alpha=0.9, init=None,max_depth=3, learning_rate=0.2, loss='ls'
                                ,max_features=None,min_samples_leaf=1, min_samples_split=2,min_weight_fraction_leaf=0.0
                                ,n_estimators=2500,presort='auto', random_state=None, subsample=1.0, verbose=0,warm_start=True)
            #self.clf =GridSearchCV(estimator=gb, param_grid=self.getParamGrid(),scoring='mean_squared_error',cv=3,n_jobs=-1)
            #self.clf=gb
        elif self.regressorName=="ridge":
            self.clf = RidgeCV(alphas=(0.01, 0.1), fit_intercept=True, normalize=False, scoring=None, cv=5, gcv_mode=None, store_cv_values=False)
        elif self.regressorName=="linear":
            self.clf = LinearRegression()
        elif self.regressorName=="lasso":
            self.clf = LassoCV(cv=10)
        elif self.regressorName=="svr":
             self.clf = SVR(kernel='rbf',C=0.2, gamma=0.01)
        elif self.regressorName=="knn":
            self.clf = neighbors.KNeighborsRegressor(1, weights='distance',n_jobs=-1)
        elif self.regressorName=="gauss":
            self.clf = TheilSenRegressor()

    def fit(self, X, y):
        X=csc_matrix(X)
        print "Training Algorithm"
        self.clf.fit(X, y)
        #print self.clf.best_estimator_

    def predict(self, X):
        X=csr_matrix(X)
        print "Testing Algorithm"
        return self.clf.predict(X)

    def getRegressor(self):
        return self.clf

    def getRegressorName(self):
        return self.regressorName

    def getParamGrid(self):
        if self.regressorName=="rf":
            defaultGrid=[None]
            maxDepthGrid=np.arange(10,70,7)
            maxFeaturesGrid=["sqrt","log2",None]
            maxTreesGrid=np.arange(10,100,10)
            param_grid = {'max_features': defaultGrid}
        elif self.regressorName == "gb":
            #maxDepthGrid=np.arange(3,20,5)
            learningRateGrid=np.arange(50,100,10)
            #param_grid = {'max_depth': maxDepthGrid}
            #param_grid={'loss':['ls', 'lad', 'huber', 'quantile']}
            param_grid={'alpha':[0.9]}
        return param_grid
Example #9
class Regressor(BaseEstimator):
    def __init__(self):
 
        self.regressorName="gb"
        if self.regressorName=="rf":
            self.clf= RandomForestRegressor(n_estimators=400, max_depth=63,max_features=50, n_jobs=-1)
        elif self.regressorName=="gb":
 
            self.clf= GradientBoostingRegressor(alpha=0.9, init=None,max_depth=3, learning_rate=0.2, loss='ls'
                                ,max_features=None,min_samples_leaf=1, min_samples_split=2,min_weight_fraction_leaf=0.0
                                ,n_estimators=2500,presort='auto', random_state=None, subsample=1.0, verbose=0,warm_start=True)
            #self.clf =GridSearchCV(estimator=gb, param_grid=self.getParamGrid(),scoring='mean_squared_error',cv=3,n_jobs=-1)
            #self.clf=gb
        elif self.regressorName=="ridge":
            self.clf = RidgeCV(alphas=(0.01, 0.1), fit_intercept=True, normalize=False, scoring=None, cv=5, gcv_mode=None, store_cv_values=False)
        elif self.regressorName=="linear":
            self.clf = LinearRegression()
        elif self.regressorName=="lasso":
            self.clf = LassoCV(cv=10)
        elif self.regressorName=="svr":
             self.clf = SVR(kernel='rbf',C=0.2, gamma=0.01)
        elif self.regressorName=="knn":
            self.clf = neighbors.KNeighborsRegressor(1, weights='distance',n_jobs=-1)
        elif self.regressorName=="gauss":
            self.clf = TheilSenRegressor()
 
    def fit(self, X, y):
        #X=csc_matrix(X)
        self.clf.fit(X, y)
        #print self.clf.best_estimator_
 
    def predict(self, X):
        #X=csr_matrix(X)
        return self.clf.predict(X)
 
    def getRegressor(self):
        return self.clf
 
    def getRegressorName(self):
        return self.regressorName
 
    def getParamGrid(self):
        if self.regressorName=="rf":
            defaultGrid=[None]
            maxDepthGrid=np.arange(10,70,7)
            maxFeaturesGrid=["sqrt","log2",None]
            maxTreesGrid=np.arange(10,100,10)
            param_grid = {'max_features': defaultGrid}
        elif self.regressorName == "gb":
            #maxDepthGrid=np.arange(3,20,5)
            learningRateGrid=np.arange(50,100,10)
            #param_grid = {'max_depth': maxDepthGrid}
            #param_grid={'loss':['ls', 'lad', 'huber', 'quantile']}
            param_grid={'alpha':[0.9]}
        return param_grid
Example #10
def _fit_robust_line(shifts):
    """ Use a robust linear regression algorithm to fit a line to the data."""
    from sklearn.linear_model import TheilSenRegressor

    X = np.arange(len(shifts)).reshape(-1, 1)
    y = shifts
    model = TheilSenRegressor() # robust regression
    model.fit(X, y)
    line = model.predict(X)

    return line
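For instance (synthetic drift data, invented for the demo), the fitted line can be subtracted from a shift trace to remove linear drift while ignoring occasional registration failures:

import numpy as np

rng = np.random.RandomState(0)
shifts = 0.05 * np.arange(200) + rng.normal(0, 0.2, 200)
shifts[42] += 15  # a single bad registration result

trend = _fit_robust_line(shifts)
detrended = shifts - trend  # residual motion once the drift is removed
print(trend[0], trend[-1])  # endpoints of the fitted drift line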
Example #11
 def trainAlgo(self):
     self.model = TheilSenRegressor(
         fit_intercept=self.param['fit_intercept'],
         copy_X=self.param['copy_X'],
         max_subpopulation=self.param['max_subpopulation'],
         n_subsamples=self.param['n_subsamples'],
         max_iter=self.param['max_iter'],
         tol=self.param['tol'],
         random_state=self.param['random_state'],
         verbose=self.param['verbose'],
     )
     self.model.fit(self.inputData['X'], self.outputData['Y'])
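trainAlgo reads every TheilSenRegressor constructor argument from self.param; a plausible configuration dictionary, using scikit-learn's documented defaults (the original project's values are unknown), would be:

param = {
    'fit_intercept': True,
    'copy_X': True,
    'max_subpopulation': 10000,  # cap on the number of subsets considered
    'n_subsamples': None,        # None lets scikit-learn pick the subset size
    'max_iter': 300,
    'tol': 1e-3,
    'random_state': 0,           # illustrative; scikit-learn's default is None
    'verbose': False,
}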
Example #12
def test_theil_sen_1d_no_intercept():
    X, y, w, c = gen_toy_problem_1d(intercept=False)
    # Check that Least Squares fails
    lstq = LinearRegression(fit_intercept=False).fit(X, y)
    assert np.abs(lstq.coef_ - w - c) > 0.5
    # Check that Theil-Sen works
    theil_sen = TheilSenRegressor(fit_intercept=False, random_state=0).fit(X, y)
    assert_array_almost_equal(theil_sen.coef_, w + c, 1)
    assert_almost_equal(theil_sen.intercept_, 0.0)

    # non-regression test for #18104
    theil_sen.score(X, y)
Example #13
def createTheilSenRegressor(params):
    info("Creating TheilSen Regressor", ind=4)

    ## Params
    params = mergeParams(TheilSenRegressor(), params)
    tuneParams = getTheilSenRegressorParams()
    info("Without Parameters", ind=4)

    ## estimator
    reg = TheilSenRegressor()

    return {"estimator": reg, "params": tuneParams}
Example #14
def theilsen_regress_predict(var):
    """
    Input:-
    var: 1-D array var
    regressortype = LinearRegression, TheilSenRegressor

    Output: regression coefficient

    """
    regressor = TheilSenRegressor()
    y = np.asarray(var).ravel()  # 1-D target avoids a shape warning on fit
    X = np.arange(len(y)).reshape(-1, 1)
    regressor.fit(X, y)
    return regressor.predict(X)
Example #15
def _fit_theil_sen_one_track(x_coords_metres, y_coords_metres,
                             valid_times_unix_sec):
    """Fits Theil-Sen model for one storm track.

    P = number of points in track

    :param x_coords_metres: length-P numpy array of x-coordinates.
    :param y_coords_metres: length-P numpy array of y-coordinates.
    :param valid_times_unix_sec: length-P numpy array of times.
    :return: theil_sen_dict: Dictionary with the following keys.
    theil_sen_dict['x_intercept_metres']: x-intercept.
    theil_sen_dict['x_velocity_m_s01']: x-velocity (metres per second).
    theil_sen_dict['y_intercept_metres']: y-intercept.
    theil_sen_dict['y_velocity_m_s01']: y-velocity (metres per second).
    """

    num_points = len(x_coords_metres)
    valid_times_unix_sec = numpy.reshape(valid_times_unix_sec, (num_points, 1))

    model_object_for_x = TheilSenRegressor(fit_intercept=True)
    model_object_for_x.fit(valid_times_unix_sec, x_coords_metres)
    model_object_for_y = TheilSenRegressor(fit_intercept=True)
    model_object_for_y.fit(valid_times_unix_sec, y_coords_metres)

    return {
        X_INTERCEPT_KEY: model_object_for_x.intercept_,
        X_VELOCITY_KEY: model_object_for_x.coef_,
        Y_INTERCEPT_KEY: model_object_for_y.intercept_,
        Y_VELOCITY_KEY: model_object_for_y.coef_
    }
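A synthetic sanity check (the *_KEY constants are module-level names in the original project; the string values below are assumptions inferred from the docstring):

import numpy
from sklearn.linear_model import TheilSenRegressor

X_INTERCEPT_KEY, X_VELOCITY_KEY = 'x_intercept_metres', 'x_velocity_m_s01'
Y_INTERCEPT_KEY, Y_VELOCITY_KEY = 'y_intercept_metres', 'y_velocity_m_s01'

# A storm moving east at 10 m/s and north at 5 m/s, sampled every 5 minutes.
valid_times_unix_sec = numpy.arange(0, 3600, 300)
x_coords_metres = 1000.0 + 10.0 * valid_times_unix_sec
y_coords_metres = 2000.0 + 5.0 * valid_times_unix_sec

theil_sen_dict = _fit_theil_sen_one_track(
    x_coords_metres, y_coords_metres, valid_times_unix_sec)
print(theil_sen_dict[X_VELOCITY_KEY])  # ~[10.]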
Example #16
def test_less_samples_than_features():
    random_state = np.random.RandomState(0)
    n_samples, n_features = 10, 20
    X = random_state.normal(size=(n_samples, n_features))
    y = random_state.normal(size=n_samples)
    # Check that Theil-Sen falls back to Least Squares if fit_intercept=False
    theil_sen = TheilSenRegressor(fit_intercept=False, random_state=0).fit(X, y)
    lstq = LinearRegression(fit_intercept=False).fit(X, y)
    assert_array_almost_equal(theil_sen.coef_, lstq.coef_, 12)
    # Check fit_intercept=True case. This will not be equal to the Least
    # Squares solution since the intercept is calculated differently.
    theil_sen = TheilSenRegressor(fit_intercept=True, random_state=0).fit(X, y)
    y_pred = theil_sen.predict(X)
    assert_array_almost_equal(y_pred, y, 12)
Example #17
class _TheilSenRegressorImpl:
    def __init__(self, **hyperparams):
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        if y is not None:
            self._wrapped_model.fit(X, y)
        else:
            self._wrapped_model.fit(X)
        return self

    def predict(self, X):
        return self._wrapped_model.predict(X)
Example #18
 def __init__(self,
              fit_intercept=True,
              copy_X=True,
              max_subpopulation=1e4,
              n_subsamples=None,
              max_iter=300,
              tol=1.e-3,
              random_state=None,
              n_jobs=1,
              verbose=False):
     max_iter = int(max_iter)
     _TheilSenRegressor.__init__(self, fit_intercept, copy_X,
                                 max_subpopulation, n_subsamples, max_iter,
                                 tol, random_state, n_jobs, verbose)
     BaseWrapperReg.__init__(self)
Example #19
def theilsen_regress_coeff(var, a):
    """
    Input:-
    var: 1-D array var
    a: 1-D array index
    regressortype = LinearRegression, TheilSenRegressor

    Output: regression coefficient

    """
    regressor = TheilSenRegressor()
    y = np.asarray(var).ravel()  # 1-D target avoids a shape warning on fit
    X = a.reshape(-1, 1)
    regressor.fit(X, y)
    return np.array([regressor.coef_])
Example #21
    def _regress_a(X, y, robust, n_jobs):
        """
        Calculates the slope and intercept
        """

        if robust:
            model = TheilSenRegressor(n_jobs=n_jobs)
        else:
            model = LinearRegression(n_jobs=n_jobs)

        model.fit(X, y)

        slope_m = model.coef_[0]
        intercept_b = model.intercept_

        return slope_m, intercept_b
Example #22
def get_best_degree(data):
    degrees = list(range(1, 6))

    errors = []

    for deg in degrees:
        reg = Pipeline([
            ("quad", PolynomialFeatures(degree=deg)),
            (
                "linear",
                TheilSenRegressor(max_subpopulation=50, max_iter=300),
            ),
        ])

        numDims = np.size(data, 1)

        X = data[:, 0:numDims - 1]  # noqa
        Y = data[:, numDims - 1]

        reg.fit(X, Y)

        out = reg.predict(X)

        Sr = np.sum(np.square(Y - out))

        errors.append(Sr)

    min_degree = degrees[np.argmin(errors)]

    return min_degree
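A usage sketch with invented noisy quadratic data; note that the criterion is training error, so the returned degree is only a heuristic:

import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import TheilSenRegressor

rng = np.random.RandomState(0)
x = rng.uniform(-3, 3, 120)
y = 1.5 * x ** 2 - x + 2 + rng.normal(0, 0.3, 120)  # quadratic + noise
data = np.column_stack([x, y])

print(get_best_degree(data))  # degree whose robust fit has the smallest training SSE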
Example #23
def test_subsamples():
    X, y, w, c = gen_toy_problem_4d()
    theil_sen = TheilSenRegressor(n_subsamples=X.shape[0],
                                  random_state=0).fit(X, y)
    lstq = LinearRegression().fit(X, y)
    # Check for exact the same results as Least Squares
    assert_array_almost_equal(theil_sen.coef_, lstq.coef_, 9)
Example #24
def log_log_robust_regression(cfs, y, kind=0):
    assert y.shape[0] == 40
    y = y.reshape(40, -1)
    x = np.tile(cfs[:, np.newaxis], (1, y.shape[1]))
    y = np.log(y).ravel()
    x = np.log(x).ravel()[:, np.newaxis]
    if kind == 0:
        model = RANSACRegressor()
    elif kind == 1:
        model = TheilSenRegressor(n_jobs=-1)
    elif kind == 2:
        model = HuberRegressor()
    else:
        raise ValueError
    model.fit(x, y)
    yp = model.predict(x)
    u = np.square(y - yp)
    v = np.square(y - y.mean())
    R2 = 1. - u / v
    if kind == 0:
        return (model.estimator_.coef_, model.estimator_.intercept_,
                np.median(R2))
    elif kind in [1, 2]:
        return model.coef_, model.intercept_, np.median(R2)
    else:
        raise ValueError
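A synthetic power-law example (all values invented): on log-log axes the Theil-Sen slope recovers the exponent.

import numpy as np
from sklearn.linear_model import RANSACRegressor, TheilSenRegressor, HuberRegressor

# y = 3 * f^(-1.2) with multiplicative noise, 5 repeats per frequency.
cfs = np.linspace(1.0, 64.0, 40)  # 40 center frequencies, as the assert requires
y = 3.0 * cfs[:, np.newaxis] ** -1.2 * np.exp(
    np.random.RandomState(0).normal(0, 0.05, (40, 5)))

coef, intercept, med_r2 = log_log_robust_regression(cfs, y, kind=1)
print(coef)  # ~[-1.2], the power-law exponent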
Example #25
def show():
    X = [1, 2, 3, 4, 5, 6, 7, 8, 9, 4, 5, 0]
    y = [0, 5, 9, 12, 13, 12, 9, 5, 0, 1, 0, 7]
    X = list(map(lambda x: [x], X))

    import pylab

    pylab.scatter(X, y)

    from sklearn.preprocessing import PolynomialFeatures
    from sklearn.pipeline import make_pipeline
    import numpy as np
    from sklearn.linear_model import LinearRegression, TheilSenRegressor, HuberRegressor, RANSACRegressor

    for regressor in [
        [LinearRegression(), "linreg"],
        [TheilSenRegressor(), "theil-sen"],
        [HuberRegressor(), "huber"],
        [RANSACRegressor(), "ransac"],
    ]:
        model = make_pipeline(PolynomialFeatures(2), regressor[0])
        model.fit(X, y)

        print("")
        print(regressor[1])
        print(model.score(X, y))

        test_x = np.linspace(-1, 10, 100)
        test_y = []
        for x in test_x:
            test_y.append(model.predict([[x]])[0])

        pylab.plot(test_x, test_y, label=regressor[1])
    pylab.legend(loc="best")
    pylab.show()
Example #26
def underline_regression(x, y, method="ramp"):
    start_params = guess(x, y)
    if method == "ramp":
        reg = minimize(asymmetric_ramp_loss,
                       x0=start_params,
                       args=(x, y),
                       bounds=((None, None), (0, None)),
                       method="Powell")
    elif method == 'quadratic' or method == "parabolic":
        reg = ParabolicRegressor.regress(x, y)
        return reg
    elif method == "squashed":
        reg = minimize(squashed_loss,
                       x0=start_params,
                       jac=squashed_grad,
                       args=(x, y),
                       bounds=((None, None), (0, 1)),
                       method="L-BFGS-B")
    elif method == "median":
        y = y.reshape(-1, 1)
        X = np.vstack((np.ones(y.shape).transpose(),
                       x.reshape(-1, 1).transpose()))
        reg = TheilSenRegressor(random_state=0).fit(X.transpose(), np.ravel(y))
        offset = np.min(subtract_bg(y, x, [reg.coef_[0], reg.coef_[1]]))
        return np.array([reg.coef_[0] + offset, reg.coef_[1]])
    elif method == "huber":
        reg = HubelRegressor.regress(x, y)
        return reg
    return (reg.x[0], reg.x[1])
Example #27
    def _cfunc_theilsen(x, y):
        """
        Get Theil-Sen regression score for data set.

        Args:
            x: (list<float>) independent property (x-axis)
            y: (list<float>) dependent property (y-axis)

        Returns: (float) Theil-Sen score

        """
        from sklearn.linear_model import TheilSenRegressor
        r = TheilSenRegressor(random_state=21)
        x_coeff = np.array(x)[:, np.newaxis]
        r.fit(x_coeff, y)
        return r.score(x_coeff, y)
Example #28
    def fit(self, smiles_list, logS_list):
        X = []
        y = []
        for i, smiles in enumerate(smiles_list):
            mol = Chem.MolFromSmiles(smiles)
            (mw, logp, rotors, ap) = self._calc_esol_descriptors(mol)
            X.append([mw, logp, rotors, ap])
            y.append(logS_list[i])

        if self.model == 'linear':
            model = LinearRegression()
        elif self.model == 'pls':
            model = PLSRegression(n_components=2)
        elif self.model == 'huber':
            model = HuberRegressor(epsilon=1.5, alpha=2.0)
        elif self.model == 'ts':
            logging.debug(f'Model: {self.model}')
            model = TheilSenRegressor()
        else:
            self.model = 'linear'
            model = LinearRegression()

        logging.debug(f'Model: {self.model}')

        model.fit(X, y)
        self._intercept = model.intercept_
        self._coef["MW"] = model.coef_[0]
        self._coef["LogP"] = model.coef_[1]
        self._coef["RB"] = model.coef_[2]
        self._coef["AP"] = model.coef_[3]
Example #29
def regression(
    data,
    theilsen_max_iter=100,
    order="auto",
    threshold_multiplier=2,
):
    if order == "auto":
        order = get_best_degree(data)
    elif not isinstance(order, int):
        order = 1

    reg = Pipeline([
        ("quad", PolynomialFeatures(degree=order)),
        (
            "linear",
            TheilSenRegressor(max_subpopulation=50,
                              max_iter=theilsen_max_iter),
        ),
    ])

    numDims = np.size(data, 1)

    X = data[:, 0:numDims - 1]  # noqa
    Y = data[:, numDims - 1]

    inlier_mask = np.ones(np.size(data, 0), dtype=bool)

    mask_length = 0
    threshold = 0

    for _ in range(10):
        if mask_length == sum(inlier_mask):
            break
        else:
            mask_length = sum(inlier_mask)

        inlier_mask = inlier_mask.astype(bool)
        i_X = X[inlier_mask]
        i_Y = Y[inlier_mask]

        if i_X.shape[0] == 0:
            inlier_mask = inlier_mask.astype(int)
            break

        reg.fit(i_X, i_Y)
        ts = reg.predict(X)

        residuals = abs(ts - Y)

        inlier_residuals = abs(reg.predict(i_X) - i_Y)

        threshold = np.median(inlier_residuals)

        within = residuals < (threshold_multiplier * threshold)

        inlier_mask = within.astype(int)

    return reg, inlier_mask, threshold_multiplier * threshold, order
Example #30
    def fit(self, X, y, random_state=None):
        """
        Train ENOLS on the given training set.

        Parameters
        ----------
        X: an input array of shape (n_sample, n_features)
        y: an array of shape (n_sample,) containing the target values for the input examples

        Return
        ------
        self: the fitted model
        """

        # use random instead of np.random to sample random numbers below
        random = check_random_state(random_state)

        estimators = [('lr', LinearRegression())]

        if isinstance(self.sample_size, int):
            self.sample_size = 'reservoir_sampling'

        # add all the trained OLS models to this list
        self.estimators_lr, self.estimators_TSR, self.estimators_enols = [], [], []
        for _ in range(self.n_estimators):
            # draw row indices from the training set (the population must be its size)
            samples = sample_without_replacement(n_population=X.shape[0],
                                                 n_samples=random.choice([10, 20]),
                                                 random_state=random_state, method=self.sample_size)

            X_train, y_train = [], []
            for idx in samples:
                X_train.append(X[idx]), y_train.append(y[idx])

            reg = LinearRegression()
            reg.fit(np.array(X_train), np.array(y_train))

            tsr = TheilSenRegressor()
            tsr.fit(np.array(X_train), np.array(y_train))

            enol = StackingRegressor(estimators=estimators, final_estimator=LinearRegression())
            enol.fit(np.array(X_train), np.array(y_train))

            self.estimators_lr.append(reg), self.estimators_TSR.append(tsr), self.estimators_enols.append(enol)

        return self
Example #31
def test_theil_sen_1d():
    X, y, w, c = gen_toy_problem_1d()
    # Check that Least Squares fails
    lstq = LinearRegression().fit(X, y)
    assert np.abs(lstq.coef_ - w) > 0.9
    # Check that Theil-Sen works
    theil_sen = TheilSenRegressor(random_state=0).fit(X, y)
    assert_array_almost_equal(theil_sen.coef_, w, 1)
    assert_array_almost_equal(theil_sen.intercept_, c, 1)
Example #32
def get_models():
    models = list()
    models.append(LinearRegression(fit_intercept=False))
    models.append(HuberRegressor(fit_intercept=False))
    #models.append(RANSACRegressor())  # fit_intercept=False is not supported: RANSAC has no option to skip the intercept
    models.append(
        TheilSenRegressor(fit_intercept=False)
    )  # Struggling a bit with this one, as the output varies a lot with n_subsamples (if n_subsamples=1 it returns the median of the ratios; if it equals the number of data points it returns essentially the least-squares fit)
    return models
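A usage sketch with invented contaminated ratio data, comparing the slope each model recovers when forced through the origin:

import numpy as np
from sklearn.linear_model import LinearRegression, HuberRegressor, TheilSenRegressor

rng = np.random.RandomState(0)
X = rng.uniform(1, 10, 100).reshape(-1, 1)
y = 2.5 * X.ravel() + rng.normal(0, 0.1, 100)
y[:5] += 20  # a few contaminated points

for model in get_models():
    model.fit(X, y)
    print(type(model).__name__, model.coef_[0])  # least squares is pulled by the outliers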
Example #33
def test_theil_sen_2d():
    X, y, w, c = gen_toy_problem_2d()
    # Check that Least Squares fails
    lstq = LinearRegression().fit(X, y)
    assert norm(lstq.coef_ - w) > 1.0
    # Check that Theil-Sen works
    theil_sen = TheilSenRegressor(max_subpopulation=1e3, random_state=0).fit(X, y)
    assert_array_almost_equal(theil_sen.coef_, w, 1)
    assert_array_almost_equal(theil_sen.intercept_, c, 1)
Example #34
    def __init__(self):

        self.regressorName="gb"
        if self.regressorName=="rf":
            self.clf= RandomForestRegressor(n_estimators=400, max_depth=63,max_features=50, n_jobs=-1)
        elif self.regressorName=="gb":

            self.clf= GradientBoostingRegressor(alpha=0.9, init=None,max_depth=3, learning_rate=0.2, loss='ls'
                                ,max_features=None,min_samples_leaf=1, min_samples_split=2,min_weight_fraction_leaf=0.0
                                ,n_estimators=2500,presort='auto', random_state=None, subsample=1.0, verbose=0,warm_start=True)
            #self.clf =GridSearchCV(estimator=gb, param_grid=self.getParamGrid(),scoring='mean_squared_error',cv=3,n_jobs=-1)
            #self.clf=gb
        elif self.regressorName=="ridge":
            self.clf = RidgeCV(alphas=(0.01, 0.1), fit_intercept=True, normalize=False, scoring=None, cv=5, gcv_mode=None, store_cv_values=False)
        elif self.regressorName=="linear":
            self.clf = LinearRegression()
        elif self.regressorName=="lasso":
            self.clf = LassoCV(cv=10)
        elif self.regressorName=="svr":
             self.clf = SVR(kernel='rbf',C=0.2, gamma=0.01)
        elif self.regressorName=="knn":
            self.clf = neighbors.KNeighborsRegressor(1, weights='distance',n_jobs=-1)
        elif self.regressorName=="gauss":
            self.clf = TheilSenRegressor()
Example #35
def fit_TheilSen(features_train, labels_train, features_pred):
    model = TheilSenRegressor()
    model.fit(features_train, labels_train)
    labels_pred = model.predict(features_pred)
    print("TheilSen - coefficient of determination R^2 of the prediction:",
          model.score(features_train, labels_train))
    return labels_pred
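A minimal invocation with synthetic data (invented for the demo):

import numpy as np
from sklearn.linear_model import TheilSenRegressor

rng = np.random.RandomState(0)
features_train = rng.uniform(0, 1, (40, 2))
labels_train = features_train @ np.array([3.0, -1.0]) + 0.5
features_pred = rng.uniform(0, 1, (10, 2))

labels_pred = fit_TheilSen(features_train, labels_train, features_pred)
print(labels_pred[:3])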