class Simple:
    """Straight-line model whose slope/intercept come from a Theil-Sen fit.

    The ``c``/``d`` parts of the interface are placeholders: they are
    accepted but never stored or used by this class.
    """

    def __init__(self, a, b, c, d):
        # None of the four arguments is used; the model starts unfitted.
        self.model = TheilSenRegressor()

    def update_a_b(self, x, y):
        """Refit the regressor on ``x`` (reshaped to one column) and ``y``."""
        column = x.reshape(-1, 1)
        self.model.fit(column, y)

    def set_c_d(self, c, d):
        """Accept ``c`` and ``d`` and do nothing with them."""
        return None

    def get_y(self, x):
        """Return the model prediction for ``x`` (reshaped to one column)."""
        column = x.reshape(-1, 1)
        return self.model.predict(column)

    def get_likelihood(self, x, y):
        """Return the mean absolute difference between ``y`` and the prediction."""
        # The scale is computed first so an empty ``x`` fails on the division.
        scale = 1 / float(x.shape[0])
        return scale * np.sum(np.abs(y - self.get_y(x)))

    def to_string(self):
        """Return the fitted coefficient and intercept as ``"a:..., b:..."``."""
        return "a:{}, b:{}".format(*self.get_a_b())

    def get_a_b(self):
        """Return ``(coef_, intercept_)`` of the fitted regressor."""
        fitted = self.model
        return fitted.coef_, fitted.intercept_

    @staticmethod
    def var_to_weight(v):
        """Return the constant weight 1 regardless of ``v``."""
        return 1

    @staticmethod
    def get_c_d(x, r):
        """Return ``(None, None)``; this model has no ``c``/``d`` values."""
        return None, None
def test_verbosity():
    """Fit Theil-Sen twice with ``verbose=True``; the test passes if nothing raises."""
    X, y, w, c = gen_toy_problem_1d()
    # Second configuration additionally sets a small max_subpopulation.
    verbose_configs = (
        {"verbose": True, "random_state": 0},
        {"verbose": True, "max_subpopulation": 10, "random_state": 0},
    )
    # Fits run inside the no_stdout_stderr() context manager.
    with no_stdout_stderr():
        for config in verbose_configs:
            TheilSenRegressor(**config).fit(X, y)
def compute_quantal_size(scan):
    """ Estimate the unit change in calcium response corresponding to a unit change in
    pixel intensity (dubbed quantal size, lower is better).

    Assumes images are stationary from one timestep to the next. Uses it to calculate a
    measure of noise per bright intensity (which increases linearly given that imaging
    noise is poisson), fits a line to it and uses the slope as the estimate.

    :param np.array scan: 3-dimensional scan (image_height, image_width, num_frames).

    :returns: int minimum pixel value in the scan (that appears a min number of times)
    :returns: int maximum pixel value in the scan (that appears a min number of times)
    :returns: np.array pixel intensities used for the estimation.
    :returns: np.array noise variances used for the estimation.
    :returns: float the estimated quantal size
    :returns: float the estimated zero value

    :raises ValueError: if no pixel value appears more than 10% of num_frames times.
    """
    # Set some params
    num_frames = scan.shape[2]
    min_count = num_frames * 0.1  # pixel values with fewer appearances will be ignored
    max_acceptable_intensity = 3000  # pixel values higher than this will be ignored
    chunk_size = int(1e8)  # number of pixels histogrammed at once (memory efficiency)

    # Make sure field is at least 32 bits (int16 overflows if summed to itself)
    scan = scan.astype(np.float32, copy=False)

    # Create pixel values at each position in field
    eps = 1e-4  # needed for np.round to not be biased towards even numbers (0.5 -> 1, 1.5 -> 2, 2.5 -> 3, etc.)
    pixels = np.round((scan[:, :, :-1] + scan[:, :, 1:]) / 2 + eps)
    pixels = pixels.astype(np.int16 if np.max(abs(pixels)) < 2 ** 15 else np.int32)

    # Compute a good range of pixel values (common, not too bright values)
    unique_pixels, counts = np.unique(pixels, return_counts=True)
    common_pixels = unique_pixels[counts > min_count]
    min_intensity = common_pixels.min()
    max_intensity = common_pixels.max()
    max_acceptable_intensity = min(max_intensity, max_acceptable_intensity)
    pixels_mask = np.logical_and(pixels >= min_intensity,
                                 pixels <= max_acceptable_intensity)

    # Select pixels in good range
    pixels = pixels[pixels_mask]
    unique_pixels, counts = np.unique(pixels, return_counts=True)

    # Compute noise variance
    variances = ((scan[:, :, :-1] - scan[:, :, 1:]) ** 2 / 2)[pixels_mask]

    # Histogram bins are intensities shifted so that min_intensity maps to bin 0. The
    # shift is done in a wide integer type: doing it in int16 could wrap around.
    offset = int(min_intensity)
    bin_index = unique_pixels.astype(np.intp) - offset
    # np.unique returns sorted values, so the last bin is the largest one. Every chunk
    # must produce at least this many bins, otherwise indexing with bin_index fails for
    # chunks that do not contain the brightest intensity (intensities may have gaps).
    num_bins = int(bin_index[-1]) + 1

    variance_sum = np.zeros(len(unique_pixels))  # sum of variances per pixel value
    for start in range(0, len(pixels), chunk_size):  # chunk it for memory efficiency
        stop = start + chunk_size
        chunk_bins = pixels[start:stop].astype(np.intp) - offset
        variance_sum += np.bincount(chunk_bins, weights=variances[start:stop],
                                    minlength=num_bins)[bin_index]
    unique_variances = variance_sum / counts  # average variance per intensity

    # Compute quantal size (by fitting a linear regressor to predict the variance from intensity)
    X = unique_pixels.reshape(-1, 1)
    y = unique_variances
    model = TheilSenRegressor()  # robust regression
    model.fit(X, y)
    quantal_size = model.coef_[0]
    zero_level = - model.intercept_ / model.coef_[0]  # intensity at which the fitted variance is zero

    return (min_intensity, max_intensity, unique_pixels, unique_variances,
            quantal_size, zero_level)
def calculate_scaling_params(events, kmer_mean_levels):
    """Fit a robust line mapping observed event levels to model k-mer levels.

    Positions are the cumulative sum of the ``move`` column. A position reached
    by a move larger than 1, and the position right before it, count as "jump"
    positions and are excluded from the fit.

    :param events: anything accepted by ``pd.DataFrame`` that yields the
        columns ``move``, ``model_state`` and ``mean``.
    :param kmer_mean_levels: mapping from model state to its expected level.
    :returns: ``(slope, intercept)`` of the Theil-Sen fit, or ``None`` when
        fewer than ``MINIMUM_NONJUMP_POSITIONS`` non-jump positions exist or
        none of them has a usable state.
    """
    events = pd.DataFrame(events)
    events['pos'] = events['move'].cumsum()

    jump_positions = events[events['move'] > 1]['pos']
    jump_positions = set(jump_positions - 1) | set(jump_positions)
    nonjump_positions = set(events['pos']) - jump_positions

    if len(nonjump_positions) < MINIMUM_NONJUMP_POSITIONS:
        return None

    # One (observed median level, expected level) pair per usable position.
    statelevels = []
    for pos, posevents in events.groupby('pos'):
        if pos not in nonjump_positions:
            continue
        state = posevents['model_state'].iloc[0]
        if '_' in state:  # states containing an underscore are skipped
            continue
        statelevels.append([posevents['mean'].median(), kmer_mean_levels[state]])

    if not statelevels:
        # Nothing left to regress on; same "not enough data" signal as above.
        return None

    statelevels = np.array(statelevels)
    regr = TheilSenRegressor(random_state=922)
    regr.fit(statelevels[:, 0][:, np.newaxis], statelevels[:, 1])
    return regr.coef_[0], regr.intercept_
def robust_cor(x, y):
    """Compute a signed correlation-like value from a Theil-Sen fit of ``y`` on ``x``.

    ``x`` is either a flat sequence (a single feature) or a list of lists, in
    which case it is transposed before fitting.

    Returns ``[corr_val, coef_, intercept_, breakdown_]`` where ``corr_val`` is
    ``sign(coef_[0]) * sqrt(adjusted R^2)``, or 0 when the adjusted R^2 is not
    positive.
    """
    if isinstance(x[0], list):
        x = [list(column) for column in zip(*x)]
    else:
        x = np.array(x).reshape(-1, 1)
    X = np.array(x)
    Y = np.array(y)

    regressor = TheilSenRegressor(random_state=42)
    regressor.fit(X, Y)

    residuals = regressor.predict(X) - y
    deviations = y - np.mean(y)
    ss_res = np.dot(residuals, residuals)
    ss_tot = np.dot(deviations, deviations)

    # Adjusted R^2: penalise by the number of features.
    adj_r2 = 1 - (ss_res / ss_tot) * (X.shape[0] - 1) / (X.shape[0] - X.shape[1] - 1)

    sign = np.sign(regressor.coef_)[0]  # sign of the first coefficient only
    corr_val = sign * np.sqrt(adj_r2) if adj_r2 > 0 else 0

    return [corr_val, regressor.coef_, regressor.intercept_, regressor.breakdown_]
def test_checksubparams_n_subsamples_if_less_samples_than_features():
    """Fitting 10 samples x 20 features with ``n_subsamples=9`` must raise ValueError."""
    rng = np.random.RandomState(0)
    n_samples, n_features = 10, 20
    X = rng.normal(size=(n_samples, n_features))
    y = rng.normal(size=n_samples)

    estimator = TheilSenRegressor(n_subsamples=9, random_state=0)
    with pytest.raises(ValueError):
        estimator.fit(X, y)
class Regressor(BaseEstimator):
    """Wrapper that builds one scikit-learn regressor, chosen by name, and delegates to it.

    The backend is selected by the hard-coded ``regressorName`` in ``__init__``.
    """

    def __init__(self):
        # Edit this name to switch backend; it currently selects LinearRegression.
        self.regressorName = "linear"

        if self.regressorName == "rf":
            self.clf = RandomForestRegressor(n_estimators=30, max_depth=63, max_features=50, n_jobs=-1)
        elif self.regressorName == "gb":
            self.clf = GradientBoostingRegressor(alpha=0.9, init=None, max_depth=3, learning_rate=0.2, loss='ls',
                                                 max_features=None, min_samples_leaf=1, min_samples_split=2,
                                                 min_weight_fraction_leaf=0.0, n_estimators=2500, presort='auto',
                                                 random_state=None, subsample=1.0, verbose=0, warm_start=True)
        elif self.regressorName == "ridge":
            self.clf = RidgeCV(alphas=(0.01, 0.1), fit_intercept=True, normalize=False, scoring=None, cv=5,
                               gcv_mode=None, store_cv_values=False)
        elif self.regressorName == "linear":
            self.clf = LinearRegression()
        elif self.regressorName == "lasso":
            self.clf = LassoCV(cv=10)
        elif self.regressorName == "svr":
            self.clf = SVR(kernel='rbf', C=0.2, gamma=0.01)
        elif self.regressorName == "knn":
            self.clf = neighbors.KNeighborsRegressor(1, weights='distance', n_jobs=-1)
        elif self.regressorName == "gauss":
            # NOTE(review): the name says "gauss" but this builds a Theil-Sen regressor.
            self.clf = TheilSenRegressor()

    def fit(self, X, y):
        """Convert ``X`` to a CSC sparse matrix and fit the wrapped regressor."""
        X = csc_matrix(X)
        # Single-argument call form: prints the same text on Python 2 and 3.
        print("Training Algorithm")
        self.clf.fit(X, y)

    def predict(self, X):
        """Convert ``X`` to a CSR sparse matrix and return the wrapped regressor's predictions."""
        X = csr_matrix(X)
        print("Testing Algorithm")
        return self.clf.predict(X)

    def getRegressor(self):
        """Return the wrapped scikit-learn estimator."""
        return self.clf

    def getRegressorName(self):
        """Return the name of the selected backend."""
        return self.regressorName

    def getParamGrid(self):
        """Return the hyper-parameter grid for the selected backend.

        Only "rf" and "gb" define a grid; every other backend gets an empty
        grid (previously this raised UnboundLocalError).
        """
        if self.regressorName == "rf":
            return {'max_features': [None]}
        if self.regressorName == "gb":
            return {'alpha': [0.9]}
        return {}
class Regressor(BaseEstimator):
    """Wrapper that builds one scikit-learn regressor, chosen by name, and delegates to it.

    The backend is selected by the hard-coded ``regressorName`` in ``__init__``.
    """

    def __init__(self):
        # Edit this name to switch backend; it currently selects gradient boosting.
        self.regressorName = "gb"

        if self.regressorName == "rf":
            self.clf = RandomForestRegressor(n_estimators=400, max_depth=63, max_features=50, n_jobs=-1)
        elif self.regressorName == "gb":
            self.clf = GradientBoostingRegressor(alpha=0.9, init=None, max_depth=3, learning_rate=0.2, loss='ls',
                                                 max_features=None, min_samples_leaf=1, min_samples_split=2,
                                                 min_weight_fraction_leaf=0.0, n_estimators=2500, presort='auto',
                                                 random_state=None, subsample=1.0, verbose=0, warm_start=True)
        elif self.regressorName == "ridge":
            self.clf = RidgeCV(alphas=(0.01, 0.1), fit_intercept=True, normalize=False, scoring=None, cv=5,
                               gcv_mode=None, store_cv_values=False)
        elif self.regressorName == "linear":
            # LinearRegression takes no ``alpha``/``max_iter``; passing them
            # raised TypeError as soon as this branch was selected.
            self.clf = LinearRegression()
        elif self.regressorName == "lasso":
            self.clf = LassoCV(cv=10)
        elif self.regressorName == "svr":
            self.clf = SVR(kernel='rbf', C=0.2, gamma=0.01)
        elif self.regressorName == "knn":
            self.clf = neighbors.KNeighborsRegressor(1, weights='distance', n_jobs=-1)
        elif self.regressorName == "gauss":
            # NOTE(review): the name says "gauss" but this builds a Theil-Sen regressor.
            self.clf = TheilSenRegressor()

    def fit(self, X, y):
        """Fit the wrapped regressor on ``X`` and ``y`` as given."""
        self.clf.fit(X, y)

    def predict(self, X):
        """Return the wrapped regressor's predictions for ``X``."""
        return self.clf.predict(X)

    def getRegressor(self):
        """Return the wrapped scikit-learn estimator."""
        return self.clf

    def getRegressorName(self):
        """Return the name of the selected backend."""
        return self.regressorName

    def getParamGrid(self):
        """Return the hyper-parameter grid for the selected backend.

        Only "rf" and "gb" define a grid; every other backend gets an empty
        grid (previously this raised UnboundLocalError).
        """
        if self.regressorName == "rf":
            return {'max_features': [None]}
        if self.regressorName == "gb":
            return {'alpha': [0.9]}
        return {}
def _fit_robust_line(shifts):
    """Fit a Theil-Sen (robust) line to ``shifts`` and return the fitted values.

    The regressor input is the position of each element, ``0 .. len(shifts) - 1``;
    the returned array holds the fitted line evaluated at those positions.
    """
    from sklearn.linear_model import TheilSenRegressor

    positions = np.arange(len(shifts)).reshape(-1, 1)
    regressor = TheilSenRegressor()  # robust regression
    regressor.fit(positions, shifts)
    return regressor.predict(positions)
def trainAlgo(self):
    """Build a TheilSenRegressor from ``self.param`` and fit it.

    Reads the hyper-parameters listed below from ``self.param``, stores the
    new estimator in ``self.model`` and fits it on ``self.inputData['X']``
    and ``self.outputData['Y']``.
    """
    hyperparameter_names = (
        'fit_intercept',
        'copy_X',
        'max_subpopulation',
        'n_subsamples',
        'max_iter',
        'tol',
        'random_state',
        'verbose',
    )
    settings = {name: self.param[name] for name in hyperparameter_names}
    self.model = TheilSenRegressor(**settings)
    self.model.fit(self.inputData['X'], self.outputData['Y'])
def test_theil_sen_1d_no_intercept():
    """Without an intercept, Theil-Sen should recover ``w + c`` where least squares does not."""
    X, y, w, c = gen_toy_problem_1d(intercept=False)

    # Least squares is expected to miss the target coefficient by more than 0.5.
    least_squares = LinearRegression(fit_intercept=False)
    least_squares.fit(X, y)
    assert np.abs(least_squares.coef_ - w - c) > 0.5

    # Theil-Sen is expected to match it to one decimal, with a zero intercept.
    theil_sen = TheilSenRegressor(fit_intercept=False, random_state=0)
    theil_sen.fit(X, y)
    assert_array_almost_equal(theil_sen.coef_, w + c, 1)
    assert_almost_equal(theil_sen.intercept_, 0.0)

    # non-regression test for #18104: score() must simply run.
    theil_sen.score(X, y)
def createTheilSenRegressor(params):
    """Return a default TheilSenRegressor together with its tuning parameters.

    The result is ``{"estimator": <TheilSenRegressor>, "params": <tuning params>}``.
    ``params`` is merged with the estimator defaults via ``mergeParams``, but the
    estimator itself is built with default arguments (see the "Without Parameters"
    log message).
    """
    info("Creating TheilSen Regressor", ind=4)

    # Merged result is not used afterwards; the call is kept as in the original flow.
    params = mergeParams(TheilSenRegressor(), params)
    tuning_grid = getTheilSenRegressorParams()

    info("Without Parameters", ind=4)
    estimator = TheilSenRegressor()

    return dict(estimator=estimator, params=tuning_grid)
def theilsen_regress_predict(var):
    """Fit a Theil-Sen line to a series and return the fitted values.

    Input:-
        var: 1-D array of values; the regressor input is the element index
             0 .. len(var) - 1.
    Output:
        array of the fitted line evaluated at every index (the predictions,
        not the regression coefficient).
    """
    regressor = TheilSenRegressor()
    # The target is passed as a flat array: a column vector works too, but
    # scikit-learn flattens it with a DataConversionWarning.
    y = np.asarray(var).ravel()
    X = np.arange(len(y)).reshape(-1, 1)
    regressor.fit(X, y)
    return regressor.predict(X)
def _fit_theil_sen_one_track(x_coords_metres, y_coords_metres,
                             valid_times_unix_sec):
    """Fits Theil-Sen model for one storm track.

    Two independent models are fitted against time: one for the x-coordinate
    and one for the y-coordinate.

    P = number of points in track

    :param x_coords_metres: length-P numpy array of x-coordinates.
    :param y_coords_metres: length-P numpy array of y-coordinates.
    :param valid_times_unix_sec: length-P numpy array of times.
    :return: theil_sen_dict: Dictionary with the following keys.
    theil_sen_dict['x_intercept_metres']: x-intercept.
    theil_sen_dict['x_velocity_m_s01']: x-velocity (metres per second),
        taken from the model's `coef_`.
    theil_sen_dict['y_intercept_metres']: y-intercept.
    theil_sen_dict['y_velocity_m_s01']: y-velocity (metres per second),
        taken from the model's `coef_`.
    """
    num_points = len(x_coords_metres)
    time_matrix = numpy.reshape(valid_times_unix_sec, (num_points, 1))

    x_model = TheilSenRegressor(fit_intercept=True)
    x_model.fit(time_matrix, x_coords_metres)

    y_model = TheilSenRegressor(fit_intercept=True)
    y_model.fit(time_matrix, y_coords_metres)

    theil_sen_dict = {}
    theil_sen_dict[X_INTERCEPT_KEY] = x_model.intercept_
    theil_sen_dict[X_VELOCITY_KEY] = x_model.coef_
    theil_sen_dict[Y_INTERCEPT_KEY] = y_model.intercept_
    theil_sen_dict[Y_VELOCITY_KEY] = y_model.coef_
    return theil_sen_dict
def test_less_samples_than_features():
    """Check Theil-Sen on a problem with fewer samples (10) than features (20)."""
    rng = np.random.RandomState(0)
    n_samples, n_features = 10, 20
    X = rng.normal(size=(n_samples, n_features))
    y = rng.normal(size=n_samples)

    # fit_intercept=False: coefficients must equal the least-squares ones.
    no_intercept = TheilSenRegressor(fit_intercept=False, random_state=0)
    no_intercept.fit(X, y)
    least_squares = LinearRegression(fit_intercept=False)
    least_squares.fit(X, y)
    assert_array_almost_equal(no_intercept.coef_, least_squares.coef_, 12)

    # fit_intercept=True: the intercept is computed differently, so only the
    # predictions on the training data are compared against y.
    with_intercept = TheilSenRegressor(fit_intercept=True, random_state=0)
    with_intercept.fit(X, y)
    assert_array_almost_equal(with_intercept.predict(X), y, 12)
class _TheilSenRegressorImpl:
    """Adapter that owns an ``Op`` instance and forwards ``fit``/``predict`` to it."""

    def __init__(self, **hyperparams):
        # Keep the raw hyper-parameters and build the wrapped model from them.
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        """Fit the wrapped model; ``y`` is forwarded only when it is not None."""
        fit_args = (X,) if y is None else (X, y)
        self._wrapped_model.fit(*fit_args)
        return self

    def predict(self, X):
        """Return the wrapped model's predictions for ``X``."""
        return self._wrapped_model.predict(X)
def __init__(self, fit_intercept=True, copy_X=True, max_subpopulation=1e4,
             n_subsamples=None, max_iter=300, tol=1.e-3, random_state=None,
             n_jobs=1, verbose=False):
    """Initialise the wrapped Theil-Sen regressor, then the wrapper base class.

    All arguments are forwarded unchanged to ``_TheilSenRegressor.__init__``
    except ``max_iter``, which is first coerced to ``int``.
    """
    max_iter = int(max_iter)
    # Forwarded by keyword: scikit-learn estimator constructors are
    # keyword-only in current releases, so positional forwarding fails there.
    # NOTE(review): assumes _TheilSenRegressor is scikit-learn's
    # TheilSenRegressor (parameter names match) - confirm against the import.
    _TheilSenRegressor.__init__(
        self,
        fit_intercept=fit_intercept,
        copy_X=copy_X,
        max_subpopulation=max_subpopulation,
        n_subsamples=n_subsamples,
        max_iter=max_iter,
        tol=tol,
        random_state=random_state,
        n_jobs=n_jobs,
        verbose=verbose,
    )
    BaseWrapperReg.__init__(self)
def theilsen_regress_coeff(var, a):
    """Fit a Theil-Sen line of ``var`` against ``a`` and return its slope.

    Input:-
        var: 1-D array of values (the regression target)
        a: 1-D array index (the single regressor); any array-like is accepted
    Output:
        regression coefficient, wrapped as ``np.array([coef_])``
    """
    regressor = TheilSenRegressor()
    # The target is passed as a flat array: a column vector works too, but
    # scikit-learn flattens it with a DataConversionWarning.
    y = np.asarray(var).ravel()
    X = np.asarray(a).reshape(-1, 1)
    regressor.fit(X, y)
    return np.array([regressor.coef_])
def _regress_a(X, y, robust, n_jobs):
    """Calculates the slope and intercept.

    Uses TheilSenRegressor when ``robust`` is truthy and LinearRegression
    otherwise; ``n_jobs`` is forwarded to the chosen estimator. Returns
    ``(coef_[0], intercept_)``.
    """
    estimator_cls = TheilSenRegressor if robust else LinearRegression
    fitted = estimator_cls(n_jobs=n_jobs)
    fitted.fit(X, y)
    return fitted.coef_[0], fitted.intercept_
def get_best_degree(data):
    """Return the polynomial degree (1 to 5) with the lowest in-sample squared error.

    The last column of ``data`` is the target and all other columns are the
    features. For every degree a PolynomialFeatures + TheilSenRegressor
    pipeline is fitted and scored on that same data.
    """
    n_columns = np.size(data, 1)
    X = data[:, :n_columns - 1]  # noqa
    Y = data[:, n_columns - 1]

    candidate_degrees = list(range(1, 6))
    squared_errors = []
    for degree in candidate_degrees:
        pipeline = Pipeline([
            ("quad", PolynomialFeatures(degree=degree)),
            ("linear", TheilSenRegressor(max_subpopulation=50, max_iter=300)),
        ])
        pipeline.fit(X, Y)
        fitted_values = pipeline.predict(X)
        squared_errors.append(np.sum(np.square(Y - fitted_values)))

    return candidate_degrees[np.argmin(squared_errors)]
def test_subsamples():
    """With ``n_subsamples`` equal to the sample count, Theil-Sen must match least squares."""
    X, y, w, c = gen_toy_problem_4d()
    n_samples = X.shape[0]

    theil_sen = TheilSenRegressor(n_subsamples=n_samples, random_state=0)
    theil_sen.fit(X, y)
    least_squares = LinearRegression()
    least_squares.fit(X, y)

    # Coefficients must agree to 9 decimals.
    assert_array_almost_equal(theil_sen.coef_, least_squares.coef_, 9)
def log_log_robust_regression(cfs, y, kind=0):
    """Fit a robust line to ``log(y)`` against ``log(cfs)``.

    ``y`` must have 40 rows; it is reshaped to ``(40, -1)`` and ``cfs`` is
    repeated across the resulting columns before both are logged and flattened.

    ``kind`` selects the estimator: 0 = RANSACRegressor, 1 = TheilSenRegressor
    (``n_jobs=-1``), 2 = HuberRegressor. Any other value raises ValueError.

    Returns ``(coef_, intercept_, median of the per-point 1 - u / v)`` where
    ``u`` is the squared residual and ``v`` the squared deviation from the mean.
    """
    assert y.shape[0] == 40
    y = y.reshape(40, -1)
    tiled_cfs = np.tile(cfs[:, np.newaxis], (1, y.shape[1]))
    y = np.log(y).ravel()
    x = np.log(tiled_cfs).ravel()[:, np.newaxis]

    if kind == 0:
        model = RANSACRegressor()
    elif kind == 1:
        model = TheilSenRegressor(n_jobs=-1)
    elif kind == 2:
        model = HuberRegressor()
    else:
        raise ValueError

    model.fit(x, y)
    predicted = model.predict(x)
    squared_residuals = np.square(y - predicted)
    squared_deviations = np.square(y - y.mean())
    pointwise_r2 = 1. - squared_residuals / squared_deviations

    # RANSAC exposes the fitted line through its inner estimator_.
    line = model.estimator_ if kind == 0 else model
    return line.coef_, line.intercept_, np.median(pointwise_r2)
def show():
    """Plot quadratic fits of four regressors on a small hard-coded data set.

    For each of LinearRegression, TheilSenRegressor, HuberRegressor and
    RANSACRegressor a degree-2 polynomial pipeline is fitted, its name and
    score are printed, and its curve over [-1, 10] is drawn on top of a
    scatter plot of the data.
    """
    raw_x = [1, 2, 3, 4, 5, 6, 7, 8, 9, 4, 5, 0]
    y = [0, 5, 9, 12, 13, 12, 9, 5, 0, 1, 0, 7]
    X = [[value] for value in raw_x]  # one single-feature row per sample

    import pylab
    pylab.scatter(X, y)

    from sklearn.preprocessing import PolynomialFeatures
    from sklearn.pipeline import make_pipeline
    import numpy as np
    from sklearn.linear_model import LinearRegression, TheilSenRegressor, HuberRegressor, RANSACRegressor

    candidates = [
        (LinearRegression(), "linreg"),
        (TheilSenRegressor(), "theil-sen"),
        (HuberRegressor(), "huber"),
        (RANSACRegressor(), "ransac"),
    ]
    for estimator, label in candidates:
        model = make_pipeline(PolynomialFeatures(2), estimator)
        model.fit(X, y)

        print("")
        print(label)
        print(model.score(X, y))

        # Evaluate the fitted curve one point at a time.
        test_x = np.linspace(-1, 10, 100)
        test_y = [model.predict([[point]])[0] for point in test_x]
        pylab.plot(test_x, test_y, label=label)

    pylab.legend(loc="best")
    pylab.show()
def underline_regression(x, y, method="ramp"):
    """Fit a line to ``(x, y)`` with the selected method.

    Supported methods:
      * "ramp": minimise ``asymmetric_ramp_loss`` (Powell).
      * "quadratic" / "parabolic": delegate to ``ParabolicRegressor.regress``.
      * "squashed": minimise ``squashed_loss`` (L-BFGS-B).
      * "median": Theil-Sen fit on a design matrix ``[1, x]``, then shifted
        by the smallest value returned by ``subtract_bg``.
      * "huber": delegate to ``HubelRegressor.regress``.

    :returns: for "ramp" and "squashed" the tuple ``(reg.x[0], reg.x[1])``;
        for "median" a 2-element numpy array; for the delegating methods
        whatever the delegate returns.
    :raises ValueError: if ``method`` is not one of the names above
        (previously this surfaced as an UnboundLocalError).
    """
    known_methods = ("ramp", "quadratic", "parabolic", "squashed", "median", "huber")
    if method not in known_methods:
        raise ValueError(
            "unknown method {!r}; expected one of {}".format(method, ", ".join(known_methods)))

    start_params = guess(x, y)

    if method == "ramp":
        reg = minimize(asymmetric_ramp_loss, x0=start_params, args=(x, y),
                       bounds=((None, None), (0, None)), method="Powell")
    elif method == "quadratic" or method == "parabolic":
        return ParabolicRegressor.regress(x, y)
    elif method == "squashed":
        reg = minimize(squashed_loss, x0=start_params, jac=squashed_grad, args=(x, y),
                       bounds=((None, None), (0, 1)), method="L-BFGS-B")
    elif method == "median":
        y = y.reshape(-1, 1)
        # Design matrix rows: a constant 1 and x.
        X = np.vstack((np.ones(y.shape).transpose(), x.reshape(-1, 1).transpose()))
        # NOTE(review): the regressor also fits its own intercept_, which is
        # not used below - confirm that only coef_ is intended.
        reg = TheilSenRegressor(random_state=0).fit(X.transpose(), np.ravel(y))
        offset = np.min(subtract_bg(y, x, [reg.coef_[0], reg.coef_[1]]))
        return np.array([reg.coef_[0] + offset, reg.coef_[1]])
    else:  # method == "huber"
        # NOTE(review): "HubelRegressor" may be a typo for a Huber regressor
        # helper; the name is kept because its definition is not visible here.
        return HubelRegressor.regress(x, y)

    return (reg.x[0], reg.x[1])
def _cfunc_theilsen(x, y):
    """
    Get Theil-Sen regression score for data set.

    The regressor is fitted on the data and then scored on that same data
    with its ``score`` method.

    Args:
        x: (list<float>) independent property (x-axis)
        y: (list<float>) dependent property (y-axis)

    Returns:
        (float) Theil-Sen score
    """
    from sklearn.linear_model import TheilSenRegressor

    regressor = TheilSenRegressor(random_state=21)
    features = np.array(x)[:, np.newaxis]  # single-feature column matrix
    regressor.fit(features, y)
    return regressor.score(features, y)
def fit(self, smiles_list, logS_list):
    """Fit the regression selected by ``self.model`` on ESOL descriptors.

    Each SMILES string is parsed with ``Chem.MolFromSmiles`` and turned into
    the four descriptors returned by ``self._calc_esol_descriptors``
    (mw, logp, rotors, ap); ``logS_list`` supplies the targets by position.

    ``self.model`` may be 'linear', 'pls', 'huber' or 'ts'; any other value is
    reset to 'linear'. The fitted intercept is stored in ``self._intercept``
    and the coefficients in ``self._coef`` under "MW", "LogP", "RB" and "AP".
    """
    descriptor_rows = []
    targets = []
    for index, smiles in enumerate(smiles_list):
        mol = Chem.MolFromSmiles(smiles)
        mw, logp, rotors, ap = self._calc_esol_descriptors(mol)
        descriptor_rows.append([mw, logp, rotors, ap])
        targets.append(logS_list[index])

    selected = self.model
    if selected == 'linear':
        model = LinearRegression()
    elif selected == 'pls':
        model = PLSRegression(n_components=2)
    elif selected == 'huber':
        model = HuberRegressor(epsilon=1.5, alpha=2.0)
    elif selected == 'ts':
        logging.debug(f'Model: {self.model}')
        model = TheilSenRegressor()
    else:
        # Unknown name: fall back to ordinary least squares.
        self.model = 'linear'
        model = LinearRegression()
        logging.debug(f'Model: {self.model}')

    model.fit(descriptor_rows, targets)

    self._intercept = model.intercept_
    # Coefficients follow the descriptor column order used above.
    for position, label in enumerate(("MW", "LogP", "RB", "AP")):
        self._coef[label] = model.coef_[position]
def regression(
        data,
        theilsen_max_iter=100,
        order="auto",
        threshold_multiplier=2,
):
    """Iteratively fit a polynomial Theil-Sen model while discarding outliers.

    The last column of ``data`` is the target, the other columns the features.
    ``order`` is the polynomial degree: "auto" asks ``get_best_degree``, and any
    other non-int value falls back to 1.

    Up to 10 rounds are run. Each round fits on the current inliers, sets the
    threshold to the median absolute inlier residual, and marks as inliers all
    points whose absolute residual is below ``threshold_multiplier * threshold``.
    The loop stops early once the inlier count stops changing or no inliers
    remain.

    Returns ``(pipeline, inlier_mask, threshold_multiplier * threshold, order)``.
    """
    if order == "auto":
        order = get_best_degree(data)
    elif not isinstance(order, int):
        order = 1

    robust_step = TheilSenRegressor(max_subpopulation=50, max_iter=theilsen_max_iter)
    reg = Pipeline([
        ("quad", PolynomialFeatures(degree=order)),
        ("linear", robust_step),
    ])

    n_columns = np.size(data, 1)
    X = data[:, :n_columns - 1]  # noqa
    Y = data[:, n_columns - 1]

    inlier_mask = np.ones(np.size(data, 0), dtype=bool)
    previous_count = 0
    threshold = 0
    for _ in range(10):
        current_count = sum(inlier_mask)
        if current_count == previous_count:
            break  # inlier set is stable
        previous_count = current_count

        selected = inlier_mask.astype(bool)
        inlier_X = X[selected]
        inlier_Y = Y[selected]
        if inlier_X.shape[0] == 0:
            inlier_mask = selected.astype(int)
            break  # nothing left to fit on

        reg.fit(inlier_X, inlier_Y)
        residuals = abs(reg.predict(X) - Y)
        threshold = np.median(abs(reg.predict(inlier_X) - inlier_Y))
        inlier_mask = (residuals < (threshold_multiplier * threshold)).astype(int)

    return reg, inlier_mask, threshold_multiplier * threshold, order
def fit(self, X, y, random_state=None):
    """
    Train ENOLS on the given training set.

    For each of ``self.n_estimators`` rounds a random subset of row indices is
    drawn, and a LinearRegression, a TheilSenRegressor and a StackingRegressor
    are fitted on that subset. The fitted models are collected in
    ``self.estimators_lr``, ``self.estimators_TSR`` and ``self.estimators_enols``.

    Parameters
    ----------
    X: an input array of shape (n_sample, n_features)
    y: an array of shape (n_sample,) containing the regression targets
       for the input examples
    random_state: seed or RandomState controlling the subsampling

    Return
    ------
    self: the fitted model
    """
    # use random instead of np.random to sample random numbers below
    random = check_random_state(random_state)

    estimators = [('lr', LinearRegression())]

    # NOTE(review): an integer sample_size is replaced by a sampling-method
    # name and then passed as ``method`` below - confirm this is intended.
    if isinstance(self.sample_size, int):
        self.sample_size = 'reservoir_sampling'

    # add all the trained OLS models to this list
    self.estimators_lr, self.estimators_TSR, self.estimators_enols = [], [], []
    for _ in range(self.n_estimators):
        # Pass the RandomState itself, not the raw seed: with an integer seed
        # the raw value would reseed every draw and repeat the same subset.
        # NOTE(review): the population size is drawn from [50, 100] rather
        # than taken from len(X); indices can exceed a smaller X - confirm.
        samples = sample_without_replacement(n_population=random.choice([50, 100]),
                                             n_samples=random.choice([10, 20]),
                                             random_state=random,
                                             method=self.sample_size)

        X_train = np.array([X[index] for index in samples])
        y_train = np.array([y[index] for index in samples])

        reg = LinearRegression()
        reg.fit(X_train, y_train)

        tsr = TheilSenRegressor()
        tsr.fit(X_train, y_train)

        enol = StackingRegressor(estimators=estimators, final_estimator=LinearRegression())
        enol.fit(X_train, y_train)

        self.estimators_lr.append(reg)
        self.estimators_TSR.append(tsr)
        self.estimators_enols.append(enol)
    return self
def test_theil_sen_1d():
    """On the 1-D toy problem Theil-Sen should recover ``w`` and ``c`` where least squares does not."""
    X, y, w, c = gen_toy_problem_1d()

    # Least squares is expected to miss the true coefficient by more than 0.9.
    least_squares = LinearRegression()
    least_squares.fit(X, y)
    assert np.abs(least_squares.coef_ - w) > 0.9

    # Theil-Sen is expected to match coefficient and intercept to one decimal.
    theil_sen = TheilSenRegressor(random_state=0)
    theil_sen.fit(X, y)
    assert_array_almost_equal(theil_sen.coef_, w, 1)
    assert_array_almost_equal(theil_sen.intercept_, c, 1)
def get_models():
    """Return the regressors to compare, each configured with ``fit_intercept=False``."""
    # RANSACRegressor is left out: per the original author's note it has no
    # option to skip fitting the intercept.
    # Original author's note on Theil-Sen: its output varies a lot with
    # n_samples (with 1 it returns the median of the ratio; with all data
    # points it is essentially the least-squares result).
    return [
        LinearRegression(fit_intercept=False),
        HuberRegressor(fit_intercept=False),
        TheilSenRegressor(fit_intercept=False),
    ]
def test_theil_sen_2d():
    """On the 2-D toy problem Theil-Sen should recover ``w`` and ``c`` where least squares does not."""
    X, y, w, c = gen_toy_problem_2d()

    # Least squares is expected to be off by a norm greater than 1.0.
    least_squares = LinearRegression()
    least_squares.fit(X, y)
    assert norm(least_squares.coef_ - w) > 1.0

    # Theil-Sen is expected to match coefficients and intercept to one decimal.
    theil_sen = TheilSenRegressor(max_subpopulation=1e3, random_state=0)
    theil_sen.fit(X, y)
    assert_array_almost_equal(theil_sen.coef_, w, 1)
    assert_array_almost_equal(theil_sen.intercept_, c, 1)
def __init__(self):
    """Build the regressor named by the hard-coded ``regressorName`` and store it in ``self.clf``."""
    # Edit this name to switch backend; it currently selects gradient boosting.
    self.regressorName = "gb"

    if self.regressorName == "rf":
        self.clf = RandomForestRegressor(n_estimators=400, max_depth=63, max_features=50, n_jobs=-1)
    elif self.regressorName == "gb":
        self.clf = GradientBoostingRegressor(alpha=0.9, init=None, max_depth=3, learning_rate=0.2, loss='ls',
                                             max_features=None, min_samples_leaf=1, min_samples_split=2,
                                             min_weight_fraction_leaf=0.0, n_estimators=2500, presort='auto',
                                             random_state=None, subsample=1.0, verbose=0, warm_start=True)
    elif self.regressorName == "ridge":
        self.clf = RidgeCV(alphas=(0.01, 0.1), fit_intercept=True, normalize=False, scoring=None, cv=5,
                           gcv_mode=None, store_cv_values=False)
    elif self.regressorName == "linear":
        # LinearRegression takes no ``alpha``/``max_iter``; passing them
        # raised TypeError as soon as this branch was selected.
        self.clf = LinearRegression()
    elif self.regressorName == "lasso":
        self.clf = LassoCV(cv=10)
    elif self.regressorName == "svr":
        self.clf = SVR(kernel='rbf', C=0.2, gamma=0.01)
    elif self.regressorName == "knn":
        self.clf = neighbors.KNeighborsRegressor(1, weights='distance', n_jobs=-1)
    elif self.regressorName == "gauss":
        # NOTE(review): the name says "gauss" but this builds a Theil-Sen regressor.
        self.clf = TheilSenRegressor()
def fit_TheilSen(features_train, labels_train, features_pred):
    """Fit a Theil-Sen regressor and return its predictions for ``features_pred``.

    Also prints the regressor's ``score`` computed on the training data.
    """
    model = TheilSenRegressor()
    model.fit(features_train, labels_train)
    labels_pred = model.predict(features_pred)

    # The score is evaluated on the training set, not on features_pred.
    train_score = model.score(features_train, labels_train)
    # Single-argument call form so the same text is printed on Python 2 and 3
    # (the two spaces reproduce the separator of the old print statement).
    print("TheilSen - coefficient of determination R^2 of the prediction:  {}".format(train_score))
    return labels_pred