def _fit_theil_sen_one_track(x_coords_metres, y_coords_metres,
                             valid_times_unix_sec):
    """Fits one Theil-Sen model per coordinate (x and y) for one storm track.

    Time is the only predictor.  The x-model is created and fitted before the
    y-model.

    P = number of points in track

    :param x_coords_metres: length-P numpy array of x-coordinates.
    :param y_coords_metres: length-P numpy array of y-coordinates.
    :param valid_times_unix_sec: length-P numpy array of times.
    :return: theil_sen_dict: Dictionary keyed by X_INTERCEPT_KEY,
        X_VELOCITY_KEY, Y_INTERCEPT_KEY and Y_VELOCITY_KEY.  The intercepts
        are the fitted `intercept_` attributes; the velocities are the fitted
        `coef_` attributes, returned exactly as the estimator stores them.
    """

    # The regressor expects a 2-D predictor matrix (P rows, 1 column).
    time_matrix = numpy.reshape(
        valid_times_unix_sec, (len(x_coords_metres), 1)
    )

    fitted_models = []
    for these_coords_metres in (x_coords_metres, y_coords_metres):
        this_model_object = TheilSenRegressor(fit_intercept=True)
        this_model_object.fit(time_matrix, these_coords_metres)
        fitted_models.append(this_model_object)

    x_model_object, y_model_object = fitted_models

    theil_sen_dict = {}
    theil_sen_dict[X_INTERCEPT_KEY] = x_model_object.intercept_
    theil_sen_dict[X_VELOCITY_KEY] = x_model_object.coef_
    theil_sen_dict[Y_INTERCEPT_KEY] = y_model_object.intercept_
    theil_sen_dict[Y_VELOCITY_KEY] = y_model_object.coef_
    return theil_sen_dict
def test_verbosity():
    """Smoke test: fitting with ``verbose=True`` must complete without error."""
    X, y, _, _ = gen_toy_problem_1d()

    verbose_configs = (
        {"verbose": True, "random_state": 0},
        {"verbose": True, "max_subpopulation": 10, "random_state": 0},
    )

    # Output is silenced; we only care that the verbose code paths run.
    with no_stdout_stderr():
        for config in verbose_configs:
            TheilSenRegressor(**config).fit(X, y)
def createTheilSenRegressor(params):
    """Build a default TheilSenRegressor together with its tuning parameters.

    :param params: user parameters; they are passed through ``mergeParams``
        but the merged result is not forwarded to the estimator.
    :return: dict with keys ``"estimator"`` (a default-constructed
        TheilSenRegressor) and ``"params"`` (the value returned by
        ``getTheilSenRegressorParams()``).
    """
    info("Creating TheilSen Regressor", ind=4)

    # NOTE(review): the merged parameters are never used below; the call is
    # kept so any checks done inside mergeParams still run -- confirm that
    # building the estimator with defaults is intentional.
    params = mergeParams(TheilSenRegressor(), params)
    tuneParams = getTheilSenRegressorParams()

    info("Without Parameters", ind=4)

    return {"estimator": TheilSenRegressor(), "params": tuneParams}
def test_less_samples_than_features():
    """Check Theil-Sen behaviour when there are fewer samples than features."""
    rng = np.random.RandomState(0)
    n_samples, n_features = 10, 20
    X = rng.normal(size=(n_samples, n_features))
    y = rng.normal(size=n_samples)

    # Without an intercept, Theil-Sen must fall back to Least Squares and
    # therefore reproduce its coefficients.
    ts_no_intercept = TheilSenRegressor(fit_intercept=False, random_state=0)
    ts_no_intercept.fit(X, y)
    least_squares = LinearRegression(fit_intercept=False)
    least_squares.fit(X, y)
    assert_array_almost_equal(ts_no_intercept.coef_, least_squares.coef_, 12)

    # With an intercept the coefficients differ from Least Squares (the
    # intercept is calculated differently), but the training targets must
    # still be reproduced.
    ts_with_intercept = TheilSenRegressor(fit_intercept=True, random_state=0)
    ts_with_intercept.fit(X, y)
    assert_array_almost_equal(ts_with_intercept.predict(X), y, 12)
def compute_quantal_size(scan):
    """ Estimate the unit change in calcium response corresponding to a unit change in
    pixel intensity (dubbed quantal size, lower is better).

    Assumes images are stationary from one timestep to the next. Uses it to calculate a
    measure of noise per bright intensity (which increases linearly given that imaging
    noise is poisson), fits a line to it and uses the slope as the estimate.

    :param np.array scan: 3-dimensional scan (image_height, image_width, num_frames).

    :returns: int minimum pixel value in the scan (that appears a min number of times)
    :returns: int maximum pixel value in the scan (that appears a min number of times)
    :returns: np.array pixel intensities used for the estimation.
    :returns: np.array noise variances used for the estimation.
    :returns: float the estimated quantal size
    :returns: float the estimated zero value

    :raises ValueError: if no pixel value appears more than the minimum number of times.
    """
    # Set some params
    num_frames = scan.shape[2]
    min_count = num_frames * 0.1  # pixel values with fewer appearances will be ignored
    max_acceptable_intensity = 3000  # pixel values higher than this will be ignored

    # Make sure field is at least 32 bits (int16 overflows if summed to itself)
    scan = scan.astype(np.float32, copy=False)

    # Create pixel values at each position in field
    eps = 1e-4  # needed for np.round to not be biased towards even numbers (0.5 -> 1, 1.5 -> 2, 2.5 -> 3, etc.)
    pixels = np.round((scan[:, :, :-1] + scan[:, :, 1:]) / 2 + eps)
    pixels = pixels.astype(np.int16 if np.max(np.abs(pixels)) < 2 ** 15 else np.int32)

    # Compute a good range of pixel values (common, not too bright values)
    unique_pixels, counts = np.unique(pixels, return_counts=True)
    common_pixels = unique_pixels[counts > min_count]
    min_intensity = common_pixels.min()
    max_intensity = common_pixels.max()
    max_acceptable_intensity = min(max_intensity, max_acceptable_intensity)
    pixels_mask = np.logical_and(pixels >= min_intensity, pixels <= max_acceptable_intensity)

    # Select pixels in good range
    pixels = pixels[pixels_mask]
    unique_pixels, counts = np.unique(pixels, return_counts=True)

    # Compute noise variance
    variances = ((scan[:, :, :-1] - scan[:, :, 1:]) ** 2 / 2)[pixels_mask]

    # Histogram bins are intensities shifted so that min_intensity maps to bin 0. The
    # shift is done in a wide integer type so it cannot overflow int16.
    offset = int(min_intensity)
    bin_index = unique_pixels.astype(np.intp) - offset
    # Every chunk's histogram must reach the brightest selected intensity, even if that
    # chunk does not contain it; otherwise indexing with bin_index would go out of bounds
    # whenever the selected intensities have gaps.
    num_bins = int(bin_index[-1]) + 1 if bin_index.size else 0

    variance_sum = np.zeros(len(unique_pixels))  # sum of variances per pixel value
    chunk_size = int(1e8)
    for start in range(0, len(pixels), chunk_size):  # chunk it for memory efficiency
        stop = start + chunk_size
        chunk_bins = pixels[start:stop].astype(np.intp) - offset
        variance_sum += np.bincount(chunk_bins, weights=variances[start:stop],
                                    minlength=num_bins)[bin_index]
    unique_variances = variance_sum / counts  # average variance per intensity

    # Compute quantal size (by fitting a linear regressor to predict the variance from intensity)
    X = unique_pixels.reshape(-1, 1)
    y = unique_variances
    model = TheilSenRegressor()  # robust regression
    model.fit(X, y)
    quantal_size = model.coef_[0]
    zero_level = - model.intercept_ / model.coef_[0]  # intensity at which the fitted line crosses zero

    return (min_intensity, max_intensity, unique_pixels, unique_variances,
            quantal_size, zero_level)
def show():
    """Fit several regressors on quadratic features of a toy set and plot them.

    For each of ordinary least squares, Theil-Sen, Huber and RANSAC, a
    degree-2 polynomial pipeline is fitted, its name and score on the
    training data are printed, and its predictions on a grid from -1 to 10
    are drawn over a scatter plot of the data.
    """
    import numpy as np
    import pylab
    from sklearn.linear_model import LinearRegression, TheilSenRegressor, HuberRegressor, RANSACRegressor
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import PolynomialFeatures

    raw_x = [1, 2, 3, 4, 5, 6, 7, 8, 9, 4, 5, 0]
    y = [0, 5, 9, 12, 13, 12, 9, 5, 0, 1, 0, 7]
    X = [[value] for value in raw_x]  # one single-feature row per sample

    pylab.scatter(X, y)

    candidates = [
        (LinearRegression(), "linreg"),
        (TheilSenRegressor(), "theil-sen"),
        (HuberRegressor(), "huber"),
        (RANSACRegressor(), "ransac"),
    ]

    for estimator, name in candidates:
        model = make_pipeline(PolynomialFeatures(2), estimator)
        model.fit(X, y)

        print("")
        print(name)
        print(model.score(X, y))

        test_x = np.linspace(-1, 10, 100)
        test_y = [model.predict([[x]])[0] for x in test_x]
        pylab.plot(test_x, test_y, label=name)

    pylab.legend(loc="best")
    pylab.show()
def robust_cor(x, y):
    """Signed correlation-like score derived from a Theil-Sen fit.

    :param x: either a flat sequence of values (one feature) or a list of
        lists holding one list per feature; the latter is transposed so that
        rows become samples.
    :param y: target values.
    :return: list ``[corr_val, coef_, intercept_, breakdown_]`` where
        ``corr_val`` is the square root of the adjusted R^2 carrying the sign
        of the first coefficient, or 0 when the adjusted R^2 is not positive.
    """
    if isinstance(x[0], list):
        features = [list(column) for column in zip(*x)]
    else:
        features = np.array(x).reshape(-1, 1)

    X = np.array(features)
    Y = np.array(y)

    model = TheilSenRegressor(random_state=42)
    model.fit(X, Y)

    residuals = model.predict(X) - y
    deviations = y - np.mean(y)
    ss_res = np.dot(residuals, residuals)
    ss_tot = np.dot(deviations, deviations)

    n_samples = X.shape[0]
    n_features = X.shape[1]
    adj_r2 = 1 - (ss_res / ss_tot) * (n_samples - 1) / (n_samples - n_features - 1)

    sign = np.sign(model.coef_)[0]
    corr_val = sign * np.sqrt(adj_r2) if adj_r2 > 0 else 0

    return [corr_val, model.coef_, model.intercept_, model.breakdown_]
def test_checksubparams_n_subsamples_if_less_samples_than_features():
    """With 10 samples and 20 features, fitting with n_subsamples=9 must raise ValueError."""
    rng = np.random.RandomState(0)
    n_samples, n_features = 10, 20
    X = rng.normal(size=(n_samples, n_features))
    y = rng.normal(size=n_samples)

    estimator = TheilSenRegressor(n_subsamples=9, random_state=0)
    assert_raises(ValueError, estimator.fit, X, y)
def underline_regression(x, y, method="ramp"):
    """Fit a two-parameter line to (x, y) using the selected method.

    :param x: abscissa values (numpy array).
    :param y: ordinate values (numpy array).
    :param method: one of
        ``"ramp"``      - minimise ``asymmetric_ramp_loss`` with Powell's method;
        ``"quadratic"`` / ``"parabolic"`` - delegate to ``ParabolicRegressor.regress``;
        ``"squashed"``  - minimise ``squashed_loss`` with L-BFGS-B;
        ``"median"``    - Theil-Sen fit shifted down by the minimum of the
                          background-subtracted data;
        ``"huber"``     - delegate to ``HubelRegressor.regress``.
    :return: for "ramp" and "squashed", a tuple of the two optimised
        parameters; for "median", a length-2 numpy array; for the delegating
        methods, whatever the delegate returns.
    :raises ValueError: if ``method`` is not one of the names above
        (previously this surfaced as an UnboundLocalError on ``reg``).
    """
    start_params = guess(x, y)

    if method == "ramp":
        reg = minimize(asymmetric_ramp_loss, x0=start_params, args=(x, y),
                       bounds=((None, None), (0, None)), method="Powell")
    elif method in ("quadratic", "parabolic"):
        return ParabolicRegressor.regress(x, y)
    elif method == "squashed":
        reg = minimize(squashed_loss, x0=start_params, jac=squashed_grad,
                       args=(x, y), bounds=((None, None), (0, 1)),
                       method="L-BFGS-B")
    elif method == "median":
        y = y.reshape(-1, 1)
        # Design matrix rows: a row of ones and a row holding x.
        X = np.vstack((np.ones(y.shape).transpose(),
                       x.reshape(-1, 1).transpose()))
        reg = TheilSenRegressor(random_state=0).fit(X.transpose(), np.ravel(y))
        offset = np.min(subtract_bg(y, x, [reg.coef_[0], reg.coef_[1]]))
        return np.array([reg.coef_[0] + offset, reg.coef_[1]])
    elif method == "huber":
        return HubelRegressor.regress(x, y)
    else:
        raise ValueError("Unknown regression method: {!r}".format(method))

    # Only the scipy-optimiser branches reach this point.
    return (reg.x[0], reg.x[1])
def log_log_robust_regression(cfs, y, kind=0):
    """Robust linear fit of log(y) against log(cfs).

    :param cfs: 1-D numpy array; it is tiled across the columns of ``y``.
    :param y: numpy array whose first dimension must be 40; it is reshaped
        to (40, -1) before taking logs.
    :param kind: 0 for RANSAC, 1 for Theil-Sen, 2 for Huber.
    :return: tuple ``(coef_, intercept_, median_score)`` where
        ``median_score`` is the median over samples of
        ``1 - squared_residual / squared_deviation_from_mean``.
    :raises ValueError: if ``kind`` is not 0, 1 or 2.
    """
    assert y.shape[0] == 40

    y_matrix = y.reshape(40, -1)
    x_matrix = np.tile(cfs[:, np.newaxis], (1, y_matrix.shape[1]))
    log_y = np.log(y_matrix).ravel()
    log_x = np.log(x_matrix).ravel()[:, np.newaxis]

    if kind == 0:
        model = RANSACRegressor()
    elif kind == 1:
        model = TheilSenRegressor(n_jobs=-1)
    elif kind == 2:
        model = HuberRegressor()
    else:
        raise ValueError

    model.fit(log_x, log_y)

    squared_residuals = np.square(log_y - model.predict(log_x))
    squared_deviations = np.square(log_y - log_y.mean())
    per_sample_r2 = 1. - squared_residuals / squared_deviations

    # RANSAC wraps the final linear model in `estimator_`; the other two
    # expose the coefficients directly.
    fitted = model.estimator_ if kind == 0 else model
    return fitted.coef_, fitted.intercept_, np.median(per_sample_r2)
def getscore_getnext(df, days_ahead, coin):
    """Train a Theil-Sen model to predict 'close' `days_ahead` rows ahead.

    The frame is modified in place: missing values are replaced by -99999,
    a 'label' column (the shifted 'close') is added, and rows without a
    label are dropped.

    :param df: pandas DataFrame with at least the columns 'close' and 'date'.
    :param days_ahead: number of rows by which 'close' is shifted to build
        the label.
    :param coin: unused; kept for interface compatibility.
    :return: tuple ``(confidence, futureval)`` - the model's score on a
        15 % hold-out split and its prediction for the last row of features.
    """
    forecast_col = 'close'

    df.fillna(value=-99999, inplace=True)
    df['label'] = df[forecast_col].shift(-days_ahead)

    # `axis` is passed by keyword: the positional form is rejected by
    # recent pandas releases.
    X = np.array(df.drop(['label', 'date'], axis=1))
    X = preprocessing.scale(X)
    futureX = X[-1:]

    # Drop the trailing rows that have no label.  An explicit end index is
    # used because X[:-0] would be empty when days_ahead is 0.
    X = X[:len(X) - days_ahead]

    df.dropna(inplace=True)
    y = np.array(df['label'])

    # NOTE(review): `cross_validation` looks like the legacy scikit-learn
    # module name -- confirm it still resolves in the target environment.
    X_train, X_test, y_train, y_test = cross_validation.train_test_split(
        X, y, test_size=0.15)

    clf = TheilSenRegressor()
    clf.fit(X_train, y_train)
    confidence = clf.score(X_test, y_test)

    futureval = clf.predict(futureX)
    return (confidence, futureval)
def calculate_scaling_params(events, kmer_mean_levels):
    """Fit a robust line mapping observed event levels to model k-mer levels.

    :param events: event records convertible to a DataFrame with the columns
        'move', 'model_state' and 'mean'.
    :param kmer_mean_levels: mapping from model state to its mean level.
    :return: ``(slope, intercept)`` of the Theil-Sen fit, or ``None`` when
        fewer than MINIMUM_NONJUMP_POSITIONS non-jump positions exist.
    """
    events = pd.DataFrame(events)
    events['pos'] = events['move'].cumsum()

    # A jump is a move larger than one; both the position reached by the
    # jump and the one just before it are excluded from the fit.
    jump_pos = events.loc[events['move'] > 1, 'pos']
    jump_positions = set(jump_pos - 1) | set(jump_pos)
    nonjump_positions = set(events['pos']) - jump_positions

    if len(nonjump_positions) < MINIMUM_NONJUMP_POSITIONS:
        return None

    statelevels = []
    statelevels_jump = []  # collected but not used by the fit below
    for pos, posevents in events.groupby('pos'):
        state = posevents['model_state'].iloc[0]
        if '_' in state:
            continue

        level_pair = [posevents['mean'].median(), kmer_mean_levels[state]]
        target = statelevels if pos in nonjump_positions else statelevels_jump
        target.append(level_pair)

    statelevels_jump = np.array(statelevels_jump)
    statelevels = np.array(statelevels)

    observed = statelevels[:, 0][:, np.newaxis]
    expected = statelevels[:, 1]

    regr = TheilSenRegressor(random_state=922)
    regr.fit(observed, expected)
    return regr.coef_[0], regr.intercept_
def test_subsamples():
    """With n_subsamples equal to the sample count, Theil-Sen must match Least Squares."""
    X, y, _, _ = gen_toy_problem_4d()

    theil_sen = TheilSenRegressor(n_subsamples=X.shape[0], random_state=0)
    theil_sen.fit(X, y)

    least_squares = LinearRegression()
    least_squares.fit(X, y)

    # The two coefficient vectors must agree to 9 decimals.
    assert_array_almost_equal(theil_sen.coef_, least_squares.coef_, 9)
def fit(self, smiles_list, logS_list):
    """Fit the regression model on descriptors computed from SMILES strings.

    For every SMILES the four descriptors returned by
    ``self._calc_esol_descriptors`` (mw, logp, rotors, ap) form one feature
    row.  The regressor is chosen by ``self.model`` ('linear', 'pls',
    'huber' or 'ts'); any other value resets ``self.model`` to 'linear'.
    After fitting, the intercept is stored in ``self._intercept`` and the
    four coefficients in ``self._coef`` under "MW", "LogP", "RB" and "AP".

    :param smiles_list: sequence of SMILES strings.
    :param logS_list: target values, indexed in step with ``smiles_list``.
    """
    X = []
    y = []
    for index, smiles in enumerate(smiles_list):
        molecule = Chem.MolFromSmiles(smiles)
        mw, logp, rotors, ap = self._calc_esol_descriptors(molecule)
        X.append([mw, logp, rotors, ap])
        y.append(logS_list[index])

    if self.model == 'linear':
        model = LinearRegression()
    elif self.model == 'pls':
        model = PLSRegression(n_components=2)
    elif self.model == 'huber':
        model = HuberRegressor(epsilon=1.5, alpha=2.0)
    elif self.model == 'ts':
        logging.debug(f'Model: {self.model}')
        model = TheilSenRegressor()
    else:
        # Unrecognised model name: fall back to ordinary least squares.
        self.model = 'linear'
        model = LinearRegression()

    logging.debug(f'Model: {self.model}')
    model.fit(X, y)

    self._intercept = model.intercept_
    for position, descriptor in enumerate(("MW", "LogP", "RB", "AP")):
        self._coef[descriptor] = model.coef_[position]
def get_best_degree(data):
    """Pick the polynomial degree (1 to 5) with the smallest training error.

    The last column of ``data`` is the target; all other columns are the
    predictors.  For each degree a PolynomialFeatures + TheilSenRegressor
    pipeline is fitted and the sum of squared residuals on the same data is
    recorded.

    :param data: 2-D numpy array, predictors followed by one target column.
    :return: the degree with the lowest sum of squared residuals.
    """
    n_columns = np.size(data, 1)
    X = data[:, 0:n_columns - 1]  # noqa
    Y = data[:, n_columns - 1]

    degrees = list(range(1, 6))
    errors = []
    for deg in degrees:
        reg = Pipeline([
            ("quad", PolynomialFeatures(degree=deg)),
            (
                "linear",
                TheilSenRegressor(max_subpopulation=50, max_iter=300),
            ),
        ])
        reg.fit(X, Y)
        squared_residuals = np.square(Y - reg.predict(X))
        errors.append(np.sum(squared_residuals))

    return degrees[np.argmin(errors)]
def estimate_txty(cluster, k=20):
    """Estimate the x- and y-slopes versus z from the k lowest-z nodes.

    Two Theil-Sen fits are made on the ``k`` nodes with the smallest 'SZ':
    'SX' against 'SZ' and 'SY' against 'SZ'.  The slopes are returned.

    :param cluster: graph-like object whose ``nodes(data=True)`` yields
        ``(node_id, attributes)`` pairs, with ``attributes['features']``
        holding at least 'SX', 'SY' and 'SZ'.
    :param k: number of lowest-z nodes used in the fits.
    :return: tuple ``(TX, TY)`` of the fitted slopes.
    """
    features = [attributes['features']
                for _, attributes in cluster.nodes(data=True)]
    xs = np.array([feature['SX'] for feature in features])
    ys = np.array([feature['SY'] for feature in features])
    zs = np.array([feature['SZ'] for feature in features])

    # Indices of the k smallest z values, in ascending z order.
    lowest_z = np.argsort(zs)[:k]
    z_column = zs[lowest_z].reshape((-1, 1))

    lr = TheilSenRegressor()
    lr.fit(z_column, xs[lowest_z])
    TX = lr.coef_[0]
    lr.fit(z_column, ys[lowest_z])
    TY = lr.coef_[0]
    return TX, TY
def regression(
    data,
    theilsen_max_iter=100,
    order="auto",
    threshold_multiplier=2,
):
    """Iteratively fit a polynomial Theil-Sen model, discarding outliers.

    The last column of ``data`` is the target, the rest are predictors.
    Up to 10 rounds are run: the model is fitted on the current inliers, and
    a point stays an inlier when its absolute residual is below
    ``threshold_multiplier`` times the median absolute residual of the
    inliers.  Iteration stops early when the inlier count stops changing or
    when no inliers remain.

    :param data: 2-D numpy array, predictors followed by one target column.
    :param theilsen_max_iter: ``max_iter`` passed to TheilSenRegressor.
    :param order: polynomial degree; "auto" selects it with
        ``get_best_degree``, any other non-int value falls back to 1.
    :param threshold_multiplier: factor applied to the median inlier
        residual to obtain the inlier cut-off.
    :return: tuple ``(reg, inlier_mask, cutoff, order)`` where ``cutoff`` is
        ``threshold_multiplier`` times the last median inlier residual.
    """
    if order == "auto":
        order = get_best_degree(data)
    elif not isinstance(order, int):
        order = 1

    reg = Pipeline([
        ("quad", PolynomialFeatures(degree=order)),
        (
            "linear",
            TheilSenRegressor(max_subpopulation=50,
                              max_iter=theilsen_max_iter),
        ),
    ])

    n_columns = np.size(data, 1)
    X = data[:, 0:n_columns - 1]  # noqa
    Y = data[:, n_columns - 1]

    inlier_mask = np.ones(np.size(data, 0), dtype=bool)
    mask_length = 0
    threshold = 0

    for _ in range(10):
        inlier_count = sum(inlier_mask)
        if mask_length == inlier_count:
            # The inlier set is the same size as in the previous round.
            break
        mask_length = inlier_count

        # The mask is stored as 0/1 integers between rounds; boolean form is
        # needed for row selection.
        inlier_mask = inlier_mask.astype(bool)
        inlier_X = X[inlier_mask]
        inlier_Y = Y[inlier_mask]
        if inlier_X.shape[0] == 0:
            inlier_mask = inlier_mask.astype(int)
            break

        reg.fit(inlier_X, inlier_Y)
        residuals = abs(reg.predict(X) - Y)
        inlier_residuals = abs(reg.predict(inlier_X) - inlier_Y)
        threshold = np.median(inlier_residuals)

        is_within = residuals < (threshold_multiplier * threshold)
        inlier_mask = is_within.astype(int)

    return reg, inlier_mask, threshold_multiplier * threshold, order
def test_theil_sen_2d():
    """Theil-Sen recovers the 2-D toy model where Least Squares does not."""
    X, y, w, c = gen_toy_problem_2d()

    # Least Squares is expected to miss the true weights by a wide margin.
    least_squares = LinearRegression()
    least_squares.fit(X, y)
    assert norm(least_squares.coef_ - w) > 1.0

    # Theil-Sen must recover weights and intercept to one decimal.
    theil_sen = TheilSenRegressor(max_subpopulation=1e3, random_state=0)
    theil_sen.fit(X, y)
    assert_array_almost_equal(theil_sen.coef_, w, 1)
    assert_array_almost_equal(theil_sen.intercept_, c, 1)
def __init__(self, X, y, tsr_params, nfolds=3, n_jobs=1, scoring=None,
             verbose=True):
    """Set up a Theil-Sen predictive model.

    Sets the model code to "tsr", optionally announces construction,
    initialises the AbstractRegressorPredictiveModel base with the
    "regressor" kind, and builds the underlying model from a
    default-constructed TheilSenRegressor via ``self.constructRegressor``.

    All arguments are forwarded unchanged to the base-class initialiser.
    """
    self._code = "tsr"

    if verbose:
        print("Constructed TheilSenRegressor: " + self._code)

    AbstractRegressorPredictiveModel.__init__(
        self, "regressor", X, y, tsr_params, nfolds, n_jobs, scoring, verbose
    )

    base_estimator = TheilSenRegressor()
    self._model = self.constructRegressor(base_estimator)
def get_models():
    """Return the intercept-free linear models to compare.

    :return: list with a LinearRegression, a HuberRegressor and a
        TheilSenRegressor, each built with ``fit_intercept=False``.
    """
    # RANSACRegressor is left out: it has no option to skip fitting the
    # intercept.
    #
    # Author's note on Theil-Sen: its output varies a lot with n_samples
    # (with n_samples=1 it returns the median of the ratio; with n_samples
    # equal to the number of data points it is essentially the least-squares
    # result).
    return [
        LinearRegression(fit_intercept=False),
        HuberRegressor(fit_intercept=False),
        TheilSenRegressor(fit_intercept=False),
    ]
def test_theil_sen_1d():
    """Theil-Sen recovers the 1-D toy model where Least Squares does not."""
    X, y, w, c = gen_toy_problem_1d()

    # Least Squares is expected to miss the true slope.
    least_squares = LinearRegression()
    least_squares.fit(X, y)
    assert np.abs(least_squares.coef_ - w) > 0.9

    # Theil-Sen must recover slope and intercept to one decimal.
    theil_sen = TheilSenRegressor(random_state=0)
    theil_sen.fit(X, y)
    assert_array_almost_equal(theil_sen.coef_, w, 1)
    assert_array_almost_equal(theil_sen.intercept_, c, 1)
def train(X, Y, selected="Linear", modelName='best_model.sav'):
    """Train one regressor, pickle it, report its scores and plot the results.

    The data is split 90/10 (shuffled), standardised with a scaler fitted on
    the training part, and the model named by ``selected`` is fitted.  The
    fitted model is written to ``modelName`` with pickle, then validated and
    scored on both the training and the test part.

    :param X: feature matrix.
    :param Y: target values.
    :param selected: key of the model to train; one of "Linear", "Huber",
        "TheilSen", "Ridge", "Lasso", "ElasticNet", "KNN", "DecisionTree",
        "SVR", "AdaBoost", "GradientBoost", "RandomForest", "ExtraTrees".
    :param modelName: path of the pickle file to write.
    :return: tuple ``(fig_train, fig_test)`` as produced by ``plot_fig``.
    :raises KeyError: if ``selected`` is not one of the keys listed above.
    """
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=0.10, shuffle=True)

    # Scaling: statistics come from the training part only.
    sc = StandardScaler()
    sc.fit(X_train)
    X_train = sc.transform(X_train)
    X_test = sc.transform(X_test)

    # Candidate regression models
    models = {
        "Linear": LinearRegression(),
        "Huber": HuberRegressor(max_iter=1000),
        "TheilSen": TheilSenRegressor(),
        "Ridge": Ridge(),
        "Lasso": Lasso(),
        "ElasticNet": ElasticNet(),
        "KNN": KNeighborsRegressor(),
        "DecisionTree": DecisionTreeRegressor(),
        "SVR": SVR(),
        "AdaBoost": AdaBoostRegressor(),
        "GradientBoost": GradientBoostingRegressor(),
        "RandomForest": RandomForestRegressor(),
        "ExtraTrees": ExtraTreesRegressor(),
    }

    best_model = models[selected]
    best_model.fit(X_train, Y_train)

    # Save model.  The context manager guarantees the file is flushed and
    # closed even if pickling fails.
    # NOTE(review): only the regressor is saved, not the fitted scaler --
    # confirm that consumers of the pickle scale their inputs the same way.
    with open(modelName, 'wb') as model_file:
        pickle.dump(best_model, model_file)

    # Make predictions using the model (train and test)
    Y_test_pred = best_model.predict(X_test)
    Y_train_pred = best_model.predict(X_train)

    # R2 score, the coefficient of determination (how much the inputs
    # influence the prediction): 0 is bad, 1 is good.
    validate(Y_train, Y_train_pred, name="Training")
    R2 = best_model.score(X_train, Y_train)
    print("[Training] R2 Score: ", round(R2, 3))

    validate(Y_test, Y_test_pred, name="Test")
    R2 = best_model.score(X_test, Y_test)
    print("[Test] R2 Score: ", round(R2, 3))

    fig_train = plot_fig([Y_train, Y_train_pred],
                         ["Train Real", "Train Predicted"])
    fig_test = plot_fig([Y_test, Y_test_pred],
                        ["Test Real", "Test Predicted"])
    return fig_train, fig_test
def test_theil_sen_1d_no_intercept():
    """Intercept-free variant of the 1-D recovery test."""
    X, y, w, c = gen_toy_problem_1d(intercept=False)
    expected_coef = w + c

    # Least Squares is expected to miss the target coefficient.
    least_squares = LinearRegression(fit_intercept=False)
    least_squares.fit(X, y)
    assert_greater(np.abs(least_squares.coef_ - w - c), 0.5)

    # Theil-Sen must recover it to one decimal and report a zero intercept.
    theil_sen = TheilSenRegressor(fit_intercept=False, random_state=0)
    theil_sen.fit(X, y)
    assert_array_almost_equal(theil_sen.coef_, expected_coef, 1)
    assert_almost_equal(theil_sen.intercept_, 0.)
def test_model_theilsen(self):
    """Convert a fitted TheilSenRegressor to ONNX and dump data and model."""
    model, X = fit_regression_model(TheilSenRegressor())

    # One float input with a free batch dimension and X's feature count.
    input_spec = [("input", FloatTensorType([None, X.shape[1]]))]
    model_onnx = convert_sklearn(model, "thiel-sen regressor", input_spec)

    self.assertIsNotNone(model_onnx)
    dump_data_and_model(
        X,
        model,
        model_onnx,
        basename="SklearnTheilSen-Dec4",
    )
def _fit_robust_line(shifts):
    """Fit a robust (Theil-Sen) line to ``shifts`` against their index.

    :param shifts: sequence of values; element i is paired with predictor i.
    :return: the fitted line evaluated at every index.
    """
    from sklearn.linear_model import TheilSenRegressor

    positions = np.arange(len(shifts)).reshape(-1, 1)

    robust_model = TheilSenRegressor()  # robust regression
    robust_model.fit(positions, shifts)
    return robust_model.predict(positions)
def translated_huber_regression(x, y):
    """Theil-Sen line through (x, y), shifted down to touch the lowest residual.

    Despite the name, the fit uses TheilSenRegressor.  The design matrix has
    a column of ones and a column holding ``x``; the two fitted coefficients
    are used as constant term and slope.

    :param x: numpy array of abscissa values.
    :param y: numpy array of ordinate values.
    :return: numpy array ``[constant + offset, slope]`` where ``offset`` is
        the minimum of ``y - constant - slope * x``.
    """
    y_column = y.reshape(-1, 1)
    ones_row = np.ones(y_column.shape).transpose()
    x_row = x.reshape(-1, 1).transpose()
    design = np.vstack((ones_row, x_row)).transpose()

    reg = TheilSenRegressor(random_state=0).fit(design, np.ravel(y_column))
    constant = reg.coef_[0]
    slope = reg.coef_[1]

    # Shift the line so that it lies on or below every data point.
    residuals = y - constant - slope * x
    lowest = np.min(residuals)
    return np.array([constant + lowest, slope])
def test_theil_sen_parallel():
    """Same recovery check as the 2-D test, run with ``n_jobs=-1``."""
    X, y, w, c = gen_toy_problem_2d()

    # Least Squares is expected to miss the true weights by a wide margin.
    least_squares = LinearRegression()
    least_squares.fit(X, y)
    assert_greater(norm(least_squares.coef_ - w), 1.0)

    # Theil-Sen must recover weights and intercept to one decimal.
    theil_sen = TheilSenRegressor(
        n_jobs=-1, random_state=0, max_subpopulation=2e3
    )
    theil_sen.fit(X, y)
    assert_array_almost_equal(theil_sen.coef_, w, 1)
    assert_array_almost_equal(theil_sen.intercept_, c, 1)
def get_hyperparameters_model():
    """Describe the Theil-Sen model for a hyper-parameter search.

    :return: dict with the single key 'theil_sen_regressor', mapping to a
        dict holding a default TheilSenRegressor under 'model' and an empty
        search space under 'param_distributions'.
    """
    search_space = {}
    estimator = TheilSenRegressor()

    return {
        'theil_sen_regressor': {
            'model': estimator,
            'param_distributions': search_space,
        }
    }
def main():
    """Compare robust regressors on B-spline features and save the figure.

    Synthetic 1-D data is generated with a fixed seed, every 10th target is
    replaced by an outlier value of 6, and four estimators (least squares,
    Theil-Sen, RANSAC, Huber) are fitted on cubic B-spline features.  Each
    fit is drawn with its mean squared error on a separate test set in the
    legend; the figure is written to
    '../results/fitting_experiments/robust_bspline_regression.png'.
    """
    np.random.seed(42)

    # Training data; the random draws happen in the same order as listed.
    X = np.random.uniform(low=-10, high=10, size=400)
    x_predict = np.linspace(-10, 10, 1000)
    y = np.sin(2 * np.pi * 0.1 * X) + 0.1 * np.abs(X) + np.abs(np.arctan(X))

    X_test = np.random.uniform(low=-30, high=30, size=200)
    y_test = np.sin(2 * np.pi * 0.1 * X_test)

    # Corrupt every 10th target with a constant outlier.
    y_errors_large = y.copy()
    y_errors_large[::10] = 6

    # Make sure that X is 2D
    X = X[:, np.newaxis]
    X_test = X_test[:, np.newaxis]

    # Shared B-spline feature expansion
    knots = np.linspace(-30, 30, 20)
    bspline_features = BSplineFeatures(knots, degree=3, periodic=False)

    # (legend label, line style, colour, estimator)
    estimators = [
        ('Least-Square', '-', 'C0', LinearRegression(fit_intercept=False)),
        ('Theil-Sen', '-.', 'C1', TheilSenRegressor(random_state=42)),
        ('RANSAC', ':', 'C2', RANSACRegressor(random_state=42)),
        ('HuberRegressor', '--', 'C3', HuberRegressor()),
    ]

    fig, ax = plt.subplots(1, 1, figsize=(8, 3))
    fig.suptitle('Robust B-Spline Regression with SKLearn')
    ax.plot(X[:, 0], y_errors_large, 'o', ms=5, c='black',
            label='data points [10% outliers]')

    prediction_grid = x_predict[:, None]
    for label, style, color, estimator in estimators:
        model = make_pipeline(bspline_features, estimator)
        model.fit(X, y_errors_large)

        mse = mean_squared_error(model.predict(X_test), y_test)
        y_predicted = model.predict(prediction_grid)

        ax.plot(x_predict, y_predicted, style, lw=2, markevery=8, ms=6,
                color=color, label=label + ' E={:2.2g}'.format(mse))

    ax.legend(loc='upper right', framealpha=0.95, fontsize='xx-small')
    ax.set(ylim=(-2, 8), xlabel='time [s]', ylabel='amplitude')
    fig.tight_layout()
    fig.savefig('../results/fitting_experiments/robust_bspline_regression.png',
                bbox_inches='tight', dpi=300)
def select_regressor(X, y, scoring='neg_mean_squared_error', show=True):
    """Rank a fixed set of regressors by 10-fold cross-validated score.

    Each regressor is evaluated with ``cross_val_score``; its fold scores
    are negated, square-rooted and averaged, and the regressors are sorted
    by that value in ascending order.

    :param X: feature matrix.
    :param y: target values.
    :param scoring: scoring name passed to ``cross_val_score``.
    :param show: when True, print the ranking table.
    :return: tuple ``(best_name, best_regressor, ranking)`` where
        ``best_regressor`` is the instance from the candidate list and
        ``ranking`` is a DataFrame with columns 'Regressor' and 'Score'.
    """
    # ARDRegression, GaussianProcessRegressor, LogisticRegression and
    # RadiusNeighborsRegressor are deliberately not part of the comparison.
    regressors = [
        AdaBoostRegressor(),
        BaggingRegressor(),
        DecisionTreeRegressor(),
        ElasticNet(),
        ExtraTreeRegressor(),
        ExtraTreesRegressor(),
        GradientBoostingRegressor(),
        HuberRegressor(),
        KNeighborsRegressor(),
        Lasso(),
        LinearRegression(),
        MLPRegressor(),
        PassiveAggressiveRegressor(),
        PLSRegression(),
        RandomForestRegressor(),
        RANSACRegressor(),
        Ridge(),
        SGDRegressor(),
        TheilSenRegressor(),
    ]

    fold_scores = {}
    for reg in regressors:
        name = reg.__class__.__name__
        print('Processing {}...'.format(name))
        fold_scores[name] = cross_val_score(reg, X, y, scoring=scoring, cv=10)

    rows = [[name, np.sqrt(-values).mean()]
            for name, values in fold_scores.items()]
    ranking = pd.DataFrame(rows, columns=['Regressor', 'Score'])
    ranking = ranking.sort_values(by='Score', ascending=True)

    if show:
        print(ranking)

    # The row label of the best entry is its position in `regressors`,
    # because rows were created in the same order as the candidate list.
    best_row = ranking.iloc[0]
    return ranking.iloc[0, 0], regressors[best_row.name], ranking