def __add_trend_feature(self, arr, abs_values=False):
    idx = np.array(range(len(arr)))
    if abs_values:
        arr = np.abs(arr)
    lr = LinearRegression()
    lr.fit(idx.reshape(-1, 1), arr)
    return lr.coef_[0]
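A minimal standalone sketch of the same slope-as-trend-feature idea (the surrounding class is not shown; the driver code and `signal` below are illustrative assumptions):

import numpy as np
from sklearn.linear_model import LinearRegression

signal = np.cumsum(np.random.randn(100))      # e.g. a random-walk series
idx = np.arange(len(signal)).reshape(-1, 1)   # sample index as the only feature
slope = LinearRegression().fit(idx, signal).coef_[0]
print(slope)                                  # > 0 suggests an upward trend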
def linearRegression_sales(self):  # linear regression
    path = u'4.Advertising.csv'
    data = self.readFile(path)
    # x = data[['TV', 'Radio', 'Newspaper']]
    x = data[['TV', 'Radio']]
    y = data['Sales']
    x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=1)
    # print(x_train, y_train)
    linreg = LinearRegression()
    model = linreg.fit(x_train, y_train)
    print(model)
    print(linreg.coef_)
    print(linreg.intercept_)
    y_hat = linreg.predict(np.array(x_test))
    mse = np.average((y_hat - y_test) ** 2)
    rmse = np.sqrt(mse)
    print(mse, rmse)
    t = np.arange(len(x_test))
    plt.plot(t, y_test, 'r-', linewidth=2, label='Test')
    plt.plot(t, y_hat, 'g-', linewidth=2, label='Predict')
    plt.grid()
    plt.legend(loc='upper right')
    plt.show()
class PredictLoss(BaseLR):
    def __init__(self, hist=30, posmax=15, lr=0.2):
        from sklearn.linear_model import LinearRegression
        from collections import deque
        self.hist = hist
        self.track = deque(maxlen=self.hist)
        self.regr = LinearRegression()
        self.poscases = 0
        self.posmax = posmax
        self.lr = lr

    def __call__(self, env):
        if len(self.track) > 5:
            # Regress the tracked loss values against their index to
            # estimate the current loss trend.
            y = np.array(self.track)
            x = np.array(range(len(y))).reshape(-1, 1)
            self.regr.fit(x, y)
            coef_ = self.regr.coef_[0]
            preds = self.regr.predict(x)
            fst = preds[0]
            lst = preds[-1]
            e = np.sqrt(((y - preds) ** 2).mean())
            if coef_ > 0:
                # Loss is trending upward; stop after too many
                # consecutive positive slopes.
                self.poscases += 1
                if self.poscases >= self.posmax:
                    raise EarlyStopException
            else:
                self.poscases -= 1
                if self.poscases < 0:
                    self.poscases = 0
            diff = np.abs(fst - lst)
            coef = np.clip(diff / e, 1e-6, 1)
            lr = self.lr * coef
            print(lr, e, diff, coef_, coef, file=open('log.txt', 'a'))
            env.model.set_param("learning_rate", lr)
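To see the trend test PredictLoss applies in isolation, here is a hedged sketch on a synthetic loss history (the values and names are illustrative, not from the source):

import numpy as np
from sklearn.linear_model import LinearRegression

losses = np.array([0.9, 0.7, 0.6, 0.55, 0.56, 0.58, 0.61])  # recent loss track
x = np.arange(len(losses)).reshape(-1, 1)
reg = LinearRegression().fit(x, losses)
preds = reg.predict(x)
rmse = np.sqrt(((losses - preds) ** 2).mean())
print(reg.coef_[0] > 0)                                     # positive slope: loss is rising
print(np.clip(abs(preds[0] - preds[-1]) / rmse, 1e-6, 1))   # learning-rate scale factor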
def compare_panorama_cubic(greenery_measure="vegetation", **kwargs):
    """ Compare/plot the segmentation results of panoramic and cubic images
        to each other. Also use linear regression to determine how they
        relate to each other. """

    green_kwargs = select_green_model(greenery_measure)

    panorama_tiler = TileManager(cubic_pictures=False, **kwargs, **green_kwargs)
    cubic_tiler = TileManager(cubic_pictures=True, **kwargs, **green_kwargs)

    panorama_green = panorama_tiler.green_direct()
    cubic_green = cubic_tiler.green_direct()

    _remove_missing(panorama_green, cubic_green)
    x = np.arange(0, 0.8, 0.01)
    x_pano = np.array(panorama_green["green"]).reshape(-1, 1)
    y_cubic = np.array(cubic_green["green"])
    reg = LinearRegression().fit(x_pano, y_cubic)
    print(reg.score(x_pano, y_cubic))
    print(reg.coef_[0], reg.intercept_)

    plt.figure()
    plt.scatter(panorama_green["green"], cubic_green["green"])
    plt.plot(x, reg.predict(x.reshape(-1, 1)))
    plt.xlabel("panoramas")
    plt.ylabel("cubic")
    plt.xlim(0, max(0.001, max(panorama_green["green"]) * 1.1))
    plt.ylim(0, max(0.001, max(cubic_green["green"]) * 1.1))
    plot_greenery(panorama_green, show=False, title="panorama")
    plot_greenery(cubic_green, show=False, title="cubic")
    plt.show()
def test_predict_hdf_dataframe(self):
    # create some data
    x = np.array(list(range(0, 10)))
    y = x * 2
    df = pd.DataFrame({'x': x, 'y': y})
    X = df['x']
    Y = df['y']
    # put into Omega -- assume a client with pandas, scikit learn
    os.environ['DJANGO_SETTINGS_MODULE'] = ''
    om = Omega()
    om.runtime.pure_python = True
    om.runtime.celeryapp.conf.CELERY_ALWAYS_EAGER = True
    om.datasets.put(X, 'datax', as_hdf=True)
    om.datasets.put(Y, 'datay', as_hdf=True)
    # have Omega fit the model then predict
    lr = LinearRegression()
    lr.fit(reshaped(X), reshaped(Y))
    pred = lr.predict(reshaped(X))
    om.models.put(lr, 'mymodel2')
    # -- using data provided locally
    #    note this is the same as
    #    om.datasets.put(X, 'foo')
    #    om.runtimes.model('mymodel2').predict('foo')
    result = om.runtime.model('mymodel2').predict('datax')
    pred2 = result.get()
    self.assertTrue(
        (pred == pred2).all(), "runtimes prediction is different(1)")
    self.assertTrue(
        (pred == pred2).all(), "runtimes prediction is different(2)")
def get_scikit_prediction(x=np.array([1, 2, 3]), y=np.array([1, 2, 3])):
    from sklearn.linear_model import LinearRegression as ScikitLinearRegression

    # scikit-learn expects a 2D feature matrix, so reshape the 1D input.
    x = x.reshape(-1, 1)
    regression = ScikitLinearRegression()
    regression.fit(x, y)

    return regression.predict(x)
def train():
    X = np.array([[1, 2, 3, 4], [4, 5, 6, 7], [7, 8, 9, 10]])
    y = np.array([10, 20, 30])
    X_test = np.array([[10, 20, 30, 40], [40, 50, 60, 70], [70, 80, 90, 100]])

    reg = LinearRegression()
    reg.fit(X, y)
    print('coef_:', reg.coef_)
    print('intercept_:', reg.intercept_)
    print('predict:', reg.predict(X_test))
def test_linear_regression_n_jobs():
    """
    Test for the n_jobs parameter on the fit method and the constructor
    """
    X = [[1], [2]]
    Y = [1, 2]
    clf = LinearRegression()
    clf_fit = clf.fit(X, Y, 4)
    assert_equal(clf_fit.n_jobs, clf.n_jobs)
    assert_equal(clf.n_jobs, 1)
def __init__(self, treatment_cols, nuisance_cols,
             effect_estimator=LinearRegression(fit_intercept=False),
             treatment_estimator=LinearRegression(fit_intercept=False),
             y_estimator=LinearRegression(fit_intercept=False)):
    self.nuisance_cols = nuisance_cols
    self.treatment_cols = treatment_cols
    self.effect_estimator = effect_estimator
    self.treatment_estimator = treatment_estimator
    self.y_estimator = y_estimator
def test_fit(self):
    # create some data
    x = np.array(list(range(0, 10)))
    y = x * 2
    df = pd.DataFrame({'x': x, 'y': y})
    X = df[['x']]
    Y = df[['y']]
    # put into Omega
    os.environ['DJANGO_SETTINGS_MODULE'] = ''
    om = Omega()
    om.runtime.celeryapp.conf.CELERY_ALWAYS_EAGER = True
    om.datasets.put(X, 'datax')
    om.datasets.put(Y, 'datay')
    om.datasets.get('datax')
    om.datasets.get('datay')
    # create a model locally, store (unfitted) in Omega
    lr = LinearRegression()
    om.models.put(lr, 'mymodel2')
    self.assertIn('mymodel2', om.models.list('*'))
    # predict locally for comparison
    lr.fit(X, Y)
    pred = lr.predict(X)
    # try predicting without fitting
    with self.assertRaises(NotFittedError):
        result = om.runtime.model('mymodel2').predict('datax')
        result.get()
    # have Omega fit the model then predict
    result = om.runtime.model('mymodel2').fit('datax', 'datay')
    result.get()
    # check the new model version metadata includes the datax/y references
    meta = om.models.metadata('mymodel2')
    self.assertIn('metaX', meta.attributes)
    self.assertIn('metaY', meta.attributes)
    # -- using data already in Omega
    result = om.runtime.model('mymodel2').predict('datax')
    pred1 = result.get()
    # -- using data provided locally
    #    note this is the same as
    #    om.datasets.put(X, 'foo')
    #    om.runtimes.model('mymodel2').predict('foo')
    result = om.runtime.model('mymodel2').fit(X, Y)
    result = om.runtime.model('mymodel2').predict(X)
    pred2 = result.get()
    # -- check the local data provided to fit was stored as intended
    meta = om.models.metadata('mymodel2')
    self.assertIn('metaX', meta.attributes)
    self.assertIn('metaY', meta.attributes)
    self.assertIn('_fitX', meta.attributes.get('metaX').get('collection'))
    self.assertIn('_fitY', meta.attributes.get('metaY').get('collection'))
    self.assertTrue(
        (pred == pred1).all(), "runtimes prediction is different(1)")
    self.assertTrue(
        (pred == pred2).all(), "runtimes prediction is different(2)")
def test_linear_regression_sparse(random_state=0):
    "Test that linear regression also works with sparse data"
    random_state = check_random_state(random_state)
    n = 100
    X = sparse.eye(n, n)
    beta = random_state.rand(n)
    y = X * beta[:, np.newaxis]

    ols = LinearRegression()
    ols.fit(X, y.ravel())
    assert_array_almost_equal(beta, ols.coef_ + ols.intercept_)
    assert_array_almost_equal(ols.residues_, 0)
def __init__(self,
             base_estimator: RegressorMixin = None,
             n_trees: int = 50,
             sigma_a: float = 0.001,
             sigma_b: float = 0.001,
             n_samples: int = 200,
             n_burn: int = 200,
             p_grow: float = 0.5,
             p_prune: float = 0.5,
             alpha: float = 0.95,
             beta: float = 2.):
    if base_estimator is not None:
        self.base_estimator = clone(base_estimator)
    else:
        base_estimator = LinearRegression()
        self.base_estimator = base_estimator
    super().__init__(n_trees=n_trees, sigma_a=sigma_a, sigma_b=sigma_b,
                     n_samples=n_samples, n_burn=n_burn,
                     p_grow=p_grow, p_prune=p_prune,
                     alpha=alpha, beta=beta)
def test_multiple_response_regressor():
    np.random.seed(1)
    m = 100000
    n = 10

    X = np.random.normal(size=(m, n))
    beta1 = np.random.normal(size=(n, 1))
    beta2 = np.random.normal(size=(n, 1))

    y1 = np.dot(X, beta1)
    p2 = 1. / (1. + np.exp(-np.dot(X, beta2)))
    y2 = np.random.binomial(n=1, p=p2)
    y = np.concatenate([y1, y2], axis=1)

    model = MaskedEstimator(LinearRegression(), [True, False]) & \
        MaskedEstimator(ProbaPredictingEstimator(LogisticRegression()), [False, True])
    # MultipleResponseEstimator([('linear', np.array([True, False], dtype=bool), LinearRegression()),
    #                            ('logistic', np.array([False, True], dtype=bool), ProbaPredictingEstimator(LogisticRegression()))])
    model.fit(X, y)

    assert np.mean(beta1 - model.estimators_[0].estimator_.coef_) < .01
    assert np.mean(beta2 - model.estimators_[1].estimator_.estimator_.coef_) < .01
    model.get_params()
    model.predict(X)
def test_backward_elimination_estimation():
    np.random.seed(0)
    m = 100000
    n = 6
    factor = .9

    X = np.random.normal(size=(m, n))
    beta = 100 * np.ones(shape=n)
    for i in range(1, n):
        beta[i] = factor * beta[i - 1]
    beta = np.random.permutation(beta)[:, None]
    # beta = np.random.normal(size=(n,1))
    y = np.dot(X, beta) + 0.01 * np.random.normal(size=(m, 1))

    target_sequence = np.ravel(np.argsort(beta ** 2, axis=0))
    model1 = BackwardEliminationEstimator(
        SingleEliminationFeatureImportanceEstimatorCV(LinearRegression()))
    model1.fit(X, y)
    # model2 = BRFE(FeatureImportanceEstimatorCV(LinearRegression()))
    # model2.fit(X, y)
    np.testing.assert_array_equal(model1.elimination_sequence_, target_sequence)
def fit(self, X, y=None):
    self._sklearn_model = SKLModel(**self._hyperparams)
    if y is not None:
        self._sklearn_model.fit(X, y)
    else:
        self._sklearn_model.fit(X)
    return self
def test_fit_pipeline(self):
    # create some data
    x = np.array(list(range(0, 10)))
    y = x * 2
    df = pd.DataFrame({'x': x, 'y': y})
    X = df[['x']]
    Y = df[['y']]
    # put into Omega
    os.environ['DJANGO_SETTINGS_MODULE'] = ''
    om = Omega()
    om.runtime.celeryapp.conf.CELERY_ALWAYS_EAGER = True
    om.datasets.put(X, 'datax')
    om.datasets.put(Y, 'datay')
    om.datasets.get('datax')
    om.datasets.get('datay')
    # create a pipeline locally, store (unfitted) in Omega
    p = Pipeline([
        ('lr', LinearRegression()),
    ])
    om.models.put(p, 'mymodel2')
    self.assertIn('mymodel2', om.models.list('*'))
    # predict locally for comparison
    p.fit(reshaped(X), reshaped(Y))
    pred = p.predict(reshaped(X))
    # have Omega fit the model then predict
    result = om.runtime.model('mymodel2').fit('datax', 'datay')
    result.get()
    result = om.runtime.model('mymodel2').predict('datax')
    pred1 = result.get()
    self.assertTrue(
        (pred == pred1).all(), "runtimes prediction is different(1)")
def test_k_best_feature_selector():
    np.random.seed(0)
    m = 100000
    n = 6
    factor = .9

    X = np.random.normal(size=(m, n))
    beta = 100 * np.ones(shape=n)
    for i in range(1, n):
        beta[i] = factor * beta[i - 1]
    beta = np.random.permutation(beta)[:, None]
    # beta = np.random.normal(size=(n,1))
    y = np.dot(X, beta) + 0.01 * np.random.normal(size=(m, 1))

    target_vars = np.ravel(np.argsort(beta ** 2, axis=0))[::-1][:3]
    target_support = np.zeros(shape=n, dtype=bool)
    target_support[target_vars] = True

    model1 = BestKFeatureSelector(UnivariateFeatureImportanceEstimatorCV(
        LinearRegression()), k=3)
    model1.fit(X, y)

    np.testing.assert_array_equal(model1.support_, target_support)
def polynomial_linear_regression(self):
    best_accuracy = 0
    best_degree = 0
    # for degree in range(2, 10):
    degree = 2
    model = make_pipeline(
        PolynomialFeatures(degree),
        LinearRegression())  # polynomial transformation of this degree
    model.fit(self.X_train, self.Y_train)  # fit the model
    # check accuracy using test dataset
    predicted_y = model.predict(self.X_test)
    predicted_y = [
        1 if (abs(1 - val) < abs(val)) else 0 for val in predicted_y
    ]
    accuracy = accuracy_score(self.Y_test, predicted_y)
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_degree = degree
        self.best_model = model
    print(best_degree)
    return model
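The commented-out loop above suggests a search over polynomial degrees; a self-contained sketch of that search on synthetic data (all names below are illustrative assumptions, not from the source):

import numpy as np
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.metrics import accuracy_score

rng = np.random.RandomState(0)
X_train, X_test = rng.rand(80, 2), rng.rand(20, 2)
Y_train = (X_train.sum(axis=1) > 1).astype(int)
Y_test = (X_test.sum(axis=1) > 1).astype(int)

best_accuracy, best_degree, best_model = 0, 0, None
for degree in range(2, 10):
    model = make_pipeline(PolynomialFeatures(degree), LinearRegression())
    model.fit(X_train, Y_train)
    # threshold the regression output at 0.5, as polynomial_linear_regression does
    predicted_y = [1 if abs(1 - v) < abs(v) else 0 for v in model.predict(X_test)]
    accuracy = accuracy_score(Y_test, predicted_y)
    if accuracy > best_accuracy:
        best_accuracy, best_degree, best_model = accuracy, degree, model
print(best_degree)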
def __init__(self, base_estimator: RegressorMixin = None, **kwargs):
    if base_estimator is not None:
        self.base_estimator = clone(base_estimator)
    else:
        base_estimator = LinearRegression()
        self.base_estimator = base_estimator
    super().__init__(**kwargs)
def test_pipeline():
    np.random.seed(1)
    m = 10000
    n = 10

    X = np.random.normal(size=(m, n))
    beta = np.random.normal(size=(n, 1))
    beta[np.random.binomial(p=2.0 / float(n), n=1, size=n).astype(bool)] = 0
    y = np.dot(X, beta) + 0.5 * np.random.normal(size=(m, 1))
    beta_reduced = beta[beta != 0]

    model = BackwardEliminationEstimator(
        SingleEliminationFeatureImportanceEstimatorCV(LinearRegression()))
    model >>= LinearRegression()

    model.fit(X, y)
    assert np.max(np.abs(model.final_stage_.coef_ - beta_reduced)) < .1
def __init__(self, hist=30, posmax=15, lr=0.2):
    from sklearn.linear_model import LinearRegression
    from collections import deque
    self.hist = hist
    self.track = deque(maxlen=self.hist)
    self.regr = LinearRegression()
    self.poscases = 0
    self.posmax = posmax
    self.lr = lr
def test_ridge_vs_lstsq():
    """On alpha=0., Ridge and OLS yield the same solution."""
    # we need more samples than features
    n_samples, n_features = 5, 4
    y = rng.randn(n_samples)
    X = rng.randn(n_samples, n_features)

    ridge = Ridge(alpha=0., fit_intercept=False)
    ols = LinearRegression(fit_intercept=False)

    ridge.fit(X, y)
    ols.fit(X, y)
    assert_almost_equal(ridge.coef_, ols.coef_)

    ridge.fit(X, y)
    ols.fit(X, y)
    assert_almost_equal(ridge.coef_, ols.coef_)
def test_delta_transformer():
    fit_model = DoublePipeline(
        [('xdelta', DeltaTransformer()),
         ('linreg', LinearRegression(fit_intercept=False))],
        [('ydelta', DeltaTransformer())]).fit(X, Y)
    assert (np.isclose(fit_model.predict(X), np.squeeze(Y)).all())
    assert (np.isclose(fit_model.x_pipe_.steps[-1][1].coef_,
                       [1.0, 0.0, 0.0]).all())
def test_raises_value_error_if_sample_weights_greater_than_1d():
    # Sample weights must be either scalar or 1D
    n_sampless = [2, 3]
    n_featuress = [3, 2]

    for n_samples, n_features in zip(n_sampless, n_featuress):
        X = rng.randn(n_samples, n_features)
        y = rng.randn(n_samples)
        sample_weights_OK = rng.randn(n_samples) ** 2 + 1
        sample_weights_OK_1 = 1.
        sample_weights_OK_2 = 2.

        reg = LinearRegression()

        # make sure the "OK" sample weights actually work
        reg.fit(X, y, sample_weights_OK)
        reg.fit(X, y, sample_weights_OK_1)
        reg.fit(X, y, sample_weights_OK_2)
def test_fit_intercept():
    # Test assertions on betas shape.
    X2 = np.array([[0.38349978, 0.61650022],
                   [0.58853682, 0.41146318]])
    X3 = np.array([[0.27677969, 0.70693172, 0.01628859],
                   [0.08385139, 0.20692515, 0.70922346]])
    y = np.array([1, 1])

    lr2_without_intercept = LinearRegression(fit_intercept=False).fit(X2, y)
    lr2_with_intercept = LinearRegression(fit_intercept=True).fit(X2, y)
    lr3_without_intercept = LinearRegression(fit_intercept=False).fit(X3, y)
    lr3_with_intercept = LinearRegression(fit_intercept=True).fit(X3, y)

    assert_equal(lr2_with_intercept.coef_.shape,
                 lr2_without_intercept.coef_.shape)
    assert_equal(lr3_with_intercept.coef_.shape,
                 lr3_without_intercept.coef_.shape)
    assert_equal(lr2_without_intercept.coef_.ndim,
                 lr3_without_intercept.coef_.ndim)
class LinearRegressionImpl():
    def __init__(self, fit_intercept=True, normalize=False, copy_X=True,
                 n_jobs=None):
        self._hyperparams = {
            'fit_intercept': fit_intercept,
            'normalize': normalize,
            'copy_X': copy_X,
            'n_jobs': n_jobs}

    def fit(self, X, y=None):
        self._sklearn_model = SKLModel(**self._hyperparams)
        if y is not None:
            self._sklearn_model.fit(X, y)
        else:
            self._sklearn_model.fit(X)
        return self

    def predict(self, X):
        return self._sklearn_model.predict(X)
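A brief usage sketch for the wrapper above, assuming SKLModel is an alias for sklearn.linear_model.LinearRegression (the hyperparameter names suggest so; note the 'normalize' constructor argument only exists on scikit-learn versions before 1.2):

import numpy as np
from sklearn.linear_model import LinearRegression as SKLModel  # assumed alias

X = np.arange(10, dtype=float).reshape(-1, 1)
y = 2.0 * np.arange(10)
impl = LinearRegressionImpl(fit_intercept=True)
print(impl.fit(X, y).predict(X[:3]))  # approximately [0., 2., 4.]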
def __init__(self, weights=None, cv_train_size=None):
    estimators = []
    estimators.append(KNeighborsRegressor(n_neighbors=3))
    estimators.append(DecisionTreeRegressor())
    estimators.append(BayesianRidge())
    # estimators.append(BayesianRidge())
    self.estimators = estimators
    self.stacker = LinearRegression()
    self.weights = weights if weights is not None else {}
    self.cv_train_size = cv_train_size if cv_train_size is not None else 0.7
    self._is_fitted = False
def __init__(self, cols_1, cols_2,
             estimator_1=LinearRegression(fit_intercept=False),
             estimator_2=RandomForestRegressor(),
             iters=2):
    self.cols_1 = cols_1
    self.cols_2 = cols_2
    self.estimator_1 = estimator_1
    self.estimator_2 = estimator_2
    self.iters = iters
def test_super_learner():
    np.random.seed(0)
    X, y = load_boston(return_X_y=True)
    X = pandas.DataFrame(X, columns=['x%d' % i for i in range(X.shape[1])])
    model = CrossValidatingEstimator(SuperLearner(
        [('linear', LinearRegression()), ('earth', Earth(max_degree=2))],
        LinearRegression(), cv=5, n_jobs=1), cv=5)
    cv_pred = model.fit_predict(X, y)
    pred = model.predict(X)
    cv_r2 = r2_score(y, cv_pred)
    best_component_cv_r2 = max([
        r2_score(
            y,
            first(model.estimator_.cross_validating_estimators_.values()).
            cv_predictions_) for i in range(2)
    ])
    assert cv_r2 >= .9 * best_component_cv_r2

    code = sklearn2code(model, ['predict'], numpy_flat)
    module = exec_module('module', code)
    test_pred = module.predict(**X)
    try:
        assert_array_almost_equal(np.ravel(pred), np.ravel(test_pred))
    except:
        idx = np.abs(np.ravel(pred) - np.ravel(test_pred)) > .000001
        print(np.ravel(pred)[idx])
        print(np.ravel(test_pred)[idx])
        raise

    print(r2_score(y, pred))
    print(r2_score(y, cv_pred))
    print(max([
        r2_score(
            y,
            first(model.estimator_.cross_validating_estimators_.values()).
            cv_predictions_) for i in range(2)
    ]))
def test_linear_regression_sample_weights():
    # TODO: loop over sparse data as well
    rng = np.random.RandomState(0)

    # It would not work with under-determined systems
    for n_samples, n_features in ((6, 5), ):

        y = rng.randn(n_samples)
        X = rng.randn(n_samples, n_features)
        sample_weight = 1.0 + rng.rand(n_samples)

        for intercept in (True, False):

            # LinearRegression with explicit sample_weight
            reg = LinearRegression(fit_intercept=intercept)
            reg.fit(X, y, sample_weight=sample_weight)
            coefs1 = reg.coef_
            inter1 = reg.intercept_

            assert_equal(reg.coef_.shape, (X.shape[1], ))  # sanity checks
            assert_greater(reg.score(X, y), 0.5)

            # Closed form of the weighted least square
            # theta = (X^T W X)^(-1) * X^T W y
            W = np.diag(sample_weight)
            if intercept is False:
                X_aug = X
            else:
                dummy_column = np.ones(shape=(n_samples, 1))
                X_aug = np.concatenate((dummy_column, X), axis=1)

            coefs2 = linalg.solve(X_aug.T.dot(W).dot(X_aug),
                                  X_aug.T.dot(W).dot(y))

            if intercept is False:
                assert_array_almost_equal(coefs1, coefs2)
            else:
                assert_array_almost_equal(coefs1, coefs2[1:])
                assert_almost_equal(inter1, coefs2[0])
def __init__(self, fit_intercept=True, normalize=False, copy_X=True,
             n_jobs=None):
    self._hyperparams = {
        'fit_intercept': fit_intercept,
        'normalize': normalize,
        'copy_X': copy_X,
        'n_jobs': n_jobs}
    self._wrapped_model = Op(**self._hyperparams)
def test_score(self):
    # create some data
    x = np.array(list(range(0, 10)))
    y = x * 2
    df = pd.DataFrame({'x': x, 'y': y})
    X = df[['x']]
    Y = df[['y']]
    # put into Omega
    os.environ['DJANGO_SETTINGS_MODULE'] = ''
    om = Omega()
    om.runtime.celeryapp.conf.CELERY_ALWAYS_EAGER = True
    om.datasets.put(X, 'datax')
    om.datasets.put(Y, 'datay')
    om.datasets.get('datax')
    om.datasets.get('datay')
    # create a model locally, fit it, store in Omega
    lr = LinearRegression()
    lr.fit(X, Y)
    scores = lr.score(X, Y)
    om.models.put(lr, 'mymodel')
def test_linear_regression_multiple_outcome(random_state=0):
    # Test multiple-outcome linear regressions
    X, y = make_regression(random_state=random_state)

    Y = np.vstack((y, y)).T
    n_features = X.shape[1]

    reg = LinearRegression()
    reg.fit((X), Y)
    assert reg.coef_.shape == (2, n_features)
    Y_pred = reg.predict(X)
    reg.fit(X, y)
    y_pred = reg.predict(X)
    assert_array_almost_equal(np.vstack((y_pred, y_pred)).T, Y_pred, decimal=3)
def test_linear_regression_multiple_outcome(random_state=0):
    # Test multiple-outcome linear regressions
    X, y = make_regression(random_state=random_state)

    Y = np.vstack((y, y)).T
    n_features = X.shape[1]

    clf = LinearRegression(fit_intercept=True)
    clf.fit((X), Y)
    assert_equal(clf.coef_.shape, (2, n_features))
    Y_pred = clf.predict(X)
    clf.fit(X, y)
    y_pred = clf.predict(X)
    assert_array_almost_equal(np.vstack((y_pred, y_pred)).T, Y_pred, decimal=3)
def test_linear_regression():
    # Test LinearRegression on a simple dataset.
    # a simple dataset
    X = [[1], [2]]
    Y = [1, 2]

    clf = LinearRegression()
    clf.fit(X, Y)

    assert_array_almost_equal(clf.coef_, [1])
    assert_array_almost_equal(clf.intercept_, [0])
    assert_array_almost_equal(clf.predict(X), [1, 2])

    # test it also for degenerate input
    X = [[1]]
    Y = [0]

    clf = LinearRegression()
    clf.fit(X, Y)
    assert_array_almost_equal(clf.coef_, [0])
    assert_array_almost_equal(clf.intercept_, [0])
    assert_array_almost_equal(clf.predict(X), [0])
class StackedRegression(LinearModel, RegressorMixin):

    def __init__(self, weights=None, cv_train_size=None):
        estimators = []
        estimators.append(KNeighborsRegressor(n_neighbors=3))
        estimators.append(DecisionTreeRegressor())
        estimators.append(BayesianRidge())
        # estimators.append(BayesianRidge())
        self.estimators = estimators
        self.stacker = LinearRegression()
        self.weights = weights if weights is not None else {}
        self.cv_train_size = cv_train_size if cv_train_size is not None else 0.7
        self._is_fitted = False

    def fit_stack(self, X, y):
        print('fitting')
        print(X.shape)
        # Train the base estimators on the first part of the data, then fit
        # the stacker on their predictions for the held-out remainder.
        n_train = int(X.shape[0] * self.cv_train_size)
        for estimator in self.estimators:
            estimator.fit(X[:n_train, :], y[:n_train])
        predictions = np.concatenate(
            [np.matrix(estimator.predict(X[n_train:, :])).transpose()
             for estimator in self.estimators], axis=1)
        self.stacker.fit(predictions, y[n_train:])
        self._is_fitted = True
        print('fitted')
        print(self.stacker.residues_)

    def fit(self, X, y):
        # Refit the base estimators on the full data; requires the stacker
        # to have been fitted first via fit_stack.
        if not self._is_fitted:
            raise NotFittedError('StackedRegression must call fit_stack before fit.')
        for estimator in self.estimators:
            estimator.fit(X, y)

    def predict(self, X):
        predictions = np.concatenate(
            [np.matrix(estimator.predict(X)).transpose()
             for estimator in self.estimators], axis=1)
        return self.stacker.predict(predictions)
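A minimal driver for StackedRegression on synthetic data (the data below is an illustrative assumption; note stacker.residues_ exists only on older scikit-learn versions):

import numpy as np

rng = np.random.RandomState(0)
X = rng.rand(200, 3)
y = X @ np.array([1.0, -2.0, 0.5]) + 0.01 * rng.randn(200)

model = StackedRegression()
model.fit_stack(X, y)   # fit base estimators and stacker on a holdout split
model.fit(X, y)         # then refit the base estimators on all the data
print(model.predict(X[:5]))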
def test_linear_regression_multiple_outcome(random_state=0):
    "Test multiple-outcome linear regressions"
    X, y = make_regression(random_state=random_state)

    Y = np.vstack((y, y)).T
    n_features = X.shape[1]

    clf = LinearRegression(fit_intercept=True)
    clf.fit((X), Y)
    assert_equal(clf.coef_.shape, (2, n_features))
    Y_pred = clf.predict(X)
    clf.fit(X, y)
    y_pred = clf.predict(X)
    assert_array_almost_equal(np.vstack((y_pred, y_pred)).T, Y_pred, decimal=3)
def test_linear_regression_sparse_multiple_outcome(random_state=0):
    "Test multiple-outcome linear regressions with sparse data"
    random_state = check_random_state(random_state)
    X, y = make_sparse_uncorrelated(random_state=random_state)
    X = sparse.coo_matrix(X)
    Y = np.vstack((y, y)).T
    n_features = X.shape[1]

    ols = LinearRegression()
    ols.fit(X, Y)
    assert_equal(ols.coef_.shape, (2, n_features))
    Y_pred = ols.predict(X)
    ols.fit(X, y.ravel())
    y_pred = ols.predict(X)
    assert_array_almost_equal(np.vstack((y_pred, y_pred)).T, Y_pred, decimal=3)
def test_linear_regression_sparse_equal_dense(normalize, fit_intercept):
    # Test that linear regression agrees between sparse and dense
    rng = check_random_state(0)
    n_samples = 200
    n_features = 2
    X = rng.randn(n_samples, n_features)
    X[X < 0.1] = 0.
    Xcsr = sparse.csr_matrix(X)
    y = rng.rand(n_samples)
    params = dict(normalize=normalize, fit_intercept=fit_intercept)
    clf_dense = LinearRegression(**params)
    clf_sparse = LinearRegression(**params)
    clf_dense.fit(X, y)
    clf_sparse.fit(Xcsr, y)
    assert clf_dense.intercept_ == pytest.approx(clf_sparse.intercept_)
    assert_allclose(clf_dense.coef_, clf_sparse.coef_)
def test_linear_regression_sample_weights():
    rng = np.random.RandomState(0)

    for n_samples, n_features in ((6, 5), (5, 10)):
        y = rng.randn(n_samples)
        X = rng.randn(n_samples, n_features)
        sample_weight = 1.0 + rng.rand(n_samples)

        clf = LinearRegression()
        clf.fit(X, y, sample_weight)
        coefs1 = clf.coef_

        assert_equal(clf.coef_.shape, (X.shape[1], ))
        assert_greater(clf.score(X, y), 0.9)
        assert_array_almost_equal(clf.predict(X), y)

        # Sample weight can be implemented via a simple rescaling
        # for the square loss.
        scaled_y = y * np.sqrt(sample_weight)
        scaled_X = X * np.sqrt(sample_weight)[:, np.newaxis]
        clf.fit(scaled_X, scaled_y)
        coefs2 = clf.coef_

        assert_array_almost_equal(coefs1, coefs2)
def fit(self, X1, y1, X2, y2):
    """Fit estimator using RANSAC algorithm.

    Namely, the fit is done in two main steps:
    - pre-fitting: quickly select n_prefits configurations which seem
      suitable given topological constraints;
    - finding best fit: select the pre-fit with the maximum number of
      inliers as the best fit.

    Inputs:
      X1, y1: Left lane points (supposedly);
      X2, y2: Right lane points (supposedly).
    """
    check_consistent_length(X1, y1)
    check_consistent_length(X2, y2)

    # Assume linear model by default
    min_samples = X1.shape[1] + 1
    if min_samples > X1.shape[0] or min_samples > X2.shape[0]:
        raise ValueError("`min_samples` may not be larger than number "
                         "of samples ``X1-2.shape[0]``.")

    # Check additional parameters...
    if self.stop_probability < 0 or self.stop_probability > 1:
        raise ValueError("`stop_probability` must be in range [0, 1].")
    if self.residual_threshold is None:
        # Default: median absolute deviation of the combined targets.
        y_all = np.concatenate([y1, y2])
        residual_threshold = np.median(np.abs(y_all - np.median(y_all)))
    else:
        residual_threshold = self.residual_threshold
    # random_state = check_random_state(self.random_state)

    # === Pre-fit with small subsets (4 points) === #
    # Allows to quickly pre-select some good configurations.
    w1_prefits, w2_prefits = lanes_ransac_prefit(X1, y1, X2, y2,
                                                 self.n_prefits,
                                                 self.max_trials,
                                                 self.is_valid_diffs,
                                                 self.is_valid_bounds)

    # === Select best pre-fit, using the full dataset === #
    post_fit = 0
    (w1, w2, inlier_mask1, inlier_mask2) = \
        lanes_ransac_select_best(X1, y1, X2, y2,
                                 w1_prefits, w2_prefits,
                                 residual_threshold, post_fit)
    self.w1_ = w1
    self.w2_ = w2

    # Set regression parameters.
    base_estimator1 = LinearRegression(fit_intercept=False)
    base_estimator1.coef_ = w1
    base_estimator1.intercept_ = 0.0
    base_estimator2 = LinearRegression(fit_intercept=False)
    base_estimator2.coef_ = w2
    base_estimator2.intercept_ = 0.0

    # Save final model parameters.
    self.estimator1_ = base_estimator1
    self.estimator2_ = base_estimator2
    self.inlier_mask1_ = inlier_mask1
    self.inlier_mask2_ = inlier_mask2

    # # Estimate final model using all inliers
    # # base_estimator1.fit(X1_inlier_best, y1_inlier_best)
    # # base_estimator2.fit(X2_inlier_best, y2_inlier_best)
    return self
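The RANSAC fitter above turns fixed coefficient vectors into usable predictors by assigning coef_ and intercept_ directly instead of calling fit; a small standalone sketch of that pattern (the coefficient values are illustrative):

import numpy as np
from sklearn.linear_model import LinearRegression

est = LinearRegression(fit_intercept=False)
est.coef_ = np.array([2.0, -1.0])   # pretend these came from a RANSAC fit
est.intercept_ = 0.0
# predict works because the fitted attributes are present
print(est.predict(np.array([[1.0, 1.0], [3.0, 0.5]])))  # [1.0, 5.5]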
### ages and net_worths need to be reshaped into 2D numpy arrays
### second argument of reshape command is a tuple of integers: (n_rows, n_columns)
### by convention, n_rows is the number of data points
### and n_columns is the number of features
ages = numpy.reshape(numpy.array(ages), (len(ages), 1))
net_worths = numpy.reshape(numpy.array(net_worths), (len(net_worths), 1))

from sklearn.model_selection import train_test_split
ages_train, ages_test, net_worths_train, net_worths_test = train_test_split(
    ages, net_worths, test_size=0.1, random_state=42)

### fill in a regression here! Name the regression object reg so that
### the plotting code below works, and you can see what your regression looks like
from sklearn.linear_model import LinearRegression
reg = LinearRegression()
reg.fit(ages_train, net_worths_train)
print("Slope %s" % reg.coef_)
print("Intercept %s" % reg.intercept_)
print("Score = ", reg.score(ages_test, net_worths_test))

try:
    plt.plot(ages, reg.predict(ages), color="blue")
except NameError:
    pass
### draw the scatterplot, with color-coded training and testing points
import matplotlib.pyplot as plt
for feature, target in zip(feature_test, target_test):
    plt.scatter(feature, target, color=test_color)
for feature, target in zip(feature_train, target_train):
    plt.scatter(feature, target, color=train_color)

### labels for the legend
plt.scatter(feature_test[0], target_test[0], color=test_color, label="test")
plt.scatter(feature_train[0], target_train[0], color=train_color, label="train")

from sklearn.linear_model import LinearRegression
reg = LinearRegression()
reg.fit(feature_train, target_train)
print("Slope %s" % reg.coef_)
print("Intercept %s" % reg.intercept_)
print("Score = ", reg.score(feature_test, target_test))

### draw the regression line, once it's coded
try:
    plt.plot(feature_test, reg.predict(feature_test))
except NameError:
    pass
reg.fit(feature_test, target_test)
plt.plot(feature_train, reg.predict(feature_train), color="b")
plt.xlabel(features_list[1])
plt.ylabel(features_list[0])
plt.legend()
def fit(self, X1, y1, X2, y2, left_right_bounds=None):
    """Fit estimator using RANSAC algorithm.

    Namely, the fit is done in two main steps:
    - pre-fitting: quickly select n_prefits configurations which seem
      suitable given topological constraints;
    - finding best fit: select the pre-fit with the maximum number of
      inliers as the best fit.

    Inputs:
      X1, y1: Left lane points (supposedly);
      X2, y2: Right lane points (supposedly).
    """
    check_consistent_length(X1, y1)
    check_consistent_length(X2, y2)

    # Assume linear model by default
    min_samples = X1.shape[1] + 1
    if min_samples > X1.shape[0] or min_samples > X2.shape[0]:
        raise ValueError("`min_samples` may not be larger than number "
                         "of samples ``X1-2.shape[0]``.")

    # Check additional parameters...
    if self.stop_probability < 0 or self.stop_probability > 1:
        raise ValueError("`stop_probability` must be in range [0, 1].")
    if self.residual_threshold is None:
        # Default: median absolute deviation of the combined targets.
        y_all = np.concatenate([y1, y2])
        residual_threshold = np.median(np.abs(y_all - np.median(y_all)))
    else:
        residual_threshold = self.residual_threshold
    delta_left_right = (left_right_bounds[0, 0, 1] + left_right_bounds[0, 0, 0]) / 2.
    # random_state = check_random_state(self.random_state)

    # Set up lambdas for computing score.
    score_lambdas = np.copy(self.score_lambdas)
    score_lambdas[0] = score_lambdas[0] / (y1.size + y2.size)

    # Collections...
    self.w_fits = []
    self.w_fits_l2 = []
    self.inliers_masks = []
    self.n_inliers = []
    self.score_fits = []

    # === Left lane, and then, right lane === #
    w_left_prefits = lanes_ransac_prefit(X1, y1,
                                         self.n_prefits, self.max_trials,
                                         self.w_refs_left,
                                         self.is_valid_bounds_left)
    (w_left1, in_mask_left1, score_left1) = \
        lanes_ransac_select_best(X1, y1, w_left_prefits, residual_threshold,
                                 self.w_refs_left, score_lambdas)
    n_inliers_left1 = np.sum(in_mask_left1)

    w_refs = np.vstack((self.w_refs_right, np.reshape(w_left1, (1, 3))))
    is_valid_bounds = np.vstack((self.is_valid_bounds_right, left_right_bounds))
    w_right_prefits = lanes_ransac_prefit(X2, y2,
                                          self.n_prefits, self.max_trials,
                                          w_refs, is_valid_bounds)
    w0 = lane_translate(w_left1, delta_left_right)
    w_right_prefits = np.vstack((w0, w_right_prefits))
    (w_right1, in_mask_right1, score_right1) = \
        lanes_ransac_select_best(X2, y2, w_right_prefits, residual_threshold,
                                 self.w_refs_right, score_lambdas)
    n_inliers_right1 = np.sum(in_mask_right1)

    n_inliers1 = n_inliers_right1 + n_inliers_left1
    self.w_fits.append((w_left1, w_right1))
    self.n_inliers.append(n_inliers1)
    self.inliers_masks.append((in_mask_left1, in_mask_right1))
    self.score_fits.append((score_left1, score_right1))

    # === Right lane and then left lane === #
    w_right_prefits = lanes_ransac_prefit(X2, y2,
                                          self.n_prefits, self.max_trials,
                                          self.w_refs_right,
                                          self.is_valid_bounds_right)
    (w_right2, in_mask_right2, score_right2) = \
        lanes_ransac_select_best(X2, y2, w_right_prefits, residual_threshold,
                                 self.w_refs_right, score_lambdas)
    n_inliers_right2 = np.sum(in_mask_right2)

    w_refs = np.vstack((self.w_refs_left, np.reshape(w_right2, (1, 3))))
    is_valid_bounds = np.vstack((self.is_valid_bounds_left, left_right_bounds))
    w_left_prefits = lanes_ransac_prefit(X1, y1,
                                         self.n_prefits, self.max_trials,
                                         w_refs, is_valid_bounds)
    w0 = lane_translate(w_right2, -delta_left_right)
    w_left_prefits = np.vstack((w0, w_left_prefits))
    (w_left2, in_mask_left2, score_left2) = \
        lanes_ransac_select_best(X1, y1, w_left_prefits, residual_threshold,
                                 self.w_refs_left, score_lambdas)
    n_inliers_left2 = np.sum(in_mask_left2)

    n_inliers2 = n_inliers_right2 + n_inliers_left2
    self.w_fits.append((w_left2, w_right2))
    self.n_inliers.append(n_inliers2)
    self.inliers_masks.append((in_mask_left2, in_mask_right2))
    self.score_fits.append((score_left2, score_right2))

    # === Previous frame??? === #
    if self.w_refs_left.size > 0 and self.w_refs_right.size > 0:
        in_mask_left3 = lanes_inliers(X1, y1, self.w_refs_left[0],
                                      residual_threshold)
        in_mask_right3 = lanes_inliers(X2, y2, self.w_refs_right[0],
                                       residual_threshold)
        n_inliers3 = np.sum(in_mask_left3) + np.sum(in_mask_right3)
        score_left3 = lane_score(np.sum(in_mask_left3), self.w_refs_left[0],
                                 self.w_refs_left, score_lambdas)
        score_right3 = lane_score(np.sum(in_mask_right3), self.w_refs_right[0],
                                  self.w_refs_right, score_lambdas)
        self.w_fits.append((self.w_refs_left[0], self.w_refs_right[0]))
        self.n_inliers.append(n_inliers3)
        self.inliers_masks.append((in_mask_left3, in_mask_right3))
        self.score_fits.append((score_left3, score_right3))

    # L2 regression regularisation of fits.
    self.w_fits_l2 = copy.deepcopy(self.w_fits)
    if self.l2_scales is not None:
        for i in range(len(self.w_fits)):
            w1, w2 = self.w_fits[i]
            # Some regression: ignored when inversed matrix error.
            try:
                w_left = m_regression_exp(X1, y1, w1, self.l2_scales)
            except Exception:
                w_left = w1
            try:
                w_right = m_regression_exp(X2, y2, w2, self.l2_scales)
            except Exception:
                w_right = w2

            in_mask_left = lanes_inliers(X1, y1, w_left, residual_threshold)
            in_mask_right = lanes_inliers(X2, y2, w_right, residual_threshold)
            n_inliers = np.sum(in_mask_left) + np.sum(in_mask_right)
            score_left = lane_score(np.sum(in_mask_left), w_left,
                                    self.w_refs_left, score_lambdas)
            score_right = lane_score(np.sum(in_mask_right), w_right,
                                     self.w_refs_right, score_lambdas)

            self.w_fits_l2[i] = (w_left, w_right)
            self.n_inliers[i] = n_inliers
            self.inliers_masks[i] = (in_mask_left, in_mask_right)
            self.score_fits[i] = (score_left, score_right)

    # Best fit?
    scores = [s1 + s2 for (s1, s2) in self.score_fits]
    idx = np.argmax(scores)
    w_left, w_right = self.w_fits_l2[idx]
    in_mask_left, in_mask_right = self.inliers_masks[idx]

    # Smoothing.
    smoothing = self.smoothing
    if self.w_refs_left.size > 0 and self.w_refs_right.size > 0:
        w_left = smoothing * w_left + (1. - smoothing) * self.w_refs_left[0]
        w_right = smoothing * w_right + (1. - smoothing) * self.w_refs_right[0]
    self.w1_ = w_left
    self.w2_ = w_right

    # Set regression parameters.
    base_estimator1 = LinearRegression(fit_intercept=False)
    base_estimator1.coef_ = w_left
    base_estimator1.intercept_ = 0.0
    base_estimator2 = LinearRegression(fit_intercept=False)
    base_estimator2.coef_ = w_right
    base_estimator2.intercept_ = 0.0

    # Save final model parameters.
    self.estimator1_ = base_estimator1
    self.estimator2_ = base_estimator2
    self.inlier_mask1_ = in_mask_left
    self.inlier_mask2_ = in_mask_right

    # # Estimate final model using all inliers
    # # base_estimator1.fit(X1_inlier_best, y1_inlier_best)
    # # base_estimator2.fit(X2_inlier_best, y2_inlier_best)
    return self
from sklearn.linear_model import LinearRegression


class inp_reader(object):
    inp_features = list()
    inp_prices = list()
    features = list()

    def get_inp_features(self):
        return self.inp_features

    def get_inp_prices(self):
        return self.inp_prices

    def get_features(self):
        return self.features

    def read(self):
        F, N = map(int, input().split(' '))
        for _ in range(N):
            inp_f = list(map(float, input().strip().split()))
            self.inp_features.append(inp_f[:F:])
            self.inp_prices.append(inp_f[F::])
        questions = int(input())
        for _ in range(questions):
            self.features.append(list(map(float, input().split())))


reader = inp_reader()
reader.read()
inp_features = reader.get_inp_features()
inp_prices = reader.get_inp_prices()
features = reader.get_features()

model = LinearRegression()
model.fit(inp_features, inp_prices)
prices = model.predict(features)
for el in prices:
    print(el[0])