def __add_trend_feature(self, arr, abs_values=False):
     idx = np.array(range(len(arr)))
     if abs_values:
         arr = np.abs(arr)
     lr = LinearRegression()
     lr.fit(idx.reshape(-1, 1), arr)
     return lr.coef_[0]
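As a quick sanity check of the trend-feature idea (a standalone sketch, assuming only numpy and scikit-learn): the returned coefficient is the slope of a least-squares line through the values, so an upward-trending series yields a positive value.

# Standalone sketch of the trend-slope computation above.
import numpy as np
from sklearn.linear_model import LinearRegression

arr = np.array([1.0, 2.0, 2.5, 4.0, 5.5])
idx = np.arange(len(arr)).reshape(-1, 1)
slope = LinearRegression().fit(idx, arr).coef_[0]
print(slope)  # > 0 for this upward-trending series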
Example #2
    def linearRegression_sales(self):  # linear regression
        path = u'4.Advertising.csv'
        data = self.readFile(path)
        #         x=data[['TV', 'Radio', 'Newspaper']]
        x = data[['TV', 'Radio']]
        y = data['Sales']
        x_train, x_test, y_train, y_test = train_test_split(x,
                                                            y,
                                                            random_state=1)
        # print(x_train, y_train)
        linreg = LinearRegression()
        model = linreg.fit(x_train, y_train)
        print(model)
        print(linreg.coef_)
        print(linreg.intercept_)
        y_hat = linreg.predict(np.array(x_test))
        mse = np.average((y_hat - y_test)**2)
        rmse = np.sqrt(mse)
        print(mse, rmse)

        t = np.arange(len(x_test))
        plt.plot(t, y_test, 'r-', linewidth=2, label='Test')
        plt.plot(t, y_hat, 'g-', linewidth=2, label='Predict')
        plt.grid()
        plt.legend(loc='upper right')
        plt.show()
Example #3
class PredictLoss(BaseLR):
    def __init__(self, hist=30, posmax=15, lr=0.2):
        from sklearn.linear_model import LinearRegression
        from collections import deque
        self.hist = hist
        self.track = deque(maxlen=self.hist)
        self.regr = LinearRegression()
        self.poscases = 0
        self.posmax = posmax
        self.lr = lr

    def __call__(self, env):
        if len(self.track) > 5:
            y = np.array(self.track)
            x = np.arange(len(y)).reshape(-1, 1)  # one time index per tracked loss
            self.regr.fit(x, y)
            coef_ = self.regr.coef_[0]
            preds = self.regr.predict(x)
            fst = preds[0]
            lst = preds[-1]
            e = np.sqrt(((y - preds)**2).mean())
            if coef_ > 0:
                self.poscases += 1
                if self.poscases >= self.posmax:
                    raise EarlyStopException
            else:
                self.poscases -= 1
                if self.poscases < 0:
                    self.poscases = 0
            diff = np.abs(fst - lst)
            coef = np.clip(diff/e, 1e-6, 1)
            lr = self.lr*coef
            print(lr, e, diff, coef_, coef, file=open('log.txt', 'a'))
            env.model.set_param("learning_rate", lr)
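The core idea of this callback can be checked in isolation: fit a line to the recent loss history, count positive slopes toward early stopping, and scale the base learning rate by how large the predicted start-to-end change is relative to the fit's RMSE. A minimal sketch, assuming only numpy and scikit-learn:

# Sketch of the slope/scale logic used by PredictLoss.__call__ above.
import numpy as np
from sklearn.linear_model import LinearRegression

losses = np.array([0.50, 0.45, 0.44, 0.46, 0.47, 0.49])  # recent eval losses
x = np.arange(len(losses)).reshape(-1, 1)
reg = LinearRegression().fit(x, losses)
preds = reg.predict(x)
rmse = np.sqrt(((losses - preds) ** 2).mean())
diff = np.abs(preds[0] - preds[-1])
coef = np.clip(diff / rmse, 1e-6, 1)  # multiplier applied to the base lr
print(reg.coef_[0] > 0, 0.2 * coef)   # positive slope counts toward early stop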
Example #4
def compare_panorama_cubic(greenery_measure="vegetation", **kwargs):
    """ Compare/plot the segmentation results of panoramic and cubic
        images to each other. Also use linear regression to determine
        how they relate to each other.
    """

    green_kwargs = select_green_model(greenery_measure)

    panorama_tiler = TileManager(cubic_pictures=False, **kwargs, **green_kwargs)
    cubic_tiler = TileManager(cubic_pictures=True, **kwargs, **green_kwargs)

    panorama_green = panorama_tiler.green_direct()
    cubic_green = cubic_tiler.green_direct()

    _remove_missing(panorama_green, cubic_green)
    x = np.arange(0, 0.8, 0.01)

    x_pano = np.array(panorama_green["green"]).reshape(-1, 1)
    y_cubic = np.array(cubic_green["green"])
    reg = LinearRegression().fit(x_pano, y_cubic)
    print(reg.score(x_pano, y_cubic))
    print(reg.coef_[0], reg.intercept_)
    plt.figure()
    plt.scatter(panorama_green["green"], cubic_green["green"])
    plt.plot(x, reg.predict(x.reshape(-1, 1)))
    plt.xlabel("panoramas")
    plt.ylabel("cubic")
    plt.xlim(0, max(0.001, max(panorama_green["green"])*1.1))
    plt.ylim(0, max(0.001, max(cubic_green["green"])*1.1))

    plot_greenery(panorama_green, show=False, title="panorama")
    plot_greenery(cubic_green, show=False, title="cubic")
    plt.show()
Example #5
 def test_predict_hdf_dataframe(self):
     # create some data
     x = np.array(list(range(0, 10)))
     y = x * 2
     df = pd.DataFrame({'x': x,
                        'y': y})
     X = df['x']
     Y = df['y']
     # put into Omega -- assume a client with pandas, scikit learn
     os.environ['DJANGO_SETTINGS_MODULE'] = ''
     om = Omega()
     om.runtime.pure_python = True
     om.runtime.celeryapp.conf.CELERY_ALWAYS_EAGER = True
     om.datasets.put(X, 'datax', as_hdf=True)
     om.datasets.put(Y, 'datay', as_hdf=True)
     # have Omega fit the model then predict
     lr = LinearRegression()
     lr.fit(reshaped(X), reshaped(Y))
     pred = lr.predict(reshaped(X))
     om.models.put(lr, 'mymodel2')
     # -- using data provided locally
     #    note this is the same as
     #        om.datasets.put(X, 'foo')
     #        om.runtimes.model('mymodel2').predict('foo')
     result = om.runtime.model('mymodel2').predict('datax')
     pred2 = result.get()
     self.assertTrue(
         (pred == pred2).all(), "runtimes prediction is different")
Example #6
    def get_scikit_prediction(x=np.array([1, 2, 3]), y=np.array([1, 2, 3])):

        from sklearn.linear_model import LinearRegression as ScikitLinearRegression

        # scikit-learn expects a 2-D feature matrix, so reshape the 1-D input
        x = x.reshape(-1, 1)
        regression = ScikitLinearRegression()
        regression.fit(x, y)

        return regression.predict(x)
Example #7
def train():
    X = np.array([[1, 2, 3, 4], [4, 5, 6, 7], [7, 8, 9, 10]])
    y = np.array([10, 20, 30])
    X_test = np.array([[10, 20, 30, 40], [40, 50, 60, 70], [70, 80, 90, 100]])
    reg = LinearRegression()
    reg.fit(X, y)
    print('coef_:', reg.coef_)
    print('intercept_:', reg.intercept_)
    print('predict:', reg.predict(X_test))
Example #8
def test_linear_regression_n_jobs():
    """
    Test for the n_jobs parameter on the fit method and the constructor
    """
    X = [[1], [2]]
    Y = [1, 2]
    clf = LinearRegression()
    # Note: relies on an older scikit-learn API where fit() accepted n_jobs
    # as a third argument; current releases take sample_weight there instead.
    clf_fit = clf.fit(X, Y, 4)
    assert_equal(clf_fit.n_jobs, clf.n_jobs)
    assert_equal(clf.n_jobs, 1)
Example #10
 def __init__(self,
              treatment_cols,
              nusiance_cols,
              effect_estimator=LinearRegression(fit_intercept=False),
              treatment_estimator=LinearRegression(fit_intercept=False),
              y_estimator=LinearRegression(fit_intercept=False)):
     self.nusiance_cols = nusiance_cols
     self.treatment_cols = treatment_cols
     self.effect_estimator = effect_estimator
     self.treatment_estimator = treatment_estimator
     self.y_estimator = y_estimator
Example #11
 def test_fit(self):
     # create some data
     x = np.array(list(range(0, 10)))
     y = x * 2
     df = pd.DataFrame({'x': x,
                        'y': y})
     X = df[['x']]
     Y = df[['y']]
     # put into Omega
     os.environ['DJANGO_SETTINGS_MODULE'] = ''
     om = Omega()
     om.runtime.celeryapp.conf.CELERY_ALWAYS_EAGER = True
     om.datasets.put(X, 'datax')
     om.datasets.put(Y, 'datay')
     om.datasets.get('datax')
     om.datasets.get('datay')
     # create a model locally, store (unfitted) in Omega
     lr = LinearRegression()
     om.models.put(lr, 'mymodel2')
     self.assertIn('mymodel2', om.models.list('*'))
     # predict locally for comparison
     lr.fit(X, Y)
     pred = lr.predict(X)
     # try predicting without fitting
     with self.assertRaises(NotFittedError):
         result = om.runtime.model('mymodel2').predict('datax')
         result.get()
     # have Omega fit the model then predict
     result = om.runtime.model('mymodel2').fit('datax', 'datay')
     result.get()
     # check the new model version metadata includes the datax/y references
     meta = om.models.metadata('mymodel2')
     self.assertIn('metaX', meta.attributes)
     self.assertIn('metaY', meta.attributes)
     # -- using data already in Omega
     result = om.runtime.model('mymodel2').predict('datax')
     pred1 = result.get()
     # -- using data provided locally
     #    note this is the same as
     #        om.datasets.put(X, 'foo')
     #        om.runtimes.model('mymodel2').predict('foo')
     result = om.runtime.model('mymodel2').fit(X, Y)
     result = om.runtime.model('mymodel2').predict(X)
     pred2 = result.get()
     # -- check the local data provided to fit was stored as intended
     meta = om.models.metadata('mymodel2')
     self.assertIn('metaX', meta.attributes)
     self.assertIn('metaY', meta.attributes)
     self.assertIn('_fitX', meta.attributes.get('metaX').get('collection'))
     self.assertIn('_fitY', meta.attributes.get('metaY').get('collection'))
     self.assertTrue(
         (pred == pred1).all(), "runtimes prediction is different(1)")
     self.assertTrue(
         (pred == pred2).all(), "runtimes prediction is different(2)")
Example #12
def test_linear_regression_sparse(random_state=0):
    "Test that linear regression also works with sparse data"
    random_state = check_random_state(random_state)
    n = 100
    X = sparse.eye(n, n)
    beta = random_state.rand(n)
    y = X * beta[:, np.newaxis]

    ols = LinearRegression()
    ols.fit(X, y.ravel())
    assert_array_almost_equal(beta, ols.coef_ + ols.intercept_)
    assert_array_almost_equal(ols.residues_, 0)
Example #14
    def __init__(self,
                 base_estimator: RegressorMixin = None,
                 n_trees: int = 50,
                 sigma_a: int = 0.001,
                 sigma_b: float = 0.001,
                 n_samples: int = 200,
                 n_burn: int = 200,
                 p_grow: float = 0.5,
                 p_prune: float = 0.5,
                 alpha: float = 0.95,
                 beta: float = 2.):

        if base_estimator is not None:
            self.base_estimator = clone(base_estimator)
        else:
            self.base_estimator = LinearRegression()
        super().__init__(n_trees=n_trees,
                         sigma_a=sigma_a,
                         sigma_b=sigma_b,
                         n_samples=n_samples,
                         n_burn=n_burn,
                         p_grow=p_grow,
                         p_prune=p_prune,
                         alpha=alpha,
                         beta=beta)
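The constructor above relies on sklearn.base.clone to avoid sharing a mutable estimator between instances. A short illustration of what clone does with a supplied estimator:

# clone() returns an unfitted copy with the same hyperparameters.
from sklearn.base import clone
from sklearn.linear_model import LinearRegression

template = LinearRegression(fit_intercept=False)
copy = clone(template)
print(copy is template)                     # False: a fresh, unfitted object
print(copy.get_params()['fit_intercept'])   # False: hyperparameters carried over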
Example #15
def test_multiple_response_regressor():
    np.random.seed(1)
    m = 100000
    n = 10

    X = np.random.normal(size=(m, n))
    beta1 = np.random.normal(size=(n, 1))
    beta2 = np.random.normal(size=(n, 1))

    y1 = np.dot(X, beta1)
    p2 = 1. / (1. + np.exp(-np.dot(X, beta2)))
    y2 = np.random.binomial(n=1, p=p2)
    y = np.concatenate([y1, y2], axis=1)

    model = MaskedEstimator(
        LinearRegression(), [True, False]) & MaskedEstimator(
            ProbaPredictingEstimator(LogisticRegression()), [False, True])
    #     MultipleResponseEstimator([('linear', np.array([True, False], dtype=bool), LinearRegression()),
    #                                        ('logistic', np.array([False, True], dtype=bool), ProbaPredictingEstimator(LogisticRegression()))])
    model.fit(X, y)

    assert np.mean(beta1 - model.estimators_[0].estimator_.coef_) < .01
    assert np.mean(beta2 -
                   model.estimators_[1].estimator_.estimator_.coef_) < .01
    model.get_params()
    model.predict(X)
Example #16
def test_backward_elimination_estimation():
    np.random.seed(0)
    m = 100000
    n = 6
    factor = .9

    X = np.random.normal(size=(m, n))
    beta = 100 * np.ones(shape=n)
    for i in range(1, n):
        beta[i] = factor * beta[i - 1]
    beta = np.random.permutation(beta)[:, None]
    #     beta = np.random.normal(size=(n,1))

    y = np.dot(X, beta) + 0.01 * np.random.normal(size=(m, 1))

    target_sequence = np.ravel(np.argsort(beta**2, axis=0))
    model1 = BackwardEliminationEstimator(
        SingleEliminationFeatureImportanceEstimatorCV(LinearRegression()))
    model1.fit(X, y)

    #     model2 = BRFE(FeatureImportanceEstimatorCV(LinearRegression()))
    #     model2.fit(X, y)

    np.testing.assert_array_equal(model1.elimination_sequence_,
                                  target_sequence)
Example #17
 def fit(self, X, y=None):
     self._sklearn_model = SKLModel(**self._hyperparams)
     if (y is not None):
         self._sklearn_model.fit(X, y)
     else:
         self._sklearn_model.fit(X)
     return self
Example #18
 def test_fit_pipeline(self):
     # create some data
     x = np.array(list(range(0, 10)))
     y = x * 2
     df = pd.DataFrame({'x': x,
                        'y': y})
     X = df[['x']]
     Y = df[['y']]
     # put into Omega
     os.environ['DJANGO_SETTINGS_MODULE'] = ''
     om = Omega()
     om.runtime.celeryapp.conf.CELERY_ALWAYS_EAGER = True
     om.datasets.put(X, 'datax')
     om.datasets.put(Y, 'datay')
     om.datasets.get('datax')
     om.datasets.get('datay')
     # create a pipeline locally, store (unfitted) in Omega
     p = Pipeline([
         ('lr', LinearRegression()),
     ])
     om.models.put(p, 'mymodel2')
     self.assertIn('mymodel2', om.models.list('*'))
     # predict locally for comparison
     p.fit(reshaped(X), reshaped(Y))
     pred = p.predict(reshaped(X))
     # have Omega fit the model then predict
     result = om.runtime.model('mymodel2').fit('datax', 'datay')
     result.get()
     result = om.runtime.model('mymodel2').predict('datax')
     pred1 = result.get()
     self.assertTrue(
         (pred == pred1).all(), "runtimes prediction is different(1)")
Example #19
def test_k_best_feature_selector():
    np.random.seed(0)
    m = 100000
    n = 6
    factor = .9

    X = np.random.normal(size=(m, n))
    beta = 100 * np.ones(shape=n)
    for i in range(1, n):
        beta[i] = factor * beta[i - 1]
    beta = np.random.permutation(beta)[:, None]
    #     beta = np.random.normal(size=(n,1))

    y = np.dot(X, beta) + 0.01 * np.random.normal(size=(m, 1))

    target_vars = np.ravel(np.argsort(beta**2, axis=0))[::-1][:3]
    target_support = np.zeros(shape=n, dtype=bool)
    target_support[target_vars] = True

    model1 = BestKFeatureSelector(UnivariateFeatureImportanceEstimatorCV(
        LinearRegression()),
                                  k=3)
    model1.fit(X, y)

    np.testing.assert_array_equal(model1.support_, target_support)
Example #20
    def polynomial_linear_regression(self):

        best_accuracy = 0
        best_degree = 0

        #         for degree in range(2, 10):

        degree = 2
        model = make_pipeline(
            PolynomialFeatures(degree),
            LinearRegression())  # polynomial transformation of this degree
        model.fit(self.X_train, self.Y_train)  # fit the model
        ''' check accuracy using test dataset '''

        predicted_y = model.predict(self.X_test)
        # round the continuous predictions to the nearer of the two labels
        predicted_y = [
            1 if (abs(1 - val) < abs(val)) else 0 for val in predicted_y
        ]

        accuracy = accuracy_score(self.Y_test, predicted_y)

        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_degree = degree
            self.best_model = model

        print(best_degree)

        return model
Example #21
    def __init__(self, base_estimator: RegressorMixin = None, **kwargs):

        if base_estimator is not None:
            self.base_estimator = clone(base_estimator)
        else:
            self.base_estimator = LinearRegression()
        super().__init__(**kwargs)
Example #22
def test_pipeline():
    np.random.seed(1)
    m = 10000
    n = 10

    X = np.random.normal(size=(m, n))
    beta = np.random.normal(size=(n, 1))
    beta[np.random.binomial(p=2.0 / float(n), n=1, size=n).astype(bool)] = 0
    y = np.dot(X, beta) + 0.5 * np.random.normal(size=(m, 1))
    beta_reduced = beta[beta != 0]

    model = BackwardEliminationEstimator(
        SingleEliminationFeatureImportanceEstimatorCV(LinearRegression()))
    model >>= LinearRegression()

    model.fit(X, y)
    assert np.max(np.abs(model.final_stage_.coef_ - beta_reduced)) < .1
Example #24
def test_ridge_vs_lstsq():
    """On alpha=0., Ridge and OLS yield the same solution."""

    # we need more samples than features
    n_samples, n_features = 5, 4
    y = rng.randn(n_samples)
    X = rng.randn(n_samples, n_features)

    ridge = Ridge(alpha=0., fit_intercept=False)
    ols = LinearRegression(fit_intercept=False)

    ridge.fit(X, y)
    ols.fit(X, y)
    assert_almost_equal(ridge.coef_, ols.coef_)
Example #25
def test_delta_transformer():
    fit_model = DoublePipeline(
        [('xdelta', DeltaTransformer()),
         ('linreg', LinearRegression(fit_intercept=False))],
        [('ydelta', DeltaTransformer())]).fit(X, Y)

    assert (np.isclose(fit_model.predict(X), np.squeeze(Y)).all())
    assert (np.isclose(fit_model.x_pipe_.steps[-1][1].coef_,
                       [1.0, 0.0, 0.0]).all())
Example #26
def test_raises_value_error_if_sample_weights_greater_than_1d():
    # Sample weights must be either scalar or 1D

    n_sampless = [2, 3]
    n_featuress = [3, 2]

    for n_samples, n_features in zip(n_sampless, n_featuress):
        X = rng.randn(n_samples, n_features)
        y = rng.randn(n_samples)
        sample_weights_OK = rng.randn(n_samples) ** 2 + 1
        sample_weights_OK_1 = 1.
        sample_weights_OK_2 = 2.

        reg = LinearRegression()

        # make sure the "OK" sample weights actually work
        reg.fit(X, y, sample_weights_OK)
        reg.fit(X, y, sample_weights_OK_1)
        reg.fit(X, y, sample_weights_OK_2)
Example #27
def test_fit_intercept():
    # Test assertions on betas shape.
    X2 = np.array([[0.38349978, 0.61650022], [0.58853682, 0.41146318]])
    X3 = np.array([[0.27677969, 0.70693172, 0.01628859],
                   [0.08385139, 0.20692515, 0.70922346]])
    y = np.array([1, 1])

    lr2_without_intercept = LinearRegression(fit_intercept=False).fit(X2, y)
    lr2_with_intercept = LinearRegression(fit_intercept=True).fit(X2, y)

    lr3_without_intercept = LinearRegression(fit_intercept=False).fit(X3, y)
    lr3_with_intercept = LinearRegression(fit_intercept=True).fit(X3, y)

    assert_equal(lr2_with_intercept.coef_.shape,
                 lr2_without_intercept.coef_.shape)
    assert_equal(lr3_with_intercept.coef_.shape,
                 lr3_without_intercept.coef_.shape)
    assert_equal(lr2_without_intercept.coef_.ndim,
                 lr3_without_intercept.coef_.ndim)
Example #28
class LinearRegressionImpl():

    def __init__(self, fit_intercept=True, normalize=False, copy_X=True, n_jobs=None):
        self._hyperparams = {
            'fit_intercept': fit_intercept,
            'normalize': normalize,
            'copy_X': copy_X,
            'n_jobs': n_jobs}

    def fit(self, X, y=None):
        self._sklearn_model = SKLModel(**self._hyperparams)
        if (y is not None):
            self._sklearn_model.fit(X, y)
        else:
            self._sklearn_model.fit(X)
        return self

    def predict(self, X):
        return self._sklearn_model.predict(X)
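A short usage sketch of this wrapper (hypothetical data; it assumes SKLModel is scikit-learn's LinearRegression, as the hyperparameter names suggest, on a scikit-learn release older than 1.2, which still accepted the normalize keyword):

# Hypothetical usage of LinearRegressionImpl; SKLModel is assumed to be
# sklearn.linear_model.LinearRegression (scikit-learn < 1.2, where the
# `normalize` keyword still existed).
import numpy as np

X = np.arange(10).reshape(-1, 1)
y = 3 * X.ravel() + 1
impl = LinearRegressionImpl(fit_intercept=True)
impl.fit(X, y)
print(impl.predict(X[:3]))  # approximately [1., 4., 7.]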
Example #30
 def __init__(self,
              cols_1,
              cols_2,
              estimator_1=LinearRegression(fit_intercept=False),
              estimator_2=RandomForestRegressor(),
              iters=2):
     self.cols_1 = cols_1
     self.cols_2 = cols_2
     self.estimator_1 = estimator_1
     self.estimator_2 = estimator_2
     self.iters = iters
Example #31
def test_super_learner():
    np.random.seed(0)
    X, y = load_boston(return_X_y=True)
    X = pandas.DataFrame(X, columns=['x%d' % i for i in range(X.shape[1])])
    model = CrossValidatingEstimator(SuperLearner(
        [('linear', LinearRegression()), ('earth', Earth(max_degree=2))],
        LinearRegression(),
        cv=5,
        n_jobs=1),
                                     cv=5)
    cv_pred = model.fit_predict(X, y)
    pred = model.predict(X)
    cv_r2 = r2_score(y, cv_pred)
    best_component_cv_r2 = max([
        r2_score(
            y,
            first(model.estimator_.cross_validating_estimators_.values()).
            cv_predictions_) for i in range(2)
    ])
    assert cv_r2 >= .9 * best_component_cv_r2

    code = sklearn2code(model, ['predict'], numpy_flat)
    module = exec_module('module', code)
    test_pred = module.predict(**X)
    try:
        assert_array_almost_equal(np.ravel(pred), np.ravel(test_pred))
    except AssertionError:
        idx = np.abs(np.ravel(pred) - np.ravel(test_pred)) > .000001
        print(np.ravel(pred)[idx])
        print(np.ravel(test_pred)[idx])
        raise
    print(r2_score(y, pred))
    print(r2_score(y, cv_pred))

    print(
        max([
            r2_score(
                y,
                first(model.estimator_.cross_validating_estimators_.values()).
                cv_predictions_) for i in range(2)
        ]))
Example #32
def test_linear_regression_sample_weights():
    # TODO: loop over sparse data as well

    rng = np.random.RandomState(0)

    # It would not work with under-determined systems
    for n_samples, n_features in ((6, 5), ):

        y = rng.randn(n_samples)
        X = rng.randn(n_samples, n_features)
        sample_weight = 1.0 + rng.rand(n_samples)

        for intercept in (True, False):

            # LinearRegression with explicit sample_weight
            reg = LinearRegression(fit_intercept=intercept)
            reg.fit(X, y, sample_weight=sample_weight)
            coefs1 = reg.coef_
            inter1 = reg.intercept_

            assert_equal(reg.coef_.shape, (X.shape[1], ))  # sanity checks
            assert_greater(reg.score(X, y), 0.5)

            # Closed form of the weighted least square
            # theta = (X^T W X)^(-1) * X^T W y
            W = np.diag(sample_weight)
            if intercept is False:
                X_aug = X
            else:
                dummy_column = np.ones(shape=(n_samples, 1))
                X_aug = np.concatenate((dummy_column, X), axis=1)

            coefs2 = linalg.solve(
                X_aug.T.dot(W).dot(X_aug),
                X_aug.T.dot(W).dot(y))

            if intercept is False:
                assert_array_almost_equal(coefs1, coefs2)
            else:
                assert_array_almost_equal(coefs1, coefs2[1:])
                assert_almost_equal(inter1, coefs2[0])
Example #33
 def __init__(self,
              fit_intercept=True,
              normalize=False,
              copy_X=True,
              n_jobs=None):
     self._hyperparams = {
         'fit_intercept': fit_intercept,
         'normalize': normalize,
         'copy_X': copy_X,
         'n_jobs': n_jobs
     }
     self._wrapped_model = Op(**self._hyperparams)
Example #34
 def test_score(self):
     # create some data
     x = np.array(list(range(0, 10)))
     y = x * 2
     df = pd.DataFrame({'x': x,
                        'y': y})
     X = df[['x']]
     Y = df[['y']]
     # put into Omega
     os.environ['DJANGO_SETTINGS_MODULE'] = ''
     om = Omega()
     om.runtime.celeryapp.conf.CELERY_ALWAYS_EAGER = True
     om.datasets.put(X, 'datax')
     om.datasets.put(Y, 'datay')
     om.datasets.get('datax')
     om.datasets.get('datay')
     # create a model locally, fit it, store in Omega
     lr = LinearRegression()
     lr.fit(X, Y)
     scores = lr.score(X, Y)
     om.models.put(lr, 'mymodel')
Example #36
def test_linear_regression_multiple_outcome(random_state=0):
    # Test multiple-outcome linear regressions
    X, y = make_regression(random_state=random_state)

    Y = np.vstack((y, y)).T
    n_features = X.shape[1]

    reg = LinearRegression()
    reg.fit((X), Y)
    assert reg.coef_.shape == (2, n_features)
    Y_pred = reg.predict(X)
    reg.fit(X, y)
    y_pred = reg.predict(X)
    assert_array_almost_equal(np.vstack((y_pred, y_pred)).T, Y_pred, decimal=3)
Example #37
def test_linear_regression_multiple_outcome(random_state=0):
    # Test multiple-outcome linear regressions
    X, y = make_regression(random_state=random_state)

    Y = np.vstack((y, y)).T
    n_features = X.shape[1]

    clf = LinearRegression(fit_intercept=True)
    clf.fit((X), Y)
    assert_equal(clf.coef_.shape, (2, n_features))
    Y_pred = clf.predict(X)
    clf.fit(X, y)
    y_pred = clf.predict(X)
    assert_array_almost_equal(np.vstack((y_pred, y_pred)).T, Y_pred, decimal=3)
Example #38
def test_linear_regression():
    # Test LinearRegression on a simple dataset.
    # a simple dataset
    X = [[1], [2]]
    Y = [1, 2]

    clf = LinearRegression()
    clf.fit(X, Y)

    assert_array_almost_equal(clf.coef_, [1])
    assert_array_almost_equal(clf.intercept_, [0])
    assert_array_almost_equal(clf.predict(X), [1, 2])

    # test it also for degenerate input
    X = [[1]]
    Y = [0]

    clf = LinearRegression()
    clf.fit(X, Y)
    assert_array_almost_equal(clf.coef_, [0])
    assert_array_almost_equal(clf.intercept_, [0])
    assert_array_almost_equal(clf.predict(X), [0])
Example #39
class StackedRegression(LinearModel, RegressorMixin):
    def __init__(self, weights=None, cv_train_size=None):
        estimators = []
        estimators.append(KNeighborsRegressor(n_neighbors=3))
        estimators.append(DecisionTreeRegressor())
        estimators.append(BayesianRidge())
        # estimators.append(BayesianRidge())
        self.estimators = estimators
        self.stacker = LinearRegression()
        self.weights = weights if weights is not None else {}
        self.cv_train_size = cv_train_size if cv_train_size is not None else 0.7
        self._is_fitted = False

    def fit_stack(self, X, y):
        print('fitting')
        print(X.shape)
        n_train = int(X.shape[0] * self.cv_train_size)
        for estimator in self.estimators:
            estimator.fit(X[:n_train, :], y[:n_train])
        predictions = np.concatenate([np.matrix(estimator.predict(X[n_train:, :])).transpose()
                                      for estimator in self.estimators], axis=1)
        self.stacker.fit(predictions, y[n_train:])
        self._is_fitted = True
        print('fitted')
        print(self.stacker.residues_)

    def fit(self, X, y):
        if not self._is_fitted:
            raise NotFittedError('StackedRegression must call fit_stack before fit.')
        for estimator in self.estimators:
            estimator.fit(X, y)

    def predict(self, X):
        predictions = np.concatenate([np.matrix(estimator.predict(X)).transpose()
                                      for estimator in self.estimators], axis=1)
        return self.stacker.predict(predictions)
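A usage sketch for StackedRegression (hypothetical data; assumes the estimator classes used above, KNeighborsRegressor, DecisionTreeRegressor, BayesianRidge and LinearRegression, are imported, and the older scikit-learn this class targets, since fit_stack reads stacker.residues_, an attribute removed in later releases). Note that fit_stack must be called before fit:

# Hypothetical usage of StackedRegression above.
import numpy as np

rng = np.random.RandomState(0)
X = rng.normal(size=(100, 3))
y = X @ np.array([1.0, -2.0, 0.5])
sr = StackedRegression()
sr.fit_stack(X, y)  # fit base estimators on a split, stacker on held-out predictions
sr.fit(X, y)        # refit base estimators on the full data
print(sr.predict(X[:5]))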
Example #41
def test_linear_regression_sparse_multiple_outcome(random_state=0):
    "Test multiple-outcome linear regressions with sparse data"
    random_state = check_random_state(random_state)
    X, y = make_sparse_uncorrelated(random_state=random_state)
    X = sparse.coo_matrix(X)
    Y = np.vstack((y, y)).T
    n_features = X.shape[1]

    ols = LinearRegression()
    ols.fit(X, Y)
    assert_equal(ols.coef_.shape, (2, n_features))
    Y_pred = ols.predict(X)
    ols.fit(X, y.ravel())
    y_pred = ols.predict(X)
    assert_array_almost_equal(np.vstack((y_pred, y_pred)).T, Y_pred, decimal=3)
Example #43
def test_linear_regression_sparse_equal_dense(normalize, fit_intercept):
    # Test that linear regression agrees between sparse and dense
    rng = check_random_state(0)
    n_samples = 200
    n_features = 2
    X = rng.randn(n_samples, n_features)
    X[X < 0.1] = 0.
    Xcsr = sparse.csr_matrix(X)
    y = rng.rand(n_samples)
    params = dict(normalize=normalize, fit_intercept=fit_intercept)
    clf_dense = LinearRegression(**params)
    clf_sparse = LinearRegression(**params)
    clf_dense.fit(X, y)
    clf_sparse.fit(Xcsr, y)
    assert clf_dense.intercept_ == pytest.approx(clf_sparse.intercept_)
    assert_allclose(clf_dense.coef_, clf_sparse.coef_)
Example #44
def test_linear_regression_sample_weights():
    rng = np.random.RandomState(0)

    for n_samples, n_features in ((6, 5), (5, 10)):
        y = rng.randn(n_samples)
        X = rng.randn(n_samples, n_features)
        sample_weight = 1.0 + rng.rand(n_samples)

        clf = LinearRegression()
        clf.fit(X, y, sample_weight)
        coefs1 = clf.coef_

        assert_equal(clf.coef_.shape, (X.shape[1], ))
        assert_greater(clf.score(X, y), 0.9)
        assert_array_almost_equal(clf.predict(X), y)

        # Sample weight can be implemented via a simple rescaling
        # for the square loss.
        scaled_y = y * np.sqrt(sample_weight)
        scaled_X = X * np.sqrt(sample_weight)[:, np.newaxis]
        clf.fit(scaled_X, scaled_y)
        coefs2 = clf.coef_

        assert_array_almost_equal(coefs1, coefs2)
Example #45
    def fit(self, X1, y1, X2, y2):
        """Fit estimator using RANSAC algorithm.

        Namely, the fit is done in two main steps:
        - pre-fitting: quickly select n_prefits configurations which seem
        suitable given topological constraints.
        - finding the best fit: select the pre-fit with the maximum number of
        inliers as the best fit.

        Inputs:
          X1, y1: Left lane points (supposedly)
          X2, y2: Right lane points (supposedly)
        """
        check_consistent_length(X1, y1)
        check_consistent_length(X2, y2)

        # Assume linear model by default
        min_samples = X1.shape[1] + 1
        if min_samples > X1.shape[0] or min_samples > X2.shape[0]:
            raise ValueError("`min_samples` may not be larger than number "
                             "of samples ``X1-2.shape[0]``.")

        # Check additional parameters...
        if self.stop_probability < 0 or self.stop_probability > 1:
            raise ValueError("`stop_probability` must be in range [0, 1].")
        if self.residual_threshold is None:
            # Default: median absolute deviation of all target values (both lanes).
            y_all = np.concatenate((y1, y2))
            residual_threshold = np.median(np.abs(y_all - np.median(y_all)))
        else:
            residual_threshold = self.residual_threshold
        # random_state = check_random_state(self.random_state)

        # === Pre-fit with small subsets (4 points) === #
        # Allows to quickly pre-select some good configurations.
        w1_prefits, w2_prefits = lanes_ransac_prefit(X1, y1, X2, y2,
                                                     self.n_prefits,
                                                     self.max_trials,
                                                     self.is_valid_diffs,
                                                     self.is_valid_bounds)

        # === Select best pre-fit, using the full dataset === #
        post_fit = 0
        (w1,
         w2,
         inlier_mask1,
         inlier_mask2) = lanes_ransac_select_best(X1, y1, X2, y2,
                                                  w1_prefits, w2_prefits,
                                                  residual_threshold,
                                                  post_fit)
        self.w1_ = w1
        self.w2_ = w2

        # Set regression parameters.
        base_estimator1 = LinearRegression(fit_intercept=False)
        base_estimator1.coef_ = w1
        base_estimator1.intercept_ = 0.0
        base_estimator2 = LinearRegression(fit_intercept=False)
        base_estimator2.coef_ = w2
        base_estimator2.intercept_ = 0.0

        # Save final model parameters.
        self.estimator1_ = base_estimator1
        self.estimator2_ = base_estimator2

        self.inlier_mask1_ = inlier_mask1
        self.inlier_mask2_ = inlier_mask2

        # # Estimate final model using all inliers
        # # base_estimator1.fit(X1_inlier_best, y1_inlier_best)
        # # base_estimator2.fit(X2_inlier_best, y2_inlier_best)

        return self

Example #46
### ages and net_worths need to be reshaped into 2D numpy arrays
### second argument of reshape command is a tuple of integers: (n_rows, n_columns)
### by convention, n_rows is the number of data points
### and n_columns is the number of features
ages       = numpy.reshape( numpy.array(ages), (len(ages), 1))
net_worths = numpy.reshape( numpy.array(net_worths), (len(net_worths), 1))
from sklearn.model_selection import train_test_split
ages_train, ages_test, net_worths_train, net_worths_test = train_test_split(ages, net_worths, test_size=0.1, random_state=42)

### fill in a regression here!  Name the regression object reg so that
### the plotting code below works, and you can see what your regression looks like
from sklearn.linear_model import LinearRegression

reg = LinearRegression()
reg.fit(ages_train, net_worths_train)
print("Slope %s" % reg.coef_)
print("Intercept %s" % reg.intercept_)

print("Score = ", reg.score(ages_test, net_worths_test))






try:
    plt.plot(ages, reg.predict(ages), color="blue")
except NameError:
    pass
Example #47
### draw the scatterplot, with color-coded training and testing points
import matplotlib.pyplot as plt
for feature, target in zip(feature_test, target_test):
    plt.scatter( feature, target, color=test_color ) 
for feature, target in zip(feature_train, target_train):
    plt.scatter( feature, target, color=train_color ) 

### labels for the legend
plt.scatter(feature_test[0], target_test[0], color=test_color, label="test")
plt.scatter(feature_train[0], target_train[0], color=train_color, label="train")


from sklearn.linear_model import LinearRegression

reg = LinearRegression()
reg.fit(feature_train, target_train)
print("Slope %s" % reg.coef_)
print("Intercept %s" % reg.intercept_)

print("Score = ", reg.score(feature_test, target_test))
### draw the regression line, once it's coded
try:
    plt.plot( feature_test, reg.predict(feature_test) )
except NameError:
    pass
### refit on the test data to compare its slope with the training fit
reg.fit(feature_test, target_test)
plt.plot(feature_train, reg.predict(feature_train), color="b")
plt.xlabel(features_list[1])
plt.ylabel(features_list[0])
plt.legend()
Example #48
    def fit(self, X1, y1, X2, y2, left_right_bounds=None):
        """Fit estimator using RANSAC algorithm.

        Namely, the fit is done in two main steps:
        - pre-fitting: quickly select n_prefits configurations which seem
        suitable given topological constraints.
        - finding the best fit: select the pre-fit with the maximum number of
        inliers as the best fit.

        Inputs:
          X1, y1: Left lane points (supposedly)
          X2, y2: Right lane points (supposedly)
        """
        check_consistent_length(X1, y1)
        check_consistent_length(X2, y2)

        # Assume linear model by default
        min_samples = X1.shape[1] + 1
        if min_samples > X1.shape[0] or min_samples > X2.shape[0]:
            raise ValueError("`min_samples` may not be larger than number "
                             "of samples ``X1-2.shape[0]``.")

        # Check additional parameters...
        if self.stop_probability < 0 or self.stop_probability > 1:
            raise ValueError("`stop_probability` must be in range [0, 1].")
        if self.residual_threshold is None:
            # Default: median absolute deviation of all target values (both lanes).
            y_all = np.concatenate((y1, y2))
            residual_threshold = np.median(np.abs(y_all - np.median(y_all)))
        else:
            residual_threshold = self.residual_threshold
        delta_left_right = (left_right_bounds[0, 0, 1] + left_right_bounds[0, 0, 0]) / 2.
        # random_state = check_random_state(self.random_state)

        # Set up lambdas for computing score.
        score_lambdas = np.copy(self.score_lambdas)
        score_lambdas[0] = score_lambdas[0] / (y1.size + y2.size)

        # Collections...
        self.w_fits = []
        self.w_fits_l2 = []
        self.inliers_masks = []
        self.n_inliers = []
        self.score_fits = []

        # === Left lane, and then, right lane === #
        w_left_prefits = lanes_ransac_prefit(X1, y1,
                                             self.n_prefits,
                                             self.max_trials,
                                             self.w_refs_left,
                                             self.is_valid_bounds_left)
        (w_left1, in_mask_left1, score_left1) = \
            lanes_ransac_select_best(X1, y1,
                                     w_left_prefits, residual_threshold,
                                     self.w_refs_left, score_lambdas)
        n_inliers_left1 = np.sum(in_mask_left1)

        w_refs = np.vstack((self.w_refs_right, np.reshape(w_left1, (1, 3))))
        is_valid_bounds = np.vstack((self.is_valid_bounds_right, left_right_bounds))
        w_right_prefits = lanes_ransac_prefit(X2, y2,
                                              self.n_prefits,
                                              self.max_trials,
                                              w_refs,
                                              is_valid_bounds)
        w0 = lane_translate(w_left1, delta_left_right)
        w_right_prefits = np.vstack((w0, w_right_prefits))

        (w_right1, in_mask_right1, score_right1) = \
            lanes_ransac_select_best(X2, y2,
                                     w_right_prefits, residual_threshold,
                                     self.w_refs_right, score_lambdas)
        n_inliers_right1 = np.sum(in_mask_right1)
        n_inliers1 = n_inliers_right1 + n_inliers_left1

        self.w_fits.append((w_left1, w_right1))
        self.n_inliers.append(n_inliers1)
        self.inliers_masks.append((in_mask_left1, in_mask_right1))
        self.score_fits.append((score_left1, score_right1))

        # === Right lane and then left lane === #
        w_right_prefits = lanes_ransac_prefit(X2, y2,
                                              self.n_prefits,
                                              self.max_trials,
                                              self.w_refs_right,
                                              self.is_valid_bounds_right)
        (w_right2, in_mask_right2, score_right2) = \
            lanes_ransac_select_best(X2, y2,
                                     w_right_prefits, residual_threshold,
                                     self.w_refs_right, score_lambdas)
        n_inliers_right2 = np.sum(in_mask_right2)
        w_refs = np.vstack((self.w_refs_left, np.reshape(w_right2, (1, 3))))
        is_valid_bounds = np.vstack((self.is_valid_bounds_left, left_right_bounds))
        w_left_prefits = lanes_ransac_prefit(X1, y1,
                                             self.n_prefits,
                                             self.max_trials,
                                             w_refs,
                                             is_valid_bounds)
        w0 = lane_translate(w_right2, -delta_left_right)
        w_left_prefits = np.vstack((w0, w_left_prefits))

        (w_left2, in_mask_left2, score_left2) = \
            lanes_ransac_select_best(X1, y1,
                                     w_left_prefits, residual_threshold,
                                     self.w_refs_left, score_lambdas)
        n_inliers_left2 = np.sum(in_mask_left2)
        n_inliers2 = n_inliers_right2 + n_inliers_left2

        self.w_fits.append((w_left2, w_right2))
        self.n_inliers.append(n_inliers2)
        self.inliers_masks.append((in_mask_left2, in_mask_right2))
        self.score_fits.append((score_left2, score_right2))

        # === Previous frame??? === #
        if self.w_refs_left.size > 0 and self.w_refs_right.size > 0:
            in_mask_left3 = lanes_inliers(X1, y1, self.w_refs_left[0], residual_threshold)
            in_mask_right3 = lanes_inliers(X2, y2, self.w_refs_right[0], residual_threshold)
            n_inliers3 = np.sum(in_mask_left3) + np.sum(in_mask_right3)
            score_left3 = lane_score(np.sum(in_mask_left3),
                                     self.w_refs_left[0],
                                     self.w_refs_left,
                                     score_lambdas)
            score_right3 = lane_score(np.sum(in_mask_right3),
                                      self.w_refs_right[0],
                                      self.w_refs_right,
                                      score_lambdas)

            self.w_fits.append((self.w_refs_left[0], self.w_refs_right[0]))
            self.n_inliers.append(n_inliers3)
            self.inliers_masks.append((in_mask_left3, in_mask_right3))
            self.score_fits.append((score_left3, score_right3))

        # L2 regression regularisation of fits.
        self.w_fits_l2 = copy.deepcopy(self.w_fits)
        if self.l2_scales is not None:
            for i in range(len(self.w_fits)):
                w1, w2 = self.w_fits[i]
                # Some regression: ignored when inversed matrix error.
                try:
                    w_left = m_regression_exp(X1, y1, w1, self.l2_scales)
                except Exception:
                    w_left = w1
                try:
                    w_right = m_regression_exp(X2, y2, w2, self.l2_scales)
                except Exception:
                    w_right = w2

                in_mask_left = lanes_inliers(X1, y1, w_left, residual_threshold)
                in_mask_right = lanes_inliers(X2, y2, w_right, residual_threshold)
                n_inliers = np.sum(in_mask_left) + np.sum(in_mask_right)
                score_left = lane_score(np.sum(in_mask_left),
                                        w_left,
                                        self.w_refs_left,
                                        score_lambdas)
                score_right = lane_score(np.sum(in_mask_right),
                                         w_right,
                                         self.w_refs_right,
                                         score_lambdas)

                self.w_fits_l2[i] = (w_left, w_right)
                self.n_inliers[i] = n_inliers
                self.inliers_masks[i] = (in_mask_left, in_mask_right)
                self.score_fits[i] = (score_left, score_right)

        # Best fit?
        scores = [s1+s2 for (s1, s2) in self.score_fits]
        idx = np.argmax(scores)
        w_left, w_right = self.w_fits_l2[idx]
        in_mask_left, in_mask_right = self.inliers_masks[idx]

        # Smoothing.
        smoothing = self.smoothing
        if self.w_refs_left.size > 0 and self.w_refs_right.size > 0:
            w_left = smoothing * w_left + (1. - smoothing) * self.w_refs_left[0]
            w_right = smoothing * w_right + (1. - smoothing) * self.w_refs_right[0]

        self.w1_ = w_left
        self.w2_ = w_right

        # Set regression parameters.
        base_estimator1 = LinearRegression(fit_intercept=False)
        base_estimator1.coef_ = w_left
        base_estimator1.intercept_ = 0.0
        base_estimator2 = LinearRegression(fit_intercept=False)
        base_estimator2.coef_ = w_right
        base_estimator2.intercept_ = 0.0

        # Save final model parameters.
        self.estimator1_ = base_estimator1
        self.estimator2_ = base_estimator2

        self.inlier_mask1_ = in_mask_left
        self.inlier_mask2_ = in_mask_right

        # # Estimate final model using all inliers
        # # base_estimator1.fit(X1_inlier_best, y1_inlier_best)
        # # base_estimator2.fit(X2_inlier_best, y2_inlier_best)

        return self
Example #49
class inp_reader:
    inp_features = list()
    inp_prices = list()
    features = list()
    def get_inp_features(self):
        return self.inp_features
    def get_inp_prices(self):
        return self.inp_prices
    def get_features(self):
        return self.features

    def read(self):
        F, N = map(int, input().split(' '))
        for _ in range(N):
            inp_f = list(map(float, input().strip().split()))
            self.inp_features.append(inp_f[:F])
            self.inp_prices.append(inp_f[F:])
        questions = int(input())
        for _ in range(questions):
            self.features.append(list(map(float, input().split())))

reader = inp_reader()
reader.read()
inp_features = reader.get_inp_features()
inp_prices = reader.get_inp_prices()
features = reader.get_features()
 
model = LinearRegression()

model.fit(inp_features, inp_prices)
prices=model.predict(features)
for el in prices:
    print(el[0])
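For reference, the stdin this script expects looks like the following (a hypothetical instance with F=2 features, N=3 training rows, and one query):

2 3
0.18 0.89 109.85
1.0 0.26 155.72
0.92 0.11 137.66
1
0.49 0.18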