import numpy as np
from xgboost.core import EarlyStopException  # assumed: xgboost's early-stopping signal used below


class PredictLoss(BaseLR):
    def __init__(self, hist=30, posmax=15, lr=0.2):
        from sklearn.linear_model import LinearRegression
        from collections import deque
        self.hist = hist
        self.track = deque(maxlen=self.hist)
        self.regr = LinearRegression()
        self.poscases = 0
        self.posmax = posmax
        self.lr = lr

    def __call__(self, env):
        # record the latest evaluation metric (assumes an xgboost-style CallbackEnv)
        self.track.append(env.evaluation_result_list[-1][1])
        if len(self.track) > 5:
            y = np.array(self.track)
            x = np.arange(len(y)).reshape(-1, 1)
            self.regr.fit(x, y)
            coef_ = self.regr.coef_[0]
            preds = self.regr.predict(x)
            fst = preds[0]
            lst = preds[-1]
            e = np.sqrt(((y - preds)**2).mean())
            if coef_ > 0:
                self.poscases += 1
                if self.poscases >= self.posmax:
                    raise EarlyStopException
            else:
                self.poscases -= 1
                if self.poscases < 0:
                    self.poscases = 0
            diff = np.abs(fst - lst)
            coef = np.clip(diff/e, 1e-6, 1)
            lr = self.lr*coef
            with open('log.txt', 'a') as f:
                print(lr, e, diff, coef_, coef, file=f)
            env.model.set_param("learning_rate", lr)
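How the callback is wired in depends on the training loop. Below is a minimal usage sketch assuming the legacy XGBoost callback API (pre-1.3), where a callback is a plain callable receiving a CallbackEnv whose model supports set_param; the data here is random and purely illustrative, and BaseLR is whatever base class the snippet above inherits from.

# usage sketch (assumes legacy xgboost callbacks; hypothetical data)
import numpy as np
import xgboost as xgb

X, y = np.random.rand(100, 4), np.random.rand(100)
dtrain = xgb.DMatrix(X[:80], label=y[:80])
dvalid = xgb.DMatrix(X[80:], label=y[80:])

bst = xgb.train(
    {'objective': 'reg:squarederror', 'learning_rate': 0.2},
    dtrain,
    num_boost_round=200,
    evals=[(dvalid, 'valid')],
    callbacks=[PredictLoss(hist=30, posmax=15, lr=0.2)],
)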
    def __add_trend_feature(self, arr, abs_values=False):
        idx = np.arange(len(arr))
        if abs_values:
            arr = np.abs(arr)
        lr = LinearRegression()
        lr.fit(idx.reshape(-1, 1), arr)
        return lr.coef_[0]
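For reference, the same trend computation as a standalone function (a sketch; the method above presumably lives on a feature-engineering class). The returned coefficient is the slope of a least-squares line fitted against the sample index.

import numpy as np
from sklearn.linear_model import LinearRegression

def trend_feature(arr, abs_values=False):
    # slope of a least-squares fit of the values against their index
    idx = np.arange(len(arr)).reshape(-1, 1)
    if abs_values:
        arr = np.abs(arr)
    return LinearRegression().fit(idx, arr).coef_[0]

print(trend_feature(np.array([1.0, 3.0, 5.0, 7.0])))  # 2.0, a steady upward trend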
Example #3
    def test_predict_hdf_dataframe(self):
        # create some data
        x = np.array(list(range(0, 10)))
        y = x * 2
        df = pd.DataFrame({'x': x,
                           'y': y})
        X = df['x']
        Y = df['y']
        # put into Omega -- assume a client with pandas, scikit learn
        os.environ['DJANGO_SETTINGS_MODULE'] = ''
        om = Omega()
        om.runtime.pure_python = True
        om.runtime.celeryapp.conf.CELERY_ALWAYS_EAGER = True
        om.datasets.put(X, 'datax', as_hdf=True)
        om.datasets.put(Y, 'datay', as_hdf=True)
        # have Omega fit the model then predict
        lr = LinearRegression()
        lr.fit(reshaped(X), reshaped(Y))
        pred = lr.predict(reshaped(X))
        om.models.put(lr, 'mymodel2')
        # -- using data provided locally
        #    note this is the same as
        #        om.datasets.put(X, 'foo')
        #        om.runtimes.model('mymodel2').predict('foo')
        result = om.runtime.model('mymodel2').predict('datax')
        pred2 = result.get()
        self.assertTrue(
            (pred == pred2).all(), "runtimes prediction is different")
    def get_scikit_prediction(x=np.array([1, 2, 3]), y=np.array([1, 2, 3])):

        from sklearn.linear_model import LinearRegression as ScikitLinearRegression

        regression = ScikitLinearRegression()
        regression.fit(x, y)

        return regression.predict(x)
Example #5
def train():
    X = np.array([[1, 2, 3, 4], [4, 5, 6, 7], [7, 8, 9, 10]])
    y = np.array([10, 20, 30])
    X_test = np.array([[10, 20, 30, 40], [40, 50, 60, 70], [70, 80, 90, 100]])
    reg = LinearRegression()
    reg.fit(X, y)
    print('coef_:', reg.coef_)
    print('intercept_:', reg.intercept_)
    print('predict:', reg.predict(X_test))
Example #6
    def test_fit(self):
        # create some data
        x = np.array(list(range(0, 10)))
        y = x * 2
        df = pd.DataFrame({'x': x,
                           'y': y})
        X = df[['x']]
        Y = df[['y']]
        # put into Omega
        os.environ['DJANGO_SETTINGS_MODULE'] = ''
        om = Omega()
        om.runtime.celeryapp.conf.CELERY_ALWAYS_EAGER = True
        om.datasets.put(X, 'datax')
        om.datasets.put(Y, 'datay')
        om.datasets.get('datax')
        om.datasets.get('datay')
        # create a model locally, store (unfitted) in Omega
        lr = LinearRegression()
        om.models.put(lr, 'mymodel2')
        self.assertIn('mymodel2', om.models.list('*'))
        # predict locally for comparison
        lr.fit(X, Y)
        pred = lr.predict(X)
        # try predicting without fitting
        with self.assertRaises(NotFittedError):
            result = om.runtime.model('mymodel2').predict('datax')
            result.get()
        # have Omega fit the model then predict
        result = om.runtime.model('mymodel2').fit('datax', 'datay')
        result.get()
        # check the new model version metadata includes the datax/y references
        meta = om.models.metadata('mymodel2')
        self.assertIn('metaX', meta.attributes)
        self.assertIn('metaY', meta.attributes)
        # -- using data already in Omega
        result = om.runtime.model('mymodel2').predict('datax')
        pred1 = result.get()
        # -- using data provided locally
        #    note this is the same as
        #        om.datasets.put(X, 'foo')
        #        om.runtimes.model('mymodel2').predict('foo')
        result = om.runtime.model('mymodel2').fit(X, Y)
        result = om.runtime.model('mymodel2').predict(X)
        pred2 = result.get()
        # -- check the local data provided to fit was stored as intended
        meta = om.models.metadata('mymodel2')
        self.assertIn('metaX', meta.attributes)
        self.assertIn('metaY', meta.attributes)
        self.assertTrue(
            (pred == pred1).all(), "runtimes prediction is different(1)")
        self.assertTrue(
            (pred == pred2).all(), "runtimes prediction is different(2)")
Example #7
def test_linear_regression_sparse(random_state=0):
    "Test that linear regression also works with sparse data"
    random_state = check_random_state(random_state)
    n = 100
    X = sparse.eye(n, n)
    beta = random_state.rand(n)
    y = X * beta[:, np.newaxis]

    ols = LinearRegression()
    ols.fit(X, y.ravel())
    assert_array_almost_equal(beta, ols.coef_ + ols.intercept_)
    assert_array_almost_equal(ols.residues_, 0)
def test_linear_regression_multiple_outcome(random_state=0):
    "Test multiple-outcome linear regressions"
    X, y = make_regression(random_state=random_state)

    Y = np.vstack((y, y)).T
    n_features = X.shape[1]

    clf = LinearRegression(fit_intercept=True)
    clf.fit(X, Y)
    assert_equal(clf.coef_.shape, (2, n_features))
    Y_pred = clf.predict(X)
    clf.fit(X, y)
    y_pred = clf.predict(X)
    assert_array_almost_equal(np.vstack((y_pred, y_pred)).T, Y_pred, decimal=3)
Example #11
def test_linear_regression_multiple_outcome(random_state=0):
    # Test multiple-outcome linear regressions
    X, y = make_regression(random_state=random_state)

    Y = np.vstack((y, y)).T
    n_features = X.shape[1]

    reg = LinearRegression()
    reg.fit(X, Y)
    assert reg.coef_.shape == (2, n_features)
    Y_pred = reg.predict(X)
    reg.fit(X, y)
    y_pred = reg.predict(X)
    assert_array_almost_equal(np.vstack((y_pred, y_pred)).T, Y_pred, decimal=3)
def test_linear_regression_sparse_multiple_outcome(random_state=0):
    "Test multiple-outcome linear regressions with sparse data"
    random_state = check_random_state(random_state)
    X, y = make_sparse_uncorrelated(random_state=random_state)
    X = sparse.coo_matrix(X)
    Y = np.vstack((y, y)).T
    n_features = X.shape[1]

    ols = LinearRegression()
    ols.fit(X, Y)
    assert_equal(ols.coef_.shape, (2, n_features))
    Y_pred = ols.predict(X)
    ols.fit(X, y.ravel())
    y_pred = ols.predict(X)
    assert_array_almost_equal(np.vstack((y_pred, y_pred)).T, Y_pred, decimal=3)
Example #14
def test_linear_regression_sparse_equal_dense(normalize, fit_intercept):
    # Test that linear regression agrees between sparse and dense
    rng = check_random_state(0)
    n_samples = 200
    n_features = 2
    X = rng.randn(n_samples, n_features)
    X[X < 0.1] = 0.
    Xcsr = sparse.csr_matrix(X)
    y = rng.rand(n_samples)
    params = dict(normalize=normalize, fit_intercept=fit_intercept)
    clf_dense = LinearRegression(**params)
    clf_sparse = LinearRegression(**params)
    clf_dense.fit(X, y)
    clf_sparse.fit(Xcsr, y)
    assert clf_dense.intercept_ == pytest.approx(clf_sparse.intercept_)
    assert_allclose(clf_dense.coef_, clf_sparse.coef_)
Example #15
def MethodSelect(Xt, XT, Yt, YT):
    reg = LinearRegression()
    reg.fit(Xt, Yt)
    predict = reg.predict(XT)
    err = dp.rmsErr(predict, YT)
    if err > 100:
        a = XT[0]
        b = Xt[0]
        c = [a]
        pre = reg.predict(c)
        print(pre[0])

        for i in range(0, len(a)):
            print(a[i], b[i])
        print('\n\n\n')
    return err
    def linearRegression_sales(self):  # linear regression
        path = u'4.Advertising.csv'
        data = self.readFile(path)
        #         x=data[['TV', 'Radio', 'Newspaper']]
        x = data[['TV', 'Radio']]
        y = data['Sales']
        x_train, x_test, y_train, y_test = train_test_split(x,
                                                            y,
                                                            random_state=1)
        # print x_train, y_train
        linreg = LinearRegression()
        model = linreg.fit(x_train, y_train)
        print(model)
        print(linreg.coef_)
        print(linreg.intercept_)
        y_hat = linreg.predict(np.array(x_test))
        mse = np.average((y_hat - y_test)**2)
        rmse = np.sqrt(mse)
        print(mse, rmse)

        t = np.arange(len(x_test))
        plt.plot(t, y_test, 'r-', linewidth=2, label='Test')
        plt.plot(t, y_hat, 'g-', linewidth=2, label='Predict')
        plt.grid()
        plt.legend(loc='upper right')
        plt.show()
Example #18
def test_ridge_vs_lstsq():
    """On alpha=0., Ridge and OLS yield the same solution."""

    # we need more samples than features
    n_samples, n_features = 5, 4
    y = rng.randn(n_samples)
    X = rng.randn(n_samples, n_features)

    ridge = Ridge(alpha=0., fit_intercept=False)
    ols = LinearRegression(fit_intercept=False)

    ridge.fit(X, y)
    ols.fit(X, y)
    assert_almost_equal(ridge.coef_, ols.coef_)
from sklearn.linear_model import LinearRegression as SKLModel  # assumed alias for the wrapped estimator

class LinearRegressionImpl():

    def __init__(self, fit_intercept=True, normalize=False, copy_X=True, n_jobs=None):
        self._hyperparams = {
            'fit_intercept': fit_intercept,
            'normalize': normalize,
            'copy_X': copy_X,
            'n_jobs': n_jobs}

    def fit(self, X, y=None):
        self._sklearn_model = SKLModel(**self._hyperparams)
        if (y is not None):
            self._sklearn_model.fit(X, y)
        else:
            self._sklearn_model.fit(X)
        return self

    def predict(self, X):
        return self._sklearn_model.predict(X)
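A minimal usage sketch of the wrapper (hypothetical data; note it passes normalize through, so this only runs on scikit-learn versions before 1.2, where LinearRegression still accepted that hyperparameter). fit returns self, so calls can be chained.

import numpy as np

X = np.array([[1.0], [2.0], [3.0]])
y = np.array([2.0, 4.0, 6.0])
impl = LinearRegressionImpl().fit(X, y)
print(impl.predict(X))  # approximately [2. 4. 6.]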
def test_linear_regression_n_jobs():
    """
    Test for the n_jobs parameter on the fit method and the constructor
    """
    X = [[1], [2]]
    Y = [1, 2]
    clf = LinearRegression()
    clf_fit = clf.fit(X, Y, 4)
    assert_equal(clf_fit.n_jobs, clf.n_jobs)
    assert_equal(clf.n_jobs, 1)
Example #22
def test_raises_value_error_if_sample_weights_greater_than_1d():
    # Sample weights must be either scalar or 1D

    n_sampless = [2, 3]
    n_featuress = [3, 2]

    for n_samples, n_features in zip(n_sampless, n_featuress):
        X = rng.randn(n_samples, n_features)
        y = rng.randn(n_samples)
        sample_weights_OK = rng.randn(n_samples)**2 + 1
        sample_weights_OK_1 = 1.
        sample_weights_OK_2 = 2.

        reg = LinearRegression()

        # make sure the "OK" sample weights actually work
        reg.fit(X, y, sample_weights_OK)
        reg.fit(X, y, sample_weights_OK_1)
        reg.fit(X, y, sample_weights_OK_2)
Example #25
def test_linear_regression_sample_weights():
    # TODO: loop over sparse data as well

    rng = np.random.RandomState(0)

    # It would not work with under-determined systems
    for n_samples, n_features in ((6, 5), ):

        y = rng.randn(n_samples)
        X = rng.randn(n_samples, n_features)
        sample_weight = 1.0 + rng.rand(n_samples)

        for intercept in (True, False):

            # LinearRegression with explicit sample_weight
            reg = LinearRegression(fit_intercept=intercept)
            reg.fit(X, y, sample_weight=sample_weight)
            coefs1 = reg.coef_
            inter1 = reg.intercept_

            assert_equal(reg.coef_.shape, (X.shape[1], ))  # sanity checks
            assert_greater(reg.score(X, y), 0.5)

            # Closed form of the weighted least square
            # theta = (X^T W X)^(-1) * X^T W y
            W = np.diag(sample_weight)
            if intercept is False:
                X_aug = X
            else:
                dummy_column = np.ones(shape=(n_samples, 1))
                X_aug = np.concatenate((dummy_column, X), axis=1)

            coefs2 = linalg.solve(
                X_aug.T.dot(W).dot(X_aug),
                X_aug.T.dot(W).dot(y))

            if intercept is False:
                assert_array_almost_equal(coefs1, coefs2)
            else:
                assert_array_almost_equal(coefs1, coefs2[1:])
                assert_almost_equal(inter1, coefs2[0])
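The closed form checked above follows from minimizing the weighted least-squares objective (y - X theta)^T W (y - X theta): setting its gradient to zero gives the normal equations X^T W X theta = X^T W y, hence theta = (X^T W X)^(-1) X^T W y.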
Example #26
    def test_score(self):
        # create some data
        x = np.array(list(range(0, 10)))
        y = x * 2
        df = pd.DataFrame({'x': x,
                           'y': y})
        X = df[['x']]
        Y = df[['y']]
        # put into Omega
        os.environ['DJANGO_SETTINGS_MODULE'] = ''
        om = Omega()
        om.runtime.celeryapp.conf.CELERY_ALWAYS_EAGER = True
        om.datasets.put(X, 'datax')
        om.datasets.put(Y, 'datay')
        om.datasets.get('datax')
        om.datasets.get('datay')
        # create a model locally, fit it, store in Omega
        lr = LinearRegression()
        lr.fit(X, Y)
        scores = lr.score(X, Y)
        om.models.put(lr, 'mymodel')
def test_linear_regression():
    # Test LinearRegression on a simple dataset.
    # a simple dataset
    X = [[1], [2]]
    Y = [1, 2]

    clf = LinearRegression()
    clf.fit(X, Y)

    assert_array_almost_equal(clf.coef_, [1])
    assert_array_almost_equal(clf.intercept_, [0])
    assert_array_almost_equal(clf.predict(X), [1, 2])

    # test it also for degenerate input
    X = [[1]]
    Y = [0]

    clf = LinearRegression()
    clf.fit(X, Y)
    assert_array_almost_equal(clf.coef_, [0])
    assert_array_almost_equal(clf.intercept_, [0])
    assert_array_almost_equal(clf.predict(X), [0])
    def test_intercept_flag(rows=10, columns=9):
        inout = get_random_array(rows, columns)
        test_overfitting(rows, columns)
        x = inout[0]
        y = inout[1]

        ntX = HomogenNumericTable(x)
        ntY = HomogenNumericTable(y)

        lr_train = linear_training.Batch()
        lr_train.input.set(linear_training.data, ntX)
        lr_train.input.set(linear_training.dependentVariables, ntY)
        result = lr_train.compute()
        model = result.get(linear_training.model)
        beta_coeff = model.getBeta()
        np_beta = getNumpyArray(beta_coeff)
        daal_intercept = np_beta[0, 0]

        from sklearn.linear_model import LinearRegression as ScikitLinearRegression
        regression = ScikitLinearRegression()
        regression.fit(x, y)

        scikit_intercept = regression.intercept_
        assert_array_almost_equal(scikit_intercept, [daal_intercept])
def test_linear_regression_sample_weights():
    rng = np.random.RandomState(0)

    for n_samples, n_features in ((6, 5), (5, 10)):
        y = rng.randn(n_samples)
        X = rng.randn(n_samples, n_features)
        sample_weight = 1.0 + rng.rand(n_samples)

        clf = LinearRegression()
        clf.fit(X, y, sample_weight)
        coefs1 = clf.coef_

        assert_equal(clf.coef_.shape, (X.shape[1], ))
        assert_greater(clf.score(X, y), 0.9)
        assert_array_almost_equal(clf.predict(X), y)

        # Sample weight can be implemented via a simple rescaling
        # for the square loss.
        scaled_y = y * np.sqrt(sample_weight)
        scaled_X = X * np.sqrt(sample_weight)[:, np.newaxis]
        clf.fit(scaled_X, scaled_y)
        coefs2 = clf.coef_

        assert_array_almost_equal(coefs1, coefs2)
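The rescaling works because each weighted squared residual can be absorbed into the data: w_i * (y_i - x_i theta)^2 = (sqrt(w_i) * y_i - sqrt(w_i) * x_i theta)^2, so an ordinary least-squares fit on the scaled rows (with any intercept column scaled along with the features) reproduces the weighted fit.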
Example #33
    def test_predict(self):
        # create some data
        x = np.array(list(range(0, 10)))
        y = x * 2
        df = pd.DataFrame({'x': x,
                           'y': y})
        X = df[['x']]
        Y = df[['y']]
        # put into Omega
        os.environ['DJANGO_SETTINGS_MODULE'] = ''
        om = Omega()
        om.runtime.celeryapp.conf.CELERY_ALWAYS_EAGER = True
        om.datasets.put(X, 'datax')
        om.datasets.put(Y, 'datay')
        om.datasets.get('datax')
        om.datasets.get('datay')
        # create a model locally, fit it, store in Omega
        lr = LinearRegression()
        lr.fit(X, Y)
        pred = lr.predict(X)
        om.models.put(lr, 'mymodel')
        self.assertIn('mymodel', om.models.list('*'))
        # have Omega predict it
        # -- using data already in Omega
        result = om.runtime.model('mymodel').predict('datax')
        pred1 = result.get()
        # -- using data provided locally
        #    note this is the same as
        #        om.datasets.put(X, 'foo')
        #        om.runtimes.model('mymodel').predict('foo')
        result = om.runtime.model('mymodel').predict(X)
        pred2 = result.get()
        self.assertTrue(
            (pred == pred1).all(), "runtimes prediction is different(1)")
        self.assertTrue(
            (pred == pred2).all(), "runtimes prediction is different(2)")
class StackedRegression(LinearModel, RegressorMixin):
    def __init__(self, weights=None, cv_train_size=None):
        estimators = []
        estimators.append(KNeighborsRegressor(n_neighbors=3))
        estimators.append(DecisionTreeRegressor())
        estimators.append(BayesianRidge())
        # estimators.append(BayesianRidge())
        self.estimators = estimators
        self.stacker = LinearRegression()
        self.weights = weights if weights is not None else {}
        self.cv_train_size = cv_train_size if cv_train_size is not None else 0.7
        self._is_fitted = False

    def fit_stack(self, X, y):
        print('fitting')
        print(X.shape)
        n_train = int(X.shape[0] * self.cv_train_size)
        for estimator in self.estimators:
            estimator.fit(X[:n_train, :], y[:n_train])
        predictions = np.concatenate([np.matrix(estimator.predict(X[n_train:, :])).transpose()
                                      for estimator in self.estimators], axis=1)
        self.stacker.fit(predictions, y[n_train:])
        self._is_fitted = True
        print('fitted')
        # residues_ was removed in newer scikit-learn releases; guard the debug print
        print(getattr(self.stacker, 'residues_', 'residues_ unavailable'))

    def fit(self, X, y):
        if not self._is_fitted:
            raise NotFittedError('StackedRegression must call fit_stack before fit.')
        for estimator in self.estimators:
            estimator.fit(X, y)

    def predict(self, X):
        predictions = np.concatenate([np.matrix(estimator.predict(X)).transpose()
                                      for estimator in self.estimators], axis=1)
        return self.stacker.predict(predictions)
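A minimal usage sketch (random data for illustration; assumes numpy and the scikit-learn estimators above are imported): fit_stack must be called first so the stacker learns how to combine the base estimators on a hold-out slice, after which fit refits the base estimators on the full data.

rng = np.random.RandomState(0)
X = rng.randn(200, 3)
y = X @ np.array([1.0, -2.0, 0.5]) + 0.1 * rng.randn(200)

model = StackedRegression()
model.fit_stack(X, y)  # trains base estimators on 70%, the stacker on the rest
model.fit(X, y)        # refits the base estimators on all data
print(model.predict(X[:5]))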
Example #35
def eval_linear(data_set, test_size=0.4):
    # load training data from feature matrix
    x, y = data_set.load_training_data()

    # cross validation evaluation
    model = LinearRegression(normalize=True)
    #model = RFE(model, 10)
    score = cross_val_score(model, x, y, scoring='neg_mean_squared_error')
    print('Mean squared error: {}'.format(-score))

    # to visualize:
    # split data into train and test set
    x_train, x_test, y_train, y_test = train_test_split(x,
                                                        y,
                                                        test_size=test_size,
                                                        shuffle=True,
                                                        random_state=0)

    # train model on train set
    model = LinearRegression(normalize=True)
    model = model.fit(x_train, y_train)
    print(model.coef_)

    pprint(model)

    # plot train performance
    predict_train = model.predict(x_train)
    plt.figure()
    plt.title('train')
    plt.scatter(y_train, predict_train)

    # plot test performance
    predict = model.predict(x_test)
    plt.figure()
    plt.title('test')
    plt.scatter(y_test, predict)
    plt.show()
Example #36
import pandas as pd
from matplotlib.pyplot import scatter, plot, show
from sklearn.linear_model import LinearRegression

bmi_life_data = pd.read_csv('bmi_to_life_expect.csv')
x_data = bmi_life_data[['BMI']]
y_data = bmi_life_data[['Life expectancy']]
# print x_data, y_data
bmi_life_model = LinearRegression()
bmi_life_model.fit(x_data, y_data)

laos_life_exp = bmi_life_model.predict([[50.00]])  # predict expects a 2-D array
print(laos_life_exp)
scatter(x_data, y_data)
plot(x_data, bmi_life_model.predict(x_data))
show()
### ages and net_worths need to be reshaped into 2D numpy arrays
### second argument of reshape command is a tuple of integers: (n_rows, n_columns)
### by convention, n_rows is the number of data points
### and n_columns is the number of features
import numpy
import matplotlib.pyplot as plt

ages       = numpy.reshape( numpy.array(ages), (len(ages), 1))
net_worths = numpy.reshape( numpy.array(net_worths), (len(net_worths), 1))
from sklearn.model_selection import train_test_split
ages_train, ages_test, net_worths_train, net_worths_test = train_test_split(ages, net_worths, test_size=0.1, random_state=42)

### fill in a regression here!  Name the regression object reg so that
### the plotting code below works, and you can see what your regression looks like
from sklearn.linear_model import LinearRegression

reg = LinearRegression()
reg.fit(ages_train, net_worths_train)
print("Slope %s" % reg.coef_)
print("Intercept %s" % reg.intercept_)

print("Score = ", reg.score(ages_test, net_worths_test))






try:
    plt.plot(ages, reg.predict(ages), color="blue")
except NameError:
    pass
plt.scatter(ages, net_worths)
Example #38
'''
Created on Jan 28, 2017

@author: Angel
tip_orderamount_whodel

TODO: how to create a complete regression analysis with dummy variable, 
    not just coefficients.
'''
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression

path = r'C:\Users\Angel\OneDrive\Documents\data_training\data\RawDelData.csv'
data = pd.read_csv(path)

data["isAngel"] = ""
data["isAngel"] = np.nan
dat = data.loc[data['OrderAmount']>-100.00][['Tip','OrderAmount','PersonWhoDelivered', 'isAngel']]

dat.loc[dat.PersonWhoDelivered == 'Angel', ['isAngel']] = 1
dat.loc[dat.PersonWhoDelivered == 'Sammie', ['isAngel']] = 0
feature_cols = ['OrderAmount', 'isAngel']
X = dat[feature_cols]
y = dat.Tip
lm = LinearRegression()
lm.fit(X, y)

print "Tips can be modeled by Y = 1.61 + 0.0829X_1 - 0.0533X2"
print "In other words, tips go up by 8 cents for every dollar"
print "increase in order amount and decrease by 5 cents if Angel is making the delivery."
Example #39
from sklearn.datasets import load_boston
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split, cross_val_predict
from sklearn import metrics

# Boston housing price dataset
boston = load_boston()

#print(boston)
# the DESCR attribute describes the dataset in detail: 14 columns, of which
# the first 13 are features and the last one is the target
#print(boston.DESCR)
# boston.data and boston.target hold the features and the target, respectively
#print(boston.data)
#print(boston.target)
# split the dataset
X_train, X_test, y_train, y_test = train_test_split(boston.data, boston.target, test_size=0.2, random_state=2)

# simple linear regression
model1 = LinearRegression(normalize=True)
model1.fit(X_train, y_train)
# goodness of fit of the model
simpleScore = model1.score(X_test, y_test)
print(simpleScore)
## regression coefficients
#print(model1.coef_)
# intercept term
#print(model1.intercept_)
#print(simpleScore)

# evaluate on the test set with mean squared error (MSE)
# model predictions
y_pred = model1.predict(X_test)
print("MSE:", metrics.mean_squared_error(y_test, y_pred))

# cross-validation
### draw the scatterplot, with color-coded training and testing points
import matplotlib.pyplot as plt
for feature, target in zip(feature_test, target_test):
    plt.scatter( feature, target, color=test_color ) 
for feature, target in zip(feature_train, target_train):
    plt.scatter( feature, target, color=train_color ) 

### labels for the legend
plt.scatter(feature_test[0], target_test[0], color=test_color, label="test")
plt.scatter(feature_train[0], target_train[0], color=train_color, label="train")


from sklearn.linear_model import LinearRegression

reg = LinearRegression()
reg.fit(feature_train, target_train)
print("Slope %s" % reg.coef_)
print("Intercept %s" % reg.intercept_)

print("Score = ", reg.score(feature_test, target_test))
### draw the regression line, once it's coded
try:
    plt.plot( feature_test, reg.predict(feature_test) )
except NameError:
    pass
reg.fit(feature_test, target_test)
plt.plot(feature_train, reg.predict(feature_train), color="b")
plt.xlabel(features_list[1])
plt.ylabel(features_list[0])
plt.legend()
plt.show()
def draw_data_size_vs_performance_chart():
    ''' Create figure for paper '''
    paths = glob('output/data-sizes/*.results/*/results.json') + \
            glob('output/model-h2048p512-mfs-true.results/*/results.json')
    df = read_json_files(paths)

    def parse_path(val):
        if 'model-h2048p512' in val:
            return 100
        else:
            return float(re.search(r'(\d+)\.results', val).group(1))

    df['data-pct'] = df['path'].apply(parse_path)
    df['words'] = 1.8e9 * df['data-pct'] / 100
    df = df.append([{
        'words': 1e11,
        'model': 'Yuan et al. (T: SemCor)',
        "competition": "SemEval13",
        'F1': 0.670
    }, {
        'words': 1e11,
        'model': 'Yuan et al. (T: OMSTI)',
        "competition": "SemEval13",
        'F1': 0.673
    }, {
        'words': 1e11,
        'model': 'Yuan et al. (T: SemCor)',
        "competition": "Senseval2",
        'F1': 0.736
    }, {
        'words': 1e11,
        'model': 'Yuan et al. (T: OMSTI)',
        "competition": "Senseval2",
        'F1': 0.724
    }])
    print(df)

    def get_xy(competition, model):
        sub_df = df[df['model'].str.contains(model, regex=False)]
        sub_df = sub_df.query('competition == "%s"' %
                              competition).sort_values('words')
        return sub_df['words'], sub_df['F1']

    with PdfPages('output/data_size_vs_performance.pdf') as pdf:
        se13_semcor_handle, = plt.plot(*get_xy('SemEval13', '(T: SemCor)'),
                                       '-o',
                                       label='SemEval13 (T: SemCor)')
        se13_mun_handle, = plt.plot(*get_xy('SemEval13', '(T: SemCor+OMSTI)'),
                                    '--o',
                                    label='SemEval13 (T: OMSTI)')
        se2_semcor_handle, = plt.plot(*get_xy('Senseval2', '(T: SemCor)'),
                                      ':o',
                                      label='Senseval2 (T: SemCor)')
        se2_mun_handle, = plt.plot(*get_xy('Senseval2', '(T: SemCor+OMSTI)'),
                                   '-.o',
                                   label='Senseval2 (T: OMSTI)')
        plt.legend(handles=[
            se13_semcor_handle, se13_mun_handle, se2_semcor_handle,
            se2_mun_handle
        ],
                   loc='lower right')
        plt.axis([1.5e7, 1.1e11, 0, 1])
        plt.ylabel('F1')
        plt.xlabel('Tokens')
        plt.xscale('log')
        pdf.savefig()
        plt.show()
        plt.close()
    # extrapolate from data
    lr = LinearRegression()
    words, f1s = get_xy('SemEval13', 'Our LSTM (T: SemCor)')
    lr.fit(f1s.values.reshape([-1, 1]),
           np.log10(words.values.reshape([-1, 1])))
    print('Extrapolated data size (words):')
    print(lr.predict([[0.75], [0.8]]))
Example #42
#print(boston.data)
#print(boston.target)
# split the dataset (this continues the Boston housing example above)
from sklearn.preprocessing import PolynomialFeatures

X_train, X_test, y_train, y_test = train_test_split(boston.data,
                                                    boston.target,
                                                    test_size=0.2,
                                                    random_state=2)

# add polynomial features so the linear regression model fits the data better;
# raising the degree keeps improving the training fit but easily causes overfitting
poly = PolynomialFeatures(degree=2, include_bias=False)
X_train_poly = poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)  # transform only; do not refit on the test set
# polynomial linear regression
model2 = LinearRegression(normalize=True)
model2.fit(X_train_poly, y_train)
mutilScore = model2.score(X_test_poly, y_test)
print(mutilScore)

# evaluate on the test set with mean squared error (MSE)
# model predictions
y_pred = model2.predict(X_test_poly)
print("MSE:", metrics.mean_squared_error(y_test, y_pred))

# cross-validation
predicted = cross_val_predict(model2, boston.data, boston.target, cv=10)
print("MSE:", metrics.mean_squared_error(boston.target, predicted))

# plot
import matplotlib.pyplot as plt
plt.scatter(boston.target, predicted, color="y", marker="o")
def get_error(x, y):
    model = LinearRegression(normalize=True)
    model.fit(x, y)
    predict = model.predict(x)
    return np.average(np.abs(y - predict))
def get_cv_error(x_train, x_test, y_train, y_test):
    model = LinearRegression(normalize=True)
    model.fit(x_train, y_train)
    predict = model.predict(x_test)
    return np.average(np.abs(y_test - predict))
Example #45
from sklearn.linear_model import LinearRegression

class inp_reader(object):
    # class header and first attribute reconstructed; the snippet was truncated here
    inp_features = list()
    inp_prices = list()
    features = list()

    def get_inp_features(self):
        return self.inp_features

    def get_inp_prices(self):
        return self.inp_prices

    def get_features(self):
        return self.features

    def read(self):
        F, N = map(int, input().split(' '))
        for _ in range(N):
            inp_f = list(map(float, input().strip().split()))
            self.inp_features.append(inp_f[:F])
            self.inp_prices.append(inp_f[F:])
        questions = int(input())
        for _ in range(questions):
            self.features.append(list(map(float, input().split())))

reader = inp_reader()
reader.read()
inp_features = reader.get_inp_features()
inp_prices = reader.get_inp_prices()
features = reader.get_features()

model = LinearRegression()

model.fit(inp_features, inp_prices)
prices = model.predict(features)
for el in prices:
    print(el[0])