Beispiel #1
0
def ARDRegression_on_fold(feature_sets, train, test, y, y_all, X, dim, dimsum, learn_options):
    """Fit an ARD regressor on one fold and predict its held-out rows.

    Only ``X``, ``y``, ``train`` and ``test`` are read; the remaining
    parameters are accepted but not used by this function.

    Returns:
        A ``(y_pred, clf)`` tuple: the predictions for ``X[test]`` with a
        trailing axis appended, and the fitted estimator.
    """
    model = ARDRegression()
    # Only the first column of ``y`` is used as the regression target.
    fold_targets = y[train][:, 0]
    model.fit(X[train], fold_targets)
    held_out_predictions = model.predict(X[test])
    return held_out_predictions[:, None], model
    def predict_features(self, df_features, df_target, idx=0, **kwargs):
        """Score every feature by the magnitude of its ARD regression weight.

        Args:
            df_features: pandas object holding one column per candidate feature.
            df_target: pandas object holding the regression target.
            idx: accepted for interface compatibility; not used here.
            **kwargs: accepted for interface compatibility; not used here.

        Returns:
            The absolute values of the fitted ``coef_`` array.
        """
        # ``DataFrame.as_matrix()`` was removed in pandas 1.0; ``.values`` is
        # the equivalent accessor that works on old and new versions alike.
        X = df_features.values
        # Flatten the target so a single-column frame is passed as a 1-D
        # array, which is the shape the estimator's ``fit`` expects.
        y = df_target.values.ravel()
        clf = ARDRegression(compute_score=True)
        clf.fit(X, y)

        return np.abs(clf.coef_)
Beispiel #3
0
def getTrainedClassifier(ticker, sd, ed, save=True):
    """Train an ARD regressor that predicts a ticker's future adjusted close.

    Daily data for ``ticker`` between ``sd`` and ``ed`` is downloaded from the
    Quandl ``WIKI`` database. The regression target is the adjusted close
    shifted ``shift`` rows into the future (``shift`` is a module-level name
    defined outside this function); rows without a target are dropped.

    Args:
        ticker: ticker symbol appended to ``'WIKI/'``.
        sd: start date forwarded to ``quandl.get``.
        ed: end date forwarded to ``quandl.get``.
        save: accepted for interface compatibility; not used here.

    Returns:
        The fitted ``ARDRegression`` instance.
    """
    df = quandl.get('WIKI/' + ticker, start_date=sd, end_date=ed)
    # Take an explicit copy of the column subset so that adding the target
    # column below does not write into a slice of the downloaded frame.
    df = df[[
        'Adj. Open', 'Adj. High', 'Adj. Low', 'Adj. Close', 'Adj. Volume'
    ]].copy()

    df['future'] = df['Adj. Close'].shift(-shift)
    # The last ``shift`` rows have no future price and therefore no target.
    df = df.dropna()

    # ``columns=`` replaces the positional ``axis`` argument, which newer
    # pandas versions no longer accept.
    X_train = np.array(df.drop(columns=['future']))
    y_train = np.array(df['future'])

    clf = ARDRegression()
    clf.fit(X_train, y_train)
    return clf
Beispiel #4
0
    def train(self):
        """
        Fit the linear model on the observed data and store the weight posterior.

        An ARD regression supplies the noise level (``self.sigma``) and the
        weight precisions (``self.alpha``); from these the posterior mean
        ``self.m`` and covariance ``self.S`` of the weights are computed.

        Returns the tuple ``(self.m, self.S, self.sigma, self.alpha)``.
        """
        if self.normalize_output:
            normalized = zero_mean_unit_var_normalization(self.y)
            self.y, self.norm_mean, self.norm_sd = normalized

        # Optionally prepend a constant column for the intercept.
        design = sm.add_constant(self.X) if self.intercept else self.X

        ard = ARDRegression()
        ard.fit(design, self.y)
        # Best sigma: derived from the fitted noise precision.
        self.sigma = np.sqrt(1. / ard.alpha_)
        # Best alpha: the fitted weight precisions.
        self.alpha = ard.lambda_

        n_weights = design.shape[1]
        precision = (np.dot(design.T, design) / self.sigma**2.
                     + self.alpha * np.eye(n_weights))
        # Small diagonal jitter added before the Cholesky factorisation.
        precision = precision + np.eye(precision.shape[0]) * 1e-5
        factor = scipy.linalg.cho_factor(precision)

        # The posterior mean of w
        projected_targets = np.dot(design.T, self.y) / self.sigma**2
        self.m = scipy.linalg.cho_solve(factor, projected_targets)
        # The posterior covariance of w
        self.S = scipy.linalg.cho_solve(factor, np.eye(n_weights))

        return self.m, self.S, self.sigma, self.alpha
Beispiel #5
0
def test_check_is_fitted():
    """Exercise ``check_is_fitted`` on non-estimators, unfitted and fitted models."""
    # A class (rather than an instance) and a plain string are both rejected.
    assert_raises(ValueError, check_is_fitted, ARDRegression, "coef_")
    assert_raises(TypeError, check_is_fitted, "SVR", "support_")

    ard = ARDRegression()
    svr = SVR()

    try:
        assert_raises(NotFittedError, check_is_fitted, ard, "coef_")
        assert_raises(NotFittedError, check_is_fitted, svr, "support_")
    except ValueError:
        assert False, "check_is_fitted failed with ValueError"

    # NotFittedError is a subclass of both ValueError and AttributeError.
    # The ``else`` branches make the test fail if nothing is raised at all;
    # without them the message checks would be skipped silently.
    try:
        check_is_fitted(ard, "coef_", "Random message %(name)s, %(name)s")
    except ValueError as e:
        assert_equal(str(e), "Random message ARDRegression, ARDRegression")
    else:
        raise AssertionError(
            "check_is_fitted did not raise for an unfitted ARDRegression")

    try:
        check_is_fitted(svr, "support_", "Another message %(name)s, %(name)s")
    except AttributeError as e:
        assert_equal(str(e), "Another message SVR, SVR")
    else:
        raise AssertionError(
            "check_is_fitted did not raise for an unfitted SVR")

    ard.fit(*make_blobs())
    svr.fit(*make_blobs())

    # Once fitted, the check returns None instead of raising.
    assert_equal(None, check_is_fitted(ard, "coef_"))
    assert_equal(None, check_is_fitted(svr, "support_"))
class ARDRegressionPrim(primitive):
    """Pipeline primitive that wraps an ``ARDRegression`` model as a regressor."""

    def __init__(self, random_state=0):
        super(ARDRegressionPrim, self).__init__(name='ARDRegression')
        # Descriptive metadata about this primitive.
        self.type = 'Regressor'
        self.accept_type = 'c_r'
        self.description = "Bayesian ARD regression. Fit the weights of a regression model, using an ARD prior. The weights of the regression model are assumed to be in Gaussian distributions. Also estimate the parameters lambda (precisions of the distributions of the weights) and alpha (precision of the distribution of the noise). The estimation is done by an iterative procedures (Evidence Maximization)"
        # Hyper-parameter bookkeeping: the model is built with default settings.
        self.hyperparams = []
        self.hyperparams_run = {'default': True}
        # Stored only; the wrapped model is not given this value.
        self.random_state = random_state
        self.model = ARDRegression()

    def can_accept(self, data):
        """Delegate the acceptance check to ``can_accept_c`` for 'Regression'."""
        return self.can_accept_c(data, 'Regression')

    def is_needed(self, data):
        """Always report this primitive as needed, regardless of ``data``."""
        return True

    def fit(self, data):
        """Fit the wrapped model on the 'X' and 'Y' entries of the handled data."""
        prepared = handle_data(data)
        self.model.fit(prepared['X'], prepared['Y'])

    def produce(self, data):
        """Predict on ``data`` and return ``{0: output}``.

        The handled data gains a 'predictions' entry, and its 'X' entry is
        replaced by a one-column DataFrame holding those predictions.
        """
        output = handle_data(data)
        predicted = self.model.predict(output['X'])
        output['predictions'] = predicted
        prediction_column = self.name + "Pred"
        output['X'] = pd.DataFrame(predicted, columns=[prediction_column])
        return {0: output}
def test_return_std():
    # For both Bayesian regressors, the predictive standard deviation
    # returned with return_std=True should match the noise level of the data.
    n_features, n_train, n_test = 5, 50, 10
    true_weights = np.array([1.0, 0.0, 1.0, -1.0, 0.0])
    true_bias = 1.0

    def noiseless_target(X):
        return np.dot(X, true_weights) + true_bias

    def noisy_target(X, noise_mult):
        return noiseless_target(X) + np.random.randn(X.shape[0]) * noise_mult

    X = np.random.random((n_train, n_features))
    X_test = np.random.random((n_test, n_features))

    # The required precision (in decimals) grows as the noise shrinks.
    for decimal, noise_mult in enumerate([1, 0.1, 0.01]):
        y = noisy_target(X, noise_mult)

        for estimator_cls in (BayesianRidge, ARDRegression):
            model = estimator_cls()
            model.fit(X, y)
            _, predicted_std = model.predict(X_test, return_std=True)
            assert_array_almost_equal(predicted_std, noise_mult,
                                      decimal=decimal)
Beispiel #8
0
def test_check_is_fitted():
    """Exercise ``check_is_fitted`` on non-estimators, unfitted and fitted models."""
    # A class (rather than an instance) and a plain string both raise TypeError.
    with pytest.raises(TypeError):
        check_is_fitted(ARDRegression)
    with pytest.raises(TypeError):
        check_is_fitted("SVR")

    ard = ARDRegression()
    svr = SVR()

    try:
        with pytest.raises(NotFittedError):
            check_is_fitted(ard)
        with pytest.raises(NotFittedError):
            check_is_fitted(svr)
    except ValueError:
        assert False, "check_is_fitted failed with ValueError"

    # NotFittedError is a subclass of both ValueError and AttributeError.
    # ``pytest.raises`` fails the test when nothing is raised, so the custom
    # message is always checked (a bare try/except would pass vacuously).
    with pytest.raises(ValueError) as excinfo:
        check_is_fitted(ard, msg="Random message %(name)s, %(name)s")
    assert str(excinfo.value) == "Random message ARDRegression, ARDRegression"

    with pytest.raises(AttributeError) as excinfo:
        check_is_fitted(svr, msg="Another message %(name)s, %(name)s")
    assert str(excinfo.value) == "Another message SVR, SVR"

    ard.fit(*make_blobs())
    svr.fit(*make_blobs())

    # Once fitted, the check returns None instead of raising.
    assert check_is_fitted(ard) is None
    assert check_is_fitted(svr) is None
    def predict_features(self, df_features, df_target, idx=0, **kwargs):
        """Return the absolute ARD regression weight of every feature.

        ``df_features`` and ``df_target`` are converted through ``.values``;
        the target is flattened before fitting. ``idx`` and ``kwargs`` are
        accepted but not used here.
        """
        feature_matrix = df_features.values
        target_values = df_target.values
        model = ARDRegression(compute_score=True)
        model.fit(feature_matrix, target_values.ravel())
        weights = model.coef_
        return np.abs(weights)
def ard_regression(train, test):
    """Fit ARD regression on ``train`` and print error metrics on ``test``.

    Both arguments are DataFrames containing a ``'views'`` column, which is
    the regression target; every other column is used as a feature.

    Prints the mean squared error and the median absolute error of the test
    predictions. Returns ``None``.
    """
    target = 'views'

    # The inputs are only read, so no defensive copies are needed:
    # ``np.delete`` already returns a new array without the target column.
    X_train = np.delete(train.to_numpy(), [train.columns.get_loc(target)], axis=1)
    y_train = train[target]

    X_test = np.delete(test.to_numpy(), [test.columns.get_loc(target)], axis=1)
    y_test = test[target]

    reg = ARDRegression(compute_score=True)
    reg.fit(X_train, y_train)

    y_pred = reg.predict(X_test)

    # The mean squared error. The ``squared`` keyword is left at its default
    # (which yields the MSE) because passing it explicitly is deprecated in
    # recent scikit-learn releases.
    print('Mean squared error: %.2f' %
          mean_squared_error(y_test, y_pred))
    # The median absolute error: 0 is perfect prediction
    print('median absolute error: %.2f' %
          median_absolute_error(y_test, y_pred))

    return None
Beispiel #11
0
def test_check_is_fitted():
    """Exercise ``check_is_fitted`` on non-estimators, unfitted and fitted models."""
    # A class (rather than an instance) and a plain string are both rejected.
    assert_raises(ValueError, check_is_fitted, ARDRegression, "coef_")
    assert_raises(TypeError, check_is_fitted, "SVR", "support_")

    ard = ARDRegression()
    svr = SVR(gamma='scale')

    try:
        assert_raises(NotFittedError, check_is_fitted, ard, "coef_")
        assert_raises(NotFittedError, check_is_fitted, svr, "support_")
    except ValueError:
        assert False, "check_is_fitted failed with ValueError"

    # NotFittedError is a subclass of both ValueError and AttributeError.
    # The ``else`` branches make the test fail if nothing is raised at all;
    # without them the message checks would be skipped silently.
    try:
        check_is_fitted(ard, "coef_", "Random message %(name)s, %(name)s")
    except ValueError as e:
        assert_equal(str(e), "Random message ARDRegression, ARDRegression")
    else:
        raise AssertionError(
            "check_is_fitted did not raise for an unfitted ARDRegression")

    try:
        check_is_fitted(svr, "support_", "Another message %(name)s, %(name)s")
    except AttributeError as e:
        assert_equal(str(e), "Another message SVR, SVR")
    else:
        raise AssertionError(
            "check_is_fitted did not raise for an unfitted SVR")

    ard.fit(*make_blobs())
    svr.fit(*make_blobs())

    # Once fitted, the check returns None instead of raising.
    assert_equal(None, check_is_fitted(ard, "coef_"))
    assert_equal(None, check_is_fitted(svr, "support_"))
def bayeslr_python(fname, threshold):
    """Run one ARD (Bayesian linear) regression per target column of a workbook.

    Data is exchanged with MATLAB through an Excel file: the first sheet of
    ``fname`` holds the design matrix, the second sheet holds one target per
    column. The workbook is then overwritten with a single sheet named "0"
    that contains, per target column, the fitted coefficients followed by the
    intercept. Target columns whose sum is zero are not fitted and keep an
    all-zero result row.

    Args:
        fname: path of the Excel workbook (read, then overwritten).
        threshold: value used as ``threshold_lambda`` of every regression.
    """
    # ``sheet_name``/``index_col`` are the current keyword names; the old
    # ``sheetname`` spelling is no longer accepted by pandas.
    X = pd.read_excel(fname, sheet_name=0, header=None, index_col=None)
    Y = pd.read_excel(fname, sheet_name=1, header=None, index_col=None)
    n_features = X.shape[1]
    n_targets = Y.shape[1]

    # True for every target column whose sum differs from zero.
    has_signal = (Y.sum(axis=0) != 0).values

    # One row per target: coefficients first, intercept in the last column.
    X_blr = np.zeros((n_targets, n_features + 1))

    for i in range(n_targets):
        if not has_signal[i]:
            continue
        # ``.iloc`` replaces the removed ``.ix`` indexer (positional column).
        y = Y.iloc[:, i]
        clf = ARDRegression(threshold_lambda=threshold)
        clf.fit(X, y)
        X_blr[i, :] = np.hstack((clf.coef_, clf.intercept_))

    with pd.ExcelWriter(fname) as writer:
        pd.DataFrame(X_blr).to_excel(writer, sheet_name=str(0),
                                     index=False, header=False)
Beispiel #13
0
def main_bak():
    """Trial run comparing ``iterative_ard`` with scikit-learn's ARDRegression.

    Generates a synthetic linear problem, fits both implementations, and
    prints their wall-clock times followed by the estimated and the true
    weights.
    """
    # trial
    noiseVar = 0.01
    n = 500
    d = 10

    x = np.random.normal(0, 1, size=d * n).reshape((n, d))
    w = np.random.normal(10, 1, size=d)
    # ``np.random.normal`` takes a standard deviation, so the noise variance
    # has to be converted before it is used as the scale.
    y = np.dot(x, w) + np.random.normal(0, np.sqrt(noiseVar), size=n)

    t1 = time.time()

    # Single-argument, parenthesised prints behave identically on
    # Python 2 and Python 3.
    print("Running iterative ard")
    (witer, gamma) = iterative_ard(Xtrain=x, ytrain=y, noiseVar=noiseVar)
    t2 = time.time()
    print("Running scikit ARD")
    ard = ARDRegression(compute_score=True)
    ard.fit(x, y)
    t3 = time.time()

    print("Time taken ")
    print("Iterative:" + str(t2 - t1))
    print("scikit ard:" + str(t3 - t2))

    print("ALL W :")
    print(witer)
    print(ard.coef_)
    print(w)
def ARDRegression_on_fold(feature_sets, train, test, y, y_all, X, dim, dimsum, learn_options):
    """Fit an ARD regressor on one fold and predict its held-out rows.

    Only ``X``, ``y``, ``train`` and ``test`` are read; the remaining
    parameters are accepted but not used by this function.

    Returns:
        A ``(y_pred, clf)`` tuple: the predictions for ``X[test]`` with a
        trailing axis appended, and the fitted estimator.
    """
    model = ARDRegression()
    # Only the first column of ``y`` is used as the regression target.
    fold_targets = y[train][:, 0]
    model.fit(X[train], fold_targets)
    held_out_predictions = model.predict(X[test])
    return held_out_predictions[:, None], model
Beispiel #15
0
def test_ard_regression_predict_normalize_true():
    """Predicting with `normalize=True` and `return_std=True` must not fail.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/18605
    """
    train_inputs = [[0, 0], [1, 1], [2, 2]]
    train_targets = [0, 1, 2]
    model = ARDRegression(normalize=True)
    model.fit(train_inputs, train_targets)
    # Only the absence of an exception is checked; the result is discarded.
    model.predict([[1, 1]], return_std=True)
Beispiel #16
0
    def autorelevancedetermination(self):
        """Fit ARD regression on the training split and predict the test split.

        Prints the fraction of test targets that are exactly equal to their
        prediction, then returns the predictions.
        """
        model = ARDRegression(compute_score=True)
        model.fit(self.x_train, self.y_train)
        predictions = model.predict(self.x_test)
        # Element-wise equality between targets and predictions, averaged.
        exact_match_rate = np.mean(self.y_test == predictions)
        print(exact_match_rate)

        return predictions
Beispiel #17
0
def test_toy_ard_object():
    # ARDRegression fitted on three points of the identity function should
    # reproduce that function to two decimals on nearby inputs.
    train_inputs = np.array([[1], [2], [3]])
    train_targets = np.array([1, 2, 3])
    model = ARDRegression(compute_score=True)
    model.fit(train_inputs, train_targets)

    query_points = [[1], [3], [4]]
    expected = [1, 3, 4]
    assert_array_almost_equal(model.predict(query_points), expected, 2)
Beispiel #18
0
def test_ard_accuracy_on_easy_problem(seed, n_samples, n_features):
    # ARD must converge with reasonable accuracy on an easy problem
    # (Github issue #14055): the target is exactly the second feature.
    # Note: the data shape is fixed at (250, 3); ``n_samples`` and
    # ``n_features`` are not read in this body.
    rng = np.random.RandomState(seed=seed)
    X = rng.normal(size=(250, 3))
    y = X[:, 1]

    model = ARDRegression()
    model.fit(X, y)

    # The weight of the second feature should be (almost) exactly one.
    coefficient_gap = np.abs(1 - model.coef_[1])
    assert coefficient_gap < 1e-10
Beispiel #19
0
def make_linear(X, y):
    """Fit an ARD regression and an ordinary least squares model on ``(X, y)``.

    Returns:
        A ``(ard, ols)`` tuple of the two fitted estimators.
    """
    # The previously computed sample/feature counts were never used and
    # have been removed.
    ard = ARDRegression(compute_score=True)
    ard.fit(X, y)

    ols = LinearRegression()
    ols.fit(X, y)

    return ard, ols
Beispiel #20
0
    def runARDRegressor(self):
        """Fit ARD regression on the training data and display the results.

        Predicts on both the test and the training split, scores the model on
        the test split, and hands the results to the display helpers.
        """
        regressor = ARDRegression(fit_intercept=True, normalize=True)

        print("runARDRegressor\n")
        regressor.fit(self.m_X_train, self.m_y_train)
        test_predictions = regressor.predict(self.m_X_test)
        test_score = regressor.score(self.m_X_test, self.m_y_test)
        train_predictions = regressor.predict(self.m_X_train)

        self.displayPredictPlot(test_predictions)
        self.displayResidualPlot(test_predictions, train_predictions)
        self.dispalyModelResult(regressor, test_predictions, test_score)
Beispiel #21
0
def test_update_of_sigma_in_ard():
    # `sigma_` must be updated correctly after the last iteration of the
    # ARDRegression algorithm. See issue #10128.
    features = np.array([[1, 0], [0, 0]])
    targets = np.array([0, 0])
    model = ARDRegression(n_iter=1)
    model.fit(features, targets)
    # With these inputs ARDRegression prunes both coefficients in the first
    # iteration, so `sigma_` is expected to be empty with shape (0, 0).
    expected_shape = (0, 0)
    assert model.sigma_.shape == expected_shape
    # Prediction with standard deviations must not raise either.
    model.predict(features, return_std=True)
def test_check_is_fitted_with_attributes(wrap):
    """Check ``check_is_fitted`` with explicit attribute names.

    ``wrap`` converts the list of attribute names into the container type
    under test.
    """
    not_fitted_pattern = "is not fitted yet"
    fitted_attr = wrap(["coef_"])
    ard = ARDRegression()

    # Before fitting, the attribute is missing.
    with pytest.raises(NotFittedError, match=not_fitted_pattern):
        check_is_fitted(ard, fitted_attr)

    ard.fit(*make_blobs())

    # Does not raise
    check_is_fitted(ard, wrap(["coef_"]))

    # Raises when using attribute that is not defined
    with pytest.raises(NotFittedError, match=not_fitted_pattern):
        check_is_fitted(ard, wrap(["coef_bad_"]))
Beispiel #23
0
    def fit_model_16(self,toWrite=False):
        """Fit ARD regression on every cross-validation split and report log loss.

        The same estimator object is re-fitted on each split of
        ``self.cv_data``, so after the loop it holds the fit of the last split.

        Args:
            toWrite: when true, pickle the estimator to ``model16/model.pkl``.
        """
        model = ARDRegression()

        for X_train, X_test, Y_train, Y_test in self.cv_data:
            model.fit(X_train,Y_train)
            pred = model.predict(X_test)
            print("Model 16 score %f" % (logloss(Y_test,pred),))

        if toWrite:
            # Pickle data is binary, so the file must be opened in 'wb' mode;
            # the context manager also closes it if ``dump`` raises.
            with open('model16/model.pkl','wb') as f2:
                pickle.dump(model,f2)
Beispiel #24
0
class _ARDRegressionImpl:
    """Thin wrapper that forwards ``fit``/``predict`` to an ``Op`` instance."""

    def __init__(self, **hyperparams):
        # Keep the hyper-parameters and build the wrapped model from them.
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        """Fit the wrapped model, passing ``y`` only when it is given."""
        fit_args = (X,) if y is None else (X, y)
        self._wrapped_model.fit(*fit_args)
        return self

    def predict(self, X):
        """Return the wrapped model's predictions for ``X``."""
        wrapped = self._wrapped_model
        return wrapped.predict(X)
Beispiel #25
0
def _ard(*,
         train,
         test,
         x_predict=None,
         metrics,
         n_iter=300,
         tol=0.001,
         alpha_1=1e-06,
         alpha_2=1e-06,
         lambda_1=1e-06,
         lambda_2=1e-06,
         compute_score=False,
         threshold_lambda=10000.0,
         fit_intercept=True,
         normalize=False,
         copy_X=True,
         verbose=False):
    """Fit an ARD regression, score it on ``test`` and optionally predict.

    For more info visit :
        https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.ARDRegression.html#sklearn.linear_model.ARDRegression

    Args:
        train: pair ``(X, y)`` used for fitting.
        test: pair ``(X, y)`` used for scoring.
        x_predict: optional extra inputs to predict after scoring.
        metrics: one of ``'mse'``, ``'rmse'`` or ``'mae'``.
        n_iter, tol, alpha_1, alpha_2, lambda_1, lambda_2, compute_score,
        threshold_lambda, fit_intercept, normalize, copy_X, verbose:
            forwarded unchanged to ``ARDRegression``.

    Returns:
        ``('ARDRegression', score, predictions)`` where ``predictions`` is
        ``None`` when ``x_predict`` is ``None``.

    Raises:
        ValueError: if ``metrics`` is not a supported metric name.
    """
    scorers = {'mse': _mse, 'rmse': _rmse, 'mae': _mae}
    # Validate up front: an unknown metric used to surface only after the
    # (potentially expensive) fit, as an UnboundLocalError on ``accuracy``.
    if metrics not in scorers:
        raise ValueError(
            "metrics must be one of %s, got %r" % (sorted(scorers), metrics))

    model = ARDRegression(n_iter=n_iter,
                          tol=tol,
                          alpha_1=alpha_1,
                          alpha_2=alpha_2,
                          lambda_1=lambda_1,
                          lambda_2=lambda_2,
                          compute_score=compute_score,
                          threshold_lambda=threshold_lambda,
                          fit_intercept=fit_intercept,
                          normalize=normalize,
                          copy_X=copy_X,
                          verbose=verbose)
    model.fit(train[0], train[1])
    model_name = 'ARDRegression'
    y_hat = model.predict(test[0])

    accuracy = scorers[metrics](test[1], y_hat)

    if x_predict is None:
        return (model_name, accuracy, None)

    y_predict = model.predict(x_predict)
    return (model_name, accuracy, y_predict)
Beispiel #26
0
def test_ard_accuracy_on_easy_problem():
    # ARD must converge with reasonable accuracy on an easy problem
    # (Github issue #14055).
    # Seed 45 seems to converge poorly in the failure-case
    # (scipy==1.3.0, sklearn==0.21.2).
    rng = np.random.RandomState(seed=45)
    X = rng.normal(size=(250, 3))
    # The target is exactly the second feature.
    y = X[:, 1]

    model = ARDRegression(n_iter=600)
    model.fit(X, y)

    # An accuracy better than 1E-4 is expected in most cases; the
    # failure-case produces 0.16.
    coefficient_gap = np.abs(1 - model.coef_[1])
    assert coefficient_gap < 0.01
Beispiel #27
0
    def predict(self):
        """
        Train scikit-learn's ARD regression (https://scikit-learn.org) on the
        stored training data, predict the stored test features, and score
        the predictions against ``self.y_test``.

        The score computed by ``OneHotPredictor.get_accuracy`` is stored in
        ``self.acc`` and returned.
        """
        model = ARDRegression(threshold_lambda=1e5)
        model.fit(self.X_train, self.y_train)
        # The scorer receives the predictions as a plain list.
        predicted = list(model.predict(self.X_test))
        self.acc = OneHotPredictor.get_accuracy(predicted, self.y_test)
        return self.acc
Beispiel #28
0
def _fit_ardr(X: np.ndarray,
              y: np.ndarray,
              threshold_lambda: float = None,
              line_scan: bool = False,
              fit_intercept: bool = False,
              **kwargs) -> Dict[str, Any]:
    """
    Returns the solution `a` to the linear problem `Xa=y` obtained by
    using the automatic relevance determination regression (ARDR)
    method as implemented in scitkit-learn in the form of a dictionary
    with a key named `parameters`.

    Parameters
    -----------
    X
        fit matrix
    y
        target array
    threshold_lambda
        threshold lambda parameter forwarded to sklearn
    line_scan
        whether or not to perform line-scan in order to find optimal
        threshold-lambda
    fit_intercept
        center data or not, forwarded to sklearn
    """

    if threshold_lambda is not None and line_scan:
        raise ValueError('Specify threshold_lambda or set line_scan=True, not both')

    if threshold_lambda is None:
        threshold_lambda = 1e4

    if line_scan:
        return _fit_ardr_line_scan(X, y, fit_intercept=fit_intercept, **kwargs)
    else:
        ardr = ARDRegression(threshold_lambda=threshold_lambda, fit_intercept=fit_intercept,
                             **kwargs)
        ardr.fit(X, y)
        results = dict()
        results['parameters'] = ardr.coef_
        return results
Beispiel #29
0
def test_check_is_fitted():
    """Exercise ``check_is_fitted`` on non-estimators, unfitted and fitted models."""
    # A class (rather than an instance) and a plain string both raise TypeError.
    assert_raises(TypeError, check_is_fitted, ARDRegression)
    assert_raises(TypeError, check_is_fitted, "SVR")

    ard = ARDRegression()
    svr = SVR()

    try:
        assert_raises(NotFittedError, check_is_fitted, ard)
        assert_raises(NotFittedError, check_is_fitted, svr)
    except ValueError:
        assert False, "check_is_fitted failed with ValueError"

    # NotFittedError is a subclass of both ValueError and AttributeError.
    # The ``else`` branches make the test fail if nothing is raised at all;
    # without them the message checks would be skipped silently.
    try:
        check_is_fitted(ard, msg="Random message %(name)s, %(name)s")
    except ValueError as e:
        assert str(e) == "Random message ARDRegression, ARDRegression"
    else:
        raise AssertionError(
            "check_is_fitted did not raise for an unfitted ARDRegression")

    try:
        check_is_fitted(svr, msg="Another message %(name)s, %(name)s")
    except AttributeError as e:
        assert str(e) == "Another message SVR, SVR"
    else:
        raise AssertionError(
            "check_is_fitted did not raise for an unfitted SVR")

    ard.fit(*make_blobs())
    svr.fit(*make_blobs())

    # Once fitted, the check returns None instead of raising.
    assert check_is_fitted(ard) is None
    assert check_is_fitted(svr) is None

    # to be removed in 0.23
    assert_warns_message(
        DeprecationWarning,
        "Passing attributes to check_is_fitted is deprecated", check_is_fitted,
        ard, ['coef_'])
    assert_warns_message(DeprecationWarning,
                         "Passing all_or_any to check_is_fitted is deprecated",
                         check_is_fitted,
                         ard,
                         all_or_any=any)
class ARDR():
    """ARD regression wrapper that trains on a random subset of 100 columns.

    When the column sub-sampling is not possible (for example when the
    training data has fewer than 100 columns), all columns are used instead,
    both for fitting and for prediction.
    """
    def __init__(self, ARDRegression, N):
        """Build the wrapped model.

        Args:
            ARDRegression: callable that builds the estimator from keyword
                arguments (the estimator class is injected by the caller).
            N: divisor applied to the CPU count to obtain ``cores_number``.
        """
        self.cores_number = int(np.ceil(multiprocessing.cpu_count() / N))
        # Columns chosen during ``fit``; empty means "use every column".
        self.selected_columns = []
        self.model = ARDRegression(alpha_1=1e-06,
                                   alpha_2=1e-06,
                                   compute_score=False,
                                   copy_X=True,
                                   fit_intercept=True,
                                   lambda_1=1e-06,
                                   lambda_2=1e-06,
                                   n_iter=300,
                                   normalize=False,
                                   threshold_lambda=10000.0,
                                   tol=0.001,
                                   verbose=False)

        print("ARDRegression Cores: ", np.nan)

    def fit(self, X_train, y_train, X_test, y_test, error_type="MAE"):
        """Fit the model on (a column subset of) ``X_train``.

        ``X_test`` and ``y_test`` are accepted but not used. ``error_type``
        must be one of "MSE", "R2", "MAE" or "LOGLOSS"; any other value
        raises ``KeyError``.
        """
        try:
            chosen = np.random.choice(X_train.columns, 100, replace=False)
            X_train = X_train[chosen]
        except Exception:
            # Best-effort sub-sampling: fall back to every column. The
            # selection is reset so that ``predict`` uses the same columns
            # the model was actually trained on.
            chosen = []
        self.selected_columns = chosen

        error_dict = {
            "MSE": "rmse",
            "R2": {"l1", "l2"},
            "MAE": "mae",
            "LOGLOSS": "multi_logloss"
        }
        # The looked-up value is not needed by this model; the lookup is
        # kept so an unknown ``error_type`` still raises ``KeyError``.
        error_dict[error_type]
        self.model.fit(X_train, y_train)

    def predict(self, X_test):
        """Predict for ``X_test`` using the columns the model was fitted on."""
        if len(self.selected_columns) > 0:
            X_test = X_test[self.selected_columns]
        prediction = self.model.predict(X_test)
        return (prediction)
Beispiel #31
0
def ARD(X_train, y_train, X_test, y_test):
    '''
        Purpose: Fit ARD regression and score its rounded predictions
        Input: X_train, y_train, X_test, y_test
        Output: accuracy_score of the rounded test predictions against y_test
    '''
    model = ARDRegression(compute_score=True)
    model = model.fit(X_train, y_train)
    # Round the continuous predictions before comparing them with y_test.
    rounded_predictions = model.predict(X_test).round()
    return metrics.accuracy_score(y_test, rounded_predictions)
from sklearn.linear_model import ARDRegression
from sklearn.model_selection import cross_val_predict
from sklearn.datasets import load_boston
from sklearn.metrics import explained_variance_score, mean_squared_error
import numpy as np
import pylab as pl
# Load the Boston housing dataset through scikit-learn's loader.
boston = load_boston()
# Regression design matrix (features).
x = boston.data
# Regression targets.
y = boston.target
# Create the ARDRegression estimator with explicit alpha_1/alpha_2 and
# lambda_1/lambda_2 hyper-parameters.
ARD= ARDRegression(alpha_1=0.01, alpha_2=0.01, lambda_1=1e-06, lambda_2=1e-06)
# Fit the linear model on the full dataset.
ARD.fit(x,y)
# In-sample predictions (same data the model was fitted on).
yp = ARD.predict(x)
# Predictions obtained through 10-fold cross-validation.
yp_cv = cross_val_predict(ARD, x, y, cv=10)
# Compute explained variance and RMSE for both the in-sample and the
# cross-validated predictions, then print them.
Evariance=explained_variance_score(y,yp)
Evariance_cv=explained_variance_score(y,yp_cv)
RMSE =np.sqrt(mean_squared_error(y,yp))
RMSECV=np.sqrt(mean_squared_error(y,yp_cv))
print('Method: ARDRegression Regression')
print('RMSE on the dataset: %.4f' %RMSE)
print('RMSE on 10-fold CV: %.4f' %RMSECV)
print('Explained Variance Regression Score on the dataset: %.4f' %Evariance)
print('Explained Variance Regression 10-fold CV: %.4f' %Evariance_cv)
#plotting real vs predicted data
 def ard_regressor(self):
     """Fit ARD regression on the preprocessed split and report its predictions.

     The data comes from ``self.preprocessing()``; the test targets and the
     test predictions are handed to ``self.printing`` under the label 'ARD'.
     """
     x_train, x_test, y_train, y_test = self.preprocessing()
     fitted = ARDRegression().fit(x_train, y_train)
     predictions = fitted.predict(x_test)
     self.printing(y_test, predictions, 'ARD')
Beispiel #34
0
# Precision used for the Gaussian from which the non-zero weights are drawn
# (the standard deviation below is 1 / sqrt(lambda_)).
lambda_ = 4.
w = np.zeros(n_features)
# Only keep 10 weights of interest
# NOTE(review): randint draws indices independently, so repeated indices are
# possible and fewer than 10 distinct weights may end up non-zero.
relevant_features = np.random.randint(0, n_features, 10)
for i in relevant_features:
    w[i] = stats.norm.rvs(loc=0, scale=1. / np.sqrt(lambda_))
# Create noise with a precision alpha of 50.
alpha_ = 50.
noise = stats.norm.rvs(loc=0, scale=1. / np.sqrt(alpha_), size=n_samples)
# Create the target
y = np.dot(X, w) + noise

###############################################################################
# Fit the ARD Regression
clf = ARDRegression(compute_score=True)
clf.fit(X, y)

# Ordinary least squares fit on the same data, plotted below for comparison.
ols = LinearRegression()
ols.fit(X, y)

###############################################################################
# Plot the true weights and the estimated weights (the histogram of the
# weights is not drawn in this section)
plt.figure(figsize=(6, 5))
plt.title("Weights of the model")
plt.plot(clf.coef_, 'b-', label="ARD estimate")
plt.plot(ols.coef_, 'r--', label="OLS estimate")
plt.plot(w, 'g-', label="Ground truth")
plt.xlabel("Features")
plt.ylabel("Values of the weights")
plt.legend(loc=1)
Beispiel #35
0
# Precision used for the Gaussian from which the non-zero weights are drawn
# (the standard deviation below is 1 / sqrt(lambda_)).
lambda_ = 4.
w = np.zeros(n_features)
# Only keep 10 weights of interest
# NOTE(review): randint draws indices independently, so repeated indices are
# possible and fewer than 10 distinct weights may end up non-zero.
relevant_features = np.random.randint(0, n_features, 10)
for i in relevant_features:
    w[i] = stats.norm.rvs(loc=0, scale=1. / np.sqrt(lambda_))
# Create noise with a precision alpha of 50.
alpha_ = 50.
noise = stats.norm.rvs(loc=0, scale=1. / np.sqrt(alpha_), size=n_samples)
# Create the target
y = np.dot(X, w) + noise

###############################################################################
# Fit the ARD Regression
clf = ARDRegression(compute_score=True)
clf.fit(X, y)

# Ordinary least squares fit on the same data, plotted below for comparison.
ols = LinearRegression()
ols.fit(X, y)

###############################################################################
# Plot the true weights and the estimated weights (the histogram of the
# weights is not drawn in this section)
plt.figure(figsize=(6, 5))
plt.title("Weights of the model")
plt.plot(clf.coef_, 'b-', label="ARD estimate")
plt.plot(ols.coef_, 'r--', label="OLS estimate")
plt.plot(w, 'g-', label="Ground truth")
plt.xlabel("Features")
plt.ylabel("Values of the weights")
plt.legend(loc=1)
Beispiel #36
0
def learn_model(x_mat, y):
    """Fit an ARD regression on ``x_mat`` and ``y`` and return the fitted model."""
    # An RBF-kernel SVR was tried here previously; ARD regression is used now.
    regressor = ARDRegression()
    regressor.fit(x_mat, y)
    return regressor
# Precision used for the Gaussian from which the non-zero weights are drawn
# (the standard deviation below is 1 / sqrt(lambda_)).
lambda_ = 4.
w = np.zeros(n_features)
# Only keep 10 weights of interest
# NOTE(review): randint draws indices independently, so repeated indices are
# possible and fewer than 10 distinct weights may end up non-zero.
relevant_features = np.random.randint(0, n_features, 10)
for i in relevant_features:
    w[i] = stats.norm.rvs(loc=0, scale=1. / np.sqrt(lambda_))
# Create noise with a precision alpha of 50.
alpha_ = 50.
noise = stats.norm.rvs(loc=0, scale=1. / np.sqrt(alpha_), size=n_samples)
# Create the target
y = np.dot(X, w) + noise

###############################################################################
# Fit the ARD Regression
clf = ARDRegression(compute_score=True)
clf.fit(X, y)

# Ordinary least squares fit on the same data, plotted below for comparison.
ols = LinearRegression()
ols.fit(X, y)

###############################################################################
# Plot the true weights and the estimated weights (the histogram and the
# predictions with standard deviations are not drawn in this section)
plt.figure(figsize=(6, 5))
plt.title("Weights of the model")
plt.plot(clf.coef_, color='darkblue', linestyle='-', linewidth=2,
         label="ARD estimate")
plt.plot(ols.coef_, color='yellowgreen', linestyle=':', linewidth=2,
         label="OLS estimate")
plt.plot(w, color='orange', linestyle='-', linewidth=2, label="Ground truth")
plt.xlabel("Features")
    #Train normalizer on RNA seq, apply to rescaled gene expression
    if standardizeByTCGA:    
        rnaSeqExpressionNormalized, L2Normalizer = standardizeExpression(rnaSeqExpression, L2Normalizer, log10Normalize)
        rescaledExpressionClinical = L2Normalizer.transform(np.log10(rescaledExpressionClinical+1))
#    else:
#        prunedRnaSeqExpressionNormalized, L2Normalizer = standardizeExpression(prunedRnaSeqExpression.ix[cellExpression.shape[0],;], L2Normalizer, log10Normalize)
#        prunedArrayExpressionNormalized = L2Normalizer.transform(np.log10(prunedRescaledExpressionClinical+1))

    #Load Docetaxel IC50 Data
    docetaxelData = getDrugIC50('Docetaxel', inputFolder)
    
    #Assemble training data with both IC50 and expression data    
    docetaxelData = pd.merge(docetaxelData, rnaSeqExpressionNormalized, how='inner', left_index=True, right_index=True).drop('cell_line', axis=1)
        
    #Train Docetaxel model    
    clf.fit(docetaxelData.drop(['IC50'], axis=1), docetaxelData['IC50'])    
    
    #Validate on Clinical Data
    resistance_predictions = clf.predict(rescaledExpressionClinical)
    
    #Calculates ROC, first 11 samples correspond to sensitive patients, last 13 are resistant            
    roc_auc_score(np.hstack((np.repeat(0,11), np.repeat(1,13))), resistance_predictions)

    roc_data = pd.DataFrame()
    roc_data['fpr'], roc_data['tpr'],roc_data['thresholds'] = roc_curve(np.hstack((np.repeat(0,11), np.repeat(1,13))), resistance_predictions)


    #Plot Results
    from bokeh.charts import show, output_file
    from bokeh.plotting import figure