Example #1
import numpy as np
import quandl
from sklearn.linear_model import ARDRegression

shift = 30  # forecast horizon in rows; assumed here -- the original snippet leaves it undefined


def getTrainedClassifier(ticker, sd, ed, save=True):
    # Quandl's WIKI feed is discontinued; kept to show the original workflow
    df = quandl.get('WIKI/' + ticker, start_date=sd, end_date=ed)
    df = df[[
        'Adj. Open', 'Adj. High', 'Adj. Low', 'Adj. Close', 'Adj. Volume'
    ]]

    # Label: the adjusted close `shift` rows into the future
    df['future'] = df['Adj. Close'].shift(-shift)
    df.dropna(inplace=True)
    X_train = np.array(df.drop(columns=['future']))
    y_train = np.array(df['future'])

    clf = ARDRegression()
    clf.fit(X_train, y_train)
    return clf
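
A hedged sketch of how the returned model might be used for an actual forecast (ticker and dates are illustrative, and the WIKI feed no longer updates, so treat this as a shape-of-the-API sketch):

clf = getTrainedClassifier('AAPL', '2015-01-01', '2018-01-01')

latest = quandl.get('WIKI/AAPL', start_date='2015-01-01', end_date='2018-03-27')
latest = latest[['Adj. Open', 'Adj. High', 'Adj. Low', 'Adj. Close', 'Adj. Volume']]
X_lately = np.array(latest)[-shift:]   # most recent rows, whose labels lie in the future
forecast = clf.predict(X_lately)       # predicted Adj. Close, `shift` rows ahead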
Example #2
import numpy as np
from numpy.testing import assert_array_almost_equal
from sklearn.linear_model import ARDRegression, BayesianRidge


def test_return_std():
    # Test the return_std option for both Bayesian regressors
    def f(X):
        return np.dot(X, w) + b

    def f_noise(X, noise_mult):
        return f(X) + np.random.randn(X.shape[0]) * noise_mult

    d = 5
    n_train = 50
    n_test = 10

    w = np.array([1.0, 0.0, 1.0, -1.0, 0.0])
    b = 1.0

    X = np.random.random((n_train, d))
    X_test = np.random.random((n_test, d))

    for decimal, noise_mult in enumerate([1, 0.1, 0.01]):
        y = f_noise(X, noise_mult)

        m1 = BayesianRidge()
        m1.fit(X, y)
        y_mean1, y_std1 = m1.predict(X_test, return_std=True)
        assert_array_almost_equal(y_std1, noise_mult, decimal=decimal)

        m2 = ARDRegression()
        m2.fit(X, y)
        y_mean2, y_std2 = m2.predict(X_test, return_std=True)
        assert_array_almost_equal(y_std2, noise_mult, decimal=decimal)
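
Outside the test harness, the same `return_std=True` flag is how you get predictive uncertainty from these models directly; a minimal self-contained sketch:

import numpy as np
from sklearn.linear_model import ARDRegression

rng = np.random.RandomState(0)
X = rng.random_sample((50, 3))
y = X @ np.array([1.0, 0.0, -1.0]) + 0.1 * rng.randn(50)

model = ARDRegression().fit(X, y)
y_mean, y_std = model.predict(rng.random_sample((5, 3)), return_std=True)
print(y_mean, y_std)  # y_std should sit near the 0.1 noise level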
Example #3
def test_check_is_fitted():
    # Check that ValueError is raised when a non-estimator instance is passed.
    # This snippet targets an older check_is_fitted signature that took the
    # attribute name positionally.
    assert_raises(ValueError, check_is_fitted, ARDRegression, "coef_")
    assert_raises(TypeError, check_is_fitted, "SVR", "support_")

    ard = ARDRegression()
    svr = SVR()

    try:
        assert_raises(NotFittedError, check_is_fitted, ard, "coef_")
        assert_raises(NotFittedError, check_is_fitted, svr, "support_")
    except ValueError:
        assert False, "check_is_fitted failed with ValueError"

    # NotFittedError is a subclass of both ValueError and AttributeError
    try:
        check_is_fitted(ard, "coef_", "Random message %(name)s, %(name)s")
    except ValueError as e:
        assert_equal(str(e), "Random message ARDRegression, ARDRegression")

    try:
        check_is_fitted(svr, "support_", "Another message %(name)s, %(name)s")
    except AttributeError as e:
        assert_equal(str(e), "Another message SVR, SVR")

    ard.fit(*make_blobs())
    svr.fit(*make_blobs())

    assert_equal(None, check_is_fitted(ard, "coef_"))
    assert_equal(None, check_is_fitted(svr, "support_"))
Example #4
    def predict_features(self, df_features, df_target, idx=0, **kwargs):
        # DataFrame.as_matrix() was removed in pandas 1.0; to_numpy() replaces it
        X = df_features.to_numpy()
        y = df_target.to_numpy().ravel()
        clf = ARDRegression(compute_score=True)
        clf.fit(X, y)

        return np.abs(clf.coef_)
Example #5
def ARDRegression_on_fold(feature_sets, train, test, y, y_all, X, dim, dimsum, learn_options):
    '''Fit ARDRegression on the training fold and predict the test fold.'''
    clf = ARDRegression()
    clf.fit(X[train], y[train][:, 0])
    y_pred = clf.predict(X[test])[:, None]
    return y_pred, clf
Example #6
    def train(self):
        """
        Train the linear regression model based on the observed dataset
        """
        if self.normalize_output:
            (self.y, self.norm_mean,
             self.norm_sd) = zero_mean_unit_var_normalization(self.y)
        if self.intercept:
            train_X = sm.add_constant(self.X)
        else:
            train_X = self.X
        Phi = train_X
        regressor = ARDRegression()
        regressor.fit(Phi, self.y)
        # Best noise standard deviation (alpha_ is the noise precision)
        self.sigma = np.sqrt(1. / regressor.alpha_)
        # Best weight precisions (ARDRegression.lambda_ is an array, one precision per weight)
        self.alpha = regressor.lambda_

        # Posterior precision A = Phi^T Phi / sigma^2 + diag(lambda); broadcasting
        # against np.eye places each weight's precision on the diagonal
        A = np.dot(Phi.T, Phi) / self.sigma**2. + self.alpha * np.eye(
            Phi.shape[1])
        A = A + np.eye(A.shape[0]) * 1e-5  # jitter for numerical stability
        L = scipy.linalg.cho_factor(A)

        self.m = scipy.linalg.cho_solve(
            L,
            np.dot(Phi.T, self.y) / self.sigma**2)  # The posterior mean of w
        self.S = scipy.linalg.cho_solve(L, np.eye(
            Phi.shape[1]))  # The posterior covariance of w

        return self.m, self.S, self.sigma, self.alpha
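
The closed form above can be cross-checked against scikit-learn directly: with fit_intercept=False and no pruned weights, ARDRegression.coef_ equals the posterior mean m = A^{-1} Phi^T y / sigma^2 with A = Phi^T Phi / sigma^2 + diag(lambda). A minimal sketch (synthetic data; names are illustrative, not from the original):

import numpy as np
from sklearn.linear_model import ARDRegression

rng = np.random.default_rng(0)
Phi = rng.normal(size=(200, 4))
y = Phi @ np.array([1.0, -2.0, 0.5, 3.0]) + 0.1 * rng.normal(size=200)

reg = ARDRegression(fit_intercept=False)
reg.fit(Phi, y)

sigma2 = 1.0 / reg.alpha_                        # noise variance
A = Phi.T @ Phi / sigma2 + np.diag(reg.lambda_)  # posterior precision of w
m = np.linalg.solve(A, Phi.T @ y / sigma2)       # posterior mean of w

# Agrees with sklearn's coefficients as long as no weight was pruned
assert np.allclose(m, reg.coef_, atol=1e-4)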
Example #7
def createARDRegressor(params=None):
    # mergeParams, getARDRegressorParams, setParam, and info are
    # project-specific helpers, not part of scikit-learn
    info("Creating ARD Regressor", ind=4)

    ## Params
    params = mergeParams(ARDRegression(), params)
    tuneParams = getARDRegressorParams()
    grid = tuneParams['grid']

    info("With Parameters", ind=4)
    alpha_1 = setParam('alpha_1', params, grid, force=False)
    info("Param: alpha_1 = {0}".format(alpha_1), ind=6)

    lambda_1 = setParam('lambda_1', params, grid, force=False)
    info("Param: lambda_1 = {0}".format(lambda_1), ind=6)

    alpha_2 = setParam('alpha_2', params, grid, force=False)
    info("Param: alpha_2 = {0}".format(alpha_2), ind=6)

    lambda_2 = setParam('lambda_2', params, grid, force=False)
    info("Param: lambda_2 = {0}".format(lambda_2), ind=6)

    ## estimator
    reg = ARDRegression(alpha_1=alpha_1,
                        alpha_2=alpha_2,
                        lambda_1=lambda_1,
                        lambda_2=lambda_2)

    return {"estimator": reg, "params": tuneParams}
Example #8
def ARDRegression_on_fold(feature_sets, train, test, y, y_all, X, dim, dimsum, learn_options):
    '''Fit ARDRegression on the training fold and predict the test fold.'''
    clf = ARDRegression()
    clf.fit(X[train], y[train][:, 0])
    y_pred = clf.predict(X[test])[:, None]
    return y_pred, clf
Example #9
    def predict_features(self, df_features, df_target, idx=0, **kwargs):
        X = df_features.values
        y = df_target.values
        clf = ARDRegression(compute_score=True)
        clf.fit(X, y.ravel())

        return np.abs(clf.coef_)
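
Returning np.abs(clf.coef_) effectively uses ARD as a cheap feature scorer. A hedged standalone sketch of ranking columns the same way (synthetic data, outside the class):

import numpy as np
import pandas as pd
from sklearn.linear_model import ARDRegression

rng = np.random.default_rng(0)
df = pd.DataFrame(rng.normal(size=(100, 4)), columns=list("abcd"))
target = 3.0 * df["b"] - df["d"] + 0.1 * rng.normal(size=100)

clf = ARDRegression(compute_score=True)
clf.fit(df.values, target.values.ravel())
scores = np.abs(clf.coef_)
print(sorted(zip(df.columns, scores), key=lambda t: -t[1]))  # b and d rank first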
Example #10
    def __init__(self):
        # Algorithm name
        self._name = 'ard'

        # Base path
        self._f_path = os.path.abspath(
            os.path.join(os.path.dirname(os.path.abspath(__file__)),
                         os.pardir))

        # Suppress warning messages
        warnings.filterwarnings('ignore')

        # Load the raw data
        data = pd.read_csv(self._f_path +
                           "/regression/resource/regression_sample.csv",
                           sep=",",
                           encoding="utf-8")

        # Boolean masks splitting training (<= 2017) and test (>= 2018) years
        self._x = (data["year"] <= 2017)
        self._y = (data["year"] >= 2018)

        # Training data split
        self._x_train, self._y_train = self.preprocessing(data[self._x])
        # Test data split
        self._x_test, self._y_test = self.preprocessing(data[self._y])

        # Declare the model (`normalize` was removed from ARDRegression in scikit-learn 1.2)
        self._model = ARDRegression(normalize=True)

        # Train the model
        self._model.fit(self._x_train, self._y_train)
Example #11
class ARDR():
    """Fits ARDRegression on a random subset of at most 100 columns."""
    def __init__(self, ARDRegression, N):
        self.cores_number = int(np.ceil(multiprocessing.cpu_count() / N))
        self.selected_columns = []
        # Note: n_iter was renamed max_iter in scikit-learn 1.3 and
        # normalize was removed in 1.2; these arguments target older releases
        self.model = ARDRegression(
                        alpha_1=1e-06,
                        alpha_2=1e-06,
                        compute_score=False,
                        copy_X=True,
                        fit_intercept=True,
                        lambda_1=1e-06,
                        lambda_2=1e-06,
                        n_iter=300,
                        normalize=False,
                        threshold_lambda=10000.0,
                        tol=0.001, verbose=False)

        print("ARDRegression Cores: ", self.cores_number)

    def fit(self, X_train, y_train, X_test, y_test, error_type="MAE"):
        # Sample at most 100 columns; keep all of them if fewer exist
        try:
            self.selected_columns = np.random.choice(X_train.columns, 100, replace=False)
            X_train = X_train[self.selected_columns]
        except Exception:
            self.selected_columns = X_train.columns

        error_dict = {"MSE": "rmse", "R2": {"l1", "l2"}, "MAE": "mae", "LOGLOSS": "multi_logloss"}
        error_metric = error_dict[error_type]  # computed but unused in this snippet
        self.model.fit(X_train, y_train)

    def predict(self, X_test):
        prediction = self.model.predict(X_test[self.selected_columns])
        return prediction
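
A hedged usage sketch for the wrapper above (synthetic DataFrame; the constructor arguments only run on scikit-learn releases that still accept n_iter and normalize):

import numpy as np
import pandas as pd
from sklearn.linear_model import ARDRegression

X = pd.DataFrame(np.random.randn(200, 150))
y = np.random.randn(200)

ardr = ARDR(ARDRegression, N=4)           # model built from the passed-in class
ardr.fit(X, y, X_test=None, y_test=None)  # X_test/y_test are unused by fit
print(ardr.predict(X)[:5])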
Example #12
def test_check_is_fitted():
    # Check that TypeError is raised when a non-estimator instance is passed
    assert_raises(TypeError, check_is_fitted, ARDRegression)
    assert_raises(TypeError, check_is_fitted, "SVR")

    ard = ARDRegression()
    svr = SVR()

    try:
        assert_raises(NotFittedError, check_is_fitted, ard)
        assert_raises(NotFittedError, check_is_fitted, svr)
    except ValueError:
        assert False, "check_is_fitted failed with ValueError"

    # NotFittedError is a subclass of both ValueError and AttributeError
    try:
        check_is_fitted(ard, msg="Random message %(name)s, %(name)s")
    except ValueError as e:
        assert str(e) == "Random message ARDRegression, ARDRegression"

    try:
        check_is_fitted(svr, msg="Another message %(name)s, %(name)s")
    except AttributeError as e:
        assert str(e) == "Another message SVR, SVR"

    ard.fit(*make_blobs())
    svr.fit(*make_blobs())

    assert check_is_fitted(ard) is None
    assert check_is_fitted(svr) is None
Example #13
def ard_regression(train, test):
    train = train.copy()
    test = test.copy()

    X = train.to_numpy()
    X_train = np.delete(X, [train.columns.get_loc('views')], axis=1)
    y_train = train['views']

    X = test.to_numpy()
    X_test = np.delete(X, [test.columns.get_loc('views')], axis=1)
    y_test = test['views']

    reg = ARDRegression(compute_score=True)
    reg.fit(X_train, y_train)

    y_pred = reg.predict(X_test)

    # Mean squared error
    print('Mean squared error: %.2f' %
          mean_squared_error(y_test, y_pred, squared=True))
    # Median absolute error
    print('Median absolute error: %.2f' %
          median_absolute_error(y_test, y_pred))

    return None
Example #14
def main_bak():
    # trial run comparing a custom iterative ARD against scikit-learn's
    noiseVar = 0.01
    n = 500
    d = 10

    x = np.random.normal(0, 1, size=d * n).reshape((n, d))
    w = np.random.normal(10, 1, size=d)
    y = np.dot(x, w) + np.random.normal(0, noiseVar, size=n)

    t1 = time.time()

    print("Running iterative ard")  # iterative_ard is a project-specific implementation
    (witer, gamma) = iterative_ard(Xtrain=x, ytrain=y, noiseVar=noiseVar)
    t2 = time.time()
    print("Running scikit ARD")
    ard = ARDRegression(compute_score=True)
    ard.fit(x, y)
    t3 = time.time()

    print("Time taken")
    print("Iterative: " + str(t2 - t1))
    print("scikit ard: " + str(t3 - t2))

    print("ALL W:")
    print(witer)
    print(ard.coef_)
    print(w)
Example #15
class ARDRegressionPrim(primitive):
    def __init__(self, random_state=0):
        super(ARDRegressionPrim, self).__init__(name='ARDRegression')
        self.hyperparams = []
        self.type = 'Regressor'
        self.description = "Bayesian ARD regression. Fits the weights of a regression model using an ARD prior: the weights are assumed to be Gaussian distributed, and the parameters lambda (precisions of the weight distributions) and alpha (precision of the noise distribution) are estimated alongside them by an iterative procedure (evidence maximization)."
        self.hyperparams_run = {'default': True}
        self.random_state = random_state
        self.model = ARDRegression()
        self.accept_type = 'c_r'

    def can_accept(self, data):
        return self.can_accept_c(data, 'Regression')

    def is_needed(self, data):
        # data = handle_data(data)
        return True

    def fit(self, data):
        data = handle_data(data)
        self.model.fit(data['X'], data['Y'])

    def produce(self, data):
        output = handle_data(data)
        output['predictions'] = self.model.predict(output['X'])
        output['X'] = pd.DataFrame(output['predictions'],
                                   columns=[self.name + "Pred"])
        final_output = {0: output}
        return final_output
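
The pruning behavior named in self.description is easy to observe: on data where only some features matter, ARD drives the precision lambda of the irrelevant weights up and their coefficients toward zero. A minimal sketch on synthetic data:

import numpy as np
from sklearn.linear_model import ARDRegression

rng = np.random.default_rng(42)
X = rng.normal(size=(300, 5))
# Only features 0 and 3 influence the target
y = 2.0 * X[:, 0] - 1.5 * X[:, 3] + 0.05 * rng.normal(size=300)

ard = ARDRegression()
ard.fit(X, y)
print(np.round(ard.coef_, 3))    # near-zero weights for features 1, 2, 4
print(np.round(ard.lambda_, 1))  # large precisions for the pruned weights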
Example #16
def bayeslr_python(fname, threshold):
    # Runs Bayesian linear regression (ARD); data is exchanged with MATLAB
    # through Excel files because matrices cannot be passed directly
    X = pd.read_excel(fname, sheet_name=0, header=None)
    Y = pd.read_excel(fname, sheet_name=1, header=None)
    X_row, X_col = X.shape
    Y_row, Y_col = Y.shape

    # Skip all-zero response columns
    judge_Y = ~(pd.DataFrame.sum(Y, axis=0) == np.zeros(Y_col))

    X_blr = np.zeros((Y_col, X_col + 1))
    sigma_blr = np.zeros((Y_col, X_col))

    for i in range(0, Y_col):
        if judge_Y[i]:
            y = Y.iloc[:, i]  # DataFrame.ix was removed from pandas; iloc replaces it
            clf = ARDRegression(threshold_lambda=threshold)
            clf.fit(X, y)
            coef = clf.coef_.T
            X_blr[i, :] = np.hstack((coef, clf.intercept_))

    X_blr = pd.DataFrame(X_blr)
    with pd.ExcelWriter(fname) as writer:
        X_blr.to_excel(writer, sheet_name=str(0), index=None, header=None)
Example #17
    def autorelevancedetermination(self):
        # Fit the ARD regression
        clf = ARDRegression(compute_score=True)
        clf.fit(self.x_train, self.y_train)
        z = clf.predict(self.x_test)
        # Exact-match rate; only meaningful for discrete targets, since
        # continuous predictions will almost never compare equal
        print(np.mean(self.y_test == z))

        return z
Example #18
 def __init__(self, random_state=0):
     super(ARDRegressionPrim, self).__init__(name='ARDRegression')
     self.hyperparams = []
     self.type = 'Regressor'
     self.description = "Bayesian ARD regression. Fits the weights of a regression model using an ARD prior: the weights are assumed to be Gaussian distributed, and the parameters lambda (precisions of the weight distributions) and alpha (precision of the noise distribution) are estimated alongside them by an iterative procedure (evidence maximization)."
     self.hyperparams_run = {'default': True}
     self.random_state = random_state
     self.model = ARDRegression()
     self.accept_type = 'c_r'
Example #19
def test_toy_ard_object():
    # Test the Bayesian ARD regressor on a toy identity problem
    X = np.array([[1], [2], [3]])
    Y = np.array([1, 2, 3])
    clf = ARDRegression(compute_score=True)
    clf.fit(X, Y)

    # Check that the model could approximately learn the identity function
    test = [[1], [3], [4]]
    assert_array_almost_equal(clf.predict(test), [1, 3, 4], 2)
Example #20
def make_linear(X, y):
    n_samples = np.shape(X)[0]   # unused below; kept from the original
    n_features = np.shape(X)[1]  # unused below; kept from the original

    ard = ARDRegression(compute_score=True)
    ard.fit(X, y)

    ols = LinearRegression()
    ols.fit(X, y)

    return ard, ols
Example #21
def test_ard_accuracy_on_easy_problem(seed, n_samples, n_features):
    # Check that ARD converges with reasonable accuracy on an easy problem
    # (GitHub issue #14055). The pytest parametrize decorator that supplies
    # these arguments is not included in the snippet.
    X = np.random.RandomState(seed=seed).normal(size=(250, 3))
    y = X[:, 1]

    regressor = ARDRegression()
    regressor.fit(X, y)

    abs_coef_error = np.abs(1 - regressor.coef_[1])
    assert abs_coef_error < 1e-10
Example #22
    def fit_model_16(self,toWrite=False):
        model = ARDRegression()

        for data in self.cv_data:
            X_train, X_test, Y_train, Y_test = data
            model.fit(X_train,Y_train)
            pred = model.predict(X_test)
            print("Model 16 score %f" % (logloss(Y_test,pred),))

        if toWrite:
            # pickle requires binary mode
            with open('model16/model.pkl', 'wb') as f2:
                pickle.dump(model, f2)
Example #23
def test_check_is_fitted_with_attributes(wrap):
    ard = ARDRegression()
    with pytest.raises(NotFittedError, match="is not fitted yet"):
        check_is_fitted(ard, wrap(["coef_"]))

    ard.fit(*make_blobs())

    # Does not raise
    check_is_fitted(ard, wrap(["coef_"]))

    # Raises when using attribute that is not defined
    with pytest.raises(NotFittedError, match="is not fitted yet"):
        check_is_fitted(ard, wrap(["coef_bad_"]))
Example #24
def ARD(X_train, y_train, X_test, y_test):
    '''
    Purpose: Use ARD to calculate accuracy
    Input: X_train, y_train, X_test, y_test
    Output: accuracy_score
    '''
    clf = ARDRegression(compute_score=True)
    clf = clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    y_pred = y_pred.round()
    #ols = LinearRegression()
    #ols.fit(X, y)
    return metrics.accuracy_score(y_test, y_pred)
Example #25
class _ARDRegressionImpl:
    def __init__(self, **hyperparams):
        self._hyperparams = hyperparams
        # `Op` is bound elsewhere in the source project to the wrapped
        # scikit-learn estimator (ARDRegression)
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        if y is not None:
            self._wrapped_model.fit(X, y)
        else:
            self._wrapped_model.fit(X)
        return self

    def predict(self, X):
        return self._wrapped_model.predict(X)
Example #26
def make_forecast(local_array, local_mf_forecast_horizon_days,
                  local_days_in_focus_frame):
    local_forecast = []
    # simple normalization
    days = np.array([day for day in range(local_days_in_focus_frame)])
    days = np.divide(days, np.amax(days))
    x_y_data = np.zeros(shape=(days.shape[0], 2), dtype=np.dtype('float32'))
    x_y_data[:, 0] = days
    for local_time_serie in range(local_array.shape[0]):
        x_y_data[:, 1] = local_array[local_time_serie, :]
        x = x_y_data[:, 0].reshape(-1, 1)
        y = x_y_data[:, 1].reshape(-1, )
        y_max = np.amax(y)
        y = np.divide(y, y_max * (y_max != 0) + 1 * (y_max == 0))
        # Older scikit-learn parameter names: base_estimator and
        # loss='squared_loss' became estimator and loss='squared_error'
        regression = RANSACRegressor(base_estimator=ARDRegression(),
                                     min_samples=29,
                                     max_trials=2000,
                                     random_state=0,
                                     loss='squared_loss',
                                     residual_threshold=2.0).fit(x, y)
        score = regression.score(x, y)
        print('time_serie, score of RANdom SAmple Consensus algorithm',
              local_time_serie, score)
        forecast_days = np.add(days, local_mf_forecast_horizon_days
                               )[-local_mf_forecast_horizon_days:].reshape(
                                   -1, 1)
        local_forecast_ts = regression.predict(forecast_days)
        local_forecast.append(local_forecast_ts)
    local_forecast = np.array(local_forecast)
    # simple denormalization
    local_array_max = np.amax(local_array, axis=1)
    local_forecast = np.multiply(
        local_forecast, local_array_max.reshape(local_array_max.shape[0], 1))
    print('local_forecast shape:', local_forecast.shape)
    return local_forecast
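
On current scikit-learn the same RANSAC-around-ARD construction uses the renamed parameters; a minimal sketch on synthetic data with injected outliers:

import numpy as np
from sklearn.linear_model import ARDRegression, RANSACRegressor

rng = np.random.default_rng(0)
x = np.linspace(0.0, 1.0, 60).reshape(-1, 1)
y = 2.0 * x.ravel() + 0.05 * rng.normal(size=60)
y[::10] += 3.0  # inject outliers for RANSAC to reject

regression = RANSACRegressor(estimator=ARDRegression(),
                             min_samples=29,
                             max_trials=2000,
                             loss='squared_error',
                             residual_threshold=2.0,
                             random_state=0).fit(x, y)
print(regression.score(x, y))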
Example #27
def test_ard_accuracy_on_easy_problem():
    # Check that ARD converges with reasonable accuracy on an easy problem
    # (Github issue #14055)
    # This particular seed seems to converge poorly in the failure-case
    # (scipy==1.3.0, sklearn==0.21.2)
    seed = 45
    X = np.random.RandomState(seed=seed).normal(size=(250, 3))
    y = X[:, 1]

    # n_iter was renamed max_iter in scikit-learn 1.3
    regressor = ARDRegression(n_iter=600)
    regressor.fit(X, y)

    abs_coef_error = np.abs(1 - regressor.coef_[1])
    # Expect an accuracy of better than 1E-4 in most cases -
    # Failure-case produces 0.16!
    assert abs_coef_error < 0.01
Example #28
def train_regressor(xx, yy):

    d = np.array(xx)
    y = np.array(yy).reshape(len(yy), )  # the original discarded this reshape
    X = [[i] for i in d]  # one feature per sample

    linearR = Lin_regress()  # project-specific wrapper around a linear fit

    reg0 = LinearRegression()
    reg1 = BayesianRidge()
    reg2 = RidgeCV()
    reg3 = ElasticNet()
    reg6 = ARDRegression()

    regressors = [reg0, reg1, reg2, reg3, reg6]

    predictor = []
    coef = []
    intercept = []
    for reg in regressors:
        predictor.append(reg.fit(X, y))
        coef.append(reg.coef_[0])
        intercept.append(reg.intercept_)

    gradient = np.average(coef)
    intercept = np.average(intercept)

    print('regression_avg: ' + str(gradient) + ' ' + str(intercept))
    mod = linearR.fit(X, y)
    return mod.coef_[0], mod.intercept_
Example #29
 def init_regressors(self):
     self.regressors = {
         'GradientBoostingRegressor': GradientBoostingRegressor(),
         'GaussianProcessRegressor': GaussianProcessRegressor(),
         'ARDRegression': ARDRegression(),
         'LinearRegression': LinearRegression(),
     }
Example #30
def get_model_from_name(model_name):
    model_map = {
        # Classifiers
        'LogisticRegression': LogisticRegression(n_jobs=-2),
        'RandomForestClassifier': RandomForestClassifier(n_jobs=-2),
        'RidgeClassifier': RidgeClassifier(),
        'XGBClassifier': xgb.XGBClassifier(),
        'GradientBoostingClassifier': GradientBoostingClassifier(),
        'SGDClassifier': SGDClassifier(n_jobs=-1),
        'Perceptron': Perceptron(n_jobs=-1),
        'PassiveAggressiveClassifier': PassiveAggressiveClassifier(),

        # Regressors
        'LinearRegression': LinearRegression(n_jobs=-2),
        'RandomForestRegressor': RandomForestRegressor(n_jobs=-2),
        'Ridge': Ridge(),
        'XGBRegressor': xgb.XGBRegressor(),
        'ExtraTreesRegressor': ExtraTreesRegressor(n_jobs=-1),
        'AdaBoostRegressor': AdaBoostRegressor(n_estimators=5),
        'RANSACRegressor': RANSACRegressor(),
        # presort was deprecated and later removed from GradientBoostingRegressor
        'GradientBoostingRegressor': GradientBoostingRegressor(presort=False),
        'Lasso': Lasso(),
        'ElasticNet': ElasticNet(),
        'LassoLars': LassoLars(),
        'OrthogonalMatchingPursuit': OrthogonalMatchingPursuit(),
        'BayesianRidge': BayesianRidge(),
        'ARDRegression': ARDRegression(),
        'SGDRegressor': SGDRegressor(shuffle=False),
        'PassiveAggressiveRegressor':
        PassiveAggressiveRegressor(shuffle=False),

        # Clustering
        'MiniBatchKMeans': MiniBatchKMeans(n_clusters=8)
    }
    return model_map[model_name]
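
Typical use is a plain name-to-estimator lookup; a small self-contained sketch using the ARD entry:

import numpy as np

X_train = np.random.randn(50, 3)
y_train = X_train @ np.array([1.0, -1.0, 0.5])

model = get_model_from_name('ARDRegression')  # an unfitted ARDRegression()
print(model.fit(X_train, y_train).predict(X_train[:3]))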
Example #31
    def __init__(self, ARDRegression, N):
        self.cores_number = int(np.ceil(multiprocessing.cpu_count() / N))
        self.selected_columns = []
        # As in Example #11: n_iter/normalize target scikit-learn releases
        # before 1.3/1.2 respectively
        self.model = ARDRegression(alpha_1=1e-06,
                                   alpha_2=1e-06,
                                   compute_score=False,
                                   copy_X=True,
                                   fit_intercept=True,
                                   lambda_1=1e-06,
                                   lambda_2=1e-06,
                                   n_iter=300,
                                   normalize=False,
                                   threshold_lambda=10000.0,
                                   tol=0.001,
                                   verbose=False)

        print("ARDRegression Cores: ", self.cores_number)
Example #32
 def __init__(self,
              n_iter=300,
              tol=1.e-3,
              alpha_1=1.e-6,
              alpha_2=1.e-6,
              lambda_1=1.e-6,
              lambda_2=1.e-6,
              compute_score=False,
              threshold_lambda=1.e+4,
              fit_intercept=True,
              normalize=False,
              copy_X=True,
              verbose=False):
     # Positional arguments follow the older scikit-learn signature,
     # which still included `normalize`
     _ARDRegression.__init__(self, n_iter, tol, alpha_1, alpha_2, lambda_1,
                             lambda_2, compute_score, threshold_lambda,
                             fit_intercept, normalize, copy_X, verbose)
     BaseWrapperReg.__init__(self)
Example #33
# Import sklearn and numpy modules
from sklearn.linear_model import ARDRegression
from sklearn.model_selection import cross_val_predict
from sklearn.datasets import load_boston  # removed in scikit-learn 1.2
from sklearn.metrics import explained_variance_score, mean_squared_error
import numpy as np
# Load the Boston dataset
boston = load_boston()
# Regression design matrix
x = boston.data
# Target vector
y = boston.target
# Create the ARDRegression object
ARD = ARDRegression(alpha_1=0.01, alpha_2=0.01, lambda_1=1e-06, lambda_2=1e-06)
# Fit a linear model to the dataset
ARD.fit(x, y)
# Predicted values
yp = ARD.predict(x)
# 10-fold cross-validated predictions
yp_cv = cross_val_predict(ARD, x, y, cv=10)
# RMSE and explained variance
Evariance = explained_variance_score(y, yp)
Evariance_cv = explained_variance_score(y, yp_cv)
RMSE = np.sqrt(mean_squared_error(y, yp))
RMSECV = np.sqrt(mean_squared_error(y, yp_cv))
print('Method: ARDRegression')
print('RMSE on the dataset: %.4f' % RMSE)
print('RMSE on 10-fold CV: %.4f' % RMSECV)
print('Explained variance score on the dataset: %.4f' % Evariance)
print('Explained variance score on 10-fold CV: %.4f' % Evariance_cv)
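
Since load_boston is gone from recent scikit-learn, the same script runs unchanged against another built-in regression dataset; a sketch of the swap:

from sklearn.datasets import fetch_california_housing

housing = fetch_california_housing()
x, y = housing.data, housing.target  # drop-in replacement for the Boston arrays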
Example #34
def learn_model(x_mat, y):
    #model = SVR(kernel='rbf')
    model = ARDRegression()
    model.fit(x_mat, y)
    return model
Example #35
# Assumed setup: the snippet uses X, n_samples, and n_features without
# defining them; this preamble supplies typical values.
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
from sklearn.linear_model import ARDRegression, LinearRegression

np.random.seed(0)
n_samples, n_features = 100, 100
X = np.random.randn(n_samples, n_features)

# Create weights with a precision lambda_ of 4.
lambda_ = 4.
w = np.zeros(n_features)
# Only keep 10 weights of interest
relevant_features = np.random.randint(0, n_features, 10)
for i in relevant_features:
    w[i] = stats.norm.rvs(loc=0, scale=1. / np.sqrt(lambda_))
# Create noise with a precision alpha of 50.
alpha_ = 50.
noise = stats.norm.rvs(loc=0, scale=1. / np.sqrt(alpha_), size=n_samples)
# Create the target
y = np.dot(X, w) + noise

###############################################################################
# Fit the ARD Regression
clf = ARDRegression(compute_score=True)
clf.fit(X, y)

ols = LinearRegression()
ols.fit(X, y)

###############################################################################
# Plot the true weights, the estimated weights and the histogram of the
# weights
plt.figure(figsize=(6, 5))
plt.title("Weights of the model")
plt.plot(clf.coef_, 'b-', label="ARD estimate")
plt.plot(ols.coef_, 'r--', label="OLS estimate")
plt.plot(w, 'g-', label="Ground truth")
plt.xlabel("Features")
plt.ylabel("Values of the weights")
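
The scrape cuts this example off after the axis labels; rendering the labeled figure still needs something like:

plt.legend(loc=1)
plt.show()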
    
Example #36
if __name__ == "__main__":
    # Snippet context: expects os, joblib, pandas as pd, datetime.date, and
    # sklearn.preprocessing.Normalizer to be imported.

    ########################### Set script parameters ###########################
    # Gene expression parameters
    log10Normalize = True
    standardizeByTCGA = True  # Normalize expression data in a unified way for TCGA and cell lines
    L2Normalizer = Normalizer(norm='l2', copy=True)  # Method to normalize gene expression values

    # Gene pruning parameters
    pruneUncorrelatedGenes = True  # Eliminates genes that are uncorrelated between array and RNASeq
    pruneCutoff = 0.001  # p-value cutoff for pruning
    clinicalSplitPoint = 60  # for the array data, the first 60 entries correspond to the NCI60 dataset

    # `normalize` was removed from ARDRegression in scikit-learn 1.2
    clf = ARDRegression(normalize=False)

    # Location for training data
    inputFolder = '../output/standardizedData/2015-07-30/'
    docetaxelArrayFolder = '../data/docetaxel_validation/'
    outputFolder = '../output/DocetaxelClinical/' + str(date.today()) + '/'  # np.str was removed from NumPy

    if not os.path.exists(outputFolder):
        os.makedirs(outputFolder)
    #############################################################################

    cellExpression = joblib.load(inputFolder + 'cellExpression.pkl')
    tcgaExpression = joblib.load(inputFolder + 'tcgaExpression.pkl')
    # DataFrame.append was removed in pandas 2.0; pd.concat replaces it
    mergedExpression = pd.concat([cellExpression, tcgaExpression])

    # Retrieves ComBat-homogenized data for the NCI60 cell lines and the Docetaxel clinical U95 array data
Example #37
# Assumed setup, mirroring Example #35: the snippet uses X, n_samples, and
# n_features without defining them.
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
from sklearn.linear_model import ARDRegression, LinearRegression

np.random.seed(0)
n_samples, n_features = 100, 100
X = np.random.randn(n_samples, n_features)

# Create weights with a precision lambda_ of 4.
lambda_ = 4.
w = np.zeros(n_features)
# Only keep 10 weights of interest
relevant_features = np.random.randint(0, n_features, 10)
for i in relevant_features:
    w[i] = stats.norm.rvs(loc=0, scale=1. / np.sqrt(lambda_))
# Create noise with a precision alpha of 50.
alpha_ = 50.
noise = stats.norm.rvs(loc=0, scale=1. / np.sqrt(alpha_), size=n_samples)
# Create the target
y = np.dot(X, w) + noise

###############################################################################
# Fit the ARD Regression
clf = ARDRegression(compute_score=True)
clf.fit(X, y)

ols = LinearRegression()
ols.fit(X, y)

###############################################################################
# Plot the true weights, the estimated weights, the histogram of the
# weights, and predictions with standard deviations
plt.figure(figsize=(6, 5))
plt.title("Weights of the model")
plt.plot(clf.coef_, color='darkblue', linestyle='-', linewidth=2,
         label="ARD estimate")
plt.plot(ols.coef_, color='yellowgreen', linestyle=':', linewidth=2,
         label="OLS estimate")
plt.plot(w, color='orange', linestyle='-', linewidth=2, label="Ground truth")
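
The header comment promises a panel of predictions with standard deviations, but the scrape ends before it. A hedged sketch of how return_std would feed such a panel (X_new is illustrative, not from the original):

plt.legend(loc=1)

# Hypothetical final panel: predictive mean with a +/- 1 std band on new inputs
X_new = np.random.randn(20, n_features)
y_mean, y_std = clf.predict(X_new, return_std=True)
plt.figure(figsize=(6, 5))
plt.errorbar(np.arange(20), y_mean, yerr=y_std, fmt='o',
             label="ARD prediction +/- std")
plt.legend(loc=1)
plt.show()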