def getTrainedClassifier(ticker, sd, ed, save=True):
    """Fit an ARD regressor that predicts a ticker's shifted adjusted close.

    Daily data for ``'WIKI/' + ticker`` is fetched with ``quandl.get``. The
    five adjusted price/volume columns are the features and the target is
    ``'Adj. Close'`` shifted by the module-level ``shift`` value.

    Args:
        ticker: Symbol appended to the ``'WIKI/'`` dataset prefix.
        sd: Forwarded to ``quandl.get`` as ``start_date``.
        ed: Forwarded to ``quandl.get`` as ``end_date``.
        save: Unused; kept so existing callers keep working.

    Returns:
        The fitted ``ARDRegression`` estimator.
    """
    df = quandl.get('WIKI/' + ticker, start_date=sd, end_date=ed)
    # Copy the column subset so the target column below is added to an
    # independent frame instead of a slice of the downloaded one.
    df = df[[
        'Adj. Open', 'Adj. High', 'Adj. Low', 'Adj. Close', 'Adj. Volume'
    ]].copy()

    # NOTE(review): `shift` is not defined in this function; presumably a
    # module-level number of rows to look ahead -- confirm.
    df['future'] = df['Adj. Close'].shift(-shift)
    # Rows whose shifted target fell off the end of the frame are NaN.
    df = df.dropna()

    # Keyword form: the positional `axis` argument of DataFrame.drop is no
    # longer accepted by recent pandas releases.
    X_train = np.array(df.drop(columns=['future']))
    y_train = np.array(df['future'])

    clf = ARDRegression()
    clf.fit(X_train, y_train)
    return clf
def test_return_std():
    """Check the ``return_std`` option of both Bayesian regressors.

    Targets are a fixed linear function of the inputs plus Gaussian noise
    scaled by ``noise_mult``; the predicted standard deviation must match
    ``noise_mult`` to a number of decimals that grows as the noise shrinks.
    """
    n_features = 5
    n_train = 50
    n_test = 10
    weights = np.array([1.0, 0.0, 1.0, -1.0, 0.0])
    bias = 1.0

    def linear_target(X):
        return np.dot(X, weights) + bias

    def noisy_target(X, noise_mult):
        return linear_target(X) + np.random.randn(X.shape[0]) * noise_mult

    X = np.random.random((n_train, n_features))
    X_test = np.random.random((n_test, n_features))

    for decimal, noise_mult in enumerate([1, 0.1, 0.01]):
        y = noisy_target(X, noise_mult)
        # Same check for each estimator, BayesianRidge first.
        for estimator_cls in (BayesianRidge, ARDRegression):
            model = estimator_cls()
            model.fit(X, y)
            _, y_std = model.predict(X_test, return_std=True)
            assert_array_almost_equal(y_std, noise_mult, decimal=decimal)
def test_check_is_fitted():
    """Exercise ``check_is_fitted`` with explicit attribute names."""
    # Passing the estimator class must raise ValueError and passing a plain
    # string must raise TypeError.
    assert_raises(ValueError, check_is_fitted, ARDRegression, "coef_")
    assert_raises(TypeError, check_is_fitted, "SVR", "support_")

    ard = ARDRegression()
    svr = SVR()
    checks = ((ard, "coef_"), (svr, "support_"))

    try:
        for estimator, attribute in checks:
            assert_raises(NotFittedError, check_is_fitted, estimator, attribute)
    except ValueError:
        assert False, "check_is_fitted failed with ValueError"

    # NotFittedError is a subclass of both ValueError and AttributeError,
    # so it can be caught as either one.
    try:
        check_is_fitted(ard, "coef_", "Random message %(name)s, %(name)s")
    except ValueError as error:
        assert_equal(str(error), "Random message ARDRegression, ARDRegression")

    try:
        check_is_fitted(svr, "support_", "Another message %(name)s, %(name)s")
    except AttributeError as error:
        assert_equal(str(error), "Another message SVR, SVR")

    # After fitting, the check returns None for both estimators.
    ard.fit(*make_blobs())
    svr.fit(*make_blobs())
    for estimator, attribute in checks:
        assert_equal(None, check_is_fitted(estimator, attribute))
def predict_features(self, df_features, df_target, idx=0, **kwargs):
    """Score each feature by the absolute value of its ARD coefficient.

    Args:
        df_features: Feature table; its ``.values`` array is the design
            matrix.
        df_target: Target table; its ``.values`` array is flattened to one
            dimension before fitting.
        idx: Unused; kept for interface compatibility.
        **kwargs: Unused; kept for interface compatibility.

    Returns:
        ``np.abs`` of the fitted model's ``coef_``.
    """
    # `.values` replaces `as_matrix()`, which recent pandas no longer has.
    X = df_features.values
    # Flatten so a single-column target frame is passed as a 1-D vector.
    y = df_target.values.ravel()
    clf = ARDRegression(compute_score=True)
    clf.fit(X, y)
    return np.abs(clf.coef_)
def ARDRegression_on_fold(feature_sets, train, test, y, y_all, X, dim, dimsum, learn_options):
    '''
    Fit an ARD regressor on one fold and predict the held-out rows.

    Only ``X``, ``y``, ``train`` and ``test`` are used here; the remaining
    parameters are accepted but not read. The regressor is trained on
    ``X[train]`` against the first column of ``y[train]``.

    Returns a tuple ``(y_pred, clf)`` where ``y_pred`` is the prediction for
    ``X[test]`` with a trailing axis added, and ``clf`` is the fitted model.
    '''
    fold_targets = y[train][:, 0]
    clf = ARDRegression()
    clf.fit(X[train], fold_targets)

    held_out_prediction = clf.predict(X[test])
    # Add a trailing axis to the prediction.
    y_pred = held_out_prediction[:, None]
    return y_pred, clf
def train(self):
    """
    Fit an ARD regression to the observed data and derive the weight posterior.

    The noise level and weight precisions estimated by ``ARDRegression`` are
    stored on the instance, then the posterior mean and covariance of the
    weights are computed through a Cholesky factorisation.

    Returns a tuple ``(m, S, sigma, alpha)``.
    """
    if self.normalize_output:
        normalized = zero_mean_unit_var_normalization(self.y)
        self.y, self.norm_mean, self.norm_sd = normalized

    # Optionally prepend a constant column so an intercept is learned.
    Phi = sm.add_constant(self.X) if self.intercept else self.X

    regressor = ARDRegression()
    regressor.fit(Phi, self.y)

    # Noise standard deviation from the fitted `alpha_`.
    self.sigma = np.sqrt(1. / regressor.alpha_)
    # Weight precisions from the fitted `lambda_`.
    self.alpha = regressor.lambda_

    n_basis = Phi.shape[1]
    A = np.dot(Phi.T, Phi) / self.sigma**2. + self.alpha * np.eye(n_basis)
    # Small diagonal term added before the Cholesky factorisation.
    A = A + np.eye(A.shape[0]) * 1e-5
    factor = scipy.linalg.cho_factor(A)

    # The posterior mean of w
    self.m = scipy.linalg.cho_solve(factor, np.dot(Phi.T, self.y) / self.sigma**2)
    # The posterior covariance of w
    self.S = scipy.linalg.cho_solve(factor, np.eye(n_basis))

    return self.m, self.S, self.sigma, self.alpha
def createARDRegressor(params=None):
    """Build an ``ARDRegression`` estimator together with its tuning spec.

    ``params`` is merged with a default estimator through ``mergeParams``;
    each of the four prior hyper-parameters is then resolved with
    ``setParam`` against the tuning grid and logged.

    Returns:
        A dict with the estimator under ``"estimator"`` and the value of
        ``getARDRegressorParams()`` under ``"params"``.
    """
    info("Creating ARD Regressor", ind=4)

    ## Params
    params = mergeParams(ARDRegression(), params)
    tuneParams = getARDRegressorParams()
    grid = tuneParams['grid']

    info("With Parameters", ind=4)
    # Resolved and logged in this order.
    report_templates = (
        ('alpha_1', "Param: alpha_1 = {0}"),
        ('lambda_1', "Param: lambda_1 = {0}"),
        ('alpha_2', "Param: alpha_2 = {0}"),
        ('lambda_2', "Param: lambda_2 = {0}"),
    )
    chosen = {}
    for name, template in report_templates:
        chosen[name] = setParam(name, params, grid, force=False)
        info(template.format(chosen[name]), ind=6)

    ## estimator
    reg = ARDRegression(**chosen)

    return {"estimator": reg, "params": tuneParams}
def predict_features(self, df_features, df_target, idx=0, **kwargs):
    """Return the absolute ARD regression weight of every feature.

    The model is fitted on ``df_features.values`` against the flattened
    ``df_target.values``. ``idx`` and ``kwargs`` are accepted but not used.
    """
    design_matrix = df_features.values
    flat_target = df_target.values.ravel()

    model = ARDRegression(compute_score=True)
    model.fit(design_matrix, flat_target)

    return np.abs(model.coef_)
def __init__(self):
    """Load the sample CSV, split it by year, and fit the ARD model."""
    # Algorithm name
    self._name = 'ard'

    # Base path: the parent of the directory that contains this file
    module_dir = os.path.dirname(os.path.abspath(__file__))
    self._f_path = os.path.abspath(os.path.join(module_dir, os.pardir))

    # Silence warning messages
    warnings.filterwarnings('ignore')

    # Load the raw data
    csv_path = self._f_path + "/regression/resource/regression_sample.csv"
    data = pd.read_csv(csv_path, sep=",", encoding="utf-8")

    # Row masks for the split: years up to 2017 train, 2018 onwards test
    self._x = (data["year"] <= 2017)
    self._y = (data["year"] >= 2018)

    # Training split
    train_rows = data[self._x]
    self._x_train, self._y_train = self.preprocessing(train_rows)

    # Test split
    test_rows = data[self._y]
    self._x_test, self._y_test = self.preprocessing(test_rows)

    # Declare the model
    self._model = ARDRegression(normalize=True)

    # Train the model
    self._model.fit(self._x_train, self._y_train)
class ARDR():
    """ARD regression wrapper that trains on a random subset of 100 columns.

    The estimator class is injected through the constructor. When 100
    columns cannot be sampled (fewer columns, or an input without
    ``.columns``), the model falls back to using every column for both
    fitting and prediction.
    """

    def __init__(self, ARDRegression, N):
        """Create the wrapped estimator.

        Args:
            ARDRegression: Estimator class to instantiate.
            N: Divisor applied to the CPU count to compute ``cores_number``.
        """
        self.cores_number = int(np.ceil(multiprocessing.cpu_count()/N))
        # Empty means "no column subset": predict() then uses all columns.
        self.selected_columns = []
        self.model = ARDRegression(
            alpha_1=1e-06, alpha_2=1e-06, compute_score=False, copy_X=True,
            fit_intercept=True, lambda_1=1e-06, lambda_2=1e-06, n_iter=300,
            normalize=False, threshold_lambda=10000.0, tol=0.001,
            verbose=False)
        print("ARDRegression Cores: ", np.nan)

    def fit(self, X_train, y_train, X_test, y_test, error_type = "MAE"):
        """Fit the model, sampling 100 feature columns when possible.

        Args:
            X_train: Training features.
            y_train: Training targets.
            X_test: Unused; kept for interface compatibility.
            y_test: Unused; kept for interface compatibility.
            error_type: One of ``"MSE"``, ``"R2"``, ``"MAE"``, ``"LOGLOSS"``.
                It is only validated; it does not affect the fit.

        Raises:
            KeyError: If ``error_type`` is not a known key.
        """
        try:
            chosen = np.random.choice(X_train.columns, 100, replace = False)
            X_train = X_train[chosen]
        except (AttributeError, KeyError, TypeError, ValueError):
            # Sampling is best-effort: without 100 sampleable columns, train
            # on everything and record that no subset is active, so predict()
            # does not index with an empty or stale selection.
            chosen = []
        self.selected_columns = chosen

        error_dict = {"MSE":"rmse", "R2":{"l1","l2"}, "MAE":"mae","LOGLOSS": "multi_logloss" }
        # Unknown error types are still rejected with KeyError, as before.
        if error_type not in error_dict:
            raise KeyError(error_type)

        self.model.fit(X_train, y_train )

    def predict(self, X_test):
        """Predict using the same columns the model was fitted on."""
        if len(self.selected_columns) == 0:
            return self.model.predict(X_test)
        return self.model.predict(X_test[self.selected_columns])
def test_check_is_fitted():
    """Exercise ``check_is_fitted`` without explicit attribute names."""
    # Neither an estimator class nor a plain string is accepted: both must
    # raise TypeError.
    for not_an_instance in (ARDRegression, "SVR"):
        assert_raises(TypeError, check_is_fitted, not_an_instance)

    ard = ARDRegression()
    svr = SVR()
    estimators = (ard, svr)

    try:
        for estimator in estimators:
            assert_raises(NotFittedError, check_is_fitted, estimator)
    except ValueError:
        assert False, "check_is_fitted failed with ValueError"

    # NotFittedError is a subclass of both ValueError and AttributeError,
    # so it can be caught as either one.
    try:
        check_is_fitted(ard, msg="Random message %(name)s, %(name)s")
    except ValueError as error:
        assert str(error) == "Random message ARDRegression, ARDRegression"

    try:
        check_is_fitted(svr, msg="Another message %(name)s, %(name)s")
    except AttributeError as error:
        assert str(error) == "Another message SVR, SVR"

    # Once fitted, the check returns None.
    ard.fit(*make_blobs())
    svr.fit(*make_blobs())
    for estimator in estimators:
        assert check_is_fitted(estimator) is None
def ard_regression(train, test):
    """Fit ARD regression on ``train`` and print error metrics on ``test``.

    The ``'views'`` column is the target and every other column is a
    feature. The inputs are not modified.

    Args:
        train: Table containing a ``'views'`` column.
        test: Table with the same layout as ``train``.

    Returns:
        None; the mean squared error and the median absolute error are
        printed.
    """
    X_train = train.drop(columns='views').to_numpy()
    y_train = train['views']

    X_test = test.drop(columns='views').to_numpy()
    y_test = test['views']

    reg = ARDRegression(compute_score=True)
    reg.fit(X_train, y_train)
    y_pred = reg.predict(X_test)

    # The mean squared error. `squared=True` was the default and is omitted
    # because newer scikit-learn releases no longer accept that keyword.
    print('Mean squared error: %.2f' % mean_squared_error(y_test, y_pred))
    # The median absolute error of the predictions
    print('median absolute error: %.2f' % median_absolute_error(y_test, y_pred))

    return None
def main_bak():
    """Time ``iterative_ard`` against scikit-learn's ARD on synthetic data.

    Draws a random 500 x 10 design matrix and weights centred on 10, builds
    noisy linear targets, runs both solvers, and prints the elapsed times
    followed by both weight estimates and the true weights.
    """
    # trial
    # NOTE(review): `noiseVar` is passed to np.random.normal as its scale
    # argument -- confirm whether variance or standard deviation is meant.
    noiseVar = 0.01
    n = 500
    d = 10
    x = np.random.normal(0, 1, size=d * n).reshape((n, d))
    w = np.random.normal(10, 1, size=d)
    y = np.dot(x, w) + np.random.normal(0, noiseVar, size=n)

    # Single-argument print() calls behave identically on Python 2 and 3.
    t1 = time.time()
    print("Running iterative ard")
    (witer, gamma) = iterative_ard(Xtrain=x, ytrain=y, noiseVar=noiseVar)
    t2 = time.time()
    print("Running scikit ARD")
    ard = ARDRegression(compute_score=True)
    ard.fit(x, y)
    t3 = time.time()

    print("Time taken ")
    print("Iterative:" + str(t2 - t1))
    print("scikit ard:" + str(t3 - t2))
    print("ALL W :")
    print(witer)
    print(ard.coef_)
    print(w)
class ARDRegressionPrim(primitive):
    """Primitive that wraps an ``ARDRegression`` model as a regressor step."""

    def __init__(self, random_state=0):
        """Register the primitive's metadata and create the wrapped model."""
        super(ARDRegressionPrim, self).__init__(name='ARDRegression')
        self.hyperparams = []
        self.type = 'Regressor'
        self.description = (
            "Bayesian ARD regression. Fit the weights of a regression model, "
            "using an ARD prior. The weights of the regression model are "
            "assumed to be in Gaussian distributions. Also estimate the "
            "parameters lambda (precisions of the distributions of the "
            "weights) and alpha (precision of the distribution of the noise). "
            "The estimation is done by an iterative procedures "
            "(Evidence Maximization)"
        )
        self.hyperparams_run = {'default': True}
        self.random_state = random_state
        self.model = ARDRegression()
        self.accept_type = 'c_r'

    def can_accept(self, data):
        """Delegate the acceptance check to ``can_accept_c`` for 'Regression'."""
        return self.can_accept_c(data, 'Regression')

    def is_needed(self, data):
        """Always report the primitive as needed."""
        return True

    def fit(self, data):
        """Fit the wrapped model on the ``'X'`` and ``'Y'`` entries of the data."""
        prepared = handle_data(data)
        self.model.fit(prepared['X'], prepared['Y'])

    def produce(self, data):
        """Predict on ``'X'`` and replace it with a one-column prediction frame.

        Returns a dict mapping ``0`` to the processed data, which carries the
        raw predictions under ``'predictions'``.
        """
        output = handle_data(data)
        output['predictions'] = self.model.predict(output['X'])
        prediction_column = self.name + "Pred"
        output['X'] = pd.DataFrame(output['predictions'],
                                   columns=[prediction_column])
        return {0: output}
def bayeslr_python(fname, threshold):
    """Run one ARD (Bayesian linear) regression per target column of a workbook.

    Data is exchanged with MATLAB through an Excel file: the first sheet
    holds the design matrix and the second sheet holds the targets, one
    target per column, both without headers. The result OVERWRITES ``fname``
    with a single sheet named ``"0"``. Row ``i`` of that sheet holds the
    coefficients for target column ``i`` followed by its intercept; rows for
    targets whose column sum is zero stay all zeros.

    Args:
        fname: Path of the Excel workbook to read and then overwrite.
        threshold: Value used as the estimator's ``threshold_lambda``.

    Returns:
        None.
    """
    # `sheet_name` and `.iloc` replace the `sheetname` keyword and the `.ix`
    # indexer, which recent pandas releases no longer provide.
    X = pd.read_excel(fname, sheet_name=0, header=None)
    Y = pd.read_excel(fname, sheet_name=1, header=None)
    n_features = X.shape[1]
    n_targets = Y.shape[1]

    # Skip targets whose column sums to zero.
    has_signal = (Y.sum(axis=0) != 0).values

    X_blr = np.zeros((n_targets, n_features + 1))
    for i in range(n_targets):
        if not has_signal[i]:
            continue
        clf = ARDRegression(threshold_lambda=threshold)
        clf.fit(X, Y.iloc[:, i])
        X_blr[i, :] = np.hstack((clf.coef_, clf.intercept_))

    with pd.ExcelWriter(fname) as writer:
        pd.DataFrame(X_blr).to_excel(writer, sheet_name=str(0),
                                     index=False, header=False)
def autorelevancedetermination(self):
    """Fit ARD regression on the training split and predict the test split.

    Prints the fraction of test targets that equal their prediction exactly
    and returns the predictions.
    """
    model = ARDRegression(compute_score=True)
    model.fit(self.x_train, self.y_train)
    predictions = model.predict(self.x_test)

    # NOTE(review): exact equality against regression output is rarely true
    # for continuous targets -- confirm this is the intended metric.
    exact_match_rate = np.mean(self.y_test == predictions)
    print(exact_match_rate)

    return predictions
def __init__(self, random_state=0):
    """Register the primitive's metadata and create the wrapped ARD model.

    ``random_state`` is stored on the instance; it is not passed to the model.
    """
    super(ARDRegressionPrim, self).__init__(name='ARDRegression')
    self.hyperparams = []
    self.type = 'Regressor'
    self.description = (
        "Bayesian ARD regression. Fit the weights of a regression model, "
        "using an ARD prior. The weights of the regression model are "
        "assumed to be in Gaussian distributions. Also estimate the "
        "parameters lambda (precisions of the distributions of the "
        "weights) and alpha (precision of the distribution of the noise). "
        "The estimation is done by an iterative procedures "
        "(Evidence Maximization)"
    )
    self.hyperparams_run = {'default': True}
    self.random_state = random_state
    self.model = ARDRegression()
    self.accept_type = 'c_r'
def test_toy_ard_object():
    """ARD regression should approximately learn the identity on toy data."""
    train_inputs = np.array([[1], [2], [3]])
    train_targets = np.array([1, 2, 3])

    model = ARDRegression(compute_score=True)
    model.fit(train_inputs, train_targets)

    # Predictions must match the inputs to two decimals.
    expected = [1, 3, 4]
    queries = [[value] for value in expected]
    assert_array_almost_equal(model.predict(queries), expected, 2)
def make_linear(X, y):
    """Fit an ARD regression and an ordinary least squares model on the data.

    Args:
        X: Design matrix.
        y: Target vector.

    Returns:
        A tuple ``(ard, ols)`` of the fitted ``ARDRegression`` and
        ``LinearRegression`` estimators.
    """
    ard = ARDRegression(compute_score=True)
    ard.fit(X, y)

    ols = LinearRegression()
    ols.fit(X, y)

    return ard, ols
def test_ard_accuracy_on_easy_problem(seed, n_samples, n_features):
    """ARD must recover a unit coefficient on a noise-free problem.

    Regression check for Github issue #14055. The target is exactly the
    second column of a 250 x 3 Gaussian design matrix; ``n_samples`` and
    ``n_features`` are accepted but not used in the body.
    """
    rng = np.random.RandomState(seed=seed)
    X = rng.normal(size=(250, 3))
    y = X[:, 1]

    model = ARDRegression()
    model.fit(X, y)

    coefficient_gap = np.abs(1 - model.coef_[1])
    assert coefficient_gap < 1e-10
def fit_model_16(self,toWrite=False):
    """Fit ARD regression on every CV split, print its score, optionally save.

    The same estimator object is refitted on each split, so the model that
    is saved is the one fitted on the last split.

    Args:
        toWrite: When true, pickle the model to ``model16/model.pkl``.
    """
    model = ARDRegression()

    for X_train, X_test, Y_train, Y_test in self.cv_data:
        model.fit(X_train, Y_train)
        pred = model.predict(X_test)
        print("Model 16 score %f" % (logloss(Y_test, pred),))

    if toWrite:
        # Pickle output is bytes: the file must be opened in binary mode.
        # The context manager closes it even if dumping fails.
        with open('model16/model.pkl', 'wb') as f2:
            pickle.dump(model, f2)
def test_check_is_fitted_with_attributes(wrap):
    """``check_is_fitted`` with an explicit attribute collection.

    ``wrap`` converts the attribute list into the container type under test.
    """
    not_fitted_pattern = "is not fitted yet"
    estimator = ARDRegression()

    # Before fitting, the requested attribute does not exist.
    with pytest.raises(NotFittedError, match=not_fitted_pattern):
        check_is_fitted(estimator, wrap(["coef_"]))

    estimator.fit(*make_blobs())

    # Does not raise
    check_is_fitted(estimator, wrap(["coef_"]))

    # Raises when using attribute that is not defined
    with pytest.raises(NotFittedError, match=not_fitted_pattern):
        check_is_fitted(estimator, wrap(["coef_bad_"]))
def ARD(X_train, y_train, X_test, y_test):
    '''
    Purpose: Fit ARD regression and score its rounded predictions
    Input: X_train, y_train, X_test, y_test
    Output: accuracy_score of the rounded test predictions against y_test
    '''
    model = ARDRegression(compute_score=True)
    model = model.fit(X_train, y_train)

    # Round the predictions before computing the accuracy score.
    rounded_predictions = model.predict(X_test).round()

    return metrics.accuracy_score(y_test, rounded_predictions)
class _ARDRegressionImpl:
    """Thin adapter that forwards ``fit`` and ``predict`` to a wrapped ``Op``."""

    def __init__(self, **hyperparams):
        """Store the hyper-parameters and build the wrapped model from them."""
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        """Fit the wrapped model, passing ``y`` only when it is given.

        Returns ``self`` so calls can be chained.
        """
        fit_args = (X,) if y is None else (X, y)
        self._wrapped_model.fit(*fit_args)
        return self

    def predict(self, X):
        """Return the wrapped model's predictions for ``X``."""
        wrapped = self._wrapped_model
        return wrapped.predict(X)
def make_forecast(local_array, local_mf_forecast_horizon_days,
                  local_days_in_focus_frame):
    """Forecast every row of ``local_array`` with a RANSAC-wrapped ARD model.

    Each row is treated as one time series over ``local_days_in_focus_frame``
    days. Day indices are scaled by their maximum and each series by its own
    maximum (or by 1 when that maximum is 0). A ``RANSACRegressor`` around
    ``ARDRegression`` is fitted per series, its score is printed, and the
    predictions are rescaled by the row maximum of ``local_array``.

    Returns:
        Array with one row of ``local_mf_forecast_horizon_days`` forecasts
        per input series.
    """
    # simple normalization
    days = np.array(list(range(local_days_in_focus_frame)))
    days = days / np.amax(days)

    # Two float32 columns: scaled day index, then the current series.
    frame = np.zeros(shape=(days.shape[0], 2), dtype=np.dtype('float32'))
    frame[:, 0] = days
    horizon = local_mf_forecast_horizon_days

    forecasts = []
    for local_time_serie in range(local_array.shape[0]):
        frame[:, 1] = local_array[local_time_serie, :]
        x = frame[:, 0].reshape(-1, 1)
        y = frame[:, 1].reshape(-1, )

        # Divide by the series maximum, or by 1 when the maximum is 0.
        y_max = np.amax(y)
        y = np.divide(y, y_max * (y_max != 0) + 1 * (y_max == 0))

        regression = RANSACRegressor(
            base_estimator=ARDRegression(),
            min_samples=29,
            max_trials=2000,
            random_state=0,
            loss='squared_loss',
            residual_threshold=2.0,
        ).fit(x, y)
        score = regression.score(x, y)
        print('time_serie, score of RANdom SAmple Consensus algorithm',
              local_time_serie, score)

        # NOTE(review): the horizon (a day count) is added to day indices
        # that were already scaled into [0, 1] -- confirm the intended units.
        shifted_days = np.add(days, horizon)
        forecast_days = shifted_days[-horizon:].reshape(-1, 1)
        forecasts.append(regression.predict(forecast_days))

    local_forecast = np.array(forecasts)

    # simple denormalization
    local_array_max = np.amax(local_array, axis=1)
    row_scale = local_array_max.reshape(local_array_max.shape[0], 1)
    local_forecast = np.multiply(local_forecast, row_scale)
    print('local_forecast shape:', local_forecast.shape)
    return local_forecast
def test_ard_accuracy_on_easy_problem():
    """ARD must converge with reasonable accuracy on an easy problem.

    Regression check for Github issue #14055. Seed 45 is used because it
    seems to converge poorly in the failure case (scipy==1.3.0,
    sklearn==0.21.2).
    """
    rng = np.random.RandomState(seed=45)
    X = rng.normal(size=(250, 3))
    y = X[:, 1]

    model = ARDRegression(n_iter=600)
    model.fit(X, y)

    # Expect an accuracy of better than 1E-4 in most cases -
    # Failure-case produces 0.16!
    coefficient_gap = np.abs(1 - model.coef_[1])
    assert coefficient_gap < 0.01
def train_regressor(xx, yy):
    """Fit several linear models on one feature and return the ``Lin_regress`` fit.

    Five scikit-learn regressors are fitted only to print the average of
    their slopes and intercepts; the returned values come from the separate
    ``Lin_regress`` model.

    Args:
        xx: Sequence of scalar inputs; each becomes a one-feature sample.
        yy: Sequence of targets with the same length as ``xx``.

    Returns:
        A tuple ``(slope, intercept)`` read from the fitted ``Lin_regress``
        model's ``coef_[0]`` and ``intercept_``.
    """
    # One single-feature sample per input value.
    X = [[value] for value in np.array(xx)]
    y = np.array(yy)
    # Flatten the targets; the result of reshape() was previously discarded.
    y = y.reshape(len(y))

    regressors = [
        LinearRegression(),
        BayesianRidge(),
        RidgeCV(),
        ElasticNet(),
        ARDRegression(),
    ]
    slopes = []
    intercepts = []
    for reg in regressors:
        reg.fit(X, y)
        slopes.append(reg.coef_[0])
        intercepts.append(reg.intercept_)

    gradient = np.average(slopes)
    intercept = np.average(intercepts)
    # Single-argument print() behaves identically on Python 2 and 3.
    print('regression_avg: ' + str(gradient) + ' ' + str(intercept))

    mod = Lin_regress().fit(X, y)
    return mod.coef_[0], mod.intercept_
def init_regressors(self):
    """Populate ``self.regressors`` with default-configured estimators by name."""
    regressors = {}
    regressors['GradientBoostingRegressor'] = GradientBoostingRegressor()
    regressors['GaussianProcessRegressor'] = GaussianProcessRegressor()
    regressors['ARDRegression'] = ARDRegression()
    regressors['LinearRegression'] = LinearRegression()
    self.regressors = regressors
def get_model_from_name(model_name):
    """Return a freshly constructed estimator for ``model_name``.

    Only the requested estimator is instantiated. Each entry is a zero-
    argument factory, so looking up one model does not construct (or fail
    on) any of the others.

    Args:
        model_name: Key of the estimator to build.

    Returns:
        A new, unfitted estimator instance.

    Raises:
        KeyError: If ``model_name`` is not a known model.
    """
    model_factories = {
        # Classifiers
        'LogisticRegression': lambda: LogisticRegression(n_jobs=-2),
        'RandomForestClassifier': lambda: RandomForestClassifier(n_jobs=-2),
        'RidgeClassifier': lambda: RidgeClassifier(),
        'XGBClassifier': lambda: xgb.XGBClassifier(),
        'GradientBoostingClassifier': lambda: GradientBoostingClassifier(),
        'SGDClassifier': lambda: SGDClassifier(n_jobs=-1),
        'Perceptron': lambda: Perceptron(n_jobs=-1),
        'PassiveAggressiveClassifier': lambda: PassiveAggressiveClassifier(),

        # Regressors
        'LinearRegression': lambda: LinearRegression(n_jobs=-2),
        'RandomForestRegressor': lambda: RandomForestRegressor(n_jobs=-2),
        'Ridge': lambda: Ridge(),
        'XGBRegressor': lambda: xgb.XGBRegressor(),
        'ExtraTreesRegressor': lambda: ExtraTreesRegressor(n_jobs=-1),
        'AdaBoostRegressor': lambda: AdaBoostRegressor(n_estimators=5),
        'RANSACRegressor': lambda: RANSACRegressor(),
        'GradientBoostingRegressor':
            lambda: GradientBoostingRegressor(presort=False),
        'Lasso': lambda: Lasso(),
        'ElasticNet': lambda: ElasticNet(),
        'LassoLars': lambda: LassoLars(),
        'OrthogonalMatchingPursuit': lambda: OrthogonalMatchingPursuit(),
        'BayesianRidge': lambda: BayesianRidge(),
        'ARDRegression': lambda: ARDRegression(),
        'SGDRegressor': lambda: SGDRegressor(shuffle=False),
        'PassiveAggressiveRegressor':
            lambda: PassiveAggressiveRegressor(shuffle=False),

        # Clustering
        'MiniBatchKMeans': lambda: MiniBatchKMeans(n_clusters=8),
    }
    return model_factories[model_name]()
def __init__(self, ARDRegression, N):
    """Create the wrapped estimator from the injected ``ARDRegression`` class.

    ``N`` divides the CPU count to compute ``cores_number``; the column
    selection starts out empty.
    """
    cpu_share = multiprocessing.cpu_count() / N
    self.cores_number = int(np.ceil(cpu_share))
    self.selected_columns = []

    model_settings = dict(
        alpha_1=1e-06,
        alpha_2=1e-06,
        compute_score=False,
        copy_X=True,
        fit_intercept=True,
        lambda_1=1e-06,
        lambda_2=1e-06,
        n_iter=300,
        normalize=False,
        threshold_lambda=10000.0,
        tol=0.001,
        verbose=False,
    )
    self.model = ARDRegression(**model_settings)
    print("ARDRegression Cores: ", np.nan)
def __init__(self, n_iter=300, tol=1.e-3, alpha_1=1.e-6, alpha_2=1.e-6,
             lambda_1=1.e-6, lambda_2=1.e-6, compute_score=False,
             threshold_lambda=1.e+4, fit_intercept=True, normalize=False,
             copy_X=True, verbose=False):
    """Initialise the ARD regressor and then the regression wrapper base.

    Every argument is forwarded unchanged to ``_ARDRegression.__init__``
    under the same name.
    """
    # Forward by keyword: scikit-learn estimators take their constructor
    # parameters keyword-only in newer releases, and the keyword form does
    # not depend on the parameter order of the installed version.
    _ARDRegression.__init__(
        self,
        n_iter=n_iter,
        tol=tol,
        alpha_1=alpha_1,
        alpha_2=alpha_2,
        lambda_1=lambda_1,
        lambda_2=lambda_2,
        compute_score=compute_score,
        threshold_lambda=threshold_lambda,
        fit_intercept=fit_intercept,
        normalize=normalize,
        copy_X=copy_X,
        verbose=verbose,
    )
    BaseWrapperReg.__init__(self)
#+++++++++++++++++++++++++++++++++++++++++++++++++ #Importing sklearn, numpy pylab modules from sklearn.linear_model import ARDRegression from sklearn.model_selection import cross_val_predict from sklearn.datasets import load_boston from sklearn.metrics import explained_variance_score, mean_squared_error import numpy as np import pylab as pl #Loading boston datasets boston = load_boston() # Creating Regression Design Matrix x = boston.data # Creating target dataset y = boston.target # Create ARDRegression Regression object ARD= ARDRegression(alpha_1=0.01, alpha_2=0.01, lambda_1=1e-06, lambda_2=1e-06) # Fitting a linear model using the dataset ARD.fit(x,y) # Y predicted values yp = ARD.predict(x) #Calculation 10-Fold CV yp_cv = cross_val_predict(ARD, x, y, cv=10) #Printing RMSE and Explained Variance Evariance=explained_variance_score(y,yp) Evariance_cv=explained_variance_score(y,yp_cv) RMSE =np.sqrt(mean_squared_error(y,yp)) RMSECV=np.sqrt(mean_squared_error(y,yp_cv)) print('Method: ARDRegression Regression') print('RMSE on the dataset: %.4f' %RMSE) print('RMSE on 10-fold CV: %.4f' %RMSECV) print('Explained Variance Regression Score on the dataset: %.4f' %Evariance)
def learn_model(x_mat, y):
    """Fit an ``ARDRegression`` model on ``x_mat`` and ``y`` and return it."""
    regressor = ARDRegression()
    regressor.fit(x_mat, y)
    return regressor
# Create weigts with a precision lambda_ of 4. lambda_ = 4. w = np.zeros(n_features) # Only keep 10 weights of interest relevant_features = np.random.randint(0, n_features, 10) for i in relevant_features: w[i] = stats.norm.rvs(loc=0, scale=1. / np.sqrt(lambda_)) # Create noite with a precision alpha of 50. alpha_ = 50. noise = stats.norm.rvs(loc=0, scale=1. / np.sqrt(alpha_), size=n_samples) # Create the target y = np.dot(X, w) + noise ############################################################################### # Fit the ARD Regression clf = ARDRegression(compute_score=True) clf.fit(X, y) ols = LinearRegression() ols.fit(X, y) ############################################################################### # Plot the true weights, the estimated weights and the histogram of the # weights plt.figure(figsize=(6, 5)) plt.title("Weights of the model") plt.plot(clf.coef_, 'b-', label="ARD estimate") plt.plot(ols.coef_, 'r--', label="OLS estimate") plt.plot(w, 'g-', label="Ground truth") plt.xlabel("Features") plt.ylabel("Values of the weights")
if __name__ == "__main__":
    ########################### Set script parameters ###########################

    # Gene expression parameters
    log10Normalize = True
    # Normalize expression data in a unified way for TCGA and cell lines
    standardizeByTCGA = True
    # Method to normalize gene expression values
    L2Normalizer = Normalizer(norm='l2', copy=True)

    # Gene pruning parameters
    # Eliminates genes that are uncorrelated between array and RNASeq
    pruneUncorrelatedGenes = True
    # p value cutoff for pruning
    pruneCutoff = 0.001

    # For the array data, the first 60 entries correspond to NCI60 dataset
    clinicalSplitPoint = 60
    clf = ARDRegression(normalize=False)

    # Location for training data
    inputFolder = '../output/standardizedData/2015-07-30/'
    docetaxelArrayFolder = '../data/docetaxel_validation/'
    # Builtin str() replaces np.str, which was only an alias for it and is
    # no longer provided by recent NumPy releases.
    outputFolder = '../output/DocetaxelClinical/' + str(date.today()) + '/'
    if not os.path.exists(outputFolder):
        os.makedirs(outputFolder)
    #############################################################################

    cellExpression = joblib.load(inputFolder + 'cellExpression.pkl')
    tcgaExpression = joblib.load(inputFolder + 'tcgaExpression.pkl')
    # NOTE(review): if these are pandas objects, `.append` is not available
    # in recent pandas releases -- confirm the object type and pinned version.
    mergedExpression = cellExpression.append(tcgaExpression)

    # Retrieves ComBat-homogenized data for NCI60 cell line and Docetaxel Clinical U95 Array Data
# Create weights with a precision lambda_ of 4. lambda_ = 4. w = np.zeros(n_features) # Only keep 10 weights of interest relevant_features = np.random.randint(0, n_features, 10) for i in relevant_features: w[i] = stats.norm.rvs(loc=0, scale=1. / np.sqrt(lambda_)) # Create noise with a precision alpha of 50. alpha_ = 50. noise = stats.norm.rvs(loc=0, scale=1. / np.sqrt(alpha_), size=n_samples) # Create the target y = np.dot(X, w) + noise ############################################################################### # Fit the ARD Regression clf = ARDRegression(compute_score=True) clf.fit(X, y) ols = LinearRegression() ols.fit(X, y) ############################################################################### # Plot the true weights, the estimated weights, the histogram of the # weights, and predictions with standard deviations plt.figure(figsize=(6, 5)) plt.title("Weights of the model") plt.plot(clf.coef_, color='darkblue', linestyle='-', linewidth=2, label="ARD estimate") plt.plot(ols.coef_, color='yellowgreen', linestyle=':', linewidth=2, label="OLS estimate") plt.plot(w, color='orange', linestyle='-', linewidth=2, label="Ground truth")