def run():
    """Print the model's prediction for the column-wise mean of the inputs.

    Loads the data with ``getInput()`` (the third returned value is not
    used), obtains a weight vector from ``LinearRegressor.RLS`` and applies
    it to the per-column mean of the input matrix.
    """
    features, targets, _ = getInput()

    # 0.08 is presumably the regularisation strength found elsewhere --
    # TODO confirm against LinearRegressor.RLS.
    weights = LinearRegressor(features, targets).RLS(0.08, "lasso")

    # Average every input column (axis=0) to build one representative sample.
    mean_features = np.mean(features, axis=0)
    prediction = np.dot(mean_features, weights)

    # The result is indexed as a 2-D object; its single entry is reported.
    print("mean input prediction: " + str(prediction[0, 0]))
def run(confidence, numKFolds, lam, numIter = 10000):
    """Cross-validate, bootstrap the fold errors and plot the result.

    For every fold produced by a shuffled ``KFold`` (fixed seed 69) the
    model is trained through ``train`` and the summed error of that fold is
    recorded. The per-fold errors are then handed to ``bootstrap``; the
    bootstrapped averages are shown as a histogram with a fitted curve, and
    the confidence interval is printed.

    Parameters:
        confidence: forwarded unchanged to ``bootstrap``.
        numKFolds:  number of folds (``n_splits``) for ``KFold``.
        lam:        lambda value passed as the last argument of ``train``
                    (use p1 to find the optimal lambda).
        numIter:    forwarded unchanged to ``bootstrap``; defaults to 10000.

    Returns:
        None. Output is printed and shown in a Matplotlib window.
    """
    x, y, _ = getInput()

    # K-fold means: shuffled split with a fixed seed so runs are repeatable.
    kf = KFold(n_splits=numKFolds, shuffle=True, random_state=69)
    sampleError = []
    for count, (train_index, test_index) in enumerate(kf.split(x), start=1):
        x_train, x_test = x[train_index], x[test_index]  # input set
        y_train, y_test = y[train_index], y[test_index]  # output set
        print("training fold: " + str(count))
        error, _ = train(x_train, y_train, x_test, y_test, lam)
        sampleError.append(np.sum(error))

    # Bootstrapping with t-distribution
    print()
    interval, mu, sigma, avgList = bootstrap(confidence, sampleError, numIter)

    # PLOTTING
    # `normed` was removed from Matplotlib's hist(); `density=True` is the
    # replacement and likewise normalises the histogram to unit area.
    _, bins, _ = plt.hist(avgList, 100, density=True, ec='black')
    # NOTE(review): normpdf is not defined in this block. If it comes from
    # matplotlib.mlab, be aware that it was removed in Matplotlib 3.1 too.
    pdf = normpdf(bins, mu, sigma)
    plt.plot(bins, pdf, 'r--')
    plt.title("Histogram of Averages Bootrapped from Averages of 10 Fold CR")
    plt.xlabel("Bootstrapped Average Error")
    plt.ylabel("Number of Errors inside Interval")

    print()
    print("Confidence Interval: " + str(interval))
    plt.show()
from sklearn.decomposition import PCA from sklearn import svm from sklearn.metrics import roc_curve, auc, precision_recall_curve, average_precision_score from sklearn.model_selection import train_test_split, KFold from sklearn.preprocessing import label_binarize from sklearn.multiclass import OneVsRestClassifier from itertools import product import numpy as np from sklearn.metrics.ranking import roc_auc_score import warnings warnings.filterwarnings("ignore") from p1 import getInput x, _, y = getInput() pca = PCA(30) x = pca.fit_transform(x) yEnv_Pert = y[:, 3] yGene_Pert = y[:, 4] yList = (list(zip(yEnv_Pert, yGene_Pert))) yList = ["".join(tuple) for tuple in yList] envClass = [ "Indole", "O2-starvation", "RP-overexpress", "Antibacterial", "Carbon-limitation", "Dna-damage", "Zinc-limitation", "none" ] geneClass = [ "appY_KO", "arcA_KO", "argR_KO", "cya_KO", "fis_OE", "fnr_KO", "frdC_KO",
''' Created on Nov 20, 2017 @author: Amir ''' import numpy as np from sklearn import linear_model from sklearn.model_selection import KFold from p1 import LinearRegressor, getInput, train x, y = getInput() t = np.array([0 for _ in range(195)]) ok = 0 numKFolds = 10 for i in range(150, 300): print("test: " + str(i + 1)) count = 0 kf = KFold(n_splits=numKFolds, shuffle=True, random_state=i) for train_index, test_index in kf.split(x): x_train, x_test = x[train_index], x[test_index] y_train, y_test = y[train_index], y[test_index] error, coeff = train(x_train, y_train, x_test, y_test, 0.04) if np.sum(error) > 5: t[test_index] = t[test_index] + 1 ok -= 1 break count = count + 1 ok += 1 for i in range(195): if t[i] > 10: