Example #1
0
File: p3.py Project: amazor/testhub
def run():
    """Fit a linear regressor on the loaded data and print its prediction
    for the column-wise mean of the inputs.

    The first two values returned by ``getInput()`` are used as the
    regressor's inputs and targets; the third is ignored. Weights come from
    ``LinearRegressor.RLS(0.08, "lasso")`` (defined outside this block).
    Nothing is returned; the prediction is written to stdout.
    """
    features, targets, _ = getInput()
    regressor = LinearRegressor(features, targets)
    weights = regressor.RLS(0.08, "lasso")

    # Average every input column, then project that single "mean sample"
    # through the fitted weights.
    mean_features = np.mean(features, axis=0)
    prediction = np.dot(mean_features, weights)

    # The product is indexed as 2-D, so the scalar lives at [0, 0].
    print("mean input prediction: " + str(prediction[0, 0]))
Example #2
0
File: p2.py Project: amazor/testhub
def run(confidence, numKFolds, lam, numIter = 10000):
    """Cross-validate the model, bootstrap the fold errors and plot the result.

    For every fold produced by a shuffled ``KFold`` (fixed seed 69) the model
    is trained via ``train`` and the summed test error is recorded. The list
    of per-fold errors is then handed to ``bootstrap``, and the bootstrapped
    averages are shown as a normalised histogram overlaid with the curve
    returned by ``normpdf(bins, mu, sigma)``.

    Args:
        confidence: forwarded unchanged to ``bootstrap``.
        numKFolds: number of splits passed to ``KFold``.
        lam: last argument passed to ``train`` (the original comment calls
            it lambda and suggests using p1 to find the optimal value).
        numIter: forwarded unchanged to ``bootstrap``.

    Returns:
        None. Progress and the confidence interval are printed; the plot is
        displayed with ``plt.show()``.
    """
    x, y, _ = getInput()
    sampleError = []

    # Shuffled K-fold split with a fixed seed so runs are reproducible.
    kf = KFold(n_splits=numKFolds, shuffle=True, random_state=69)
    for count, (train_index, test_index) in enumerate(kf.split(x), start=1):
        x_train, x_test = x[train_index], x[test_index]     # input set
        y_train, y_test = y[train_index], y[test_index]     # output set
        print("training fold: " + str(count))
        error, _ = train(x_train, y_train, x_test, y_test, lam)
        sampleError.append(np.sum(error))

    # Bootstrap the per-fold errors (bootstrap is defined outside this block).
    print()
    interval, mu, sigma, avgList = bootstrap(confidence, sampleError, numIter)

    #        PLOTTING
    # `normed` was deprecated in Matplotlib 2.1 and removed in 3.1;
    # `density=True` is the equivalent normalisation switch.
    _, bins, _ = plt.hist(avgList, 100, density=True, ec='black')
    # NOTE(review): normpdf comes from outside this block; if it is
    # matplotlib.mlab.normpdf it was also removed in Matplotlib 3.1 - confirm.
    pdf = normpdf(bins, mu, sigma)
    plt.plot(bins, pdf, 'r--')
    plt.title("Histogram of Averages Bootrapped from Averages of 10 Fold CR")
    plt.xlabel("Bootstrapped Average Error")
    plt.ylabel("Number of Errors inside Interval")
    print()
    print("Confidence Interval: " + str(interval))
    plt.show()
Example #3
0
from sklearn.decomposition import PCA
from sklearn import svm
from sklearn.metrics import roc_curve, auc, precision_recall_curve, average_precision_score
from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import label_binarize
from sklearn.multiclass import OneVsRestClassifier
from itertools import product
import numpy as np
from sklearn.metrics.ranking import roc_auc_score

import warnings
warnings.filterwarnings("ignore")

from p1 import getInput

# Load the data; only the first and third values returned by getInput()
# are used in this script.
x, _, y = getInput()

# Project the inputs onto 30 principal components.
pca = PCA(n_components=30)
x = pca.fit_transform(x)

# Columns 3 and 4 of y are concatenated element-wise into one combined
# label string per row (presumably environment and gene perturbation,
# going by the variable names - confirm against getInput).
yEnv_Pert = y[:, 3]
yGene_Pert = y[:, 4]
yList = ["".join(pair) for pair in zip(yEnv_Pert, yGene_Pert)]

envClass = [
    "Indole",
    "O2-starvation",
    "RP-overexpress",
    "Antibacterial",
    "Carbon-limitation",
    "Dna-damage",
    "Zinc-limitation",
    "none",
]
geneClass = [
    "appY_KO", "arcA_KO", "argR_KO", "cya_KO", "fis_OE", "fnr_KO", "frdC_KO",
Example #4
0
'''
Created on Nov 20, 2017

@author: Amir
'''
import numpy as np
from sklearn import linear_model
from sklearn.model_selection import KFold
from p1 import LinearRegressor, getInput, train

# Every other call site of getInput() unpacks three values, so a plain
# two-target unpack would raise ValueError on a three-value return. Keep the
# first two values and discard anything after them.
x, y, *_ = getInput()

# t[i] counts how often sample i landed in a test fold whose summed error
# exceeded the threshold; ok counts the seeds for which no fold did.
t = np.zeros(195, dtype=int)
ok = 0
numKFolds = 10
for i in range(150, 300):
    print("test: " + str(i + 1))
    count = 0
    # A different shuffle seed per outer iteration gives a different split.
    kf = KFold(n_splits=numKFolds, shuffle=True, random_state=i)
    for train_index, test_index in kf.split(x):
        x_train, x_test = x[train_index], x[test_index]
        y_train, y_test = y[train_index], y[test_index]
        error, coeff = train(x_train, y_train, x_test, y_test, 0.04)
        if np.sum(error) > 5:
            # Blame every sample of the offending test fold and give up on
            # this seed.
            t[test_index] += 1
            break
        count += 1
    else:
        # No fold exceeded the threshold for this seed.
        ok += 1
for i in range(195):
    if t[i] > 10: