def LinearReg():
    linreg = skl.LinearRegression()
    for dataset in kk.getDatasets(binary=False):
        print('Dataset Name:', dataset.DESCR[0:10])
        X = dataset.data
        Y = dataset.target
        X_scaled = skp.scale(X)
        print('Without Preprocessing Score:', kk.fitModel(linreg, X, Y))
        print('With Preprocessing Score:', kk.fitModel(linreg, X_scaled, Y))
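# ---------------------------------------------------------------------------
# NOTE: the snippets in this section rely on import aliases that are never
# shown here. A plausible preamble, inferred from usage, is sketched below;
# `kk` is a local helper module (getDatasets, fitModel, MeanNormalizer,
# genData, plot2D, compare_models) and is an assumption, not a library API.
import numpy as np
import sklearn as sk
import sklearn.linear_model as skl
import sklearn.preprocessing as skp
import sklearn.tree as skt
import sklearn.decomposition as skD
import sklearn.datasets as skd
import sklearn.model_selection as skm
import sklearn.naive_bayes as skn
from sklearn import svm
# import kk  # hypothetical helper module, defined elsewhere in this repo
# ---------------------------------------------------------------------------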
def LogReg():
    logreg = skl.LogisticRegressionCV()
    for dataset in kk.getDatasets(binary=True):
        print('Dataset Name:', dataset.DESCR[0:10])
        X = dataset.data
        Y = dataset.target
        X_scaled = skp.scale(X)
        print('Without Preprocessing Score:', kk.fitModel(logreg, X, Y))
        print('With Preprocessing Score:', kk.fitModel(logreg, X_scaled, Y))
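# A minimal, self-contained sketch of what LogReg() measures, using only
# scikit-learn (no kk helpers, whose internals are not shown here):
# cross-validated accuracy of LogisticRegressionCV with and without scaling.
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegressionCV
from sklearn.preprocessing import scale
from sklearn.model_selection import cross_val_score

Xb, Yb = load_breast_cancer(return_X_y=True)
clf = LogisticRegressionCV(max_iter=5000)  # high max_iter so unscaled data converges
print('Without scaling:', cross_val_score(clf, Xb, Yb, cv=5).mean())
print('With scaling:   ', cross_val_score(clf, scale(Xb), Yb, cv=5).mean())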
def trailAndError(M):
    import matplotlib.pyplot as plt
    scores = {}
    n_texamples = 100
    print('Training set / Dimension Count / Tree Depth:')
    for i in range(1, M + 1):
        model = skt.DecisionTreeRegressor(max_depth=i)
        n_f = min(10 * i, 10**6)  # cap the feature count
        n_classes = i
        print('%d - %d - %d' % (n_texamples, n_f, i))
        (X, Y) = kk.genData(n_features=n_f, n_classes=n_classes,
                            m=n_texamples, target='C')
        sc, _ = kk.fitModel(model, X, Y, cv=False, ncv=0, roc_stats=False)
        scores[i] = sc
        if sc < 100:
            print('*************** Score is', sc)
        del model
    depths, vals = list(scores.keys()), list(scores.values())
    plt.plot(depths, vals)  # dict views must be materialized before plotting
    plt.xlabel('Max Depth')
    plt.ylabel('Score')
    plt.grid(True)
    plt.show()
    kk.plot2D(depths, vals,
              X_label='Max Depth', Y_label='Score', scatter=False)
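# The same depth-vs-score idea without the assumed kk.genData helper: a
# stand-alone check of how DecisionTreeRegressor's training R^2 grows with
# max_depth (make_regression is a real scikit-learn API; the rest of this
# block is illustrative only).
from sklearn.datasets import make_regression
from sklearn.tree import DecisionTreeRegressor

Xr, Yr = make_regression(n_samples=100, n_features=10, random_state=0)
for depth in range(1, 11):
    tree = DecisionTreeRegressor(max_depth=depth, random_state=0)
    tree.fit(Xr, Yr)
    print('max_depth=%2d  train R^2=%.3f' % (depth, tree.score(Xr, Yr)))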
linreg = skl.LinearRegression()  # used below but not defined in the original
rreg = skl.RidgeCV()             # assumed: the comment below mentions Ridge
lasreg = skl.LassoCV()
logreg = skl.LogisticRegressionCV()
scores = []

'''
# Experimental Part
for n_components in range(60, 65):
    print('Number of components:', n_components)
    pca = pca.set_params(n_components=n_components)
    X = dataset.data
    X = kk.MeanNormalizer(X)
    X = pca.fit_transform(X)
    score, best_cv = kk.fitModel(linreg, X, Y, cv=True, ncv=10)
    cvs.append(best_cv)
    scores.append(score)
'''

# BEST SCORE CLASSIFICATION
pca = skD.PCA()
for dataset in kk.getDatasets():
    print('Dataset Name:', dataset.DESCR[0:10])
    X = dataset.data
    Y = dataset.target
    #pca.set_params(n_components=X.shape[1]-2)
    X = pca.fit_transform(X)
    X = kk.MeanNormalizer(X)  # This step makes a difference for Ridge and Lasso
    fsc = kk.fitModel(linreg, X, Y)
    print('Scores: %.2f, %.2f, %.2f, %.2f' % (fsc, kk.fitModel(rreg, X, Y),
                                              kk.fitModel(lasreg, X, Y), 0.00))
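# The PCA-then-normalize-then-regress sequence above can also be written as a
# scikit-learn Pipeline, which re-fits the preprocessing per dataset
# automatically. A sketch, with StandardScaler standing in for the assumed
# kk.MeanNormalizer:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.linear_model import RidgeCV

pipe = Pipeline([('pca', PCA()),
                 ('scale', StandardScaler()),
                 ('ridge', RidgeCV())])
# pipe.fit(X, Y); pipe.score(X, Y)  # same fit/score contract as the loop above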
Lscores = []  # missing in the original; appended to below
Rscores = []
Pscores = []
Sscores = []
for i in range(1, 50):
    num_dims = 10 * i  # unused in the loop itself
    n_samples = min(2**i, 10**5)  # cap: 2**49 samples would be infeasible
    (X, Y) = skd.make_moons(n_samples=n_samples)
    X_train, X_test, Y_train, Y_test = sk.model_selection.train_test_split(
        X, Y, test_size=0.3, random_state=123)
    svcl = svm.SVC(kernel='linear')
    svcr = svm.SVC(kernel='rbf')
    svcp = svm.SVC(kernel='poly')
    svcsig = svm.SVC(kernel='sigmoid')
    scl, _ = kk.fitModel(svcl, X, Y, plotModel=False, roc_stats=False)
    scr, _ = kk.fitModel(svcr, X, Y, plotModel=False, roc_stats=False)
    scp, _ = kk.fitModel(svcp, X, Y, plotModel=False, roc_stats=False)
    scs, _ = kk.fitModel(svcsig, X, Y, plotModel=False, roc_stats=False)
    #kk.compare_models([svcl, svcr, svcp, svcsig], X_test, Y_test)
    #print(svcl.support_vectors_)
    Pscores.append(scp)
    Sscores.append(scs)
    Lscores.append(scl)
    Rscores.append(scr)
    #print('Scores:%f %f' % (scl, scr))
Y = list(range(1, len(Lscores) + 1))  # x-axis values for a later plot
Y = np.multiply(Y, 10)
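# Hedged stand-alone version of the kernel comparison with a held-out test
# set (the loop above scores via kk.fitModel, whose internals are not shown):
Xm, Ym = skd.make_moons(n_samples=512, noise=0.2, random_state=123)
Xtr, Xte, Ytr, Yte = skm.train_test_split(Xm, Ym, test_size=0.3, random_state=123)
for kernel in ('linear', 'rbf', 'poly', 'sigmoid'):
    clf = svm.SVC(kernel=kernel).fit(Xtr, Ytr)
    print('%-8s test accuracy: %.3f' % (kernel, clf.score(Xte, Yte)))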
import matplotlib.pyplot as plt
import time

bscores = []
linscores = []
logscores = []
linreg = sk.linear_model.LinearRegression()
logreg = sk.linear_model.LogisticRegressionCV()
breg = sk.naive_bayes.GaussianNB()
st = time.time()  # start time (elapsed time is never reported below)
X, Y = skd.make_moons(n_samples=10000, random_state=123)
X_train, X_test, Y_train, Y_test = skm.train_test_split(X, Y,
                                                        test_size=0.3,
                                                        random_state=123)
sc, linreg = kk.fitModel(linreg, X, Y, roc_stats=False)
sc, logreg = kk.fitModel(logreg, X, Y, roc_stats=False)
sc, breg = kk.fitModel(breg, X, Y, roc_stats=False)
kk.compare_models([linreg, logreg, breg], X_test, Y_test)

'''
for i in range(1, 20):
    samples = 1000 * i
    print('Number of samples:', samples)
    X, Y = skd.make_moons(n_samples=samples, random_state=123)
    #X, Y = skd.make_circles(n_samples=1000)
    #kk.plot2D(X, properties=False)
    linscores.append(kk.fitModel(linreg, X, Y))
    logscores.append(kk.fitModel(logreg, X, Y))
    print('Dataset Name:', dataset.DESCR[0:10])
    X = dataset.data
    Y = dataset.target
    X = kk.MeanNormalizer(X)
    n_comp = X.shape[1] - cnt
    if n_comp <= 0:
        n_comp = X.shape[1]
    pca.set_params(n_components=n_comp)
    X = pca.fit_transform(X)
    print('Score:', kk.fitModel(bayes, X, Y))
    del X, Y
'''

gauss = skn.GaussianNB()
bernoulli = skn.BernoulliNB()
multi = skn.MultinomialNB()
for dataset in kk.getDatasets(binary=True):
    #dataset = skd.load_breast_cancer()
    print('Dataset Name:', dataset.DESCR[0:20], '\n=================================')
    # Add a little jitter so no feature column is exactly constant
    X = dataset.data + np.random.random(size=dataset.data.shape)
    Y = dataset.target
    if len(np.unique(Y)) > 2:
        bernoulli.set_params(binarize=0.5)  # binarize takes a float threshold, not a boolean
    else:
        print('Binary Classification')
    X = kk.MeanNormalizer(X)
    pca.set_params(n_components=np.random.randint(1, X.shape[1] + 1))
    X = pca.fit_transform(X)
    kk.fitModel(bayes, X, Y)  # `bayes` is assumed to be defined earlier in the script
    kk.fitModel(gauss, X, Y, dsetProps=False)
    kk.fitModel(bernoulli, X, Y, dsetProps=False)
    #kk.fitModel(multi, X, Y, dsetProps=False, cv=False)
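# Sketch clarifying the binarize parameter used above: BernoulliNB expects a
# float threshold (or None), and each feature is binarized as value > threshold.
# Self-contained and illustrative only:
from sklearn.naive_bayes import BernoulliNB, GaussianNB
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import cross_val_score

Xc, Yc = load_breast_cancer(return_X_y=True)
bnb = BernoulliNB(binarize=Xc.mean())  # threshold features at the global mean
gnb = GaussianNB()
print('BernoulliNB:', cross_val_score(bnb, Xc, Yc, cv=5).mean())
print('GaussianNB: ', cross_val_score(gnb, Xc, Yc, cv=5).mean())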