def regularization_test():
    """Grid-search regularization settings for logistic regression.

    Trains a fresh ``LogReg`` model on the module-level ``(X, Y)`` for every
    (penalty, lambda) combination and appends the gain-chart area and the
    probability R2 on the model's test split to ``log_regular.txt``.

    Relies on module-level names: ``X``, ``Y``, ``LogReg``, ``gain_chart``,
    ``prob_acc``.  Returns nothing; output goes to the log file.
    """
    with open("log_regular.txt", "w+") as f:
        f.write("Regularization | lambda | Area | R2\n")
        f.write("-" * 50 + "\n")
        for reg in ['l2', 'l1']:
            for lamb in [0.0001, 0.001, 0.01, 0.1, 1.0]:
                logreg = LogReg(X, Y)
                logreg.optimize(m=100, epochs=5000, eta=0.01,
                                regularization=reg, lamb=lamb)
                # Evaluate on the held-out split LogReg keeps internally.
                # (The original also bound logreg.p_train to an unused local;
                # removed — only the test predictions are scored here.)
                ypred_test = logreg.p_test
                area = gain_chart(logreg.Y_test, ypred_test, plot=False)
                R2 = prob_acc(logreg.Y_test, ypred_test, plot=False)
                f.write(" %s | %g | %.4f | %.4f \n" % (reg, lamb, area, R2))
            # Separator between penalty sections.  NOTE(review): indentation
            # was lost in the flattened source — confirm this write belongs
            # inside the outer loop rather than after it.
            f.write("-" * 50 + "\n")
# Fit a random forest and a single decision tree on the data returned by
# get_data(), and score each with the cumulative-gain chart and the
# probability-accuracy metric on a 50/50 held-out split.
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import cross_validate, train_test_split, KFold
from sklearn.tree import DecisionTreeClassifier
# BUG FIX: RandomForestClassifier was used below but never imported.
from sklearn.ensemble import RandomForestClassifier

from metrics import gain_chart, prob_acc
from resampling import Resample
# NOTE(review): get_data was called but not imported in this chunk; other
# scripts in this project import it from read_data — confirm.
from read_data import get_data

X, Y = get_data()
Y = Y.flatten()
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.5)

# Optional minority-class oversampling (left disabled, as in the original).
#r = Resample(X_train, Y_train)
#X_train, Y_train = r.Over()

# RANDOM FOREST
clf_rf = RandomForestClassifier(n_estimators=100, max_depth=8,
                                min_samples_split=100)
clf_rf.fit(X_train, Y_train)
ypred_test = clf_rf.predict_proba(X_test)
gain_chart(Y_test, ypred_test)
prob_acc(Y_test, ypred_test)

# REGULAR CLASSIFICATION TREE
clf_dt = DecisionTreeClassifier(max_depth=6, min_samples_split=200)
clf_dt.fit(X_train, Y_train)
ypred_test = clf_dt.predict_proba(X_test)
gain_chart(Y_test, ypred_test)
prob_acc(Y_test, ypred_test)
# One iteration of a K-fold cross-validation sweep over tree depths.
# Relies on outer-scope names not visible in this chunk: depths, i (index of
# the current depth), n_splits, X_train, Y_train, and the accumulator arrays
# cv_gains_rf / cv_probs_rf / cv_gains_dt (row 0 = train fold scores,
# row 1 = validation fold scores; column i = current depth).
# NOTE(review): the += sums are presumably divided by n_splits downstream to
# get fold averages — confirm in the surrounding code.
clf_rf = RandomForestClassifier(n_estimators=100, max_depth=depths[i])
clf_dt = DecisionTreeClassifier(max_depth=depths[i])
kf = KFold(n_splits=n_splits)
for train, valid in kf.split(X_train):
    # RANDOM FOREST: fit on the training fold, score train and validation.
    clf_rf.fit(X_train[train], Y_train[train])
    ypred_train = clf_rf.predict_proba(X_train[train])
    ypred_test = clf_rf.predict_proba(X_train[valid])
    cv_gains_rf[0, i] += gain_chart(Y_train[train], ypred_train, plot=False)
    cv_gains_rf[1, i] += gain_chart(Y_train[valid], ypred_test, plot=False)
    cv_probs_rf[0, i] += prob_acc(Y_train[train], ypred_train, plot=False)
    cv_probs_rf[1, i] += prob_acc(Y_train[valid], ypred_test, plot=False)
    # REGULAR CLASSIFICATION TREE: same folds, same metrics.
    clf_dt.fit(X_train[train], Y_train[train])
    ypred_train = clf_dt.predict_proba(X_train[train])
    ypred_test = clf_dt.predict_proba(X_train[valid])
    cv_gains_dt[0, i] += gain_chart(Y_train[train], ypred_train, plot=False)
    cv_gains_dt[1, i] += gain_chart(Y_train[valid], ypred_test, plot=False)
    # NOTE(review): the chunk ends here; the matching cv_probs_dt updates
    # (if any) are outside the visible source.
# Architecture grid search for the feed-forward neural network: sweeps
# activation function, hidden-layer width, and depth, logging test-set
# gain-chart area, probability R2 and classification error rate.
import sys
sys.path.append("network/")
from NN import NeuralNet
from metrics import gain_chart, prob_acc
# NOTE(review): get_data was called but not imported in this chunk; other
# scripts in this project import it from read_data — confirm.
from read_data import get_data

#X, Y = get_data(normalized = False, standardized = False)
X, Y = get_data()

# BUG FIX: the original had the 'with open(...)' line commented out while the
# f.write(...) calls below still executed, raising NameError on 'f'.
# Restored the context manager so results are actually written.
with open("nn_arch.txt", "w+") as f:
    f.write("Activation | Hidden layers | Nodes | Area Test | R2 Test | Error rate \n")
    f.write("-"*70)
    for act in ['tanh', 'sigmoid', 'relu']:
        for size in [5, 10, 20, 50, 100]:
            for n_lay in [1, 2, 3]:
                # Input layer of 23 nodes, n_lay hidden layers of `size`
                # nodes, 2 output nodes; final layer has no activation.
                nn = NeuralNet(X, Y.flatten(),
                               nodes=[23] + [size]*n_lay + [2],
                               activations=[act]*n_lay + [None],
                               cost_func='log')
                nn.split_data(frac=0.5, shuffle=True)
                nn.TrainNN(epochs=2000, batchSize=200, eta0=0.01, n_print=100)
                ypred_test = nn.feed_forward(nn.xTest, isTraining=False)
                acc = nn.accuracy(nn.yTest, ypred_test)
                err_rate = 1 - acc/100  # accuracy is presumably in percent — confirm in NN
                area = gain_chart(nn.yTest, ypred_test, plot=False)
                R2 = prob_acc(nn.yTest, ypred_test, plot=False)
                f.write("\n %s | %i | %i | %.5f | %.5f | %.3f "
                        % (act, n_lay, size, area, R2, err_rate))
    f.write("\n" + "-"*70)
# Logistic-regression experiment: train once with default settings and plot
# the gain chart / probability accuracy; regularization_test() below runs the
# full (penalty, lambda) grid search on demand.
from read_data import get_data
import sys
sys.path.append("network/")
from logreg import LogReg
from metrics import gain_chart, prob_acc

#X, Y = get_data(normalized = False, standardized = False)
X, Y = get_data()

logreg = LogReg(X, Y)
logreg.optimize(m=100, epochs=5000, eta=0.01)  #, regularization='l2', lamb=0.0001)
# Only the test-split predictions are scored (the unused p_train binding was
# removed).
ypred_test = logreg.p_test
gain_chart(logreg.Y_test, ypred_test)
prob_acc(logreg.Y_test, ypred_test)


def regularization_test():
    """Grid-search (penalty, lambda) for LogReg and log results to file.

    NOTE(review): this definition was truncated mid-call in the original
    chunk; the body is reconstructed from the complete copy of the same
    function elsewhere in this source file.
    """
    with open("log_regular.txt", "w+") as f:
        f.write("Regularization | lambda | Area | R2\n")
        f.write("-" * 50 + "\n")
        for reg in ['l2', 'l1']:
            for lamb in [0.0001, 0.001, 0.01, 0.1, 1.0]:
                logreg = LogReg(X, Y)
                logreg.optimize(m=100, epochs=5000, eta=0.01,
                                regularization=reg, lamb=lamb)
                ypred_test = logreg.p_test
                area = gain_chart(logreg.Y_test, ypred_test, plot=False)
                R2 = prob_acc(logreg.Y_test, ypred_test, plot=False)
                f.write(" %s | %g | %.4f | %.4f \n" % (reg, lamb, area, R2))
            f.write("-" * 50 + "\n")