def regularization_test():
    """Grid search over regularization type and strength for the logistic
    regression, logging gain-chart area and probability R2 to log_regular.txt."""

    with open("log_regular.txt", "w+") as f:

        f.write("Regularization | lambda | Area | R2\n")
        f.write("-" * 50 + "\n")
        for reg in ['l2', 'l1']:
            for lamb in [0.0001, 0.001, 0.01, 0.1, 1.0]:

                logreg = LogReg(X, Y)
                logreg.optimize(m=100,
                                epochs=5000,
                                eta=0.01,
                                regularization=reg,
                                lamb=lamb)

                ypred_train = logreg.p_train
                ypred_test = logreg.p_test
                area = gain_chart(logreg.Y_test, ypred_test, plot=False)
                R2 = prob_acc(logreg.Y_test, ypred_test, plot=False)

                f.write("  %s  |  %g  |  %.4f  |  %.4f  \n" %
                        (reg, lamb, area, R2))

            f.write("-" * 50 + "\n")
Example #2
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import cross_validate, train_test_split, KFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from metrics import gain_chart, prob_acc
from resampling import Resample

# get_data (used below) is a project-local loader; its import is not shown on this page.

X, Y = get_data()
Y = Y.flatten()
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.5)
#r = Resample(X_train, Y_train)
#X_train, Y_train = r.Over()

clf_rf = RandomForestClassifier(n_estimators=100,
                                max_depth=8,
                                min_samples_split=100)

clf_rf.fit(X_train, Y_train)

ypred_test = clf_rf.predict_proba(X_test)

gain_chart(Y_test, ypred_test)
prob_acc(Y_test, ypred_test)

clf_dt = DecisionTreeClassifier(max_depth=6, min_samples_split=200)
clf_dt.fit(X_train, Y_train)

ypred_test = clf_dt.predict_proba(X_test)

gain_chart(Y_test, ypred_test)
prob_acc(Y_test, ypred_test)
    # NOTE: the lines below are the inner part of a larger cross-validation routine
    # that is only partially shown here -- n_splits, the index i and the accumulator
    # arrays cv_gains_* / cv_probs_* (row 0: train, row 1: validation) are defined in
    # surrounding code that does not appear on this page.
    kf = KFold(n_splits=n_splits)

    for train, valid in kf.split(X_train):

        # RANDOM FOREST
        clf_rf.fit(X_train[train], Y_train[train])

        ypred_train = clf_rf.predict_proba(X_train[train])
        ypred_test = clf_rf.predict_proba(X_train[valid])

        cv_gains_rf[0, i] += gain_chart(Y_train[train],
                                        ypred_train,
                                        plot=False)
        cv_gains_rf[1, i] += gain_chart(Y_train[valid], ypred_test, plot=False)
        cv_probs_rf[0, i] += prob_acc(Y_train[train], ypred_train, plot=False)
        cv_probs_rf[1, i] += prob_acc(Y_train[valid], ypred_test, plot=False)

        # REGULAR CLASSIFICATION TREE
        clf_dt.fit(X_train[train], Y_train[train])

        ypred_train = clf_dt.predict_proba(X_train[train])
        ypred_test = clf_dt.predict_proba(X_train[valid])

        cv_gains_dt[0, i] += gain_chart(Y_train[train],
                                        ypred_train,
                                        plot=False)
        cv_gains_dt[1, i] += gain_chart(Y_train[valid], ypred_test, plot=False)
        cv_probs_dt[0, i] += prob_acc(Y_train[train], ypred_train, plot=False)
        cv_probs_dt[1, i] += prob_acc(Y_train[valid], ypred_test, plot=False)
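# Resample.Over() (commented out in the example above) comes from the project's
# resampling module and is not shown on this page.  As an assumed illustration only,
# random oversampling of the minority class could look roughly like this:
import numpy as np

def random_oversample(X, Y, seed=42):
    """Duplicate randomly drawn minority-class rows until both classes
    are equally represented, then shuffle."""
    rng = np.random.default_rng(seed)
    minority = int(Y.sum() < len(Y) / 2)                 # label of the rarer class
    idx_min = np.where(Y == minority)[0]
    idx_maj = np.where(Y != minority)[0]
    extra = rng.choice(idx_min, size=len(idx_maj) - len(idx_min), replace=True)
    idx = np.concatenate([idx_maj, idx_min, extra])
    rng.shuffle(idx)
    return X[idx], Y[idx]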
Example #4
import sys
sys.path.append("network/")
from NN import NeuralNet
from metrics import gain_chart, prob_acc

#X, Y = get_data(normalized = False, standardized = False)
X, Y = get_data()

with open("nn_arch.txt", "w+") as f:

    f.write("Activation | Hidden layers | Nodes | Area Test | R2 Test | Error rate \n")
    f.write("-"*70)

    for act in ['tanh', 'sigmoid', 'relu']:
        for size in [5, 10, 20, 50, 100]:
            for n_lay in [1, 2, 3]:
                nn = NeuralNet(X, Y.flatten(),
                               nodes=[23] + [size]*n_lay + [2],
                               activations=[act]*n_lay + [None],
                               cost_func='log')
                nn.split_data(frac=0.5, shuffle=True)
                nn.TrainNN(epochs=2000, batchSize=200, eta0=0.01, n_print=100)

                ypred_test = nn.feed_forward(nn.xTest, isTraining=False)
                acc = nn.accuracy(nn.yTest, ypred_test)
                err_rate = 1 - acc/100
                area = gain_chart(nn.yTest, ypred_test, plot=False)
                R2 = prob_acc(nn.yTest, ypred_test, plot=False)

                f.write("\n  %s  |  %i  |  %i  |  %.5f  |  %.5f  |  %.3f "\
                        %(act, n_lay, size, area, R2, err_rate))
        f.write("\n" + "-"*70)
import sys
sys.path.append("network/")
from logreg import LogReg
from metrics import gain_chart, prob_acc

#X, Y = get_data(normalized = False, standardized = False)
X, Y = get_data()

logreg = LogReg(X, Y)
logreg.optimize(m=100, epochs=5000,
                eta=0.01)  #, regularization='l2', lamb=0.0001)

ypred_train = logreg.p_train
ypred_test = logreg.p_test
gain_chart(logreg.Y_test, ypred_test)
prob_acc(logreg.Y_test, ypred_test)
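# The regularization / lamb keywords (commented out above and swept in
# regularization_test below) add a penalty term to the logistic-regression cost.
# LogReg's internals are not shown on this page; a generic sketch of one
# L2-penalized gradient-descent step on the weights beta -- not the project's
# actual update -- would be:
import numpy as np

def l2_logreg_step(beta, X_batch, y_batch, eta=0.01, lamb=0.0001):
    """One gradient-descent update of the cross-entropy cost with an
    added L2 penalty 0.5 * lamb * ||beta||^2."""
    p = 1.0 / (1.0 + np.exp(-X_batch @ beta))            # sigmoid predictions
    grad = X_batch.T @ (p - y_batch) / len(y_batch)      # cross-entropy gradient
    grad += lamb * beta                                   # L2 penalty gradient
    return beta - eta * grad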


def regularization_test():
    """Grid search over regularization type and strength for the logistic
    regression, logging gain-chart area and probability R2 to log_regular.txt."""

    with open("log_regular.txt", "w+") as f:

        f.write("Regularization | lambda | Area | R2\n")
        f.write("-" * 50 + "\n")
        for reg in ['l2', 'l1']:
            for lamb in [0.0001, 0.001, 0.01, 0.1, 1.0]:

                logreg = LogReg(X, Y)
                logreg.optimize(m=100,
                                epochs=5000,
                                eta=0.01,