if ver_score[x-1][-1] > score:
                score = ver_score[x-1][-1]
                max_x = x
                max_y = y
                print "X: ", x, ". Y: ", y, ":"
                print "Training Score: ", train_score[x-1][-1], ". Verification Score: ", \
                     ver_score[x-1][-1]
    
#Example params given. Will be decided by optimizing.
def gen_mlp():
    """Train an MLP classifier on the 2008 training data and return it.

    Hyperparameters are placeholder values to be replaced by tuning.
    Returns the fitted MLPClassifier.
    """
    # warm_start and verbose are boolean flags; pass True rather than 1.
    clf = MLPClassifier(alpha=1e-5,
                        hidden_layer_sizes=(7, 3),
                        random_state=1,
                        shuffle=True,
                        warm_start=True,
                        verbose=True)
    clf.fit(X_train_2008, Y_train_2008)
    return clf

# Load the fitted MLP from disk, or train it via gen_mlp and pickle it.
mlp = read_make_pkl("saved_objs/mlp.pkl", gen_mlp)

def main(argv):
    try:
        opts, args = getopt.getopt(argv,"ho:",["output="])
    except getopt.GetoptError:
        print 'mlpclassifier.py [-o [2008] [2012] [tune]]'
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print 'mlpclassifer.py [-o [2008] [2012] [tune]]'
            sys.exit()
        elif opt in ("-o", "--output"):
            if (arg == "2008"):
                # DEBUG
                print("mlp.predict(X_test_2008).shape" +
            neighbor = n
            print "Number of neighbors: ", neighbor,
            print "Training Score: ", train_score[-1], ". Verification Score: ", \
                 ver_score[-1]
    return neighbor


# Example params given; the neighbor count should come from tuning.
def gen_knearest():
    """Fit a 10-nearest-neighbor classifier on the 2008 training set."""
    model = KNeighborsClassifier(n_neighbors=10)
    model.fit(X_train_2008, Y_train_2008)
    return model


# Load the fitted k-nearest model from disk, or train and pickle it.
knearest = read_make_pkl("saved_objs/knearest.pkl", gen_knearest)


def main(argv):
    try:
        opts, args = getopt.getopt(argv, "ho:", ["output="])
    except getopt.GetoptError:
        print 'knearest_neighbor.py [-o [2008] [2012] [tune]]'
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print 'knearest_neighbor.py [-o [2008] [2012] [tune]]'
            sys.exit()
        elif opt in ("-o", "--output"):
            if (arg == "2008"):
                # DEBUG
Example #3
0
import os.path
import numpy as np
import pkl_help
from pkl_help import read_make_pkl
from sklearn import preprocessing
import preprocess_help as ph

def get_csv_data(filename):
    """Parse a comma-separated file into a numpy array (NaN for blanks)."""
    return np.genfromtxt(filename, delimiter=",")


# Raw CSV tables, cached as compressed pickles so the slow genfromtxt
# parse only happens on the first run.
train_2008 = read_make_pkl("saved_objs/train_2008.pkl",
                           lambda: get_csv_data("data/train_2008.csv"),
                           compress=True)

test_2008 = read_make_pkl("saved_objs/test_2008.pkl",
                          lambda: get_csv_data("data/test_2008.csv"),
                          compress=True)

test_2012 = read_make_pkl("saved_objs/test_2012.pkl",
                          lambda: get_csv_data("data/test_2012.csv"),
                          compress=True)
##################################################################################
# Preprocessed feature/target matrices derived from the raw 2008 table.
X_train_2008 = read_make_pkl("saved_objs/X_train_2008.pkl",
                           lambda: ph.remove_header_and_normalize_train(train_2008),
                           compress=True)
Y_train_2008 = read_make_pkl("saved_objs/Y_train_2008.pkl",
                           lambda: ph.grab_train_Y(train_2008),
                           compress=True)
X_ver_2008 = read_make_pkl("saved_objs/X_ver_2008.pkl",
Example #4
0
#bst = xgb.train(plst, dtrain, evals=evallist)


# Generate xgb model
def gen_xgb():
    """Fit an XGBoost regressor on the 2008 training data.

    Early-stops after 10 rounds without improvement on the
    verification split.
    """
    validation = [(X_ver_2008, Y_ver_2008)]
    regressor = xgb.XGBRegressor()
    regressor.fit(X_train_2008, Y_train_2008,
                  eval_set=validation,
                  early_stopping_rounds=10,
                  verbose=True)
    return regressor


# Load the fitted xgb model from disk, or train it via gen_xgb and pickle it.
xgb_model = read_make_pkl("saved_objs/xgb.pkl", gen_xgb)


def main(argv):
    try:
        opts, args = getopt.getopt(argv, "ho:", ["output="])
    except getopt.GetoptError:
        print 'xgb.py [-o [2008] [2012]]'
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print 'xgb.py [-o [2008] [2012]]'
            sys.exit()
        elif opt in ("-o", "--output"):
            if (arg == "2008"):
                preds = round_predictions(xgb_model.predict(X_test_2008))
Example #5
0
# Get training data
# Feature/target matrices restored from previously pickled objects.
X_train_2008 = get_pkl("saved_objs/X_train_2008.pkl")
Y_train_2008 = get_pkl("saved_objs/Y_train_2008.pkl")
X_test_2008 = get_pkl("saved_objs/X_test_2008.pkl")
X_test_2012 = get_pkl("saved_objs/X_test_2012.pkl")


# Build the cross-validated lasso regression model.
def gen_lasso():
    """Fit a LassoCV model (10-fold CV, all cores) on the 2008 data."""
    lasso_model = LassoCV(cv=10, n_jobs=-1, verbose=True)
    lasso_model.fit(X_train_2008, Y_train_2008)
    return lasso_model


# Load the fitted lasso model from disk, or train it and pickle it.
lasso = read_make_pkl("saved_objs/lasso.pkl", gen_lasso)


def main(argv):
    try:
        opts, args = getopt.getopt(argv, "ho:", ["output="])
    except getopt.GetoptError:
        print 'lasso.py [-o [2008] [2012]]'
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print 'lasso.py [-o [2008] [2012]]'
            sys.exit()
        elif opt in ("-o", "--output"):
            if (arg == "2008"):
                preds = round_predictions(lasso.predict(X_test_2008))
# Generate xgb model
def gen_xgb():
    """Fit an XGBoost regressor with a very large estimator budget.

    Training stops once the verification score fails to improve for
    20 consecutive rounds, so n_estimators acts only as an upper bound.
    """
    validation_set = [(X_ver_2008, Y_ver_2008)]
    booster = xgb.XGBRegressor(n_estimators=100000)
    booster.fit(X_train_2008, Y_train_2008,
                eval_set=validation_set,
                early_stopping_rounds=20,
                verbose=True)
    return booster


# Load the fitted large-budget xgb model from disk, or train and pickle it.
xgb_model = read_make_pkl("saved_objs/xgb_100000_estimators.pkl", gen_xgb)


def xgb_preds_2008():
    """Return the cached xgb model's predictions for the 2008 test set."""
    predictions = xgb_model.predict(X_test_2008)
    return predictions


def xgb_preds_2012():
    """Return the cached xgb model's predictions for the 2012 test set."""
    predictions = xgb_model.predict(X_test_2012)
    return predictions


# Cache the model's test-set predictions to disk (computed only once).
read_make_pkl("saved_objs/xgb_100k_estimators_preds_2008.pkl", xgb_preds_2008)
read_make_pkl("saved_objs/xgb_100k_estimators_preds_2012.pkl", xgb_preds_2012)


def main(argv):
Example #7
0
#bst = xgb.train(plst, dtrain, evals=evallist)


# Generate xgb model
def gen_xgb():
    """Fit an XGBoost regressor capped at 1000 trees.

    Early-stops on the 2008 verification split after 20 rounds
    without improvement.
    """
    eval_pairs = [(X_ver_2008, Y_ver_2008)]
    estimator = xgb.XGBRegressor(n_estimators=1000)
    estimator.fit(X_train_2008, Y_train_2008,
                  eval_set=eval_pairs,
                  early_stopping_rounds=20,
                  verbose=True)
    return estimator


# Load the fitted 1000-tree xgb model from disk, or train and pickle it.
xgb_model = read_make_pkl("saved_objs/xgb_1000_estimators.pkl", gen_xgb)


def main(argv):
    try:
        opts, args = getopt.getopt(argv, "ho:", ["output="])
    except getopt.GetoptError:
        print 'xgb_1000_estimators.py [-o [2008] [2012]]'
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print 'xgb_1000_estimators.py [-o [2008] [2012]]'
            sys.exit()
        elif opt in ("-o", "--output"):
            if (arg == "2008"):
                preds = round_predictions(xgb_model.predict(X_test_2008))
Example #8
0
                print "Estimators: ", estimators, ". Max Depth: ", depth, ":"
                print "Training Score: ", train_score[estimators-1][-1], ". Verification Score: ", \
                     ver_score[estimators-1][-1]
    return (max_depth, max_est)


#Example params given. Will be decided by optimizing.
def gen_adaboost():
    """Fit AdaBoost over depth-1 decision stumps (100 boosting rounds)."""
    stump = DecisionTreeClassifier(max_depth=1)
    booster = AdaBoostClassifier(stump, n_estimators=100)
    booster.fit(X_train_2008, Y_train_2008)
    return booster


# Load the fitted adaboost model from disk, or train and pickle it.
adaboost = read_make_pkl("saved_objs/adaboost.pkl", gen_adaboost)


def main(argv):
    try:
        opts, args = getopt.getopt(argv, "ho:", ["output="])
    except getopt.GetoptError:
        print 'adaboost.py [-o [2008] [2012] [tune]]'
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print 'adaboost.py [-o [2008] [2012] [tune]]'
            sys.exit()
        elif opt in ("-o", "--output"):
            if (arg == "2008"):
                # DEBUG
Example #9
0
import os.path
import numpy as np
import pkl_help
from pkl_help import read_make_pkl
from sklearn import svm
import preprocess_help as ph
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.svm import SVC

# Preprocessed training and verification splits, cached as pickles.
# NOTE(review): train_2008 must already be defined before this runs --
# its definition is not visible in this chunk; confirm against the file.
X_train_2008 = read_make_pkl(
    "saved_objs/X_train_2008.pkl",
    lambda: ph.remove_header_and_normalize_train(train_2008),
    compress=True)
Y_train_2008 = read_make_pkl("saved_objs/Y_train_2008.pkl",
                             lambda: ph.grab_train_Y(train_2008),
                             compress=True)
X_ver_2008 = read_make_pkl(
    "saved_objs/X_ver_2008.pkl",
    lambda: ph.remove_header_and_normalize_ver(train_2008),
    compress=True)
Y_ver_2008 = read_make_pkl("saved_objs/Y_ver_2008.pkl",
                           lambda: ph.grab_ver_Y(train_2008),
                           compress=True)

params = [{
    'kernel': ['rbf'],
    'gamma': [1e-3, 1e-4],
    'C': [1, 10, 100, 1000]
}, {
Example #10
0
    print("d1: " + str(model.predict(X_test_2008).shape))
    print("d2: " + str(model.coef_.shape))
    #
    return model


def lin_reg_modified_predict(model, X):
    """Predict with *model*, reshape to an (n, 1) column, and round.

    Returns the rounded predictions.
    """
    raw = model.predict(X).reshape(-1, 1)
    rounded = round_predictions(raw)
    # Debug
    print("mpreds.shape: " + str(rounded.shape))
    return rounded


# Load the fitted linear-regression model from disk, or train and pickle it.
lin_reg_model = read_make_pkl("saved_objs/lin_reg.pkl", gen_lin_reg_model)


def main(argv):
    try:
        opts, args = getopt.getopt(argv, "ho:", ["output="])
    except getopt.GetoptError:
        print 'linear_regression.py [-o [2008] [2012]]'
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print 'linear_regression.py [-o [2008] [2012]]'
            sys.exit()
        elif opt in ("-o", "--output"):
            if (arg == "2008"):
                # DEBUG
Example #11
0
    model = RandomForestClassifier(max_depth=15,
                                   n_estimators=30,
                                   max_features='auto')
    model.fit(X_train_2008, Y_train_2008)
    return model


def rand_forest_modified_predict(model, Y):
    """Run *model*'s predictions on Y and return them as an (n, 1) column."""
    column_preds = model.predict(Y).reshape(-1, 1)
    # Debug
    print("preds.shape: " + str(column_preds.shape))
    return column_preds


# Load the fitted random forest from disk, or train and pickle it.
rand_forest_model = read_make_pkl("saved_objs/rand_forest.pkl",
                                  gen_rand_forest)


def main(argv):
    try:
        opts, args = getopt.getopt(argv, "ho:", ["output="])
    except getopt.GetoptError:
        print 'random_forest.py [-o [2008] [2012] [graphs]]'
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print 'random_forest.py [-o [2008] [2012] [graphs]]'
            sys.exit()
        elif opt in ("-o", "--output"):
            if (arg == "2008"):
                # DEBUG
Example #12
0
# Get training data
# Feature/target matrices restored from previously pickled objects.
X_train_2008 = get_pkl("saved_objs/X_train_2008.pkl")
Y_train_2008 = get_pkl("saved_objs/Y_train_2008.pkl")
X_test_2008 = get_pkl("saved_objs/X_test_2008.pkl")
X_test_2012 = get_pkl("saved_objs/X_test_2012.pkl")


# Build the cross-validated ridge regression model.
def gen_ridge():
    """Fit a RidgeCV model on the 2008 training data and return it."""
    ridge_model = RidgeCV()
    ridge_model.fit(X_train_2008, Y_train_2008)
    return ridge_model


# Load the fitted ridge model from disk, or train and pickle it.
ridge = read_make_pkl("saved_objs/ridge.pkl", gen_ridge)


def main(argv):
    try:
        opts, args = getopt.getopt(argv, "ho:", ["output="])
    except getopt.GetoptError:
        print 'ridge.py [-o [2008] [2012]]'
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print 'ridge.py [-o [2008] [2012]]'
            sys.exit()
        elif opt in ("-o", "--output"):
            if (arg == "2008"):
                preds = round_predictions(ridge.predict(X_test_2008))
Example #13
0
def gen_voting():
    print "Making Voting Classifier"
    clf = VotingClassifier(estimators=[('ridge', ridge), ('mlp', mlp),
                                       ('rand_forest', rand_forest),
                                       ('adaboost', adaboost)],
                           voting='hard')
    print "Training Voting Classifier"
    clf.fit(X_train_2008, Y_train_2008)
    print "Scoring Voting Classifier"
    print "Training Score: ", clf.score(X_train_2008, Y_train_2008)
    print "Ver Score: ", clf.score(X_ver, Y_ver)
    return clf


# Load the fitted voting ensemble from disk, or train and pickle it.
voting = read_make_pkl("saved_objs/voting.pkl", gen_voting)


def main(argv):
    try:
        opts, args = getopt.getopt(argv, "ho:", ["output="])
    except getopt.GetoptError:
        print 'voting.py [-o [2008] [2012]]'
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print 'voting.py [-o [2008] [2012]]'
            sys.exit()
        elif opt in ("-o", "--output"):
            if (arg == "2008"):
                # DEBUG