# Module-level data loader; the training loop in this script reads its
# feature matrices and targets through l.get_X_train / l.get_y.
l = AllStateDataLoader()


def get_model_filename(type_dataset, objective_letter, real_letters):
    """Return the path of the model file for one dataset / objective pair.

    The file always lives in the ``model_linearsvc`` directory. Its name
    encodes ``type_dataset`` and ``objective_letter``; when ``real_letters``
    is not the empty string, the name also encodes ``real_letters``.
    """
    directory = "model_linearsvc"

    # Empty string means no real letters: use the "without_real" name.
    if real_letters == "":
        basename = (
            "model_linearsvc_data_%s_%s_without_real_cascade_with_location_view.pkl"
            % (type_dataset, objective_letter)
        )
        return os.path.join(directory, basename)

    basename = (
        "model_linearsvc_data_%s_%s_with_real_%s_cascade_with_location_view.pkl"
        % (type_dataset, objective_letter, real_letters)
    )
    return os.path.join(directory, basename)

# For each dataset name, fit the per-letter models one after another. A model
# is only fitted when the file named by get_model_filename does not exist yet.
# NOTE(review): `parameters` and `fit_and_save_log` are defined outside this
# view; presumably the latter writes the fitted model to model_filename —
# confirm against its definition.
for datasetname in ["2", "3", "4", "all"]:
    # Model D, with no real letters
    model_filename = get_model_filename(datasetname, "D", "")
    if not os.path.exists(model_filename):
        print("Calcul model %s sur dataset %s (%s)" % ("D", datasetname, model_filename))
        X = l.get_X_train(datasetname, "")
        y = l.get_y(datasetname, "D")
        model = fit_and_save_log(parameters, np.array(X), np.array(y), model_filename)
        
    # Model C, with D as the real letter
    model_filename = get_model_filename(datasetname, "C", "D")
    if not os.path.exists(model_filename):
        print("Calcul model %s sur dataset %s (%s)" % ("C", datasetname, model_filename))
        X = l.get_X_train(datasetname, "D")
        y = l.get_y(datasetname, "C")
        model = fit_and_save_log(parameters, np.array(X), np.array(y), model_filename)

    # Model E, with no real letters
    # NOTE(review): this branch looks truncated — X is loaded but no y / fit
    # call follows in the visible code.
    model_filename = get_model_filename(datasetname, "E", "")
    if not os.path.exists(model_filename):
        print("Calcul model %s sur dataset %s (%s)" % ("E", datasetname, model_filename))
        X = l.get_X_train(datasetname, "")
from sklearn import grid_search
import numpy as np


def score(y_predict, y_real):
    """Return the share of entries where ``y_predict`` equals ``y_real``.

    The number of element-wise matches is divided by the length of the first
    axis of ``y_predict``. Both the count and the length are converted to
    float before dividing, so the result is a float.
    """
    total = float(y_predict.shape[0])
    matches = float(np.sum(y_predict == y_real))
    return matches / total

# Compare the "extratrees" cascade predictions (requested with kind="train")
# against the targets returned by the loader, for each dataset name.
l = AllStateDataLoader()
p = AllStatePredictor()

# X_2 = l.get_X_train("2", "")
y_2 = l.get_y("2", "ABCDEFG")
y_2_predict = p.predict_cascade("2", "extratrees", "ABCDEFG", kind="train")

# X_3 = l.get_X_train("3", "")
y_3 = l.get_y("3", "ABCDEFG")
y_3_predict = p.predict_cascade("3", "extratrees", "ABCDEFG", kind="train")

# X_4 = l.get_X_train("4", "")
y_4 = l.get_y("4", "ABCDEFG")
y_4_predict = p.predict_cascade("4", "extratrees", "ABCDEFG", kind="train")

# X_all = l.get_X_train("all", "")
y_all = l.get_y("all", "ABCDEFG")
y_all_predict = p.predict_cascade("all", "extratrees", "ABCDEFG", kind="train")

# Parenthesised print works on both Python 2 and Python 3; the bare print
# statement is a SyntaxError on Python 3.
# NOTE(review): score() declares (y_predict, y_real) but is called here with
# the targets first — presumably harmless when both have the same length;
# confirm.
print("score 2   extratrees cascade : %.4f" % (score(y_2, y_2_predict)))
            (type_dataset, objective_letter))
    else:
        return os.path.join(
            "model_linearsvc",
            "model_linearsvc_data_%s_%s_with_real_%s_cascade.pkl" %
            (type_dataset, objective_letter, real_letters))


# For each dataset name, fit the per-letter models one after another. A model
# is only fitted when the file named by get_model_filename does not exist yet.
# NOTE(review): `parameters` and `fit_and_save_log` are defined outside this
# view; presumably the latter writes the fitted model to model_filename —
# confirm against its definition.
for datasetname in ["2", "3", "4", "all"]:
    # Model D, with no real letters
    model_filename = get_model_filename(datasetname, "D", "")
    if not os.path.exists(model_filename):
        print("Calcul model %s sur dataset %s (%s)" %
              ("D", datasetname, model_filename))
        X = l.get_X_train(datasetname, "")
        y = l.get_y(datasetname, "D")
        model = fit_and_save_log(parameters, np.array(X), np.array(y),
                                 model_filename)

    # Model C, with D as the real letter
    model_filename = get_model_filename(datasetname, "C", "D")
    if not os.path.exists(model_filename):
        print("Calcul model %s sur dataset %s (%s)" %
              ("C", datasetname, model_filename))
        X = l.get_X_train(datasetname, "D")
        y = l.get_y(datasetname, "C")
        model = fit_and_save_log(parameters, np.array(X), np.array(y),
                                 model_filename)

    # Model E, with no real letters
    # NOTE(review): this section looks truncated — only the filename is
    # computed in the visible code.
    model_filename = get_model_filename(datasetname, "E", "")
import sys
sys.path.append("lib")

from AllStateDataLoader import AllStateDataLoader
from sklearn import linear_model
from sklearn import grid_search
import numpy as np

l = AllStateDataLoader()

# Model C with no real letters: grid-search the C parameter of an
# L2-penalised logistic regression on the "all" dataset.
X_all = l.get_X_train("all", "")
y_all = l.get_y("all", "C")

parameters = {'penalty' : ['l2'], 'C' : np.logspace(-3, 0, 3)}
model_C = grid_search.GridSearchCV(
    linear_model.LogisticRegression(),
    parameters,
    verbose=2
)
# Fit the search object built just above. model_D does not exist at this
# point (it is only created in the next section), so calling model_D.fit
# here would raise NameError and leave model_C unfitted.
model_C.fit(np.array(X_all), np.array(y_all))

# Model D with no real letters
X_all = l.get_X_train("all", "")
y_all = l.get_y("all", "D")

parameters = {'penalty' : ['l2'], 'C' : np.logspace(-3, 0, 3)}
model_D = grid_search.GridSearchCV(
    linear_model.LogisticRegression(),
    parameters,
    verbose=2
# y_2_predict = p.predict_simple("2", "logistic", "ABCDEFG", kind="train")

# # X_3 = l.get_X_train("3", "")
# y_3 = l.get_y("3", "ABCDEFG")
# y_3_predict = p.predict_simple("3", "logistic", "ABCDEFG", kind="train")

# # X_all = l.get_X_train("all", "")
# y_all = l.get_y("all", "ABCDEFG")
# y_all_predict = p.predict_simple("all", "logistic", "ABCDEFG", kind="train")

# print "score 2   logistic : %.4f" % (score(y_2, y_2_predict))
# print "score 3   logistic : %.4f" % (score(y_3, y_3_predict))
# print "score all logistic : %.4f" % (score(y_all, y_all_predict))

# Compare the "linearsvc" simple predictions (requested with kind="train")
# against the targets returned by the loader, for each dataset name.
# X_2 = l.get_X_train("2", "")
y_2 = l.get_y("2", "ABCDEFG")
y_2_predict = p.predict_simple("2", "linearsvc", "ABCDEFG", kind="train")

# X_3 = l.get_X_train("3", "")
y_3 = l.get_y("3", "ABCDEFG")
y_3_predict = p.predict_simple("3", "linearsvc", "ABCDEFG", kind="train")

# X_4 = l.get_X_train("4", "")
y_4 = l.get_y("4", "ABCDEFG")
y_4_predict = p.predict_simple("4", "linearsvc", "ABCDEFG", kind="train")

# X_all = l.get_X_train("all", "")
y_all = l.get_y("all", "ABCDEFG")
y_all_predict = p.predict_simple("all", "linearsvc", "ABCDEFG", kind="train")

# Parenthesised print works on both Python 2 and Python 3; the bare print
# statement is a SyntaxError on Python 3.
print("score 2   linearsvc : %.4f" % (score(y_2, y_2_predict)))
import sys
sys.path.append("lib")

from AllStateDataLoader import AllStateDataLoader
from sklearn import linear_model
from sklearn import grid_search
import numpy as np

l = AllStateDataLoader()

# Model C with no real letters: grid-search the C parameter of an
# L2-penalised logistic regression on the "all" dataset.
X_all = l.get_X_train("all", "")
y_all = l.get_y("all", "C")

parameters = {'penalty': ['l2'], 'C': np.logspace(-3, 0, 3)}
model_C = grid_search.GridSearchCV(linear_model.LogisticRegression(),
                                   parameters,
                                   verbose=2)
# Fit the search object built just above. model_D does not exist at this
# point (it is only created in the next section), so calling model_D.fit
# here would raise NameError and leave model_C unfitted.
model_C.fit(np.array(X_all), np.array(y_all))

# Model D with no real letters: grid-search the C parameter of an
# L2-penalised logistic regression on the "all" dataset.
X_all = l.get_X_train("all", "")
y_all = l.get_y("all", "D")

parameters = dict(penalty=['l2'], C=np.logspace(-3, 0, 3))
model_D = grid_search.GridSearchCV(
    estimator=linear_model.LogisticRegression(),
    param_grid=parameters,
    verbose=2
)
model_D.fit(np.array(X_all), np.array(y_all))

# Model C with D