l = AllStateDataLoader()


def get_model_filename(type_dataset, objective_letter, real_letters):
    """Build the ``.pkl`` path of a model inside ``model_linearsvc``.

    When ``real_letters`` is the empty string the file name carries the
    ``without_real`` marker; otherwise it carries ``with_real_<real_letters>``.
    ``type_dataset`` and ``objective_letter`` are always interpolated into
    the name.
    """
    if real_letters == "":
        basename = "model_linearsvc_data_%s_%s_without_real_cascade_with_location_view.pkl" % (
            type_dataset, objective_letter)
    else:
        basename = "model_linearsvc_data_%s_%s_with_real_%s_cascade_with_location_view.pkl" % (
            type_dataset, objective_letter, real_letters)
    return os.path.join("model_linearsvc", basename)


# Each model is fitted only when its file is not already on disk.
for datasetname in ("2", "3", "4", "all"):
    # Model D with nothing else
    model_filename = get_model_filename(datasetname, "D", "")
    if not os.path.exists(model_filename):
        print("Calcul model %s sur dataset %s (%s)" % ("D", datasetname, model_filename))
        X = l.get_X_train(datasetname, "")
        y = l.get_y(datasetname, "D")
        model = fit_and_save_log(parameters, np.array(X), np.array(y), model_filename)

    # Model C with D information
    model_filename = get_model_filename(datasetname, "C", "D")
    if not os.path.exists(model_filename):
        print("Calcul model %s sur dataset %s (%s)" % ("C", datasetname, model_filename))
        X = l.get_X_train(datasetname, "D")
        y = l.get_y(datasetname, "C")
        model = fit_and_save_log(parameters, np.array(X), np.array(y), model_filename)

    # Model E with nothing else
    model_filename = get_model_filename(datasetname, "E", "")
    if not os.path.exists(model_filename):
        print("Calcul model %s sur dataset %s (%s)" % ("E", datasetname, model_filename))
        X = l.get_X_train(datasetname, "")
from sklearn import grid_search
import numpy as np


def score(y_predict, y_real):
    """Return the fraction of entries where the two inputs compare equal.

    The denominator is ``y_predict.shape[0]`` and the numerator is
    ``np.sum(y_predict == y_real)``; both are converted to float so the
    division is a true division on Python 2 as well.
    """
    n = float(y_predict.shape[0])
    n_ok = float(np.sum(y_predict == y_real))
    return (n_ok/n)


l = AllStateDataLoader()
p = AllStatePredictor()

# Targets and "extratrees" cascade predictions (kind="train") per dataset.
# X_2 = l.get_X_train("2", "")
y_2 = l.get_y("2", "ABCDEFG")
y_2_predict = p.predict_cascade("2", "extratrees", "ABCDEFG", kind="train")

# X_3 = l.get_X_train("3", "")
y_3 = l.get_y("3", "ABCDEFG")
y_3_predict = p.predict_cascade("3", "extratrees", "ABCDEFG", kind="train")

# X_4 = l.get_X_train("4", "")
y_4 = l.get_y("4", "ABCDEFG")
y_4_predict = p.predict_cascade("4", "extratrees", "ABCDEFG", kind="train")

# X_all = l.get_X_train("all", "")
y_all = l.get_y("all", "ABCDEFG")
y_all_predict = p.predict_cascade("all", "extratrees", "ABCDEFG", kind="train")

# Parenthesised print: valid on Python 3 and prints the same text on Python 2.
# NOTE(review): the real targets are passed as score()'s first argument
# (y_predict); the equality count is symmetric, but confirm the order is intended.
print("score 2 extratrees cascade : %.4f" % (score(y_2, y_2_predict)))
(type_dataset, objective_letter)) else: return os.path.join( "model_linearsvc", "model_linearsvc_data_%s_%s_with_real_%s_cascade.pkl" % (type_dataset, objective_letter, real_letters)) for datasetname in ["2", "3", "4", "all"]: # Model D sans rien model_filename = get_model_filename(datasetname, "D", "") if not os.path.exists(model_filename): print("Calcul model %s sur dataset %s (%s)" % ("D", datasetname, model_filename)) X = l.get_X_train(datasetname, "") y = l.get_y(datasetname, "D") model = fit_and_save_log(parameters, np.array(X), np.array(y), model_filename) # Model C avec info D model_filename = get_model_filename(datasetname, "C", "D") if not os.path.exists(model_filename): print("Calcul model %s sur dataset %s (%s)" % ("C", datasetname, model_filename)) X = l.get_X_train(datasetname, "D") y = l.get_y(datasetname, "C") model = fit_and_save_log(parameters, np.array(X), np.array(y), model_filename) # Model E sans rien model_filename = get_model_filename(datasetname, "E", "")
import sys sys.path.append("lib") from AllStateDataLoader import AllStateDataLoader from sklearn import linear_model from sklearn import grid_search import numpy as np l = AllStateDataLoader() # Model C sans rien X_all = l.get_X_train("all", "") y_all = l.get_y("all", "C") parameters = {'penalty' : ['l2'], 'C' : np.logspace(-3, 0, 3)} model_C = grid_search.GridSearchCV( linear_model.LogisticRegression(), parameters, verbose=2 ) model_D.fit(np.array(X_all), np.array(y_all)) # Model D sans rien X_all = l.get_X_train("all", "") y_all = l.get_y("all", "D") parameters = {'penalty' : ['l2'], 'C' : np.logspace(-3, 0, 3)} model_D = grid_search.GridSearchCV( linear_model.LogisticRegression(), parameters, verbose=2
# y_2_predict = p.predict_simple("2", "logistic", "ABCDEFG", kind="train")
#
# X_3 = l.get_X_train("3", "")
# y_3 = l.get_y("3", "ABCDEFG")
# y_3_predict = p.predict_simple("3", "logistic", "ABCDEFG", kind="train")
#
# X_all = l.get_X_train("all", "")
# y_all = l.get_y("all", "ABCDEFG")
# y_all_predict = p.predict_simple("all", "logistic", "ABCDEFG", kind="train")

# print "score 2 logistic : %.4f" % (score(y_2, y_2_predict))
# print "score 3 logistic : %.4f" % (score(y_3, y_3_predict))
# print "score all logistic : %.4f" % (score(y_all, y_all_predict))

# Targets and "linearsvc" simple predictions (kind="train") per dataset.
# X_2 = l.get_X_train("2", "")
y_2 = l.get_y("2", "ABCDEFG")
y_2_predict = p.predict_simple("2", "linearsvc", "ABCDEFG", kind="train")

# X_3 = l.get_X_train("3", "")
y_3 = l.get_y("3", "ABCDEFG")
y_3_predict = p.predict_simple("3", "linearsvc", "ABCDEFG", kind="train")

# X_4 = l.get_X_train("4", "")
y_4 = l.get_y("4", "ABCDEFG")
y_4_predict = p.predict_simple("4", "linearsvc", "ABCDEFG", kind="train")

# X_all = l.get_X_train("all", "")
y_all = l.get_y("all", "ABCDEFG")
y_all_predict = p.predict_simple("all", "linearsvc", "ABCDEFG", kind="train")

# Parenthesised print: valid on Python 3 and prints the same text on Python 2.
print("score 2 linearsvc : %.4f" % (score(y_2, y_2_predict)))
import sys
sys.path.append("lib")

from AllStateDataLoader import AllStateDataLoader
from sklearn import linear_model
from sklearn import grid_search
import numpy as np

l = AllStateDataLoader()

# Model C with nothing else
X_all = l.get_X_train("all", "")
y_all = l.get_y("all", "C")

parameters = {'penalty': ['l2'], 'C': np.logspace(-3, 0, 3)}
model_C = grid_search.GridSearchCV(linear_model.LogisticRegression(), parameters, verbose=2)

# Fit the grid search built just above on the "C" targets. model_D is not
# defined until further down, so calling model_D.fit here raised NameError
# and left model_C unfitted.
model_C.fit(np.array(X_all), np.array(y_all))

# Model D with nothing else
X_all = l.get_X_train("all", "")
y_all = l.get_y("all", "D")

parameters = {'penalty': ['l2'], 'C': np.logspace(-3, 0, 3)}
model_D = grid_search.GridSearchCV(linear_model.LogisticRegression(), parameters, verbose=2)

model_D.fit(np.array(X_all), np.array(y_all))

# Model C with D