def gen_Preds(model):
    """Predict Quote_Flag for the test set and write the submission CSV.

    Args:
        model: a fitted estimator exposing ``predict``.
    """
    test_df = Prep('test')
    quote_ids = test_df.Quote_Id
    features = test_df.drop(['Quote_Id'], axis=1).values
    predicted = pd.DataFrame(model.predict(features), columns=['Quote_Flag'])
    submission = pd.concat([quote_ids, predicted], axis=1)
    submission.to_csv("ida_a3_12590941.csv", index=False)
def get_meta():
    """Load the four persisted base models and hand their inputs to _meta_pred."""
    df = Prep()
    X, y = smoter(df)
    base_models = [
        load(path)
        for path in ("modelA.joblib", "modelB.joblib",
                     "modelC.joblib", "modelD.joblib")
    ]
    _meta_pred(X, y, base_models)
def gen_meta_preds():
    """Stacked-ensemble inference on the test set.

    Each base model's predictions become one column of meta-features; the
    persisted meta model then produces the final Quote_Flag values, which are
    written to the submission CSV.

    NOTE(review): the StandardScaler is fit on the *test* features here —
    presumably it should reuse the scaler fit during training; confirm.
    """
    test_df = Prep('test')
    quote_ids = test_df.Quote_Id
    features = test_df.drop(['Quote_Id'], axis=1).values

    base_models = [
        load(path)
        for path in ("modelA.joblib", "modelB.joblib",
                     "modelC.joblib", "modelD.joblib")
    ]

    meta_features = pd.DataFrame(quote_ids)
    scaler = StandardScaler()
    scaler.fit(features)
    features = scaler.transform(features)
    for base in base_models:
        meta_features = pd.concat(
            [meta_features, pd.DataFrame(base.predict(features))], axis=1)

    meta_model = load("meta.joblib")
    predicted = meta_model.predict(meta_features.drop(["Quote_Id"], axis=1))
    predicted = pd.DataFrame(data=predicted, columns=['Quote_Flag'])
    submission = pd.concat([quote_ids, predicted], axis=1)
    submission.to_csv("ida_a3_12590941.csv", index=False)
def get_TREES():
    """Prepare + resample the training data, then run the tree experiments."""
    X, y = smoter(Prep())
    _TREES(X, y)
def get_KNN():
    """Prepare + resample the training data, then run the KNN experiment."""
    X, y = smoter(Prep())
    _KNN(X, y)
def get_SVM():
    """Prepare + resample the training data, then run the SVM experiment."""
    X, y = smoter(Prep())
    _SVM(X, y)
from sklearn.neural_network import MLPClassifier as MLP
from sklearn import svm
from sklearn.decomposition import PCA
from sklearn.neighbors import (NeighborhoodComponentsAnalysis,
                               KNeighborsClassifier)
# Three random forest methods
from sklearn.ensemble import RandomForestClassifier as RAND
from sklearn.ensemble import ExtraTreesClassifier as XTRA
from sklearn.tree import DecisionTreeClassifier as DCIS
# Memory optimisation
from tempfile import mkdtemp
from shutil import rmtree
from joblib import (Memory, dump, load)

# Feature column names (everything except the Quote_Flag target).
# NOTE(review): computed eagerly at import time via Prep() — this runs the
# whole data-prep step on import; confirm that is intended.
FIELDS = Prep().drop(columns=['Quote_Flag']).columns.tolist()
RANDOM_STATE = None  # seed passed to the estimators; None = nondeterministic
CV = 10  # cross-validation fold count; lower is quicker, higher more robust

################################################################################
# Utility methods
################################################################################


def timeit(method):
    """Decorator that prints the wall-clock runtime of the wrapped callable."""
    @wraps(method)
    def wrap(*args, **kwargs):
        start = time.time()
        result = method(*args, **kwargs)
        elapsed = time.time() - start
        print('function: {a} took {b:2.4f}s'.format(
            a=method.__name__.upper(), b=(elapsed)))
        return result
    return wrap
def main(): prep = Prep() # Parse Cifar 10 train data print "Parsing random patches" train_x, train_y = prep.parse_cifar10(loc=DATA_DIR) patches_dir = "{}/train_patches.pkl".format(SAVE_DIR) if os.path.isfile(patches_dir): print ">> already have ", patches_dir with open(patches_dir, "r") as f: patches = cPickle.load(f) else: # Extract patches patches = prep.random_patches(train_x, w=6, d=3, N=10000) # Dump patches with open(patches_dir, "wb") as f: cPickle.dump(patches, f) # Learn KMeans print "Learning Kmeans" k = 50 kmeans_dir = "{}/kmeans.pkl".format(SAVE_DIR) if os.path.isfile(kmeans_dir): print ">> already have ", kmeans_dir with open(kmeans_dir, "r") as f: kmeans = cPickle.load(f) else: kmeans = KMeans(n_clusters=k) kmeans.fit(patches) # Dump KMeans model with open(kmeans_dir, "wb") as f: cPickle.dump(kmeans, f) print ">> # Kmeans.centoid =", k # Extract K-means feature (stride = 1) print "Extracting Kmeans features : N, n*n*d -> N, (n-w)*(n-w)*K" w = 6 N, d, width, height = train_x.shape kmeans_ft_dir = "{}/kmeans_ft.pkl".format(SAVE_DIR) if os.path.isfile(kmeans_ft_dir): print ">> already have ", kmeans_ft_dir with open(kmeans_ft_dir, "r") as f: kmeans_ft = cPickle.load(f) else: kmeans_ft = np.empty((N, k, width - w + 1, height - w + 1)) for y in range(height - w): for x in range(width - w): patch = train_x[:, :, y : y + w, x : x + w].reshape(N, w * w * d) kmeans_ft[:, :, y, x] = kmeans.transform(patch) # Dump Kmeans features with open(kmeans_ft_dir, "wb") as f: cPickle.dump(kmeans_ft, f) print ">>", kmeans_ft.min(), kmeans_ft.max() # Pooling print "Pooling KMeans features : N, (n-w)*(n-w)*K -> N, (4*K)" pool_ft_dir = "{}/pool_ft.pkl".format(SAVE_DIR) if os.path.isfile(pool_ft_dir): print ">> already have ", pool_ft_dir with open(pool_ft_dir, "r") as f: pool_ft = cPickle.load(f) else: pool_ft = np.empty((N, 4 * k)) dy = int((kmeans_ft.shape[2]) / 2) dx = int((kmeans_ft.shape[3]) / 2) for i in range(10000): for y in range(2): for x in range(2): patch = 
kmeans_ft[i, :, dy * y : dy * (y + 1), dx * x : dx * (x + 1)].reshape((k, dy * dx)) pool_ft[i, 2 * y + x : 2 * y + x + k] = patch.mean(axis=1) # Dump Kmeans features with open(pool_ft_dir, "wb") as f: cPickle.dump(pool_ft, f) print ">> pool_ft.shape =", pool_ft.shape print ">>", pool_ft.min(), pool_ft.max() # Ensemble print "Learning Classifiers : rbf svm, knn, gnb, rf, ensemble" clf_svm_dir = "{}/clf_rbf_svm.pkl".format(SAVE_DIR) clf_knn_dir = "{}/clf_knn.pkl".format(SAVE_DIR) clf_gnb_dir = "{}/clf_gnb.pkl".format(SAVE_DIR) clf_rf_dir = "{}/clf_rf.pkl".format(SAVE_DIR) clf_ensemble_dir = "{}/clf_ensemble.pkl".format(SAVE_DIR) if os.path.isfile(clf_ensemble_dir): print ">> already have", clf_ensemble_dir with open(clf_svm_dir, "r") as f: clf1 = cPickle.load(f) with open(clf_knn_dir, "r") as f: clf2 = cPickle.load(f) with open(clf_gnb_dir, "r") as f: clf3 = cPickle.load(f) with open(clf_rf_dir, "r") as f: clf4 = cPickle.load(f) with open(clf_ensemble_dir, "r") as f: eclf = cPickle.load(f) else: clf1 = svm.SVC(kernel="rbf", C=1) clf2 = KNeighborsClassifier(n_neighbors=10) clf3 = GaussianNB() clf4 = RandomForestClassifier(random_state=1) eclf = VotingClassifier(estimators=[("svc", clf1), ("knn", clf2), ("gnb", clf3), ("rf", clf4)], voting="soft") clf1.fit(pool_ft, train_y) clf2.fit(pool_ft, train_y) clf3.fit(pool_ft, train_y) clf4.fit(pool_ft, train_y) with open(clf_svm_dir, "wb") as f: cPickle.dump(clf1, f) with open(clf_knn_dir, "wb") as f: cPickle.dump(clf2, f) with open(clf_gnb_dir, "wb") as f: cPickle.dump(clf3, f) with open(clf_rf_dir, "wb") as f: cPickle.dump(clf4, f) with open(clf_ensemble_dir, "wb") as f: cPickle.dump(eclf, f) for clf, label in zip( [clf1, clf2, clf3, clf4, eclf], ["RBF SVM", "KNN", "Gaussian NB", "Random Forest", "Ensemble"] ): scores = cross_validation.cross_val_score(clf, train_x, train_y, cv=5, scoring="accuracy") print ("Accuracy: %0.2f (+/- %0.2f) [%s]" % (scores.mean(), scores.std(), label))
from kivy.config import Config
from kivy.app import App
from kivy.core.window import Window

from prep import Prep

# Fixed 800x600, non-resizable window; use the system keyboard mode.
Config.set('graphics', 'width', '800')
Config.set('graphics', 'height', '600')
Config.set('graphics', 'resizable', False)
Config.set('kivy', 'keyboard_mode', 'system')
# save configurations
Config.write()

prep = Prep()
# BUG FIX: prep.prep() used to sit as a bare statement inside the class body
# of mainApp.  It still ran exactly once at import time, but as a confusing
# side effect of class creation; run it explicitly at module level instead.
prep.prep()


class mainApp(App):
    """Top-level Kivy application for QubitLab."""

    def build(self):
        """Configure the window chrome and return the root widget."""
        Window.toggle_fullscreen()
        Window.fullscreen = False
        self.icon = 'res/icons/logo.png'
        self.title = 'QubitLab'
        Window.clearcolor = (1, 1, 1, 1)
        return prep.screen_manager


# BUG FIX: the app previously started unconditionally at import time; guard
# it so the module can be imported (e.g. for testing) without launching a UI.
if __name__ == '__main__':
    sample_app = mainApp()
    sample_app.run()