Example #1
def gen_Preds(model):
	# Load the prepared test set and split the ID column from the features.
	df = Prep('test')
	IDs = df.Quote_Id
	X = df.drop(['Quote_Id'], axis=1).values
	# Predict with the supplied model and write the submission file.
	prediction = model.predict(X)
	results = pd.DataFrame(data=prediction, columns=['Quote_Flag'])
	results = pd.concat([IDs, results], axis=1)
	results.to_csv("ida_a3_12590941.csv", index=False)
	return
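A minimal usage sketch for the function above, assuming one of the persisted estimators from the later examples (e.g. modelA.joblib) is a fitted scikit-learn model; the file name comes from Example #2 and is an assumption here:

from joblib import load

model = load("modelA.joblib")  # any fitted estimator with a predict() method
gen_Preds(model)               # writes ida_a3_12590941.csv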
Example #2
def get_meta():
	# Prepare the training data, rebalance it, and load the four persisted base models.
	df = Prep()
	X, y = smoter(df)
	models = ["modelA.joblib", "modelB.joblib", "modelC.joblib", "modelD.joblib"]
	model_list = []
	for model in models:
		model_list.append(load(model))
	_meta_pred(X, y, model_list)
	return
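The _meta_pred helper is not shown in this example. A minimal sketch of what such a stacking step could look like, assuming out-of-fold predictions as meta-features and a LogisticRegression meta-learner (both are assumptions, not the original implementation); the name _meta_pred_sketch is hypothetical:

import numpy as np
from joblib import dump
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_predict

def _meta_pred_sketch(X, y, model_list):
	# Out-of-fold predictions from each base model become the meta-features.
	meta_X = np.column_stack([cross_val_predict(m, X, y, cv=5) for m in model_list])
	# Fit a simple meta-learner on those predictions and persist it for Example #3.
	meta = LogisticRegression(max_iter=1000).fit(meta_X, y)
	dump(meta, "meta.joblib")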
Example #3
def gen_meta_preds():
	# Load the prepared test set and split the ID column from the features.
	df = Prep('test')
	IDs = df.Quote_Id
	X = df.drop(['Quote_Id'], axis=1).values

	# Load the four persisted base models.
	models = ["modelA.joblib", "modelB.joblib", "modelC.joblib", "modelD.joblib"]
	model_list = []
	for model in models:
		model_list.append(load(model))

	# Scale the features and collect one prediction column per base model.
	meta_res = pd.DataFrame(IDs)
	scaler = StandardScaler()
	scaler.fit(X)
	X = scaler.transform(X)
	for model in model_list:
		meta_res = pd.concat([meta_res, pd.DataFrame(model.predict(X))], axis=1)

	# Feed the base-model predictions to the meta-model and write the submission file.
	meta = load("meta.joblib")
	prediction = meta.predict(meta_res.drop(["Quote_Id"], axis=1))
	results = pd.DataFrame(data=prediction, columns=['Quote_Flag'])
	results = pd.concat([IDs, results], axis=1)
	results.to_csv("ida_a3_12590941.csv", index=False)
	return
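Note that gen_meta_preds fits the StandardScaler on the test features themselves, so the scaling can differ from whatever the base models saw during training. A hedged alternative, assuming the training-time scaler is persisted with joblib; the file name scaler.joblib and the variable X_train are hypothetical:

from joblib import dump, load
from sklearn.preprocessing import StandardScaler

# At training time: fit on the training matrix and persist the scaler.
scaler = StandardScaler().fit(X_train)
dump(scaler, "scaler.joblib")

# At prediction time: reuse the persisted scaler instead of refitting on test data.
scaler = load("scaler.joblib")
X = scaler.transform(X)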
Example #4
def get_TREES():
	df = Prep()
	X, y = smoter(df)
	_TREES(X, y)
	return
Example #5
def get_KNN():
	df = Prep()
	X, y = smoter(df)
	_KNN(X, y)
	return
Example #6
def get_SVM():
	df = Prep()
	X, y = smoter(df)
	_SVM(X, y)
	return
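Examples #4 to #6 are thin wrappers around the same prepare/resample/train pipeline. A small driver sketch that runs them in sequence (running them together is an assumption, not something the original code does):

def train_all():
	# Each wrapper prepares the data, rebalances it with smoter, and trains one model family.
	for trainer in (get_TREES, get_KNN, get_SVM):
		trainer()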
Example #7
from sklearn.neural_network import MLPClassifier as MLP
from sklearn import svm
from sklearn.decomposition import PCA
from sklearn.neighbors import (NeighborhoodComponentsAnalysis, KNeighborsClassifier)
# Three tree-based classifiers (random forest, extra trees, single decision tree)
from sklearn.ensemble import RandomForestClassifier as RAND
from sklearn.ensemble import ExtraTreesClassifier as XTRA
from sklearn.tree import DecisionTreeClassifier as DCIS

# Memory optimisation
from tempfile import mkdtemp
from shutil import rmtree
from joblib import (Memory, dump, load)

# Dependencies of the timeit decorator below
import time
from functools import wraps


FIELDS = Prep().drop(columns=['Quote_Flag']).columns.tolist()  # feature column names (target column dropped)
RANDOM_STATE = None  # seed for reproducible runs; None leaves the seeding random
CV = 10  # number of cross-validation folds; fewer folds are faster, more folds are more robust

################################################################################
# Utility methods
################################################################################
def timeit(method):
	# Decorator that reports how long the wrapped function took to run.
	@wraps(method)
	def wrap(*args, **kwargs):
		ts = time.time()
		result = method(*args, **kwargs)
		te = time.time()
		print('function: {a} took {b:2.4f}s'.format(a=method.__name__.upper(), b=(te - ts)))
		return result
	return wrap
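A short usage sketch for the decorator; the decorated function slow_sum is illustrative only:

@timeit
def slow_sum(n):
	# Deliberately heavy loop so the timing output is visible.
	return sum(range(n))

slow_sum(10000000)  # prints something like: function: SLOW_SUM took 0.4321s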
Example #8
def main():
    prep = Prep()

    # Parse Cifar 10 train data
    print "Parsing random patches"
    train_x, train_y = prep.parse_cifar10(loc=DATA_DIR)

    patches_dir = "{}/train_patches.pkl".format(SAVE_DIR)

    if os.path.isfile(patches_dir):
        print ">> already have ", patches_dir
        # The pickle was written in binary mode, so read it back in binary mode too.
        with open(patches_dir, "rb") as f:
            patches = cPickle.load(f)
    else:
        # Extract patches
        patches = prep.random_patches(train_x, w=6, d=3, N=10000)
        # Dump patches
        with open(patches_dir, "wb") as f:
            cPickle.dump(patches, f)

    # Learn KMeans
    print "Learning Kmeans"
    k = 50
    kmeans_dir = "{}/kmeans.pkl".format(SAVE_DIR)

    if os.path.isfile(kmeans_dir):
        print ">> already have ", kmeans_dir
        with open(kmeans_dir, "rb") as f:
            kmeans = cPickle.load(f)
    else:
        kmeans = KMeans(n_clusters=k)
        kmeans.fit(patches)
        # Dump KMeans model
        with open(kmeans_dir, "wb") as f:
            cPickle.dump(kmeans, f)
    print ">> # Kmeans.centoid =", k

    # Extract K-means feature (stride = 1)
    print "Extracting Kmeans features : N, n*n*d -> N, (n-w)*(n-w)*K"
    w = 6
    N, d, width, height = train_x.shape
    kmeans_ft_dir = "{}/kmeans_ft.pkl".format(SAVE_DIR)

    if os.path.isfile(kmeans_ft_dir):
        print ">> already have ", kmeans_ft_dir
        with open(kmeans_ft_dir, "rb") as f:
            kmeans_ft = cPickle.load(f)
    else:
        # One k-dimensional distance vector per patch position (stride 1 gives n - w + 1 positions per axis).
        kmeans_ft = np.empty((N, k, width - w + 1, height - w + 1))
        for y in range(height - w + 1):
            for x in range(width - w + 1):
                patch = train_x[:, :, y : y + w, x : x + w].reshape(N, w * w * d)
                kmeans_ft[:, :, y, x] = kmeans.transform(patch)
        # Dump Kmeans features
        with open(kmeans_ft_dir, "wb") as f:
            cPickle.dump(kmeans_ft, f)
    print ">>", kmeans_ft.min(), kmeans_ft.max()

    # Pooling
    print "Pooling KMeans features : N, (n-w)*(n-w)*K -> N, (4*K)"
    pool_ft_dir = "{}/pool_ft.pkl".format(SAVE_DIR)

    if os.path.isfile(pool_ft_dir):
        print ">> already have ", pool_ft_dir
        with open(pool_ft_dir, "rb") as f:
            pool_ft = cPickle.load(f)
    else:
        # 2x2 spatial pooling: average each quadrant of the feature map, giving 4 * k features per image.
        pool_ft = np.empty((N, 4 * k))
        dy = int((kmeans_ft.shape[2]) / 2)
        dx = int((kmeans_ft.shape[3]) / 2)
        for i in range(N):
            for y in range(2):
                for x in range(2):
                    patch = kmeans_ft[i, :, dy * y : dy * (y + 1), dx * x : dx * (x + 1)].reshape((k, dy * dx))
                    pool_ft[i, (2 * y + x) * k : (2 * y + x + 1) * k] = patch.mean(axis=1)
        # Dump Kmeans features
        with open(pool_ft_dir, "wb") as f:
            cPickle.dump(pool_ft, f)
    print ">> pool_ft.shape =", pool_ft.shape
    print ">>", pool_ft.min(), pool_ft.max()

    # Ensemble
    print "Learning Classifiers : rbf svm, knn, gnb, rf, ensemble"
    clf_svm_dir = "{}/clf_rbf_svm.pkl".format(SAVE_DIR)
    clf_knn_dir = "{}/clf_knn.pkl".format(SAVE_DIR)
    clf_gnb_dir = "{}/clf_gnb.pkl".format(SAVE_DIR)
    clf_rf_dir = "{}/clf_rf.pkl".format(SAVE_DIR)
    clf_ensemble_dir = "{}/clf_ensemble.pkl".format(SAVE_DIR)

    if os.path.isfile(clf_ensemble_dir):
        print ">> already have", clf_ensemble_dir
        with open(clf_svm_dir, "rb") as f:
            clf1 = cPickle.load(f)
        with open(clf_knn_dir, "rb") as f:
            clf2 = cPickle.load(f)
        with open(clf_gnb_dir, "rb") as f:
            clf3 = cPickle.load(f)
        with open(clf_rf_dir, "rb") as f:
            clf4 = cPickle.load(f)
        with open(clf_ensemble_dir, "rb") as f:
            eclf = cPickle.load(f)
    else:
        clf1 = svm.SVC(kernel="rbf", C=1, probability=True)  # probability estimates are required for soft voting
        clf2 = KNeighborsClassifier(n_neighbors=10)
        clf3 = GaussianNB()
        clf4 = RandomForestClassifier(random_state=1)
        eclf = VotingClassifier(estimators=[("svc", clf1), ("knn", clf2), ("gnb", clf3), ("rf", clf4)], voting="soft")

        clf1.fit(pool_ft, train_y)
        clf2.fit(pool_ft, train_y)
        clf3.fit(pool_ft, train_y)
        clf4.fit(pool_ft, train_y)
        eclf.fit(pool_ft, train_y)  # fit the ensemble as well so the pickled copy is usable

        with open(clf_svm_dir, "wb") as f:
            cPickle.dump(clf1, f)
        with open(clf_knn_dir, "wb") as f:
            cPickle.dump(clf2, f)
        with open(clf_gnb_dir, "wb") as f:
            cPickle.dump(clf3, f)
        with open(clf_rf_dir, "wb") as f:
            cPickle.dump(clf4, f)
        with open(clf_ensemble_dir, "wb") as f:
            cPickle.dump(eclf, f)

    # Cross-validate each classifier on the pooled features (the inputs the models were trained on).
    for clf, label in zip(
        [clf1, clf2, clf3, clf4, eclf], ["RBF SVM", "KNN", "Gaussian NB", "Random Forest", "Ensemble"]
    ):
        scores = cross_validation.cross_val_score(clf, pool_ft, train_y, cv=5, scoring="accuracy")
        print ("Accuracy: %0.2f (+/- %0.2f) [%s]" % (scores.mean(), scores.std(), label))
Example #9
from kivy.config import Config
from kivy.app import App
from kivy.core.window import Window
from prep import Prep

# fix the window size and disable resizing
Config.set('graphics', 'width', '800')
Config.set('graphics', 'height', '600')
Config.set('graphics', 'resizable', False)
Config.set('kivy', 'keyboard_mode', 'system')
# save configurations
Config.write()

prep = Prep()


class mainApp(App):
    # Runs once, at class-definition time, to prepare prep.screen_manager for build().
    prep.prep()

    def build(self):
        Window.toggle_fullscreen()
        Window.fullscreen = False
        self.icon = 'res/icons/logo.png'
        self.title = 'QubitLab'
        Window.clearcolor = (1, 1, 1, 1)
        return prep.screen_manager


if __name__ == '__main__':
    sample_app = mainApp()
    sample_app.run()