import xgboost as xgb
import MNIST.DataClean as dc
import numpy as np

# Train an XGBoost classifier on the Kaggle MNIST training set and write
# the test-set predictions to a Kaggle-format submission CSV.

train_frame = dc.loadTrainData(describe=False)
train_data = dc.convertPandasDataFrameToNumpyArray(train_frame)

# Gradient-boosted tree classifier; fixed seed for reproducibility.
model = xgb.XGBClassifier(n_estimators=100, seed=0, max_depth=8)

# NOTE(review): the eval set is the first 2000 *training* rows, so the
# metric printed during fitting is not a held-out validation score.
eval_set = [(train_data[:2000, 1:], train_data[:2000, 0])]
model.fit(train_data[:, 1:], train_data[:, 0], eval_set=eval_set, verbose=True)

test_frame = dc.loadTestData()
test_data = dc.convertPandasDataFrameToNumpyArray(test_frame)
test_x = test_data[:, 0:]

print("Beginning prediction now")
y_pred = model.predict(test_x)
print("Prediction complete")

# Kaggle submission format: ImageId is 1-based, one label per row.
np.savetxt('mnist-xgb.csv',
           np.c_[range(1, len(y_pred) + 1), y_pred],
           delimiter=',', header='ImageId,Label', comments='', fmt='%d')
print("Save predictions to file complete")
import MNIST.DataClean as dc import sklearn.ensemble as ensemble import MLScripts.Helpers as helper import MLScripts.Metrics as metrics import csv train_frame = dc.load_train_data() train_data = dc.convertPandasDataFrameToNumpyArray(train_frame) test_frame = dc.load_test_data() test_data = dc.convertPandasDataFrameToNumpyArray(test_frame) random_forest = ensemble.RandomForestClassifier(n_estimators=100, max_depth=4, random_state=1) train_x = train_data[:, 1:] train_y = train_data[:, 0] random_forest.fit(train_x, train_y) cv_score = metrics.crossValidationScore(random_forest, train_x, train_y, cvCount=5) xTrain, xTest, yTrain, yTest = metrics.traintestSplit(train_x, train_y, randomState=1) cv_forest = ensemble.RandomForestClassifier(max_depth=4, n_estimators=100, random_state=1) cv_forest.fit(xTrain, yTrain) y_predict = cv_forest.predict(xTest) ta = metrics.trainingAccuracy(yTest, y_predict) rmse = metrics.rmse(yTest, y_predict) nrmse = metrics.nrmse(yTest, y_predict) predictors = dc.getColNames(train_frame)[1:] kfoldAccuracy = metrics.measureKFoldAccuracy(train_frame, random_forest, predictors, outputClass="label", outputClause="label", kFolds=10) print("Max Cross Validation Score : ", cv_score.max(), "\nAverage Cross Validation Score : ", cv_score.mean(), "\nExtraTreeCLassifier Score : ", random_forest.score(xTrain, yTrain),
from keras.layers import Input, merge from keras.layers.core import Flatten, Dense, Dropout, Activation from keras.layers.convolutional import Convolution2D, MaxPooling2D, AveragePooling2D from keras.layers.normalization import BatchNormalization import keras.utils.np_utils as kutils import keras.callbacks as callbacks from keras.utils.visualize_util import plot, model_to_dot import MNIST.DataClean as dc import numpy as np batch_size = 128 # 128 nb_epoch = 200 # 12 img_rows, img_cols = 28, 28 trainData = dc.convertPandasDataFrameToNumpyArray( dc.loadTrainData(describe=False)) trainX = trainData[:, 1:].reshape(trainData.shape[0], 1, img_rows, img_cols) trainX = trainX.astype(float) trainX /= 255.0 trainY = kutils.to_categorical(trainData[:, 0]) nb_classes = trainY.shape[1] input_layer = Input(shape=(1, 28, 28), name="input") #conv 1 conv1 = Convolution2D(96, 3, 3, activation='relu', init='glorot_uniform',
# --- Data loading / preprocessing and start of the CNN definition ---
# (chunk of a larger script: pd, img_rows/img_cols, models, conv,
#  nb_filters_1 and kutils are defined earlier, outside this view)

nb_conv = 3  # convolution kernel size (3x3)

# Local dumps of the full MNIST train/test splits.
trainPath = r"D:\Users\Yue\PycharmProjects\Kaggle\MNIST\Data\mnist_train.csv"
validationPath = r"D:\Users\Yue\PycharmProjects\Kaggle\MNIST\Data\mnist_test.csv"

# Training images: drop the label column, reshape to (n, 1, rows, cols)
# (channels-first, single channel) and scale pixels to [0, 1].
trainData = pd.read_csv(trainPath, header=0).values
trainX = trainData[:, 1:].reshape(trainData.shape[0], 1, img_rows, img_cols)
trainX = trainX.astype(float)
trainX /= 255.0

# Validation images, identical preprocessing.
validationData = pd.read_csv(validationPath, header=0).values
validateX = validationData[:, 1:].reshape(validationData.shape[0], 1, img_rows, img_cols)
validateX = validateX.astype(float)
validateX /= 255.0

# Kaggle test set has no label column, so every column is a pixel.
testData = dc.convertPandasDataFrameToNumpyArray(dc.loadTestData())
testX = testData.reshape(testData.shape[0], 1, 28, 28)
testX = testX.astype(float)
testX /= 255.0

# One-hot encode the digit labels (column 0).
trainY = kutils.to_categorical(trainData[:, 0])
validationY = kutils.to_categorical(validationData[:, 0])
nb_classes = trainY.shape[1]

# Network head: two zero-padded 3x3 conv layers with ReLU.
cnn = models.Sequential()
cnn.add(conv.ZeroPadding2D((1, 1), input_shape=(1, 28, 28)))
cnn.add(conv.Convolution2D(nb_filters_1, nb_conv, nb_conv, activation="relu"))
cnn.add(conv.ZeroPadding2D((1, 1)))
cnn.add(conv.Convolution2D(nb_filters_1, nb_conv, nb_conv, activation="relu"))