Esempio n. 1
0
import xgboost as xgb
import MNIST.DataClean as dc
import numpy as np

# Fit an XGBoost classifier on the training table, predict the test table and
# write the predictions to a two-column CSV with an "ImageId,Label" header.

# Column 0 of the training array is used as the target; every other column
# is used as a feature.
train_array = dc.convertPandasDataFrameToNumpyArray(dc.loadTrainData(describe=False))
features = train_array[:, 1:]
labels = train_array[:, 0]

classifier = xgb.XGBClassifier(max_depth=8, n_estimators=100, seed=0)

# The first 2000 training rows are also handed to fit() as the evaluation set,
# so the per-round metric printed by verbose=True is measured on data the
# model has already seen.
monitor_rows = slice(None, 2000)
classifier.fit(
    features,
    labels,
    eval_set=[(features[monitor_rows], labels[monitor_rows])],
    verbose=True,
)

# The test array is used whole: all of its columns are passed as features.
test_array = dc.convertPandasDataFrameToNumpyArray(dc.loadTestData())
test_features = test_array[:, 0:]

print("Beginning prediction now")
predicted = classifier.predict(test_features)
print("Prediction complete")

# One output row per prediction: a 1-based running id next to the predicted value.
image_ids = range(1, len(predicted) + 1)
np.savetxt(
    'mnist-xgb.csv',
    np.c_[image_ids, predicted],
    delimiter=',',
    header='ImageId,Label',
    comments='',
    fmt='%d',
)
print("Save predictions to file complete")
Esempio n. 2
0
import MNIST.DataClean as dc
import sklearn.ensemble as ensemble
import MLScripts.Helpers as helper
import MLScripts.Metrics as metrics
import csv

# Load the train and test tables and convert each one to a NumPy array.
# NOTE(review): confirm that DataClean exposes load_train_data / load_test_data;
# a camelCase spelling (loadTrainData / loadTestData) appears in related scripts.
train_frame = dc.load_train_data()
train_data = dc.convertPandasDataFrameToNumpyArray(train_frame)
test_frame = dc.load_test_data()
test_data = dc.convertPandasDataFrameToNumpyArray(test_frame)

# Column 0 of the training array is used as the target, the remaining columns
# as features.
random_forest = ensemble.RandomForestClassifier(n_estimators=100, max_depth=4, random_state=1)
train_x = train_data[:, 1:]
train_y = train_data[:, 0]

# Fit on the full training set, then pass the same estimator to the project's
# cross-validation helper with cvCount=5. cv_score is later reduced with
# .max() and .mean(), so it is presumably an array of per-fold scores - TODO confirm.
random_forest.fit(train_x, train_y)
cv_score = metrics.crossValidationScore(random_forest, train_x, train_y, cvCount=5)

# Hold-out evaluation: split the training data with the project helper, fit a
# second forest with the same hyper-parameters on the training part and
# compute the project's metrics on its predictions for the held-out part.
xTrain, xTest, yTrain, yTest = metrics.traintestSplit(train_x, train_y, randomState=1)
cv_forest = ensemble.RandomForestClassifier(max_depth=4, n_estimators=100, random_state=1)
cv_forest.fit(xTrain, yTrain)
y_predict = cv_forest.predict(xTest)
ta = metrics.trainingAccuracy(yTest, y_predict)
rmse = metrics.rmse(yTest, y_predict)
nrmse = metrics.nrmse(yTest, y_predict)

# K-fold accuracy (kFolds=10) computed by the project helper from the original
# frame, using every column name except the first as a predictor and "label"
# as the output column.
# NOTE(review): both outputClass and outputClause are passed as "label" -
# verify against the helper's signature that both keywords are expected.
predictors = dc.getColNames(train_frame)[1:]
kfoldAccuracy = metrics.measureKFoldAccuracy(train_frame, random_forest, predictors, outputClass="label", outputClause="label", kFolds=10)

print("Max Cross Validation Score : ", cv_score.max(), "\nAverage Cross Validation Score : ", cv_score.mean(),
  "\nExtraTreeCLassifier Score : ", random_forest.score(xTrain, yTrain),
Esempio n. 3
0
from keras.layers import Input, merge
from keras.layers.core import Flatten, Dense, Dropout, Activation
from keras.layers.convolutional import Convolution2D, MaxPooling2D, AveragePooling2D
from keras.layers.normalization import BatchNormalization
import keras.utils.np_utils as kutils
import keras.callbacks as callbacks
from keras.utils.visualize_util import plot, model_to_dot

import MNIST.DataClean as dc
import numpy as np

# Training hyper-parameters and the input image geometry.
batch_size = 128
nb_epoch = 200  # alternative value noted by the author: 12
img_rows = 28
img_cols = 28

# Load the training table as a NumPy array.
trainData = dc.convertPandasDataFrameToNumpyArray(dc.loadTrainData(describe=False))

# Every column after the first is reshaped into one single-channel
# img_rows x img_cols image per row, converted to float and divided by 255.
trainX = trainData[:, 1:].reshape(len(trainData), 1, img_rows, img_cols).astype(float)
trainX /= 255.0

# Column 0 is turned into a categorical matrix; its width is taken as the
# number of classes.
trainY = kutils.to_categorical(trainData[:, 0])
nb_classes = trainY.shape[1]

input_layer = Input(name="input", shape=(1, 28, 28))

#conv 1
conv1 = Convolution2D(96,
                      3,
                      3,
                      activation='relu',
                      init='glorot_uniform',
Esempio n. 4
0
# Side length of the square convolution kernels used below.
nb_conv = 3

trainPath = r"D:\Users\Yue\PycharmProjects\Kaggle\MNIST\Data\mnist_train.csv"
validationPath = r"D:\Users\Yue\PycharmProjects\Kaggle\MNIST\Data\mnist_test.csv"


def _as_scaled_images(flat_pixels, rows, cols):
    """Return *flat_pixels* as an (n, 1, rows, cols) float array divided by 255.

    ``flat_pixels`` is a 2-D array with one row per image; the input array is
    not modified because ``astype`` produces a copy before the division.
    """
    images = flat_pixels.reshape(flat_pixels.shape[0], 1, rows, cols).astype(float)
    images /= 255.0
    return images


# Training and validation tables: column 0 is used as the target, the
# remaining columns are the pixel values.
trainData = pd.read_csv(trainPath, header=0).values
trainX = _as_scaled_images(trainData[:, 1:], img_rows, img_cols)

validationData = pd.read_csv(validationPath, header=0).values
validateX = _as_scaled_images(validationData[:, 1:], img_rows, img_cols)

# The test table is used whole (all columns are pixels) with a fixed 28x28 shape.
testData = dc.convertPandasDataFrameToNumpyArray(dc.loadTestData())
testX = _as_scaled_images(testData, 28, 28)

trainY = kutils.to_categorical(trainData[:, 0])
validationY = kutils.to_categorical(validationData[:, 0])

# The width of the categorical training target gives the number of classes.
nb_classes = trainY.shape[1]

cnn = models.Sequential()

# Two zero-padding + nb_conv x nb_conv ReLU convolution pairs; the first
# padding layer also declares the (1, 28, 28) input shape.
cnn.add(conv.ZeroPadding2D((1, 1), input_shape=(1, 28, 28)))
cnn.add(conv.Convolution2D(nb_filters_1, nb_conv, nb_conv, activation="relu"))
cnn.add(conv.ZeroPadding2D((1, 1)))
cnn.add(conv.Convolution2D(nb_filters_1, nb_conv, nb_conv, activation="relu"))