예제 #1
0
from Data import DataExtract

# Load the train/test split through the project loader (presumably MNIST
# read from CSV, going by the function name -- confirm in Data.DataExtract).
X_train, X_test, Y_train, Y_test = DataExtract.load_minist_csv()

# Compare LogisticRegression, RandomForest and XGBoost on the same split.
from sklearn.linear_model import LogisticRegressionCV
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier

# (display name, estimator) pairs; each one is fitted and scored in order below.
models = [
    # Cs=10: number of candidate inverse-regularisation strengths to search
    # (a smaller C means stronger regularisation); cv=5: cross-validation folds.
    ('LogisticRegression', LogisticRegressionCV(Cs=10, cv=5)),
    ('RandomForest',
     RandomForestClassifier(n_estimators=50)),  # , criterion='gini'
    ('XGBoost',
     XGBClassifier(max_depth=3,
                   n_estimators=100,
                   # `silent=True` is deprecated/removed in XGBoost and only
                   # produces an "unused parameter" warning on current
                   # releases; `verbosity=0` is the documented way to
                   # suppress training output.
                   verbosity=0,
                   objective='multi:softmax')),
]
for name, model in models:
    model.fit(X_train, Y_train)
    # First line reports training-set accuracy, second line test-set accuracy.
    print(name, '训练集正确率:', model.score(X_train, Y_train))
    print(name, '测试集正确率:', model.score(X_test, Y_test))

# LogisticRegression 训练集正确率: 0.9294345238095238
# LogisticRegression 测试集正确率: 0.9158333333333334
# RandomForest 训练集正确率: 1.0
# RandomForest 测试集正确率: 0.8848809523809524
# XGBoost 训练集正确率: 0.9670238095238095
# XGBoost 测试集正确率: 0.9277380952380953
예제 #2
0
import numpy as np
from datetime import datetime
from Data import DataExtract, DataTransform
from Minist.Common.Util import error_rate
from sklearn.utils import shuffle

# keras test error rate 0.036071.
# Elapsted time for keras rmsprop:  0:00:25.107859
# tensorflow test error rate 0.025238.
# Elapsted time for tensorflow rmsprop:  0:08:09.270659

# Load the train/test split with the loader's PCA option switched off.
X_train, X_test, Y_train, Y_test = DataExtract.load_minist_csv(pca=False)

# One-hot encode the labels of both splits with the same number of classes.
class_num = 10
Y_train_onehot, Y_test_onehot = (
    DataTransform.y2one_hot(labels, class_num=class_num)
    for labels in (Y_train, Y_test)
)

# N: number of training rows, D: number of input features.
N, D = X_train.shape
# M: hidden-layer width; batch_size / epochs: training hyper-parameters
# (presumably consumed by the training code that follows this section).
M, batch_size, epochs = 512, 300, 50

##### keras #####
from keras.models import Sequential
from keras.layers import Dense
# Shapes -- input: N,D   W1: D,M   W2: M,class_num
model = Sequential([
    # Hidden layer: `units` is the number of outputs; the input dimension is D.
    Dense(units=M, input_shape=(D, ), activation='relu'),
    # Output layer: one unit per class, no activation specified here.
    Dense(units=class_num),
])
예제 #3
0
from Data import DataExtract
from sklearn.mixture import GaussianMixture
from Data.DataTransform import purity, DBI

# Only the training split is used here; the test split is discarded.
X_train, _, Y_train, _ = DataExtract.load_minist_csv()

# Fit a 10-component Gaussian mixture to the training features.
model = GaussianMixture(n_components=10).fit(X_train)

# Component means, VxD.
M = model.means_
# Component covariances.
var = model.covariances_

# Posterior probability of every component for each sample, NxV.
R = model.predict_proba(X_train)

# Purity of the resulting clustering against the true labels.
purity_score = purity(Y_train, R)
print("Purity:", purity_score)

# DBI: ratio of the summed std deviations of two clusters to the distance
# between their means -- lower is better.
dbi_score = DBI(X_train, M, R)
print("DBI:", dbi_score)