def main():
    """Train a Naive Bayes classifier on ``wine.csv`` and plot its predictions.

    The ``class`` column is used as the target and every other column as a
    feature. The data is split (40% held out), a fresh classifier is fitted
    and scored, and this is repeated with a new split for as long as the
    held-out accuracy is below 0.98. There is no retry limit, so the loop
    only ends once a split reaches that accuracy. The accepted accuracy is
    printed and the predictions for that split are plotted.
    """
    frame = pd.read_csv('wine.csv')
    y = frame['class'].values
    X = frame.drop('class', axis=1).values
    X = normalize(X)  # normalize the feature matrix
    class_names = ['best', 'better', 'good']

    while True:
        # Split the data into training and test sets; the labels are split
        # the same way.
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.4)
        clf = NaiveBayes()  # Naive Bayes classifier, refitted on every split
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)
        # Same exit test as `while accuracy < 0.98`, evaluated after each fit.
        if not accuracy < 0.98:
            break

    print("Accuracy:", accuracy)

    # Per the original note, plot_in_2d uses PCA to reduce the data to two
    # dimensions before drawing the result.
    Plot().plot_in_2d(X_test,
                      y_pred,
                      title="Naive Bayes",
                      accuracy=accuracy,
                      legend_labels=class_names)
def get_baseline(all=True): years, past_values, values = get_data() train_x, train_y, test_x, test_y = train_test_split(past_values, values) pred = train_x train_score = mean_squared_error(train_y, pred) print('Baseline Training Score: RMSE: %s' % '{:,.0f}'.format(math.sqrt(train_score))) pred = test_x test_score = mean_squared_error(test_y, pred) print('Baseline Test Score: RMSE: %s' % '{:,.0f}'.format(math.sqrt(test_score))) bttscore = 'RMSE: %s/%s' % ('{:,.0f}'.format( math.sqrt(train_score)), '{:,.0f}'.format(math.sqrt(test_score))) if all: plot_y = [i for i in train_y] + [x for x in test_y] plot_pred = [i for i in train_x] + [x for x in test_x] else: plot_y = [None for i in train_y] + [x for x in test_y] plot_pred = [None for i in train_x] + [x for x in test_x] return np.array(plot_y), np.array(plot_pred), np.array(years), bttscore
def oneFit(X, y, activation="relu", hidden_layers=(20, 20), test_size=0.2, loss=False):
    """Run a single split / fit / predict cycle of the neural network classifier.

    Args:
        X: Feature data passed to ``train_test_split``.
        y: Targets passed to ``train_test_split``; the held-out part is
            converted to 1-D labels with ``array2Label``.
        activation: Activation name forwarded to ``NeuralNetworkClassifier``.
        hidden_layers: Forwarded as ``hidden_layer_sizes``.
        test_size: Forwarded to ``train_test_split``.
        loss: Selects the return value, see below.

    Returns:
        ``nn.getIterLoss()`` when ``loss`` is truthy (per the original
        docstring, the loss recorded during iteration); otherwise the
        ``accuracy_score`` of the predictions on the held-out part.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size)
    # Turn the 2-D target array into 1-D labels for scoring.
    expected = array2Label(y_test)

    nn = NeuralNetworkClassifier(hidden_layer_sizes=hidden_layers,
                                 activation=activation)
    nn.fit(X_train, y_train)
    predicted = nn.predict(X_test)

    if loss:
        return nn.getIterLoss()
    # Original author's note: a random guess scores about 0.006.
    return accuracy_score(expected, predicted)
# Metrics are loss metrics that we want to have available for each epoch, # so we can review how are we doing at each training stage. # mse is mean_squared_error, mpe is mean_absolute_percentage_error model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mse','mape']) # Here we're starting our training history=model.fit(train_x, train_y, verbose=2, epochs=epochs, batch_size=batches) return model, name, mparams, history if __name__ == '__main__': # Getting data formatted as a supervised problem years, past_values, values=get_data() X, Y = past_values, values # Split data into two parts: one for training, one for testing # Test part won't be seen by a model during training so it will # give us some idea how our model performs on a unseen data. train_x, train_y, test_x, test_y = train_test_split(X, Y) # Getting our command line parameters name, epochs, batches, plot=get_params() # Do the training model, name, mp, history=train_model(name, train_x, train_y, epochs, batches) # Save models and the training history for later use mname='models/model-%s-%d-%d' % (name, epochs, batches) model.save(mname+'.h5') with open(mname+'-history.pickle', 'wb') as ms: pickle.dump(history.history, ms) print() print('Model and its history saved in %s*' % mname) title='%s (epochs=%d, batch_size=%d)' % (name, epochs, batches) # Test our model on both data that has been seen # (training data set) and unseen (test data set) print('Scores for %s' % title)
import numpy as np
import sys
from nn import *
from tools import train_test_split, one_hot_encoder
from viz import plot_history

# Load the arrays stored under the default keys of the .npz archive.
data = np.load('datasets/cifar10.npz')
X = data['arr_0']
y = data['arr_1']
# Cast to float32, divide by 255 and shift by -0.5.
# NOTE(review): presumably the inputs are pixel values in [0, 255], which
# would give a range of [-0.5, 0.5] -- confirm against the dataset.
X = X.astype(np.float32) / 255. - .5
# one_hot_encoder returns two values; only the first (encoded targets) is kept.
y, _ = one_hot_encoder(y)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
# Seed NumPy's global RNG as well as the model's own random_state.
np.random.seed(42)
m = NeuralNetwork('crossentropy', optimizer=Adam(lr=0.05), verbose=True,
                  verbose_step=1, debug=True, random_state=42)
# Layer stack: flatten -> dense(256, elu, L2-regularized weights)
# -> batch norm -> dense(10, softmax).
m.add(Flatten())
m.add(Dense(256, 'elu', weights_regularizer=L2Regularizer()))
m.add(BatchNorm())
m.add(Dense(10, 'softmax'))
# Just to initialize parameters (n_epochs=0).
# NOTE(review): the call below is cut off in this view; its remaining
# arguments are not visible here.
m.fit(X_train, y_train, X_test, y_test, n_epochs=0, batch_size=256,
def test_train_test_split():
    """Check the seeded split of a three-element array.

    With ``random_state=42`` and a single input array, ``train_test_split``
    is expected to return ``[0, 1]`` as the training part and ``[2]`` as the
    test part.
    """
    X_train, X_test = train_test_split(np.array([0, 1, 2]), random_state=42)
    # `assert a == b` on arrays takes the truth value of an element-wise
    # comparison; with more than one element NumPy raises ValueError instead
    # of passing or failing. assert_array_equal compares shape and contents
    # and raises AssertionError with a readable diff on mismatch.
    np.testing.assert_array_equal(X_train, np.array([0, 1]))
    np.testing.assert_array_equal(X_test, np.array([2]))