Example #1
import pandas as pd

# normalize, train_test_split, NaiveBayes, accuracy_score and Plot are assumed
# to come from the example's own from-scratch ML package rather than sklearn.
def main():

    data = pd.read_csv('wine.csv')
    y = data['class'].values

    X = data.drop('class', axis=1).values

    X = normalize(X)  # normalize the data
    label = ['best', 'better', 'good']

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.4)  # split the data into training and test sets; labels are split the same way

    clf = NaiveBayes()  # instantiate the naive Bayes classifier

    clf.fit(X_train, y_train)

    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    # Keep re-splitting and refitting until a split reaches 98% accuracy
    while accuracy < 0.98:
        X_train, X_test, y_train, y_test = train_test_split(X,
                                                            y,
                                                            test_size=0.4)

        clf = NaiveBayes()
        clf.fit(X_train, y_train)

        y_pred = clf.predict(X_test)

        accuracy = accuracy_score(y_test, y_pred)

    print("Accuracy:", accuracy)

    # Use PCA to reduce the data to two dimensions and plot the result
    Plot().plot_in_2d(X_test,
                      y_pred,
                      title="Naive Bayes",
                      accuracy=accuracy,
                      legend_labels=label)
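
`NaiveBayes` here is the example project's own classifier, not scikit-learn's. As a rough, hypothetical sketch of what such a from-scratch Gaussian naive Bayes can look like (all names and details below are assumptions, not the example's actual class):

import numpy as np

class GaussianNaiveBayes:
    """Minimal sketch; the example's actual NaiveBayes class may differ."""

    def fit(self, X, y):
        self.classes_ = np.unique(y)
        # Per-class feature means, (smoothed) variances and log-priors
        self.mean_ = np.array([X[y == c].mean(axis=0) for c in self.classes_])
        self.var_ = np.array([X[y == c].var(axis=0) + 1e-9 for c in self.classes_])
        self.log_prior_ = np.log([np.mean(y == c) for c in self.classes_])

    def predict(self, X):
        # Gaussian log-likelihood summed over features, plus the class log-prior
        scores = [
            prior - 0.5 * np.sum(np.log(2 * np.pi * var) + (X - mu) ** 2 / var, axis=1)
            for mu, var, prior in zip(self.mean_, self.var_, self.log_prior_)
        ]
        return self.classes_[np.argmax(scores, axis=0)]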
Example #2

# Assumes math and numpy (np) are imported, and that get_data, train_test_split
# and mean_squared_error are provided by the surrounding project.
def get_baseline(all=True):
    years, past_values, values = get_data()
    train_x, train_y, test_x, test_y = train_test_split(past_values, values)

    # Persistence baseline: the prediction is simply the previous value
    pred = train_x
    train_score = mean_squared_error(train_y, pred)
    print('Baseline Training Score: RMSE: {:,.0f}'.format(math.sqrt(train_score)))

    pred = test_x
    test_score = mean_squared_error(test_y, pred)
    print('Baseline Test Score: RMSE: {:,.0f}'.format(math.sqrt(test_score)))

    bttscore = 'RMSE: {:,.0f}/{:,.0f}'.format(math.sqrt(train_score),
                                              math.sqrt(test_score))

    if all:
        plot_y = list(train_y) + list(test_y)
        plot_pred = list(train_x) + list(test_x)
    else:
        plot_y = [None] * len(train_y) + list(test_y)
        plot_pred = [None] * len(train_x) + list(test_x)
    return np.array(plot_y), np.array(plot_pred), np.array(years), bttscore
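
`get_data` is not shown. Since `pred = train_x` is a meaningful baseline, it presumably frames a univariate yearly series as a supervised problem in which each value is predicted from its predecessor, which makes the baseline a persistence forecast. A hypothetical sketch of that framing (the signature is an assumption; the real function takes no arguments):

import numpy as np

def make_supervised(series, years):
    """Hypothetical reconstruction: pair each observation with the one before it."""
    series = np.asarray(series, dtype=float)
    past_values = series[:-1]  # model input: value at t-1
    values = series[1:]        # target: value at t
    return np.asarray(years)[1:], past_values, values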
Example #3
def oneFit(X,
           y,
           activation="relu",
           hidden_layers=(20, 20),
           test_size=0.2,
           loss=False):
    '''
    Run one fit on a fresh train/test split.
    loss: if True, return the loss recorded during iteration instead of accuracy.
    Returns the test accuracy, or the array of losses.
    '''
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=test_size)
    y_label = array2Label(y_test)  # transform 2d array into 1d labels
    # relu is used as the activation function
    nn = NeuralNetworkClassifier(hidden_layer_sizes=hidden_layers,
                                 activation=activation)
    nn.fit(X_train, y_train)
    y_pred = nn.predict(X_test)
    # for reference, random guessing scores about 0.006
    if not loss:
        return accuracy_score(y_label, y_pred)
    return nn.getIterLoss()
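
`array2Label` is defined elsewhere in this project; going by its comment, it most likely inverts the one-hot encoding, along these lines (an assumed implementation):

import numpy as np

def array2Label(y_onehot):
    # Collapse each one-hot row back to its integer class label
    return np.argmax(y_onehot, axis=1)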
Example #4
    # Metrics are loss metrics that we want to have available for each epoch,
    # so we can review how we are doing at each training stage.
    # mse is mean_squared_error, mape is mean_absolute_percentage_error
    model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mse', 'mape'])
    # Here we're starting our training
    history = model.fit(train_x, train_y, verbose=2, epochs=epochs, batch_size=batches)
    return model, name, mparams, history

if __name__ == '__main__':
    # Getting data formatted as a supervised problem
    years, past_values, values = get_data()
    X, Y = past_values, values
    # Split data into two parts: one for training, one for testing.
    # The test part won't be seen by the model during training, so it will
    # give us some idea of how our model performs on unseen data.
    train_x, train_y, test_x, test_y = train_test_split(X, Y)
    # Getting our command-line parameters
    name, epochs, batches, plot = get_params()
    # Do the training
    model, name, mp, history = train_model(name, train_x, train_y, epochs, batches)
    # Save the model and the training history for later use
    mname = 'models/model-%s-%d-%d' % (name, epochs, batches)
    model.save(mname + '.h5')
    with open(mname + '-history.pickle', 'wb') as ms:
        pickle.dump(history.history, ms)
    print()
    print('Model and its history saved in %s*' % mname)
    title = '%s (epochs=%d, batch_size=%d)' % (name, epochs, batches)
    # Test our model on both data that has been seen
    # (the training set) and unseen (the test set)
    print('Scores for %s' % title)
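
Once saved this way, the model and its history can be reloaded for later evaluation; a minimal sketch, assuming a tensorflow.keras setup and a hypothetical file name:

import pickle
from tensorflow.keras.models import load_model

mname = 'models/model-lstm-10-32'  # hypothetical name/epochs/batch_size
model = load_model(mname + '.h5')
with open(mname + '-history.pickle', 'rb') as ms:
    history = pickle.load(ms)  # dict of per-epoch metrics: loss, mse, mape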
Example #5

import numpy as np
import sys
from nn import *
from tools import train_test_split, one_hot_encoder
from viz import plot_history

data = np.load('datasets/cifar10.npz')
X = data['arr_0']
y = data['arr_1']
X = X.astype(np.float32) / 255. - .5  # scale pixels to [-0.5, 0.5]
y, _ = one_hot_encoder(y)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

np.random.seed(42)
m = NeuralNetwork('crossentropy',
                  optimizer=Adam(lr=0.05),
                  verbose=True,
                  verbose_step=1,
                  debug=True,
                  random_state=42)
m.add(Flatten())
m.add(Dense(256, 'elu', weights_regularizer=L2Regularizer()))
m.add(BatchNorm())
m.add(Dense(10, 'softmax'))
# Just to initialize parameters
m.fit(X_train,
      y_train,
      X_test,
      y_test,
      n_epochs=0,
      batch_size=256)
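
`one_hot_encoder` from `tools` is unpacked into two values above; a sketch under the assumption that it returns the encoded matrix together with the class list, for 1-D integer labels:

import numpy as np

def one_hot_encoder(y):
    """Hypothetical tools.one_hot_encoder: returns (one-hot matrix, classes)."""
    y = np.asarray(y).ravel()
    classes = np.unique(y)
    # Each label indexes a row of the identity matrix
    one_hot = np.eye(len(classes), dtype=np.float32)[np.searchsorted(classes, y)]
    return one_hot, classes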
Example #6

def test_train_test_split():
    X_train, X_test = train_test_split(np.array([0, 1, 2]), random_state=42)
    # == on numpy arrays is elementwise, so compare with array_equal instead
    assert np.array_equal(X_train, np.array([0, 1]))
    assert np.array_equal(X_test, np.array([2]))
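
The `train_test_split` under test is the local `tools` version, not scikit-learn's. A minimal shuffled-split sketch consistent with the interface (which elements land in the test set depends on the module's own shuffling, and the four-value ordering differs between the projects above, so sklearn-style order is assumed here):

import numpy as np

def train_test_split(X, y=None, test_size=1 / 3, random_state=None):
    """Hypothetical tools-style split: shuffle indices, carve off a test fraction."""
    rng = np.random.RandomState(random_state)
    idx = rng.permutation(len(X))
    n_test = max(1, int(round(len(X) * test_size)))
    test_idx, train_idx = idx[:n_test], idx[n_test:]
    if y is None:
        return X[train_idx], X[test_idx]
    return X[train_idx], X[test_idx], y[train_idx], y[test_idx]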