def fit(self, X, y):
        """Fit the model to data matrix X and target y.

        Training happens in two visible stages:

        1. Unsupervised: one ``AutoEncoder`` is fitted per entry of
           ``self.hidden_layer_sizes``; the first element of its
           ``get_coefs()`` result is appended to ``self.coefs_``.
        2. Supervised: an ``MLPClassifier`` with default settings is fitted
           on the output of the stacked encoders, and each of its weight
           matrices (with the matching intercept stacked underneath) is
           appended to ``self.coefs_``.

        Parameters
        ----------
        X : data matrix passed to ``self.add_bias`` and to the first
            auto-encoder (presumably samples x features -- TODO confirm).
        y : targets passed to ``MLPClassifier.fit``.

        NOTE(review): ``self.coefs_`` is appended to, never reset, in the
        visible code, so it must already exist before this method is called.
        """
        # NOTE(review): mlp_hl_size and num_classes are not used in the
        # visible part of this method.
        mlp_hl_size = 50
        num_classes = np.unique(y).size
        inputs = X
        bias_X = self.add_bias(X)

        # unsupervised training (training auto encoders)
        for i, layer_size in enumerate(self.hidden_layer_sizes):
            # Each auto-encoder is trained on the current `inputs`.
            auto_encoder = AutoEncoder(hidden_layer_size=layer_size)
            auto_encoder.fit(inputs)
            weights = auto_encoder.get_coefs()
            # Keep only the first coefficient entry (presumably the encoder
            # half of the auto-encoder -- TODO confirm).
            self.coefs_.append(weights[0])
            # Recompute the next layer's input by pushing the biased original
            # data through every layer collected so far.
            inputs = self.forward(bias_X, self.coefs_)  # no bias here

        # supervised training using MLP classifier
        mlp = MLPClassifier()
        mlp.fit(inputs, y)
        #print "MLP Score:", mlp.score(inputs, y)

        # NOTE(review): mlp_coefs is not used in the visible part of this
        # method.
        mlp_coefs = []
        for i, coefs in enumerate(mlp.coefs_):
            # Stack the intercept vector under the weight matrix so the bias
            # is stored as the last row of the layer's coefficients.
            new_coefs = np.vstack((coefs, mlp.intercepts_[i]))
            self.coefs_.append(new_coefs)
        """
Beispiel #2
0
def test_ae(x_train_, y_train_, x_test_, y_test_):
    """
    Auto-encoder test.

    For every code size in ``range(10, 200, 10)`` an ``AutoEncoder`` is fitted
    on the flattened training data (input used as its own target), an SVC is
    trained on the encoded first 5000 test samples and scored on the encoded
    remainder. Each accuracy is printed and collected.

    ``y_train_`` is accepted but not used.

    Returns the list of accuracies, one per code size, in loop order.
    """
    # Flatten every sample into a single feature vector.
    flat_train = x_train_.reshape(x_train_.shape[0], -1)
    flat_test = x_test_.reshape(x_test_.shape[0], -1)

    # Half of the test data trains the SVC, the other half evaluates it.
    split = 5000
    svc_fit_x, svc_eval_x = flat_test[:split], flat_test[split:]
    svc_fit_y, svc_eval_y = y_test_[:split], y_test_[split:]

    scores = []
    # Each code size is a different dimensionality-reduction target.
    for code_size in range(10, 200, 10):
        # Train the auto-encoder (28 * 28 in, code_size hidden, 28 * 28 out --
        # presumably; argument meaning is defined by AutoEncoder).
        encoder = AutoEncoder(28 * 28, code_size, 28 * 28)
        encoder.fit(flat_train, flat_train)

        # Train the SVC classifier on the encoded samples.
        classifier = SVC(gamma='scale')
        encoded_fit = encoder.encode(svc_fit_x)
        classifier.fit(encoded_fit, svc_fit_y)

        # Score the classifier on the held-out encoded samples.
        encoded_eval = encoder.encode(svc_eval_x)
        predicted = classifier.predict(encoded_eval)
        score = accuracy_score(svc_eval_y, predicted)
        print(score)
        scores.append(score)
    return scores
Beispiel #3
0
from AutoEncoder import AutoEncoder
import numpy as np

# Two toy training samples, each given as a column of four values.
x = np.asarray([
    [[-1], [1], [1], [1]],
    [[1], [1], [1], [1]],
])

# Build the auto-encoder
auto_encoder = AutoEncoder([3, 2], eta=0.05)
auto_encoder.assignX(x)
weights = auto_encoder.fit()

# Print the parameters
# Index-based access is kept because the container type returned by the
# project-local fit() is not visible here. print() with a single argument
# behaves the same on Python 2 and Python 3, unlike the print statement.
for i in range(len(weights)):
    print(weights[i])
# Load the pickled (train, valid, test) splits from the gzip archive.
# NOTE: pickle can execute arbitrary code while loading -- only use this with
# a trusted data file.
# The context manager closes the archive even if unpickling fails.
with gzip.open(fname, 'rb') as f:
    train_set, valid_set, test_set = pickle.load(f, encoding='latin1')

print("Partitioning Data")
X, y = train_set
# convert to (0,255) int range (we'll do our own scaling)
# Builtin `int` replaces the `np.int` alias, which was deprecated in
# NumPy 1.20 and removed in 1.24.
X = np.rint(X * 256).astype(int).reshape((-1, 1, 28, 28))

# Standardise with the global mean / standard deviation of the whole set.
mu, sigma = np.mean(X.flatten()), np.std(X.flatten())
X_train = X.astype(np.float64)
X_train = (X_train - mu) / sigma
X_train = X_train.astype(np.float32)
# One flat row per sample; passed to fit() as the second argument
# (presumably the reconstruction target -- TODO confirm).
X_out = X_train.reshape((X_train.shape[0], -1))

print("Begin Training")
epochs = 20
ae = AutoEncoder(
    update_learning_rate=0.01,
    update_momentum=0.975,
    batch_iterator_train=FlipBatchIterator(batch_size=128),
    regression=True,
    max_epochs=epochs,
    verbose=1,
)
ae.fit(X_train, X_out, 5)

print("Saving Parameters")
ae.save_params_to("./data/conv_ae.np")

print("Done")
Beispiel #5
0
def train(filename, datasetX, datasetY, encoderPath, modelPath,
          predictionPath):
    """Train (or load) a model, evaluate it and write its predictions as CSV.

    The data is split by position: the first
    ``int(len(datasetX) * TRAINING_DATA_PERCENTAGE)`` samples are used for
    training, the rest for testing/validation.

    Args:
        filename: Base name used to locate ``<filename>_means.npy`` and
            ``<filename>_stddev.npy`` under ``preprocessPath``.
        datasetX: Sequence of input samples; ``len(datasetX[0][0])`` is taken
            as the number of input parameters.
        datasetY: Sequence of rows whose column 0 is a timestamp (an object
            with a ``.date()`` method) and column 1 is the target value.
        encoderPath: File the encoder is loaded from if it exists, otherwise
            saved to (only used when ``USE_AUTOENCODER`` is truthy).
        modelPath: File the model is loaded from if it exists, otherwise
            saved to after fitting.
        predictionPath: Destination of the ``;``-delimited prediction CSV.

    Side effects:
        May write ``encoderPath`` and ``modelPath``, always (over)writes
        ``predictionPath`` and prints the test scores.
    """
    # Split into training and testing set.
    index = int(len(datasetX) * TRAINING_DATA_PERCENTAGE)
    trainingX = np.array(datasetX[:index])
    trainingY = np.array(datasetY[:index])
    testingX = np.array(datasetX[index:])
    testingY = np.array(datasetY[index:])

    # Remove timestamps from training.
    trainingY = trainingY.transpose()[1].transpose()
    # Extract timestamps from testing.
    testTargetDates = testingY.transpose()[0].transpose()
    testingY = testingY.transpose()[1].transpose()

    numberOfInputParameters = len(datasetX[0][0])
    inputShape = (LOOKBACK, numberOfInputParameters)

    if USE_AUTOENCODER:
        if os.path.isfile(encoderPath):
            encoder = load_model(encoderPath)
        else:
            aec = AutoEncoder(inputShape)
            encoder = aec.fit(trainingX, testingX)
            encoder.save(encoderPath)

        # Freeze the encoder so only the layers added on top are trained.
        for layer in encoder.layers:
            layer.trainable = False

    if os.path.isfile(modelPath):
        model = load_model(modelPath)
    else:
        # NOTE: (1) is the plain int 1, not a one-element tuple; kept as-is
        # because the model factories are defined elsewhere.
        outputShape = (1)
        if USE_AUTOENCODER:
            model = createModelWithEncoder(encoder, outputShape)
        else:
            model = createConvModel(inputShape, outputShape)

        # Fit the model. validation_data is passed as a tuple, the form
        # documented by Keras; a list is not unpacked reliably by every
        # version.
        model.fit(trainingX,
                  trainingY,
                  epochs=150,
                  batch_size=256,
                  validation_data=(testingX, testingY),
                  callbacks=[TensorBoard(log_dir='/tmp/')])
        model.save(modelPath)

    # Evaluate the model.
    # The statistics of the last input column are used to map the normalised
    # predictions and targets back to their original scale.
    last = len(trainingX[0][0]) - 1
    mean = np.load(preprocessPath + filename + "_means.npy")[last]
    stddev = np.load(preprocessPath + filename + "_stddev.npy")[last]

    predictions = model.predict(testingX)

    # Score before opening the output file so a failure here does not leave
    # a truncated CSV behind.
    trainScores = model.evaluate(trainingX, trainingY)
    testScores = model.evaluate(testingX, testingY)

    lines = []
    for predicted, target, date in zip(predictions, testingY,
                                       testTargetDates):
        predicted = int(predicted[0] * stddev + mean)
        target = int(target * stddev + mean)
        # A de-normalised target of 0 would otherwise abort the whole export.
        fraction = predicted / target if target else float('nan')
        lines.append([
            str(date.date()),
            str(predicted),
            str(target),
            str(fraction)
        ])
    lines.sort()

    # newline='' is required by the csv module so that it controls line
    # endings itself (avoids blank rows on Windows).
    with open(predictionPath, 'w', newline='') as csvfile:
        predictionWriter = csv.writer(csvfile,
                                      delimiter=';',
                                      quotechar='|',
                                      quoting=csv.QUOTE_MINIMAL)
        predictionWriter.writerow([
            'Test Loss',
            str(testScores), '', 'Training Loss',
            str(trainScores)
        ])
        predictionWriter.writerow(
            ['Date', 'Prediction', 'Target', 'Prediction/Target'])
        predictionWriter.writerows(lines)

        print(testScores)
# Load the pickled (train, valid, test) splits from the already-open file `f`.
# NOTE: pickle can execute arbitrary code while loading -- only use this with
# a trusted data file.
train_set, valid_set, test_set = pickle.load(f, encoding='latin1')
f.close()

print("Partitioning Data")
X, y = train_set
# convert to (0,255) int range (we'll do our own scaling)
# Builtin `int` replaces the `np.int` alias, which was deprecated in
# NumPy 1.20 and removed in 1.24.
X = np.rint(X * 256).astype(int).reshape((-1, 1, 28, 28))

# Standardise with the global mean / standard deviation of the whole set.
mu, sigma = np.mean(X.flatten()), np.std(X.flatten())
X_train = X.astype(np.float64)
X_train = (X_train - mu) / sigma
X_train = X_train.astype(np.float32)
# One flat row per sample; passed to fit() as the second argument
# (presumably the reconstruction target -- TODO confirm).
X_out = X_train.reshape((X_train.shape[0], -1))


print("Begin Training")
epochs = 20
ae = AutoEncoder(
    update_learning_rate=0.01,
    update_momentum=0.975,
    batch_iterator_train=FlipBatchIterator(batch_size=128),
    regression=True,
    max_epochs=epochs,
    verbose=1,
)
ae.fit(X_train, X_out, 5)

print("Saving Parameters")
ae.save_params_to("./data/conv_ae.np")


print("Done")