def fit(self, X, y):
    """Fit the model to data matrix X and target y.

    Trains one ``AutoEncoder`` per entry of ``self.hidden_layer_sizes``
    (unsupervised stage), then trains an ``MLPClassifier`` on the resulting
    features (supervised stage). Every learned weight matrix is appended to
    ``self.coefs_`` in the order it is produced.

    Parameters
    ----------
    X : data matrix; passed to ``self.add_bias`` and to the first
        auto-encoder.
    y : targets; passed to ``MLPClassifier.fit``.
    """
    inputs = X
    bias_X = self.add_bias(X)

    # Unsupervised training (training auto encoders).
    for layer_size in self.hidden_layer_sizes:
        auto_encoder = AutoEncoder(hidden_layer_size=layer_size)
        auto_encoder.fit(inputs)
        # Only the first element of get_coefs() is kept
        # (presumably the encoder-side weights -- confirm).
        self.coefs_.append(auto_encoder.get_coefs()[0])
        # NOTE(review): the original indentation was lost; this line is
        # assumed to belong to the loop so that each auto-encoder is trained
        # on the output of the layers learned so far.
        inputs = self.forward(bias_X, self.coefs_)  # no bias here

    # Supervised training using MLP classifier.
    mlp = MLPClassifier()
    mlp.fit(inputs, y)

    # Stack each layer's intercepts underneath its weights so that a single
    # matrix per layer is stored in self.coefs_.
    for coefs, intercepts in zip(mlp.coefs_, mlp.intercepts_):
        self.coefs_.append(np.vstack((coefs, intercepts)))
def test_ae(x_train_, y_train_, x_test_, y_test_):
    """Score SVC classification on auto-encoder codes of varying size.

    For every code size in ``range(10, 200, 10)`` an ``AutoEncoder`` is
    fitted on the flattened training data, an ``SVC`` is trained on the
    encoded first 5000 test samples, and its accuracy on the encoded
    remaining test samples is printed and collected.

    ``y_train_`` is accepted but not used.

    Returns the list of accuracies, one per code size.
    """
    # Flatten every sample into a single feature vector.
    x_train_ = x_train_.reshape(x_train_.shape[0], -1)
    x_test_ = x_test_.reshape(x_test_.shape[0], -1)

    # The first 5000 test samples train the SVC; the rest evaluate it.
    svc_fit_x, svc_eval_x = x_test_[:5000], x_test_[5000:]
    svc_fit_y, svc_eval_y = y_test_[:5000], y_test_[5000:]

    scores = []
    # Each step targets a different reduced dimensionality.
    for code_size in range(10, 200, 10):
        # Train the auto-encoder to reconstruct its own input.
        encoder = AutoEncoder(28 * 28, code_size, 28 * 28)
        encoder.fit(x_train_, x_train_)

        # Train the SVC classifier on the encoded samples.
        classifier = SVC(gamma='scale')
        classifier.fit(encoder.encode(svc_fit_x), svc_fit_y)

        # Evaluate the classifier on the held-out encoded samples.
        predicted = classifier.predict(encoder.encode(svc_eval_x))
        score = accuracy_score(svc_eval_y, predicted)
        print(score)
        scores.append(score)

    return scores
from AutoEncoder import AutoEncoder
import numpy as np

# Two input samples, each given as four single-element rows (shape (2, 4, 1)).
x = np.asarray([
    [[-1], [1], [1], [1]],
    [[1], [1], [1], [1]],
])

# Build the auto-encoder
auto_encoder = AutoEncoder([3, 2], eta=0.05)
auto_encoder.assignX(x)
weights = auto_encoder.fit()

# Print the parameters. print() with a single argument behaves the same on
# Python 2 and Python 3, unlike the original print statement.
for weight in weights:
    print(weight)
# NOTE: unpickling can execute arbitrary code; only load a trusted file here.
# The context manager closes the file even if unpickling fails.
with gzip.open(fname, 'rb') as f:
    train_set, valid_set, test_set = pickle.load(f, encoding='latin1')

print("Partitioning Data")
X, y = train_set
# Convert to an integer pixel scale (we'll do our own scaling) and reshape to
# (N, 1, 28, 28). The builtin int replaces np.int, which NumPy has removed.
X = np.rint(X * 256).astype(int).reshape((-1, 1, 28, 28))

# Standardise with the global mean / standard deviation of the data.
mu, sigma = X.mean(), X.std()
X_train = ((X.astype(np.float64) - mu) / sigma).astype(np.float32)
# The reconstruction target is the same data, flattened per sample.
X_out = X_train.reshape((X_train.shape[0], -1))

print("Begin Training")
epochs = 20
ae = AutoEncoder(
    update_learning_rate=0.01,
    update_momentum=0.975,
    batch_iterator_train=FlipBatchIterator(batch_size=128),
    regression=True,
    max_epochs=epochs,
    verbose=1,
)
# NOTE(review): the third positional argument is passed through unchanged;
# presumably an epoch count for this call -- confirm against AutoEncoder.fit.
ae.fit(X_train, X_out, 5)

print("Saving Parameters")
ae.save_params_to("./data/conv_ae.np")
print("Done")
def train(filename, datasetX, datasetY, encoderPath, modelPath,
          predictionPath):
    """Train (or load) the model and write its test predictions to a CSV file.

    The data is split chronologically by ``TRAINING_DATA_PERCENTAGE``. When
    ``USE_AUTOENCODER`` is set, an encoder is loaded from ``encoderPath`` or
    fitted and saved there, and its layers are frozen. The model is loaded
    from ``modelPath`` if that file exists; otherwise it is built, fitted and
    saved there. Finally the test predictions are de-normalised and written
    to ``predictionPath`` together with the train/test losses.

    Parameters
    ----------
    filename : prefix of the ``*_means.npy`` / ``*_stddev.npy`` files that are
        read from ``preprocessPath``.
    datasetX : input samples; ``len(datasetX[0][0])`` is taken as the number
        of input parameters.
    datasetY : rows whose column 0 is a timestamp and column 1 the target.
    encoderPath : file used to load/save the encoder.
    modelPath : file used to load/save the model.
    predictionPath : CSV file to write (``;``-delimited).
    """
    # Split into training and testing set.
    index = int(len(datasetX) * TRAINING_DATA_PERCENTAGE)
    trainingX = np.array(datasetX[:index])
    trainingY = np.array(datasetY[:index])
    testingX = np.array(datasetX[index:])
    testingY = np.array(datasetY[index:])

    # Remove timestamps from training (keep the target column only).
    trainingY = trainingY[:, 1]
    # Extract timestamps from testing, then keep the target column only.
    testTargetDates = testingY[:, 0]
    testingY = testingY[:, 1]

    numberOfInputParameters = len(datasetX[0][0])
    inputShape = (LOOKBACK, numberOfInputParameters)

    if USE_AUTOENCODER:
        if os.path.isfile(encoderPath):
            encoder = load_model(encoderPath)
        else:
            aec = AutoEncoder(inputShape)
            encoder = aec.fit(trainingX, testingX)
            encoder.save(encoderPath)
        # NOTE(review): original indentation was lost; freezing is assumed to
        # apply to both a loaded and a freshly fitted encoder.
        for layer in encoder.layers:
            layer.trainable = False

    if os.path.isfile(modelPath):
        model = load_model(modelPath)
    else:
        # NOTE(review): the original spelled this `(1)`, which is the int 1,
        # not a 1-tuple; confirm the model builders expect an int.
        outputShape = 1
        if USE_AUTOENCODER:
            model = createModelWithEncoder(encoder, outputShape)
        else:
            model = createConvModel(inputShape, outputShape)

        # Fit the model.
        # NOTE(review): fitting/saving is assumed to happen only when no
        # saved model was found (original indentation was lost).
        model.fit(trainingX, trainingY,
                  epochs=150,
                  batch_size=256,
                  # Keras documents validation_data as a tuple (x_val, y_val).
                  validation_data=(testingX, testingY),
                  callbacks=[TensorBoard(log_dir='/tmp/')])
        model.save(modelPath)

    # Evaluate the model.
    # Statistics of the last input parameter are used to undo normalisation.
    last = numberOfInputParameters - 1
    mean = np.load(preprocessPath + filename + "_means.npy")[last]
    stddev = np.load(preprocessPath + filename + "_stddev.npy")[last]

    predictions = model.predict(testingX)
    # Evaluate before opening the output file so a failure here does not
    # leave a truncated CSV behind.
    trainScores = model.evaluate(trainingX, trainingY)
    testScores = model.evaluate(testingX, testingY)

    rows = []
    for predicted, target, date in zip(predictions, testingY,
                                       testTargetDates):
        predicted = int(predicted[0] * stddev + mean)
        target = int(target * stddev + mean)
        # A target that truncates to 0 would otherwise raise
        # ZeroDivisionError and abort the whole report.
        fraction = predicted / target if target else float('nan')
        rows.append([str(date.date()), str(predicted), str(target),
                     str(fraction)])
    # Rows start with the date string, so this orders them by date.
    rows.sort()

    # newline='' is what the csv module expects for files it writes to.
    with open(predictionPath, 'w', newline='') as csvfile:
        predictionWriter = csv.writer(csvfile, delimiter=';', quotechar='|',
                                      quoting=csv.QUOTE_MINIMAL)
        predictionWriter.writerow([
            'Test Loss', str(testScores), '',
            'Training Loss', str(trainScores)
        ])
        predictionWriter.writerow(
            ['Date', 'Prediction', 'Target', 'Prediction/Target'])
        predictionWriter.writerows(rows)

    print(testScores)
# `f` is opened before this section; make sure it is closed even when
# unpickling fails.
# NOTE: unpickling can execute arbitrary code; only load a trusted file here.
try:
    train_set, valid_set, test_set = pickle.load(f, encoding='latin1')
finally:
    f.close()

print("Partitioning Data")
X, y = train_set
# Convert to an integer pixel scale (we'll do our own scaling) and reshape to
# (N, 1, 28, 28). The builtin int replaces np.int, which NumPy has removed.
X = np.rint(X * 256).astype(int).reshape((-1, 1, 28, 28))

# Standardise with the global mean / standard deviation of the data.
mu, sigma = X.mean(), X.std()
X_train = ((X.astype(np.float64) - mu) / sigma).astype(np.float32)
# The reconstruction target is the same data, flattened per sample.
X_out = X_train.reshape((X_train.shape[0], -1))

print("Begin Training")
epochs = 20
ae = AutoEncoder(
    update_learning_rate=0.01,
    update_momentum=0.975,
    batch_iterator_train=FlipBatchIterator(batch_size=128),
    regression=True,
    max_epochs=epochs,
    verbose=1,
)
# NOTE(review): the third positional argument is passed through unchanged;
# presumably an epoch count for this call -- confirm against AutoEncoder.fit.
ae.fit(X_train, X_out, 5)

print("Saving Parameters")
ae.save_params_to("./data/conv_ae.np")
print("Done")