def train(logpath, modeldir, batch_size=256, epochs=100):
    """Fit the auto-encoder and the cluster model on the data behind *logpath*.

    Args:
        logpath: Path passed to ``check_file`` and ``get_train_dataset``.
        modeldir: Prefix to which ``'model.h5'`` and ``'word_dict.json'`` are
            appended by plain string concatenation; also handed to
            ``Cluster`` as ``dirpath``.
            NOTE(review): presumably expected to end with a path separator --
            confirm with callers.
        batch_size: Forwarded to ``AutoEncoder.fit``.
        epochs: Forwarded to ``AutoEncoder.fit``.

    Returns:
        ``(cluster_number, topn_sql)``: the cluster count returned by
        ``Cluster.classify`` and the last column of the training rows selected
        by ``get_topn_sql`` (the typical SQL template for each cluster).
    """
    model_file = modeldir + 'model.h5'
    dict_file = modeldir + 'word_dict.json'

    # Validate every path that is read or written, then check the log itself.
    for candidate in (logpath, model_file, dict_file):
        check_validity(candidate)
    check_file(logpath)

    # Load data.
    train_data = get_train_dataset(logpath)

    # Lazy load: the auto-encoder module is only imported when training runs.
    from autoencoder import AutoEncoder

    # Pre-process.
    word_encoder = Preprocessor(filepath=dict_file)
    train_sr, time_sr = word_encoder.pre_process(train_data)

    # The network is sized from the second and third axes of the encoded data.
    sample_shape = (train_sr.shape[1], train_sr.shape[2])
    autoencoder = AutoEncoder(shape=sample_shape, filepath=model_file)
    cluster_model = Cluster(dirpath=modeldir)

    # Train, then map the training data through the fitted auto-encoder.
    autoencoder.fit(train_sr, batch_size=batch_size, epochs=epochs)
    train_vector = autoencoder.transfer(train_sr)

    predict_result, cluster_number, dist_tbl = cluster_model.classify(
        train_vector)

    # Typical SQL template for each cluster: last column of the top-1 rows.
    top_index = get_topn_sql(dist_tbl, topn=1)
    representative_rows = train_data[top_index]
    topn_sql = representative_rows[:, -1]

    cluster_model.get_cluster_info(predict_result, time_sr, cluster_number)
    print("Train complete!")
    return cluster_number, topn_sql
def test_autoencoder():
    """
    Test that all components of the auto-encoder work correctly by executing a
    training run against generated data.

    The test builds a small dense encoder and decoder, wraps them in
    ``AutoEncoder``, evaluates the reconstruction loss on the training data
    before and after fitting, and asserts that the loss went down.

    No random seed is set in this function, so the generated data differs
    between runs.
    """
    input_shape = (3, )
    epochs = 1000

    # Generate some data
    x_train = np.random.rand(100, 3)
    x_test = np.random.rand(30, 3)

    # Define encoder and decoder model
    def create_encoder_model(input_shape):
        model_input = Input(shape=input_shape)

        encoder = Dense(4)(model_input)
        encoder = BatchNormalization()(encoder)
        encoder = Activation(activation='relu')(encoder)

        return Model(model_input, encoder)

    def create_decoder_model(embedding_shape):
        embedding_a = Input(shape=embedding_shape)

        decoder = Dense(3)(embedding_a)
        decoder = BatchNormalization()(decoder)
        decoder = Activation(activation='relu')(decoder)

        return Model(embedding_a, decoder)

    # Create auto-encoder network
    encoder_model = create_encoder_model(input_shape)
    # ``output_shape`` carries the leading batch dimension, e.g. (None, 4),
    # whereas ``Input(shape=...)`` expects the per-sample shape only; drop the
    # batch axis so the decoder input matches the encoder output rank.
    decoder_model = create_decoder_model(encoder_model.output_shape[1:])
    autoencoder = AutoEncoder(encoder_model, decoder_model)

    # Prepare auto-encoder for training
    autoencoder.compile(loss='binary_crossentropy', optimizer='adam')

    # Evaluate network before training to establish a baseline
    score_before = autoencoder.evaluate(x_train, x_train)

    # Train network
    autoencoder.fit(x_train, x_train,
                    validation_data=(x_test, x_test),
                    epochs=epochs)

    # Evaluate network
    score_after = autoencoder.evaluate(x_train, x_train)

    # Ensure that the training loss score improved as a result of the training
    assert (score_before > score_after)
# Reshape the training images into one array whose trailing dimensions are
# the model's input shape (the leading axis is inferred).
print("\nConverting training images to numpy array of right dimensions")
train_array_shape = (-1, ) + input_shape_model
X_train = np.array(imgs_train).reshape(train_array_shape)
print(">>> X_train.shape = {}".format(X_train.shape))
print("Number of training images:", len(X_train))

# Build the augmented training generator.
completeTrainGen = data_augmentation(X_train, args.bs)

print("\nStart training...")

# Compile
model.compile(loss=args.loss, optimizer="adam")

# Fit
model.fit(
    completeTrainGen,
    steps_per_epoch,
    n_epochs=args.e,
    batch_size=args.bs,
    wandb=args.wandb,
)

# Save
model.save_models()
print("Done training")

# Embed the training images and flatten each embedding to a single row.
print("\nCreating embeddings...")
E_train = model.predict(X_train)
embedding_width = np.prod(output_shape_model)
E_train_flatten = E_train.reshape((-1, embedding_width))

# Read the query images.
query_map = loader.get_files(QueryDir)
(query_names,
 query_paths,
 imgs_query,
 query_classes) = loader.get_data_paths(query_map)
mnist = tf.keras.datasets.mnist # extract train and val data (x_train, _),(x_val, _) = mnist.load_data() # reshape and normalize in range [0 .. 1] x_train, x_val = x_train.reshape(-1, 28*28) / 255.0, x_val.reshape(-1, 28*28) / 255.0 # init model model = AutoEncoder(z_dim=32) # set loss and optimizer type model.compile(optimizer='adam', loss='mean_squared_error') # train model model.fit(x_train, x_train, batch_size=32, epochs=20, verbose=1, validation_data=(x_val, x_val)) # show some results # =================== PLOTTING ============================ # images per row and col NUM_IMG_PER_ROW = 10 # to store images selected_imgs = [] # pick random indexes to visualize indexes = np.random.random_integers(x_train.shape[0], size=(1,NUM_IMG_PER_ROW**2)) # add to list for i in range(len(indexes)):
# data wrapper iterator = DataIterator(datas) fine_tuning_iterator = DataIterator(datas, labels=labels) # train autoencoder # assume the input dimension is input_d # the network is like input_d -> 4 -> 2 -> 4 -> input_d autoencoder = AutoEncoder() # train autoencoder without fine-tuning print "\ntrain autoencoder without fine-tuning ==========\n" autoencoder.fit([4, 2], iterator, stacked=True, learning_rate=0.02, max_epoch=5000, tied=True, activation="tanh") # encode data (without fine-tuning) encoded_datas = autoencoder.encode(datas) print "encoder (without fine-tuning) ================" print encoded_datas # train autoencoder with fine-tuning print "\ntrain autoencoder with fine-tuning ==========\n" autoencoder.fine_tune(fine_tuning_iterator, supervised=True, learning_rate=0.02, max_epoch=10000,
mnist = input_data.read_data_sets('MNIST_data')
train_data = mnist.train.images
train_labels = mnist.train.labels
test_data = mnist.test.images
test_labels = mnist.test.labels

# The training and test sets contain 55000 and 10000 images respectively.
# Each image is a 28x28 matrix whose elements are floats in 0-1; the closer a
# value is to 1, the closer the pixel is to black.
# Each 28x28 matrix is stored flattened as a 1-D array of length 28x28=784.

_, m = train_data.shape
autoEncoder = AutoEncoder(m, 256)

init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    autoEncoder.set_session(sess)
    loss = autoEncoder.fit(X=train_data, epochs=10)
    # Reconstruct the first 100 training images.
    output = autoEncoder.reconstruct(train_data[:100])

# Plot how the loss changes w.r.t. epochs
plt.xlabel('epochs')
plt.ylabel('loss')
plt.plot(loss)

# Plot original and reconstructed images
# Two rows: one of original images, one of reconstructed images.
n_rows, n_cols = 2, 8
idx = np.random.randint(0, 100, n_cols)
# idx = np.array([i for i in range(n_cols)])
figure, axes = plt.subplots(
    n_rows,
    n_cols,
    sharex=True,
    sharey=True,
    figsize=(10, 5),
)
for fig, row in zip([train_data, output], axes):
    for i, ax in zip(idx, row):
        picture = fig[i].reshape(28, 28)
        ax.imshow(picture, cmap='Greys_r')
        # Hide both axes so only the image is shown.
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
# plt.show()

# Exercise 1 b
train_x_with_noise = add_noise(train_x, 2)
# NOTE(review): the "test" set is also derived from train_x (a second
# add_noise call on the same data) -- confirm this is intended.
test_x_with_noise = add_noise(train_x, 2)

ae = AutoEncoder(
    [25, 15],
    10,
    [10, 25],
    activation='tanh',
    solver='lbfgs',
    eta=0.0001,
    max_iterations=30000,
    tol=0.0000001,
    verbose=True,
)
# Fit on noisy inputs with the clean patterns as targets.
ae.fit(train_x_with_noise, train_x)

for i in range(32):
    # Predictions for the noisy training input, the clean input and the
    # second noisy copy of pattern i.
    prediction_1 = ae.predict(train_x_with_noise[i])
    prediction_2 = ae.predict(train_x[i])
    prediction_3 = ae.predict(test_x_with_noise[i])

    plt.figure()

    # Panel 1: noisy training input, drawn as a 7x5 image.
    plt.subplot(3, 2, 1)
    noisy_input = train_x_with_noise[i].reshape(7, 5)
    plt.imshow(noisy_input, 'gray_r')
    plt.title("Train Input noise", fontsize=15)
    plt.xticks([])
    plt.yticks([])

    # Panel 2: prediction for that noisy input.
    plt.subplot(3, 2, 2)
    noisy_output = prediction_1.reshape(7, 5)
    plt.imshow(noisy_output, 'gray_r')
    plt.title('Predicted noise', fontsize=15)
# coding=utf-8
from autoencoder import AutoEncoder, DataIterator

# Train data: two six-element binary patterns.
datas = [[1, 1, 1, 0, 0, 0], [0, 0, 0, 1, 1, 1]]

# Data wrapper
iterator = DataIterator(datas)

# Train autoencoder.
# Assume the input dimension is input_d;
# the network is like input_d -> 4 -> 2 -> 4 -> input_d
autoencoder = AutoEncoder()
autoencoder.fit([4, 2],
                iterator,
                stacked=True,
                learning_rate=0.1,
                max_epoch=5000)
autoencoder.fine_tune(iterator, learning_rate=0.1, supervised=False)

# After training

# Encode data. Single-argument print(...) calls produce the same output under
# Python 2 and also parse under Python 3.
encoded_datas = autoencoder.encode(datas)
print("encoder ================")
print(encoded_datas)

# Decode data
decoded_datas = autoencoder.decode(encoded_datas)
print("decoder ================")
print(decoded_datas)
if args.fit:
    print("Autoencoder ccs item handler has started...")

    # Build the dataset and mark cold-start items (rating threshold of 5).
    mv = MovieLens()
    mv.create_cold_start_items(n_ratings_threshold=5)
    dataloader = DataLoader(
        mv,
        batch_size=batch_size,
        shuffle=True,
        drop_last=True,
    )

    # Model, loss and optimizer for the auto-encoder.
    model = AutoEncoder(input_dim=21, latent_dim=5)
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(
        model.parameters(),
        lr=learning_rate,
        weight_decay=1e-5,
    )
    AutoEncoder.fit(model, num_epochs, dataloader, criterion, optimizer)

    for ccs_item in tqdm(mv.ccs_items()):
        print('ccs item:', ccs_item)
        # Keep going for as long as the item is still reported as cold-start.
        while mv.is_ccs(ccs_item):
            u = mv.pick_random_user()
            print('user:', u)

            # Latent codes of the non-cold-start items this user has rated.
            rated_ncs_items_by_u = mv.rated_ncs_items(u)
            u_rated_latents = [
                model.encode(mv.features(rated_item))
                for rated_item in rated_ncs_items_by_u
            ]

            # Compare the cold-start item's latent code against each of them.
            ccs_latent = model.encode(mv.features(ccs_item))
            cosine_sims = [
                cosine(rated_latent, ccs_latent)
                for rated_latent in u_rated_latents
            ]
from autoencoder import AutoEncoder
from oct_dataset import build_dataset

# Declare the model
autoencoder = AutoEncoder()

x_train_noisy, x_train = build_dataset()

# Train the autoencoder model: noisy images in, clean images as targets.
# NOTE(review): x_test_noisy and x_test are not assigned anywhere in the
# visible code (build_dataset's result is unpacked into the two training
# arrays only) -- confirm where they come from.
autoencoder.fit(
    x_train_noisy,
    x_train,
    epochs=100000,
    batch_size=128,
    shuffle=True,
    validation_data=(x_test_noisy, x_test),
    verbose=1,
)

# Visualize the decoded (denoised) images
decoded_imgs = autoencoder.predict(x_test)

n = 10
plt.figure(figsize=(20, 4))
for i in range(n):
    # First row of the 2 x n grid: the noisy test image as a 256x256 picture.
    ax = plt.subplot(2, n, i + 1)
    noisy_picture = x_test_noisy[i].reshape(256, 256)
    plt.imshow(noisy_picture)
    plt.gray()
    # Hide both axes so only the image is shown.
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)
optimizer=keras.optimizers.adam(), metrics=['accuracy']) autoencoder_checkpoint_path = "./autoencoder_checkpoint" autoencoder_callbacks = [ EarlyStopping(monitor='val_acc', patience=10, verbose=0), ModelCheckpoint(autoencoder_checkpoint_path, monitor='val_acc', save_best_only=True, verbose=0) ] autoencoder_network.fit(x_train, x_train, validation_data=(x_test, x_test), batch_size=128, epochs=epochs, callbacks=autoencoder_callbacks) autoencoder_network.load_weights(autoencoder_checkpoint_path) embedding = encoder_model.outputs[-1] y_train = keras.utils.to_categorical(y_train, num_classes) y_test = keras.utils.to_categorical(y_test, num_classes) # Add softmax layer to the pre-trained embedding network embedding = Dense(num_classes)(embedding) embedding = BatchNormalization()(embedding) embedding = Activation(activation='sigmoid')(embedding) model = Model(encoder_model.inputs[0], embedding)