Example #1
0
def train(logpath, modeldir, batch_size=256, epochs=100):
    """Train the auto-encoder and the cluster model from the log at *logpath*.

    The trained model is stored as ``model.h5`` and the pre-processor
    dictionary as ``word_dict.json`` inside *modeldir*.

    Args:
        logpath: Path of the log file holding the training data.
        modeldir: Directory that receives the model artifacts. It may be
            given with or without a trailing path separator.
        batch_size: Batch size forwarded to ``AutoEncoder.fit``.
        epochs: Number of epochs forwarded to ``AutoEncoder.fit``.

    Returns:
        A ``(cluster_number, topn_sql)`` tuple, where ``topn_sql`` is the last
        column of the training rows selected by ``get_topn_sql``.
    """
    import os  # local import keeps this block self-contained

    # os.path.join instead of string concatenation: a modeldir without a
    # trailing separator used to produce paths such as "dirmodel.h5".
    modelpath = os.path.join(modeldir, 'model.h5')
    dictpath = os.path.join(modeldir, 'word_dict.json')
    for filepath in [logpath, modelpath, dictpath]:
        check_validity(filepath)
    check_file(logpath)
    # load data
    train_data = get_train_dataset(logpath)

    # pre-process
    from autoencoder import AutoEncoder  # lazy load

    pre_processor = Preprocessor(filepath=dictpath)
    train_sr, time_sr = pre_processor.pre_process(train_data)
    # The network input shape is taken from axes 1 and 2 of the
    # pre-processed data.
    autoencoder = AutoEncoder(shape=(train_sr.shape[1], train_sr.shape[2]),
                              filepath=modelpath)
    cluster_model = Cluster(dirpath=modeldir)

    # train
    autoencoder.fit(train_sr, batch_size=batch_size, epochs=epochs)
    train_vector = autoencoder.transfer(train_sr)
    predict_result, cluster_number, dist_tbl = cluster_model.classify(
        train_vector)
    top_index = get_topn_sql(dist_tbl, topn=1)
    topn_sql = train_data[
        top_index][:, -1]  # typical SQL template for each cluster
    cluster_model.get_cluster_info(predict_result, time_sr, cluster_number)
    print("Train complete!")
    return cluster_number, topn_sql
def test_autoencoder():
    """
    Test that all components of the auto-encoder work correctly by executing a
    training run against generated data.

    The test passes when the training loss measured after fitting is lower
    than the loss measured before fitting.
    """

    input_shape = (3, )
    epochs = 1000

    # Generate some data
    x_train = np.random.rand(100, 3)
    x_test = np.random.rand(30, 3)

    # Define encoder and decoder model
    def create_encoder_model(input_shape):
        """Build the encoder: input -> Dense(4) -> BatchNorm -> ReLU."""
        model_input = Input(shape=input_shape)

        encoder = Dense(4)(model_input)
        encoder = BatchNormalization()(encoder)
        encoder = Activation(activation='relu')(encoder)

        return Model(model_input, encoder)

    def create_decoder_model(embedding_shape):
        """Build the decoder: embedding -> Dense(3) -> BatchNorm -> ReLU."""
        embedding_a = Input(shape=embedding_shape)

        decoder = Dense(3)(embedding_a)
        decoder = BatchNormalization()(decoder)
        decoder = Activation(activation='relu')(decoder)

        return Model(embedding_a, decoder)

    # Create auto-encoder network
    encoder_model = create_encoder_model(input_shape)
    # Model.output_shape includes the leading batch dimension, whereas
    # Input(shape=...) expects the per-sample shape, so drop the first axis.
    decoder_model = create_decoder_model(encoder_model.output_shape[1:])
    autoencoder = AutoEncoder(encoder_model, decoder_model)

    # Prepare auto-encoder for training
    autoencoder.compile(loss='binary_crossentropy', optimizer='adam')

    # Evaluate network before training to establish a baseline
    score_before = autoencoder.evaluate(x_train, x_train)

    # Train network
    autoencoder.fit(x_train,
                    x_train,
                    validation_data=(x_test, x_test),
                    epochs=epochs)

    # Evaluate network
    score_after = autoencoder.evaluate(x_train, x_train)

    # Ensure that the training loss score improved as a result of the training
    assert (score_before > score_after)
Example #3
0
    # Convert images to numpy array of right dimensions
    print("\nConverting training images to numpy array of right dimensions")
    X_train = np.array(imgs_train).reshape((-1, ) + input_shape_model)
    print(">>> X_train.shape = " + str(X_train.shape))
    print("Number of training images:", len(X_train))
    # Create object for train augmentation
    completeTrainGen = data_augmentation(X_train, args.bs)
    print("\nStart training...")

    # Compiling
    model.compile(loss=args.loss, optimizer="adam")

    # Fitting
    model.fit(completeTrainGen,
              steps_per_epoch,
              n_epochs=args.e,
              batch_size=args.bs,
              wandb=args.wandb)

    # Saving
    model.save_models()
    print("Done training")

    print("\nCreating embeddings...")
    E_train = model.predict(X_train)
    E_train_flatten = E_train.reshape((-1, np.prod(output_shape_model)))

# Read images
# Look up the entries under QueryDir through the project `loader`, then unpack
# the four sequences it returns for them. Presumably these are names, paths,
# image data and class labels, as the variable names suggest — confirm against
# `loader.get_data_paths`.
query_map = loader.get_files(QueryDir)
query_names, query_paths, imgs_query, query_classes = loader.get_data_paths(
    query_map)
Example #4
0
    mnist = tf.keras.datasets.mnist

    # extract train and val data
    (x_train, _),(x_val, _) = mnist.load_data()

    # reshape and normalize in range [0 .. 1]
    x_train, x_val = x_train.reshape(-1, 28*28) / 255.0, x_val.reshape(-1, 28*28) / 255.0

    # init model
    model = AutoEncoder(z_dim=32)

    # set loss and optimizer type
    model.compile(optimizer='adam', loss='mean_squared_error')

    # train model
    model.fit(x_train, x_train, batch_size=32, epochs=20, verbose=1, validation_data=(x_val, x_val))

    # show some results
    # =================== PLOTTING ============================

    # images per row and col
    NUM_IMG_PER_ROW = 10

    # to store images
    selected_imgs = []

    # pick random indexes to visualize
    indexes = np.random.random_integers(x_train.shape[0], size=(1,NUM_IMG_PER_ROW**2))

    # add to list
    for i in range(len(indexes)):
Example #5
0
# data wrapper
# `datas` and `labels` are defined earlier in the script; the second iterator
# additionally carries the labels and is the one handed to fine-tuning.
iterator = DataIterator(datas)
fine_tuning_iterator = DataIterator(datas, labels=labels)

# train autoencoder
# assume the input dimension is input_d
# the network is like input_d -> 4 -> 2 -> 4 -> input_d
autoencoder = AutoEncoder()

# train autoencoder without fine-tuning
# Single-argument parenthesised print behaves identically on Python 2 and
# also parses on Python 3.
print("\ntrain autoencoder without fine-tuning ==========\n")
autoencoder.fit([4, 2],
                iterator,
                stacked=True,
                learning_rate=0.02,
                max_epoch=5000,
                tied=True,
                activation="tanh")

# encode data (without fine-tuning)
encoded_datas = autoencoder.encode(datas)
print("encoder (without fine-tuning) ================")
print(encoded_datas)

# train autoencoder with fine-tuning
print("\ntrain autoencoder with fine-tuning ==========\n")
autoencoder.fine_tune(fine_tuning_iterator,
                      supervised=True,
                      learning_rate=0.02,
                      max_epoch=10000,
Example #6
0
# Load MNIST and split it into train/test images and labels.
mnist = input_data.read_data_sets('MNIST_data')
train_data, train_labels = mnist.train.images, mnist.train.labels
test_data, test_labels = mnist.test.images, mnist.test.labels
# English rendering of the note kept in the string below:
# The training and test sets contain 55000 and 10000 images respectively.
# Each image is a 28x28 matrix of floats in [0, 1]; the closer a value is to 1,
# the closer the pixel is to black. Each 28x28 matrix is stored flattened as a
# 1-D array of length 28*28 = 784.
'''
训练集和测试集分别包含55000和10000张图片, 每张图片表示为28×28矩阵, 矩阵元素是0-1浮点数, 越接近1则像素点颜色越接近黑色.
每个28×28矩阵被转换为长度为28×28=784的一维数组的形式存储.
'''

# m is the second dimension of train_data and is passed to AutoEncoder as its
# first argument, followed by 256.
_, m = train_data.shape
autoEncoder = AutoEncoder(m, 256)
init = tf.global_variables_initializer()

with tf.Session() as sess:
    # Initialize variables, then hand the session to the model before use.
    sess.run(init)
    autoEncoder.set_session(sess)
    loss = autoEncoder.fit(X=train_data, epochs=10)
    # Reconstruct only the first 100 training samples.
    output = autoEncoder.reconstruct(train_data[0:100])
    # plot loss's change w.r.t epochs
    plt.xlabel('epochs')
    plt.ylabel('loss')
    plt.plot(loss)
    # plot original and reconstructed images
    n_rows, n_cols = 2, 8  # 2 rows: one of originals, one of reconstructions
    # Indices are drawn from [0, 100) so they are valid for both train_data
    # and the 100-sample reconstruction.
    idx = np.random.randint(0, 100, n_cols)
    # idx = np.array([i for i in range(n_cols)])
    figure, axes = plt.subplots(n_rows, n_cols, sharex=True, sharey=True, figsize=(10, 5))
    # First axes row shows train_data, second row shows output, at the same
    # sampled indices.
    for fig, row in zip([train_data, output], axes):
        for i, ax in zip(idx, row):
            ax.imshow(fig[i].reshape(28, 28), cmap='Greys_r')
            ax.get_xaxis().set_visible(False)
            ax.get_yaxis().set_visible(False)
Example #7
0
# plt.show()

# Exercise 1b

# Build two noisy copies of the training patterns.
# NOTE(review): test_x_with_noise is also derived from train_x, not from a
# separate test set — confirm this is intended.
train_x_with_noise = add_noise(train_x, 2)
test_x_with_noise = add_noise(train_x, 2)

ae = AutoEncoder([25, 15],
                 10, [10, 25],
                 activation='tanh',
                 solver='lbfgs',
                 eta=0.0001,
                 max_iterations=30000,
                 tol=0.0000001,
                 verbose=True)
# Fit with the noisy patterns as the first argument and the clean patterns as
# the second (presumably inputs and targets — confirm against AutoEncoder.fit).
ae.fit(train_x_with_noise, train_x)

# The loop bound 32 is hard-coded; presumably the number of patterns in
# train_x — TODO confirm.
for i in range(32):
    prediction_1 = ae.predict(train_x_with_noise[i])
    prediction_2 = ae.predict(train_x[i])
    prediction_3 = ae.predict(test_x_with_noise[i])

    # One figure per pattern; each sample is reshaped to 7x5 for display.
    plt.figure()
    plt.subplot(3, 2, 1)
    plt.imshow(train_x_with_noise[i].reshape(7, 5), 'gray_r')
    plt.title("Train Input noise", fontsize=15)
    plt.xticks([])
    plt.yticks([])
    plt.subplot(3, 2, 2)
    plt.imshow(prediction_1.reshape(7, 5), 'gray_r')
    plt.title('Predicted noise', fontsize=15)
# coding=utf-8
from autoencoder import AutoEncoder, DataIterator

# train data
datas = [[1, 1, 1, 0, 0, 0], [0, 0, 0, 1, 1, 1]]

# data wrapper
iterator = DataIterator(datas)

# train autoencoder
# assume the input dimension is input_d
# the network is like input_d -> 4 -> 2 -> 4 -> input_d
autoencoder = AutoEncoder()
autoencoder.fit([4, 2],
                iterator,
                stacked=True,
                learning_rate=0.1,
                max_epoch=5000)
autoencoder.fine_tune(iterator, learning_rate=0.1, supervised=False)

# after training

# encode data
# Single-argument parenthesised print behaves identically on Python 2 and
# also parses on Python 3.
encoded_datas = autoencoder.encode(datas)
print("encoder ================")
print(encoded_datas)

# decode data
decoded_datas = autoencoder.decode(encoded_datas)
print("decoder ================")
print(decoded_datas)
Example #9
0
# Runs only when the `fit` flag is set on the parsed arguments.
if args.fit:

    print("Autoencoder ccs item handler has started...")
    mv = MovieLens()
    mv.create_cold_start_items(n_ratings_threshold=5)

    # Batches are shuffled and the final incomplete batch is dropped.
    dataloader = DataLoader(mv,
                            batch_size=batch_size,
                            shuffle=True,
                            drop_last=True)
    model = AutoEncoder(input_dim=21, latent_dim=5)
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=learning_rate,
                                 weight_decay=1e-5)
    # NOTE(review): fit is invoked through the class with `model` passed
    # explicitly as the first argument — confirm whether it is a regular
    # method or a static one.
    AutoEncoder.fit(model, num_epochs, dataloader, criterion, optimizer)

    for ccs_item in tqdm(mv.ccs_items()):
        print('ccs item:', ccs_item)
        # Repeats for as long as the dataset still reports the item as ccs.
        while mv.is_ccs(ccs_item):
            u = mv.pick_random_user()
            print('user:', u)
            rated_ncs_items_by_u = mv.rated_ncs_items(u)
            # Encode the features of every ncs item this user has rated.
            u_rated_latents = [
                model.encode(mv.features(m)) for m in rated_ncs_items_by_u
            ]
            ccs_latent = model.encode(mv.features(ccs_item))

            # NOTE(review): if `cosine` is scipy.spatial.distance.cosine it
            # returns a distance rather than a similarity — confirm, given
            # the variable name.
            cosine_sims = [
                cosine(r_latent, ccs_latent) for r_latent in u_rated_latents
            ]
from autoencoder import AutoEncoder
from oct_dataset import build_dataset

# Declare the model
autoencoder = AutoEncoder()

# build_dataset() is unpacked into the noisy inputs and their clean targets.
x_train_noisy, x_train = build_dataset()

# train the autoencoder model
# NOTE(review): x_test_noisy and x_test are not defined anywhere in the
# visible code (build_dataset only yields the two training arrays), so this
# call raises NameError unless they are provided elsewhere — confirm.
autoencoder.fit(x_train_noisy,
                x_train,
                epochs=100000,
                batch_size=128,
                shuffle=True,
                validation_data=(x_test_noisy, x_test),
                verbose=1)

# Visualize the decoded (denoised) images.
# NOTE(review): predict() receives the clean x_test while the model is fitted
# on noisy inputs — confirm whether x_test_noisy was intended.
decoded_imgs = autoencoder.predict(x_test)

n = 10

# NOTE(review): `plt` is not imported in the visible part of this script.
plt.figure(figsize=(20, 4))
for i in range(n):
    # display original
    ax = plt.subplot(2, n, i + 1)
    plt.imshow(x_test_noisy[i].reshape(256, 256))
    plt.gray()
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)
Example #11
0
                            optimizer=keras.optimizers.adam(),
                            metrics=['accuracy'])

# Location where ModelCheckpoint stores the auto-encoder and from which the
# weights are reloaded after training.
autoencoder_checkpoint_path = "./autoencoder_checkpoint"

# Stop after 10 epochs without improvement in 'val_acc' and keep only the
# best checkpoint by that same metric.
autoencoder_callbacks = [
    EarlyStopping(monitor='val_acc', patience=10, verbose=0),
    ModelCheckpoint(autoencoder_checkpoint_path,
                    monitor='val_acc',
                    save_best_only=True,
                    verbose=0)
]

# Train the auto-encoder to reproduce its own input.
autoencoder_network.fit(x_train,
                        x_train,
                        validation_data=(x_test, x_test),
                        batch_size=128,
                        epochs=epochs,
                        callbacks=autoencoder_callbacks)

# Restore the checkpointed weights, then take the encoder's last output
# tensor as the embedding to build on.
autoencoder_network.load_weights(autoencoder_checkpoint_path)
embedding = encoder_model.outputs[-1]

# One-hot encode the labels for the classifier.
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

# Add a classification head (Dense -> BatchNorm -> activation) on top of the
# pre-trained embedding network.
# NOTE(review): the original comment called this a softmax layer, but the
# activation used is 'sigmoid' — confirm which one is intended.
embedding = Dense(num_classes)(embedding)
embedding = BatchNormalization()(embedding)
embedding = Activation(activation='sigmoid')(embedding)

model = Model(encoder_model.inputs[0], embedding)