# build model to extract the manifold and learn a mapping / encoder to be able # to reproduce this on test data embedder = SDAEmbedder((n_features, 10, 2), noise=0.1, reconstruction_penalty=1.0, embedding_penalty=0.1, sparsity_penalty=0.0, learning_rate=0.1, seed=0) # use the randomly initialized encoder to measure the baseline code = embedder.encode(data) score_code_data = local_match(data, code, query_size=50, ratio=1, seed=0) print "kNN score match after pre-training code/data:", score_code_data fig = pl.figure(1) _, _, corr = pairwise_distances(data, code, ax=fig.add_subplot(3, 1, 1), title="random") print "Pairwise distances correlation:", corr print "Training encoder to unroll the embedded data..." start = time.time() embedder.pre_train(data, slice_=slice(None, None), epochs=1000, batch_size=100) print "done in %ds" % (time.time() - start) # evaluation of the quality of the embedding by comparing kNN queries from the # original (high dim) data and the low dim code on the one hand, and from the # ground truth low dim manifold and the low dim code on the other hand code = embedder.encode(data) score_code_data = local_match(data, code, query_size=50, ratio=1, seed=0) print "kNN score match after pre-training code/data:", score_code_data _, _, corr = pairwise_distances(data, code, ax=fig.add_subplot(3, 1, 2),
learning_rate=0.1, seed=0) print "Training encoder to extract a semantic preserving 2D mapping" start = time.time() embedder.pre_train(data, slice_=slice(None, None), epochs=1000, batch_size=100) print "done in %ds" % (time.time() - start) # evaluation of the quality of the embedding by comparing kNN queries from the # original (high dim) data and the low dim code on the one hand, and from the # ground truth low dim manifold and the low dim code on the other hand fig = pl.figure(1) code = embedder.encode(data) score_code_data = local_match(data, code, query_size=50, ratio=1, seed=0) print "kNN score match after pre-training code/data:", score_code_data _, _, corr = pairwise_distances(data, code, ax=fig.add_subplot(1, 1, 1), title="pre-training") print "Pairwise distances correlation:", corr ## fine tuning #print "Fine tuning encoder to unroll the embedded data..." #start = time.time() #embedder.fine_tune(data, epochs=100, batch_size=5) #print "done in %ds" % (time.time() - start) #code = embedder.encode(data) #score_code_data = local_match(data, code, query_size=50, ratio=1, seed=0) #print "kNN score match after fine-tuning code/data:", score_code_data #_, _, corr = pairwise_distances(data, code, ax=fig.add_subplot(3, 1, 3), # title="fine tuning") #print "Pairwise distances correlation:", corr