def _pad_columns(matrix, width):
    """Return ``matrix`` right-padded with integer zero columns to ``width``.

    The input is returned unchanged when it already has at least ``width``
    columns.
    """
    missing = width - matrix.shape[1]
    if missing <= 0:
        return matrix
    filler = np.zeros((matrix.shape[0], missing), dtype=int)
    return np.hstack((matrix, filler))


def do_transform(train_path, test_path, new_train_path, new_test_path,
                 pickle_path, model_path, pecent_encode, my_training_epochs,
                 mysample_mathod, encode_function):
    """Encode the train/test feature matrices with ``dA.test_dA`` and save them.

    Steps performed:
      1. Load both files with ``file2matrix``.
      2. Zero-pad the narrower matrix so both have the same column count.
      3. Dump ``[matrix_train, matrix_test]`` as a gzip-compressed pickle at
         ``pickle_path``.
      4. Call ``dA.test_dA`` with that pickle as its ``dataset``.
      5. Write the two returned matrices with ``matrix2file``.

    Args:
        train_path: Input file read with ``file2matrix`` (training set).
        test_path: Input file read with ``file2matrix`` (test set).
        new_train_path: Destination handed to ``matrix2file`` for the encoded
            training matrix.
        new_test_path: Destination handed to ``matrix2file`` for the encoded
            test matrix.
        pickle_path: Where the gzip-compressed pickle of both padded matrices
            is written; also passed to ``dA.test_dA`` as ``dataset``.
        model_path: Passed to ``dA.test_dA`` as ``output_path``.
        pecent_encode: Multiplier applied to the input width; the output
            width is ``int(dim_in * pecent_encode)``.
        my_training_epochs: Passed to ``dA.test_dA`` as ``training_epochs``.
        mysample_mathod: Passed to ``dA.test_dA`` as ``sample_method``.
        encode_function: Passed to ``dA.test_dA`` as ``encode_function``.

    Returns:
        None. Results are written to ``new_train_path`` and ``new_test_path``.
    """
    # ``*_left`` / ``*_right`` are the parts of each file besides the feature
    # matrix; they are handed back to ``matrix2file`` untouched.
    matrix_train, train_left, train_right = file2matrix(train_path)
    matrix_test, test_left, test_right = file2matrix(test_path)

    # The two sparse matrices may have different widths; pad to the wider one.
    dim_in = max(matrix_train.shape[1], matrix_test.shape[1])
    matrix_train = _pad_columns(matrix_train, dim_in)
    matrix_test = _pad_columns(matrix_test, dim_in)

    # Save the train and test sets (which will be used by the dA module).
    # try/finally guarantees the handle is closed even if pickling fails.
    f = gzip.open(pickle_path, 'wb')
    try:
        cPickle.dump([matrix_train, matrix_test], f)
    finally:
        f.close()

    dim_out = int(dim_in * pecent_encode)
    # Single pre-formatted string: prints the same text as the former
    # multi-item print statement (including the double space before dim_out).
    print('before sampling %s %s mapped to dim_out =  %s'
          % (matrix_train.shape, matrix_test.shape, dim_out))

    # NOTE(review): the meaning of the leading positional ``0`` is defined by
    # dA.test_dA and is not visible from here -- confirm against that module.
    matrix_train_new, matrix_test_new = dA.test_dA(
        0, dim_in, dim_out,
        learning_rate=0.1,
        training_epochs=my_training_epochs,
        dataset=pickle_path,
        batch_size=10,
        output_path=model_path,
        sample_method=mysample_mathod,
        encode_function=encode_function)

    matrix2file(matrix_train_new, train_left, train_right, new_train_path)
    matrix2file(matrix_test_new, test_left, test_right, new_test_path)
    print('encoded file written')
def _pad_columns(matrix, width):
    """Return ``matrix`` right-padded with integer zero columns to ``width``.

    The input is returned unchanged when it already has at least ``width``
    columns.
    """
    missing = width - matrix.shape[1]
    if missing <= 0:
        return matrix
    filler = np.zeros((matrix.shape[0], missing), dtype=int)
    return np.hstack((matrix, filler))


def do_transform(train_path, test_path, new_train_path, new_test_path,
                 pickle_path, model_path, pecent_encode, my_training_epochs,
                 mysample_mathod, encode_function):
    """Encode the train/test feature matrices with ``dA.test_dA`` and save them.

    Both inputs are loaded with ``file2matrix``, zero-padded to a common
    column count, pickled (gzip-compressed) to ``pickle_path``, passed through
    ``dA.test_dA``, and the two returned matrices are written out with
    ``matrix2file``.

    Args:
        train_path: Input file read with ``file2matrix`` (training set).
        test_path: Input file read with ``file2matrix`` (test set).
        new_train_path: Destination handed to ``matrix2file`` for the encoded
            training matrix.
        new_test_path: Destination handed to ``matrix2file`` for the encoded
            test matrix.
        pickle_path: Where the gzip-compressed pickle of both padded matrices
            is written; also passed to ``dA.test_dA`` as ``dataset``.
        model_path: Passed to ``dA.test_dA`` as ``output_path``.
        pecent_encode: Multiplier applied to the input width; the output
            width is ``int(dim_in * pecent_encode)``.
        my_training_epochs: Passed to ``dA.test_dA`` as ``training_epochs``.
        mysample_mathod: Passed to ``dA.test_dA`` as ``sample_method``.
        encode_function: Passed to ``dA.test_dA`` as ``encode_function``.

    Returns:
        None. Results are written to ``new_train_path`` and ``new_test_path``.
    """
    # ``*_left`` / ``*_right`` are the parts of each file besides the feature
    # matrix; they are handed back to ``matrix2file`` untouched.
    matrix_train, train_left, train_right = file2matrix(train_path)
    matrix_test, test_left, test_right = file2matrix(test_path)

    # The two sparse matrices may have different widths; pad to the wider one.
    dim_in = max(matrix_train.shape[1], matrix_test.shape[1])
    matrix_train = _pad_columns(matrix_train, dim_in)
    matrix_test = _pad_columns(matrix_test, dim_in)

    # Save the train and test sets (which will be used by the dA module).
    # try/finally guarantees the handle is closed even if pickling fails.
    f = gzip.open(pickle_path, 'wb')
    try:
        cPickle.dump([matrix_train, matrix_test], f)
    finally:
        f.close()

    dim_out = int(dim_in * pecent_encode)
    # Single pre-formatted string: prints the same text as the former
    # multi-item print statement (including the double space before dim_out).
    print('before sampling %s %s mapped to dim_out =  %s'
          % (matrix_train.shape, matrix_test.shape, dim_out))

    # NOTE(review): the meaning of the leading positional ``0`` is defined by
    # dA.test_dA and is not visible from here -- confirm against that module.
    matrix_train_new, matrix_test_new = dA.test_dA(
        0, dim_in, dim_out,
        learning_rate=0.1,
        training_epochs=my_training_epochs,
        dataset=pickle_path,
        batch_size=10,
        output_path=model_path,
        sample_method=mysample_mathod,
        encode_function=encode_function)

    matrix2file(matrix_train_new, train_left, train_right, new_train_path)
    matrix2file(matrix_test_new, test_left, test_right, new_test_path)
    print('encoded file written')
def test_dA():
    """Run ``dA.test_dA`` for one training epoch.

    ``output_folder`` is set to ``tmp_dA_plots``; whatever ``dA.test_dA``
    returns is discarded.
    """
    options = {'training_epochs': 1, 'output_folder': 'tmp_dA_plots'}
    dA.test_dA(**options)
def test_dA():
    """Run ``dA.test_dA`` for three training epochs.

    ``output_folder`` is set to ``tmp_dA_plots``; whatever ``dA.test_dA``
    returns is discarded.
    """
    run_dA = dA.test_dA
    run_dA(output_folder="tmp_dA_plots", training_epochs=3)
def test_dA():
    """Run ``dA.test_dA`` for three epochs and report the wall-clock time.

    The elapsed time, measured with ``time.time()``, is written to
    ``sys.stderr`` after the run completes.
    """
    started = time.time()
    dA.test_dA(training_epochs=3, output_folder='tmp_dA_plots')
    elapsed = time.time() - started
    message = "test_dA took %.3fs expected Xs in our buildbot" % elapsed
    print >> sys.stderr, message
def test_dA():
    """Time a three-epoch ``dA.test_dA`` run and log the duration to stderr.

    ``output_folder`` is set to ``tmp_dA_plots``; the duration is the
    difference between two ``time.time()`` readings taken around the call.
    """
    t_begin = time.time()
    dA.test_dA(output_folder='tmp_dA_plots', training_epochs=3)
    report = ("test_dA took %.3fs expected Xs in our buildbot"
              % (time.time() - t_begin))
    print >> sys.stderr, report
# -*- coding: utf-8 -*- """ Created on Sat Feb 27 01:04:40 2016 @author: bo run and save a dA model, to initialize my deep_clus model """ import dA dA.test_dA()