# Train a sparse autoencoder on MNIST images with L-BFGS-B and display the
# learned first-layer weights.

import numpy as np
from scipy.optimize import minimize

from initial_params import initial_params
from sparse_autoencoder_cost import sparse_autoencoder_cost, sigmoid, der_sigmoid
from load_mnist import generate_patch, load_data
from display_network import display_network

# Network dimensions: one visible unit per pixel of a 28x28 image.
visible_size = 28 * 28
hidden_size = 196

# Hyper-parameters forwarded unchanged to sparse_autoencoder_cost.
# NOTE(review): presumably target activation, weight-decay coefficient and
# sparsity-penalty weight respectively -- confirm against the cost function.
sparsity_param = 0.1
lamda = 0.003
beta = 3

# Keep the first 10000 columns of the transposed data set.
# NOTE(review): assumes load_data() returns one image per row, so that each
# column is one image after the transpose -- confirm against load_mnist.
images = np.transpose(load_data())[:, 0:10000]

# Not used below; the call is kept because generate_patch() may have side
# effects (e.g. consuming random state) that this script cannot see.
patches = generate_patch()

theta = initial_params(visible_size, hidden_size)


def J(th):
    """Return what sparse_autoencoder_cost returns for parameter vector *th*.

    ``minimize`` is called with ``jac=True`` below, so this must be a
    ``(cost, gradient)`` pair.
    """
    return sparse_autoencoder_cost(
        visible_size,
        hidden_size,
        th,
        sigmoid,
        der_sigmoid,
        lamda,
        beta,
        sparsity_param,
        images,
    )


options_ = {"maxiter": 800, "disp": True}

# minimize() expects a one-dimensional starting point; recent SciPy releases
# raise on anything else, while older ones flattened it silently. Flattening
# here is a no-op when theta is already 1-D.
result = minimize(
    J,
    np.ravel(theta),
    method="L-BFGS-B",
    jac=True,
    options=options_,
)
opt_theta = result.x

# Parenthesised form prints the same text on Python 2 and also runs on Python 3.
print(result)

# The first hidden_size * visible_size entries are viewed as a
# (hidden_size, visible_size) matrix and transposed to
# (visible_size, hidden_size) before being handed to display_network.
W1 = (
    opt_theta[0 : hidden_size * visible_size]
    .reshape(hidden_size, visible_size)
    .transpose()
)
display_network(W1)
from sparse_autoencoder_cost import sparse_autoencoder_cost, sigmoid, der_sigmoid from load_mnist import generate_patch, load_data visible_size = 28 * 28 hidden_size = 196 sparsity_param = 0.1 lamda = 0.003 beta = 3 images = np.transpose(load_data())[:, 0:10000] theta = initial_params(visible_size, hidden_size) x = generate_patch() (cost, der) = sparse_autoencoder_cost(visible_size, hidden_size, theta, lambda x : sigmoid(x), lambda x : der_sigmoid(x), lamda, beta, sparsity_param, images) print 'real der value : ' , der[0] # the value of analytic derivation of parameters epsilon = 0.00001 tmp= np.zeros((theta.shape[0], 1)) tmp[0][0] = 1 cost1 = sparse_autoencoder_cost(visible_size, hidden_size, theta + tmp*epsilon, lambda x : sigmoid(x), lambda x : der_sigmoid(x), lamda, beta, sparsity_param, images) cost2 = sparse_autoencoder_cost(visible_size, hidden_size, theta - tmp*epsilon,