import sys
import pickle

import numpy as np
import matplotlib.pyplot as plt
from scipy import optimize
from dpark import DparkContext

# Project-level helpers (sample_image, initial_parameter, compute_cost,
# compute_grad, display_effect, vecstack2stack, read_wegiht_bias, to_jpg,
# ReLU, stocha_grad_desc_agagrad) are defined elsewhere in the project.


def main(argv):
    # Visualization entry point: load saved weights/biases and render the
    # learned features to a JPEG file.
    assert len(argv) == 3, "Wrong command line parameters"
    wgt = read_wegiht_bias(argv[1])
    im = display_effect(wgt)
    plt.axis("off")
    plt.savefig(to_jpg(argv[2]))
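# --- Illustrative sketch (not from the project source) ---------------------
# A minimal guess at the two helpers the entry point above relies on, under
# these assumptions: `to_jpg` just normalizes the output filename, and
# `display_effect` tiles each row of a (hidden x visible) weight matrix into
# a square grid of patches for matplotlib. The project's real versions may
# differ.
def to_jpg(name):
    # Append the .jpg suffix unless the caller already supplied it.
    return name if name.endswith(".jpg") else name + ".jpg"


def display_effect(W):
    # Each row of W holds one hidden unit's weights over a square patch,
    # e.g. 64 visible units -> 8x8 patches, 25 hidden units -> a 5x5 grid.
    n_hidden, n_visible = W.shape
    side = int(np.sqrt(n_visible))
    grid = int(np.ceil(np.sqrt(n_hidden)))
    canvas = np.zeros((grid * side, grid * side))
    for i in range(n_hidden):
        r, c = divmod(i, grid)
        patch = W[i].reshape(side, side)
        patch = patch / (np.abs(patch).max() + 1e-8)  # normalize contrast
        canvas[r * side:(r + 1) * side, c * side:(c + 1) * side] = patch
    return plt.imshow(canvas, cmap="gray")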
def main():
    # Loading data
    print "Loading..."
    data = sample_image()

    # Initialize networks
    visible_size = 64          # number of input units
    hidden_size = [25, 16, 9]  # number of hidden units of each layer
    # Weight decay and sparsity weights. (An earlier sigmoid DEBUG run used
    # lamb = 0 -- no weight decay -- and beta = 0.01.)
    lamb = 0.0001  # weight decay parameter
    beta = 3       # weight of the sparsity penalty term

    # dpark initialize
    dpark_ctx = DparkContext()

    # Start training with mini-batch SGD (AdaGrad), in a stack-wise greedy
    # process: each layer is trained on the previous layer's activations.
    layer_ind = range(1, len(hidden_size) + 1)
    layer_size = [visible_size] + hidden_size
    opttheta = dict()  # parameter vector of the stacked AE
    img = dict()       # visualizations of learned features

    # Desired average activation of the hidden units.
    # Standard: 64 units -> sparsity parameter 0.01, scaled with layer width.
    sparsity_param = dict()
    for ind in layer_ind:
        sparsity_param[ind] = layer_size[ind - 1] * 0.01 / 64

    for ind in layer_ind:
        print "start training layer No.%d" % ind
        # Obtain random initial parameters for this layer
        theta = initial_parameter(layer_size[ind], layer_size[ind - 1])
        # SGD with mini-batch
        options = (data, layer_size[ind - 1], layer_size[ind], lamb,
                   sparsity_param[ind], beta, dpark_ctx)
        opttheta[ind] = stocha_grad_desc_agagrad(compute_cost, compute_grad,
                                                 theta, options,
                                                 step_size_init=0.2,
                                                 max_iter=25, tol=1e-7)
        # Prepare the next layer's input: theta is laid out as
        # [W1, W2, b1, b2], so W1 and b1 are sliced out of the flat vector.
        W = opttheta.get(ind)[:layer_size[ind] * layer_size[ind - 1]]\
            .reshape(layer_size[ind], layer_size[ind - 1])
        b = opttheta.get(ind)[2 * layer_size[ind] * layer_size[ind - 1]:
                              2 * layer_size[ind] * layer_size[ind - 1] +
                              layer_size[ind]].reshape(layer_size[ind], 1)
        data = ReLU(np.dot(W, data) + b)
        # Visualize the learned features
        img[ind] = display_effect(W)
        plt.axis('off')
        plt.savefig(str(ind) + '.jpg')
        # DEBUG: dump the first layer's parameters and stop early
        fout = open("theta_DEBUG.pkl", "wb")
        pickle.dump((W, b), fout)
        fout.close()
        sys.exit()

    # Trained parameters of the stacked AE
    para_stack = vecstack2stack(opttheta, hidden_size, visible_size)

    # Save trained weights and biases
    out = open("weights_bias.pkl", "wb")
    pickle.dump(para_stack, out)
    out.close()
    print "Mission complete!"
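# --- Illustrative sketch (not from the project source) ---------------------
# A minimal guess at the activation and the AdaGrad driver used above.
# Assumptions: `cost_func(theta, *options)` returns a scalar,
# `grad_func(theta, *options)` returns an array shaped like `theta`, and the
# full batch in `options` is used at every step (the real version presumably
# samples mini-batches, possibly distributing the work through dpark_ctx).
def ReLU(x):
    # Elementwise rectified linear unit.
    return np.maximum(x, 0.0)


def stocha_grad_desc_agagrad(cost_func, grad_func, theta, options,
                             step_size_init=0.2, max_iter=25, tol=1e-7):
    hist = np.zeros_like(theta)  # running sum of squared gradients
    prev_cost = np.inf
    for _ in range(max_iter):
        grad = grad_func(theta, *options)
        hist += grad ** 2
        # AdaGrad: each coordinate's step shrinks with its gradient history.
        theta = theta - step_size_init * grad / (np.sqrt(hist) + 1e-8)
        cost = cost_func(theta, *options)
        if abs(prev_cost - cost) < tol:
            break
        prev_cost = cost
    return theta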
def main():
    # Loading data
    print "Loading..."
    data_train = sample_image()

    # Initialize networks
    visible_size = 64          # number of input units
    hidden_size = [25, 16, 9]  # number of hidden units of each layer
    lamb = 0.0001              # weight decay parameter
    beta = 3                   # weight of the sparsity penalty term

    # dpark initialize
    dpark_ctx = DparkContext()

    # Start training; L-BFGS is adopted, in a stack-wise greedy process:
    # each layer is trained on the previous layer's activations.
    layer_ind = range(1, len(hidden_size) + 1)
    layer_size = [visible_size] + hidden_size

    # Desired average activation of the hidden units.
    # Standard: 64 units -> sparsity parameter 0.01, scaled with layer width.
    sparsity_param = dict()
    for ind in layer_ind:
        sparsity_param[ind] = layer_size[ind - 1] * 0.01 / 64

    data = data_train
    opttheta = dict()  # parameter vector of the stacked AE
    img = dict()       # visualizations of learned features
    for ind in layer_ind:
        print "start training layer No.%d" % ind
        # Obtain random initial parameters for this layer
        theta = initial_parameter(layer_size[ind], layer_size[ind - 1])
        # Training begins
        options = (data, layer_size[ind - 1], layer_size[ind], lamb,
                   sparsity_param[ind], beta, dpark_ctx)
        opt = optimize.fmin_l_bfgs_b(compute_cost, theta, compute_grad,
                                     options)
        opttheta[ind] = opt[0]
        # Feed this layer's (linear) activations to the next layer
        W = opttheta.get(ind)[:layer_size[ind] * layer_size[ind - 1]]\
            .reshape(layer_size[ind], layer_size[ind - 1])
        data = np.dot(W, data)
        # Visualize the learned features
        img[ind] = display_effect(W)
        plt.axis('off')
        plt.savefig(str(ind) + '.jpg')

    # Trained parameters of the stacked AE
    para_stack = vecstack2stack(opttheta, hidden_size, visible_size)

    # Save trained weights and biases
    out = open("weights_bias.pkl", "wb")
    pickle.dump(para_stack, out)
    out.close()
    print "Mission complete!"
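# --- Calling-convention check (toy example, not project code) --------------
# scipy's optimize.fmin_l_bfgs_b(func, x0, fprime, args) passes the extra
# `args` tuple to both func and fprime and returns (x_opt, f_opt, info),
# which is why the loop above keeps `opt[0]`. A self-contained sanity check
# on a toy quadratic:
def toy_cost(x, target):
    return 0.5 * np.sum((x - target) ** 2)


def toy_grad(x, target):
    return x - target


def _lbfgs_sanity_check():
    # Minimize the toy quadratic; x_opt converges to [0, 1, 2, 3, 4].
    x_opt, f_opt, info = optimize.fmin_l_bfgs_b(
        toy_cost, np.zeros(5), toy_grad, args=(np.arange(5.0),))
    return x_opt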
def main():
    # Loading data
    print "Loading..."
    data_train = sample_image()

    # Initialize networks
    visible_size = 64          # number of input units
    hidden_size = [25, 16, 9]  # number of hidden units of each layer
    lamb = 0.0001              # weight decay parameter
    beta = 3                   # weight of the sparsity penalty term

    # Random training and testing sets, kept for DEBUG:
    # data_train = np.random.rand(32, 100)  # 100 samples of dimensionality 32
    # data_test = np.random.rand(128, 10)

    # Start training; L-BFGS is adopted, in a stack-wise greedy process:
    # each layer is trained on the previous layer's activations.
    layer_ind = range(1, len(hidden_size) + 1)
    layer_size = [visible_size] + hidden_size

    # Desired average activation of the hidden units.
    # Standard: 64 units -> sparsity parameter 0.01, scaled with layer width.
    sparsity_param = dict()
    for ind in layer_ind:
        sparsity_param[ind] = layer_size[ind - 1] * 0.01 / 64

    data = data_train
    opttheta = dict()  # parameter vector of the stacked AE
    img = dict()       # visualizations of learned features
    for ind in layer_ind:
        print "start training layer No.%d" % ind
        # Obtain random initial parameters for this layer
        theta = initial_parameter(layer_size[ind], layer_size[ind - 1])
        # Training begins
        options = (data, layer_size[ind - 1], layer_size[ind], lamb,
                   sparsity_param[ind], beta)
        opt = optimize.fmin_l_bfgs_b(compute_cost, theta, compute_grad,
                                     options)
        opttheta[ind] = opt[0]
        # Feed this layer's (linear) activations to the next layer
        W = opttheta.get(ind)[:layer_size[ind] * layer_size[ind - 1]]\
            .reshape(layer_size[ind], layer_size[ind - 1])
        data = np.dot(W, data)
        # Visualize the learned features
        img[ind] = display_effect(W)
        plt.axis('off')
        plt.savefig(str(ind) + '.jpg')

    # Trained parameters of the stacked AE: the pre-trained features
    para_stack = vecstack2stack(opttheta, hidden_size, visible_size)

    # Feed the pre-trained features into Softmax or an SVM
    # TO BE CONTINUED!
    print "Mission complete!"
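# --- Illustrative sketch (not from the project source) ---------------------
# A minimal guess at `initial_parameter(hidden, visible)`, consistent with
# how the flat vector is sliced above: layout [W1, W2, b1, b2], with W1 of
# shape (hidden, visible) in the first hidden*visible entries and b1 at
# offset 2*hidden*visible. Weights use the usual sparse-autoencoder uniform
# init over [-r, r]; biases start at zero. The project's real init may
# differ.
def initial_parameter(hidden, visible):
    r = np.sqrt(6.0 / (hidden + visible + 1))  # symmetry-breaking interval
    W1 = np.random.uniform(-r, r, (hidden, visible))
    W2 = np.random.uniform(-r, r, (visible, hidden))
    b1 = np.zeros(hidden)
    b2 = np.zeros(visible)
    return np.concatenate((W1.ravel(), W2.ravel(), b1, b2))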