# Width of the hidden layer.
num_hid = 1024

# initialize weights
# Each weight matrix is Gaussian noise scaled by (its row count) ** -0.5;
# the (n, 1) column vectors w_b1 / w_b2 start at zero (presumably biases).
w_w1 = cm.CUDAMatrix(dim_in ** -0.5 * np.random.randn(dim_in, num_hid))
w_b1 = cm.CUDAMatrix(np.zeros((num_hid, 1)))
w_w2 = cm.CUDAMatrix(num_hid ** -0.5 * np.random.randn(num_hid, dim_out))
w_b2 = cm.CUDAMatrix(np.zeros((dim_out, 1)))

# initialize weight update matrices
# One zero-filled matrix per parameter, with the same shape as that parameter.
wu_w1 = cm.CUDAMatrix(np.zeros(w_w1.shape))
wu_b1 = cm.CUDAMatrix(np.zeros(w_b1.shape))
wu_w2 = cm.CUDAMatrix(np.zeros(w_w2.shape))
wu_b2 = cm.CUDAMatrix(np.zeros(w_b2.shape))

# initialize temporary storage
# Buffers with one column per minibatch element.
h = cm.empty((num_hid, batch_size))
out = cm.empty((dim_out, batch_size))
delta = cm.empty((num_hid, batch_size))

# Train neural network.
start_time = time.time()
for epoch in range(num_epochs):
    print("Epoch " + str(epoch + 1))
    err = []

    for batch in range(num_batches):
        # get current minibatch
        # Both the inputs and the labels are sliced over the same index range.
        batch_begin = batch * batch_size
        batch_end = batch_begin + batch_size
        inp = dev_train.slice(batch_begin, batch_end)
        target = dev_lbl.slice(batch_begin, batch_end)

        # apply momentum
# model parameters
# The visible size is taken from the first dimension of the data matrix.
num_vis = dat.shape[0]
num_hid = 4096

# initialize weights
# w_vh: small Gaussian noise (std 0.1); w_v: zeros; w_h: every entry -4.0.
# NOTE(review): w_v / w_h are presumably the visible / hidden biases -- confirm.
w_vh = cm.CUDAMatrix(0.1 * np.random.randn(num_vis, num_hid))
w_v = cm.CUDAMatrix(np.zeros((num_vis, 1)))
w_h = cm.CUDAMatrix(-4. * np.ones((num_hid, 1)))

# initialize weight updates
# Zero-filled, one per parameter, matching that parameter's shape.
wu_vh = cm.CUDAMatrix(np.zeros((num_vis, num_hid)))
wu_v = cm.CUDAMatrix(np.zeros((num_vis, 1)))
wu_h = cm.CUDAMatrix(np.zeros((num_hid, 1)))

# initialize temporary storage
# Buffers with one column per minibatch element.
v = cm.empty((num_vis, batch_size))
h = cm.empty((num_hid, batch_size))
r = cm.empty((num_hid, batch_size))

start_time = time.time()
for epoch in range(num_epochs):
    print("Epoch " + str(epoch + 1))
    err = []

    for batch in range(num_batches):
        # get current minibatch
        # v receives a copy of the slice; v_true keeps referring to the slice
        # itself.
        batch_begin = batch * batch_size
        batch_end = batch_begin + batch_size
        v_true = dev_dat.slice(batch_begin, batch_end)
        v.assign(v_true)

        # apply momentum
        wu_vh.mult(momentum)