def least_squares_GD(y, tx, initial_w, max_iters, gamma):
    """Linear regression using full-batch gradient descent."""
    w = initial_w.reshape(-1, 1)
    y = y.reshape(-1, 1)
    for n_iter in range(max_iters):
        dw = compute_gradient(y, tx, w)
        w = w - gamma * dw
    loss = compute_loss(y, tx, w)
    return (w, loss)
def least_squares_SGD(y, tx, initial_w, max_iters, gamma):
    """Linear regression using stochastic gradient descent (batch size 1)."""
    w = initial_w.reshape(-1, 1)
    y = y.reshape(-1, 1)
    np.random.seed(23)
    for n_iter in range(max_iters):
        rnd_i = np.random.randint(0, len(y))  # choose a random sample in the dataset
        dw = compute_gradient(y[rnd_i], tx[rnd_i, :].reshape(1, -1), w)
        w = w - gamma * dw
    loss = compute_loss(y, tx, w)  # loss of the final w over the full dataset
    return (w, loss)
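##======================================================================
# compute_gradient and compute_loss are defined elsewhere in the project.
# Below is a minimal sketch consistent with the call sites above, assuming
# the MSE cost L(w) = ||y - tx @ w||^2 / (2N). This is an assumption for
# illustration, not the project's actual helper code.
import numpy as np

def compute_loss(y, tx, w):
    e = y - tx @ w                       # residuals, shape (N, 1)
    return np.sum(e ** 2) / (2 * len(y))

def compute_gradient(y, tx, w):
    e = y - tx @ w
    return -tx.T @ e / len(y)            # gradient of the MSE cost w.r.t. w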
for act in ("sigmoid", "relu"): s = 0.0 for j in range(100): config = [] w_list = [] b_list = [] for i in range(layer_number): config.append({"num": 3, "act_name": act}) w_list.append( np.matrix(np.random.normal(size=(3, 3))).astype("double")) b_list.append( np.matrix(np.random.normal(size=(3, 1))).astype("double")) y, dw, db = gradient.compute_gradient(config, w_list, b_list, "softmax_ce", x, y) s += abs(dw[i - 1]).mean() dw_mean_map[act].append(s) # print "[Activation Function = %s\tLayer = %d] : %f" %(act,layer_number,s) print(dw_mean_map["sigmoid"]) print(dw_mean_map["relu"]) t = np.arange(1, 21, 1) plot(t, dw_mean_map["sigmoid"], 'r--', t, dw_mean_map["relu"], 'bs') plt.ylabel('some numbers') plt.show() ''' همانطور که میبینید برای سیگموید واضحا دارد 0 میشود دلیلش هم این است که مشتق سیگموید ماکسیمم 0.25 است و برای ورودی های بزرگ تر به صورت نمایی کوچک میشود
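##======================================================================
# A quick standalone check of the claim above (illustrative, not part of the
# original experiment): the sigmoid derivative sigma'(x) = sigma(x) * (1 - sigma(x))
# never exceeds 0.25, so each extra sigmoid layer can shrink the
# backpropagated gradient by a factor of 4 or more.
import numpy as np

xs = np.linspace(-10.0, 10.0, 10001)
sig = 1.0 / (1.0 + np.exp(-xs))
dsig = sig * (1.0 - sig)
print(dsig.max())   # ~0.25, attained at x = 0
print(dsig[-1])     # vanishes exponentially for large |x|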
# First, let's make sure your numerical gradient computation is correct for a
# simple function. After you have implemented the numerical gradient check
# (the Python port of computeNumericalGradient.m), run the following:
if debug:
    gradient.check_gradient()

# Now we can use it to check your cost function and derivative calculations
# for the sparse autoencoder.
# J is the cost function.
J = lambda x: sparse_autoencoder.sparse_autoencoder_cost(x, visible_size,
                                                         hidden_size, lambda_,
                                                         sparsity_param, beta,
                                                         patches)
num_grad = gradient.compute_gradient(J, theta)

# Use this to visually compare the gradients side by side.
print(num_grad, grad)

# Compare the numerically computed gradients with the ones obtained from
# backpropagation.
diff = np.linalg.norm(num_grad - grad) / np.linalg.norm(num_grad + grad)
print(diff)
print("Norm of the difference between numerical and analytical gradients (should be < 1e-9)\n")

##======================================================================
## STEP 4: After verifying that your implementation of
#  sparseAutoencoderCost is correct, you can start training your sparse
#  autoencoder with minFunc (L-BFGS).

# Randomly initialize the parameters
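##======================================================================
# A minimal sketch of what gradient.compute_gradient(J, theta) is assumed to
# do here: central finite differences. The function name and eps value are
# illustrative, not the module's actual internals.
import numpy as np

def numerical_gradient(J, theta, eps=1e-4):
    grad = np.zeros_like(theta)
    for i in range(theta.size):
        e = np.zeros_like(theta)
        e.flat[i] = eps
        # Central difference: (J(theta + e) - J(theta - e)) / (2 * eps)
        grad.flat[i] = (J(theta + e) - J(theta - e)) / (2.0 * eps)
    return grad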
num_batches = data_size // batch_size
dataset_x_train = np.array(dataset_x_train)
dataset_y_train = np.array(dataset_y_train)
for i in range(num_epochs):
    # Shuffle the training set at the start of every epoch.
    perm = np.random.permutation(data_size)
    dataset_x_train = dataset_x_train[perm]
    dataset_y_train = dataset_y_train[perm]
    epoch_corr = 0
    for j in range(num_batches):
        # Accumulate the gradients over one mini-batch.
        dw = [0.0] * len(config)
        db = [0.0] * len(config)
        for k in range(batch_size):
            index = j * batch_size + k
            y, dw_i, db_i = gradient.compute_gradient(config, w_list, b_list,
                                                      "softmax_ce",
                                                      dataset_x_train[index],
                                                      dataset_y_train[index])
            dw = [acc + g for acc, g in zip(dw, dw_i)]
            db = [acc + g for acc, g in zip(db, db_i)]
            y = np.argmax(y)
            y_true = np.argmax(dataset_y_train[index])
            epoch_corr += (y == y_true)
        # Apply the averaged update; dw/db are ordered from output to input layer.
        for k in range(len(config) - 1, -1, -1):
            w_list[k] -= learning_rate * dw[len(config) - 1 - k] / batch_size
            b_list[k] -= learning_rate * db[len(config) - 1 - k] / batch_size
    # print(w_list[0][0, 0])
    print("epoch #{}, the accuracy is {:.6f}".format(i, float(epoch_corr) / data_size))
mu = 0.01  # step size for the gradient descent
lambda0 = 0.  # hyperparameter: penalisation weight; I look for sources with a constant spread, here = 1
# (named lambda0 because lambda is a reserved word in Python)
y1 = x1  # start from the mixed/observed sources
y2 = x2
indice = 1  # display counter
std1 = np.std(y1)
std2 = np.std(y2)
for i in range(nb_iter + 1):
    DJ = gr.compute_gradient(B, y1, y2, x1, x2, lambda0)
    B = B - mu * DJ  # update the separation matrix
    B[0, 0] /= std1
    B[0, 1] /= std1
    B[1, 0] /= std2
    B[1, 1] /= std2
    # Update the estimate of the separated sources
    # (approximation of the sources before mixing).
    y1 = B[0, 0] * x1 + B[0, 1] * x2
    y2 = B[1, 1] * x2 + B[1, 0] * x1
    std1 = np.std(y1)
    std2 = np.std(y2)
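##======================================================================
# The snippet above assumes x1, x2 (the observed mixtures), B, and nb_iter are
# already defined earlier in the script. One plausible, purely illustrative
# setup for two linearly mixed sources:
import numpy as np

n = 10000
s1 = np.sign(np.random.randn(n))           # source 1: a binary signal
s2 = np.random.uniform(-1.0, 1.0, size=n)  # source 2: a uniform signal
A = np.array([[1.0, 0.6],
              [0.5, 1.0]])                 # unknown mixing matrix
x1 = A[0, 0] * s1 + A[0, 1] * s2           # observed mixture 1
x2 = A[1, 0] * s1 + A[1, 1] * s2           # observed mixture 2
B = np.eye(2)                              # initial separation matrix
nb_iter = 1000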
# Resize and smooth the image.
image = origin.copy()
thumbnail_size = (image.width // thumbnail_factor, image.height // thumbnail_factor)
image.thumbnail(thumbnail_size, Image.NONE)
image = image.filter(ImageFilter.MedianFilter(7))
draw = ImageDraw.Draw(image)
width = image.size[0]
height = image.size[1]
pix = image.load()
if utils_general.is_debug:
    image.save(join(debug_dir, filename + "_image+median.png"), "PNG")

# Create the gradient image, then process it.
image_gradient, gradient_abs = compute_gradient(pix, width, height,
                                                gradient_threshold)
if utils_general.is_debug:
    image_gradient.save(join(debug_dir, filename + "_gradient.png"), "PNG")

external_borders_map = np.zeros((width, height))
external_borders_map = mark_external_borders(external_borders_map, gradient_abs,
                                             0, height, 0, width, 25, 1, False)
if utils_general.is_debug:
    map_to_image_and_save(image_gradient, external_borders_map, debug_dir,
                          filename, "_processed_gradient.png", mode="data-wise")

# Perform a quick window search to obtain the approximate location of the
# document.
processed_gradient_ii = compute_integral_image_buffer(external_borders_map,
                                                      height, width,
                                                      mode="buffer")
x_pt, y_pt, x_window_size, y_window_size = integral_image_window_detect(
    processed_gradient_ii, width, height)
if utils_general.is_debug:
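##======================================================================
# compute_integral_image_buffer is defined elsewhere; below is a minimal numpy
# sketch of the underlying idea, a summed-area table that makes any rectangle
# sum O(1). Function names are illustrative, not the project's actual
# implementation.
import numpy as np

def integral_image(a):
    # ii[i, j] = sum of a[:i, :j]; the zero padding removes edge cases.
    ii = np.zeros((a.shape[0] + 1, a.shape[1] + 1))
    ii[1:, 1:] = a.cumsum(axis=0).cumsum(axis=1)
    return ii

def rect_sum(ii, top, left, bottom, right):
    # Sum of a[top:bottom, left:right] from four corner lookups.
    return ii[bottom, right] - ii[top, right] - ii[bottom, left] + ii[top, left]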