def O_gradients_check(parameters, O_grads, X, Y, epsilon=1e-7):
    """
    Compare the analytic O gradients with a central finite-difference estimate.

    Every entry of the flattened parameter vector is shifted by +epsilon and
    -epsilon, the network is re-evaluated with deepnet.FFNN_paper_model, and
    the second element of its return value is differenced:
        O_gradapprox[i] = (Y(+) - Y(-)) / (2 * epsilon)

    Arguments:
    parameters -- python dictionary containing your parameters "Wl", "bl"
    O_grads -- gradients to verify (per the original notes, the output of
               linear_activation_backward); flattened with O_gradients_to_vector
    X -- input datapoint, of shape (input size, 1)
    Y -- output of the deepnet; not read by this function, kept so existing
         callers do not have to change
    epsilon -- tiny shift applied to each parameter to compute the approximated gradient

    Returns:
    difference -- || O_grads - O_gradapprox ||_2 / (|| O_grads ||_2 + || O_gradapprox ||_2),
                  or 0.0 when both norms are exactly zero
    """
    # Set-up variables
    theta, _ = dictionary_to_vector(parameters)
    O_theta = O_gradients_to_vector(O_grads)
    num_parameters = theta.shape[0]
    Y_plus = np.zeros((num_parameters, 1))
    Y_minus = np.zeros((num_parameters, 1))
    O_gradapprox = np.zeros((num_parameters, 1))

    # Compute gradapprox one parameter at a time
    for i in range(num_parameters):
        # Network output with parameter i nudged upwards
        thetaplus = np.copy(theta)
        thetaplus[i][0] = thetaplus[i][0] + epsilon
        Y_plus[i] = deepnet.FFNN_paper_model(
            X, vector_to_dictionary(thetaplus))[1]

        # Network output with parameter i nudged downwards
        thetaminus = np.copy(theta)
        thetaminus[i][0] = thetaminus[i][0] - epsilon
        Y_minus[i] = deepnet.FFNN_paper_model(
            X, vector_to_dictionary(thetaminus))[1]

        # Central difference
        O_gradapprox[i] = (Y_plus[i] - Y_minus[i]) / (2 * epsilon)

    # Compare gradapprox to the supplied gradients via the relative difference.
    numerator = np.linalg.norm(O_theta - O_gradapprox)
    denominator = np.linalg.norm(O_theta) + np.linalg.norm(O_gradapprox)
    if denominator == 0:
        # Both gradients are identically zero, so they agree exactly; dividing
        # would produce NaN (0 / 0) and a RuntimeWarning.
        difference = 0.0
    else:
        difference = numerator / denominator

    if difference > 1e-7:
        print("\033[93m" + "There is a mistake in the backward propagation! difference = " + str(difference) + "\033[0m")
    else:
        print("\033[92m" + "Your backward propagation works perfectly fine! difference = " + str(difference) + "\033[0m")

    return difference
def compute_h_local(X, phi, parameters, J1=1, J2=0.4):
    """
    Local energy of one spin configuration for the perturbed (J1-J2) Heisenberg chain:
        H = J1 * sum (sigma_i * sigma_i+1) + J2 * sum (sigma_i * sigma_i+2)
    with periodic boundaries (neighbour indices wrap modulo the chain length).

    Argument:
    X -- |sigma>, the spin configuration; entries are read by flat index
    phi -- phi(|sigma>, parameters), the wavefunction value for X
    parameters -- python dictionary containing all parameters (output of initialization function)
    J1 -- nearest-neighbour coupling in the J1-J2 Heisenberg model
    J2 -- next-nearest-neighbour coupling in the J1-J2 Heisenberg model

    Returns:
    h_local -- Local Hamiltonian of the input |sigma> X
    """
    n_sites = X.shape[0]
    # (neighbour offset, coupling strength): nearest first, then next-nearest,
    # so contributions are accumulated in the same order for every site.
    bonds = ((1, J1), (2, J2))

    energy = 0
    for site in range(n_sites):
        for offset, coupling in bonds:
            partner = (site + offset) % n_sites
            product = X.item(site) * X.item(partner)
            if product == 1:
                # Aligned pair, (1,1) or (-1,-1): diagonal contribution only.
                energy += coupling
            elif product == -1:
                # Anti-aligned pair, (1,-1) or (-1,1): exchange the two spins
                # and weight by the ratio of wavefunction values.
                swapped = np.copy(X)
                swapped[[site, partner]] = swapped[[partner, site]]
                swapped_phi = deepnet.FFNN_paper_model(swapped, parameters)[0]
                energy += coupling * (2 * swapped_phi / phi - 1)
            # Any other product contributes nothing.

    return energy
def markov_chain(X, phi, parameters, n_sample=1000):
    """
    Build a chain of spin configurations with accept/reject sampling.

    Each step copies the latest configuration, exchanges the spins at a
    random site i and at site (i + distance) % L with distance drawn from
    [-2, 2], and accepts the proposal with probability
    min(1, propose_phi**2 / current_phi**2). A rejected proposal repeats the
    latest configuration. A distance of 0 proposes the unchanged configuration.

    Argument:
    X -- |sigma_0>, the starting configuration; assumed to be a column of
         shape (L, 1) so that X.T is a single row
    phi -- phi(|sigma_0>, parameters)
    parameters -- python dictionary containing all parameters (output of initialization function)
    n_sample -- number of train set (length of the markov chain)

    Returns:
    train_set -- Training set with each row representing one |sigma>
    train_set_phi -- The wavefunction of each spin configuration in train_set
    """
    L = X.shape[0]

    # Collect the (1, L) rows in a list and concatenate once at the end.
    # Growing an array with np.r_ on every step re-copies the whole chain,
    # which is quadratic in n_sample.
    rows = [np.copy(X.T)]
    train_set_phi = [phi]

    for n in range(1, n_sample):
        current_row = rows[-1]

        # Start from a copy of the last configuration and swap two sites.
        propose_X = np.copy(current_row[0])
        i = random.randint(0, L - 1)
        distance = random.randint(-2, 2)  # dmax = 2
        j = (i + distance) % L
        propose_X[[i, j]] = propose_X[[j, i]]
        propose_X = propose_X.reshape((1, L))

        propose_phi = deepnet.FFNN_paper_model(propose_X.T, parameters)[0]

        # Accept / Reject
        r = min(1, propose_phi**2 / train_set_phi[n - 1]**2)
        random_number = random.uniform(0, 1)
        if random_number <= r:
            rows.append(propose_X)
            train_set_phi.append(propose_phi)
        else:
            rows.append(current_row)
            train_set_phi.append(train_set_phi[n - 1])

    train_set = np.concatenate(rows, axis=0)
    return train_set, train_set_phi
def compute_S_matrix(train_set, parameters):
    """
    Compute the S matrix for Stochastic Reconfiguration.

    For every sample the O vector is obtained from
    gradient_descent.compute_O_operator and flattened with
    grad_check.O_gradients_to_vector. S is the sample average of
    (O - <O>) (O - <O>)^T, with only the real part returned.

    Arguments:
    train_set -- Training set with each row representing one |sigma>
    parameters -- python dictionary containing your parameters: Wl, bl

    Returns:
    S_matrix -- real square matrix whose side equals the length of one flattened O vector

    Raises:
    ValueError -- if train_set is empty (the averages would be undefined)
    """
    n_sample = len(train_set)
    if n_sample == 0:
        raise ValueError("train_set must contain at least one spin configuration")

    # One flattened O vector per sample, in train_set order.
    O_thetas = []
    for row in train_set:
        spin_configuration = row.reshape((-1, 1))
        _, _, Z = deepnet.FFNN_paper_model(spin_configuration, parameters)
        O_grads = gradient_descent.compute_O_operator(spin_configuration, parameters, Z)
        O_thetas.append(grad_check.O_gradients_to_vector(O_grads))

    # Sample mean, accumulated sequentially so results match the previous
    # implementation bit for bit.
    O_thetas_average = np.copy(O_thetas[0])
    for O_theta in O_thetas[1:]:
        O_thetas_average = O_thetas_average + O_theta
    O_thetas_average = O_thetas_average / n_sample

    # Sum of outer products of the centred vectors.
    # NOTE(review): the plain transpose is used (no complex conjugation) and
    # the imaginary part is discarded below; confirm this is intended if the
    # O vectors can be complex.
    S_matrix = None
    for O_theta in O_thetas:
        O_diff = O_theta - O_thetas_average
        outer = np.dot(O_diff, O_diff.T)
        S_matrix = outer if S_matrix is None else S_matrix + outer
    S_matrix = np.real(S_matrix / n_sample)

    assert (S_matrix.shape == (O_thetas_average.shape[0], O_thetas_average.shape[0]))
    return S_matrix
L = 6 # input X = np.array([-1, 1, -1, 1, -1, 1]).reshape((L, 1)) # Initialize parameters, then retrieve W1, b1, W2, b2. parameters = deepnet.initialize_parameters(L, n_h=2 * L, seed=1234, sigma=0.01) costs = [] num_iterations = 300 for iter in range(0, num_iterations): # Feedforward phi, Y, Z = deepnet.FFNN_paper_model(X, parameters) # Generate train_set train_set, train_set_phi = cost_function.markov_chain(X, phi, parameters, n_sample=1000) # Forward propagation: LINEAR -> RELU -> LINEAR -> SIGMOID. Inputs: "X, W1, b1". Output: "A1, cache1, A2, cache2". """ Finished above and in the process of generating the train_set """ # Compute cost hamiltonian = cost_function.compute_hamiltonian(train_set, train_set_phi, parameters)