Example #1
import numpy as np

# NOTE: deepnet, dictionary_to_vector, vector_to_dictionary and
# O_gradients_to_vector are helpers from this project; their import paths
# are assumed here.

def O_gradients_check(parameters, O_grads, X, Y, epsilon=1e-7):
    """
    Checks that backward propagation computes the gradients of Y correctly,
    by comparing them against a numerical approximation obtained from the
    forward pass (FFNN_paper_model).
    
    Arguments:
    parameters -- python dictionary containing your parameters "Wl", "bl"
    O_grads -- output of linear_activation_backward, contains O gradients of Y with respect to the parameters. 
    X -- input datapoint, of shape (input size, 1)
    Y -- output of the deepnet
    epsilon -- tiny shift to the input to compute approximated gradient with formula:
                  O_gradapprox = (Y(+) - Y(-))/(2*epsilon)
    
    Returns:
    difference -- difference between the approximated gradient and the backward propagation gradient:
                  || O_grads - O_gradapprox ||_2 / (|| O_grads ||_2 + || O_gradapprox ||_2)
    """
    
    # Set-up variables
    theta, _ = dictionary_to_vector(parameters)
    O_theta = O_gradients_to_vector(O_grads)
    num_parameters = theta.shape[0]
    Y_plus = np.zeros((num_parameters, 1)) # Vector
    Y_minus = np.zeros((num_parameters, 1))
    O_gradapprox = np.zeros((num_parameters, 1))
    
    # Compute gradapprox
    for i in range(num_parameters):
        
        # Compute Y_plus[i]: shift the i-th parameter up by epsilon and
        # re-run the forward pass
        thetaplus = np.copy(theta)
        thetaplus[i][0] = thetaplus[i][0] + epsilon
        Y_plus[i] = deepnet.FFNN_paper_model(X, vector_to_dictionary(thetaplus))[1]
        
        # Compute Y_minus[i]: shift the i-th parameter down by epsilon and
        # re-run the forward pass
        thetaminus = np.copy(theta)
        thetaminus[i][0] = thetaminus[i][0] - epsilon
        Y_minus[i] = deepnet.FFNN_paper_model(X, vector_to_dictionary(thetaminus))[1]
        
        # Compute gradapprox[i]
        O_gradapprox[i] = (Y_plus[i] - Y_minus[i]) / (2*epsilon)
    
    # Compare gradapprox to the backward propagation gradients via the
    # relative difference
    numerator = np.linalg.norm(O_theta - O_gradapprox)
    denominator = np.linalg.norm(O_theta) + np.linalg.norm(O_gradapprox)
    difference = numerator / denominator

    if difference > 1e-7:
        print ("\033[93m" + "There is a mistake in the backward propagation! difference = " + str(difference) + "\033[0m")
    else:
        print ("\033[92m" + "Your backward propagation works perfectly fine! difference = " + str(difference) + "\033[0m")
    
    return difference
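
A minimal usage sketch, reusing the deepnet and gradient_descent calls that appear in the driver script at the end of this section (the exact module layout is an assumption):

L = 6
X = np.array([-1, 1, -1, 1, -1, 1]).reshape((L, 1))
parameters = deepnet.initialize_parameters(L, n_h=2 * L, seed=1234, sigma=0.01)
phi, Y, Z = deepnet.FFNN_paper_model(X, parameters)
O_grads = gradient_descent.compute_O_operator(X, parameters, Z)
difference = O_gradients_check(parameters, O_grads, X, Y)
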
Example #2
import numpy as np

def compute_h_local(X, phi, parameters, J1=1, J2=0.4):
    """ 
    Only valid for the perturbed (J1-J2) Heisenberg model:
    H = J1 * sum_i (sigma_i . sigma_{i+1}) + J2 * sum_i (sigma_i . sigma_{i+2})

    Argument:
    X -- |sigma>
    phi -- phi(|sigma>, parameters)
    parameters -- python dictionary containing all parameters (output of initialization function)
    J1 -- J1 parameter in J1-J2 Heisenberg model
    J2 -- J2 parameter in J1-J2 Heisenberg model   
    
    Returns:
    h_local -- Local Hamiltonian of the input |sigma> X
    """

    L = X.shape[0]

    h_local = 0

    for i in range(L):
        """ consider the nearst neighbour"""
        if X.item(i) * X.item((i + 1) % L) == 1:
            """ This case includes (1,1) and (-1,-1)"""
            h_local += J1

        if X.item(i) * X.item((i + 1) % L) == -1:
            # Antiparallel spins, (1,-1) or (-1,1): off-diagonal
            # (spin-exchange) contribution
            new_X = np.copy(X)
            new_X[[i, (i + 1) % L]] = new_X[[(i + 1) % L, i]]
            new_phi = deepnet.FFNN_paper_model(new_X, parameters)[0]
            h_local += J1 * (2 * new_phi / phi - 1)
        """consider the next nearst neighbour"""
        if X.item(i) * X.item((i + 2) % L) == 1:
            h_local += J2

        if X.item(i) * X.item((i + 2) % L) == -1:
            new_X = np.copy(X)
            new_X[[i, (i + 2) % L]] = new_X[[(i + 2) % L, i]]
            new_phi = deepnet.FFNN_paper_model(new_X, parameters)[0]
            h_local += J2 * (2 * new_phi / phi - 1)

    return h_local
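
Where the off-diagonal factor (2 * new_phi / phi - 1) comes from: for spin-1/2 Pauli matrices, sigma_i . sigma_j = 2 * SWAP - I on two sites, so acting on an antiparallel pair gives -|sigma> plus twice the exchanged configuration; dividing through by phi(|sigma>) yields exactly 2 * new_phi / phi - 1. A self-contained numpy check of the two-site identity (illustrative only):

import numpy as np

sx = np.array([[0, 1], [1, 0]])
sy = np.array([[0, -1j], [1j, 0]])
sz = np.array([[1, 0], [0, -1]])

# sigma_i . sigma_j on two sites
heis = sum(np.kron(s, s) for s in (sx, sy, sz))

# SWAP exchanges the two spins: |01> <-> |10>
swap = np.array([[1, 0, 0, 0],
                 [0, 0, 1, 0],
                 [0, 1, 0, 0],
                 [0, 0, 0, 1]])

assert np.allclose(heis, 2 * swap - np.eye(4))
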
Example #3
import random

import numpy as np

def markov_chain(X, phi, parameters, n_sample=1000):
    """
    Argument:
    X -- |sigma_0>
    phi -- phi(|sigma_0>, parameters)
    parameters -- python dictionary containing all parameters (output of initialization function)
    n_sample -- number of samples to draw (length of the Markov chain)
    
    Returns:
    train_set -- Training set with each row representing one |sigma>
    train_set_phi -- The wavefunction of each spin configuration in train_set
    """

    L = X.shape[0]

    train_set = np.copy(X.T)
    train_set_phi = [phi]

    for n in range(1, n_sample):

        # Copy the last spin configuration
        propose_X = np.copy(train_set[n - 1])

        # Pick a random site i and a random swap distance (dmax = 2)
        i = random.randint(0, L - 1)
        distance = random.randint(-2, 2)
        j = (i + distance) % L

        # Propose a new configuration by swapping spins i and j
        propose_X[[i, j]] = propose_X[[j, i]]
        propose_X = propose_X.reshape((1, L))

        propose_phi = deepnet.FFNN_paper_model(propose_X.T, parameters)[0]

        # Metropolis accept/reject step
        r = min(1, propose_phi**2 / train_set_phi[n - 1]**2)
        random_number = random.uniform(0, 1)

        if random_number > r:  # rejected: repeat the previous configuration
            train_set = np.r_[train_set, train_set[n - 1].reshape((1, L))]
            train_set_phi.append(train_set_phi[n - 1])
        else:  # accepted: append the proposed configuration
            train_set = np.r_[train_set, propose_X]
            train_set_phi.append(propose_phi)

    return train_set, train_set_phi
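
A usage sketch for the sampler, with X and parameters set up as in the sketch after Example #1 (the shape assertions follow from the code above):

phi = deepnet.FFNN_paper_model(X, parameters)[0]
train_set, train_set_phi = markov_chain(X, phi, parameters, n_sample=1000)
assert train_set.shape == (1000, X.shape[0])  # one |sigma> per row
assert len(train_set_phi) == 1000
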
def compute_S_matrix(train_set, parameters):
    """
    Compute the S matrix for Stochastic Reconfiguration

    Arguments:
    train_set -- Training set with each row representing one |sigma>
    parameters -- python dictionary containing your parameters: Wl, bl

    Returns:
    S_matrix -- covariance matrix of the O operators,
                <O O^T> - <O><O^T>, estimated over train_set
    """

    n_sample = len(train_set)

    # Python dictionary containing all O_theta vectors:
    # O_thetas['x0'], O_thetas['x1'], ...
    O_thetas = {}

    for i in range(len(train_set)):
        spin_configuration = train_set[i].reshape((-1, 1))
        phi, Y, Z = deepnet.FFNN_paper_model(spin_configuration, parameters)
        O_grads = gradient_descent.compute_O_operator(spin_configuration,
                                                      parameters, Z)
        O_thetas['x' + str(i)] = grad_check.O_gradients_to_vector(O_grads)

        if i == 0:
            O_thetas_average = np.copy(O_thetas['x0'])
        else:
            O_thetas_average = O_thetas_average + O_thetas['x' + str(i)]
        """ The above O_thetas_average has not been divided by n_sample"""

    O_thetas_average = O_thetas_average / n_sample

    for i in range(len(train_set)):
        O_diff = O_thetas['x' + str(i)] - O_thetas_average

        if i == 0:
            S_matrix = np.copy(np.dot(O_diff, O_diff.T))
        else:
            S_matrix = S_matrix + (np.dot(O_diff, O_diff.T))
        """ The above S matrix has not been divided by n_sample"""

    S_matrix = np.real(S_matrix / n_sample)

    assert (S_matrix.shape == (O_thetas_average.shape[0],
                               O_thetas_average.shape[0]))
    return S_matrix
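
How the S matrix is typically consumed in Stochastic Reconfiguration: the parameter update direction solves S . dtheta = F. A sketch under stated assumptions (the force vector F and the learning rate gamma are not part of this snippet, and the small diagonal shift is a common regularization choice):

def sr_update_direction(S_matrix, F, shift=1e-4):
    # Regularize S with a small diagonal shift, then solve S . dtheta = F
    S_reg = S_matrix + shift * np.eye(S_matrix.shape[0])
    return np.linalg.solve(S_reg, F)

# parameters would then be moved along -gamma * sr_update_direction(S_matrix, F)
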
L = 6
# Input spin configuration |sigma_0>
X = np.array([-1, 1, -1, 1, -1, 1]).reshape((L, 1))

# Initialize the network parameters W1, b1, W2, b2.

parameters = deepnet.initialize_parameters(L, n_h=2 * L, seed=1234, sigma=0.01)

costs = []
num_iterations = 300

for iter in range(0, num_iterations):
    # Feedforward

    phi, Y, Z = deepnet.FFNN_paper_model(X, parameters)

    # Generate train_set

    train_set, train_set_phi = cost_function.markov_chain(X,
                                                          phi,
                                                          parameters,
                                                          n_sample=1000)

    # Forward propagation is done above and was reused while generating the
    # train_set.

    # Compute cost
    hamiltonian = cost_function.compute_hamiltonian(train_set, train_set_phi,
                                                    parameters)