Example 1
def forward_propagation_with_dropout(X, parameters, keep_prob=0.5):
    # Randomly shut down neurons in each layer (except the output layer)
    np.random.seed(1)

    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]
    W3 = parameters["W3"]
    b3 = parameters["b3"]
    # First hidden layer
    Z1 = np.dot(W1, X) + b1
    A1 = relu(Z1)
    D1 = np.random.rand(A1.shape[0], A1.shape[1])  # generate a random matrix D1
    D1 = D1 < keep_prob  # convert D1 to 0/1 using keep_prob as the threshold
    A1 = A1 * D1  # Step 3: shut down some neurons of A1
    A1 = A1 / keep_prob  # Step 4: scale up the neurons that were kept, so the expected output stays unchanged
    # Second hidden layer
    Z2 = np.dot(W2, A1) + b2
    A2 = relu(Z2)
    D2 = np.random.rand(A2.shape[0], A2.shape[1])
    D2 = D2 < keep_prob
    A2 = A2 * D2
    A2 = A2 / keep_prob
    # Output layer
    Z3 = np.dot(W3, A2) + b3
    A3 = sigmoid(Z3)

    cache = (Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3)

    return A3, cache
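The example above assumes that numpy, relu, sigmoid, and an initialized parameters dictionary are already in scope. A minimal usage sketch (an added illustration, not part of the collected example) for the 2 -> 20 -> 3 -> 1 architecture used throughout this page could look like this:

import numpy as np

# Hypothetical activation helpers, assumed to be in scope by the example above.
def relu(z):
    return np.maximum(0, z)

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

# Randomly initialized parameters matching the 2 -> 20 -> 3 -> 1 architecture.
np.random.seed(0)
parameters = {
    "W1": np.random.randn(20, 2) * 0.01, "b1": np.zeros((20, 1)),
    "W2": np.random.randn(3, 20) * 0.01, "b2": np.zeros((3, 1)),
    "W3": np.random.randn(1, 3) * 0.01,  "b3": np.zeros((1, 1)),
}

X = np.random.randn(2, 5)  # 5 training examples
A3, cache = forward_propagation_with_dropout(X, parameters, keep_prob=0.8)
print(A3.shape)  # (1, 5)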
Example 2
def drop_forward(X, parameters, keep_prob):
    ''' Forward pass with dropout:
        A[l] = A[l] * d[l], where d[l] is a 0/1 array with the same shape as A[l]. '''
    L = int(len(parameters) / 2)
    cache = {}
    cache['A' + str(0)] = X
    for l in range(1, L):
        cache['Z' + str(l)] = np.dot(
            parameters['W' + str(l)],
            cache['A' + str(l - 1)]) + parameters['b' + str(l)]
        cache['A' + str(l)] = relu(cache['Z' + str(l)])
        cache['d' + str(l)] = (np.random.rand(cache['A' + str(l)].shape[0],
                                              cache['A' + str(l)].shape[1]) <
                               keep_prob).astype(int)
        cache['A' + str(l)] = cache['A' + str(l)] * cache['d' + str(l)]
        cache['A' + str(l)] = cache['A' + str(l)] / keep_prob
        cache['W' + str(l)] = parameters['W' + str(l)]
        cache['b' + str(l)] = parameters['b' + str(l)]
    cache['Z' +
          str(L)] = np.dot(parameters['W' + str(L)],
                           cache['A' + str(L - 1)]) + parameters['b' + str(L)]
    cache['A' + str(L)] = sigmoid(cache['Z' + str(L)])
    cache['W' + str(L)] = parameters['W' + str(L)]
    cache['b' + str(L)] = parameters['b' + str(L)]
    return cache['A' + str(L)], cache
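Unlike the fixed three-layer versions, drop_forward works for any depth and returns a dict-style cache. A hedged usage sketch (hypothetical initialization; relu and sigmoid as defined in the sketch near Example 1):

import numpy as np

np.random.seed(0)
layers_dims = [2, 16, 8, 1]  # any depth works; dropout is applied to every hidden layer
parameters = {}
for l in range(1, len(layers_dims)):
    parameters['W' + str(l)] = np.random.randn(layers_dims[l], layers_dims[l - 1]) * 0.01
    parameters['b' + str(l)] = np.zeros((layers_dims[l], 1))

X = np.random.randn(2, 7)
AL, cache = drop_forward(X, parameters, keep_prob=0.8)
print(AL.shape)           # (1, 7)
print(cache['d1'].shape)  # dropout mask of the first hidden layer: (16, 7)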
Example 3
def forward_propagation_with_dropout(X, parameters, keep_prob=0.5):

    np.random.seed(1)
    
    # retrieve parameters
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]
    W3 = parameters["W3"]
    b3 = parameters["b3"]
    # LINEAR -> RELU -> LINEAR -> RELU -> LINEAR -> SIGMOID
    Z1 = np.dot(W1, X) + b1
    A1 = relu(Z1)
    # Steps 1-4 below correspond to the Steps 1-4 described above.
    D1 = np.random.rand(A1.shape[0], A1.shape[1])  # Step 1: initialize matrix D1 = np.random.rand(..., ...)
    D1 = D1 < keep_prob                            # Step 2: convert entries of D1 to 0 or 1 (using keep_prob as the threshold)
    A1 = A1 * D1                                   # Step 3: shut down some neurons of A1
    A1 = A1 / keep_prob                            # Step 4: scale the value of neurons that haven't been shut down
    Z2 = np.dot(W2, A1) + b2
    A2 = relu(Z2)
    D2 = np.random.rand(A2.shape[0], A2.shape[1])  # Step 1: initialize matrix D2 = np.random.rand(..., ...)
    D2 = D2 < keep_prob                            # Step 2: convert entries of D2 to 0 or 1 (using keep_prob as the threshold)
    A2 = A2 * D2                                   # Step 3: shut down some neurons of A2
    A2 = A2 / keep_prob                            # Step 4: scale the value of neurons that haven't been shut down
    Z3 = np.dot(W3, A2) + b3
    A3 = sigmoid(Z3)
    
    cache = (Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3)
    
    return A3, cache
Example 4
def forward_propagation_drop(X, parameters, keep_prob):
    np.random.seed(1)
    i = 1
    W = [None]
    B = [None]
    cache = []
    a = X
    while 'W' + str(i) in parameters:
        W.append(parameters['W' + str(i)])
        B.append(parameters['b' + str(i)])
        i += 1
    # Forward propagation starts here
    for j in range(1, i - 1):
        z = np.dot(W[j], a) + B[j]
        a = reg_utils.relu(z)
        D = np.random.rand(a.shape[0], a.shape[1])
        D = D < keep_prob
        a = a * D
        a /= keep_prob
        cache.append(z)
        cache.append(D)
        cache.append(a)
        cache.append(W[j])
        cache.append(B[j])
    z = np.dot(W[i - 1], a) + B[i - 1]
    a = reg_utils.sigmoid(z)
    cache.append(z)
    cache.append(a)
    cache.append(W[i - 1])
    cache.append(B[i - 1])
    return a, cache
Example 5
def forward_propagation_with_dropout(X, parameters, keep_prob=0.5):
    """
    Implements the forward propagation: LINEAR -> RELU + DROPOUT -> LINEAR -> RELU + DROPOUT -> LINEAR -> SIGMOID.
    
    Arguments:
    X -- input dataset, of shape (2, number of examples)
    parameters -- python dictionary containing your parameters "W1", "b1", "W2", "b2", "W3", "b3":
                    W1 -- weight matrix of shape (20, 2)
                    b1 -- bias vector of shape (20, 1)
                    W2 -- weight matrix of shape (3, 20)
                    b2 -- bias vector of shape (3, 1)
                    W3 -- weight matrix of shape (1, 3)
                    b3 -- bias vector of shape (1, 1)
    keep_prob - probability of keeping a neuron active during drop-out, scalar
    
    Returns:
    A3 -- last activation value, output of the forward propagation, of shape (1,1)
    cache -- tuple, information stored for computing the backward propagation
    """

    # retrieve parameters
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]
    W3 = parameters["W3"]
    b3 = parameters["b3"]

    # LINEAR -> RELU -> LINEAR -> RELU -> LINEAR -> SIGMOID
    Z1 = np.dot(W1, X) + b1
    A1 = relu(Z1)
    ### START CODE HERE ### (approx. 4 lines)         # Steps 1-4 below correspond to the Steps 1-4 described above.
    D1 = np.random.rand(
        A1.shape[0],
        A1.shape[1])  # Step 1: initialize matrix D1 = np.random.rand(..., ...)
    D1 = np.where(
        D1 < keep_prob, 1, 0
    )  # Step 2: convert entries of D1 to 0 or 1 (using keep_prob as the threshold)
    A1 = A1 * D1  # Step 3: shut down some neurons of A1
    A1 = A1 / keep_prob  # Step 4: scale the value of neurons that haven't been shut down
    ### END CODE HERE ###
    Z2 = np.dot(W2, A1) + b2
    A2 = relu(Z2)
    ### START CODE HERE ### (approx. 4 lines)

    D2 = np.random.rand(
        A2.shape[0],
        A2.shape[1])  # Step 1: initialize matrix D2 = np.random.rand(..., ...)
    D2 = np.where(
        D2 < keep_prob, 1, 0
    )  # Step 2: convert entries of D2 to 0 or 1 (using keep_prob as the threshold)
    A2 = A2 * D2  # Step 3: shut down some neurons of A2
    A2 = A2 / keep_prob  # Step 4: scale the value of neurons that haven't been shut down
    ### END CODE HERE ###
    Z3 = np.dot(W3, A2) + b3
    A3 = sigmoid(Z3)

    cache = (Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3)

    return A3, cache
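All of these examples store the masks D1 and D2 in cache so the backward pass can reuse them. As an added companion sketch (not one of the collected examples), a matching backward pass for this three-layer network, assuming a sigmoid output with cross-entropy loss, could look like this:

def backward_propagation_with_dropout(X, Y, cache, keep_prob):
    # Reuses the dropout masks D1 and D2 stored in cache by the forward pass.
    m = X.shape[1]
    (Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3) = cache

    dZ3 = A3 - Y  # gradient of cross-entropy loss w.r.t. Z3 for a sigmoid output
    dW3 = 1. / m * np.dot(dZ3, A2.T)
    db3 = 1. / m * np.sum(dZ3, axis=1, keepdims=True)

    dA2 = np.dot(W3.T, dZ3)
    dA2 = dA2 * D2           # apply the same mask as in the forward pass
    dA2 = dA2 / keep_prob    # and the same scaling
    dZ2 = np.multiply(dA2, np.int64(A2 > 0))  # ReLU derivative
    dW2 = 1. / m * np.dot(dZ2, A1.T)
    db2 = 1. / m * np.sum(dZ2, axis=1, keepdims=True)

    dA1 = np.dot(W2.T, dZ2)
    dA1 = dA1 * D1
    dA1 = dA1 / keep_prob
    dZ1 = np.multiply(dA1, np.int64(A1 > 0))
    dW1 = 1. / m * np.dot(dZ1, X.T)
    db1 = 1. / m * np.sum(dZ1, axis=1, keepdims=True)

    return {"dW3": dW3, "db3": db3, "dW2": dW2, "db2": db2, "dW1": dW1, "db1": db1}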
Example 6
def forward_propagation_with_dropout(X, parameters, keep_prob=0.5):
    np.random.seed(1)

    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]
    W3 = parameters["W3"]
    b3 = parameters["b3"]

    Z1 = np.dot(W1, X) + b1
    A1 = reg_utils.relu(Z1)

    # Apply dropout as follows
    D1 = np.random.rand(A1.shape[0],
                        A1.shape[1])  # Step 1: initialize matrix D1 = np.random.rand(..., ...)
    D1 = D1 < keep_prob  # Step 2: convert entries of D1 to 0 or 1 (using keep_prob as the threshold)
    A1 = A1 * D1  # Step 3: drop some nodes of A1 (set their values to 0 or False)
    A1 = A1 / keep_prob  # Step 4: scale the values of the nodes that were not dropped

    Z2 = np.dot(W2, A1) + b2
    A2 = reg_utils.relu(Z2)

    D2 = np.random.rand(A2.shape[0], A2.shape[1])  # rand (uniform), not randn, so the comparison below keeps ~keep_prob of the neurons
    D2 = D2 < keep_prob
    A2 = A2 * D2
    A2 = A2 / keep_prob

    Z3 = np.dot(W3, A2) + b3
    A3 = reg_utils.sigmoid(Z3)

    cache = (Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3)

    return A3, cache
Example 7
def forward_propagation_with_dropout(X, parameters, keep_prob=0.5):
    np.random.seed(1)

    W1 = parameters['W1']
    b1 = parameters['b1']
    W2 = parameters['W2']
    b2 = parameters['b2']
    W3 = parameters['W3']
    b3 = parameters['b3']

    Z1 = np.dot(W1, X) + b1
    A1 = relu(Z1)

    D1 = np.random.rand(A1.shape[0], A1.shape[1])
    D1 = D1 < keep_prob
    A1 = np.multiply(D1, A1)
    A1 = A1 / keep_prob

    Z2 = np.dot(W2, A1) + b2
    A2 = relu(Z2)

    D2 = np.random.rand(A2.shape[0], A2.shape[1])
    D2 = D2 < keep_prob
    A2 = np.multiply(D2, A2)
    A2 = A2 / keep_prob

    Z3 = np.dot(W3, A2) + b3
    A3 = sigmoid(Z3)

    cache = (Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3)

    return A3, cache
Example 8
def forward_propagation_with_dropout(X, parameters, keep_prob=0.5):
    np.random.seed(1)
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]
    W3 = parameters["W3"]
    b3 = parameters["b3"]

    Z1 = np.dot(W1, X) + b1
    A1 = relu(Z1)
    D1 = np.random.rand(A1.shape[0], A1.shape[1])
    D1 = D1 < keep_prob
    A1 = A1 * D1
    A1 = A1 / keep_prob

    Z2 = np.dot(W2, A1) + b2
    A2 = relu(Z2)
    D2 = np.random.rand(A2.shape[0], A2.shape[1])
    D2 = D2 < keep_prob
    A2 = A2 * D2
    A2 = A2 / keep_prob

    Z3 = np.dot(W3, A2) + b3
    A3 = sigmoid(Z3)

    cache = (Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3)

    return A3, cache
Example 9
def forward_propagation_with_dropout(X, params, keep_prob=0.5):
    np.random.seed(1)
    W1, b1 = params["W1"], params["b1"]
    W2, b2 = params["W2"], params["b2"]
    W3, b3 = params["W3"], params["b3"]

    Z1 = np.dot(W1, X) + b1
    A1 = reg_utils.relu(Z1)

    # layer 1 drop out
    D1 = np.random.rand(A1.shape[0], A1.shape[1]) < keep_prob
    A1 = np.multiply(A1, D1)
    A1 = A1 / keep_prob

    Z2 = np.dot(W2, A1) + b2
    A2 = reg_utils.relu(Z2)

    # layer 2 drop out
    D2 = np.random.rand(A2.shape[0], A2.shape[1]) < keep_prob
    A2 = np.multiply(A2, D2)
    A2 = A2 / keep_prob

    # No dropout at final layer
    Z3 = np.dot(W3, A2) + b3
    A3 = reg_utils.sigmoid(Z3)

    cache = (Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3)

    return A3, cache
Example 10
def forward_propagation_with_dropout(X, parameters, keep_prob=0.5):
    np.random.seed(1)

    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]
    W3 = parameters["W3"]
    b3 = parameters["b3"]

    # LINEAR -> RELU -> LINEAR -> RELU -> LINEAR -> SIGMOID
    Z1 = np.dot(W1, X) + b1
    A1 = relu(Z1)
    # Steps 1-4 below correspond to the Steps 1-4 described above.
    D1 = np.random.rand(
        A1.shape[0],
        A1.shape[1])  # Step 1: initialize matrix D1 = np.random.rand(..., ...)
    D1 = D1 < keep_prob  # Step 2: convert entries of D1 to 0 or 1 (using keep_prob as the threshold)
    A1 = A1 * D1  # Step 3: shut down some neurons of A1
    A1 = np.divide(
        A1, keep_prob
    )  # Step 4: scale the value of neurons that haven't been shut down
    """
    import numpy as np
>>> a=np.random.rand(2,3)
>>> a
array([[ 0.01838459,  0.70233192,  0.53293226],
       [ 0.81863108,  0.74747237,  0.05302554]])
>>> a=a<0.5
>>> a
array([[ True, False, False],
       [False, False,  True]], dtype=bool)
>>> b=a*0.3
>>> b
array([[ 0.3,  0. ,  0. ],
       [ 0. ,  0. ,  0.3]])
>>> b=np.divide(b,0.1)
>>> b
array([[ 3.,  0.,  0.],
       [ 0.,  0.,  3.]])

    """
    Z2 = np.dot(W2, A1) + b2
    A2 = relu(Z2)

    D2 = np.random.rand(
        A2.shape[0],
        A2.shape[1])  # Step 1: initialize matrix D2 = np.random.rand(..., ...)
    D2 = D2 < keep_prob  # Step 2: convert entries of D2 to 0 or 1 (using keep_prob as the threshold)
    A2 = A2 * D2  # Step 3: shut down some neurons of A2
    A2 = A2 / keep_prob  # Step 4: scale the value of neurons that haven't been shut down

    Z3 = np.dot(W3, A2) + b3
    A3 = sigmoid(Z3)

    cache = (Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3)

    return A3, cache
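The interpreter transcript embedded above walks through the mask-and-scale mechanics on a small array. A related sanity check (an added illustration, not from the example) is that the Step 4 division by keep_prob keeps the expected activation roughly unchanged:

import numpy as np

np.random.seed(0)
keep_prob = 0.5
A = np.random.rand(100, 10000)              # stand-in activations
D = np.random.rand(*A.shape) < keep_prob    # Steps 1-2: random 0/1 mask
A_drop = (A * D) / keep_prob                # Steps 3-4: mask, then rescale

print(A.mean(), A_drop.mean())              # the two means are close (about 0.5)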
Example 11
def forward_propagation_with_dropout(X, parameters, keep_prob=0.5):
    """
    Implements the forward propagation: LINEAR -> RELU + DROPOUT -> LINEAR -> RELU + DROPOUT -> LINEAR -> SIGMOID.

    Arguments:
    X -- input dataset, of shape (2, number of examples)
    parameters -- python dictionary containing your parameters "W1", "b1", "W2", "b2", "W3", "b3":
                    W1 -- weight matrix of shape (20, 2)
                    b1 -- bias vector of shape (20, 1)
                    W2 -- weight matrix of shape (3, 20)
                    b2 -- bias vector of shape (3, 1)
                    W3 -- weight matrix of shape (1, 3)
                    b3 -- bias vector of shape (1, 1)
    keep_prob - probability of keeping a neuron active during drop-out, scalar

    Returns:
    A3 -- last activation value, output of the forward propagation, of shape (1,1)
    cache -- tuple, information stored for computing the backward propagation
    """

    np.random.seed(1)

    # retrieve parameters
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]
    W3 = parameters["W3"]
    b3 = parameters["b3"]

    # LINEAR -> RELU -> LINEAR -> RELU -> LINEAR -> SIGMOID
    Z1 = np.dot(W1, X) + b1
    A1 = relu(Z1)

    # Step 1: initialize matrix D1 = np.random.rand(..., ...)
    # Step 2: convert entries of D1 to 0 or 1 (using keep_prob as the threshold)
    # Step 3: shut down some neurons of A1
    # Step 4: scale the value of neurons that haven't been shut down
    D1 = np.random.rand(A1.shape[0], A1.shape[1]) < keep_prob
    A1 = np.multiply(A1, D1)
    A1 /= keep_prob

    Z2 = np.dot(W2, A1) + b2
    A2 = relu(Z2)

    D2 = np.random.rand(A2.shape[0], A2.shape[1]) < keep_prob
    A2 = np.multiply(A2, D2)
    A2 /= keep_prob

    Z3 = np.dot(W3, A2) + b3
    A3 = sigmoid(Z3)

    cache = (Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3)

    return A3, cache
Example 12
def forward_propagation_with_dropout(X, parameters, keep_prob=0.5):
    """
    Implements forward propagation with randomly dropped nodes:
    LINEAR -> RELU + DROPOUT -> LINEAR -> RELU + DROPOUT -> LINEAR -> SIGMOID

    Arguments:
        X - input dataset, of shape (2, number of examples)
        parameters - python dictionary containing the parameters "W1", "b1", "W2", "b2", "W3", "b3"
            W1 - weight matrix of shape (20, 2)   # layers_dims = [2, 20, 3, 1]
            b1 - bias vector of shape (20, 1)
            W2 - weight matrix of shape (3, 20)
            b2 - bias vector of shape (3, 1)
            W3 - weight matrix of shape (1, 3)
            b3 - bias vector of shape (1, 1)
        keep_prob - probability of keeping a neuron active during dropout, scalar
    Returns:
        A3 - last activation value, of shape (1, 1), output of the forward propagation
        cache - tuple storing values needed for the backward propagation
    """
    np.random.seed(1)
    
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]
    W3 = parameters["W3"]
    b3 = parameters["b3"]
    
    #Linear -> Relu -> Linear -> Relu -> Linear -> sigmoid
    Z1 = np.dot(W1,X) + b1
    A1 = reg_utils.relu(Z1)
    
    # Steps 1-4 below correspond to Steps 1-4 described above
    D1 = np.random.rand(A1.shape[0],A1.shape[1])   # Step 1: initialize matrix D1
    D1 = D1 < keep_prob                            # Step 2: convert entries of D1 to 0 or 1
    A1 = A1 * D1                                   # Step 3: drop some nodes of A1 (set their values to 0 or False)
    A1 = A1 / keep_prob                            # Step 4: scale the values of the nodes that were not dropped
    
    Z2 = np.dot(W2,A1) + b2
    A2 = reg_utils.relu(Z2)

    # Steps 1-4 below correspond to Steps 1-4 described above
    D2 = np.random.rand(A2.shape[0],A2.shape[1])
    D2 = D2 < keep_prob
    A2 = A2 * D2
    A2 = A2 / keep_prob
    
    Z3 = np.dot(W3,A2)+b3
    A3 = reg_utils.sigmoid(Z3)

    cache = (Z1,D1,A1,W1,b1,Z2,D2,A2,W2,b2,Z3,A3,W3,b3)

    return A3,cache    
Example 13
def forward_propagation_with_dropout(X, parameters, keep_prob):
    """
    Implements the forward propagation: LINEAR -> RELU + DROPOUT -> LINEAR -> RELU + DROPOUT -> LINEAR -> SIGMOID.
    
    Arguments:
    X -- input dataset, of shape (2, number of examples)
    parameters -- python dictionary containing your parameters "W1", "b1", "W2", "b2", "W3", "b3":
                    W1 -- weight matrix of shape (20, 2)
                    b1 -- bias vector of shape (20, 1)
                    W2 -- weight matrix of shape (3, 20)
                    b2 -- bias vector of shape (3, 1)
                    W3 -- weight matrix of shape (1, 3)
                    b3 -- bias vector of shape (1, 1)
    keep_prob - probability of keeping a neuron active during drop-out, scalar
    
    Returns:
    A3 -- last activation value, output of the forward propagation, of shape (1,1)
    cache -- tuple, information stored for computing the backward propagation
    """

    np.random.seed(1)

    # retrieve parameters
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]
    W3 = parameters["W3"]
    b3 = parameters["b3"]

    # LINEAR -> RELU -> LINEAR -> RELU -> LINEAR -> SIGMOID
    Z1 = np.dot(W1, X) + b1
    A1 = relu(Z1)

    D1 = np.random.rand(A1.shape[0], A1.shape[1])
    D1 = (D1 < keep_prob)
    A1 = np.multiply(A1, D1)
    A1 = np.divide(A1, keep_prob)

    Z2 = np.dot(W2, A1) + b2
    A2 = relu(Z2)

    D2 = np.random.rand(A2.shape[0], A2.shape[1])
    D2 = (D2 < keep_prob)
    A2 = np.multiply(A2, D2)
    A2 = np.divide(A2, keep_prob)

    Z3 = np.dot(W3, A2) + b3
    A3 = sigmoid(Z3)

    cache = (Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3)

    return A3, cache
Example 14
def forward_propagation_with_dropout(X, parameters, keep_prob = 0.5):
    """
    Implements the forward propagation: LINEAR -> RELU + DROPOUT -> LINEAR -> RELU + DROPOUT -> LINEAR -> SIGMOID.
    
    Arguments:
    X -- input dataset, of shape (2, number of examples)
    parameters -- python dictionary containing your parameters "W1", "b1", "W2", "b2", "W3", "b3":
                    W1 -- weight matrix of shape (20, 2)
                    b1 -- bias vector of shape (20, 1)
                    W2 -- weight matrix of shape (3, 20)
                    b2 -- bias vector of shape (3, 1)
                    W3 -- weight matrix of shape (1, 3)
                    b3 -- bias vector of shape (1, 1)
    keep_prob - probability of keeping a neuron active during drop-out, scalar
    
    Returns:
    A3 -- last activation value, output of the forward propagation, of shape (1,1)
    cache -- tuple, information stored for computing the backward propagation
    """
    
    np.random.seed(1)
    
    # retrieve parameters
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]
    W3 = parameters["W3"]
    b3 = parameters["b3"]
    
    # LINEAR -> RELU -> LINEAR -> RELU -> LINEAR -> SIGMOID
    Z1 = np.dot(W1, X) + b1
    A1 = relu(Z1)
    ### START CODE HERE ### (approx. 4 lines)         # Steps 1-4 below correspond to the Steps 1-4 described above. 
    D1 = np.random.rand(*A1.shape)                                         # Step 1: initialize matrix D1 = np.random.rand(..., ...)
    D1 = D1 < keep_prob                                         # Step 2: convert entries of D1 to 0 or 1 (using keep_prob as the threshold)
    A1 = A1 * D1                                         # Step 3: shut down some neurons of A1
    A1 = A1 / keep_prob                                         # Step 4: scale the value of neurons that haven't been shut down
    ### END CODE HERE ###
    Z2 = np.dot(W2, A1) + b2
    A2 = relu(Z2)
    ### START CODE HERE ### (approx. 4 lines)
    D2 = np.random.rand(*A2.shape)                                         # Step 1: initialize matrix D2 = np.random.rand(..., ...)
    D2 = D2 < keep_prob                             # Step 2: convert entries of D2 to 0 or 1 (using keep_prob as the threshold)
    A2 = A2 * D2                                     # Step 3: shut down some neurons of A2
    A2 = A2 / keep_prob                                      # Step 4: scale the value of neurons that haven't been shut down
    ### END CODE HERE ###
    Z3 = np.dot(W3, A2) + b3
    A3 = sigmoid(Z3)
    
    cache = (Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3)
    
    return A3, cache
Example 15
def forward_propagation_with_dropout(X, parameters, keep_prob = 0.5):
    """
    Implements the forward propagation: LINEAR -> RELU + DROPOUT -> LINEAR -> RELU + DROPOUT -> LINEAR -> SIGMOID.
    
    Arguments:
    X -- input dataset, of shape (2, number of examples)
    parameters -- python dictionary containing your parameters "W1", "b1", "W2", "b2", "W3", "b3":
                    W1 -- weight matrix of shape (20, 2)
                    b1 -- bias vector of shape (20, 1)
                    W2 -- weight matrix of shape (3, 20)
                    b2 -- bias vector of shape (3, 1)
                    W3 -- weight matrix of shape (1, 3)
                    b3 -- bias vector of shape (1, 1)
    keep_prob - probability of keeping a neuron active during drop-out, scalar
    
    Returns:
    A3 -- last activation value, output of the forward propagation, of shape (1,1)
    cache -- tuple, information stored for computing the backward propagation
    """
    np.random.seed(1)
    W1 = parameters['W1']
    b1 = parameters['b1']
    W2 = parameters['W2']
    b2 = parameters['b2']
    W3 = parameters['W3']
    b3 = parameters['b3']
    
    z1 = np.matmul(W1, X) + b1  # shape (20, number of examples)
    a1 = relu(z1)
    
    # 1) d1 with the same shape as a1, drawn from np.random.rand()
    d1 = np.random.rand(a1.shape[0], a1.shape[1])
    # 2) d1 < keep_prob gives a 0/1 mask
    d1 = d1 < keep_prob
    # 3) element wise product #
    a1 *= d1
    # 4) divide by keep_prob #
    a1 /= keep_prob
    
    z2 = np.matmul(W2, a1) + b2  # shape (3, number of examples)
    a2 = relu(z2)
    d2 = np.random.rand(a2.shape[0], a2.shape[1])
    d2 = d2 < keep_prob
    a2 *= d2
    a2 /= keep_prob
    
    z3 = np.matmul(W3, a2) + b3  # shape (1, number of examples)
    a3 = sigmoid(z3)
    
    cache = (z1, d1, a1, W1, b1, z2, d2, a2, W2, b2, z3, a3, W3, b3)
    return a3, cache
Example 16
def forward_propagation_with_dropout(X, parameters, keep_prob):
    """
    实现具有随机舍弃节点的dropout
    LINEAR -> RELU + DROPUOUT -> RELU + DROPOUT -> LINEAR -> SIGMOID
    :param X:输入数据集,维度为(2,示例数)
    :param parameters:
        W1 - (20, 2)
        b1 - (20, 1)
        W2 - (3, 20)
        b2 - (3, 1)
        W3 - (1, 3)
        b3 - (1, 1)
    """
    np.random.seed(1)
    L = len(parameters) // 2  # number of weight layers (not used below)

    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]
    W3 = parameters["W3"]
    b3 = parameters["b3"]

    Z1 = np.dot(W1, X) + b1
    A1 = reg_utils.relu(Z1)

    # Initialize matrix D1
    D1 = np.random.rand(A1.shape[0], A1.shape[1])
    # Convert entries of D1 to 0 or 1 (using keep_prob as the threshold)
    D1 = D1 < keep_prob
    # Drop some nodes of A1
    A1 = A1 * D1
    # Scale the nodes that were not dropped
    A1 = A1 / keep_prob

    Z2 = np.dot(W2, A1) + b2
    A2 = reg_utils.relu(Z2)

    D2 = np.random.rand(A2.shape[0], A2.shape[1])
    D2 = D2 < keep_prob
    A2 = A2 * D2
    A2 = A2 / keep_prob

    Z3 = np.dot(W3, A2) + b3
    A3 = reg_utils.sigmoid(Z3)

    cache = (Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3)

    return A3, cache
Example 17
def forward_propagation_with_dropout(X,parameters,keep_prob=0.5):
    """
        实现具有随机舍弃节点的前向传播
        LINEAR->RELU+DROPOUT->LINEAR->RELU+DROPOUT->LINEAR-SIGMOID
    参数:
        X -输入数据集,维度(2,示例数)
        parameters 包含参数W1,b1,W2,b2,W3,b3的python字典
         W1 -权重矩阵,维度(20,2)
         b1 -偏向量,维度(20,1)
         W2 -权重矩阵,维度(3,20)
         b2 -偏向量,维度(3,1)
         W3 -权重矩阵,维度(1,3)
         b3 -偏向量,维度(1,1)
         keep_prob -随机删除节点的概率,实数
    返回:
        A3 -最后的激活值,正向传播的输出
        cache -存储了用于反向传播的数值的元组
    """
    np.random.seed(1)
    W1=parameters["W1"]
    W2=parameters["W2"]
    W3=parameters["W3"]
    b1=parameters["b1"]
    b2=parameters["b2"]
    b3=parameters["b3"]

    Z1=np.dot(W1,X)+b1
    A1=reg_utils.relu(Z1)

    D1=np.random.rand(A1.shape[0],A1.shape[1])
    D1=D1<keep_prob
    A1=A1*D1
    A1=A1/keep_prob

    Z2=np.dot(W2,A1)+b2
    A2=reg_utils.relu(Z2)

    D2=np.random.rand(A2.shape[0],A2.shape[1])
    D2=D2<keep_prob
    A2=A2*D2
    A2=A2/keep_prob

    Z3=np.dot(W3,A2)+b3
    A3=reg_utils.sigmoid(Z3)

    cache=(Z1,D1,A1,W1,b1,Z2,D2,A2,W2,b2,Z3,A3,W3,b3)

    return A3,cache
Example 18
def forward_propagation_with_dropout(X, parameters, keep_prob=0.5):
    '''
    Implements the forward propagation.

    Arguments:
    X -- input dataset, of shape (2, number of examples)
    parameters -- python dictionary containing your parameters "W1", "b1", "W2", "b2", "W3", "b3":
                    W1 -- weight matrix of shape (20, 2)
                    b1 -- bias vector of shape (20, 1)
                    W2 -- weight matrix of shape (3, 20)
                    b2 -- bias vector of shape (3, 1)
                    W3 -- weight matrix of shape (1, 3)
                    b3 -- bias vector of shape (1, 1)
    keep_prob - probability of keeping a neuron active during drop-out, scalar
    
    Returns:
    A3 -- last activation value, output of the forward propagation, of shape (1,1)
    cache -- tuple, information stored for computing the backward propagation
    '''
    np.random.seed(1)

    W1 = parameters['W1']
    b1 = parameters['b1']
    W2 = parameters['W2']
    b2 = parameters['b2']
    W3 = parameters['W3']
    b3 = parameters['b3']

    Z1 = np.dot(W1, X) + b1
    A1 = relu(Z1)
    D1 = np.random.rand(A1.shape[0], A1.shape[1])
    D1 = (D1 < keep_prob)
    A1 = A1 * D1
    A1 = A1 / keep_prob

    Z2 = np.dot(W2, A1) + b2
    A2 = relu(Z2)
    D2 = np.random.rand(A2.shape[0], A2.shape[1])
    D2 = (D2 < keep_prob)
    A2 = A2 * D2
    A2 = A2 / keep_prob

    Z3 = np.dot(W3, A2) + b3
    A3 = sigmoid(Z3)

    cache = (Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3)

    return A3, cache
Example 19
def forward_propagation_with_dropout(X, parameters, keep_prob=0.5):
    """
    implements the forward propagation: linear -> relu + dropout -> linear -> relu + dropout -> linear -> sigmoid
    
    arguments:
        X -- input dataset, shape (2, number of examples)
        parameters -- dictionary containing your parameters
        keep_prob -- probability of keeping a neuron active during dropout, scalar
        
    returns:
        A3 -- last activation value
        cache -- tuple, information stored for computing the backward propagation
    """

    np.random.seed(1)

    #retrieve parameters
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]
    W3 = parameters["W3"]
    b3 = parameters["b3"]

    Z1 = np.dot(W1, X) + b1
    A1 = relu(Z1)
    #start dropout
    D1 = np.random.rand(A1.shape[0], A1.shape[1])
    D1 = D1 < keep_prob
    A1 = A1 * D1
    A1 = A1 / keep_prob

    Z2 = np.dot(W2, A1) + b2
    A2 = relu(Z2)

    D2 = np.random.rand(A2.shape[0], A2.shape[1])
    D2 = D2 < keep_prob
    A2 = A2 * D2
    A2 = A2 / keep_prob

    Z3 = np.dot(W3, A2) + b3
    A3 = sigmoid(Z3)

    cache = (Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3)

    return A3, cache
Example 20
def forward_propagate_with_reg(X, params, keep_prob=1):
    # retrieve parameters
    W1 = params["W1"]
    b1 = params["b1"]
    W2 = params["W2"]
    b2 = params["b2"]
    W3 = params["W3"]
    b3 = params["b3"]
    # LINEAR -> RELU -> LINEAR -> RELU -> LINEAR -> SIGMOID
    z1 = np.dot(W1, X) + b1
    D1 = np.random.rand(z1.shape[0], z1.shape[1]) < keep_prob
    # The dropout mask differs for every example, so do not broadcast a single per-unit mask across examples
    a1 = reg_utils.relu(z1) * D1 / keep_prob
    z2 = np.dot(W2, a1) + b2
    D2 = np.random.rand(z2.shape[0], z2.shape[1]) < keep_prob
    a2 = reg_utils.relu(z2) * D2 / keep_prob
    z3 = np.dot(W3, a2) + b3
    a3 = reg_utils.sigmoid(z3)
    cache = (D1, z1, a1, W1, b1, D2, z2, a2, W2, b2, z3, a3, W3, b3)
    return a3, cache
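The broadcasting comment above is worth spelling out: a mask of shape (n, 1) would be broadcast across the example dimension and drop the same units for every example, whereas the (n, m) masks used in these examples drop units independently per example. A small added illustration:

import numpy as np

np.random.seed(0)
n, m, keep_prob = 4, 3, 0.5
A = np.ones((n, m))

shared = np.random.rand(n, 1) < keep_prob       # one mask broadcast over all m examples
per_example = np.random.rand(n, m) < keep_prob  # independent mask for each example

print(A * shared)       # every column is identical
print(A * per_example)  # columns differ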
Example 21
def forward_propagation_with_dropout(X, parameters, keep_prob=0.5):
    """
    Implements the forward propagation: LINEAR -> RELU + DROPOUT -> LINEAR -> RELU + DROPOUT -> LINEAR -> SIGMOID.
    
    Arguments:
    X -- input dataset, of shape (2, number of examples)
    parameters -- python dictionary containing your parameters "W1", "b1", "W2", "b2", "W3", "b3":
    keep_prob - probability of keeping a neuron active during drop-out, scalar
    
    Returns:
    A3 -- last activation value, output of the forward propagation, of shape (1,1)
    cache -- tuple, information stored for computing the backward propagation
    """

    np.random.seed(1)
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]
    W3 = parameters["W3"]
    b3 = parameters["b3"]

    Z1 = np.dot(W1, X) + b1
    A1 = relu(Z1)
    D1 = np.random.rand(A1.shape[0], A1.shape[1]) < keep_prob
    A1 = A1 * D1
    A1 /= keep_prob

    Z2 = np.dot(W2, A1) + b2
    A2 = relu(Z2)
    D2 = np.random.rand(A2.shape[0], A2.shape[1]) < keep_prob
    A2 = A2 * D2
    A2 /= keep_prob

    Z3 = np.dot(W3, A2) + b3
    A3 = sigmoid(Z3)

    cache = (Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3)
    return A3, cache
Example 22
def forward_propagation(X, parameters, keep_prob=1.0):
    np.random.seed(1)

    # retrieve parameters
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]
    W3 = parameters["W3"]
    b3 = parameters["b3"]

    # LINEAR -> RELU -> LINEAR -> RELU -> LINEAR -> SIGMOID
    Z1 = np.dot(W1, X) + b1
    A1 = relu(Z1)

    # dropout
    D1 = np.random.rand(
        A1.shape[0], A1.shape[1]
    )  # Step 1: initialize matrix D1 from uniform distribution [0, 1)
    D1 = D1 < keep_prob  # Step 2: convert entries of D1 to 0 or 1 (using keep_prob as the threshold)
    A1 = A1 * D1  # Step 3: shut down some neurons of A1
    A1 = A1 / keep_prob  # Step 4: scale the value of neurons that haven't been shut down

    Z2 = np.dot(W2, A1) + b2
    A2 = relu(Z2)

    # dropout
    D2 = np.random.rand(A2.shape[0], A2.shape[1])
    D2 = D2 < keep_prob
    A2 = A2 * D2
    A2 = A2 / keep_prob

    Z3 = np.dot(W3, A2) + b3
    A3 = sigmoid(Z3)

    cache = (Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3)

    return A3, cache
Example 23
def forward_propagation_with_dropout(X, parameters, keep_prob=0.5):
    """
    Applies inverted dropout at layers 1 and 2.
    """
    np.random.seed(1)
    W1=parameters["W1"]
    b1=parameters["b1"]
    W2=parameters["W2"]
    b2=parameters["b2"]
    W3=parameters["W3"]
    b3=parameters["b3"]

    # Linear->ReLU->Linear->ReLU->Linear->Sigmoid
    Z1=np.dot(W1,X)+b1
    A1=reg_utils.relu(Z1)

    # Apply inverted dropout
    D1=np.random.rand(A1.shape[0],A1.shape[1])  # Initialize a matrix with the same shape as A1
    D1=D1<keep_prob # Entries below keep_prob become 1 (True); the rest become 0 (False)
    A1=A1*D1 # Drop some nodes of A1 by setting their values to 0 (False)
    A1=A1/keep_prob # Scale the kept nodes (this step is what makes the dropout "inverted")

    Z2=np.dot(W2,A1)+b2
    A2=reg_utils.relu(Z2)

    # Apply inverted dropout
    D2=np.random.rand(A2.shape[0],A2.shape[1])
    D2=D2<keep_prob
    A2=A2*D2
    A2=A2/keep_prob

    Z3=np.dot(W3,A2)+b3
    A3=reg_utils.sigmoid(Z3)

    cache=(Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3)
    return A3,cache
Example 24
def forward_propagation_with_dropout(X, parameters, keep_prob=0.5):
    """
    实现具有随机舍弃节点的前向传播。
    LINEAR -> RELU + DROPOUT -> LINEAR -> RELU + DROPOUT -> LINEAR -> SIGMOID.

    参数:
        X  - 输入数据集,维度为(2,示例数)
        parameters - 包含参数“W1”,“b1”,“W2”,“b2”,“W3”,“b3”的python字典:
            W1  - 权重矩阵,维度为(20,2)
            b1  - 偏向量,维度为(20,1)
            W2  - 权重矩阵,维度为(3,20)
            b2  - 偏向量,维度为(3,1)
            W3  - 权重矩阵,维度为(1,3)
            b3  - 偏向量,维度为(1,1)
        keep_prob  - 随机删除的概率,实数
    返回:
        A3  - 最后的激活值,维度为(1,1),正向传播的输出
        cache - 存储了一些用于计算反向传播的数值的元组
    """
    np.random.seed(1)

    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]
    W3 = parameters["W3"]
    b3 = parameters["b3"]

    #LINEAR -> RELU -> LINEAR -> RELU -> LINEAR -> SIGMOID
    Z1 = np.dot(W1, X) + b1
    A1 = reg_utils.relu(Z1)

    # Steps 1-4 below correspond to Steps 1-4 described above.
    D1 = np.random.rand(A1.shape[0],
                        A1.shape[1])  # Step 1: initialize matrix D1 = np.random.rand(..., ...)
    D1 = D1 < keep_prob  # Step 2: convert entries of D1 to 0 or 1 (using keep_prob as the threshold)
    A1 = A1 * D1  # Step 3: drop some nodes of A1 (set their values to 0 or False)
    A1 = A1 / keep_prob  # Step 4: scale the values of the nodes that were not dropped
    """
    # Run the code below if the steps above are unclear.
    import numpy as np
    np.random.seed(1)
    A1 = np.random.randn(1,3)

    D1 = np.random.rand(A1.shape[0],A1.shape[1])
    keep_prob=0.5
    D1 = D1 < keep_prob
    print(D1)

    A1 = 0.01
    A1 = A1 * D1
    A1 = A1 / keep_prob
    print(A1)
    """

    Z2 = np.dot(W2, A1) + b2
    A2 = reg_utils.relu(Z2)

    # Steps 1-4 below correspond to Steps 1-4 described above.
    D2 = np.random.rand(A2.shape[0],
                        A2.shape[1])  # Step 1: initialize matrix D2 = np.random.rand(..., ...)
    D2 = D2 < keep_prob  # Step 2: convert entries of D2 to 0 or 1 (using keep_prob as the threshold)
    A2 = A2 * D2  # Step 3: drop some nodes of A2 (set their values to 0 or False)
    A2 = A2 / keep_prob  # Step 4: scale the values of the nodes that were not dropped

    Z3 = np.dot(W3, A2) + b3
    A3 = reg_utils.sigmoid(Z3)

    cache = (Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3)

    return A3, cache
Example 25
def forward_propagation_with_dropout(X, parameters, keep_prob=0.5):
    '''
    Implements forward propagation with randomly dropped nodes:
    LINEAR -> RELU + DROPOUT -> LINEAR -> RELU + DROPOUT -> LINEAR -> SIGMOID
    :param X: input dataset, of shape (2, number of examples)
    :param parameters: python dictionary containing the parameters 'W1', 'b1', 'W2', 'b2', 'W3', 'b3'
    W1 - weight matrix of shape (20, 2)
    b1 - bias vector of shape (20, 1)
    W2 - weight matrix of shape (3, 20)
    b2 - bias vector of shape (3, 1)
    W3 - weight matrix of shape (1, 3)
    b3 - bias vector of shape (1, 1)
    :param keep_prob: probability of keeping a neuron active during dropout, scalar
    :return:
     A3 - last activation value, of shape (1, 1), output of the forward propagation
     cache - tuple storing values needed for the backward propagation
    '''
    np.random.seed(1)

    W1 = parameters['W1']
    b1 = parameters['b1']
    W2 = parameters['W2']
    b2 = parameters['b2']
    W3 = parameters['W3']
    b3 = parameters['b3']

    #linear->relu->linear->relu->linear->sigmoid
    Z1 = np.dot(W1, X) + b1
    A1 = reg_utils.relu(Z1)

    # Steps 1-4 below correspond to Steps 1-4 described above
    D1 = np.random.rand(A1.shape[0],
                        A1.shape[1])  # Step 1: initialize D1 = np.random.rand(..., ...)
    D1 = D1 < keep_prob  # Step 2: convert entries of D1 to 0 or 1 (using keep_prob as the threshold)
    A1 = A1 * D1  # Step 3: drop some nodes of A1 (set their values to 0 or False)
    A1 = A1 / keep_prob  # Step 4: scale the values of the nodes that were not dropped
    '''
    Run the code below if the steps above are unclear.
    import numpy as np
    np.random.seed(1)
    A1 = np.random.randn(1,3)
    
    D1 = np.random.rand(A1.shape[0],A1.shape[1])
    keep_prob = 0.5
    D1 = D1 < keep_prob
    print(D1)
    
    A1 = 0.01
    A1 = A1*D1
    A1 = A1 /keep_prob
    print(A1)
    '''

    Z2 = np.dot(W2, A1) + b2
    A2 = reg_utils.relu(Z2)

    # Steps 1-4 below correspond to Steps 1-4 described above
    D2 = np.random.rand(A2.shape[0],
                        A2.shape[1])  # Step 1: initialize matrix D2 = np.random.rand(..., ...)
    D2 = D2 < keep_prob  # Step 2: convert entries of D2 to 0 or 1 (using keep_prob as the threshold)
    A2 = A2 * D2  # Step 3: drop some nodes of A2 (set their values to 0 or False)
    A2 = A2 / keep_prob  # Step 4: scale the values of the nodes that were not dropped

    Z3 = np.dot(W3, A2) + b3
    A3 = reg_utils.sigmoid(Z3)

    cache = (Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3)

    return A3, cache
Example 26
def forward_propagation_with_dropout(X, parameters, keep_prob=0.5):
    '''
    Implements forward propagation with randomly dropped nodes:
    LINEAR -> RELU + DROPOUT -> LINEAR -> RELU + DROPOUT -> LINEAR -> SIGMOID
    :param X: input dataset, of shape (2, number of examples)
    :param parameters: python dictionary containing the parameters 'W1', 'b1', 'W2', 'b2', 'W3', 'b3'
    W1 - weight matrix of shape (20, 2)
    b1 - bias vector of shape (20, 1)
    W2 - weight matrix of shape (3, 20)
    b2 - bias vector of shape (3, 1)
    W3 - weight matrix of shape (1, 3)
    b3 - bias vector of shape (1, 1)
    :param keep_prob: probability of keeping a neuron active during dropout, scalar
    :return:
     A3 - last activation value, of shape (1, 1), output of the forward propagation
     cache - tuple storing values needed for the backward propagation
    '''
    np.random.seed(1)

    W1 = parameters['W1']
    b1 = parameters['b1']
    W2 = parameters['W2']
    b2 = parameters['b2']
    W3 = parameters['W3']
    b3 = parameters['b3']

    #linear->relu->linear->relu->linear->sigmoid
    Z1 = np.dot(W1,X)+b1
    A1 = reg_utils.relu(Z1)

    #Steps 1-4 below correspond to Steps 1-4 described above
    D1=np.random.rand(A1.shape[0],A1.shape[1])#Step 1: initialize D1 = np.random.rand(...,...)
    D1 = D1<keep_prob                         #Step 2: convert entries of D1 to 0 or 1 (using keep_prob as the threshold)
    A1 = A1*D1                                #Step 3: drop some nodes of A1 (set their values to 0 or False)
    A1 = A1/keep_prob                         #Step 4: scale the values of the nodes that were not dropped
    '''
    Run the code below if the steps above are unclear.
    import numpy as np
    np.random.seed(1)
    A1 = np.random.randn(1,3)
    
    D1 = np.random.rand(A1.shape[0],A1.shape[1])
    keep_prob = 0.5
    D1 = D1 < keep_prob
    print(D1)
    
    A1 = 0.01
    A1 = A1*D1
    A1 = A1 /keep_prob
    print(A1)
    '''

    Z2 = np.dot(W2,A1)+b2
    A2 = reg_utils.relu(Z2)

    #Steps 1-4 below correspond to Steps 1-4 described above
    D2 = np.random.rand(A2.shape[0],A2.shape[1])  #Step 1: initialize matrix D2 = np.random.rand(...,...)
    D2 = D2<keep_prob                             #Step 2: convert entries of D2 to 0 or 1 (using keep_prob as the threshold)
    A2 = A2*D2                                    #Step 3: drop some nodes of A2 (set their values to 0 or False)
    A2 = A2/keep_prob                             #Step 4: scale the values of the nodes that were not dropped

    Z3 = np.dot(W3,A2)+b3
    A3 = reg_utils.sigmoid(Z3)

    cache = (Z1,D1,A1,W1,b1,Z2,D2,A2,W2,b2,Z3,A3,W3,b3)

    return A3,cache
Example 27
def forward_propagation_with_dropout(X, parameters, keep_prob=0.5):
    """
    实现使用 dropout 正则化的前向传播: LINEAR -> RELU + DROPOUT -> LINEAR -> RELU + DROPOUT -> LINEAR -> SIGMOID.
    
    :param X: 输入数据集, of shape (2, number of examples)
    :param parameters: python dictionary,包含参数: "W1", "b1", "W2", "b2", "W3", "b3":
                    W1 -- weight matrix of shape (20, 2)
                    b1 -- bias vector of shape (20, 1)
                    W2 -- weight matrix of shape (3, 20)
                    b2 -- bias vector of shape (3, 1)
                    W3 -- weight matrix of shape (1, 3)
                    b3 -- bias vector of shape (1, 1)
    :param keep_prob: drop-out(随即删除)过程中保留一个神经元的概率
    
    :return A3: 最后一层的激活值, 前向传播的输出, of shape (1,1)
    :return cache: tuple, 存储着用来计算后向传播的信息
    """

    np.random.seed(1)

    # retrieve parameters
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]
    W3 = parameters["W3"]
    b3 = parameters["b3"]

    # LINEAR -> RELU -> LINEAR -> RELU -> LINEAR -> SIGMOID
    Z1 = np.dot(W1, X) + b1
    A1 = relu(Z1)

    # Randomly drop nodes (neurons)
    ### START CODE HERE ### (approx. 4 lines)
    D1 = np.random.rand(
        A1.shape[0],
        A1.shape[1])  # Step 1: initialize matrix D1 (same shape as A1; a 1 keeps the neuron, a 0 drops it)
    D1 = (D1 < keep_prob).astype(
        int)  # Step 2: convert entries of D1 to 0 or 1 (using keep_prob as the threshold)
    A1 = A1 * D1  # Step 3: drop some nodes of A1
    A1 = A1 / keep_prob  # Step 4: scale the values of the neurons that were not dropped
    ### END CODE HERE ###

    Z2 = np.dot(W2, A1) + b2
    A2 = relu(Z2)
    ### START CODE HERE ### (approx. 4 lines)
    D2 = np.random.rand(
        A2.shape[0],
        A2.shape[1])  # Step 1: initialize matrix D2 = np.random.rand(..., ...)
    D2 = (D2 < keep_prob).astype(
        int
    )  # Step 2: convert entries of D2 to 0 or 1 (using keep_prob as the threshold)
    A2 = A2 * D2  # Step 3: shut down some neurons of A2
    A2 = A2 / keep_prob  # Step 4: scale the value of neurons that haven't been shut down
    ### END CODE HERE ###

    Z3 = np.dot(W3, A2) + b3
    A3 = sigmoid(Z3)

    cache = (Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3)

    return A3, cache