def forward_propagation_with_dropout(X, parameters, keep_prob=0.5):
    # Randomly shut down neurons in each hidden layer (but not the output layer)
    np.random.seed(1)

    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]
    W3 = parameters["W3"]
    b3 = parameters["b3"]

    # First hidden layer
    Z1 = np.dot(W1, X) + b1
    A1 = relu(Z1)
    D1 = np.random.rand(A1.shape[0], A1.shape[1])  # generate a random matrix D1
    D1 = D1 < keep_prob                            # convert D1 to 0/1 using keep_prob as the threshold
    A1 = A1 * D1                                   # Step 3: shut down some neurons of A1
    A1 = A1 / keep_prob                            # Step 4: scale up the kept neurons so the expected output stays unchanged

    # Second hidden layer
    Z2 = np.dot(W2, A1) + b2
    A2 = relu(Z2)
    D2 = np.random.rand(A2.shape[0], A2.shape[1])
    D2 = D2 < keep_prob
    A2 = A2 * D2
    A2 = A2 / keep_prob

    # Output layer
    Z3 = np.dot(W3, A2) + b3
    A3 = sigmoid(Z3)

    cache = (Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3)

    return A3, cache
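The division by keep_prob in Step 4 (inverted dropout) is what keeps the expected value of the activations unchanged. Below is a minimal, self-contained sketch of that property, using only NumPy; it is an illustration and not part of any of the implementations collected here.

import numpy as np

np.random.seed(0)
A = np.ones((5, 10000))                    # pretend activations, all equal to 1
keep_prob = 0.5
D = np.random.rand(*A.shape) < keep_prob   # 0/1 mask, ~keep_prob fraction kept
A_dropped = (A * D) / keep_prob            # inverted dropout: mask, then rescale

print(A.mean(), A_dropped.mean())          # both close to 1.0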
def drop_forward(X, parameters, keep_prob):
    '''
    Forward pass with dropout.
    A[l] = A[l] * d[l], where d[l] is a 0/1 mask of the same shape as A[l].
    '''
    L = int(len(parameters) / 2)
    cache = {}
    cache['A' + str(0)] = X
    for l in range(1, L):
        cache['Z' + str(l)] = np.dot(parameters['W' + str(l)], cache['A' + str(l - 1)]) + parameters['b' + str(l)]
        cache['A' + str(l)] = relu(cache['Z' + str(l)])
        cache['d' + str(l)] = (np.random.rand(cache['A' + str(l)].shape[0],
                                              cache['A' + str(l)].shape[1]) < keep_prob).astype(int)
        cache['A' + str(l)] = cache['A' + str(l)] * cache['d' + str(l)]
        cache['A' + str(l)] = cache['A' + str(l)] / keep_prob
        cache['W' + str(l)] = parameters['W' + str(l)]
        cache['b' + str(l)] = parameters['b' + str(l)]
    cache['Z' + str(L)] = np.dot(parameters['W' + str(L)], cache['A' + str(L - 1)]) + parameters['b' + str(L)]
    cache['A' + str(L)] = sigmoid(cache['Z' + str(L)])
    cache['W' + str(L)] = parameters['W' + str(L)]
    cache['b' + str(L)] = parameters['b' + str(L)]
    return cache['A' + str(L)], cache
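Because drop_forward infers the network depth from the parameter dictionary, it works for any layer sizes. Here is a hedged usage sketch; the toy initialization and layer_dims below are assumptions for illustration, not taken from the source, and relu/sigmoid are the same helper functions the snippets already rely on.

import numpy as np

# hypothetical toy setup (not from the original code)
layer_dims = [2, 20, 3, 1]
parameters = {}
np.random.seed(3)
for l in range(1, len(layer_dims)):
    parameters['W' + str(l)] = np.random.randn(layer_dims[l], layer_dims[l - 1]) * 0.01
    parameters['b' + str(l)] = np.zeros((layer_dims[l], 1))

X = np.random.randn(2, 5)                          # 5 toy examples
AL, cache = drop_forward(X, parameters, keep_prob=0.8)
print(AL.shape)                                    # (1, 5)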
def forward_propagation_with_dropout(X, parameters, keep_prob=0.5):
    np.random.seed(1)

    # retrieve parameters
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]
    W3 = parameters["W3"]
    b3 = parameters["b3"]

    # LINEAR -> RELU -> LINEAR -> RELU -> LINEAR -> SIGMOID
    Z1 = np.dot(W1, X) + b1
    A1 = relu(Z1)
    # Steps 1-4 below correspond to the Steps 1-4 described above.
    D1 = np.random.rand(A1.shape[0], A1.shape[1])  # Step 1: initialize matrix D1 = np.random.rand(..., ...)
    D1 = D1 < keep_prob                            # Step 2: convert entries of D1 to 0 or 1 (using keep_prob as the threshold)
    A1 = A1 * D1                                   # Step 3: shut down some neurons of A1
    A1 = A1 / keep_prob                            # Step 4: scale the value of neurons that haven't been shut down

    Z2 = np.dot(W2, A1) + b2
    A2 = relu(Z2)
    D2 = np.random.rand(A2.shape[0], A2.shape[1])  # Step 1: initialize matrix D2 = np.random.rand(..., ...)
    D2 = D2 < keep_prob                            # Step 2: convert entries of D2 to 0 or 1 (using keep_prob as the threshold)
    A2 = A2 * D2                                   # Step 3: shut down some neurons of A2
    A2 = A2 / keep_prob                            # Step 4: scale the value of neurons that haven't been shut down

    Z3 = np.dot(W3, A2) + b3
    A3 = sigmoid(Z3)

    cache = (Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3)

    return A3, cache
def forward_propagation_drop(X, parameters, keep_prob):
    np.random.seed(1)
    i = 1
    W = [None]
    B = [None]
    cache = []
    a = X
    while 'W' + str(i) in parameters:
        W.append(parameters['W' + str(i)])
        B.append(parameters['b' + str(i)])
        i += 1
    # forward propagation starts here
    for j in range(1, i - 1):
        z = np.dot(W[j], a) + B[j]
        a = reg_utils.relu(z)
        D = np.random.rand(a.shape[0], a.shape[1])
        D = D < keep_prob
        a = a * D
        a /= keep_prob
        cache.append(z)
        cache.append(D)
        cache.append(a)
        cache.append(W[j])
        cache.append(B[j])
    z = np.dot(W[i - 1], a) + B[i - 1]
    a = reg_utils.sigmoid(z)
    cache.append(z)
    cache.append(a)
    cache.append(W[i - 1])
    cache.append(B[i - 1])
    return a, cache
def forward_propagation_with_dropout(X, parameters, keep_prob=0.5):
    """
    Implements the forward propagation: LINEAR -> RELU + DROPOUT -> LINEAR -> RELU + DROPOUT -> LINEAR -> SIGMOID.

    Arguments:
    X -- input dataset, of shape (2, number of examples)
    parameters -- python dictionary containing your parameters "W1", "b1", "W2", "b2", "W3", "b3":
                    W1 -- weight matrix of shape (20, 2)
                    b1 -- bias vector of shape (20, 1)
                    W2 -- weight matrix of shape (3, 20)
                    b2 -- bias vector of shape (3, 1)
                    W3 -- weight matrix of shape (1, 3)
                    b3 -- bias vector of shape (1, 1)
    keep_prob - probability of keeping a neuron active during drop-out, scalar

    Returns:
    A3 -- last activation value, output of the forward propagation, of shape (1,1)
    cache -- tuple, information stored for computing the backward propagation
    """
    # retrieve parameters
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]
    W3 = parameters["W3"]
    b3 = parameters["b3"]

    # LINEAR -> RELU -> LINEAR -> RELU -> LINEAR -> SIGMOID
    Z1 = np.dot(W1, X) + b1
    A1 = relu(Z1)
    ### START CODE HERE ### (approx. 4 lines)
    # Steps 1-4 below correspond to the Steps 1-4 described above.
    D1 = np.random.rand(A1.shape[0], A1.shape[1])  # Step 1: initialize matrix D1 = np.random.rand(..., ...)
    D1 = np.where(D1 < keep_prob, 1, 0)            # Step 2: convert entries of D1 to 0 or 1 (using keep_prob as the threshold)
    A1 = A1 * D1                                   # Step 3: shut down some neurons of A1
    A1 = A1 / keep_prob                            # Step 4: scale the value of neurons that haven't been shut down
    ### END CODE HERE ###
    Z2 = np.dot(W2, A1) + b2
    A2 = relu(Z2)
    ### START CODE HERE ### (approx. 4 lines)
    D2 = np.random.rand(A2.shape[0], A2.shape[1])  # Step 1: initialize matrix D2 = np.random.rand(..., ...)
    D2 = np.where(D2 < keep_prob, 1, 0)            # Step 2: convert entries of D2 to 0 or 1 (using keep_prob as the threshold)
    A2 = A2 * D2                                   # Step 3: shut down some neurons of A2
    A2 = A2 / keep_prob                            # Step 4: scale the value of neurons that haven't been shut down
    ### END CODE HERE ###
    Z3 = np.dot(W3, A2) + b3
    A3 = sigmoid(Z3)

    cache = (Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3)

    return A3, cache
def forward_propagation_with_dropout(X, parameters, keep_prob=0.5):
    np.random.seed(1)

    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]
    W3 = parameters["W3"]
    b3 = parameters["b3"]

    Z1 = np.dot(W1, X) + b1
    A1 = reg_utils.relu(Z1)
    # Dropout is applied as follows:
    D1 = np.random.rand(A1.shape[0], A1.shape[1])  # Step 1: initialize matrix D1 = np.random.rand(..., ...)
    D1 = D1 < keep_prob                            # Step 2: convert entries of D1 to 0 or 1 (using keep_prob as the threshold)
    A1 = A1 * D1                                   # Step 3: shut down some nodes of A1 (their values become 0 or False)
    A1 = A1 / keep_prob                            # Step 4: scale the kept (non-zero) nodes

    Z2 = np.dot(W2, A1) + b2
    A2 = reg_utils.relu(Z2)
    D2 = np.random.rand(A2.shape[0], A2.shape[1])  # use rand (uniform), not randn, so the keep_prob threshold is valid
    D2 = D2 < keep_prob
    A2 = A2 * D2
    A2 = A2 / keep_prob

    Z3 = np.dot(W3, A2) + b3
    A3 = reg_utils.sigmoid(Z3)

    cache = (Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3)

    return A3, cache
def forward_propagation_with_dropout(X, parameters, keep_prob=0.5):
    np.random.seed(1)

    W1 = parameters['W1']
    b1 = parameters['b1']
    W2 = parameters['W2']
    b2 = parameters['b2']
    W3 = parameters['W3']
    b3 = parameters['b3']

    Z1 = np.dot(W1, X) + b1
    A1 = relu(Z1)
    D1 = np.random.rand(A1.shape[0], A1.shape[1])
    D1 = D1 < keep_prob
    A1 = np.multiply(D1, A1)
    A1 = A1 / keep_prob

    Z2 = np.dot(W2, A1) + b2
    A2 = relu(Z2)
    D2 = np.random.rand(A2.shape[0], A2.shape[1])
    D2 = D2 < keep_prob
    A2 = np.multiply(D2, A2)
    A2 = A2 / keep_prob

    Z3 = np.dot(W3, A2) + b3
    A3 = sigmoid(Z3)

    cache = (Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3)

    return A3, cache
def forward_propagation_with_dropout(X, parameters, keep_prob=0.5):
    np.random.seed(1)

    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]
    W3 = parameters["W3"]
    b3 = parameters["b3"]

    Z1 = np.dot(W1, X) + b1
    A1 = relu(Z1)
    D1 = np.random.rand(A1.shape[0], A1.shape[1])
    D1 = D1 < keep_prob
    A1 = A1 * D1
    A1 = A1 / keep_prob

    Z2 = np.dot(W2, A1) + b2
    A2 = relu(Z2)
    D2 = np.random.rand(A2.shape[0], A2.shape[1])
    D2 = D2 < keep_prob
    A2 = A2 * D2
    A2 = A2 / keep_prob

    Z3 = np.dot(W3, A2) + b3
    A3 = sigmoid(Z3)

    cache = (Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3)

    return A3, cache
def forward_propagation_with_dropout(X, params, keep_prob=0.5):
    np.random.seed(1)

    W1, b1 = params["W1"], params["b1"]
    W2, b2 = params["W2"], params["b2"]
    W3, b3 = params["W3"], params["b3"]

    Z1 = np.dot(W1, X) + b1
    A1 = reg_utils.relu(Z1)
    # layer 1 drop out
    D1 = np.random.rand(A1.shape[0], A1.shape[1]) < keep_prob
    A1 = np.multiply(A1, D1)
    A1 = A1 / keep_prob

    Z2 = np.dot(W2, A1) + b2
    A2 = reg_utils.relu(Z2)
    # layer 2 drop out
    D2 = np.random.rand(A2.shape[0], A2.shape[1]) < keep_prob
    A2 = np.multiply(A2, D2)
    A2 = A2 / keep_prob

    # No dropout at final layer
    Z3 = np.dot(W3, A2) + b3
    A3 = reg_utils.sigmoid(Z3)

    cache = (Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3)

    return A3, cache
def forward_propagation_with_dropout(X, parameters, keep_prob=0.5):
    np.random.seed(1)

    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]
    W3 = parameters["W3"]
    b3 = parameters["b3"]

    # LINEAR -> RELU -> LINEAR -> RELU -> LINEAR -> SIGMOID
    Z1 = np.dot(W1, X) + b1
    A1 = relu(Z1)
    # Steps 1-4 below correspond to the Steps 1-4 described above.
    D1 = np.random.rand(A1.shape[0], A1.shape[1])  # Step 1: initialize matrix D1 = np.random.rand(..., ...)
    D1 = D1 < keep_prob                            # Step 2: convert entries of D1 to 0 or 1 (using keep_prob as the threshold)
    A1 = A1 * D1                                   # Step 3: shut down some neurons of A1
    A1 = np.divide(A1, keep_prob)                  # Step 4: scale the value of neurons that haven't been shut down
    """
    >>> import numpy as np
    >>> a = np.random.rand(2, 3)
    >>> a
    array([[ 0.01838459,  0.70233192,  0.53293226],
           [ 0.81863108,  0.74747237,  0.05302554]])
    >>> a = a < 0.5
    >>> a
    array([[ True, False, False],
           [False, False,  True]], dtype=bool)
    >>> b = a * 0.3
    >>> b
    array([[ 0.3,  0. ,  0. ],
           [ 0. ,  0. ,  0.3]])
    >>> b = np.divide(b, 0.1)
    >>> b
    array([[ 3.,  0.,  0.],
           [ 0.,  0.,  3.]])
    """
    Z2 = np.dot(W2, A1) + b2
    A2 = relu(Z2)
    D2 = np.random.rand(A2.shape[0], A2.shape[1])  # Step 1: initialize matrix D2 = np.random.rand(..., ...)
    D2 = D2 < keep_prob                            # Step 2: convert entries of D2 to 0 or 1 (using keep_prob as the threshold)
    A2 = A2 * D2                                   # Step 3: shut down some neurons of A2
    A2 = A2 / keep_prob                            # Step 4: scale the value of neurons that haven't been shut down

    Z3 = np.dot(W3, A2) + b3
    A3 = sigmoid(Z3)

    cache = (Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3)

    return A3, cache
def forward_propagation_with_dropout(X, parameters, keep_prob=0.5):
    """
    Implements the forward propagation: LINEAR -> RELU + DROPOUT -> LINEAR -> RELU + DROPOUT -> LINEAR -> SIGMOID.

    Arguments:
    X -- input dataset, of shape (2, number of examples)
    parameters -- python dictionary containing your parameters "W1", "b1", "W2", "b2", "W3", "b3":
                    W1 -- weight matrix of shape (20, 2)
                    b1 -- bias vector of shape (20, 1)
                    W2 -- weight matrix of shape (3, 20)
                    b2 -- bias vector of shape (3, 1)
                    W3 -- weight matrix of shape (1, 3)
                    b3 -- bias vector of shape (1, 1)
    keep_prob - probability of keeping a neuron active during drop-out, scalar

    Returns:
    A3 -- last activation value, output of the forward propagation, of shape (1,1)
    cache -- tuple, information stored for computing the backward propagation
    """
    np.random.seed(1)

    # retrieve parameters
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]
    W3 = parameters["W3"]
    b3 = parameters["b3"]

    # LINEAR -> RELU -> LINEAR -> RELU -> LINEAR -> SIGMOID
    Z1 = np.dot(W1, X) + b1
    A1 = relu(Z1)
    # Step 1: initialize matrix D1 = np.random.rand(..., ...)
    # Step 2: convert entries of D1 to 0 or 1 (using keep_prob as the threshold)
    # Step 3: shut down some neurons of A1
    # Step 4: scale the value of neurons that haven't been shut down
    D1 = np.random.rand(A1.shape[0], A1.shape[1]) < keep_prob
    A1 = np.multiply(A1, D1)
    A1 /= keep_prob

    Z2 = np.dot(W2, A1) + b2
    A2 = relu(Z2)
    D2 = np.random.rand(A2.shape[0], A2.shape[1]) < keep_prob
    A2 = np.multiply(A2, D2)
    A2 /= keep_prob

    Z3 = np.dot(W3, A2) + b3
    A3 = sigmoid(Z3)

    cache = (Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3)

    return A3, cache
def forward_propagation_with_dropout(X, parameters, keep_prob=0.5):
    """
    Implements forward propagation with randomly dropped-out nodes:
    LINEAR -> RELU + DROPOUT -> LINEAR -> RELU + DROPOUT -> LINEAR -> SIGMOID

    Arguments:
    X - input dataset, of shape (2, number of examples)
    parameters - python dictionary containing the parameters "W1", "b1", "W2", "b2", "W3", "b3"
        W1 - weight matrix of shape (20, 2)   # layers_dims = [2, 20, 3, 1]
        b1 - bias vector of shape (20, 1)
        W2 - weight matrix of shape (3, 20)
        b2 - bias vector of shape (3, 1)
        W3 - weight matrix of shape (1, 3)
        b3 - bias vector of shape (1, 1)
    keep_prob - probability of keeping a neuron active during dropout, scalar

    Returns:
    A3 - last activation value, of shape (1, 1), the output of forward propagation
    cache - tuple storing values needed to compute the backward propagation
    """
    np.random.seed(1)

    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]
    W3 = parameters["W3"]
    b3 = parameters["b3"]

    # LINEAR -> RELU -> LINEAR -> RELU -> LINEAR -> SIGMOID
    Z1 = np.dot(W1, X) + b1
    A1 = reg_utils.relu(Z1)
    # Steps 1-4 below correspond to the Steps 1-4 described above.
    D1 = np.random.rand(A1.shape[0], A1.shape[1])  # Step 1: initialize matrix D1
    D1 = D1 < keep_prob                            # Step 2: convert entries of D1 to 0 or 1
    A1 = A1 * D1                                   # Step 3: shut down some nodes of A1 (set them to 0 or False)
    A1 = A1 / keep_prob                            # Step 4: scale the kept (non-zero) nodes

    Z2 = np.dot(W2, A1) + b2
    A2 = reg_utils.relu(Z2)
    # Steps 1-4 below correspond to the Steps 1-4 above.
    D2 = np.random.rand(A2.shape[0], A2.shape[1])
    D2 = D2 < keep_prob
    A2 = A2 * D2
    A2 = A2 / keep_prob

    Z3 = np.dot(W3, A2) + b3
    A3 = reg_utils.sigmoid(Z3)

    cache = (Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3)

    return A3, cache
def forward_propagation_with_dropout(X, parameters, keep_prob):
    """
    Implements the forward propagation: LINEAR -> RELU + DROPOUT -> LINEAR -> RELU + DROPOUT -> LINEAR -> SIGMOID.

    Arguments:
    X -- input dataset, of shape (2, number of examples)
    parameters -- python dictionary containing your parameters "W1", "b1", "W2", "b2", "W3", "b3":
                    W1 -- weight matrix of shape (20, 2)
                    b1 -- bias vector of shape (20, 1)
                    W2 -- weight matrix of shape (3, 20)
                    b2 -- bias vector of shape (3, 1)
                    W3 -- weight matrix of shape (1, 3)
                    b3 -- bias vector of shape (1, 1)
    keep_prob - probability of keeping a neuron active during drop-out, scalar

    Returns:
    A3 -- last activation value, output of the forward propagation, of shape (1,1)
    cache -- tuple, information stored for computing the backward propagation
    """
    np.random.seed(1)

    # retrieve parameters
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]
    W3 = parameters["W3"]
    b3 = parameters["b3"]

    # LINEAR -> RELU -> LINEAR -> RELU -> LINEAR -> SIGMOID
    Z1 = np.dot(W1, X) + b1
    A1 = relu(Z1)
    D1 = np.random.rand(A1.shape[0], A1.shape[1])
    D1 = (D1 < keep_prob)
    A1 = np.multiply(A1, D1)
    A1 = np.divide(A1, keep_prob)

    Z2 = np.dot(W2, A1) + b2
    A2 = relu(Z2)
    D2 = np.random.rand(A2.shape[0], A2.shape[1])
    D2 = (D2 < keep_prob)
    A2 = np.multiply(A2, D2)
    A2 = np.divide(A2, keep_prob)

    Z3 = np.dot(W3, A2) + b3
    A3 = sigmoid(Z3)

    cache = (Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3)

    return A3, cache
def forward_propagation_with_dropout(X, parameters, keep_prob=0.5):
    """
    Implements the forward propagation: LINEAR -> RELU + DROPOUT -> LINEAR -> RELU + DROPOUT -> LINEAR -> SIGMOID.

    Arguments:
    X -- input dataset, of shape (2, number of examples)
    parameters -- python dictionary containing your parameters "W1", "b1", "W2", "b2", "W3", "b3":
                    W1 -- weight matrix of shape (20, 2)
                    b1 -- bias vector of shape (20, 1)
                    W2 -- weight matrix of shape (3, 20)
                    b2 -- bias vector of shape (3, 1)
                    W3 -- weight matrix of shape (1, 3)
                    b3 -- bias vector of shape (1, 1)
    keep_prob - probability of keeping a neuron active during drop-out, scalar

    Returns:
    A3 -- last activation value, output of the forward propagation, of shape (1,1)
    cache -- tuple, information stored for computing the backward propagation
    """
    np.random.seed(1)

    # retrieve parameters
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]
    W3 = parameters["W3"]
    b3 = parameters["b3"]

    # LINEAR -> RELU -> LINEAR -> RELU -> LINEAR -> SIGMOID
    Z1 = np.dot(W1, X) + b1
    A1 = relu(Z1)
    ### START CODE HERE ### (approx. 4 lines)
    # Steps 1-4 below correspond to the Steps 1-4 described above.
    D1 = np.random.rand(*A1.shape)  # Step 1: initialize matrix D1 = np.random.rand(..., ...)
    D1 = D1 < keep_prob             # Step 2: convert entries of D1 to 0 or 1 (using keep_prob as the threshold)
    A1 = A1 * D1                    # Step 3: shut down some neurons of A1
    A1 = A1 / keep_prob             # Step 4: scale the value of neurons that haven't been shut down
    ### END CODE HERE ###
    Z2 = np.dot(W2, A1) + b2
    A2 = relu(Z2)
    ### START CODE HERE ### (approx. 4 lines)
    D2 = np.random.rand(*A2.shape)  # Step 1: initialize matrix D2 = np.random.rand(..., ...)
    D2 = D2 < keep_prob             # Step 2: convert entries of D2 to 0 or 1 (using keep_prob as the threshold)
    A2 = A2 * D2                    # Step 3: shut down some neurons of A2
    A2 = A2 / keep_prob             # Step 4: scale the value of neurons that haven't been shut down
    ### END CODE HERE ###
    Z3 = np.dot(W3, A2) + b3
    A3 = sigmoid(Z3)

    cache = (Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3)

    return A3, cache
def forward_propagation_with_dropout(X, parameters, keep_prob=0.5):
    """
    Implements the forward propagation: LINEAR -> RELU + DROPOUT -> LINEAR -> RELU + DROPOUT -> LINEAR -> SIGMOID.

    Arguments:
    X -- input dataset, of shape (2, number of examples)
    parameters -- python dictionary containing your parameters "W1", "b1", "W2", "b2", "W3", "b3":
                    W1 -- weight matrix of shape (20, 2)
                    b1 -- bias vector of shape (20, 1)
                    W2 -- weight matrix of shape (3, 20)
                    b2 -- bias vector of shape (3, 1)
                    W3 -- weight matrix of shape (1, 3)
                    b3 -- bias vector of shape (1, 1)
    keep_prob - probability of keeping a neuron active during drop-out, scalar

    Returns:
    A3 -- last activation value, output of the forward propagation, of shape (1,1)
    cache -- tuple, information stored for computing the backward propagation
    """
    np.random.seed(1)

    W1 = parameters['W1']
    b1 = parameters['b1']
    W2 = parameters['W2']
    b2 = parameters['b2']
    W3 = parameters['W3']
    b3 = parameters['b3']

    z1 = np.matmul(W1, X) + b1  # shape (20, number of examples)
    a1 = relu(z1)
    # 1) d[1] with the same shape as a[1], drawn with np.random.rand()
    d1 = np.random.rand(a1.shape[0], a1.shape[1])
    # 2) threshold d[1] against keep_prob to get a 0/1 mask
    d1 = d1 < keep_prob
    # 3) element-wise product
    a1 *= d1
    # 4) divide by keep_prob
    a1 /= keep_prob

    z2 = np.matmul(W2, a1) + b2  # shape (3, number of examples)
    a2 = relu(z2)
    d2 = np.random.rand(a2.shape[0], a2.shape[1])
    d2 = d2 < keep_prob
    a2 *= d2
    a2 /= keep_prob

    z3 = np.matmul(W3, a2) + b3  # shape (1, number of examples)
    a3 = sigmoid(z3)

    cache = (z1, d1, a1, W1, b1, z2, d2, a2, W2, b2, z3, a3, W3, b3)

    return a3, cache
def forward_propagation_with_dropout(X, parameters, keep_prob):
    """
    Implements forward propagation with randomly dropped-out nodes:
    LINEAR -> RELU + DROPOUT -> LINEAR -> RELU + DROPOUT -> LINEAR -> SIGMOID
    :param X: input dataset, of shape (2, number of examples)
    :param parameters:
        W1 - (20, 2)
        b1 - (20, 1)
        W2 - (3, 20)
        b2 - (3, 1)
        W3 - (1, 3)
        b3 - (1, 1)
    """
    np.random.seed(1)

    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]
    W3 = parameters["W3"]
    b3 = parameters["b3"]

    Z1 = np.dot(W1, X) + b1
    A1 = reg_utils.relu(Z1)
    # initialize the mask matrix D1
    D1 = np.random.rand(A1.shape[0], A1.shape[1])
    # convert the entries of D1 to 0 or 1 (using keep_prob as the threshold)
    D1 = D1 < keep_prob
    # shut down some nodes of A1
    A1 = A1 * D1
    # scale the nodes that were kept
    A1 = A1 / keep_prob

    Z2 = np.dot(W2, A1) + b2
    A2 = reg_utils.relu(Z2)
    D2 = np.random.rand(A2.shape[0], A2.shape[1])
    D2 = D2 < keep_prob
    A2 = A2 * D2
    A2 = A2 / keep_prob

    Z3 = np.dot(W3, A2) + b3
    A3 = reg_utils.sigmoid(Z3)

    cache = (Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3)

    return A3, cache
def forward_propagation_with_dropout(X, parameters, keep_prob=0.5):
    """
    Implements forward propagation with randomly dropped-out nodes:
    LINEAR -> RELU + DROPOUT -> LINEAR -> RELU + DROPOUT -> LINEAR -> SIGMOID

    Arguments:
    X - input dataset, of shape (2, number of examples)
    parameters - python dictionary containing W1, b1, W2, b2, W3, b3
        W1 - weight matrix of shape (20, 2)
        b1 - bias vector of shape (20, 1)
        W2 - weight matrix of shape (3, 20)
        b2 - bias vector of shape (3, 1)
        W3 - weight matrix of shape (1, 3)
        b3 - bias vector of shape (1, 1)
    keep_prob - probability of keeping a neuron active during dropout, scalar

    Returns:
    A3 - last activation value, the output of forward propagation
    cache - tuple storing values needed for backward propagation
    """
    np.random.seed(1)

    W1 = parameters["W1"]
    W2 = parameters["W2"]
    W3 = parameters["W3"]
    b1 = parameters["b1"]
    b2 = parameters["b2"]
    b3 = parameters["b3"]

    Z1 = np.dot(W1, X) + b1
    A1 = reg_utils.relu(Z1)
    D1 = np.random.rand(A1.shape[0], A1.shape[1])
    D1 = D1 < keep_prob
    A1 = A1 * D1
    A1 = A1 / keep_prob

    Z2 = np.dot(W2, A1) + b2
    A2 = reg_utils.relu(Z2)
    D2 = np.random.rand(A2.shape[0], A2.shape[1])
    D2 = D2 < keep_prob
    A2 = A2 * D2
    A2 = A2 / keep_prob

    Z3 = np.dot(W3, A2) + b3
    A3 = reg_utils.sigmoid(Z3)

    cache = (Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3)

    return A3, cache
def forward_propagation_with_dropout(X, parameters, keep_prob=0.5):
    '''
    Implements the forward propagation.

    Arguments:
    X -- input dataset, of shape (2, number of examples)
    parameters -- python dictionary containing your parameters "W1", "b1", "W2", "b2", "W3", "b3":
                    W1 -- weight matrix of shape (20, 2)
                    b1 -- bias vector of shape (20, 1)
                    W2 -- weight matrix of shape (3, 20)
                    b2 -- bias vector of shape (3, 1)
                    W3 -- weight matrix of shape (1, 3)
                    b3 -- bias vector of shape (1, 1)
    keep_prob - probability of keeping a neuron active during drop-out, scalar

    Returns:
    A3 -- last activation value, output of the forward propagation, of shape (1,1)
    cache -- tuple, information stored for computing the backward propagation
    '''
    np.random.seed(1)

    W1 = parameters['W1']
    b1 = parameters['b1']
    W2 = parameters['W2']
    b2 = parameters['b2']
    W3 = parameters['W3']
    b3 = parameters['b3']

    Z1 = np.dot(W1, X) + b1
    A1 = relu(Z1)
    D1 = np.random.rand(A1.shape[0], A1.shape[1])
    D1 = (D1 < keep_prob)
    A1 = A1 * D1
    A1 = A1 / keep_prob

    Z2 = np.dot(W2, A1) + b2
    A2 = relu(Z2)
    D2 = np.random.rand(A2.shape[0], A2.shape[1])
    D2 = (D2 < keep_prob)
    A2 = A2 * D2
    A2 = A2 / keep_prob

    Z3 = np.dot(W3, A2) + b3
    A3 = sigmoid(Z3)

    cache = (Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3)

    return A3, cache
def forward_propagation_with_dropout(X, parameters, keep_prob=0.5):
    """
    Implements the forward propagation: linear -> relu + dropout -> linear -> relu + dropout -> linear -> sigmoid

    Arguments:
    X -- input dataset, shape (2, number of examples)
    parameters -- dictionary containing your parameters
    keep_prob -- probability of keeping a neuron active during dropout, scalar

    Returns:
    A3 -- last activation value
    cache -- tuple, information stored for computing the backward propagation
    """
    np.random.seed(1)

    # retrieve parameters
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]
    W3 = parameters["W3"]
    b3 = parameters["b3"]

    Z1 = np.dot(W1, X) + b1
    A1 = relu(Z1)
    # start dropout
    D1 = np.random.rand(A1.shape[0], A1.shape[1])
    D1 = D1 < keep_prob
    A1 = A1 * D1
    A1 = A1 / keep_prob

    Z2 = np.dot(W2, A1) + b2
    A2 = relu(Z2)
    D2 = np.random.rand(A2.shape[0], A2.shape[1])
    D2 = D2 < keep_prob
    A2 = A2 * D2
    A2 = A2 / keep_prob

    Z3 = np.dot(W3, A2) + b3
    A3 = sigmoid(Z3)

    cache = (Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3)

    return A3, cache
def forward_propagate_with_reg(X, params, keep_prob=1):
    # retrieve parameters
    W1 = params["W1"]
    b1 = params["b1"]
    W2 = params["W2"]
    b2 = params["b2"]
    W3 = params["W3"]
    b3 = params["b3"]

    # LINEAR -> RELU -> LINEAR -> RELU -> LINEAR -> SIGMOID
    z1 = np.dot(W1, X) + b1
    # each example gets its own dropout mask, so do not broadcast a single column mask
    D1 = np.random.rand(z1.shape[0], z1.shape[1]) < keep_prob
    a1 = reg_utils.relu(z1) * D1 / keep_prob

    z2 = np.dot(W2, a1) + b2
    D2 = np.random.rand(z2.shape[0], z2.shape[1]) < keep_prob
    a2 = reg_utils.relu(z2) * D2 / keep_prob

    z3 = np.dot(W3, a2) + b3
    a3 = reg_utils.sigmoid(z3)

    cache = (D1, z1, a1, W1, b1, D2, z2, a2, W2, b2, z3, a3, W3, b3)

    return a3, cache
def forward_propagation_with_dropout(X, parameters, keep_prob=0.5):
    """
    Implements the forward propagation: LINEAR -> RELU + DROPOUT -> LINEAR -> RELU + DROPOUT -> LINEAR -> SIGMOID.

    Arguments:
    X -- input dataset, of shape (2, number of examples)
    parameters -- python dictionary containing your parameters "W1", "b1", "W2", "b2", "W3", "b3"
    keep_prob - probability of keeping a neuron active during drop-out, scalar

    Returns:
    A3 -- last activation value, output of the forward propagation, of shape (1,1)
    cache -- tuple, information stored for computing the backward propagation
    """
    np.random.seed(1)

    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]
    W3 = parameters["W3"]
    b3 = parameters["b3"]

    Z1 = np.dot(W1, X) + b1
    A1 = relu(Z1)
    D1 = np.random.rand(A1.shape[0], A1.shape[1]) < keep_prob
    A1 = A1 * D1
    A1 /= keep_prob

    Z2 = np.dot(W2, A1) + b2
    A2 = relu(Z2)
    D2 = np.random.rand(A2.shape[0], A2.shape[1]) < keep_prob
    A2 = A2 * D2
    A2 /= keep_prob

    Z3 = np.dot(W3, A2) + b3
    A3 = sigmoid(Z3)

    cache = (Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3)

    return A3, cache
def forward_propagation(X, parameters, keep_prob=1.0):
    np.random.seed(1)

    # retrieve parameters
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]
    W3 = parameters["W3"]
    b3 = parameters["b3"]

    # LINEAR -> RELU -> LINEAR -> RELU -> LINEAR -> SIGMOID
    Z1 = np.dot(W1, X) + b1
    A1 = relu(Z1)
    # dropout
    D1 = np.random.rand(A1.shape[0], A1.shape[1])  # Step 1: initialize matrix D1 from uniform distribution [0, 1)
    D1 = D1 < keep_prob                            # Step 2: convert entries of D1 to 0 or 1 (using keep_prob as the threshold)
    A1 = A1 * D1                                   # Step 3: shut down some neurons of A1
    A1 = A1 / keep_prob                            # Step 4: scale the value of neurons that haven't been shut down

    Z2 = np.dot(W2, A1) + b2
    A2 = relu(Z2)
    # dropout
    D2 = np.random.rand(A2.shape[0], A2.shape[1])
    D2 = D2 < keep_prob
    A2 = A2 * D2
    A2 = A2 / keep_prob

    Z3 = np.dot(W3, A2) + b3
    A3 = sigmoid(Z3)

    cache = (Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3)

    return A3, cache
def forward_propagation_with_dropout(X, parameters, keep_prob=0.5):
    """
    Applies inverted dropout in layers 1 and 2.
    """
    np.random.seed(1)

    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]
    W3 = parameters["W3"]
    b3 = parameters["b3"]

    # Linear -> ReLU -> Linear -> ReLU -> Linear -> Sigmoid
    Z1 = np.dot(W1, X) + b1
    A1 = reg_utils.relu(Z1)
    # inverted dropout
    D1 = np.random.rand(A1.shape[0], A1.shape[1])  # initialize a matrix with the same shape as A1
    D1 = D1 < keep_prob                            # entries below keep_prob become 1, the rest become 0
    A1 = A1 * D1                                   # shut down some nodes of A1 (their values become 0 or False)
    A1 = A1 / keep_prob                            # scale the kept nodes (this step is what makes the dropout "inverted")

    Z2 = np.dot(W2, A1) + b2
    A2 = reg_utils.relu(Z2)
    # inverted dropout
    D2 = np.random.rand(A2.shape[0], A2.shape[1])
    D2 = D2 < keep_prob
    A2 = A2 * D2
    A2 = A2 / keep_prob

    Z3 = np.dot(W3, A2) + b3
    A3 = reg_utils.sigmoid(Z3)

    cache = (Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3)

    return A3, cache
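Because the rescaling happens during training (which is what makes the dropout "inverted"), no masking or rescaling is needed at prediction time. A sketch of that convention follows; the wrapper names below are hypothetical and not part of any of the implementations above.

# Hypothetical train/test wrappers around the function above.
def forward_train(X, parameters, keep_prob=0.8):
    # dropout masks plus division by keep_prob, exactly as implemented above
    return forward_propagation_with_dropout(X, parameters, keep_prob)

def forward_test(X, parameters):
    # keep_prob=1.0 keeps every neuron and divides by 1, so no dropout is applied
    # at prediction time; the expected activations were already preserved in training
    return forward_propagation_with_dropout(X, parameters, keep_prob=1.0)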
def forward_propagation_with_dropout(X, parameters, keep_prob=0.5):
    """
    Implements forward propagation with randomly dropped-out nodes:
    LINEAR -> RELU + DROPOUT -> LINEAR -> RELU + DROPOUT -> LINEAR -> SIGMOID.

    Arguments:
    X - input dataset, of shape (2, number of examples)
    parameters - python dictionary containing the parameters "W1", "b1", "W2", "b2", "W3", "b3":
        W1 - weight matrix of shape (20, 2)
        b1 - bias vector of shape (20, 1)
        W2 - weight matrix of shape (3, 20)
        b2 - bias vector of shape (3, 1)
        W3 - weight matrix of shape (1, 3)
        b3 - bias vector of shape (1, 1)
    keep_prob - probability of keeping a neuron active during dropout, scalar

    Returns:
    A3 - last activation value, of shape (1, 1), the output of forward propagation
    cache - tuple storing values needed to compute the backward propagation
    """
    np.random.seed(1)

    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]
    W3 = parameters["W3"]
    b3 = parameters["b3"]

    # LINEAR -> RELU -> LINEAR -> RELU -> LINEAR -> SIGMOID
    Z1 = np.dot(W1, X) + b1
    A1 = reg_utils.relu(Z1)
    # Steps 1-4 below correspond to the Steps 1-4 described above.
    D1 = np.random.rand(A1.shape[0], A1.shape[1])  # Step 1: initialize matrix D1 = np.random.rand(..., ...)
    D1 = D1 < keep_prob                            # Step 2: convert entries of D1 to 0 or 1 (using keep_prob as the threshold)
    A1 = A1 * D1                                   # Step 3: shut down some nodes of A1 (set them to 0 or False)
    A1 = A1 / keep_prob                            # Step 4: scale the kept (non-zero) nodes
    """
    # If the four steps above are unclear, run this snippet:
    import numpy as np
    np.random.seed(1)
    A1 = np.random.randn(1, 3)
    D1 = np.random.rand(A1.shape[0], A1.shape[1])
    keep_prob = 0.5
    D1 = D1 < keep_prob
    print(D1)
    A1 = 0.01
    A1 = A1 * D1
    A1 = A1 / keep_prob
    print(A1)
    """
    Z2 = np.dot(W2, A1) + b2
    A2 = reg_utils.relu(Z2)
    # Steps 1-4 below correspond to the Steps 1-4 described above.
    D2 = np.random.rand(A2.shape[0], A2.shape[1])  # Step 1: initialize matrix D2 = np.random.rand(..., ...)
    D2 = D2 < keep_prob                            # Step 2: convert entries of D2 to 0 or 1 (using keep_prob as the threshold)
    A2 = A2 * D2                                   # Step 3: shut down some nodes of A2 (set them to 0 or False)
    A2 = A2 / keep_prob                            # Step 4: scale the kept (non-zero) nodes

    Z3 = np.dot(W3, A2) + b3
    A3 = reg_utils.sigmoid(Z3)

    cache = (Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3)

    return A3, cache
def forward_propagation_with_dropout(X, parameters, keep_prob=0.5):
    '''
    Implements forward propagation with randomly dropped-out nodes:
    linear -> relu + dropout -> linear -> relu + dropout -> linear -> sigmoid
    :param X: input dataset, of shape (2, number of examples)
    :param parameters: python dictionary containing 'W1', 'b1', 'W2', 'b2', 'W3', 'b3'
        W1 - weight matrix of shape (20, 2)
        b1 - bias vector of shape (20, 1)
        W2 - weight matrix of shape (3, 20)
        b2 - bias vector of shape (3, 1)
        W3 - weight matrix of shape (1, 3)
        b3 - bias vector of shape (1, 1)
    :param keep_prob: probability of keeping a neuron active during dropout, scalar
    :return: A3 - last activation value, of shape (1, 1), the output of forward propagation
             cache - tuple storing values needed for backward propagation
    '''
    np.random.seed(1)

    W1 = parameters['W1']
    b1 = parameters['b1']
    W2 = parameters['W2']
    b2 = parameters['b2']
    W3 = parameters['W3']
    b3 = parameters['b3']

    # linear -> relu -> linear -> relu -> linear -> sigmoid
    Z1 = np.dot(W1, X) + b1
    A1 = reg_utils.relu(Z1)
    # Steps 1-4 below correspond to the Steps 1-4 described above.
    D1 = np.random.rand(A1.shape[0], A1.shape[1])  # Step 1: initialize D1 = np.random.rand(..., ...)
    D1 = D1 < keep_prob                            # Step 2: convert entries of D1 to 0 or 1 (using keep_prob as the threshold)
    A1 = A1 * D1                                   # Step 3: shut down some nodes of A1 (set them to 0 or False)
    A1 = A1 / keep_prob                            # Step 4: scale the kept (non-zero) nodes
    '''
    # If the four steps above are unclear, run this snippet:
    import numpy as np
    np.random.seed(1)
    A1 = np.random.randn(1, 3)
    D1 = np.random.rand(A1.shape[0], A1.shape[1])
    keep_prob = 0.5
    D1 = D1 < keep_prob
    print(D1)
    A1 = 0.01
    A1 = A1 * D1
    A1 = A1 / keep_prob
    print(A1)
    '''
    Z2 = np.dot(W2, A1) + b2
    A2 = reg_utils.relu(Z2)
    # Steps 1-4 below correspond to the Steps 1-4 above.
    D2 = np.random.rand(A2.shape[0], A2.shape[1])  # Step 1: initialize matrix D2 = np.random.rand(..., ...)
    D2 = D2 < keep_prob                            # Step 2: convert entries of D2 to 0 or 1 (using keep_prob as the threshold)
    A2 = A2 * D2                                   # Step 3: shut down some nodes of A2 (set them to 0 or False)
    A2 = A2 / keep_prob                            # Step 4: scale the kept (non-zero) nodes

    Z3 = np.dot(W3, A2) + b3
    A3 = reg_utils.sigmoid(Z3)

    cache = (Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3)

    return A3, cache
def forward_propagation_with_dropout(X, parameters, keep_prob=0.5):
    """
    Implements the forward propagation with dropout regularization:
    LINEAR -> RELU + DROPOUT -> LINEAR -> RELU + DROPOUT -> LINEAR -> SIGMOID.

    :param X: input dataset, of shape (2, number of examples)
    :param parameters: python dictionary containing the parameters "W1", "b1", "W2", "b2", "W3", "b3":
                    W1 -- weight matrix of shape (20, 2)
                    b1 -- bias vector of shape (20, 1)
                    W2 -- weight matrix of shape (3, 20)
                    b2 -- bias vector of shape (3, 1)
                    W3 -- weight matrix of shape (1, 3)
                    b3 -- bias vector of shape (1, 1)
    :param keep_prob: probability of keeping a neuron active during drop-out
    :return A3: activation of the last layer, output of the forward propagation, of shape (1, 1)
    :return cache: tuple, information stored for computing the backward propagation
    """
    np.random.seed(1)

    # retrieve the parameters
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]
    W3 = parameters["W3"]
    b3 = parameters["b3"]

    # LINEAR -> RELU -> LINEAR -> RELU -> LINEAR -> SIGMOID
    Z1 = np.dot(W1, X) + b1
    A1 = relu(Z1)
    # randomly shut down neurons
    ### START CODE HERE ### (approx. 4 lines)
    D1 = np.random.rand(A1.shape[0], A1.shape[1])  # Step 1: initialize D (same shape as A; 1 keeps a neuron, 0 drops it)
    D1 = (D1 < keep_prob).astype(int)              # Step 2: convert entries of D1 to 0 or 1 (using keep_prob as the threshold)
    A1 = A1 * D1                                   # Step 3: shut down some neurons of A1
    A1 = A1 / keep_prob                            # Step 4: scale the value of the neurons that were kept
    ### END CODE HERE ###
    Z2 = np.dot(W2, A1) + b2
    A2 = relu(Z2)
    ### START CODE HERE ### (approx. 4 lines)
    D2 = np.random.rand(A2.shape[0], A2.shape[1])  # Step 1: initialize matrix D2 = np.random.rand(..., ...)
    D2 = (D2 < keep_prob).astype(int)              # Step 2: convert entries of D2 to 0 or 1 (using keep_prob as the threshold)
    A2 = A2 * D2                                   # Step 3: shut down some neurons of A2
    A2 = A2 / keep_prob                            # Step 4: scale the value of neurons that haven't been shut down
    ### END CODE HERE ###
    Z3 = np.dot(W3, A2) + b3
    A3 = sigmoid(Z3)

    cache = (Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3)

    return A3, cache
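Any of the three-layer variants above can be smoke-tested the same way. Below is a hedged example; the toy initialization follows the (2, 20, 3, 1) shapes from the docstrings and is an assumption for illustration, not taken from the source, and relu/sigmoid are assumed to be the same helpers the snippets already import.

import numpy as np

# hypothetical toy parameters matching the documented shapes
np.random.seed(3)
parameters = {
    "W1": np.random.randn(20, 2) * 0.01, "b1": np.zeros((20, 1)),
    "W2": np.random.randn(3, 20) * 0.01, "b2": np.zeros((3, 1)),
    "W3": np.random.randn(1, 3) * 0.01,  "b3": np.zeros((1, 1)),
}
X = np.random.randn(2, 4)                          # 4 toy examples

A3, cache = forward_propagation_with_dropout(X, parameters, keep_prob=0.7)
print(A3.shape)                                    # (1, 4)
assert len(cache) == 14                            # (Z1, D1, A1, W1, b1, ..., W3, b3)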