Example #1
def G_base(name, x, batch):

    if name == 'G1':
        first_out_channels = 128
    else:
        first_out_channels = 16

    with tf.variable_scope(name):
        conv1 = conv('conv1', x, 7 * 7, first_out_channels, 1, 3, False)
        ins1 = ins_norm('ins1', conv1)
        relu1 = relu('relu1', ins1)

        conv2 = conv('conv2', relu1, 3 * 3, first_out_channels * 2, 2, 0, True)
        ins2 = ins_norm('ins2', conv2)
        relu2 = relu('relu2', ins2)

        conv3 = conv('conv3', relu2, 3 * 3, first_out_channels * 4, 2, 0, True)
        ins3 = ins_norm('ins3', conv3)
        relu3 = relu('relu3', ins3)

        conv4 = conv('conv4', relu3, 3 * 3, first_out_channels * 8, 2, 0, True)
        ins4 = ins_norm('ins4', conv4)
        relu4 = relu('relu4', ins4)

        conv5 = conv('conv5', relu4, 3 * 3, first_out_channels * 16, 2, 0,
                     True)
        ins5 = ins_norm('ins5', conv5)
        relu5 = relu('relu5', ins5)

        x_in = relu5

        if name == 'G1':
            for i in range(9):
                name = 'res' + str(i + 1)
                x_in = res_block(name, x_in)

        up1 = conv_trans('up1', x_in, 3 * 3, first_out_channels * 8, 2, batch,
                         True)
        ins_up1 = ins_norm('ins_up1', up1)
        relu_up1 = relu('relu_up1', ins_up1)

        up2 = conv_trans('up2', relu_up1, 3 * 3, first_out_channels * 4, 2,
                         batch, True)
        ins_up2 = ins_norm('ins_up2', up2)
        relu_up2 = relu('relu_up2', ins_up2)

        up3 = conv_trans('up3', relu_up2, 3 * 3, first_out_channels * 2, 2,
                         batch, True)
        ins_up3 = ins_norm('ins_up3', up3)
        relu_up3 = relu('relu_up3', ins_up3)

        up4 = conv_trans('up4', relu_up3, 3 * 3, first_out_channels, 2, batch,
                         True)
        ins_up4 = ins_norm('ins_up4', up4)
        relu_up4 = relu('relu_up4', ins_up4)

        conv_end = conv('conv_end', relu_up4, 7 * 7, 3, 1, 3, False)
        tanh_end = tanh('tanh_end', conv_end)
        return tanh_end, relu_up4
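The conv, ins_norm, relu, conv_trans, and tanh calls above are project-specific TensorFlow 1.x wrappers that are not shown here. As a rough sketch, the assumed relu helper might look like the following (the name-based signature is inferred from the calls above, not taken from the original project):

import tensorflow as tf

def relu(name, x):
    # Hypothetical wrapper matching the relu('relu1', ins1) calls above:
    # applies an element-wise ReLU and names the resulting op.
    return tf.nn.relu(x, name=name)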
Example #2
def Convolutional_Forward(minibatch_X, parameters_conv, hparameters1,
                          hparameters2, hparameters3, hparameters4):
    Z1, cache_conv1 = conv_forward(minibatch_X, parameters_conv["W1"],
                                   parameters_conv["b1"], hparameters1)
    A1, c1 = relu(Z1)
    P1, cache_pool1 = pool_forward(A1, hparameters2)

    Z2, cache_conv2 = conv_forward(P1, parameters_conv["W2"],
                                   parameters_conv["b2"], hparameters3)
    A2, c2 = relu(Z2)
    P2, cache_pool2 = pool_forward(A2, hparameters4)
    return P2, cache_pool2, cache_pool1, cache_conv1, cache_conv2, c1, c2
Example #3
def res_block(
        name,
        x,
        channels=2048):  # conv('conv1', x, 3*3, 64, 2, ref_pad=1, pad=True)
    with tf.variable_scope(name):
        conv1 = conv('conv1', x, 3 * 3, channels, 1, 1, False)
        conv1_ins = ins_norm('ins1', conv1)
        conv1_relu = relu('relu1', conv1_ins)

        conv2 = conv('conv2', conv1_relu, 3 * 3, channels, 1, 1, False)
        conv2_ins = ins_norm('ins2', conv2)
        conv2_relu = relu('relu2', conv2_ins)
        return conv2_relu + x
Example #4
    def test_forward_propagation_manual(self):
        # Tests forward propagation by comparing to manual calculations.

        # Setup
        data = [2, 2, 2]
        inputs = [1, 2, 3]
        outputs = [7]
        connections = [
            Connection(1, 5),
            Connection(2, 5),
            Connection(2, 4),
            Connection(3, 4),
            Connection(4, 5),
            Connection(4, 6),
            Connection(5, 6),
            Connection(6, 7)
        ]

        w0 = 1.5
        w1 = 2
        w2 = 2.5
        w3 = -1.5
        w4 = -2
        w5 = 3
        w6 = 3
        w7 = 0.5

        connections[0].weight = w0
        connections[1].weight = w1
        connections[2].weight = w2
        connections[3].weight = w3
        connections[4].weight = w4
        connections[5].weight = w5
        connections[6].weight = w6
        connections[7].weight = w7

        # Manual calculations
        a4 = relu((w2 * data[1] + 1) + (w3 * data[2] + 1))
        a5 = relu((w0 * data[0] + 1) + (w1 * data[1] + 1) + (w4 * a4 + 1))
        a6 = relu((w6 * a5 + 1) + (w5 * a4 + 1))
        a7 = sigmoid((w7 * a6 + 1))

        # Run
        graph = Graph()
        a7_actual = graph.forward_propagate(data, inputs, outputs, connections)

        # Assert
        # TODO: Precision problem
        self.assertTrue(abs(a7_actual[1] - a7) < 0.0005)
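The manual calculations above assume scalar relu and sigmoid helpers. A minimal sketch of what they could look like (these definitions are assumptions, not the project's own code):

import math

def relu(x):
    # Scalar ReLU used in the hand-computed activations above.
    return max(0.0, x)

def sigmoid(x):
    # Scalar logistic sigmoid used for the output unit.
    return 1.0 / (1.0 + math.exp(-x))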
Example #5
    def _build_network(self):
        """Build a 2 hidden layer neural network with ReLU activations
        and a softmax output layer"""

        self.W1 = np.random.random((self.inp_shape, self.hidden_units))
        self.b1 = np.zeros((self.hidden_units, ))
        self.hid1 = lambda x: relu(np.dot(x, self.W1) + self.b1)

        self.W2 = np.random.random((self.hidden_units, self.hidden_units))
        self.b2 = np.zeros((self.hidden_units, ))
        self.hid2 = lambda x: relu(np.dot(x, self.W2) + self.b2)

        self.W3 = np.random.random((self.hidden_units, self.num_classes))
        self.b3 = np.zeros((self.num_classes, ))
        self.hid3 = lambda x: softmax(np.dot(x, self.W3) + self.b3)
Example #6
    def forward_pass(self, verbose=False, debug=False):
        """
        Computes the values of all units (neurons) over ALL sample points (vectorized).

        Args:
        Returns:
        """
        if (verbose): print("\n _______ Forward Pass _______")

        # Zeroth step: Outputs of input units.
        X = self.X_active

        if (verbose): print("\t X.shape", X.shape)

        # First step: Outputs (H) of hidden units.
        if (verbose): print("\t Computing 1st layer . . . ")
        S_h     = util.withBias(X) @ self.V.T
        H       = activations.relu(S_h)

        # Second step: Outputs (O) of output units.
        if (verbose): print("\t Computing 2nd layer . . . ")
        S_o     = util.withBias(H) @ self.W.T
        O  = activations.softmax(S_o, verbose)

        if debug: pdb.set_trace()

        self.O = O
        return X, S_h, H, S_o, O
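util.withBias is not shown; from the matrix products above it appears to append a bias column of ones so the bias weights can live inside V and W. A minimal sketch under that assumption:

import numpy as np

def withBias(X):
    # Hypothetical helper: append a column of ones to X so that
    # withBias(X) @ V.T folds the bias into the weight matrix.
    return np.hstack([X, np.ones((X.shape[0], 1))])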
Example #7
def linear_activation_forward_with_dropout(A_prev,
                                           W,
                                           b,
                                           activation,
                                           keep_prob=0.5):
    # Linear forward step
    Z, linear_cache = linear_forward(A_prev, W, b)

    # Activation forward step
    if activation == 'relu':
        A, activation_cache = relu(Z)

        # Implementing dropout
        D = np.random.rand(A.shape[0], A.shape[1])
        D = (D < keep_prob).astype(int)  # convert entries of D to 0 or 1, using keep_prob as the threshold
        A = A * D  # shut down some neurons of A
        A = A / keep_prob  # scale the neurons that haven't been shut down

        cache = (linear_cache, activation_cache, D)
    elif activation == 'sigmoid':
        A, activation_cache = sigmoid(Z)
        cache = (linear_cache, activation_cache, None)

    return A, cache
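Dividing by keep_prob is the inverted-dropout trick: it rescales the surviving activations so their expected value matches the no-dropout case. A small standalone check of that property (not part of the original code):

import numpy as np

np.random.seed(0)
A = np.random.rand(100, 1000)                      # stand-in activations
keep_prob = 0.5
D = (np.random.rand(*A.shape) < keep_prob).astype(int)
A_dropped = (A * D) / keep_prob                    # inverted dropout
print(A.mean(), A_dropped.mean())                  # the two means are close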
Example #8
def main():
    src1, src2 = mpimg.imread('pic.jpg'), mpimg.imread('pic2.jpg')
    src = np.array([src1, src2])
    del src1, src2
    print(src.shape)
    # visualize(src, 3)

    filter = np.array((
        ([-1, 0, 1], [-2, 0, 2], [-1, 0, 1]),
        ([-1, 0, 1], [-2, 0, 2], [-1, 0, 1]),
        ([-1, 0, 1], [-2, 0, 2], [-1, 0, 1]),
    ))

    start = time.time()
    result = convolve(src, filter)
    end = time.time()
    print('Time taken for vectorized: ', end - start)

    result = relu(result)
    plt.imshow(result[0, :, :], cmap='gray')
    plt.show()
    plt.imshow(result[1, :, :], cmap='gray')
    plt.show()

    print(result.shape)
    start = time.time()
    result = max_pool(result, pool_size=2)
    end = time.time()
    print('Time taken Vectorized Max Pool: ', end - start)
    # plt.imshow(result[0], cmap = 'gray')
    # plt.show()
    # plt.imshow(result[1], cmap = 'gray')
    # plt.show()
    print(result.shape)
Example #9
 def forward(self, inputs):
     # Inputs are expected as a single column vector.
     assert inputs.shape == (inputs.shape[0], 1)
     s = np.matmul(np.transpose(self.weights), inputs)
     if self.activationf == 'Relu':
         self.activation = act.relu(s)
     else:
         self.activation = act.sigmoid(s)
Example #10
def linear_activation_forward(A_prev, W, b, activation):
    """
    Implement the forward propagation for the LINEAR->ACTIVATION layer

    Arguments:
    A_prev -- activations from previous layer (or input data): (size of previous layer, number of examples)
    W -- weights matrix: numpy array of shape (size of current layer, size of previous layer)
    b -- bias vector, numpy array of shape (size of the current layer, 1)
    activation -- the activation to be used in this layer, stored as a text string: "softmax" or "relu"

    Returns:
    A -- the output of the activation function, also called the post-activation value 
    cache -- a python dictionary containing "linear_cache" and "activation_cache";
             stored for computing the backward pass efficiently
    """
    if activation == "softmax":
        # Inputs: "A_prev, W, b". Outputs: "A, activation_cache".
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = stable_softmax(Z)

    elif activation == "relu":
        # Inputs: "A_prev, W, b". Outputs: "A, activation_cache".
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = relu(Z)

    assert (A.shape == (W.shape[0], A_prev.shape[1]))
    cache = (linear_cache, activation_cache)

    return A, cache
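relu and stable_softmax here are assumed to return both the activation and an activation cache (typically Z itself), matching the unpacking above. A minimal sketch consistent with that calling convention (an assumption, not the original helpers):

import numpy as np

def relu(Z):
    # Element-wise ReLU; return Z as the activation cache for the backward pass.
    return np.maximum(0, Z), Z

def stable_softmax(Z):
    # Column-wise softmax with the per-column max subtracted for numerical stability.
    shifted = Z - np.max(Z, axis=0, keepdims=True)
    exp = np.exp(shifted)
    return exp / np.sum(exp, axis=0, keepdims=True), Z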
Example #11
    def linear_activation_forward(self, A_prev, W, b, activation):

        if activation == "sigmoid":
            # Inputs: "A_prev, W, b". Outputs: "A, activation_cache"
            Z, linear_cache = self.linear_forward(A_prev, W, b)
            A, activation_cache = activations.sigmoid(Z)

        elif activation == "relu":
            # Inputs: "A_prev, W, b". Outputs: "A, activation_cache".
            Z, linear_cache = self.linear_forward(A_prev, W, b)
            A, activation_cache = activations.relu(Z)

        elif activation == "softmax":
            # Inputs: "A_prev, W, b". Outputs: "A, activation_cache".
            Z, linear_cache = self.linear_forward(A_prev, W, b)
            A, activation_cache = activations.softmax(Z)

        elif activation == "euler":
            Z, linear_cache = self.linear_forward(A_prev, W, b)
            A, activation_cache = activations.euler(Z)

        assert (A.shape == (W.shape[0], A_prev.shape[1]))
        cache = (linear_cache, activation_cache)

        return A, cache
Example #12
 def activation_forward(self, Z, activation="tanh"):
     if activation == "tanh":
         return [Z, tanh(Z)]
     if activation == "relu":
         return [Z, relu(Z)]
     if activation == "sigmoid":
         return [Z, sigmoid(Z)]
Example #13
File: model.py Project: Darvaron/ML
def linear_activation_forward(A_prev, W, b, activation):
    '''
    Implements the Linear->Activation forward propagation for one layer

    Arguments:
    A_prev -- activations from the previous layer, of shape (size of previous layer, number of examples)
    W -- weights of the current layer, of shape (size of current layer, size of previous layer)
    b -- biases of the current layer, of shape (size of current layer, 1)
    activation -- string with the name of the activation used in this layer: "Relu" or "Sigmoid"

    Returns:
    A -- activation of the current layer, of shape (size of current layer, number of examples)
    cache -- Python tuple containing the linear and activation caches
    '''

    if activation == "Sigmoid":
        Z, linear_c = linear_forward(A_prev, W, b)
        A, activacion_c = sigmoid(Z)

    elif activation == "Relu":
        Z, linear_c = linear_forward(A_prev, W, b)
        A, activacion_c = relu(Z)

    cache = (linear_c, activacion_c)

    return A, cache
Example #14
def forward_prop(X, params):
    W1 = params['W1']
    W2 = params['W2']
    W3 = params['W3']
    b1 = params['b1']
    b2 = params['b2']
    b3 = params['b3']

    Z1 = np.dot(W1, X) + b1
    A1 = relu(Z1)
    Z2 = np.dot(W2, A1) + b2
    A2 = relu(Z2)
    Z3 = np.dot(W3, A2) + b3
    A3 = sigmoid(Z3)

    cache = [Z1, A1, W1, b1, Z2, A2, W2, b2, Z3, A3, W3, b3]
    return A3, cache
Example #15
    def build_G(self, x_bound, x_label, x_feat, x_k, x_b):
        with tf.variable_scope('G'):

            # Fuse the inputs
            x_feat_act = tf.add(tf.multiply(x_feat, x_k), x_b)
            x_concat = tf.concat([x_bound, x_label, x_feat_act], 3)

            #
            input_downsampled = tf.nn.avg_pool(x_concat,
                                               ksize=[1, 3, 3, 1],
                                               strides=[1, 2, 2, 1],
                                               padding="SAME")

            # G1
            _, G1_relu_up4 = G_base('G1', input_downsampled, self.batch)

            # G2_1
            G2_1_conv1 = conv('G2_1_conv1', x_concat, 7 * 7, 64, 1, None, True)
            G2_1_ins1 = ins_norm('G2_1_ins1', G2_1_conv1)
            G2_1_relu1 = relu('G2_1_relu1', G2_1_ins1)

            G2_1_conv2 = conv('G2_1_conv2', G2_1_relu1, 3 * 3, 128, 2, 1,
                              False)
            G2_1_ins2 = ins_norm('G2_1_ins2', G2_1_conv2)
            G2_1_relu2 = relu('G2_1_relu2', G2_1_ins2)

            # Merge the output of G1 with the output of G2_1 (128 channels)
            G_add = tf.add(G1_relu_up4, G2_1_relu2, name='G_Add')

            # G2_2
            # res_block
            for i in range(3):
                name = 'G2_2_res' + str(i + 1)
                G_add = res_block(name, G_add, channels=128)
            #
            G2_2_trans = conv_trans('G2_2_trans', G_add, 3 * 3, 64, 2,
                                    self.batch, True)
            G2_2_ins2 = ins_norm('G2_2_ins2', G2_2_trans)
            G2_2_relu2 = relu('G2_2_relu2', G2_2_ins2)

            # final convolution
            G2_2_conv_end = conv('G2_2_conv_end', G2_2_relu2, 7 * 7, 3, 1,
                                 None, True)
            G2_2_tanh_end = tanh('G2_2_tanh_end', G2_2_conv_end)

            return G2_2_tanh_end
Example #16
 def __one_layer_forward_prop(self, a_prev, layer_index, activation):
     z = np.dot(self.w[layer_index], a_prev) + self.b[layer_index]
     if activation == 'relu':
         return relu(z), z
     elif activation == 'sigmoid':
         return sigmoid(z), z
     elif activation == 'softmax':
         return softmax(z), z
     else:
         raise "Invalid activation: {}".format(activation)
Example #17
    def linear_activation_forward(self, A, W, b, activation):
        Z, linear_cache = self.linear_forward(A, W, b)
        if activation == 'sigmoid':
            A, activation_cache = sigmoid(Z), (linear_cache, Z)
        elif activation == 'relu':
            A, activation_cache = relu(Z), (linear_cache, Z)
        else:
            assert False
        assert A.shape == (W.shape[0], Z.shape[1])  

        return A, activation_cache
Example #18
def one_layer_forward_pass(input_activations, weights, bias, activation='R'):
    output = np.dot(weights, input_activations) + bias

    if activation == 'R':
        activation_next = activations.relu(output)
    elif activation == 'S':
        activation_next = activations.sigmoid(output)
    else:
        raise Exception('Unsupported activation: {}'.format(activation))

    return activation_next, output
Example #19
def forward(x, w1, b1, w2, b2):
    z1 = x.dot(w1.T) + b1.T
    a1 = relu(z1)
    z2 = a1.dot(w2.T) + b2.T
    a2 = softmax(z2)
    return {
        'a1': a1,
        'z1': z1,
        'a2': a2,
        'z2': z2,
    }
Example #20
def linear_activation_forward(A_prev, W, b, activation):
    # Linear forward step
    Z, linear_cache = linear_forward(A_prev, W, b)

    # Forward activation step
    if activation == 'relu':
        A, activation_cache = relu(Z)
    elif activation == 'sigmoid':
        A, activation_cache = sigmoid(Z)

    cache = (linear_cache, activation_cache)
    return A, cache
Example #21
def linear_activation_forward(A_prev, W, b, activation):
    Z, linear_cache = linear_forward(A_prev, W, b)

    if activation == 'sigmoid':
        A, activation_cache = sigmoid(Z)
    elif activation == 'relu':
        A, activation_cache = relu(Z)
    else:
        raise KeyError('No activation function found')

    cache = (linear_cache, activation_cache)

    return A, cache
Example #22
def one_step_forward(A_prev, W, b, activation):

    Z = np.dot(W, A_prev) + b
    linear_cache = A_prev, W, b
    # print(A_prev.shape,W.shape,b.shape)

    if activation == 'relu':
        A, activation_cache = relu(Z)  # relu is used for the first L-1 layers
    elif activation == 'softmax':
        A, activation_cache = softmax(Z)  # softmax is used for the last (Lth) layer

    return A, (linear_cache, activation_cache)
Example #23
def convolution_forward(X, filters, bias):
    """Funckja realizujaca warstwe konwolucyjna z liniowa funkcja aktywacji typu ReLU
    Jest to ta sama f-cja co convolution w pliku splot.py z ta roznica, ze bierze pod uwage macierz progow
    """	
    feature_maps = []
    for i in range(len(filters)):
        feature_map = []
        filter1 = filters[i]
        depth = len(filter1)
        for j in range(depth):
            feature_map.append(signal.convolve2d(X[j], np.rot90(filter1[j], 2), 'valid'))
        feature_map = sum(feature_map) + bias[i]*np.ones((feature_map[0].shape[0], feature_map[0].shape[1]))
        feature_maps.append(act.relu(feature_map))
    return np.asarray(feature_maps)
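Rotating each kernel by 180 degrees before signal.convolve2d makes the operation equivalent to cross-correlation, which is what deep-learning "convolution" layers usually compute. A small standalone check of that equivalence (not part of the original code):

import numpy as np
from scipy import signal

X = np.random.rand(6, 6)
K = np.random.rand(3, 3)
conv = signal.convolve2d(X, np.rot90(K, 2), 'valid')
corr = signal.correlate2d(X, K, 'valid')
print(np.allclose(conv, corr))  # True: flipping the kernel turns convolution into correlation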
Example #24
 def forward_prop(self, X):
     #print('X.shape',X.shape)
     A_list = []
     m, n = X.shape
     A = X.T
     A_list.append(A)
     for i in range(len(self.w_list)):
         Z = (np.dot(self.w_list[i], A)) + self.b_list[i]
         if self.activ[i] == 'sigm':
             A = activ.sigmoid(Z)
         elif self.activ[i] == 'relu':
             A = activ.relu(Z)
         A_list.append(A)
     return A_list
Example #25
def linear_activation_forward(A_prev, W, b, activation):

    Z, Z_cache = linear_forward(A_prev, W, b)

    if activation == 'relu':
        A, A_cache = activations.relu(Z)

    elif activation == 'sigmoid':
        A, A_cache = activations.sigmoid(Z)

    elif activation == 'softmax':
        A, A_cache = activations.softmax(Z)

    cache = (Z_cache, A_cache)

    return A, cache
Example #26
def linear_act_forward(A, W, b, act):
    """
    Implements the linear and activation functions of a single node
    Also, returns the original values for storage in cache
    """
    if act == "sigmoid":
        Z, lin_cache = linear(A, W, b)
        A, act_cache = sigmoid(Z)
    elif act == "relu":
        Z, lin_cache = linear(A, W, b)
        A, act_cache = relu(Z)
    elif act == "softmax":
        Z, lin_cache = linear(A, W, b)
        A, act_cache = softmax(Z)

    cache = (lin_cache, act_cache)

    return A, cache
Example #27
    def activation_forward(self, input, W, b, activation_type):
        '''
        :param input: the input of the current layer
        :param W: the weights of the current layer
        :param b: biases of the current layer
        :param activation_type: type of activation function used in the forward propagation
        :return: - A --> the output of the activation function
                 - packet_of_packets --> tuple of 2 elements which will be used in backward propagation:
                     1- linear packet: contains (input, weights, biases) of the current layer
                     2- activation packet: contains (Z), the input to the activation function
        '''
        if activation_type == "sigmoid":
            Z, linear_packet = self.identity_forward(input, W, b)  # Z = input * W + b
            temp = activations.Sigmoid()
            A, activation_packet = temp.forward(Z)  # A = sigmoid(Z)

        elif activation_type == "relu":
            Z, linear_packet = self.identity_forward(input, W, b)
            temp = activations.relu()
            A, activation_packet = temp.forward(Z)

        elif activation_type == "leaky_relu":
            Z, linear_packet = self.identity_forward(input, W, b)
            temp = activations.leaky_relu()
            A, activation_packet = temp.forward(Z)
        elif activation_type == "tanh":
            Z, linear_packet = self.identity_forward(input, W, b)
            temp = activations.tanh()
            A, activation_packet = temp.forward(Z)
        elif activation_type == "softmax":
            Z, linear_packet = self.identity_forward(input, W, b)
            #temp =
            A, activation_packet = activations.Softmax().forward(Z)
        elif activation_type == "linear":
            Z, linear_packet = self.identity_forward(input, W, b)
            # temp =
            A, activation_packet = Z,Z

        else:
            raise ValueError("ERROR : Activation Function is Not Determined")

        packet_of_packets = linear_packet, activation_packet
        return A, packet_of_packets
Example #28
def linear_activation_forward(A_prev, W, b, activation):
    # Computes the activations A of the units in a single layer

    if activation == "sigmoid":
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = sigmoid(Z)

    elif activation == "relu":
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = relu(Z)

    elif activation == "softmax":
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = softmax(Z)


    assert (A.shape == (W.shape[0], A_prev.shape[1]))
    cache = (linear_cache, activation_cache)

    return A, cache
Example #29
    def forward(self, inputData):
        """Funkcja realizacje propagacje danych w przod
        Wejscie: obraz lub paczka obrazow ze zbioru danych
        Wyjscie: produkt warstwy softmax: 10-dim wektor liczb
        """
        weights = self.Weights
        biases = self.Biases
        poolParams = self.poolParams
        cache = []  # stores the intermediate layer outputs - needed for backpropagation
        # input layer
        layer0 = np.asarray(inputData)
        cache.append(layer0)
        # first convolutional layer
        layer1 = convolution_forward(np.asarray([layer0]), weights[0],
                                     biases[0])
        cache.append(layer1)
        # first max-pooling layer
        layer2 = maxpool_forward(layer1, poolParams[0][0], poolParams[0][1])
        cache.append(layer2)
        # second convolutional layer
        layer3 = convolution_forward(layer2, weights[1], biases[1])
        cache.append(layer3)
        # second max-pooling layer
        layer4 = maxpool_forward(layer3, poolParams[1][0], poolParams[1][1])
        cache.append(layer4)
        # first fully connected layer, implemented as a convolutional layer
        layer5 = convolution_forward(layer4, weights[2], biases[2])
        cache.append(layer5)
        # second fully connected layer with a ReLU activation
        layer6 = act.relu(
            np.dot(weights[3], layer5[:, 0]).transpose() +
            biases[3]).transpose()
        cache.append(layer6)
        #softmax
        layer7 = np.dot(weights[4], layer6[:, 0]).transpose() + biases[4]
        layer7 -= np.max(layer7)
        layer7 = np.exp(layer7) / sum(np.exp(layer7))

        return (layer7, cache)
Example #30
def _step_forward(A_prev, W, b, activation="sigmoid", keep_prob=1):
  # linear
  # Z[l] = W[l] dot A[l-1] + b[l]
  Z = np.dot(W, A_prev) + b
  linear_cache = (A_prev, W, b)
  
  # activation
  # A[l] = g[l](Z[l])
  if activation == "sigmoid":
    A = sigmoid(Z)
  elif activation == "relu":
    A = relu(Z)
  elif activation == "tanh":
    A = np.tanh(Z)

  D = np.random.rand(*A.shape) < keep_prob
  A *= D
  A /= keep_prob
  
  activation_cache = (Z, activation)

  return A, D, (linear_cache, activation_cache)
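A possible usage sketch: keep_prob < 1 during training and keep_prob=1 at evaluation time, which makes the dropout mask a no-op. The shapes and parameter values below are made up for illustration, and sigmoid/relu are assumed to return plain arrays as in the snippet above:

import numpy as np

np.random.seed(1)
A_prev = np.random.randn(4, 10)   # 4 units in the previous layer, 10 examples
W = np.random.randn(3, 4)
b = np.zeros((3, 1))

A_train, D, caches = _step_forward(A_prev, W, b, activation="relu", keep_prob=0.8)
A_eval, _, _ = _step_forward(A_prev, W, b, activation="relu", keep_prob=1)
print(A_train.shape, A_eval.shape)  # (3, 10) (3, 10)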