Example #1
    def linear_activation_forward(self, A_prev, W, b, activation):

        if activation == "sigmoid":
            # Inputs: "A_prev, W, b". Outputs: "A, activation_cache"
            Z, linear_cache = self.linear_forward(A_prev, W, b)
            A, activation_cache = activations.sigmoid(Z)

        elif activation == "relu":
            # Inputs: "A_prev, W, b". Outputs: "A, activation_cache".
            Z, linear_cache = self.linear_forward(A_prev, W, b)
            A, activation_cache = activations.relu(Z)

        elif activation == "softmax":
            # Inputs: "A_prev, W, b". Outputs: "A, activation_cache".
            Z, linear_cache = self.linear_forward(A_prev, W, b)
            A, activation_cache = activations.softmax(Z)

        elif activation == "euler":
            Z, linear_cache = self.linear_forward(A_prev, W, b)
            A, activation_cache = activations.euler(Z)

        assert (A.shape == (W.shape[0], A_prev.shape[1]))
        cache = (linear_cache, activation_cache)

        return A, cache
Example #2
def softmax_grad(x):
    """
    Computes the gradient of the softmax function. The softmax takes
    a vector of inputs and computes the jacobian for the partial
    derivatives of the function with respect to the input
        J_{i,j} = \partial f_i( x ) / \partial x_j

    Args:
        x (np.ndarray): The input vector to the softmax during forward
        propagation of shape (M,)

    Returns:
        A Jacobian of shape (M,M) which contains the partial derivatives
    """

    f_x = np.squeeze(softmax(x))
    n = len(f_x)
    mask = np.eye(n, dtype=bool)

    jac = np.zeros((n, n))

    # Diagonal entries: J_ii = f_i * (1 - f_i)
    diag_idx, _ = np.where(mask)
    jac[mask] = f_x[diag_idx] * (1 - f_x[diag_idx])

    # Off-diagonal entries: J_ik = -f_i * f_k
    i_idx, k_idx = np.where(~mask)
    jac[~mask] = -f_x[i_idx] * f_x[k_idx]

    return jac
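The masked construction above is equivalent to the closed form J = diag(f(x)) - f(x) f(x)^T. A minimal vectorized sketch of that identity, reusing the same (assumed) softmax helper, useful as a cross-check:

import numpy as np

def softmax_grad_vectorized(x):
    # Closed-form Jacobian: J = diag(s) - s s^T, with s = softmax(x).
    # Diagonal: s_i * (1 - s_i); off-diagonal: -s_i * s_k, matching the
    # masked version above.
    s = np.squeeze(softmax(x))
    return np.diag(s) - np.outer(s, s)

# Illustrative sanity check: np.allclose(softmax_grad(x), softmax_grad_vectorized(x))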
Example #3
 def forward(self, x, dropout_prob=None):
     """
     前向传播,针对一个mini-batch处理
     """
     net_inputs = []  # 各层的输入
     net_outputs = []  # 各层激活后的输出
     net_d = []
     # 为了层号对应,将输入层直接添加
     net_inputs.append(x)
     net_outputs.append(x)
     net_d.append(np.ones(x.shape[1:]))  # 输入层无丢弃概率
     for i in range(1, self.weight_num):  # 参数数量比层数少1
         x = x @ self.params['w' + str(i)].T
         net_inputs.append(x)
         x = tanh(x)
         if dropout_prob:
             # 训练阶段丢弃
             x, d_temp = dropout(x, dropout_prob)
             net_d.append(d_temp)
         net_outputs.append(x)
     out = x @ self.params['w' + str(self.weight_num)].T
     net_inputs.append(out)
     out = softmax(out)
     net_outputs.append(out)
     return {
         'net_inputs': net_inputs,
         'net_outputs': net_outputs,
         'd': net_d
     }, out
Example #4
    def forward_bn(self, x, bn_mode='train'):
        """
        带BN层的前向传播
        """

        net_inputs = []
        net_outputs = []
        caches = []
        net_inputs.append(x)
        net_outputs.append(x)
        caches.append(x)

        for i in range(1, self.weight_num):
            # Batch-normalize the inputs of all hidden layers; the input and
            # output layers are not batch-normalized.
            x = x @ self.params['w' + str(i)].T
            net_inputs.append(x)
            # BN can be thought of as a trainable layer inserted between a
            # hidden unit's pre-activation input and its activated output.
            x, cache = self.batch_norm(x, i, bn_mode)
            caches.append(cache)
            x = tanh(x)
            net_outputs.append(x)
        out = x @ self.params['w' + str(self.weight_num)].T
        net_inputs.append(out)
        out = softmax(out)
        net_outputs.append(out)

        return {
            'net_inputs': net_inputs,
            'net_outputs': net_outputs,
            'cache': caches
        }, out
Example #5
    def forward_pass(self, verbose=False, debug=False):
        """
        Computes the values of all units (neurons) over ALL sample points (vectorized).

        Args:
        Returns:
        """
        if (verbose): print("\n _______ Forward Pass _______")

        # Zeroth step: Outputs of input units.
        X = self.X_active

        if (verbose): print("\t X.shape", X.shape)

        # First step: Outputs (H) of hidden units.
        if (verbose): print("\t Computing 1st layer . . . ")
        S_h     = util.withBias(X) @ self.V.T
        H       = activations.relu(S_h)

        # Second step: Outputs (O) of output units.
        if (verbose): print("\t Computing 2nd layer . . . ")
        S_o     = util.withBias(H) @ self.W.T
        O  = activations.softmax(S_o, verbose)

        if debug: pdb.set_trace()

        self.O = O
        return X, S_h, H, S_o, O
Example #6
    def feedforward(self, s_inst, s_trans):

        y_r = act.sigmoid(s_inst, self.V_r)

        g = act.softmax(s_inst, self.W_g, self.g_strength, self.level_bias)
        g = np.transpose(g)
        l_sel = self.select_level(g)

        y_m = np.zeros((self.L, 1, self.M))
        for l in np.arange(self.L):
            if l == l_sel:
                # update the selected memory level
                y_m[l, :, :], self.cumulative_memory[l, :, :] = act.sigmoid_acc_leaky(
                    s_trans, self.V_m[l, :, :],
                    self.cumulative_memory[l, :, :],
                    self.LEAK[l, 0, 0], g[l, 0])
            else:
                # unselected levels only apply the leak factor
                self.cumulative_memory[l, :, :] *= self.LEAK[l, 0, 0]
                y_m[l, :, :] = act.sigmoidal(self.cumulative_memory[l, :, :])
            print('\t\t\t\t MEMORY_LEVEL ', l, '\t ', y_m[l, :, :])

        inp_h = np.zeros((1, self.H))
        for l in np.arange(self.L):
            # accumulate the contribution of every memory level
            inp_h += act.linear(y_m[l, :, :], self.W_m[l, :, :])
        y_h = act.sigmoidal(inp_h)

        Q = act.linear(y_r, self.W_r) + act.linear(y_h, self.W_h)

        return y_r, y_m, y_h, g, l_sel, Q
Example #7
 def __call__(self, y_true, y_pred):
     if y_true.ndim == y_pred.ndim:
         if self.ignore_value is not None:
             mask = T.neq(y_true, self.ignore_value)
             logit = masking_softmax(y_pred, mask)
             logit = T.clip(logit, 1e-9, 1 - 1e-9)
             log_prob = y_true * T.switch(T.neq(y_true, self.ignore_value),
                                          T.log(logit), 0)
             batch_size = T.sum(y_true * T.neq(y_true, self.ignore_value))
             return T.cast(-T.sum(log_prob) / batch_size, 'floatX')
         else:
             logit = softmax(y_pred)
             logit = T.clip(logit, 1e-9, 1 - 1e-9)
             return -T.mean(y_true * T.log(logit), axis=-1)
     elif y_true.ndim == y_pred.ndim - 1:
         no_classes = y_pred.shape[-1]
         total_dim = 1
         for d in y_pred.shape[:-1]:
             total_dim *= d
         y_pred = y_pred.reshape((-1, no_classes))
         y_true = y_true.reshape((-1, 1))
         if self.ignore_value is not None:
             mask = T.neq(y_true, self.ignore_value)
             logit = masking_softmax(y_pred, mask)
             prob = logit[T.arange(total_dim).dimshuffle(0, 'x'), y_true]
             prob = T.clip(prob, 1e-9, 1 - 1e-9)
             log_prob = T.switch(T.neq(y_true, self.ignore_value),
                                 T.log(prob), 0)
             batch_size = T.sum(T.neq(y_true, self.ignore_value))
         else:
             prob = y_pred[T.arange(total_dim).dimshuffle(0, 'x'), y_true]
             prob = T.clip(prob, 1e-9, 1 - 1e-9)
             log_prob = T.log(prob)
             batch_size = total_dim
         return T.cast(-T.sum(log_prob) / batch_size, 'floatX')
Example #8
def linear_activation_forward(A_prev, W, b, activation):
    """
	Implement the forward propagation for the LINEAR->ACTIVATION layer

	Arguments:
	A_prev -- activations from previous layer (or input data): (size of previous layer, number of examples)
	W -- weights matrix: numpy array of shape (size of current layer, size of previous layer)
	b -- bias vector, numpy array of shape (size of the current layer, 1)
	activation -- the activation to be used in this layer, stored as a text string: "sigmoid" or "relu"

	Returns:
	A -- the output of the activation function, also called the post-activation value 
	cache -- a python dictionary containing "linear_cache" and "activation_cache";
			 stored for computing the backward pass efficiently
	"""

    if activation == "sigmoid":
        # Inputs: "A_prev, W, b". Outputs: "A, activation_cache".
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = sigmoid(Z)

    elif activation == "relu":
        # Inputs: "A_prev, W, b". Outputs: "A, activation_cache".
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = relu(Z)

    elif activation == "softmax":
        # Inputs: "A_prev, W, b". Outputs: "A, activation_cache".
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = softmax(Z)

    assert (A.shape == (W.shape[0], A_prev.shape[1]))
    cache = (linear_cache, activation_cache)

    return A, cache
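Example #8 relies on linear_forward and activation helpers that each return a (value, cache) pair; those helpers are not shown on this page. A minimal sketch of what they could look like (the names come from the snippet, the implementations are assumptions):

import numpy as np

def linear_forward(A_prev, W, b):
    # Affine step Z = W @ A_prev + b; cache the inputs for the backward pass.
    Z = np.dot(W, A_prev) + b
    return Z, (A_prev, W, b)

def sigmoid(Z):
    A = 1.0 / (1.0 + np.exp(-Z))
    return A, Z  # the activation cache is just Z

def relu(Z):
    return np.maximum(0, Z), Z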
Example #9
    def feed_forward(self, x):
        output = np.dot(self.weights, x) + self.biases

        self.cache_x = x
        self.cache_output = output

        return softmax(output)
Example #10
 def feedforward(self, input):
     """Uses softmax to ensure that probabilities over all classes sum to one. Prediction with highest probability returns 1, rest 0."""
     output = softmax(input)
     max_index = np.argmax(output, axis=1)
     output = np.zeros(output.shape)
     output[0, max_index] = 1
     return output
Example #11
 def forward(self):
     logits = self.logits.get_data()
     labels = self.labels.get_data()
     pred = softmax(logits)
     res = cross_entropy(logits=pred, labels=labels)
     self.out.set_data_(res)
     self.pred.set_data_(pred)
     return self.out
Example #12
 def forward(self, x, a_prev, c_prev):
     self.gamma_f = sigmoid(np.dot(self.w_f, np.concatenate([a_prev, x])) + self.b_f)
     self.gamma_u = sigmoid(np.dot(self.w_u, np.concatenate([a_prev, x])) + self.b_u)
     self.gamma_o = sigmoid(np.dot(self.w_o, np.concatenate([a_prev, x])) + self.b_o)
     self.c_ = np.tanh(np.dot(self.w_c, np.concatenate([a_prev, x])) + self.b_c)  # matrix product, as for the gates
     self.c = self.gamma_f * c_prev + self.gamma_u * self.c_
     self.a = self.gamma_o * np.tanh(self.c)
     # original line was incomplete (`softmax(np.dot())`); assuming output
     # parameters self.w_y and self.b_y for the class prediction
     self.y = softmax(np.dot(self.w_y, self.a) + self.b_y)
Example #13
 def activationFunction(self, z):
     if self.activ == Activations.SIGMOID.value:
         return actvtn.sigmoid(z)
     elif self.activ == Activations.SOFTMAX.value:
         return actvtn.softmax(z)
     elif self.activ == Activations.TANH.value:
         return actvtn.tanh(z)
     else:
         return z
Example #14
 def __one_layer_forward_prop(self, a_prev, layer_index, activation):
     z = np.dot(self.w[layer_index], a_prev) + self.b[layer_index]
     if activation == 'relu':
         return relu(z), z
     elif activation == 'sigmoid':
         return sigmoid(z), z
     elif activation == 'softmax':
         return softmax(z), z
     else:
         raise ValueError("Invalid activation: {}".format(activation))
Example #15
 def _forward_prop(self, x):
     self._activations[0] = x
     for i in range(1, self.num_layers):
         self._zs[i] = (self.weights[i].dot(self._activations[i - 1]) +
                        self.biases[i])
         # Use "softmax" for last layer.
         if i == self.num_layers - 1:
             self._activations[i] = activations.softmax(self._zs[i])
         else:
             self._activations[i] = self.activation_fn(self._zs[i])
Example #16
	def feedforward(self,s_inst,s_trans):

		g_strength = 3
		f_strength = 3

		y_r = act.sigmoid(s_inst, self.V_r)
		g = act.softmax(s_inst,self.W_g, g_strength)
		g = np.transpose(g)
		f = act.hard_sigmoid(s_inst,self.W_f, f_strength)
		f = np.transpose(f)
Example #17
def forward(x, w1, b1, w2, b2):
    z1 = x.dot(w1.T) + b1.T
    a1 = relu(z1)
    z2 = a1.dot(w2.T) + b2.T
    a2 = softmax(z2)
    return {
        'a1': a1,
        'z1': z1,
        'a2': a2,
        'z2': z2,
    }
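The relu and softmax helpers used by Example #17 are not shown; a common, numerically stable row-wise implementation that would fit these shapes (an assumption for illustration, not this repository's actual code):

import numpy as np

def relu(z):
    return np.maximum(0, z)

def softmax(z):
    # Subtract the per-row maximum before exponentiating to avoid overflow;
    # rows correspond to samples, columns to classes.
    e = np.exp(z - z.max(axis=1, keepdims=True))
    return e / e.sum(axis=1, keepdims=True)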
Example #18
 def _forward_prop(self, x):
     '''
     Run forward prop.
     '''
     a = np.array(x).reshape((len(x), 1))
     for count, b, w in zip(range(self.num_layers - 1), self.biases,
                            self.weights):
         if count == self.num_layers - 2:
             a = activations.softmax(np.dot(w, a) + b)
         else:
             a = self.activation(np.dot(w, a) + b)
     return a
Example #19
    def predict_proba(self, X):
        """ Perform the forward propagation.

            :param X: The batch - np.ndarray
            :return: A list of activation values - list of np.ndarray
        """
        X = X.T
        for i in range(len(self.W) - 1):
            X = self.act_func(np.dot(self.W[i], X) + self.B[i])
        # softmax on the last layer
        X = softmax(np.dot(self.W[-1], X) + self.B[-1])
        return X.T
Example #20
def one_step_forward(A_prev, W, b, activation):

    Z = np.dot(W, A_prev) + b
    linear_cache = A_prev, W, b
    # print(A_prev.shape,W.shape,b.shape)

    if activation == 'relu':
        A, activation_cache = relu(Z)  # relu is used for the first L-1 layers
    elif activation == 'softmax':
        A, activation_cache = softmax(Z)  # softmax is used for the final Lth layer

    return A, (linear_cache, activation_cache)
Example #21
    def apply_grad(self, X, Y):
        N, _ = X.shape
        gradients = []
        output = self.model.predict(X)
        self.model.output = output

        for i in xrange(N):
            yi = Y[i:i + 1]
            oi = output[i:i + 1]
            loss = 1.0 / N * (softmax(oi) - yi)
            gradients.append(loss)
        self.model.layers[-1].apply_grad(X, Y, gradients)

        return self(X, Y)
Example #22
def predict(network, x):
    W1, W2, W3 = network['W1'], network['W2'], network['W3']
    b1, b2, b3 = network['b1'], network['b2'], network['b3']

    a1 = np.dot(x, W1) + b1
    z1 = sigmoid(a1)

    a2 = np.dot(z1, W2) + b2
    z2 = sigmoid(a2)

    a3 = np.dot(z2, W3) + b3
    y = softmax(a3)

    return y
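A quick usage sketch for the predict helper above, with made-up layer sizes (2 -> 3 -> 2 -> 2) and random parameters; it assumes the same sigmoid and softmax helpers as the snippet (illustrative only):

import numpy as np

network = {
    'W1': np.random.randn(2, 3), 'b1': np.zeros(3),
    'W2': np.random.randn(3, 2), 'b2': np.zeros(2),
    'W3': np.random.randn(2, 2), 'b3': np.zeros(2),
}
x = np.array([1.0, 0.5])
y = predict(network, x)  # softmax output over the two classes, sums to 1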
Example #23
    def _back_prop(self, x, y):
        """
        Compute gradients of Cost
        
        Returns:
        * (nabla_b, nabla_w) representing the
        gradient for the cost function C_x.  
        
        nabla_b and nabla_w are similar
        to self.biases and self.weights.
        """

        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]

        # feedforward
        activation = np.array(x).reshape((len(x), 1))

        # list to store all the activations, layer by layer
        a_ss = [activation]

        # list to store all the z vectors, layer by layer
        zs = []

        count = 0
        for b, w in zip(self.biases, self.weights):
            z = np.dot(w, activation) + b
            zs.append(z)
            if count == self.num_layers - 2:
                activation = activations.softmax(z)
            else:
                activation = self.activation(z)
            a_ss.append(activation)
            count += 1

        # backward pass
        delta = self.cost_derivative(a_ss[-1], y) * activations.softmax_prime(
            zs[-1])

        nabla_b[-1] = delta
        nabla_w[-1] = np.dot(delta, a_ss[-2].transpose())

        for l in range(2, self.num_layers):
            delta = np.dot(self.weights[-l + 1].transpose(),
                           delta) * self.activation_prime(zs[-l])
            #print(delta)
            nabla_b[-l] = delta
            nabla_w[-l] = np.dot(delta, a_ss[-l - 1].transpose())

        return (nabla_b, nabla_w)
Example #24
    def _build_network(self):
        """Build a 2 hidden layer neural network with ReLU activations
        and a softmax output layer"""

        self.W1 = np.random.random((self.inp_shape, self.hidden_units))
        self.b1 = np.zeros((self.hidden_units, ))
        self.hid1 = lambda x: relu(np.dot(x, self.W1) + self.b1)

        self.W2 = np.random.random((self.hidden_units, self.hidden_units))
        self.b2 = np.zeros((self.hidden_units, ))
        self.hid2 = lambda x: relu(np.dot(x, self.W2) + self.b2)

        self.W3 = np.random.random((self.hidden_units, self.num_classes))
        self.b3 = np.zeros((self.num_classes, ))
        self.hid3 = lambda x: softmax(np.dot(x, self.W3) + self.b3)
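With these three lambdas, a forward pass is simply their composition, e.g. (sketch): probs = self.hid3(self.hid2(self.hid1(x))), giving one softmax row per input sample.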
Example #25
def linear_activation_forward(A_prev, W, b, activation):

    Z, Z_cache = linear_forward(A_prev, W, b)

    if activation == 'relu':
        A, A_cache = activations.relu(Z)

    elif activation == 'sigmoid':
        A, A_cache = activations.sigmoid(Z)

    elif activation == 'softmax':
        A, A_cache = activations.softmax(Z)

    cache = (Z_cache, A_cache)

    return A, cache
Example #26
    def feedforward(self, s_inst, s_trans):

        y_r = act.sigmoid(s_inst, self.V_r)
        g = act.softmax(s_inst, self.W_g, self.g_strength)
        g = np.transpose(g)

        y_m = 1e-6 * np.ones((self.L, 1, self.M))
        Q = act.linear(y_r, self.W_r)
        for l in np.arange(self.L):
            y_m[l, :, :], self.memory_content[l, :, :] = act.sigmoid_acc_leaky(
                s_trans, self.V_m[l, :, :], self.memory_content[l, :, :],
                1 - g[l, 0], g[l, 0])
            Q += act.linear(y_m[l, :, :], self.W_m[l, :, :])
            #print('\t MEM STATE ',str(l),':', y_m[l,:,:],'\t alpha=',self.ALPHA[l,0,0],'\t gate=',g[l,:],'\t forget=',f[l,:])

        return y_r, y_m, g, Q
Example #27
    def feedforward(self, inputs):
        """
        Feeds the input through the network layers to the softmax function
        """
        # Hidden Layer 1 
        z1_sum = np.matmul(self.weights['hidden1'].T, inputs) + self.bias['hidden1']
        z1 = activations.sigmoid(z1_sum, 'normal')
        
        # Hidden Layer 2
        # z2 = activations.sigmoid((np.einsum('ij, j->i', self.weights['hidden2'], z1) + self.bias['hidden2']), 'normal')

        # Output Layer
        out_sum = np.matmul(self.weights['out'].T, z1) + self.bias['out']
        prediction = activations.softmax(out_sum, 'normal')

        return prediction, out_sum, z1, z1_sum
Example #28
def softmax_grad(x):
    # x is a vector; returns the (M, M) Jacobian matrix of softmax(x)
    f_x = np.squeeze(softmax(x))
    n = len(f_x)
    mask = np.eye(n, dtype=bool)

    jac = np.zeros((n, n))

    # Diagonal entries: J_ii = f_i * (1 - f_i)
    diag_idx, _ = np.where(mask)
    jac[mask] = f_x[diag_idx] * (1 - f_x[diag_idx])

    # Off-diagonal entries: J_ik = -f_i * f_k
    i_idx, k_idx = np.where(~mask)
    jac[~mask] = -f_x[i_idx] * f_x[k_idx]

    return jac
Example #29
    def output(self, pre_act=False, dropout_active=False):
        X = self.l_in.output(dropout_active=dropout_active)

        is_tensor3_softmax = X.ndim > 2

        shape = X.shape
        if is_tensor3_softmax:  #reshape for tensor3 softmax
            X = X.reshape((shape[0] * shape[1], self.n_in))

        out = activations.softmax(
            T.concatenate([T.zeros(
                (X.shape[0], 1)), T.dot(X, self.w)], axis=1) + self.b)

        if is_tensor3_softmax:  #reshape for tensor3 softmax
            out = out.reshape((shape[0], shape[1], self.size))

        return out
Example #30
def linear_act_forward(A, W, b, act):
    """
    Implements the linear and activation functions of a single node
    Also, returns the original values for storage in cache
    """
    if act == "sigmoid":
        Z, lin_cache = linear(A, W, b)
        A, act_cache = sigmoid(Z)
    elif act == "relu":
        Z, lin_cache = linear(A, W, b)
        A, act_cache = relu(Z)
    elif act == "softmax":
        Z, lin_cache = linear(A, W, b)
        A, act_cache = softmax(Z)

    cache = (lin_cache, act_cache)

    return A, cache