Example #1
def mlp_loss(weights, X, y, reg):
    """
    Compute loss and gradients of the neutral network.
    """

    L = len(weights)  # The index of the output layer
    z = []
    a = []

    err_tol = 1e-10  # Small constant to keep np.log(proba) away from log(0)

    # Number of samples
    m = X.shape[0]

    # Forward pass
    z.append(0)  # Dummy element
    a.append(X)  # Input activation

    for i in range(0, L):
        W = weights[i]['W']
        b = weights[i]['b']
        z.append(np.dot(a[-1], W) + b)
        # Note: the final activation a[-1] is not used; the softmax below
        # operates on z[-1] directly.
        a.append(ac_func(z[-1]))

    # Subtract the row-wise maximum so exp(z[-1]) cannot overflow
    zL_max = np.max(z[-1], axis=1, keepdims=True)
    z[-1] -= zL_max
    # Softmax probabilities; err_tol keeps them away from zero for the log
    proba = np.exp(z[-1]) / np.sum(np.exp(z[-1]), axis=1, keepdims=True) + err_tol

    # Target matrix of labels
    Y = to_binary_class_matrix(y)

    # Cross-entropy loss plus L2 regularization
    sum_squared_weights = 0.0  # Sum of squared weights
    for i in range(L):
        W = weights[i]['W']
        sum_squared_weights += np.sum(W * W)

    loss = -1.0 / m * np.sum(
        Y * np.log(proba)) + 0.5 * reg * sum_squared_weights

    # Backpropagation
    delta = [-1.0 * (Y - proba)]
    for i in reversed(range(L)):  # Note that delta[0] will not be used
        W = weights[i]['W']
        d = np.dot(delta[0], W.T) * ac_func_deriv(z[i])
        delta.insert(0, d)  # Insert element at beginning

    # Gradients
    grad = [{} for i in range(L)]
    for i in range(L):
        W = weights[i]['W']
        grad[i]['W'] = np.dot(a[i].T, delta[i + 1]) / m + reg * W
        grad[i]['b'] = np.mean(delta[i + 1], axis=0)

    return loss, grad
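
All of the examples on this page assume that numpy is imported as np and rely on three helpers that are not shown: ac_func, ac_func_deriv, and to_binary_class_matrix. The following is only a minimal sketch of what they might look like, assuming a sigmoid activation and one-hot label encoding; the actual definitions used with these examples may differ.

import numpy as np


def ac_func(z):
    # Assumed activation: element-wise sigmoid
    return 1.0 / (1.0 + np.exp(-z))


def ac_func_deriv(z):
    # Derivative of the sigmoid, written in terms of the pre-activation z
    s = ac_func(z)
    return s * (1.0 - s)


def to_binary_class_matrix(y):
    # One-hot encode integer labels y of shape (m,) into an (m, K) matrix
    y = np.asarray(y)
    Y = np.zeros((y.size, y.max() + 1))
    Y[np.arange(y.size), y] = 1.0
    return Y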
Example #2
def neural_net_loss(weights, X, y, reg):
    """
    Compute loss and gradients of the neutral network.
    """

    Y = to_binary_class_matrix(y)
    L = len(weights)  # The index of the output layer
    z = []
    a = []

    # Number of samples
    m = X.shape[0]

    # Forward pass
    z.append(0)  # Dummy element
    a.append(X)  # Input activation

    for i in range(0, L):
        W = weights[i]['W']
        b = weights[i]['b']
        z.append(np.dot(a[-1], W) + b)
        a.append(ac_func(z[-1]))

    # Squared-error loss plus L2 regularization
    sum_weight_square = 0.0  # Sum of squared weights
    for i in range(L):
        W = weights[i]['W']
        sum_weight_square += np.sum(W * W)

    loss = 1.0 / (2.0 * m) * np.sum(
        (a[-1] - Y)**2) + 0.5 * reg * sum_weight_square

    # Backpropagation
    delta = [(a[-1] - Y) * ac_func_deriv(z[-1])]
    for i in reversed(range(L)):  # Note that delta[0] will not be used
        W = weights[i]['W']
        d = np.dot(delta[0], W.T) * ac_func_deriv(z[i])
        delta.insert(0, d)  # Insert element at beginning

    # Gradients
    grad = [{} for i in range(L)]
    for i in range(L):
        W = weights[i]['W']
        grad[i]['W'] = np.dot(a[i].T, delta[i + 1]) / m + reg * W
        grad[i]['b'] = np.mean(delta[i + 1], axis=0)

    return loss, grad
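
A quick way to sanity-check either loss function is to compare its analytic gradient against a finite-difference estimate. The snippet below is a hypothetical usage sketch (the data, network sizes, and regularization strength are made up for illustration) and assumes the helper definitions sketched earlier.

import numpy as np

np.random.seed(0)
m, n_in, n_hidden, n_out = 5, 4, 3, 2
X = np.random.randn(m, n_in)
y = np.array([0, 1, 0, 1, 1])

weights = [
    {'W': 0.1 * np.random.randn(n_in, n_hidden), 'b': np.zeros(n_hidden)},
    {'W': 0.1 * np.random.randn(n_hidden, n_out), 'b': np.zeros(n_out)},
]

loss, grad = neural_net_loss(weights, X, y, reg=1e-3)

# Finite-difference check on a single weight entry
h = 1e-5
weights[0]['W'][0, 0] += h
loss_plus, _ = neural_net_loss(weights, X, y, reg=1e-3)
weights[0]['W'][0, 0] -= 2 * h
loss_minus, _ = neural_net_loss(weights, X, y, reg=1e-3)
weights[0]['W'][0, 0] += h

num_grad = (loss_plus - loss_minus) / (2 * h)
print(num_grad, grad[0]['W'][0, 0])  # the two values should agree closely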
Example #3
def softmax_loss(weights, X, y, reg):
    """
    Compute the loss and derivative.

    theta: weight matrix
    X: the N x M input matrix, where each column data[:, i] corresponds to
          a single test set
    y: labels corresponding to the input data
    """

    # Small constant used to avoid numerical problems
    eps = 1e-10

    # Weighting parameters
    W0 = weights[0]['W']
    b0 = weights[0]['b']

    # Number of samples
    m = X.shape[0]

    # Forward pass
    a0 = X                 # Input activation
    z1 = np.dot(a0, W0) + b0

    # Subtract the row-wise maximum so exp(z1) cannot overflow
    z1_max = np.max(z1, axis=1, keepdims=True)
    z1 -= z1_max
    # Softmax probabilities; eps keeps them away from zero for the log
    proba = np.exp(z1) / np.sum(np.exp(z1), axis=1, keepdims=True) + eps

    # Target matrix of labels
    target = to_binary_class_matrix(y)

    # Cross-entropy loss plus L2 regularization
    loss = -1.0/m * np.sum(target * np.log(proba)) + 0.5*reg*np.sum(W0*W0)

    # Gradients
    delta1 = -1.0 * (target - proba)

    grad = [{}]
    grad[0]['W'] = np.dot(a0.T, delta1)/m + reg*W0
    grad[0]['b'] = np.mean(delta1, axis=0)

    return loss, grad
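
For completeness, here is a hypothetical training loop built around softmax_loss: plain batch gradient descent on randomly generated data. The dataset, learning rate, and iteration count are assumptions chosen purely for illustration.

import numpy as np

np.random.seed(1)
m, n_features, n_classes = 100, 10, 3
X = np.random.randn(m, n_features)
y = np.arange(m) % n_classes  # dummy labels; every class appears

weights = [{'W': 0.01 * np.random.randn(n_features, n_classes),
            'b': np.zeros(n_classes)}]

learning_rate = 0.5
for step in range(200):
    loss, grad = softmax_loss(weights, X, y, reg=1e-4)
    weights[0]['W'] -= learning_rate * grad[0]['W']
    weights[0]['b'] -= learning_rate * grad[0]['b']
    if step % 50 == 0:
        print(step, loss)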