Example #1
def cost(params, data, weight_penalty=0, dropout=None, rng=np.random):
    inputs, targets = data.inputs, data.targets
    W1, b1, W2, b2, W3, b3 = params

    if dropout is None:
        dropout = DEFAULT_DROPOUT

    num_cases = inputs.shape[0]
    a1 = inputs

    if dropout[0] > 0:
        mask = rng.random_sample(a1.shape) > dropout[0]
        a1 = a1 * mask

    z2 = a1.dot(W1.T) + b1
    a2 = logistic(z2)

    # Note that at present every single activation is computed even
    # though we throw half of that work away when using dropout.
    if dropout[1] > 0:
        mask = rng.random_sample(a2.shape) > dropout[1]
        a2 = a2 * mask
 
    z3 = a2.dot(W2.T) + b2
    a3 = logistic(z3)

    if dropout[2] > 0:
        mask = rng.random_sample(a3.shape) > dropout[2]
        a3 = a3 * mask

    # Un-normalized log-prob.
    U = a3.dot(W3.T) + b3
    # Normalize.
    log_prob = U - np.log(np.sum(np.exp(U), 1))[:,np.newaxis]
    # Compute probabilities over classes.
    prob = np.exp(log_prob)

    weight_cost = (0.5 * weight_penalty *  (
            np.sum(W1 ** 2) + np.sum(b1 ** 2) +
            np.sum(W2 ** 2) + np.sum(b2 ** 2) +
            np.sum(W3 ** 2) + np.sum(b3 ** 2)))

    cost = weight_cost - (np.sum(log_prob * targets) / num_cases)

    delta4 = error = prob - targets
    delta3 = delta4.dot(W3) * a3 * (1 - a3)
    delta2 = delta3.dot(W2) * a2 * (1 - a2)

    W1_grad = (delta2.T.dot(a1) / num_cases) + (weight_penalty * W1)
    W2_grad = (delta3.T.dot(a2) / num_cases) + (weight_penalty * W2)
    W3_grad = (delta4.T.dot(a3) / num_cases) + (weight_penalty * W3)

    b1_grad = (np.sum(delta2, 0) / num_cases) + (weight_penalty * b1)
    b2_grad = (np.sum(delta3, 0) / num_cases) + (weight_penalty * b2)
    b3_grad = (np.sum(delta4, 0) / num_cases) + (weight_penalty * b3)

    return cost, (W1_grad, b1_grad, W2_grad, b2_grad, W3_grad, b3_grad)
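The cost function above refers to np, logistic and DEFAULT_DROPOUT, which are defined elsewhere in the project and not shown on this page. A minimal sketch of what those assumed definitions could look like so the example runs standalone (the dropout rates shown are illustrative assumptions, not the project's actual values):

import numpy as np

# Assumed per-layer drop probabilities (input, first hidden, second hidden).
DEFAULT_DROPOUT = (0.2, 0.5, 0.5)

def logistic(z):
    # Elementwise sigmoid: 1 / (1 + exp(-z)).
    return 1.0 / (1.0 + np.exp(-z))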
Example #2
def log_prob(params, data, dropout=None):
    """
    Compute the log probability over classes for each input.
    """
    W1, b1, W2, b2, W3, b3 = params

    if dropout is None:
        dropout = DEFAULT_DROPOUT

    a1 = data.inputs
    z2 = a1.dot(W1.T * (1 - dropout[0])) + b1
    a2 = logistic(z2) 
    z3 = a2.dot(W2.T * (1 - dropout[1])) + b2
    a3 = logistic(z3)
    U = a3.dot(W3.T * (1 - dropout[2])) + b3
    log_prob = U - np.log(np.sum(np.exp(U), 1))[:,np.newaxis]
    return log_prob
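At prediction time no units are dropped; instead each weight matrix is scaled by (1 - dropout) so the expected input to each layer matches training. A possible usage sketch of log_prob (error_rate is a hypothetical helper, and data.targets being one-hot is an assumption carried over from the cost example):

def error_rate(params, data, dropout=None):
    # Classify each input as its most probable class and compare against
    # the one-hot targets; returns the fraction misclassified.
    lp = log_prob(params, data, dropout)
    predictions = np.argmax(lp, axis=1)
    true_labels = np.argmax(data.targets, axis=1)
    return np.mean(predictions != true_labels)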
Example #3
def sample_h(rbm, v, end_of_chain):
    h_mean = logistic(v.dot(rbm.W.T) + rbm.h_bias)
    if not end_of_chain:
        h = sample_bernoulli(h_mean)
    else:
        # Don't sample the states of the hidden units, because:
        # a) We're at the end of the Gibbs chain, so we don't need h
        # for future computations of p(v|h).
        # b) h isn't required to compute neg_free_energy_grad.
        h = None
    return h, h_mean
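sample_h (and sample_v below) relies on a sample_bernoulli helper that is not shown on this page. A minimal sketch of what it presumably does:

def sample_bernoulli(mean, rng=np.random):
    # Treat each mean in [0, 1] as an independent Bernoulli probability and
    # return a 0/1 array of the same shape.
    return (rng.random_sample(mean.shape) < mean).astype(mean.dtype)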
Example #4
def sample_v_softmax(rbm, h, k, labels=None):
    """
    Sample the visible units of an RBM treating the k left-most units
    as a softmax group. If labels is given, the softmax group is
    clamped to those values.
    """
    # Top down activity for all units.
    a = h.dot(rbm.W) + rbm.v_bias
    # Softmax units.
    if labels is None:
        u = a[:,0:k] # Activities are un-normalized log probabilities.
        log_prob = u - np.log(np.sum(np.exp(u), 1))[:,np.newaxis]
        prob = np.exp(log_prob)
        labels = sample_softmax(prob)
    else:
        # Use labels as probs if clamped.
        prob = labels
    # Logistic units.
    v_mean = logistic(a[:,k:])
    v = sample_bernoulli(v_mean)
    return np.hstack((labels, v)), np.hstack((prob, v_mean))
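The softmax group is sampled with a sample_softmax helper that is also not shown here. One plausible sketch, assuming it draws a single one-hot category per row from the given class probabilities:

def sample_softmax(prob, rng=np.random):
    # Draw one category per row from that row's class probabilities and
    # return it one-hot encoded, matching the shape of prob.
    cumulative = np.cumsum(prob, axis=1)
    draws = rng.random_sample((prob.shape[0], 1))
    indices = np.argmax(draws < cumulative, axis=1)
    one_hot = np.zeros_like(prob)
    one_hot[np.arange(prob.shape[0]), indices] = 1
    return one_hot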
Example #5
def sample_v(rbm, h):
    v_mean = logistic(h.dot(rbm.W) + rbm.v_bias)
    v = sample_bernoulli(v_mean)
    return v, v_mean
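A sketch of how the samplers above could be combined into one step of alternating Gibbs sampling, as used in CD-1 training (gibbs_step is hypothetical; the rbm object with W, v_bias and h_bias attributes follows the usage in sample_h and sample_v):

def gibbs_step(rbm, v0):
    # One full Gibbs step: v0 -> h0 -> v1 -> h1 (means kept for the gradient).
    h0, h0_mean = sample_h(rbm, v0, end_of_chain=False)
    v1, v1_mean = sample_v(rbm, h0)
    h1, h1_mean = sample_h(rbm, v1, end_of_chain=True)  # h1 is None at the chain's end
    return v1, v1_mean, h0_mean, h1_mean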