Example #1
def up_pass(params, pixels):
    """
    Perform an upward pass from the visible pixels to the visible
    units of the top-level RBM.
    """
    # This is deterministic. (i.e. It uses the real-valued
    # probabilities rather than sampling.)
    hid1_mean = logistic(pixels.dot(params[0].W_r) + params[0].b_r)
    hid2_mean = logistic(hid1_mean.dot(params[1].W_r) + params[1].b_r)
    return hid2_mean
Example #2
def up_pass(params, pixels):
    """
    Perform an upward pass from the visible pixels to the visible
    units of the top-level RBM.
    """
    # This is deterministic. (i.e. It uses the real-valued
    # probabilities rather than sampling.)
    hid1_mean = logistic(pixels.dot(params[0].W_r) + params[0].b_r)
    hid2_mean = logistic(hid1_mean.dot(params[1].W_r) + params[1].b_r)
    return hid2_mean
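
Every snippet on this page leans on a logistic helper that is never shown. A minimal sketch of the kind of definition they assume (plain NumPy, element-wise sigmoid):

import numpy as np

def logistic(x):
    # Element-wise logistic sigmoid: 1 / (1 + exp(-x)).
    return 1.0 / (1.0 + np.exp(-x))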
Example #3
def down_pass(params, v):
    """
    Perform a deterministic downward pass from the visible units of
    the top-level RBM to the visible pixels.
    """
    # The visible units of the top-level RBM include a softmax group
    # which is not directly connected to the visible pixels.
    hid2_mean = v[:, mnist.NUM_CLASSES:]
    hid1_mean = logistic(hid2_mean.dot(params[1].W_g) + params[1].b_g)
    vis_mean = logistic(hid1_mean.dot(params[0].W_g) + params[0].b_g)
    return vis_mean
Example #4
def down_pass(params, v):
    """
    Perform a deterministic downward pass from the visible units of
    the top-level RBM to the visible pixels.
    """
    # The visible units of the top-level RBM include a softmax group
    # which is not directly connected to the visible pixels.
    hid2_mean = v[:, mnist.NUM_CLASSES:]
    hid1_mean = logistic(hid2_mean.dot(params[1].W_g) + params[1].b_g)
    vis_mean = logistic(hid1_mean.dot(params[0].W_g) + params[0].b_g)
    return vis_mean
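
A hypothetical way to chain the two passes into a deterministic reconstruction. Since down_pass slices off the first mnist.NUM_CLASSES columns, a one-of-k label block has to be prepended to the output of up_pass; the reconstruct name and the labels_1_of_k argument are illustrative, not part of the original code:

import numpy as np

def reconstruct(params, pixels, labels_1_of_k):
    # Up through the recognition weights, then back down through the
    # generative weights; labels_1_of_k is a (num_cases, NUM_CLASSES)
    # one-of-k matrix that down_pass strips off again.
    hid2_mean = up_pass(params, pixels)
    v = np.hstack((labels_1_of_k, hid2_mean))
    return down_pass(params, v)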
Example #5
def sample_h(rbm, v, end_of_chain):
    h_mean = logistic((v / rbm.sigma).dot(rbm.W.T) + rbm.h_bias)
    if not end_of_chain:
        h = h_mean > np.random.random(h_mean.shape)
    else:
        h = None
    return h, h_mean
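
sample_h draws binary hidden states by thresholding the activation probabilities against uniform noise. The rbm.sample_bernoulli helper used in the wake-sleep examples further down presumably does the same thing; a minimal sketch under that assumption:

import numpy as np

def sample_bernoulli(mean):
    # Binary sample with P(unit = 1) given element-wise by mean,
    # matching the thresholding used in sample_h above.
    return mean > np.random.random(mean.shape)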
Example #6
def sample_h_noisy_relu(rbm, v, end_of_chain):
    propup = (v / rbm.sigma).dot(rbm.W.T) + rbm.h_bias
    h_mean = np.maximum(0, propup)
    if not end_of_chain:
        noise = np.sqrt(logistic(propup)) * np.random.standard_normal(propup.shape)
        h = np.maximum(0, propup + noise)
    else:
        h = None
    return h, h_mean    
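
Here the hidden units are noisy rectified linear units: the pre-activation is perturbed with zero-mean Gaussian noise whose variance is logistic(propup) and then rectified, while the reported mean stays max(0, propup). A small hypothetical smoke test of the sampling rule (the Rbm namedtuple and the toy sizes are made up for illustration):

import collections
import numpy as np

Rbm = collections.namedtuple('Rbm', 'W sigma h_bias')
toy = Rbm(W=0.01 * np.random.randn(8, 4), sigma=1.0, h_bias=np.zeros(8))
v = np.random.rand(5, 4)  # 5 cases, 4 visible units
h, h_mean = sample_h_noisy_relu(toy, v, end_of_chain=False)
assert h.shape == h_mean.shape == (5, 8)
assert (h >= 0).all() and (h_mean >= 0).all()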
Example #7
targets = targets[0:n]
labels = labels[0:n]

# These layers differ slightly from those in the paper. My main
# motivation is to avoid a square weight matrix between hidden layers,
# so that matrix transpose errors show up as shape mismatches.
num_vis = inputs.shape[1]
num_hid1 = 529  # 23^2
num_hid2 = 484  # 22^2
num_top = 1936  # 44^2

batches = data.BatchIterator(inputs)
initial_params = rbm.initial_params(num_hid1, num_vis)
params = sgd(rbm_obj, initial_params, batches, momentum)

inputs = logistic(inputs.dot(params.W.T) + params.h_bias)
batches = data.BatchIterator(inputs)
initial_params = rbm.initial_params(num_hid2, num_hid1)
params = sgd(rbm_obj, initial_params, batches, momentum)

inputs = logistic(inputs.dot(params.W.T) + params.h_bias)
batches = data.BatchIterator(np.hstack((targets, inputs)))
initial_params = rbm.initial_params(num_top, num_hid2 + mnist.NUM_CLASSES)


def post_epoch(*args):
    print 'Mean hidden activation prob. is %.2f' % pcd.q


# Optimization objective for the top-level RBM.
pcd = rbm.pcd(rbm.sample_h, sample_v_softmax, rbm.neg_free_energy_grad,
              weight_decay)
Example #8
targets = targets[0:n]
labels = labels[0:n]

# These layers differ slightly from those in the paper. My main
# motivation is to avoid a square weight matrix between hidden layers,
# so that matrix transpose errors show up as shape mismatches.
num_vis = inputs.shape[1]
num_hid1 = 529  # 23^2
num_hid2 = 484  # 22^2
num_top = 1936  # 44^2

batches = data.BatchIterator(inputs)
initial_params = rbm.initial_params(num_hid1, num_vis)
params = sgd(rbm_obj, initial_params, batches, momentum)

inputs = logistic(inputs.dot(params.W.T) + params.h_bias)
batches = data.BatchIterator(inputs)
initial_params = rbm.initial_params(num_hid2, num_hid1)
params = sgd(rbm_obj, initial_params, batches, momentum)

inputs = logistic(inputs.dot(params.W.T) + params.h_bias)
batches = data.BatchIterator(np.hstack((targets, inputs)))
initial_params = rbm.initial_params(num_top, num_hid2 + mnist.NUM_CLASSES)

def post_epoch(*args):
    print 'Mean hidden activation prob. is %.2f' % pcd.q

# Optimization objective for the top-level RBM.
pcd = rbm.pcd(rbm.sample_h, sample_v_softmax,
              rbm.neg_free_energy_grad, weight_decay)
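
sample_v_softmax is handed to rbm.pcd here (and to rbm.gibbs_chain in the wake-sleep examples below) but never defined on this page. A rough sketch of what it presumably looks like, mirroring the (rbm, x, end_of_chain) signature of sample_h: the first mnist.NUM_CLASSES visible columns form a softmax label group and the rest are ordinary logistic units. The rbm.v_bias attribute and the details of the draw are assumptions:

import numpy as np

def sample_v_softmax(rbm, h, end_of_chain):
    # Downward activations; sigma is taken to be 1 for the top-level
    # RBM, whose visible units are binary/softmax rather than Gaussian.
    propdown = h.dot(rbm.W) + rbm.v_bias
    label_act = propdown[:, :mnist.NUM_CLASSES]
    label_act = label_act - label_act.max(1)[:, np.newaxis]  # stability
    label_probs = np.exp(label_act)
    label_probs = label_probs / label_probs.sum(1)[:, np.newaxis]
    rest_probs = logistic(propdown[:, mnist.NUM_CLASSES:])
    v_mean = np.hstack((label_probs, rest_probs))
    if end_of_chain:
        return None, v_mean
    # One-of-k draw for the label group, Bernoulli draw for the rest.
    labels = np.zeros_like(label_probs)
    for i, p in enumerate(label_probs):
        labels[i, np.random.choice(p.shape[0], p=p)] = 1
    rest = rest_probs > np.random.random(rest_probs.shape)
    return np.hstack((labels, rest)), v_mean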
Example #9
def contrastive_wake_sleep(params, data, weight_decay=None, cd_k=1):
    inputs, targets = data.inputs, data.targets
    num_cases = inputs.shape[0]

    # Turn the single tuple of parameters into something easier to
    # work with.
    dbn_params = dbn.stack_params(params)
    grad = []

    # Wake phase.
    wake_hid1_states = rbm.sample_bernoulli(logistic(inputs.dot(dbn_params[0].W_r) + dbn_params[0].b_r))
    wake_hid2_states = rbm.sample_bernoulli(logistic(wake_hid1_states.dot(dbn_params[1].W_r) + dbn_params[1].b_r))

    # Contrastive divergence.
    gc = rbm.gibbs_chain(np.hstack((targets, wake_hid2_states)),
                         dbn_params[-1],
                         rbm.sample_h,
                         sample_v_softmax,
                         cd_k + 1)

    pos_sample = gc.next()
    if cd_k == 1:
        neg_sample = gc.next()
    else:
        recon_sample = gc.next()
        neg_sample = itertools.islice(gc, cd_k - 2, None).next()

    # Sleep phase.
    sleep_hid2_states = neg_sample[0][:, mnist.NUM_CLASSES:]
    sleep_hid1_states = rbm.sample_bernoulli(logistic(sleep_hid2_states.dot(dbn_params[1].W_g) + dbn_params[1].b_g))
    sleep_vis_probs = logistic(sleep_hid1_states.dot(dbn_params[0].W_g) + dbn_params[0].b_g)

    # Predictions.
    p_sleep_hid2 = logistic(sleep_hid1_states.dot(dbn_params[1].W_r) + dbn_params[1].b_r)
    p_sleep_hid1 = logistic(sleep_vis_probs.dot(dbn_params[0].W_r) + dbn_params[0].b_r)
    p_wake_vis = logistic(wake_hid1_states.dot(dbn_params[0].W_g) + dbn_params[0].b_g)
    p_wake_hid1 = logistic(wake_hid2_states.dot(dbn_params[1].W_g) + dbn_params[1].b_g)

    # Gradients.
    # Layer 0.
    W_r_grad = sleep_vis_probs.T.dot(p_sleep_hid1 - sleep_hid1_states) / num_cases
    b_r_grad = np.mean(p_sleep_hid1 - sleep_hid1_states, 0)
    W_g_grad = wake_hid1_states.T.dot(p_wake_vis - inputs) / num_cases
    b_g_grad = np.mean(p_wake_vis - inputs, 0)
    grad.extend([W_r_grad, b_r_grad, W_g_grad, b_g_grad])

    # Layer 1.
    W_r_grad = sleep_hid1_states.T.dot(p_sleep_hid2 - sleep_hid2_states) / num_cases
    b_r_grad = np.mean(p_sleep_hid2 - sleep_hid2_states, 0)
    W_g_grad = wake_hid2_states.T.dot(p_wake_hid1 - wake_hid1_states) / num_cases
    b_g_grad = np.mean(p_wake_hid1 - wake_hid1_states, 0)
    grad.extend([W_r_grad, b_r_grad, W_g_grad, b_g_grad])
    
    # Top-level RBM.
    pos_grad = rbm.neg_free_energy_grad(dbn_params[-1], pos_sample)
    neg_grad = rbm.neg_free_energy_grad(dbn_params[-1], neg_sample)
    rbm_grad = map(operator.sub, neg_grad, pos_grad)
    grad.extend(rbm_grad)

    # Weight decay.
    if weight_decay:
        weight_grad = (weight_decay(p)[1] for p in params)
        grad = map(operator.add, grad, weight_grad)

    # One-step reconstruction error.
    if cd_k == 1:
        recon = sleep_vis_probs
    else:
        # Perform a deterministic down pass from the first sample of
        # the Gibbs chain in order to compute the one-step
        # reconstruction error.
        recon_hid2_probs = recon_sample[1][:, mnist.NUM_CLASSES:]
        recon_hid1_probs = rbm.sample_bernoulli(logistic(recon_hid2_probs.dot(dbn_params[1].W_g) + dbn_params[1].b_g))
        recon = logistic(recon_hid1_probs.dot(dbn_params[0].W_g) + dbn_params[0].b_g)

    error = np.sum((inputs - recon) ** 2) / num_cases

    return error, grad
Example #10
def contrastive_wake_sleep(params, data, weight_decay=None, cd_k=1):
    inputs, targets = data.inputs, data.targets
    num_cases = inputs.shape[0]

    # Turn the single tuple of parameters into something easier to
    # work with.
    dbn_params = dbn.stack_params(params)
    grad = []

    # Wake phase.
    wake_hid1_states = rbm.sample_bernoulli(
        logistic(inputs.dot(dbn_params[0].W_r) + dbn_params[0].b_r))
    wake_hid2_states = rbm.sample_bernoulli(
        logistic(wake_hid1_states.dot(dbn_params[1].W_r) + dbn_params[1].b_r))

    # Contrastive divergence.
    gc = rbm.gibbs_chain(np.hstack(
        (targets, wake_hid2_states)), dbn_params[-1], rbm.sample_h,
                         sample_v_softmax, cd_k + 1)

    pos_sample = gc.next()
    if cd_k == 1:
        neg_sample = gc.next()
    else:
        recon_sample = gc.next()
        neg_sample = itertools.islice(gc, cd_k - 2, None).next()

    # Sleep phase.
    sleep_hid2_states = neg_sample[0][:, mnist.NUM_CLASSES:]
    sleep_hid1_states = rbm.sample_bernoulli(
        logistic(sleep_hid2_states.dot(dbn_params[1].W_g) + dbn_params[1].b_g))
    sleep_vis_probs = logistic(
        sleep_hid1_states.dot(dbn_params[0].W_g) + dbn_params[0].b_g)

    # Predictions.
    p_sleep_hid2 = logistic(
        sleep_hid1_states.dot(dbn_params[1].W_r) + dbn_params[1].b_r)
    p_sleep_hid1 = logistic(
        sleep_vis_probs.dot(dbn_params[0].W_r) + dbn_params[0].b_r)
    p_wake_vis = logistic(
        wake_hid1_states.dot(dbn_params[0].W_g) + dbn_params[0].b_g)
    p_wake_hid1 = logistic(
        wake_hid2_states.dot(dbn_params[1].W_g) + dbn_params[1].b_g)

    # Gradients.
    # Layer 0.
    W_r_grad = sleep_vis_probs.T.dot(p_sleep_hid1 -
                                     sleep_hid1_states) / num_cases
    b_r_grad = np.mean(p_sleep_hid1 - sleep_hid1_states, 0)
    W_g_grad = wake_hid1_states.T.dot(p_wake_vis - inputs) / num_cases
    b_g_grad = np.mean(p_wake_vis - inputs, 0)
    grad.extend([W_r_grad, b_r_grad, W_g_grad, b_g_grad])

    # Layer 1.
    W_r_grad = sleep_hid1_states.T.dot(p_sleep_hid2 -
                                       sleep_hid2_states) / num_cases
    b_r_grad = np.mean(p_sleep_hid2 - sleep_hid2_states, 0)
    W_g_grad = wake_hid2_states.T.dot(p_wake_hid1 -
                                      wake_hid1_states) / num_cases
    b_g_grad = np.mean(p_wake_hid1 - wake_hid1_states, 0)
    grad.extend([W_r_grad, b_r_grad, W_g_grad, b_g_grad])

    # Top-level RBM.
    pos_grad = rbm.neg_free_energy_grad(dbn_params[-1], pos_sample)
    neg_grad = rbm.neg_free_energy_grad(dbn_params[-1], neg_sample)
    rbm_grad = map(operator.sub, neg_grad, pos_grad)
    grad.extend(rbm_grad)

    # Weight decay.
    if weight_decay:
        weight_grad = (weight_decay(p)[1] for p in params)
        grad = map(operator.add, grad, weight_grad)

    # One-step reconstruction error.
    if cd_k == 1:
        recon = sleep_vis_probs
    else:
        # Perform a deterministic down pass from the first sample of
        # the Gibbs chain in order to compute the one-step
        # reconstruction error.
        recon_hid2_probs = recon_sample[1][:, mnist.NUM_CLASSES:]
        recon_hid1_probs = rbm.sample_bernoulli(
            logistic(
                recon_hid2_probs.dot(dbn_params[1].W_g) + dbn_params[1].b_g))
        recon = logistic(
            recon_hid1_probs.dot(dbn_params[0].W_g) + dbn_params[0].b_g)

    error = np.sum((inputs - recon)**2) / num_cases

    return error, grad
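
A hypothetical way to wire contrastive_wake_sleep into the same sgd driver used for the greedy pre-training in Examples #7 and #8, assuming sgd only needs an objective of the form (params, batch) -> (error, grad) and that the mini-batches expose the .inputs and .targets attributes this function reads:

import functools

# Fix the optional arguments so the objective has the two-argument
# shape the sgd driver expects, then fine-tune the stacked parameters.
cws_obj = functools.partial(contrastive_wake_sleep,
                            weight_decay=weight_decay, cd_k=1)
params = sgd(cws_obj, params, batches, momentum)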