Code Example #1
File: dbn_layers.py  Project: leadingin/kmc-research
import os
import pickle

import numpy as np

# train_rbm, load_data and softmax are helpers defined elsewhere in this project.

def train_and_sample_from_dbn_layers(random_seed=1,
                                     dataset='bucket/mnist.pkl.gz',
                                     epochs=15,
                                     architecture=[500,500,2000],
                                     samples=1,
                                     plot_every=1000,
                                     save_folder=None,
                                     starting_rbms=None):
    # Setup (a None default avoids the mutable-default pitfall: rbms is appended to below)
    rbms = list(starting_rbms) if starting_rbms else []
    original_dataset = dataset
    # Pretraining loop
    for (i, n_hidden) in enumerate(architecture):
        # Caveat: the pass-through step below reuses train_set_x/test_set_x returned
        # by train_rbm, so supplying starting_rbms for any but the final layer would
        # leave those undefined.
        if len(rbms) <= i: # Check to see if RBM already available
            # Train
            print('Training rbm %d' % (i+1))
            (rbm, train_set_x, train_set_y, test_set_x, test_set_y) = train_rbm(learning_rate=0.1, training_epochs=epochs,
                                                                                n_hidden=n_hidden,
                                                                                dataset=dataset,
                                                                                random_seed=random_seed,
                                                                                augment_with_labels=(i==len(architecture)-1))
            rbms.append(rbm)
        if i < len(architecture) - 1:
            print('Passing data through rbm %d' % (i+1))
            # Pass data through rbm
            # First reload data to get correct object types
            datasets = load_data(original_dataset)
            pseudo_train_set_x, pseudo_train_set_y = datasets[0]
            pseudo_test_set_x, pseudo_test_set_y = datasets[2]
            x_train_array = train_set_x.get_value()
            x_test_array = test_set_x.get_value()
            W = rbm.W.get_value()
            hbias = rbm.hbias.get_value()  # broadcasts over rows; tiling is unnecessary
            #### TODO - should I be using mean activations or random activations?
            #### Currently using mean activations
            print('Computing training features')
            pre_sigmoid_activation = np.dot(x_train_array, W) + hbias
            pseudo_x_train_array = 1 / (1 + np.exp(-pre_sigmoid_activation))
            print('Computing testing features')
            pre_sigmoid_activation = np.dot(x_test_array, W) + hbias
            pseudo_x_test_array = 1 / (1 + np.exp(-pre_sigmoid_activation))
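            # Re: the TODO above, a hedged sketch of the stochastic alternative,
            # sampling binary hidden states instead of passing on mean activations:
            # pseudo_x_train_array = (pseudo_x_train_array >
            #                         np.random.rand(*pseudo_x_train_array.shape)) * 1
            # pseudo_x_test_array = (pseudo_x_test_array >
            #                        np.random.rand(*pseudo_x_test_array.shape)) * 1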
            pseudo_train_set_x.set_value(pseudo_x_train_array)
            pseudo_test_set_x.set_value(pseudo_x_test_array)
            dataset = (pseudo_train_set_x, pseudo_train_set_y, pseudo_test_set_x, pseudo_test_set_y)
    print('Pretraining complete')

    if save_folder is not None:
        with open(os.path.join(save_folder, 'rbms.pkl'), 'wb') as save_file:
            pickle.dump(rbms, save_file)

    # Reload original data
    datasets = load_data(original_dataset)
    train_set_x, train_set_y = datasets[0]
    test_set_x, test_set_y = datasets[2]
    # Sampling
    rng = np.random.RandomState(random_seed)

    images = np.zeros((0,28*28))
    labels = np.zeros((0,1))

    number_of_train_samples = train_set_x.get_value(borrow=True).shape[0]

    count = 0

    print('Sampling images')

    while count < samples:

        # Pick a random training example with which to initialize the persistent chain
        train_idx = rng.randint(number_of_train_samples)
        starting_image = np.asarray(train_set_x.get_value(borrow=True)[train_idx:train_idx+1])

        vis = starting_image
        # Propagate image up the rbms
        for rbm in rbms[:-1]:
            pre_sigmoid_activation = np.dot(vis, rbm.W.get_value()) + rbm.hbias.get_value()
            hid_prob = 1 / (1 + np.exp(-pre_sigmoid_activation))
            vis = (hid_prob > np.random.rand(hid_prob.shape[0], hid_prob.shape[1])) * 1
        # Append label
        # Recover the raw integer labels from behind Theano's cast op
        y_list = train_set_y.owner.inputs[0].get_value()
        y_ind = np.zeros((1, 10))
        y_ind[0,y_list[train_idx]] = 1
        vis = np.hstack((vis, y_ind))

        W = rbms[-1].W.get_value()
        h_bias = rbms[-1].hbias.get_value()
        v_bias = rbms[-1].vbias.get_value()
        # Gibbs sample in the autoassociative memory
        for dummy in range(plot_every):
            pre_sigmoid_activation = np.dot(vis, W) + h_bias
            hid_prob = 1 / (1 + np.exp(-pre_sigmoid_activation))
            hid = (hid_prob > np.random.rand(hid_prob.shape[0], hid_prob.shape[1])) * 1
            pre_sigmoid_activation = np.dot(hid, W.T) + v_bias
            vis_prob = 1 / (1 + np.exp(-pre_sigmoid_activation))
            vis = (vis_prob > np.random.rand(vis_prob.shape[0], vis_prob.shape[1])) * 1
            # Clamp the label units to the chosen class
            vis[:,-10:] = y_ind
        # Propagate the image down the rbms
        vis = vis[:,:-10]
        for rbm in reversed(rbms[:-1]):
            pre_sigmoid_activation = np.dot(vis, rbm.W.get_value().T) + rbm.vbias.get_value()
            vis_prob = 1 / (1 + np.exp(-pre_sigmoid_activation))
            vis = (vis_prob > np.random.rand(vis_prob.shape[0], vis_prob.shape[1])) * 1

        vis_image = vis_prob # Use probabilities at the final layer
        if len(rbms) == 1:
            # Remove the labels from the image
            vis_image = vis_image[:,:-10]

        images = np.vstack((images, vis_image))
        labels = np.vstack((labels, y_list[train_idx]))
        np.savetxt(os.path.join(save_folder or '.', 'images.csv'), images, delimiter=',')
        np.savetxt(os.path.join(save_folder or '.', 'labels.csv'), labels, delimiter=',')
        count += 1
        print('Sampled %d images' % count)

    return rbms
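
A minimal usage sketch for the function above (a hypothetical call, not taken from the project; it assumes MNIST at the default path and an existing output directory):

# Pretrain the default 500-500-2000 stack on MNIST (the top RBM also sees the
# 10 label units), pickle the RBMs to output/rbms.pkl, and draw 10 samples.
rbms = train_and_sample_from_dbn_layers(epochs=5, samples=10, save_folder='output')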
Code Example #2
def train_and_sample_from_dbn(random_seed=1,
                              dataset='bucket/mnist.pkl.gz',
                              epochs=15,
                              architecture=[500,500,2000],
                              samples=1,
                              plot_every=1000):
    # Setup
    rbms = []
    original_dataset = dataset
    # Pretraining loop
    for (i, n_hidden) in enumerate(architecture):
        # Train
        print('Training rbm %d' % (i+1))
        (rbm, train_set_x, train_set_y, test_set_x, test_set_y) = train_rbm(learning_rate=0.1, training_epochs=epochs,
                                                                            n_hidden=n_hidden,
                                                                            dataset=dataset,
                                                                            random_seed=random_seed,
                                                                            augment_with_labels=(i==len(architecture)-1))
        rbms.append(rbm)
        if i < len(architecture) - 1:
            print('Passing data through rbm %d' % (i+1))
            # Pass data through rbm
            # First reload data to get correct object types
            datasets = load_data(original_dataset)
            pseudo_train_set_x, pseudo_train_set_y = datasets[0]
            pseudo_test_set_x, pseudo_test_set_y = datasets[2]
            x_train_array = train_set_x.get_value()
            x_test_array = test_set_x.get_value()
            W = rbm.W.get_value()
            hbias = rbm.hbias.get_value()  # broadcasts over rows; tiling is unnecessary
            #### TODO - should I be using mean activations or random activations?
            #### Currently using mean activations
            print('Computing training features')
            pre_sigmoid_activation = np.dot(x_train_array, W) + hbias
            pseudo_x_train_array = 1 / (1 + np.exp(-pre_sigmoid_activation))
            print('Computing testing features')
            pre_sigmoid_activation = np.dot(x_test_array, W) + hbias
            pseudo_x_test_array = 1 / (1 + np.exp(-pre_sigmoid_activation))
            pseudo_train_set_x.set_value(pseudo_x_train_array)
            pseudo_test_set_x.set_value(pseudo_x_test_array)
            dataset = (pseudo_train_set_x, pseudo_train_set_y, pseudo_test_set_x, pseudo_test_set_y)
    print('Pretraining complete')
    # Reload original data
    datasets = load_data(original_dataset)
    train_set_x, train_set_y = datasets[0]
    test_set_x, test_set_y = datasets[2]
    # Sampling
    rng = np.random.RandomState(random_seed)

    images = np.zeros((0,28*28))
    labels = np.zeros((0,1))

    number_of_train_samples = train_set_x.get_value(borrow=True).shape[0]

    count = 0

    print('Sampling images')

    while count < samples:

        # Pick a random training example with which to initialize the persistent chain
        train_idx = rng.randint(number_of_train_samples)
        starting_image = np.asarray(train_set_x.get_value(borrow=True)[train_idx:train_idx+1])

        vis = starting_image
        # Propagate image up the rbms
        for rbm in rbms[:-1]:
            pre_sigmoid_activation = np.dot(vis, rbm.W.get_value()) + rbm.hbias.get_value()
            hid_prob = 1 / (1 + np.exp(-pre_sigmoid_activation))
            vis = (hid_prob > np.random.rand(hid_prob.shape[0], hid_prob.shape[1])) * 1
        # Append label
        y_list = train_set_y.owner.inputs[0].get_value()
        y_ind = np.zeros((1, 10))
        y_ind[0,y_list[train_idx]] = 1
        vis = np.hstack((vis, y_ind))

        W = rbms[-1].W.get_value()
        h_bias = rbms[-1].hbias.get_value()
        v_bias = rbms[-1].vbias.get_value()
        # Gibbs sample in the autoassociative memory
        for dummy in range(plot_every):
            pre_sigmoid_activation = np.dot(vis, W) + h_bias
            hid_prob = 1 / (1 + np.exp(-pre_sigmoid_activation))
            hid = (hid_prob > np.random.rand(hid_prob.shape[0], hid_prob.shape[1])) * 1
            pre_sigmoid_activation = np.dot(hid, W.T) + v_bias
            vis_prob = 1 / (1 + np.exp(-pre_sigmoid_activation))
            vis = (vis_prob > np.random.rand(vis_prob.shape[0], vis_prob.shape[1])) * 1
            # Clamp the label units to the chosen class
            vis[:,-10:] = y_ind
        # Propagate the image down the rbms
        vis = vis[:,:-10]
        for rbm in reversed(rbms[:-1]):
            pre_sigmoid_activation = np.dot(vis, rbm.W.get_value().T) + rbm.vbias.get_value()
            vis_prob = 1 / (1 + np.exp(-pre_sigmoid_activation))
            vis = (vis_prob > np.random.rand(vis_prob.shape[0], vis_prob.shape[1])) * 1

        vis_image = vis_prob # Use probabilities at the final layer
        if len(rbms) == 1:
            # Remove the labels from the image (there were no lower RBMs to strip them)
            vis_image = vis_image[:,:-10]

        images = np.vstack((images, vis_image))
        labels = np.vstack((labels, y_list[train_idx]))
        np.savetxt('images.csv', images, delimiter=',')
        np.savetxt('labels.csv', labels, delimiter=',')
        count += 1
        print('Sampled %d images' % count)

    return (rbms, images, labels)
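
The logistic activation followed by Bernoulli thresholding recurs in every pass above. A small refactoring sketch (not part of the original file) would collapse each step to one call; a plausible definition of the softmax helper, which Code Example #3 below uses but this listing never defines, is included:

def sigmoid(x):
    # Elementwise logistic function, as computed inline throughout these examples
    return 1 / (1 + np.exp(-x))

def sample_bernoulli(prob):
    # Draw binary states with the given activation probabilities
    return (prob > np.random.rand(*prob.shape)) * 1

def softmax(x):
    # Row-wise softmax; subtracting the row max keeps np.exp from overflowing
    e = np.exp(x - np.max(x, axis=1, keepdims=True))
    return e / np.sum(e, axis=1, keepdims=True)

With these, an up-pass is simply vis = sample_bernoulli(sigmoid(np.dot(vis, W) + hbias)).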
Code Example #3
def fine_tune_dbn_layers(random_seed=1,
                         dataset='bucket/mnist.pkl.gz',
                         save_folder='./',
                         epochs=50,
                         rbms=[],
                         samples=3000,
                         plot_every=1000,
                         starting_learning_rate=0.002,
                         decay_learning_rate=True,
                         cd_iters=15):
    # Load data - pixels and targets (labels)
    original_dataset = dataset
    datasets = load_data(dataset)
    train_set_x, train_set_y = datasets[0]
    train_digits = train_set_x.get_value()
    y_list = train_set_y.owner.inputs[0].get_value()
    train_labels = np.zeros((train_digits.shape[0], 10))
    for i in range(train_digits.shape[0]):
        train_labels[i,y_list[i]] = 1
    # Setup
    rec_biases = [np.atleast_2d(rbm.hbias.get_value()) for rbm in rbms[:-1]]
    gen_biases = [np.atleast_2d(rbm.vbias.get_value()) for rbm in rbms[:-1]]
    rec_W = [rbm.W.get_value() for rbm in rbms[:-1]]
    gen_W = [rbm.W.get_value().copy().T for rbm in rbms[:-1]]
    pentop = rbms[-1].W.get_value()[:-10,:]
    labtop = rbms[-1].W.get_value()[-10:,:]
    pengenbiases = np.atleast_2d(rbms[-1].vbias.get_value())[:,:-10]
    labgenbiases = np.atleast_2d(rbms[-1].vbias.get_value())[:,-10:]
    topbiases = np.atleast_2d(rbms[-1].hbias.get_value())
    wake_states   = [None] * (len(rbms) - 1)
    sleep_states  = [None] * (len(rbms) - 1)
    p_sleep_probs = [None] * (len(rbms) - 1)
    p_wake_probs  = [None] * (len(rbms) - 1)
    # Fine tuning loop
    for epoch in range(epochs):
        # Permute data
        perm = np.random.permutation(train_digits.shape[0])
        train_digits = train_digits[perm,:]
        train_labels = train_labels[perm,:]
        # Set learning rate (fall back to the starting rate when not decaying)
        if decay_learning_rate:
            learning_rate = starting_learning_rate / (epoch + 1)
        else:
            learning_rate = starting_learning_rate
        for data_iter in range(train_digits.shape[0]):
            # Tell us what's up
            print('epoch %d of %d, iter %d of %d' % (epoch+1, epochs, data_iter+1, train_digits.shape[0]))
            # Extract a digit
            data = train_digits[data_iter,:]
            targets = np.atleast_2d(train_labels[data_iter,:])
            # PERFORM A BOTTOM-UP PASS TO GET WAKE/POSITIVE PHASE PROBABILITIES AND SAMPLE STATES
            state = data
            for i in range(len(rbms) - 1):
                pre_sigmoid_activation = np.dot(state, rec_W[i]) + rec_biases[i]
                hid_prob = 1 / (1 + np.exp(-pre_sigmoid_activation))
                state = (hid_prob > np.random.rand(hid_prob.shape[0], hid_prob.shape[1])) * 1
                wake_states[i] = state
            pre_sigmoid_activation = np.dot(wake_states[-1], pentop) + np.dot(targets, labtop) + topbiases
            hid_prob = 1 / (1 + np.exp(-pre_sigmoid_activation))
            waketopstates = (hid_prob > np.random.rand(hid_prob.shape[0], hid_prob.shape[1])) * 1
            # POSITIVE PHASE STATISTICS FOR CONTRASTIVE DIVERGENCE
            poslabtopstatistics = np.dot(targets.T, waketopstates)
            pospentopstatistics = np.dot(wake_states[-1].T, waketopstates)
            # PERFORM GIBBS SAMPLING ITERATIONS USING THE TOP LEVEL UNDIRECTED ASSOCIATIVE MEMORY
            negtopstates = waketopstates
            for cd_iter in range(cd_iters):
                pre_sigmoid_activation = np.dot(negtopstates, pentop.T) + pengenbiases
                hid_prob = 1 / (1 + np.exp(-pre_sigmoid_activation))
                negpenstates = (hid_prob > np.random.rand(hid_prob.shape[0], hid_prob.shape[1])) * 1

                pre_sigmoid_activation = np.dot(negtopstates, labtop.T) + labgenbiases
                neglabprobs = softmax(pre_sigmoid_activation)

                pre_sigmoid_activation = np.dot(negpenstates, pentop) + np.dot(neglabprobs, labtop) + topbiases
                hid_prob = 1 / (1 + np.exp(-pre_sigmoid_activation))
                negtopstates = (hid_prob > np.random.rand(hid_prob.shape[0], hid_prob.shape[1])) * 1
            # NEGATIVE PHASE STATISTICS FOR CONTRASTIVE DIVERGENCE
            negpentopstatistics = np.dot(negpenstates.T, negtopstates)
            neglabtopstatistics = np.dot(neglabprobs.T, negtopstates)
            # STARTING FROM THE END OF THE GIBBS SAMPLING RUN, PERFORM A TOP-DOWN GENERATIVE PASS TO GET SLEEP/NEGATIVE PHASE PROBABILITIES AND SAMPLE STATES
            sleep_states[-1] = negpenstates
            for i in reversed(range(0, len(rbms)-2)):
                pre_sigmoid_activation = np.dot(sleep_states[i+1], gen_W[i+1]) + gen_biases[i+1]
                hid_prob = 1 / (1 + np.exp(-pre_sigmoid_activation))
                state = (hid_prob > np.random.rand(hid_prob.shape[0], hid_prob.shape[1])) * 1
                sleep_states[i] = state
            pre_sigmoid_activation = np.dot(sleep_states[0], gen_W[0]) + gen_biases[0]
            sleepvisprobs = 1 / (1 + np.exp(-pre_sigmoid_activation))
            # PREDICTIONS
            for i in reversed(range(len(rbms)-2)):
                pre_sigmoid_activation = np.dot(sleep_states[i], rec_W[i+1]) + rec_biases[i+1]
                hid_prob = 1 / (1 + np.exp(-pre_sigmoid_activation))
                p_sleep_probs[i+1] = hid_prob
            pre_sigmoid_activation = np.dot(sleepvisprobs, rec_W[0]) + rec_biases[0]
            hid_prob = 1 / (1 + np.exp(-pre_sigmoid_activation))
            p_sleep_probs[0] = hid_prob
            for i in range(len(rbms)-1):
                pre_sigmoid_activation = np.dot(wake_states[i], gen_W[i]) + gen_biases[i]
                hid_prob = 1 / (1 + np.exp(-pre_sigmoid_activation))
                p_wake_probs[i] = hid_prob
            # UPDATES TO GENERATIVE PARAMETERS
            for i in range(len(rbms)-1):
                eff_data = data if i == 0 else wake_states[i-1]
                gen_W[i] = gen_W[i] + learning_rate * np.dot(wake_states[i].T, (eff_data - p_wake_probs[i]))
                gen_biases[i] = gen_biases[i] + learning_rate * (eff_data - p_wake_probs[i])
            # UPDATES TO TOP LEVEL ASSOCIATIVE MEMORY PARAMETERS
            labtop = labtop + learning_rate * (poslabtopstatistics - neglabtopstatistics)
            labgenbiases = labgenbiases + learning_rate * (targets - neglabprobs)
            pentop = pentop + learning_rate * (pospentopstatistics - negpentopstatistics)
            pengenbiases = pengenbiases + learning_rate * (wake_states[-1] - negpenstates)
            topbiases = topbiases + learning_rate * (waketopstates - negtopstates)
            # UPDATES TO RECOGNITION/INFERENCE APPROXIMATION PARAMETERS
            for i in reversed(range(len(rbms)-1)):
                eff_data = sleep_states[i-1] if i > 0 else sleepvisprobs
                rec_W[i] = rec_W[i] + learning_rate * np.dot(eff_data.T, sleep_states[i] - p_sleep_probs[i])
                rec_biases[i] = rec_biases[i] + learning_rate * (sleep_states[i] - p_sleep_probs[i])

    print('Fine tuning complete')

    top_W = np.vstack((pentop, labtop))
    gen_biases.append(np.hstack((pengenbiases,labgenbiases)))

    with open(os.path.join(save_folder, 'dbn-ft.pkl'), 'wb') as save_file:
        dbnft = {'rec_biases' : rec_biases,
                 'gen_biases' : gen_biases,
                 'rec_W' : rec_W,
                 'gen_W' : gen_W,
                 'pentop' : pentop,
                 'labtop' : labtop,
                 'pengenbiases' : pengenbiases,
                 'labgenbiases' : labgenbiases,
                 'topbiases' : topbiases,
                 'top_W' : top_W}
        pickle.dump(dbnft, save_file)
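    # A sketch of reloading these parameters later (mirrors the dump above):
    # with open(os.path.join(save_folder, 'dbn-ft.pkl'), 'rb') as f:
    #     dbnft = pickle.load(f)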

    # Reload original data
    datasets = load_data(original_dataset)
    train_set_x, train_set_y = datasets[0]
    # Sampling
    rng = np.random.RandomState(random_seed)

    images = np.zeros((0,28*28))
    labels = np.zeros((0,1))

    number_of_train_samples = train_set_x.get_value(borrow=True).shape[0]

    count = 0

    print('Sampling images')

    while count < samples:

        # Pick a random training example just to supply its label; the visible
        # chain itself is initialized from random noise below
        train_idx = rng.randint(number_of_train_samples)

        y_ind = np.zeros((1, 10))
        y_ind[0,y_list[train_idx]] = 1
        # Penultimate-layer width read from the top weights (was hardcoded to 500,
        # which only matches the default [500,500,2000] architecture)
        vis = np.hstack((np.random.rand(1, pentop.shape[0]), y_ind))

        # Gibbs sample from random start with clamped labels

        for dummy in range(plot_every):
            pre_sigmoid_activation = np.dot(vis, top_W) + topbiases
            hid_prob = 1 / (1 + np.exp(-pre_sigmoid_activation))
            hid = (hid_prob > np.random.rand(hid_prob.shape[0], hid_prob.shape[1])) * 1
            pre_sigmoid_activation = np.dot(hid, top_W.T) + gen_biases[-1]
            vis_prob = 1 / (1 + np.exp(-pre_sigmoid_activation))
            vis = (vis_prob > np.random.rand(vis_prob.shape[0], vis_prob.shape[1])) * 1
            # Clamp the label units to the chosen class
            vis[:,-10:] = y_ind

        # Propagate down

        vis = vis[:,:-10]
        # zip stops at len(gen_W), so the top-level bias appended above is skipped here
        for (W, bias) in reversed(list(zip(gen_W, gen_biases))):
            pre_sigmoid_activation = np.dot(vis, W) + bias
            vis_prob = 1 / (1 + np.exp(-pre_sigmoid_activation))
            vis = (vis_prob > np.random.rand(vis_prob.shape[0], vis_prob.shape[1])) * 1

        vis_image = vis_prob # Use probabilities at final layer

        images = np.vstack((images, vis_image))
        labels = np.vstack((labels, y_list[train_idx]))
        np.savetxt(os.path.join(save_folder, 'images-ft.csv'), images, delimiter=',')
        np.savetxt(os.path.join(save_folder, 'labels-ft.csv'), labels, delimiter=',')
        count += 1
        print('Sampled %d images' % count)
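
A hedged end-to-end sketch combining Code Examples #1 and #3 (a hypothetical call; the output directory and iteration counts are assumptions):

# Pretrain the stack without drawing samples, then wake-sleep fine-tune and sample.
rbms = train_and_sample_from_dbn_layers(samples=0, save_folder='output')
fine_tune_dbn_layers(rbms=rbms, save_folder='output', epochs=10, samples=100)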