def test():
    """Train a minibatch RBM on encoded word tuples and report progress.

    Builds a Codec over `num_words` words grouped into tuples of size
    `tuplesize`, trains an RBM by contrastive divergence (CD-1) in
    minibatches, and each epoch prints example reconstructions, the
    reconstruction error, and the scaled weights/biases, then updates the
    weight visualization.
    """
    minibatch_size = 100
    num_words = 20
    tuplesize = 3
    num_visible = tuplesize * num_words  # one one-hot block per tuple position
    num_hidden = 30

    codec = Codec(tuplesize, num_words)
    words = codec.words
    encoded = codec.tuples_to_matrix()
    (num_data, _) = encoded.shape

    print(words)
    print('data count: ', num_data)

    rbm = RBM(num_visible = num_visible,
              num_hidden = num_hidden,
              minibatch_size = minibatch_size)

    # Smoke-test one Gibbs step (v -> h -> v) and a CD-1 graph on a random
    # minibatch before training proper.
    id_indices = numpy.random.randint(low=0, high=num_data, size=minibatch_size)
    input_data = T.constant(encoded[id_indices])
    h1samples = rbm.sample_h_given_v(input_data).eval()
    v2samples = rbm.sample_v_given_h(h1samples).eval()
    (W, H, V) = rbm.contrastive_divergence_1(input_data)

    # Compiled v -> h -> v sampler used to show reconstructions.
    xvis = T.fmatrix('xvis')
    h1samples = rbm.sample_h_given_v(xvis)
    v2samples = rbm.sample_v_given_h(h1samples)
    sample_vhv = theano.function([xvis], v2samples)

    # Fixed set of examples so reconstructions are comparable across epochs.
    example_indices = numpy.random.randint(low=0, high=num_data, size=minibatch_size)
    example_input_data = encoded[example_indices]
    num_examples = min(10, minibatch_size)

    def show_examples():
        # Print the active (tuple-position, word) pairs of each input next
        # to its one-step reconstruction.
        rec = sample_vhv(example_input_data)
        for example in range(num_examples):
            print('input words:',
                  [(t+1, words[idx])
                   for t in range(tuplesize)
                   for idx in range(num_words)
                   if example_input_data[example, t*num_words + idx]])
            print('reconstructed words:',
                  [(t+1, words[idx])
                   for t in range(tuplesize)
                   for idx in range(num_words)
                   if rec[example, t*num_words + idx]])

    vis = T.fmatrix('vis')
    train = rbm.cd1_fun(vis)

    draw = VisualizeWeights('Minibatches', rbm, tuplesize, words, num_hidden)
    for epoch in range(1000):
        show_examples()
        all_vdiffs = numpy.zeros(num_visible)
        print('epoch ', epoch)
        numpy.random.shuffle(encoded)
        for minibatch in range(num_data // minibatch_size):
            mb_start = minibatch * minibatch_size
            # Contiguous slice instead of building an index array and
            # fancy-indexing a copy.
            input_data = encoded[mb_start:mb_start + minibatch_size]
            vdiffs = train(input_data)
            all_vdiffs = all_vdiffs + numpy.abs(vdiffs)
        # NOTE(review): scaling the summed error by minibatch_size looks odd
        # (the per-example driver in this file only sums) — confirm intended.
        print('reconstruction error: ', numpy.sum(all_vdiffs) * minibatch_size)
        print((rbm.weights.get_value() * 100).astype(numpy.int32))
        print((rbm.vbias.get_value() * 100).astype(numpy.int32))
        print((rbm.hbias.get_value() * 100).astype(numpy.int32))
        draw.epoch_finished(epoch)
# --- Example #2 ---
def test():
    """Train a single-example RBM1 (no minibatching) on encoded word tuples.

    Trains by CD-1 one example at a time; each epoch prints example
    reconstructions, the reconstruction error, the scaled weights, and a
    per-hidden-unit report of strongly connected words.
    """
    num_words = 20
    tuplesize = 3
    num_visible = tuplesize * num_words  # one one-hot block per tuple position
    num_hidden = 30

    codec = Codec(tuplesize, num_words)
    words = codec.words
    encoded = codec.tuples_to_matrix()
    (num_data, _) = encoded.shape
    print('data count: ', num_data)

    rbm = RBM1(num_visible = num_visible,
               num_hidden = num_hidden)

    # Smoke-test one Gibbs step (v -> h -> v) and a CD-1 graph on a single
    # example before training proper.
    input_data = T.constant(encoded[2])
    h1samples = rbm.sample_h_given_v(input_data).eval()
    v2samples = rbm.sample_v_given_h(h1samples).eval()
    (W, H, V) = rbm.contrastive_divergence_1(input_data)

    # Compiled v -> h -> v sampler over a single visible vector.
    xvis = T.fvector('xvis')
    h1samples = rbm.sample_h_given_v(xvis)
    v2samples = rbm.sample_v_given_h(h1samples)
    sample_vhv = theano.function([xvis], v2samples)

    # Fixed set of examples so reconstructions are comparable across epochs.
    num_examples = 20
    example_indices = numpy.random.randint(low=0, high=num_data, size=num_examples)

    def show_examples():
        # Print the active (tuple-position, word) pairs of each example next
        # to its one-step reconstruction.
        for example in example_indices:
            dat = encoded[example]
            v2samples = sample_vhv(dat)
            print('input words:',
                  [(t+1, words[idx])
                   for t in range(tuplesize)
                   for idx in range(num_words)
                   if encoded[example, t*num_words + idx]])
            print('reconstructed words:',
                  [(t+1, words[idx])
                   for t in range(tuplesize)
                   for idx in range(num_words)
                   if v2samples[t*num_words + idx]])
            print('')

    def report_hidden():
        # List, per hidden unit, the words whose connecting weight is strong.
        weights = rbm.weights.get_value()
        for h in range(num_hidden):
            print('hidden ', h)
            for block in range(tuplesize):
                for word in range(num_words):
                    w = weights[block*num_words+word, h]
                    if w > 0.5:
                        print('   %2i %8s  %4.1f' % (block, words[word], w))

    vis = T.fvector('vis')
    train = rbm.cd1_fun(vis)

    # One debug training step before the loop to sanity-check the update.
    input_data = numpy.reshape(encoded[2], num_visible)
    train(input_data)
    print(rbm.weights.get_value())

    draw = VisualizeWeights('', rbm, tuplesize, words, num_hidden)

    for epoch in range(500):
        show_examples()
        all_vdiffs = numpy.zeros(num_visible)
        print('epoch ', epoch)
        for i in range(num_data):
            # Flatten the row to the vector shape cd1_fun expects.
            input_data = numpy.reshape(encoded[i], num_visible)
            vdiffs = train(input_data)
            all_vdiffs = all_vdiffs + numpy.abs(vdiffs)
        print('reconstruction error: ', numpy.sum(all_vdiffs))
        # Cast with numpy directly instead of compiling a Theano graph
        # (T.cast(...).eval()) every epoch just to cast a host array.
        print((rbm.weights.get_value() * 100).astype(numpy.int32))
        draw.epoch_finished(epoch)
        report_hidden()
def test():
    """Train a minibatch RBM with dropout on encoded word tuples.

    Same driver shape as the plain minibatch version, but the RBM is built
    with dropout keep-probabilities (venabledp/henabledp) and the compiled
    training function additionally returns the enabled-unit masks.
    """
    minibatch_size = 100
    num_words = 40
    tuplesize = 5
    num_visible = tuplesize * num_words  # one one-hot block per tuple position
    num_hidden = 140

    codec = Codec(tuplesize, num_words)
    words = codec.words
    encoded = codec.tuples_to_matrix()
    (num_data, _) = encoded.shape

    print(words)
    print('data count: ', num_data)

    rbm = RBM(num_visible = num_visible,
              num_hidden = num_hidden,
              minibatch_size = minibatch_size,
              venabledp=1.0,   # keep every visible unit
              henabledp=0.7)   # keep ~70% of hidden units during training

    # Sampler with every unit enabled, used only to show reconstructions.
    all_h_enabled = numpy.ones(num_hidden)
    all_v_enabled = numpy.ones(num_visible)

    xvis = T.fmatrix('xvis')
    h1samples = rbm.sample_h_given_v(xvis, all_h_enabled)
    v2samples = rbm.sample_v_given_h(h1samples, all_v_enabled)
    sample_vhv = theano.function([xvis], v2samples)

    # Fixed set of examples so reconstructions are comparable across epochs.
    example_indices = numpy.random.randint(low=0, high=num_data, size=minibatch_size)
    example_input_data = encoded[example_indices]
    num_examples = min(10, minibatch_size)

    def show_examples():
        # Print the active (tuple-position, word) pairs of each input next
        # to its one-step reconstruction.
        rec = sample_vhv(example_input_data)
        for example in range(num_examples):
            print('input words:',
                  [(t+1, words[idx])
                   for t in range(tuplesize)
                   for idx in range(num_words)
                   if example_input_data[example, t*num_words + idx]])
            print('reconstructed words:',
                  [(t+1, words[idx])
                   for t in range(tuplesize)
                   for idx in range(num_words)
                   if rec[example, t*num_words + idx]])

    def report_hidden():
        # List, per hidden unit, the words whose connecting weight is strong.
        weights = rbm.weights.get_value()
        for h in range(num_hidden):
            print('hidden ', h)
            for block in range(tuplesize):
                for word in range(num_words):
                    w = weights[block*num_words+word, h]
                    if w > 0.5:
                        print('   %2i %8s  %4.1f' % (block, words[word], w))

    vis = T.fmatrix('vis')
    train = rbm.cd1_fun(vis)

    draw = VisualizeWeights('Dropout (vp:%4.2f, hp:%4.2f)' % (rbm.venabledp, rbm.henabledp),
                            rbm, tuplesize, words, num_hidden,
                            num_visible)
    for epoch in range(1000):
        show_examples()
        all_vdiffs = numpy.zeros(num_visible)
        print('epoch ', epoch)
        numpy.random.shuffle(encoded)
        for minibatch in range(num_data // minibatch_size):
            mb_start = minibatch * minibatch_size
            # Contiguous slice instead of building an index array and
            # fancy-indexing a copy.
            input_data = encoded[mb_start:mb_start + minibatch_size]
            (vdiffs, venabled, henabled) = train(input_data)
            all_vdiffs = all_vdiffs + numpy.abs(vdiffs)
        # NOTE(review): scaling the summed error by minibatch_size looks odd
        # (the per-example driver in this file only sums) — confirm intended.
        print('reconstruction error: ', numpy.sum(all_vdiffs) * minibatch_size)
        draw.epoch_finished(epoch)
        report_hidden()