Example #1
import numpy
import theano
import theano.tensor as T

from morb import base, units, parameters, stats, updaters, trainers, monitors

def main():
    ## define hyperparameters
    learning_rate = 0.01
    weight_decay = 0.02
    minibatch_size = 32
    epochs = 50

    ## load the dataset: a 6x6 matrix where each row is a training example
    ## and each column is a visible unit
    data = numpy.array([[1,1,1,0,0,0],
                        [1,0,1,0,0,0],
                        [1,1,1,0,0,0],
                        [0,0,1,1,1,0],
                        [0,0,1,1,0,0],
                        [0,0,1,1,1,0]])

    ## construct RBM model
    rbm = base.RBM()

    rbm.v = units.BinaryUnits(rbm) # visibles
    rbm.h = units.BinaryUnits(rbm) # hiddens

    ## initialise the parameters: small random weights, zero biases
    ## (n_hidden = 2 is an arbitrary choice for this toy dataset)
    n_visible, n_hidden = data.shape[1], 2
    initial_W = numpy.asarray(numpy.random.uniform(low=-0.1, high=0.1, size=(n_visible, n_hidden)), dtype=theano.config.floatX)
    initial_bv = numpy.zeros(n_visible, dtype=theano.config.floatX)
    initial_bh = numpy.zeros(n_hidden, dtype=theano.config.floatX)

    rbm.W = parameters.ProdParameters(rbm, [rbm.v, rbm.h], theano.shared(initial_W, name='W'), name='W') # weights
    rbm.bv = parameters.BiasParameters(rbm, rbm.v, theano.shared(initial_bv, name='bv'), name='bv') # visible bias
    rbm.bh = parameters.BiasParameters(rbm, rbm.h, theano.shared(initial_bh, name='bh'), name='bh') # hidden bias

    ## define a variable map that maps the 'input' units to Theano variables
    initial_vmap = { rbm.v: T.matrix('v') }

    ## compute symbolic CD-1 statistics
    s = stats.cd_stats(rbm, initial_vmap, visible_units=[rbm.v], hidden_units=[rbm.h], k=1)

    ## create an updater for each parameter variable:
    ## a CD-1 gradient step combined with weight decay on the parameter value
    umap = {}
    for variable in [rbm.W.W, rbm.bv.b, rbm.bh.b]:
        new_value = variable + learning_rate * (updaters.CDUpdater(rbm, variable, s)
                                                - weight_decay * updaters.DecayUpdater(variable))
        umap[variable] = new_value

    ## monitor reconstruction cost during training
    mse = monitors.reconstruction_mse(s, rbm.v)

    ## train the model
    t = trainers.MinibatchTrainer(rbm, umap)
    train = t.compile_function(initial_vmap, mb_size=minibatch_size, monitors=[mse])

    for epoch in range(epochs):
        costs = [m for m in train({ rbm.v: data })]
        print "MSE = %.4f" % numpy.mean(costs)
Example #2
# (snippet begins mid-file: an RBM with units rbm.v and rbm.h has already been
#  constructed, and n_hidden / n_states are defined earlier)
# add softmax unit for context
rbm.s = units.SoftmaxUnits(rbm, name='s')

# link context and hiddens
initial_Ws = np.asarray( np.random.uniform(
                   low   = -4*np.sqrt(6./(n_hidden+1+n_states)),
                   high  =  4*np.sqrt(6./(n_hidden+1+n_states)),
                   size  =  (1, n_states, n_hidden)),
                   dtype =  theano.config.floatX)
rbm.Ws = parameters.AdvancedProdParameters(rbm, [rbm.s, rbm.h], [2, 1],
                                           theano.shared(value=initial_Ws, name='Ws'),
                                           name='Ws')

initial_vmap = { rbm.v: T.matrix('v'), rbm.s: T.tensor3('s') }

# try to calculate weight updates using CD-1 stats
print ">> Constructing contrastive divergence updaters..."
s = stats.cd_stats(rbm,
                   initial_vmap,
                   visible_units=[rbm.v],
                   hidden_units=[rbm.h],
                   context_units=[rbm.s],
                   k=1,
                   mean_field_for_stats=[rbm.v],
                   mean_field_for_gibbs=[rbm.v])
# s = stats.cd_stats(rbm, initial_vmap, visible_units=[rbm.v, rbm.s], hidden_units=[rbm.h], k=1, mean_field_for_stats=[rbm.v], mean_field_for_gibbs=[rbm.v])

umap = {}
for var in rbm.variables:
    pu = var + 0.001 * updaters.CDUpdater(rbm, var, s)
    umap[var] = pu

print ">> Compiling functions..."
t = trainers.MinibatchTrainer(rbm, umap)
m = monitors.reconstruction_mse(s, rbm.v)
m_data = s['data'][rbm.v]
m_model = s['model'][rbm.v]
e_data = rbm.energy(s['data']).mean()
e_model = rbm.energy(s['model']).mean()
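
# hedged completion sketch (the original snippet stops before compiling): the
# trainer and monitors above can be compiled and run like the other examples;
# `train_data` and `context_data` are placeholder arrays, not names from the
# original.
train = t.compile_function(initial_vmap, mb_size=32,
                           monitors=[m, e_data, e_model], name='train')
for epoch in range(50):
    for mse, edata, emodel in train({rbm.v: train_data, rbm.s: context_data}):
        pass  # accumulate or log the monitor values here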
Example #3
# (snippet begins inside the class definition of FactoredBinaryBinaryRBM)
    def _initial_bv(self):
        return np.zeros(self.n_visible, dtype=theano.config.floatX)

    def _initial_bh(self):
        return np.zeros(self.n_hidden, dtype=theano.config.floatX)


rbm = FactoredBinaryBinaryRBM(n_visible, n_hidden, n_factors)
initial_vmap = {rbm.v: T.matrix('v')}

# try to calculate weight updates using CD stats
print ">> Constructing contrastive divergence updaters..."
s = stats.cd_stats(rbm,
                   initial_vmap,
                   visible_units=[rbm.v],
                   hidden_units=[rbm.h],
                   k=k,
                   mean_field_for_stats=[rbm.v],
                   mean_field_for_gibbs=[rbm.v])

umap = {}
for var in rbm.variables:
    pu = var + (learning_rate / float(mb_size)) * updaters.CDUpdater(
        rbm, var, s)
    umap[var] = pu

print ">> Compiling functions..."
t = trainers.MinibatchTrainer(rbm, umap)
m = monitors.reconstruction_mse(s, rbm.v)
m_data = s['data'][rbm.v]
m_model = s['model'][rbm.v]
Example #4
    # (the opening of this snippet is truncated; what follows is the tail of
    #  the TexpBinaryRBM class, reconstructed here as an _initial_W-style
    #  initialiser on the pattern of the surrounding examples)
    def _initial_W(self):
        return np.asarray(np.random.uniform(
                   low   = -4 * np.sqrt(6. / (self.n_hidden + self.n_visible)),
                   high  =  4 * np.sqrt(6. / (self.n_hidden + self.n_visible)),
                   size  =  (self.n_visible, self.n_hidden)),
                   dtype =  theano.config.floatX)

    def _initial_bv(self):
        return np.zeros(self.n_visible, dtype = theano.config.floatX)
        
    def _initial_bh(self):
        return np.zeros(self.n_hidden, dtype = theano.config.floatX)


rbm = TexpBinaryRBM(n_visible, n_hidden)

initial_vmap = { rbm.v: T.matrix('v') }

# We use single-step contrastive divergence (CD-1) to train the RBM. For this,
# we can use the CDUpdater, which requires symbolic CD-1 statistics:
s = stats.cd_stats(rbm, initial_vmap, visible_units=[rbm.v], hidden_units=[rbm.h], k=1)

# We create an updater for each parameter variable
umap = {}
for var in rbm.variables:
    pu = var + 0.001 * updaters.CDUpdater(rbm, var, s) # the learning rate is 0.001
    umap[var] = pu
 
# training
t = trainers.MinibatchTrainer(rbm, umap)
mse = monitors.reconstruction_mse(s, rbm.v)
train = t.compile_function(initial_vmap, mb_size=32, monitors=[mse], name='train', mode=mode)

epochs = 200

start_time = time.time()

Example #5
def morbrun1(f1=1, f2=1, v1=1, v2=1, kern=1):

    test_set_x = np.array(eval_print1).flatten(2)
    valid_set_x = np.array(eval_print3).flatten(2)
    train_set_x = np.array(eval_print2).flatten(2)

    train_set_x = train_set_x.reshape(
        np.array(eval_print2).shape[0] * batchm, kern, v1, v2)
    valid_set_x = valid_set_x.reshape(
        np.array(eval_print3).shape[0] * batchm, kern, v1, v2)
    test_set_x = test_set_x.reshape(
        np.array(eval_print1).shape[0] * batchm, kern, v1, v2)

    visible_maps = kern
    hidden_maps = neuron  # 100 # 50
    filter_height = f1  # 7 # 8
    filter_width = f2  # 30 # 8
    mb_size = batchm  # 1 minibatch

    print ">> Constructing RBM..."
    fan_in = visible_maps * filter_height * filter_width
    """
   initial_W = numpy.asarray(
            self.numpy_rng.uniform(
                low = - numpy.sqrt(3./fan_in),
                high = numpy.sqrt(3./fan_in),
                size = self.filter_shape
            ), dtype=theano.config.floatX)
  """
    numpy_rng = np.random.RandomState(123)
    initial_W = np.asarray(numpy_rng.normal(0,
                                            0.5 / np.sqrt(fan_in),
                                            size=(hidden_maps, visible_maps,
                                                  filter_height,
                                                  filter_width)),
                           dtype=theano.config.floatX)
    initial_bv = np.zeros(visible_maps, dtype=theano.config.floatX)
    initial_bh = np.zeros(hidden_maps, dtype=theano.config.floatX)

    shape_info = {
        'hidden_maps': hidden_maps,
        'visible_maps': visible_maps,
        'filter_height': filter_height,
        'filter_width': filter_width,
        'visible_height': v1,  #45+8,
        'visible_width': v2,  #30,
        'mb_size': mb_size
    }

    # rbms.SigmoidBinaryRBM(n_visible, n_hidden)
    rbm = morb.base.RBM()
    rbm.v = units.BinaryUnits(rbm, name='v')  # visibles
    rbm.h = units.BinaryUnits(rbm, name='h')  # hiddens
    rbm.W = parameters.Convolutional2DParameters(rbm, [rbm.v, rbm.h],
                                                 theano.shared(value=initial_W,
                                                               name='W'),
                                                 name='W',
                                                 shape_info=shape_info)
    # one bias per map (so shared across width and height):
    rbm.bv = parameters.SharedBiasParameters(rbm,
                                             rbm.v,
                                             3,
                                             2,
                                             theano.shared(value=initial_bv,
                                                           name='bv'),
                                             name='bv')
    rbm.bh = parameters.SharedBiasParameters(rbm,
                                             rbm.h,
                                             3,
                                             2,
                                             theano.shared(value=initial_bh,
                                                           name='bh'),
                                             name='bh')

    initial_vmap = {rbm.v: T.tensor4('v')}

    # try to calculate weight updates using CD stats (k=5 Gibbs steps below)
    print(">> Constructing contrastive divergence updaters...")
    s = stats.cd_stats(rbm,
                       initial_vmap,
                       visible_units=[rbm.v],
                       hidden_units=[rbm.h],
                       k=5,
                       mean_field_for_stats=[rbm.v],
                       mean_field_for_gibbs=[rbm.v])

    lr_cd = 0.001
    if indk == -1:
        lr_cd = 0

    umap = {}
    for var in rbm.variables:
        pu = var + lr_cd * updaters.CDUpdater(rbm, var, s)
        umap[var] = pu

    print ">> Compiling functions..."
    t = trainers.MinibatchTrainer(rbm, umap)
    m = monitors.reconstruction_mse(s, rbm.v)

    e_data = rbm.energy(s['data']).mean()
    e_model = rbm.energy(s['model']).mean()

    # train = t.compile_function(initial_vmap, mb_size=32, monitors=[m], name='train', mode=mode)
    train = t.compile_function(initial_vmap,
                               mb_size=mb_size,
                               monitors=[m, e_data, e_model],
                               name='train',
                               mode=mode)

    # TRAINING

    epochs = epoch_cd
    print ">> Training for %d epochs..." % epochs

    for epoch in range(epochs):
        monitoring_data_train = [
            (cost, energy_data, energy_model)
            for cost, energy_data, energy_model in train({rbm.v: train_set_x})
        ]
        mses_train, edata_train_list, emodel_train_list = zip(
            *monitoring_data_train)

    #print rbm.W.var.get_value().shape
    lay1w = rbm.W.var.get_value()
    Wl = theano.shared(lay1w)
    lay1bh = rbm.bh.var.get_value()
    bhl = theano.shared(lay1bh)
    #print Wl.get_value().shape
    return [Wl, bhl]
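
# hedged usage sketch (not in the original): morbrun1 depends on module-level
# globals (eval_print1/2/3, batchm, neuron, indk, epoch_cd, mode); once those
# exist, the learned filters and hidden biases come back as shared variables:
#
#     W1, bh1 = morbrun1(f1=7, f2=30, v1=45, v2=30, kern=1)
#
# (the argument values echo the commented hints inside the function and are
#  illustrative only)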
Example #6
# (snippet begins mid-file: data and data_context are loaded earlier)
data_context_train = data_context[:-1000, :]
data_context_eval = data_context[-1000:, :]

n_visible = data.shape[1]
n_context = data_context.shape[1]
n_hidden = 100

print ">> Constructing RBM..."
rbm = rbms.BinaryBinaryCRBM(n_visible, n_hidden, n_context)
initial_vmap = {rbm.v: T.matrix('v'), rbm.x: T.matrix('x')}

# try to calculate weight updates using CD-1 stats
print ">> Constructing contrastive divergence updaters..."
s = stats.cd_stats(rbm,
                   initial_vmap,
                   visible_units=[rbm.v],
                   hidden_units=[rbm.h],
                   context_units=[rbm.x],
                   k=1)

umap = {}
for var in rbm.variables:
    pu = var + 0.0005 * updaters.CDUpdater(rbm, var, s)
    umap[var] = pu

print ">> Compiling functions..."
t = trainers.MinibatchTrainer(rbm, umap)
m = monitors.reconstruction_mse(s, rbm.v)
mce = monitors.reconstruction_crossentropy(s, rbm.v)
free_energy = T.mean(rbm.free_energy(
    [rbm.h], s['data']))  # take the mean over the minibatch.
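
# hedged completion sketch (the original stops here): compile the trainer with
# the monitors above; `data_train` is a placeholder for the training rows that
# pair with data_context_train, and mb_size=100 is an arbitrary choice.
train = t.compile_function(initial_vmap, mb_size=100,
                           monitors=[m, mce, free_energy], name='train')
for epoch in range(50):
    for mse, ce, fe in train({rbm.v: data_train, rbm.x: data_context_train}):
        pass  # track reconstruction error and free energy per minibatch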
Example #7
# (snippet begins mid-file: n_visible is defined earlier from the dataset)
n_hidden = 500
mb_size = 20
k = 15
learning_rate = 0.1
epochs = 15


print ">> Constructing RBM..."
rbm = rbms.BinaryBinaryRBM(n_visible, n_hidden)
initial_vmap = { rbm.v: T.matrix('v') }

persistent_vmap = { rbm.h: theano.shared(np.zeros((mb_size, n_hidden), dtype=theano.config.floatX)) }

# try to calculate weight updates using CD stats
print ">> Constructing contrastive divergence updaters..."
s = stats.cd_stats(rbm,
                   initial_vmap,
                   visible_units=[rbm.v],
                   hidden_units=[rbm.h],
                   k=k,
                   persistent_vmap=persistent_vmap,
                   mean_field_for_stats=[rbm.v],
                   mean_field_for_gibbs=[rbm.v])

umap = {}
for var in rbm.variables:
    pu = var + (learning_rate / float(mb_size)) * updaters.CDUpdater(rbm, var, s)
    umap[var] = pu

print ">> Compiling functions..."
t = trainers.MinibatchTrainer(rbm, umap)
m = monitors.reconstruction_mse(s, rbm.v)
m_data = s['data'][rbm.v]
m_model = s['model'][rbm.v]
e_data = rbm.energy(s['data']).mean()
e_model = rbm.energy(s['model']).mean()

# train = t.compile_function(initial_vmap, mb_size=32, monitors=[m], name='train', mode=mode)
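# hedged completion (the original leaves the compile call commented out): with
# persistent_vmap set, the shared hidden state above serves as the start of the
# persistent Gibbs chain (PCD), carried across minibatch updates.
train = t.compile_function(initial_vmap, mb_size=mb_size,
                           monitors=[m, e_data, e_model], name='train')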
Example #8
# (snippet begins mid-file: n_visible, n_hidden_mean, n_hidden_precision and
#  mb_size are defined earlier)
k = 1  # 15
learning_rate = 0.01  # 0.1
epochs = 2000

print ">> Constructing RBM..."
# rbm = rbms.LearntPrecisionGaussianBinaryRBM(n_visible, n_hidden)
rbm = rbms.LearntPrecisionSeparateGaussianBinaryRBM(n_visible, n_hidden_mean,
                                                    n_hidden_precision)
initial_vmap = {rbm.v: T.matrix('v')}

# try to calculate weight updates using CD stats
print ">> Constructing contrastive divergence updaters..."
# s = stats.cd_stats(rbm, initial_vmap, visible_units=[rbm.v], hidden_units=[rbm.h], k=k)
s = stats.cd_stats(rbm,
                   initial_vmap,
                   visible_units=[rbm.v],
                   hidden_units=[rbm.hp, rbm.hm],
                   k=k)

# We create an updater for each parameter variable.
# IMPORTANT: the precision parameters must be constrained to be negative.
# variables = [rbm.Wm.var, rbm.bvm.var, rbm.bh.var, rbm.Wp.var, rbm.bvp.var]
variables = [
    rbm.Wm.var, rbm.bvm.var, rbm.bhm.var, rbm.Wp.var, rbm.bvp.var, rbm.bhp.var
]
precision_variables = [rbm.Wp.var, rbm.bvp.var]

umap = {}
for var in variables:
    pu = var + (learning_rate / mb_size) * updaters.CDUpdater(rbm, var, s)
    umap[var] = pu
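
# hedged sketch (not in the original snippet): the IMPORTANT note above says
# the precision parameters must stay negative; one plain-Theano way to enforce
# that is to clip their update expressions at zero. morb may provide its own
# bounded updater for this, so treat the clipping below as an assumption.
for var in precision_variables:
    umap[var] = T.minimum(umap[var], 0)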