import numpy
import theano
import theano.tensor as T

from morb import base, units, parameters, stats, updaters, trainers, monitors


def main():
    ## define hyperparameters
    learning_rate = 0.01
    weight_decay = 0.02
    minibatch_size = 32
    epochs = 50

    ## load dataset
    # a 6x6 matrix where each row is a training example and each column is a visible unit
    data = numpy.array([[1, 1, 1, 0, 0, 0],
                        [1, 0, 1, 0, 0, 0],
                        [1, 1, 1, 0, 0, 0],
                        [0, 0, 1, 1, 1, 0],
                        [0, 0, 1, 1, 0, 0],
                        [0, 0, 1, 1, 1, 0]], dtype=theano.config.floatX)
    n_visible = data.shape[1]
    n_hidden = 2 # a small hidden layer suffices for this toy dataset

    ## construct RBM model
    rbm = base.RBM()
    rbm.v = units.BinaryUnits(rbm) # visibles
    rbm.h = units.BinaryUnits(rbm) # hiddens

    # the parameters wrap Theano shared variables of the appropriate shapes
    initial_W = theano.shared(numpy.random.normal(
        0, 0.1, (n_visible, n_hidden)).astype(theano.config.floatX), name='W')
    initial_bv = theano.shared(numpy.zeros(n_visible, dtype=theano.config.floatX), name='bv')
    initial_bh = theano.shared(numpy.zeros(n_hidden, dtype=theano.config.floatX), name='bh')

    rbm.W = parameters.ProdParameters(rbm, [rbm.v, rbm.h], initial_W) # weights
    rbm.bv = parameters.BiasParameters(rbm, rbm.v, initial_bv) # visible bias
    rbm.bh = parameters.BiasParameters(rbm, rbm.h, initial_bh) # hidden bias

    ## define a variable map that maps the 'input' units to Theano variables
    initial_vmap = { rbm.v: T.matrix('v') }

    ## compute symbolic CD-1 statistics
    s = stats.cd_stats(rbm, initial_vmap, visible_units=[rbm.v], hidden_units=[rbm.h], k=1)

    ## create an updater for each parameter variable: a CD step plus weight decay
    umap = {}
    for variable in [rbm.W.W, rbm.bv.b, rbm.bh.b]:
        new_value = variable + learning_rate * (updaters.CDUpdater(rbm, variable, s)
                    - weight_decay * updaters.DecayUpdater(variable))
        umap[variable] = new_value

    ## monitor reconstruction cost during training
    mse = monitors.reconstruction_mse(s, rbm.v)

    ## train the model
    t = trainers.MinibatchTrainer(rbm, umap)
    train = t.compile_function(initial_vmap, mb_size=minibatch_size, monitors=[mse])

    for epoch in range(epochs):
        costs = [m for m in train({ rbm.v: data })]
        print "MSE = %.4f" % numpy.mean(costs)
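# After training, the learned parameter values can be read back from the
# underlying shared variables, just as the convolutional example below does
# with rbm.W.var.get_value(). A minimal sketch (this would go at the end of
# main(), after the training loop):
W = rbm.W.W.get_value()   # learned weight matrix, shape (n_visible, n_hidden)
bv = rbm.bv.b.get_value() # learned visible biases
bh = rbm.bh.b.get_value() # learned hidden biases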
# add a softmax unit for context
rbm.s = units.SoftmaxUnits(rbm, name='s')

# link context and hiddens
initial_Ws = np.asarray(np.random.uniform(
                 low=-4 * np.sqrt(6. / (n_hidden + 1 + n_states)),
                 high=4 * np.sqrt(6. / (n_hidden + 1 + n_states)),
                 size=(1, n_states, n_hidden)),
             dtype=theano.config.floatX)
rbm.Ws = parameters.AdvancedProdParameters(rbm, [rbm.s, rbm.h], [2, 1],
             theano.shared(value=initial_Ws, name='Ws'), name='Ws')

initial_vmap = { rbm.v: T.matrix('v'), rbm.s: T.tensor3('s') }

# calculate weight updates using CD-1 statistics, with the softmax units as context
print ">> Constructing contrastive divergence updaters..."
s = stats.cd_stats(rbm, initial_vmap, visible_units=[rbm.v], hidden_units=[rbm.h],
                   context_units=[rbm.s], k=1,
                   mean_field_for_stats=[rbm.v], mean_field_for_gibbs=[rbm.v])
# alternative: treat the softmax units as visibles rather than context:
# s = stats.cd_stats(rbm, initial_vmap, visible_units=[rbm.v, rbm.s], hidden_units=[rbm.h],
#                    k=1, mean_field_for_stats=[rbm.v], mean_field_for_gibbs=[rbm.v])

umap = {}
for var in rbm.variables:
    pu = var + 0.001 * updaters.CDUpdater(rbm, var, s) # learning rate 0.001
    umap[var] = pu

print ">> Compiling functions..."
t = trainers.MinibatchTrainer(rbm, umap)
m = monitors.reconstruction_mse(s, rbm.v)
m_data = s['data'][rbm.v]
m_model = s['model'][rbm.v]
e_data = rbm.energy(s['data']).mean()
e_model = rbm.energy(s['model']).mean()
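# A minimal sketch of how training would proceed from here, following the
# pattern of the other examples; mb_size, epochs, train_set and context_set
# (a 3D array matching the tensor3 input) are assumed names:
train = t.compile_function(initial_vmap, mb_size=mb_size,
                           monitors=[m, e_data, e_model], name='train')
for epoch in range(epochs):
    results = [r for r in train({ rbm.v: train_set, rbm.s: context_set })]
    mses, edata, emodel = zip(*results)
    print "epoch %d: MSE = %.4f" % (epoch, np.mean(mses))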
    def _initial_bv(self):
        return np.zeros(self.n_visible, dtype=theano.config.floatX)

    def _initial_bh(self):
        return np.zeros(self.n_hidden, dtype=theano.config.floatX)


rbm = FactoredBinaryBinaryRBM(n_visible, n_hidden, n_factors)
initial_vmap = {rbm.v: T.matrix('v')}

# try to calculate weight updates using CD stats
print ">> Constructing contrastive divergence updaters..."
s = stats.cd_stats(rbm, initial_vmap, visible_units=[rbm.v], hidden_units=[rbm.h], k=k,
                   mean_field_for_stats=[rbm.v], mean_field_for_gibbs=[rbm.v])

umap = {}
for var in rbm.variables:
    pu = var + (learning_rate / float(mb_size)) * updaters.CDUpdater(rbm, var, s)
    umap[var] = pu

print ">> Compiling functions..."
t = trainers.MinibatchTrainer(rbm, umap)
m = monitors.reconstruction_mse(s, rbm.v)
m_data = s['data'][rbm.v]
m_model = s['model'][rbm.v]
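# A sketch of compiling and running this trainer, mirroring the persistent-CD
# example below; train_set_x and epochs are assumed names:
train = t.compile_function(initial_vmap, mb_size=mb_size,
                           monitors=[m, m_data, m_model], name='train')
for epoch in range(epochs):
    costs = [cost for cost, vdata, vmodel in train({rbm.v: train_set_x})]
    print "epoch %d: MSE = %.4f" % (epoch, np.mean(costs))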
        dtype=theano.config.floatX)

    def _initial_bv(self):
        return np.zeros(self.n_visible, dtype=theano.config.floatX)

    def _initial_bh(self):
        return np.zeros(self.n_hidden, dtype=theano.config.floatX)


rbm = TexpBinaryRBM(n_visible, n_hidden)
initial_vmap = { rbm.v: T.matrix('v') }

# We use single-step contrastive divergence (CD-1) to train the RBM. For this, we can use
# the CDUpdater, which requires symbolic CD-1 statistics:
s = stats.cd_stats(rbm, initial_vmap, visible_units=[rbm.v], hidden_units=[rbm.h], k=1)

# We create an updater for each parameter variable
umap = {}
for var in rbm.variables:
    pu = var + 0.001 * updaters.CDUpdater(rbm, var, s) # the learning rate is 0.001
    umap[var] = pu

# training
t = trainers.MinibatchTrainer(rbm, umap)
mse = monitors.reconstruction_mse(s, rbm.v)
train = t.compile_function(initial_vmap, mb_size=32, monitors=[mse], name='train', mode=mode)

epochs = 200
start_time = time.time()
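# A sketch of the timed training loop that would follow, in the style of the
# main() example above; data is assumed to hold the training matrix:
for epoch in range(epochs):
    costs = [c for c in train({ rbm.v: data })]
    print "epoch %d: MSE = %.6f" % (epoch, np.mean(costs))
print "training took %.2f seconds" % (time.time() - start_time)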
def morbrun1(f1=1, f2=1, v1=1, v2=1, kern=1):
    # flatten to 1D; the reshapes below restore (examples, maps, height, width)
    test_set_x = np.array(eval_print1).flatten()
    valid_set_x = np.array(eval_print3).flatten()
    train_set_x = np.array(eval_print2).flatten()
    train_set_x = train_set_x.reshape(np.array(eval_print2).shape[0] * batchm, kern, v1, v2)
    valid_set_x = valid_set_x.reshape(np.array(eval_print3).shape[0] * batchm, kern, v1, v2)
    test_set_x = test_set_x.reshape(np.array(eval_print1).shape[0] * batchm, kern, v1, v2)

    visible_maps = kern
    hidden_maps = neuron # 100 # 50
    filter_height = f1 # 7 # 8
    filter_width = f2 # 30 # 8
    mb_size = batchm # 1 minibatch

    print ">> Constructing RBM..."
    fan_in = visible_maps * filter_height * filter_width

    """
    initial_W = numpy.asarray(
        self.numpy_rng.uniform(
            low = -numpy.sqrt(3. / fan_in),
            high = numpy.sqrt(3. / fan_in),
            size = self.filter_shape),
        dtype=theano.config.floatX)
    """
    numpy_rng = np.random.RandomState(123)
    initial_W = np.asarray(
        numpy_rng.normal(0, 0.5 / np.sqrt(fan_in),
                         size=(hidden_maps, visible_maps, filter_height, filter_width)),
        dtype=theano.config.floatX)
    initial_bv = np.zeros(visible_maps, dtype=theano.config.floatX)
    initial_bh = np.zeros(hidden_maps, dtype=theano.config.floatX)

    shape_info = {
        'hidden_maps': hidden_maps,
        'visible_maps': visible_maps,
        'filter_height': filter_height,
        'filter_width': filter_width,
        'visible_height': v1, # 45+8
        'visible_width': v2, # 30
        'mb_size': mb_size
    }

    # rbms.SigmoidBinaryRBM(n_visible, n_hidden)
    rbm = morb.base.RBM()
    rbm.v = units.BinaryUnits(rbm, name='v') # visibles
    rbm.h = units.BinaryUnits(rbm, name='h') # hiddens
    rbm.W = parameters.Convolutional2DParameters(rbm, [rbm.v, rbm.h],
                theano.shared(value=initial_W, name='W'), name='W', shape_info=shape_info)
    # one bias per map (so shared across width and height):
    rbm.bv = parameters.SharedBiasParameters(rbm, rbm.v, 3, 2,
                theano.shared(value=initial_bv, name='bv'), name='bv')
    rbm.bh = parameters.SharedBiasParameters(rbm, rbm.h, 3, 2,
                theano.shared(value=initial_bh, name='bh'), name='bh')

    initial_vmap = {rbm.v: T.tensor4('v')}

    # calculate weight updates using CD-k statistics (k=5 here)
    print ">> Constructing contrastive divergence updaters..."
    s = stats.cd_stats(rbm, initial_vmap, visible_units=[rbm.v], hidden_units=[rbm.h], k=5,
                       mean_field_for_stats=[rbm.v], mean_field_for_gibbs=[rbm.v])

    lr_cd = 0.001
    if indk == -1:
        lr_cd = 0

    umap = {}
    for var in rbm.variables:
        pu = var + lr_cd * updaters.CDUpdater(rbm, var, s)
        umap[var] = pu

    print ">> Compiling functions..."
    t = trainers.MinibatchTrainer(rbm, umap)
    m = monitors.reconstruction_mse(s, rbm.v)
    e_data = rbm.energy(s['data']).mean()
    e_model = rbm.energy(s['model']).mean()

    # train = t.compile_function(initial_vmap, mb_size=32, monitors=[m], name='train', mode=mode)
    train = t.compile_function(initial_vmap, mb_size=mb_size,
                               monitors=[m, e_data, e_model], name='train', mode=mode)

    # TRAINING
    epochs = epoch_cd
    print ">> Training for %d epochs..." % epochs

    for epoch in range(epochs):
        monitoring_data_train = [(cost, energy_data, energy_model)
            for cost, energy_data, energy_model in train({rbm.v: train_set_x})]
        mses_train, edata_train_list, emodel_train_list = zip(*monitoring_data_train)

    # print rbm.W.var.get_value().shape
    lay1w = rbm.W.var.get_value()
    Wl = theano.shared(lay1w)
    lay1bh = rbm.bh.var.get_value()
    bhl = theano.shared(lay1bh)
    # print Wl.get_value().shape
    return [Wl, bhl]
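# A hypothetical call, using the shapes suggested by the inline comments
# (7x30 filters on 53x30 visible maps); the eval_print* globals and batchm,
# neuron, indk, epoch_cd, mode must already be defined:
Wl, bhl = morbrun1(f1=7, f2=30, v1=53, v2=30, kern=1)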
data_context_train = data_context[:-1000, :]
data_context_eval = data_context[-1000:, :]

n_visible = data.shape[1]
n_context = data_context.shape[1]
n_hidden = 100

print ">> Constructing RBM..."
rbm = rbms.BinaryBinaryCRBM(n_visible, n_hidden, n_context)
initial_vmap = {rbm.v: T.matrix('v'), rbm.x: T.matrix('x')}

# try to calculate weight updates using CD-1 stats
print ">> Constructing contrastive divergence updaters..."
s = stats.cd_stats(rbm, initial_vmap, visible_units=[rbm.v], hidden_units=[rbm.h],
                   context_units=[rbm.x], k=1)

umap = {}
for var in rbm.variables:
    pu = var + 0.0005 * updaters.CDUpdater(rbm, var, s)
    umap[var] = pu

print ">> Compiling functions..."
t = trainers.MinibatchTrainer(rbm, umap)
m = monitors.reconstruction_mse(s, rbm.v)
mce = monitors.reconstruction_crossentropy(s, rbm.v)
free_energy = T.mean(rbm.free_energy([rbm.h], s['data'])) # take the mean over the minibatch
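# A sketch of compiling with all three monitors and running one epoch;
# data_train is an assumed name, paired with data_context_train above:
train = t.compile_function(initial_vmap, mb_size=32,
                           monitors=[m, mce, free_energy], name='train')
for mse, ce, fe in train({ rbm.v: data_train, rbm.x: data_context_train }):
    print "MSE = %.4f, CE = %.4f, F = %.2f" % (mse, ce, fe)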
n_hidden = 500
mb_size = 20
k = 15
learning_rate = 0.1
epochs = 15

print ">> Constructing RBM..."
rbm = rbms.BinaryBinaryRBM(n_visible, n_hidden)
initial_vmap = { rbm.v: T.matrix('v') }
persistent_vmap = { rbm.h: theano.shared(np.zeros((mb_size, n_hidden), dtype=theano.config.floatX)) }

# try to calculate weight updates using CD stats
print ">> Constructing contrastive divergence updaters..."
s = stats.cd_stats(rbm, initial_vmap, visible_units=[rbm.v], hidden_units=[rbm.h], k=k,
                   persistent_vmap=persistent_vmap,
                   mean_field_for_stats=[rbm.v], mean_field_for_gibbs=[rbm.v])

umap = {}
for var in rbm.variables:
    pu = var + (learning_rate / float(mb_size)) * updaters.CDUpdater(rbm, var, s)
    umap[var] = pu

print ">> Compiling functions..."
t = trainers.MinibatchTrainer(rbm, umap)
m = monitors.reconstruction_mse(s, rbm.v)
m_data = s['data'][rbm.v]
m_model = s['model'][rbm.v]
e_data = rbm.energy(s['data']).mean()
e_model = rbm.energy(s['model']).mean()

# train = t.compile_function(initial_vmap, mb_size=32, monitors=[m], name='train', mode=mode)
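# A sketch completing the compile step with the energy monitors, as the
# convolutional example does; train_set_x is an assumed name for the data.
# The persistent chain state stored in persistent_vmap is updated as a side
# effect of each call to train(), which is what makes this persistent CD:
train = t.compile_function(initial_vmap, mb_size=mb_size,
                           monitors=[m, e_data, e_model], name='train', mode=mode)
for epoch in range(epochs):
    results = [r for r in train({ rbm.v: train_set_x })]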
k = 1 # 15
learning_rate = 0.01 # 0.1
epochs = 2000

print ">> Constructing RBM..."
# rbm = rbms.LearntPrecisionGaussianBinaryRBM(n_visible, n_hidden)
rbm = rbms.LearntPrecisionSeparateGaussianBinaryRBM(n_visible, n_hidden_mean, n_hidden_precision)
initial_vmap = {rbm.v: T.matrix('v')}

# try to calculate weight updates using CD stats
print ">> Constructing contrastive divergence updaters..."
# s = stats.cd_stats(rbm, initial_vmap, visible_units=[rbm.v], hidden_units=[rbm.h], k=k)
s = stats.cd_stats(rbm, initial_vmap, visible_units=[rbm.v], hidden_units=[rbm.hp, rbm.hm], k=k)

# We create an updater for each parameter variable.
# IMPORTANT: the precision parameters must be constrained to be negative.
# variables = [rbm.Wm.var, rbm.bvm.var, rbm.bh.var, rbm.Wp.var, rbm.bvp.var]
variables = [rbm.Wm.var, rbm.bvm.var, rbm.bhm.var, rbm.Wp.var, rbm.bvp.var, rbm.bhp.var]
precision_variables = [rbm.Wp.var, rbm.bvp.var]

umap = {}
for var in variables:
    pu = var + (learning_rate / mb_size) * updaters.CDUpdater(rbm, var, s)
    umap[var] = pu
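# The negativity constraint mentioned above is not enforced by the CD update
# itself. A minimal sketch of one way to do it, by clipping the shared values
# after each parameter update (an assumption, not a dedicated Morb mechanism):
def clip_precision_params():
    for var in precision_variables:
        var.set_value(np.minimum(var.get_value(), 0)) # keep precision parameters <= 0
# calling clip_precision_params() after every pass over the training data
# keeps the precision parameters in the valid (negative) range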