def __init__(self, rbms=None, num_vis=None, hid_layers_size=None, bottomRBMtype=None, add_opts=None):
    # Mutable default arguments ([], {}) are shared across calls in Python;
    # use None sentinels instead.
    hid_layers_size = hid_layers_size if hid_layers_size is not None else []
    add_opts = add_opts if add_opts is not None else {}
    self.params = []
    if not rbms:
        # Build a fresh stack of RBMs from the requested layer sizes.
        self.input = T.matrix('input')
        self.num_layers = len(hid_layers_size)
        self.num_vis = num_vis
        self.stack = []
        num_vis_cur = self.num_vis
        input_cur = self.input
        self.need_train = True
        for l in xrange(0, self.num_layers):
            num_hid_cur = hid_layers_size[l]
            if l > 0:
                # Each layer reads the previous layer's hidden activations.
                input_cur = self.stack[-1].output
                num_vis_cur = self.stack[-1].num_hid
            rbm = None
            if l == 0:
                # A replicated-softmax visible layer is allowed only at the bottom.
                if bottomRBMtype is None or bottomRBMtype == RBM:
                    rbm = RBM(input=input_cur, num_vis=num_vis_cur, num_hid=num_hid_cur)
                elif bottomRBMtype == RBMReplSoftmax:
                    rbm = RBMReplSoftmax(input=input_cur, num_vis=num_vis_cur, num_hid=num_hid_cur)
            else:
                rbm = RBM(input=input_cur, num_vis=num_vis_cur, num_hid=num_hid_cur)
            assert rbm is not None, "unsupported bottomRBMtype: %s" % bottomRBMtype
            self.stack.append(rbm)
    else:
        # Wrap pre-trained RBMs: chain their inputs/outputs into a stack.
        self.stack = rbms
        self.num_vis = self.stack[0].num_vis
        self.num_layers = len(self.stack)
        self.need_train = True
        self.input = rbms[0].input
        for l in xrange(1, self.num_layers):
            self.stack[l].input = self.stack[l - 1].output
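# --- Usage sketch (illustrative, not from the original script) ---
# Presumably RBMStack's constructor, given the RBMStack call commented out
# further below; the layer sizes here are hypothetical. A replicated-softmax
# bottom layer over 2000 word counts, followed by two binary hidden layers:
dbn = RBMStack(num_vis=2000, hid_layers_size=[500, 100],
               bottomRBMtype=RBMReplSoftmax)
# Or wrap already-trained RBMs; the else-branch re-chains their inputs:
#dbn = RBMStack(rbms=[rbm_bottom, rbm_top])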
num_cases = data.shape[0]
num_dims = data.shape[1]
num_vis = num_dims
# Keep the train/validation sets on the device as shared variables.
data_sh = theano.shared(np.asarray(data, dtype=theano.config.floatX), borrow=True)
data_valid_sh = theano.shared(np.asarray(data_valid, dtype=theano.config.floatX), borrow=True)

train_params = {'batch_size': 42,
                'learning_rate': 0.005,
                'cd_steps': 5,
                'max_epoch': 50,
                'persistent_on': True,
                'init_momentum': 0.5,
                'momentum': 0.9,
                'moment_start': 0.01,
                'weight_decay': 0.0002,
                'introspect_freq': 10}

num_hid = 60
if len(sys.argv) > 1:
    num_hid = int(sys.argv[1])

rbm = RBMReplSoftmax(num_vis=num_vis, num_hid=num_hid, from_cache=False)

num_batches = data_sh.get_value(borrow=True).shape[0] // train_params['batch_size']
max_epoch = train_params['max_epoch']
# Per-batch increment so epoch_ratio runs from 0 to 1 over the whole run.
train_params['ep_inc'] = np.float32(1.0 / (num_batches * max_epoch))
ep_inc = train_params['ep_inc']
persistent = train_params['persistent_on']
batch_size = train_params['batch_size']

index = T.lscalar('index')  # index to a minibatch
if persistent:
    # PCD: keep the fantasy particles' hidden states between updates.
    train_params['persistent'] = theano.shared(
        np.zeros((batch_size, rbm.num_hid), dtype=theano.config.floatX),
        borrow=True)
else:
    train_params['persistent'] = None

cost, free_en, gparam, updates = rbm.get_cost_updates(train_params)
updates.update([(rbm.epoch_ratio, rbm.epoch_ratio + ep_inc)])
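# --- Sketch: compiling and running the update built above (illustrative) ---
# Assumes `updates` already carries the parameter updates returned by
# rbm.get_cost_updates(); the loop and variable names here are hypothetical.
train_fn = theano.function(
    [index], [cost, free_en],
    updates=updates,
    givens=[(rbm.input, data_sh[index * batch_size:(index + 1) * batch_size])])

for ep in xrange(max_epoch):
    for b in xrange(num_batches):
        batch_cost, batch_free_en = train_fn(b)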
num_vis = num_dims
# Shuffle the training cases before uploading them to the device.
perm = np.random.permutation(num_cases)
data = data[perm]
data_sh = theano.shared(np.asarray(data, dtype=theano.config.floatX), borrow=True)

train_params = {'batch_size': 5,
                'learning_rate': 0.01,
                'cd_steps': 2,
                'max_epoch': 20,
                'persistent_on': False,
                'init_momentum': 0.5,
                'momentum': 0.9,
                'moment_start': 0.01,
                'weight_decay': 0.0001}

#rbm_rs = RBMStack(num_vis=10, hid_layers_size=[5], bottomRBMtype=RBMReplSoftmax)
# Small dummy word-count matrix (5 documents x 10 words) for smoke testing.
data_dum_np = np.asarray([[5, 6, 7, 2, 3, 4, 1, 30, 0, 0],
                          [0, 2, 5, 1, 0, 0, 1, 4, 5, 6],
                          [5, 30, 7, 1, 0, 1, 2, 0, 1, 2],
                          [20, 0, 1, 0, 1, 3, 4, 10, 4, 1],
                          [1, 0, 1, 3, 10, 5, 7, 1, 5, 1]], dtype=theano.config.floatX)
#data_dum_np = np.round(np.log(data_dum_np + 1))
data_dum = theano.shared(data_dum_np, borrow=True)

rbm = RBMReplSoftmax(num_vis=10, num_hid=10, from_cache=False)
#preh, hm, hs = rbm.sample_h_given_v(rbm.input)
#prev, vm, vs = rbm.sample_v_given_h(hs)
#
#f = theano.function([], [preh, hm, hs, prev, vm, vs], givens=[(rbm.input, data_sh[0:3])])

index = T.lscalar('index')  # index to a minibatch
max_epoch = train_params['max_epoch']
#num_batches = data_sh.get_value(borrow=True).shape[0] / train_params['batch_size']
#train_params['ep_inc'] = np.float32(1.0 / (num_batches * max_epoch))
#ep_inc = train_params['ep_inc']

l_rate = T.cast(train_params['learning_rate'], dtype=theano.config.floatX)
weight_decay = T.cast(train_params['weight_decay'], dtype=theano.config.floatX)
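# --- Sketch: what the commented probe above would compute (illustrative) ---
# Assumes sample_h_given_v / sample_v_given_h return (pre-activation, mean,
# sample) triples, as the commented-out lines suggest.
preh, hm, hs = rbm.sample_h_given_v(rbm.input)
prev, vm, vs = rbm.sample_v_given_h(hs)
probe = theano.function([], [preh, hm, hs, prev, vm, vs],
                        givens=[(rbm.input, data_sh[0:3])])
preh_v, hm_v, hs_v, prev_v, vm_v, vs_v = probe()  # one Gibbs half-step each way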
                'persistent_on': True,
                'init_momentum': 0.5,
                'momentum': 0.9,
                'moment_start': 0.01,
                'weight_decay': 0.0002,
                'mean_field': False,
                'introspect_freq': 10,
                'sparse_cost': 0.01,
                'sparse_damping': 0.9,
                'sparse_target': 0.2,
                'learning_rate_line': 0.001,
                'finetune_learning_rate': 0.0001,
                }

num_hid = 75
rbm = RBMReplSoftmax(num_vis=num_vis, num_hid=num_hid, train_params=train_params, from_cache=True)
#preh, h = rbm.prop_up(data_sh[0:100])
#f = theano.function([], [preh, h], givens=[(rbm.input, data_sh[0:100])])

# Start from a clean Redis db; introspection snapshots are keyed by iteration.
db_redis.r0.flushdb()
iter = 0  # note: shadows the builtin `iter`

def load_watches(watches):
    """Dump every watched array to Redis under an "<iteration>:<name>" key."""
    global iter
    start = time.time()
    for k in watches:
        name = "%i:%s" % (iter, k)
        load_bin(name, watches[k])
    db_redis.r0.set("last_it", iter)
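# --- Usage sketch for load_watches (illustrative) ---
# `load_bin` is defined elsewhere in this codebase; the watched attribute
# names (rbm.W, rbm.hbias) are assumptions about the RBM class.
for it in xrange(max_epoch):
    # ... one training epoch would run here ...
    load_watches({'W': rbm.W.get_value(borrow=True),
                  'hbias': rbm.hbias.get_value(borrow=True)})
    iter += 1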