def test_multiple_monitoring_datasets():
    # tests that DefaultTrainingAlgorithm can take multiple
    # monitoring datasets.

    BATCH_SIZE = 2
    BATCHES = 3
    NUM_FEATURES = 4
    dim = 3
    m = 10

    rng = np.random.RandomState([2014, 02, 25])
    X = rng.randn(m, dim)
    Y = rng.randn(m, dim)

    train = DenseDesignMatrix(X=X)
    test = DenseDesignMatrix(X=Y)

    algorithm = DefaultTrainingAlgorithm(
        batch_size=BATCH_SIZE,
        batches_per_iter=BATCHES,
        monitoring_dataset={'train': train, 'test': test})

    model = S3C(nvis=dim, nhid=1,
                irange=.01, init_bias_hid=0., init_B=1.,
                min_B=1., max_B=1., init_alpha=1.,
                min_alpha=1., max_alpha=1., init_mu=0.,
                m_step=Grad_M_Step(learning_rate=0.),
                e_step=E_Step(h_new_coeff_schedule=[1.]))
    algorithm.setup(model=model, dataset=train)
    algorithm.train(dataset=train)
def test_counting():

    BATCH_SIZE = 2
    BATCHES = 3
    NUM_FEATURES = 4
    num_examples = BATCHES * BATCH_SIZE

    dataset = DummyDataset(num_examples=num_examples,
                           num_features=NUM_FEATURES)

    algorithm = DefaultTrainingAlgorithm(batch_size=BATCH_SIZE,
                                         batches_per_iter=BATCHES)

    model = S3C(nvis=NUM_FEATURES, nhid=1,
                irange=.01, init_bias_hid=0., init_B=1.,
                min_B=1., max_B=1., init_alpha=1.,
                min_alpha=1., max_alpha=1., init_mu=0.,
                m_step=Grad_M_Step(learning_rate=0.),
                e_step=E_Step(h_new_coeff_schedule=[1.]))
    algorithm.setup(model=model, dataset=dataset)
    algorithm.train(dataset=dataset)

    if not (model.monitor.get_batches_seen() == BATCHES):
        raise AssertionError('Should have seen ' + str(BATCHES) +
                             ' batches but saw ' +
                             str(model.monitor.get_batches_seen()))

    assert model.monitor.get_examples_seen() == num_examples
    assert isinstance(model.monitor.get_examples_seen(), py_integer_types)
    assert isinstance(model.monitor.get_batches_seen(), py_integer_types)
def __init__(self):
    """ gets a small batch of data
        sets up an S3C model and learns on the data
        creates an expression for the log likelihood of the data
    """

    self.tol = 1e-5

    #dataset = serial.load('${GOODFELI_TMP}/cifar10_preprocessed_train_1K.pkl')

    X = np.random.RandomState([1, 2, 3]).randn(1000, 108)
    #dataset.get_batch_design(1000)
    #X = X[:,0:2]
    #warnings.warn('hack')
    #X[0,0] = 1.
    #X[0,1] = -1.
    m, D = X.shape
    N = 300

    self.model = S3C(nvis=D,
                     #disable_W_update = 1,
                     nhid=N,
                     irange=.5,
                     init_bias_hid=-.1,
                     init_B=1.,
                     min_B=1e-8,
                     max_B=1e8,
                     tied_B=1,
                     e_step=E_Step_Scan(
                         #h_new_coeff_schedule = [ ],
                         h_new_coeff_schedule=[.01]),
                     init_alpha=1.,
                     min_alpha=1e-8,
                     max_alpha=1e8,
                     init_mu=1.,
                     m_step=Grad_M_Step(learning_rate=1.0),
                     )

    #warnings.warn('hack')
    #W = self.model.W.get_value()
    #W[0,0] = 1.
    #W[1,0] = 1.
    #self.model.W.set_value(W)

    self.orig_params = self.model.get_param_values()

    model = self.model
    self.mf_obs = model.e_step.infer(X)

    self.stats = SufficientStatistics.from_observations(
        needed_stats=model.m_step.needed_stats(), V=X, **self.mf_obs)

    self.prob = self.model.expected_log_prob_vhs(
        self.stats, H_hat=self.mf_obs['H_hat'], S_hat=self.mf_obs['S_hat'])

    self.X = X
    self.m = m
    self.D = D
    self.N = N
def __init__(self):
    """ gets a small batch of data
        sets up an S3C model
    """

    # We also have to change the value of config.floatX in __init__.
    self.prev_floatX = config.floatX
    config.floatX = 'float64'

    try:
        self.tol = 1e-5

        #dataset = serial.load('${PYLEARN2_DATA_PATH}/stl10/stl10_patches/data.pkl')
        #X = dataset.get_batch_design(1000)
        #X = X[:,0:5]

        X = np.random.RandomState([1, 2, 3]).randn(1000, 5)
        X -= X.mean()
        X /= X.std()
        m, D = X.shape
        N = 5

        #don't give the model an e_step or learning rate so it won't spend years compiling a learn_func
        self.model = S3C(nvis=D,
                         nhid=N,
                         irange=.1,
                         init_bias_hid=0.,
                         init_B=3.,
                         min_B=1e-8,
                         max_B=1000.,
                         init_alpha=1.,
                         min_alpha=1e-8,
                         max_alpha=1000.,
                         init_mu=1.,
                         e_step=None,
                         m_step=Grad_M_Step(),
                         min_bias_hid=-1e30,
                         max_bias_hid=1e30,
                         )
        self.model.make_pseudoparams()

        self.h_new_coeff_schedule = [.1, .2, .3, .4, .5, .6, .7, .8, .9, 1.]

        self.e_step = E_Step_Scan(
            h_new_coeff_schedule=self.h_new_coeff_schedule)
        self.e_step.register_model(self.model)

        self.X = X
        self.N = N
        self.m = m
    finally:
        config.floatX = self.prev_floatX
def __init__(self):
    """ gets a small batch of data
        sets up an S3C model and learns on the data
        creates an expression for the log likelihood of the data
    """

    # We also have to change the value of config.floatX in __init__.
    self.prev_floatX = config.floatX
    config.floatX = 'float64'

    try:
        self.tol = 1e-5

        if config.mode in ["DebugMode", "DEBUG_MODE"]:
            X = np.random.RandomState([1, 2, 3]).randn(30, 108)
            m, D = X.shape
            N = 10
        else:
            X = np.random.RandomState([1, 2, 3]).randn(1000, 108)
            m, D = X.shape
            N = 300

        self.model = S3C(nvis=D,
                         nhid=N,
                         irange=.5,
                         init_bias_hid=-.1,
                         init_B=1.,
                         min_B=1e-8,
                         max_B=1e8,
                         tied_B=1,
                         e_step=E_Step_Scan(h_new_coeff_schedule=[.01]),
                         init_alpha=1.,
                         min_alpha=1e-8,
                         max_alpha=1e8,
                         init_mu=1.,
                         m_step=Grad_M_Step(learning_rate=1.0),
                         )

        self.orig_params = self.model.get_param_values()

        model = self.model
        self.mf_obs = model.e_step.infer(X)

        self.stats = SufficientStatistics.from_observations(
            needed_stats=model.m_step.needed_stats(), V=X, **self.mf_obs)

        self.prob = self.model.expected_log_prob_vhs(
            self.stats, H_hat=self.mf_obs['H_hat'],
            S_hat=self.mf_obs['S_hat'])

        self.X = X
        self.m = m
        self.D = D
        self.N = N
    finally:
        config.floatX = self.prev_floatX
def make_s3c(self):

    return S3C(nvis=self.D,
               nhid=self.N,
               irange=.1,
               init_bias_hid=-1.5,
               init_B=3.,
               min_B=1e-8,
               max_B=1000.,
               init_alpha=1.,
               min_alpha=1e-8,
               max_alpha=1000.,
               init_mu=1.,
               e_step=None,
               m_step=Grad_M_Step(),
               min_bias_hid=-1e30,
               max_bias_hid=1e30,
               )
def __init__(self):
    """ gets a small batch of data
        sets up a PD-DBM model
    """

    self.tol = 1e-5

    X = np.random.RandomState([1, 2, 3]).randn(1000, 5)
    X -= X.mean()
    X /= X.std()
    m, D = X.shape
    N = 6
    N2 = 7

    s3c = S3C(nvis=D,
              nhid=N,
              irange=.1,
              init_bias_hid=-1.5,
              init_B=3.,
              min_B=1e-8,
              max_B=1000.,
              init_alpha=1.,
              min_alpha=1e-8,
              max_alpha=1000.,
              init_mu=1.,
              e_step=None,
              m_step=Grad_M_Step(),
              min_bias_hid=-1e30,
              max_bias_hid=1e30,
              )

    rbm = RBM(nvis=N, nhid=N2, irange=.1,
              init_bias_vis=-1.5, init_bias_hid=1.5)

    #don't give the model an inference procedure or learning rate so it won't spend years compiling a learn_func
    self.model = PDDBM(dbm=DBM(use_cd=1, rbms=[rbm]),
                       s3c=s3c)

    self.model.make_pseudoparams()

    self.inference_procedure = InferenceProcedure(
        clip_reflections=True,
        rho=.5)
    self.inference_procedure.register_model(self.model)

    self.X = X
    self.N = N
    self.N2 = N2
    self.m = m
model.e_step = e_step
e_step.register_model(model)

print 'loading data'
data = np.load(data_path)
m, n = data.shape

print 'batch_size: ', batch_size_str
batch_size = int(batch_size_str)
assert m % batch_size == 0

print 'building energy functional expression'
V = T.matrix()
obs = model.get_hidden_obs(V)
needed_stats = S3C.energy_functional_needed_stats()
stats = SufficientStatistics.from_observations(needed_stats=needed_stats,
                                               V=V, **obs)
energy_functional = model.energy_functional_batch(V=V, **obs)

assert len(energy_functional.type.broadcastable) == 1

print 'compiling energy functional theano function'
f = function([V], energy_functional)

print 'computing energy functional values'
out = np.zeros((m,), dtype='float32')

times = []
for i in xrange(0, m, batch_size):
    print '\t', i
print 'done'

batch_size = int(sys.argv[2])
num_batches = int(sys.argv[3])
ga_updates = int(sys.argv[4])
learning_rate = float(sys.argv[5])

print 'defining em functional...'
import theano.tensor as T

V = T.matrix("V")

model.make_pseudoparams()

from pylearn2.models.s3c import S3C
needed_stats = S3C.expected_log_prob_vhs_needed_stats()

from pylearn2.models.s3c import SufficientStatistics

params = []
for i in xrange(len(model.e_step.h_new_coeff_schedule)):
    param = sharedX(model.e_step.h_new_coeff_schedule[i], name='h' + str(i))
    model.e_step.h_new_coeff_schedule[i] = param
    params.append(param)
for i in xrange(len(model.e_step.s_new_coeff_schedule)):
    param = sharedX(model.e_step.s_new_coeff_schedule[i], name='s' + str(i))
    model.e_step.s_new_coeff_schedule[i] = param
    params.append(param)
def __init__(self, model=None, X=None, tol=1e-5,
             init_H=None, init_S=None, init_G=None):
    """ gets a small batch of data
        sets up a PD-DBM model
    """

    self.tol = tol

    if X is None:
        X = np.random.RandomState([1, 2, 3]).randn(1000, 5)
        X -= X.mean()
        X /= X.std()
    m, D = X.shape

    if model is None:
        N = 6
        N2 = 7

        s3c = S3C(nvis=D,
                  nhid=N,
                  irange=.1,
                  init_bias_hid=-1.5,
                  init_B=3.,
                  min_B=1e-8,
                  max_B=1000.,
                  init_alpha=1.,
                  min_alpha=1e-8,
                  max_alpha=1000.,
                  init_mu=1.,
                  e_step=None,
                  m_step=Grad_M_Step(),
                  min_bias_hid=-1e30,
                  max_bias_hid=1e30,
                  )

        rbm = RBM(nvis=N, nhid=N2, irange=.5,
                  init_bias_vis=-1.5, init_bias_hid=1.5)

        #don't give the model an inference procedure or learning rate so it won't spend years compiling a learn_func
        self.model = PDDBM(dbm=DBM(use_cd=1, rbms=[rbm]),
                           s3c=s3c)

        self.model.make_pseudoparams()

        self.inference_procedure = InferenceProcedure(
            clip_reflections=True,
            rho=.5)
        self.inference_procedure.register_model(self.model)
    else:
        self.model = model
        self.inference_procedure = model.inference_procedure
        N = model.s3c.nhid
        N2 = model.dbm.rbms[0].nhid

    self.X = X
    self.N = N
    self.N2 = N2
    self.m = m

    if init_H is None:
        self.init_H = np.cast[config.floatX](
            self.model.rng.uniform(0., 1., (self.m, self.N)))
        self.init_S = np.cast[config.floatX](
            self.model.rng.uniform(-5., 5., (self.m, self.N)))
        self.init_G = np.cast[config.floatX](
            self.model.rng.uniform(0., 1., (self.m, self.N2)))
    else:
        assert init_S is not None
        assert init_G is not None
        self.init_H = init_H
        self.init_S = init_S
        self.init_G = init_G
batch_size = int(sys.argv[2])
num_batches = int(sys.argv[3])
ga_updates = int(sys.argv[4])
learning_rate = float(sys.argv[5])

print 'defining em functional...'
import theano.tensor as T

V = T.matrix("V")

model.make_pseudoparams()

obs = model.e_step.variational_inference(V)

from pylearn2.models.s3c import S3C
needed_stats = S3C.expected_log_prob_vhs_needed_stats()

from pylearn2.models.s3c import SufficientStatistics

stats = SufficientStatistics.from_observations(needed_stats=needed_stats,
                                               V=V, **obs)

em_functional = model.em_functional(stats=stats,
                                    H_hat=obs['H_hat'],
                                    S_hat=obs['S_hat'],
                                    var_s0_hat=obs['var_s0_hat'],
                                    var_s1_hat=obs['var_s1_hat'])

assert len(em_functional.type.broadcastable) == 0

print 'compiling function...'

from theano import function

H = sharedX(np.zeros((batch_size, model.nhid), dtype='float32'))
S = sharedX(np.zeros((batch_size, model.nhid), dtype='float32'))

new_stats = SufficientStatistics.from_observations(needed_stats=needed_stats,
                                                   V=V,
                                                   H_hat=H,
                                                   S_hat=S,
model.e_step = e_step
e_step.register_model(model)

print 'loading data'
data = np.load(data_path)
m, n = data.shape

print 'batch_size: ', batch_size_str
batch_size = int(batch_size_str)
assert m % batch_size == 0

print 'building energy functional expression'
V = T.matrix()
obs = model.get_hidden_obs(V)
needed_stats = S3C.energy_functional_needed_stats()
stats = SufficientStatistics.from_observations(needed_stats=needed_stats,
                                               V=V, **obs)
energy_functional = model.energy_functional_batch(V=V, **obs)

assert len(energy_functional.type.broadcastable) == 1

print 'compiling energy functional theano function'
f = function([V], energy_functional)

print 'computing energy functional values'
out = np.zeros((m,), dtype='float32')

times = []
for i in xrange(0, m, batch_size):
def __init__(self, model):
    """ model must be a PDDBM or S3C model """

    self.verbose = False

    batch_size = 87
    model._test_batch_size = batch_size

    self.model = model

    pddbm = hasattr(model, 'dbm')

    if not pddbm:
        #hack s3c model to follow pddbm interface
        model.inference_procedure = model.e_step
        has_labels = False
    else:
        has_labels = model.dbm.num_classes > 0

    V = T.matrix("V")
    if has_labels:
        Y = T.matrix("Y")
    else:
        Y = None

    if config.compute_test_value != 'off':
        V.tag.test_value = np.cast[V.type.dtype](
            model.get_input_space().get_origin_batch(batch_size))

    self.model.make_pseudoparams()

    obs = model.inference_procedure.infer(V, Y)

    obs['H_hat'] = T.clip(obs['H_hat'], 1e-7, 1. - 1e-7)
    if pddbm:
        obs['G_hat'] = tuple([T.clip(elem, 1e-7, 1. - 1e-7)
                              for elem in obs['G_hat']])

    needed_stats = S3C.expected_log_prob_vhs_needed_stats()

    trunc_kl = model.inference_procedure.truncated_KL(V, obs=obs, Y=Y).mean()

    assert len(trunc_kl.type.broadcastable) == 0

    if pddbm:
        G = [sharedX(np.zeros((batch_size, rbm.nhid), dtype='float32'))
             for rbm in model.dbm.rbms]
        h_dim = model.s3c.nhid
    else:
        h_dim = model.nhid

    H = sharedX(np.zeros((batch_size, h_dim), dtype='float32'))
    S = sharedX(np.zeros((batch_size, h_dim), dtype='float32'))

    updates = {H: obs['H_hat'], S: obs['S_hat']}

    if pddbm:
        for G_elem, G_hat_elem in zip(G, obs['G_hat']):
            updates[G_elem] = G_hat_elem

    inputs = [V]
    if has_labels:
        inputs.append(Y)

    if self.verbose:
        print 'batch gradient class compiling init function'
    self.init = function(inputs, trunc_kl, updates=updates)
    if self.verbose:
        print 'done'

    new_stats = SufficientStatistics.from_observations(
        needed_stats=needed_stats,
        V=V,
        H_hat=H,
        S_hat=S,
        var_s0_hat=obs['var_s0_hat'],
        var_s1_hat=obs['var_s1_hat'])

    obs = {
        "H_hat": H,
        "S_hat": S,
        "var_s0_hat": obs['var_s0_hat'],
        "var_s1_hat": obs['var_s1_hat'],
    }

    if pddbm:
        obs['G_hat'] = G

    obj = self.model.inference_procedure.truncated_KL(V, obs=obs, Y=Y).mean()

    if pddbm:
        grad_G_sym = [T.grad(obj, G_elem) for G_elem in G]
    grad_H_sym = T.grad(obj, H)
    grad_S_sym = T.grad(obj, S)

    grad_H = sharedX(H.get_value())
    grad_S = sharedX(S.get_value())

    updates = {grad_H: grad_H_sym, grad_S: grad_S_sym}

    if pddbm:
        grad_G = [sharedX(G_elem.get_value()) for G_elem in G]
        for grad_G_elem, grad_G_sym_elem in zip(grad_G, grad_G_sym):
            updates[grad_G_elem] = grad_G_sym_elem

    if self.verbose:
        print 'batch gradient class compiling gradient function'
    self.compute_grad = function(inputs, updates=updates)
    if self.verbose:
        print 'done'

    if self.verbose:
        print 'batch gradient class compiling objective function'
    self.obj = function(inputs, obj)
    if self.verbose:
        print 'done'

    self.S = S
    self.H = H
    self.grad_S = grad_S
    self.grad_H = grad_H
    if pddbm:
        self.G = G
        self.grad_G = grad_G

    self.pddbm = pddbm
    self.has_labels = has_labels
def __init__(self, model):
    """ model must be a PDDBM or S3C model """

    self.verbose = False

    batch_size = 87
    model._test_batch_size = batch_size

    self.model = model

    pddbm = hasattr(model, 'dbm')

    if not pddbm:
        #hack s3c model to follow pddbm interface
        model.inference_procedure = model.e_step
        has_labels = False
    else:
        has_labels = model.dbm.num_classes > 0

    V = T.matrix("V")
    if has_labels:
        Y = T.matrix("Y")
    else:
        Y = None

    if config.compute_test_value != 'off':
        V.tag.test_value = np.cast[V.type.dtype](
            model.get_input_space().get_origin_batch(batch_size))

    self.model.make_pseudoparams()

    obs = {}
    for key in model.inference_procedure.hidden_obs:
        obs[key] = model.inference_procedure.hidden_obs[key]

    obs['H_hat'] = T.clip(obs['H_hat'], 1e-7, 1. - 1e-7)
    if pddbm:
        obs['G_hat'] = tuple([T.clip(elem, 1e-7, 1. - 1e-7)
                              for elem in obs['G_hat']])

    needed_stats = S3C.expected_log_prob_vhs_needed_stats()

    trunc_kl = model.inference_procedure.truncated_KL(V, obs, Y).mean()

    assert len(trunc_kl.type.broadcastable) == 0

    if pddbm:
        G = model.inference_procedure.hidden_obs['G_hat']
        h_dim = model.s3c.nhid
    else:
        h_dim = model.nhid

    H = model.inference_procedure.hidden_obs['H_hat']
    S = model.inference_procedure.hidden_obs['S_hat']

    inputs = [V]
    if has_labels:
        inputs.append(Y)

    if self.verbose:
        print 'batch gradient class compiling init function'
    self.init_kl = function(inputs, trunc_kl)
    if self.verbose:
        print 'done'

    new_stats = SufficientStatistics.from_observations(
        needed_stats=needed_stats,
        V=V,
        H_hat=H,
        S_hat=S,
        var_s0_hat=obs['var_s0_hat'],
        var_s1_hat=obs['var_s1_hat'])

    #obs = {
    #        "H_hat" : H,
    #        "S_hat" : S,
    #        "var_s0_hat" : obs['var_s0_hat'],
    #        "var_s1_hat" : obs['var_s1_hat'],
    #      }

    if pddbm:
        obs['G_hat'] = G

    obj = self.model.inference_procedure.truncated_KL(V, obs, Y).mean()

    if pddbm:
        grad_G_sym = [T.grad(obj, G_elem) for G_elem in G]
    grad_H_sym = T.grad(obj, H)
    grad_S_sym = T.grad(obj, S)

    grad_H = sharedX(H.get_value())
    grad_S = sharedX(S.get_value())

    updates = {grad_H: grad_H_sym, grad_S: grad_S_sym}

    if pddbm:
        grad_G = [sharedX(G_elem.get_value()) for G_elem in G]
        for grad_G_elem, grad_G_sym_elem in zip(grad_G, grad_G_sym):
            updates[grad_G_elem] = grad_G_sym_elem

    if self.verbose:
        print 'batch gradient class compiling gradient function'
    self.compute_grad = function(inputs, updates=updates)
    if self.verbose:
        print 'done'

    if self.verbose:
        print 'batch gradient class compiling objective function'
    self.obj = function(inputs, obj)
    if self.verbose:
        print 'done'

    self.S = S
    self.H = H
    self.grad_S = grad_S
    self.grad_H = grad_H
    if pddbm:
        self.G = G
        self.grad_G = grad_G

    self.pddbm = pddbm
    self.has_labels = has_labels