def __init__(self, nvis, bias_from_marginals=None):
    """
    Set up a binary visible layer over an `nvis`-dimensional vector space.

    Parameters
    ----------
    nvis : int
        The dimension of the space.
    bias_from_marginals : dataset, optional
        A dataset whose marginals are used to initialize the visible
        biases. Its design matrix (from `get_design_matrix()`) must
        contain only 0s and 1s, with both values present. When None, the
        biases are initialized to zero.

    Raises
    ------
    AssertionError
        If the design matrix of `bias_from_marginals` is not strictly
        binary, or does not contain both a 0 and a 1.
    """
    # Only nvis is stored on the instance. The dataset is deliberately not
    # kept as an attribute, so it is never serialized with the layer.
    self.nvis = nvis

    self.space = VectorSpace(nvis)
    self.input_space = self.space

    if bias_from_marginals is None:
        init_bias = np.zeros((nvis,))
    else:
        X = bias_from_marginals.get_design_matrix()
        assert X.max() == 1., (
            "expected a binary design matrix, but its maximum is %s"
            % str(X.max()))
        assert X.min() == 0., (
            "expected a binary design matrix, but its minimum is %s"
            % str(X.min()))
        assert not np.any(np.logical_and(X > 0., X < 1.)), (
            "expected a binary design matrix, but found values strictly "
            "between 0 and 1")

        mean = X.mean(axis=0)
        # Keep the marginals away from exactly 0 and 1 before inverting
        # the sigmoid.
        mean = np.clip(mean, 1e-7, 1 - 1e-7)
        init_bias = inverse_sigmoid_numpy(mean)

    self.bias = sharedX(init_bias, 'visible_bias')
def test_binary_vis_layer_make_state():
    """
    Check that BinaryVector.make_state creates a shared variable whose
    value passes check_binary_samples.
    """
    n_units = 5
    n_examples = 1000
    tolerance = .04

    layer = BinaryVector(nvis=n_units)

    rng = np.random.RandomState([2012, 11, 1])
    target_mean = rng.uniform(1e-6, 1. - 1e-6, (n_units,))

    # Set the biases to the inverse sigmoid of the target means.
    biases = inverse_sigmoid_numpy(target_mean).astype(config.floatX)
    layer.set_biases(biases)

    state = layer.make_state(num_examples=n_examples, numpy_rng=rng)

    check_binary_samples(state.get_value(), (n_examples, n_units),
                         target_mean, tolerance)
def test_sample(self):
    """
    Check that BinaryVector.sample returns an expression whose evaluated
    value passes TestBinaryVector.check_samples.
    """
    assert hasattr(np, 'exp')

    n_units = 5
    n_examples = 1000
    tolerance = .04

    vis = BinaryVector(nvis=n_units)
    hid = DummyLayer()

    rng = np.random.RandomState([2012, 11, 1, 259])
    # Draw order is part of the test's determinism: means first, offsets
    # second.
    target_mean = rng.uniform(1e-6, 1. - 1e-6, (n_units, ))
    offsets = rng.randn(n_units)

    vis.set_biases(offsets.astype(config.floatX))

    # The biases hold `offsets`, so the state from above carries the rest
    # of the inverse sigmoid of the target means.
    residual = inverse_sigmoid_numpy(target_mean) - offsets
    z_var = sharedX(np.zeros((n_examples, n_units)) + residual)

    stream_seed = 2012 + 11 + 1
    theano_rng = MRG_RandomStreams(stream_seed)

    sample_expr = vis.sample(state_above=z_var, layer_above=hid,
                             theano_rng=theano_rng)
    sample_value = sample_expr.eval()

    TestBinaryVector.check_samples(sample_value, (n_examples, n_units),
                                   target_mean, tolerance)
def init_sigmoid_bias_from_array(arr):
    """
    Compute sigmoid biases from the per-column means of a design matrix.

    Parameters
    ----------
    arr : ndarray
        Design matrix. Its largest entry must equal 1 and its smallest
        entry must equal 0. Values strictly between 0 and 1 are allowed,
        so a dataset of Bernoulli parameters can be used.

    Returns
    -------
    init_bias
        `inverse_sigmoid_numpy` applied to the means over axis 0, after
        clipping them to [1e-7, 1 - 1e-7].

    Raises
    ------
    ValueError
        If the maximum of `arr` is not 1 or its minimum is not 0.
    """
    largest = arr.max()
    if largest != 1:
        raise ValueError("Expected design matrix to consist entirely "
                         "of 0s and 1s, but maximum value is "
                         + str(largest))

    smallest = arr.min()
    if smallest != 0.:
        raise ValueError("Expected design matrix to consist entirely of "
                         "0s and 1s, but minimum value is "
                         + str(smallest))

    # There is intentionally no check that every entry is exactly 0 or 1,
    # so the marginals can come from a dataset of Bernoulli parameters.
    marginals = np.clip(arr.mean(axis=0), 1e-7, 1 - 1e-7)
    return inverse_sigmoid_numpy(marginals)
def test_sample(self):
    """
    Check that BinaryVector.sample returns an expression whose evaluated
    value passes TestBinaryVector.check_samples.
    """
    assert hasattr(np, 'exp')

    n_units = 5
    n_examples = 1000
    tolerance = .04

    vis = BinaryVector(nvis=n_units)
    hid = DummyLayer()

    rng = np.random.RandomState([2012, 11, 1, 259])
    # Draw order is part of the test's determinism: means first, offsets
    # second.
    target_mean = rng.uniform(1e-6, 1. - 1e-6, (n_units,))
    offsets = rng.randn(n_units)

    vis.set_biases(offsets.astype(config.floatX))

    # The biases hold `offsets`, so the state from above carries the rest
    # of the inverse sigmoid of the target means.
    residual = inverse_sigmoid_numpy(target_mean) - offsets
    z_var = sharedX(np.zeros((n_examples, n_units)) + residual)

    stream_seed = 2012 + 11 + 1
    theano_rng = MRG_RandomStreams(stream_seed)

    sample_expr = vis.sample(state_above=z_var, layer_above=hid,
                             theano_rng=theano_rng)
    sample_value = sample_expr.eval()

    TestBinaryVector.check_samples(sample_value, (n_examples, n_units),
                                   target_mean, tolerance)
def __init__(self, nvis, bias_from_marginals=None):
    """
    Set up a binary visible layer over an `nvis`-dimensional vector space.

    Parameters
    ----------
    nvis : int
        The dimension of the space.
    bias_from_marginals : dataset, optional
        A dataset whose marginals are used to initialize the visible
        biases. Its design matrix (from `get_design_matrix()`) must
        contain only 0s and 1s, with both values present. When None, the
        biases are initialized to zero.

    Raises
    ------
    AssertionError
        If the design matrix of `bias_from_marginals` is not strictly
        binary, or does not contain both a 0 and a 1.
    """
    # Only nvis is stored on the instance. The dataset is deliberately not
    # kept as an attribute, so it is never serialized with the layer.
    self.nvis = nvis

    self.space = VectorSpace(nvis)
    self.input_space = self.space

    if bias_from_marginals is None:
        init_bias = np.zeros((nvis,))
    else:
        X = bias_from_marginals.get_design_matrix()
        assert X.max() == 1., (
            "expected a binary design matrix, but its maximum is %s"
            % str(X.max()))
        assert X.min() == 0., (
            "expected a binary design matrix, but its minimum is %s"
            % str(X.min()))
        assert not np.any(np.logical_and(X > 0., X < 1.)), (
            "expected a binary design matrix, but found values strictly "
            "between 0 and 1")

        mean = X.mean(axis=0)
        # Keep the marginals away from exactly 0 and 1 before inverting
        # the sigmoid.
        mean = np.clip(mean, 1e-7, 1 - 1e-7)
        init_bias = inverse_sigmoid_numpy(mean)

    self.bias = sharedX(init_bias, 'visible_bias')
def run_rbm(pos_weight=1., neg_weight=1., bias_hid=-1.,
            bias_vis=inverse_sigmoid_numpy(1. / float(D))):
    """
    Load constant parameters into the enclosing `rbm` and score it.

    Every visible bias is set to `bias_vis` and every hidden bias to
    `bias_hid`. The weight matrix gets `pos_weight` on its diagonal and
    `-neg_weight` everywhere else. The default for `bias_vis` is computed
    once, when this function is defined, from the enclosing `D`.

    Returns the value of `good_prob_func(all_states, good_states)` as a
    Python float.
    """
    unit_vector = np.ones((D,), dtype='float32')
    rbm.bias_vis.set_value(unit_vector * bias_vis)
    rbm.bias_hid.set_value(unit_vector * bias_hid)

    # identity * (pos + neg) - ones * neg leaves pos_weight on the
    # diagonal and -neg_weight off the diagonal.
    diagonal_term = np.identity(D, dtype='float32') * (pos_weight + neg_weight)
    uniform_term = np.ones((D, D), dtype='float32') * neg_weight
    rbm.transformer._W.set_value(diagonal_term - uniform_term)

    return float(good_prob_func(all_states, good_states))
def run_rbm(pos_weight=1., neg_weight=1., bias_hid=-1.,
            bias_vis=inverse_sigmoid_numpy(1. / float(D))):
    """
    Load constant parameters into the enclosing `rbm` and score it.

    Every visible bias is set to `bias_vis` and every hidden bias to
    `bias_hid`. The weight matrix gets `pos_weight` on its diagonal and
    `-neg_weight` everywhere else. The default for `bias_vis` is computed
    once, when this function is defined, from the enclosing `D`.

    Returns the value of `good_prob_func(all_states, good_states)` as a
    Python float.
    """
    unit_vector = np.ones((D, ), dtype='float32')
    rbm.bias_vis.set_value(unit_vector * bias_vis)
    rbm.bias_hid.set_value(unit_vector * bias_hid)

    # identity * (pos + neg) - ones * neg leaves pos_weight on the
    # diagonal and -neg_weight off the diagonal.
    diagonal_term = np.identity(D, dtype='float32') * (pos_weight + neg_weight)
    uniform_term = np.ones((D, D), dtype='float32') * neg_weight
    rbm.transformer._W.set_value(diagonal_term - uniform_term)

    return float(good_prob_func(all_states, good_states))
def test_binary_vis_layer_sample():
    """
    Check that BinaryVector.sample returns an expression whose evaluated
    value passes check_binary_samples.
    """
    assert hasattr(np, 'exp')

    n_units = 5
    n_examples = 1000
    tolerance = .04

    class DummyLayer(object):
        """
        A stand-in layer for this test whose downward state and downward
        message are both just the state it is given.
        """

        def downward_state(self, state):
            return state

        def downward_message(self, state):
            return state

    vis = BinaryVector(nvis=n_units)
    hid = DummyLayer()

    rng = np.random.RandomState([2012, 11, 1, 259])
    # Draw order is part of the test's determinism: means first, offsets
    # second.
    target_mean = rng.uniform(1e-6, 1. - 1e-6, (n_units,))
    offsets = rng.randn(n_units)

    vis.set_biases(offsets.astype(config.floatX))

    # The biases hold `offsets`, so the state from above carries the rest
    # of the inverse sigmoid of the target means.
    residual = inverse_sigmoid_numpy(target_mean) - offsets
    z_var = sharedX(np.zeros((n_examples, n_units)) + residual)

    stream_seed = 2012 + 11 + 1
    theano_rng = MRG_RandomStreams(stream_seed)

    sample_expr = vis.sample(state_above=z_var, layer_above=hid,
                             theano_rng=theano_rng)
    sample_value = sample_expr.eval()

    check_binary_samples(sample_value, (n_examples, n_units),
                         target_mean, tolerance)
def init_sigmoid_bias_from_array(arr):
    """
    Compute sigmoid biases from the per-column means of a design matrix.

    Parameters
    ----------
    arr : ndarray
        Design matrix. Its largest entry must equal 1 and its smallest
        entry must equal 0. Values strictly between 0 and 1 are allowed,
        so a dataset of Bernoulli parameters can be used.

    Returns
    -------
    init_bias
        `inverse_sigmoid_numpy` applied to the means over axis 0, after
        clipping them to [1e-7, 1 - 1e-7].

    Raises
    ------
    ValueError
        If the maximum of `arr` is not 1 or its minimum is not 0.
    """
    largest = arr.max()
    if largest != 1:
        raise ValueError("Expected design matrix to consist entirely "
                         "of 0s and 1s, but maximum value is "
                         + str(largest))

    smallest = arr.min()
    if smallest != 0.:
        raise ValueError("Expected design matrix to consist entirely of "
                         "0s and 1s, but minimum value is "
                         + str(smallest))

    # There is intentionally no check that every entry is exactly 0 or 1,
    # so the marginals can come from a dataset of Bernoulli parameters.
    marginals = np.clip(arr.mean(axis=0), 1e-7, 1 - 1e-7)
    return inverse_sigmoid_numpy(marginals)
def __init__(self, nvis=None, nhid=None,
             vis_space=None,
             hid_space=None,
             transformer=None,
             irange=0.5, rng=None, init_bias_vis=None,
             init_bias_vis_marginals=None, init_bias_hid=0.0,
             base_lr=1e-3, anneal_start=None, nchains=100,
             sml_gibbs_steps=1,
             random_patches_src=None,
             monitor_reconstruction=False):
    """
    Construct an RBM object.

    The model is specified either with `nvis` / `nhid` (dense matrix
    multiplication between two vector spaces) or with `vis_space`,
    `hid_space` and `transformer` together, never both.

    Parameters
    ----------
    nvis : int
        Number of visible units in the model.
        (Specifying this implies that the model acts on a vector,
        i.e. it sets vis_space = pylearn2.space.VectorSpace(nvis) )
    nhid : int
        Number of hidden units in the model.
        (Specifying this implies that the model acts on a vector)
    vis_space : pylearn2.space.Space
        Describes what kind of vector space the RBM acts on.
        Don't specify if you used nvis / nhid
    hid_space : pylearn2.space.Space
        Describes what kind of vector space the RBM's hidden units
        live in. Don't specify if you used nvis / nhid
    transformer : optional
        The linear transformer mapping between the two spaces. Required
        with vis_space / hid_space. With nvis / nhid it must be None or
        a MatrixMul, and a MatrixMul is built when it is None.
    irange : float, optional
        The size of the initial interval around 0 for weights. When
        `random_patches_src` is given, the patches are multiplied by
        irange instead.
    rng : RandomState object or seed
        NumPy RandomState object to use when initializing parameters
        of the model, or (integer) seed to use to create one.
        Defaults to RandomState(1001).
    init_bias_vis : array_like, optional
        Initial value of the visible biases, broadcasted as necessary.
        Must be None when `init_bias_vis_marginals` is given.
    init_bias_vis_marginals : Dataset, optional
        Either None, or a Dataset to use to initialize the visible
        biases to the inverse sigmoid of the data marginals. Its `X`
        attribute must lie in [0, 1].
    init_bias_hid : array_like, optional
        Initial value of the hidden biases, broadcasted as necessary.
    base_lr : float
        Default SML learning rule: the base learning rate.
    anneal_start : int or None
        Default SML learning rule: number of steps after which to start
        annealing on a 1/t schedule.
    nchains : int
        Default SML learning rule: number of negative chains.
    sml_gibbs_steps : int
        Default SML learning rule: number of gibbs steps to take per
        update.
    random_patches_src : optional
        Either None, or a Dataset (or array) from which to draw random
        patches in order to initialize the weights.
    monitor_reconstruction : bool
        If True, will request a monitoring channel to monitor
        reconstruction error. (This constructor itself does not
        reference the flag.)

    Raises
    ------
    ValueError
        If `init_bias_vis` or `init_bias_hid` cannot be added to the
        origin of the corresponding space.
    AssertionError
        If the nvis / nhid and vis_space / hid_space / transformer
        arguments are mixed, or the marginals dataset is out of range.
    """
    Model.__init__(self)
    Block.__init__(self)

    if init_bias_vis_marginals is not None:
        assert init_bias_vis is None
        X = init_bias_vis_marginals.X
        assert X.min() >= 0.0
        assert X.max() <= 1.0

        marginals = X.mean(axis=0)

        # rescale the marginals a bit to avoid NaNs
        init_bias_vis = inverse_sigmoid_numpy(.01 + .98 * marginals)

    if init_bias_vis is None:
        init_bias_vis = 0.0

    if rng is None:
        # TODO: global rng configuration stuff.
        rng = numpy.random.RandomState(1001)
    elif not hasattr(rng, 'uniform'):
        # The documented contract allows a seed in place of a
        # RandomState. Wrap it so that rng.uniform below (and any later
        # use of self.rng) works.
        rng = numpy.random.RandomState(rng)
    self.rng = rng

    if vis_space is None:
        # if we don't specify things in terms of spaces and a transformer,
        # assume dense matrix multiplication and work off of nvis, nhid
        assert hid_space is None
        assert transformer is None or isinstance(transformer, MatrixMul)
        assert nvis is not None
        assert nhid is not None

        if transformer is None:
            if random_patches_src is None:
                W = rng.uniform(-irange, irange, (nvis, nhid))
            elif hasattr(random_patches_src, '__array__'):
                # Array-like source: its rows are used directly as the
                # hidden units' patches.
                W = irange * random_patches_src.T
                assert W.shape == (nvis, nhid)
            else:
                W = irange * random_patches_src.get_batch_design(nhid).T

            self.transformer = MatrixMul(
                sharedX(W, name='W', borrow=True))
        else:
            self.transformer = transformer

        self.vis_space = VectorSpace(nvis)
        self.hid_space = VectorSpace(nhid)
    else:
        assert hid_space is not None
        assert transformer is not None
        assert nvis is None
        assert nhid is None

        self.vis_space = vis_space
        self.hid_space = hid_space
        self.transformer = transformer

    # Biases start from each space's origin. The in-place add broadcasts
    # the initial value and raises ValueError on a shape mismatch.
    try:
        b_vis = self.vis_space.get_origin()
        b_vis += init_bias_vis
    except ValueError:
        raise ValueError("bad shape or value for init_bias_vis")
    self.bias_vis = sharedX(b_vis, name='bias_vis', borrow=True)

    try:
        b_hid = self.hid_space.get_origin()
        b_hid += init_bias_hid
    except ValueError:
        raise ValueError('bad shape or value for init_bias_hid')
    self.bias_hid = sharedX(b_hid, name='bias_hid', borrow=True)

    self.random_patches_src = random_patches_src
    self.register_names_to_del(['random_patches_src'])

    self.__dict__.update(nhid=nhid, nvis=nvis)
    self._params = safe_union(self.transformer.get_params(),
                              [self.bias_vis, self.bias_hid])

    self.base_lr = base_lr
    self.anneal_start = anneal_start
    self.nchains = nchains
    self.sml_gibbs_steps = sml_gibbs_steps
init_beta = 1./(.01+residuals.var(axis=0)) print 'init_beta.shape: ',init_beta.shape norms = [ np.sqrt(np.square(mu).sum()) for mu in means ] W = np.zeros( (means[0].shape[0], len(means)), dtype = config.floatX) for i in xrange(len(means)): W[:,i] = means[i]/norms[i] init_mu = np.asarray(norms) model = checked_call(S3C, dict(nvis = X.shape[1], nhid = len(means), init_bias_hid = inverse_sigmoid_numpy(np.asarray([ (y==i).mean() for i in xrange(y.max()+1)])), irange = 0., min_B = .1, max_B = 1e6, min_alpha = .1, max_alpha = 1e6, m_step = checked_call(Grad_M_Step, dict(learning_rate = 1.)), init_mu = init_mu, init_alpha = init_mu * 10., init_B = init_beta, e_step = E_Step_Scan( h_new_coeff_schedule = [ .1 ] * 50, s_new_coeff_schedule = [ .1 ] * 50, clip_reflections = 1, rho = 0.5 ))
# Randomly sampled hyperparameters for a two-layer model.
# NOTE(review): uniform_between, rng, use_sparsity and num_jobs are defined
# outside this fragment. The comments below assume uniform_between(a, b)
# draws uniformly between a and b (presumably once per job) and that
# use_sparsity is a 0/1 mask. Confirm both against their definitions.
# Statement order determines the random stream, so do not reorder.

# Target values for each layer, drawn between .01 and .2.
layer_1_target = uniform_between(.01, .2)
layer_2_target = uniform_between(.01, .2)
# eps is zero when the comparison is False, and otherwise a draw from
# rng.uniform(0., target).
layer_1_eps = (uniform_between(0., 1.) > .5) * rng.uniform(0., layer_1_target)
layer_2_eps = (uniform_between(0., 1.) > .5) * rng.uniform(0., layer_2_target)
# Coefficients are 10 ** (draw between -2 and -.5), then multiplied by
# use_sparsity.
layer_1_coeff = 10 ** uniform_between(-2., -.5)
layer_1_coeff *= use_sparsity
layer_2_coeff = 10 ** uniform_between(-2., -.5)
layer_2_coeff *= use_sparsity
# Layer 1
layer_1_dim = rng.randint(250,751, (num_jobs))
# 784 is presumably the input dimension. TODO confirm.
layer_1_irange = uniform_between(1./np.sqrt(784), 1./np.sqrt(layer_1_dim))
# Where switch is False both candidate biases are multiplied to zero.
# Otherwise the bias is the inverse sigmoid of the layer target (weighted
# by use_sparsity) or a draw between -2 and 0 (weighted by 1-use_sparsity).
switch = uniform_between(0., 1.) > 0.5
if_no_sparsity = switch * uniform_between(-2., 0.)
if_sparsity = switch * inverse_sigmoid_numpy(layer_1_target)
layer_1_init_bias = use_sparsity * if_sparsity + (1-use_sparsity) * if_no_sparsity
# Layer 2
# NOTE(review): if rng is a numpy RandomState, randint excludes its upper
# bound, so 1500 is never drawn here, whereas layer 1 uses 751 to reach 750.
# Confirm this asymmetry is intended.
layer_2_dim = rng.randint(500,1500, (num_jobs))
layer_2_irange = uniform_between(1./np.sqrt(layer_1_dim), 1./np.sqrt(layer_2_dim))
# Same bias scheme as layer 1, with a fresh switch draw.
switch = uniform_between(0., 1.) > 0.5
if_no_sparsity = switch * uniform_between(-2., 0.)
if_sparsity = switch * inverse_sigmoid_numpy(layer_2_target)
layer_2_init_bias = use_sparsity * if_sparsity + (1-use_sparsity) * if_no_sparsity
# Optimizer
reset_alpha = uniform_between(0., 1.) > 0.5