def set_input_space(self, space):
    self.input_space = space

    if isinstance(space, VectorSpace):
        self.requires_reformat = False
        self.input_dim = space.dim
    else:
        self.requires_reformat = True
        self.input_dim = space.get_total_dimension()
        self.desired_space = VectorSpace(self.input_dim)

    self.output_space = VectorSpace(self.dim)

    rng = self.mlp.rng
    W = rng.uniform(-self.irange, self.irange, (self.input_dim, self.dim))

    W = sharedX(W)
    W.name = self.layer_name + '_W'

    self.transformer = MatrixMul(W)

    W, = self.transformer.get_params()
    assert W.name is not None
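# Hedged sketch (not from the original source): what the MatrixMul transformer
# built in set_input_space above is used for. MatrixMul.lmul(x) computes the
# linear map x . W (see test_matrixmul further down, where f(x) matches
# np.dot(x, W)); a layer's fprop would typically apply it to the reformatted
# input batch. The shapes and names below are illustrative assumptions.
import numpy as np
import theano
import theano.tensor as T
from pylearn2.linear.matrixmul import MatrixMul
from pylearn2.utils import sharedX

W = sharedX(np.random.uniform(-0.05, 0.05, (10, 4)), name='example_W')
x = T.matrix('x')
transform = MatrixMul(W)
f = theano.function([x], transform.lmul(x))

y = f(np.random.randn(2, 10).astype(theano.config.floatX))
assert y.shape == (2, 4)   # batch of 2 inputs of dim 10, mapped to dim 4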
def set_input_space(self, space): """ Note: this resets parameters! """ self.input_space = space if isinstance(space, VectorSpace): self.requires_reformat = False self.input_dim = space.dim else: self.requires_reformat = True self.input_dim = space.get_total_dimension() self.desired_space = VectorSpace(self.input_dim) self.output_space = VectorSpace(self.dim) rng = self.dbm.rng if self.irange is not None: assert self.sparse_init is None W = rng.uniform(-self.irange, self.irange, (self.input_dim, self.dim)) * \ (rng.uniform(0.,1., (self.input_dim, self.dim)) < self.include_prob) else: assert self.sparse_init is not None W = np.zeros((self.input_dim, self.dim)) W *= self.sparse_stdev W = sharedX(W) W.name = self.layer_name + '_W' self.transformer = MatrixMul(W) W ,= self.transformer.get_params() assert W.name is not None
def set_input_space(self, space):
    self.input_space = space

    if isinstance(space, VectorSpace):
        self.requires_reformat = False
        self.input_dim = space.dim
    else:
        self.requires_reformat = True
        self.input_dim = space.get_total_dimension()
        self.desired_space = VectorSpace(self.input_dim)

    if self.fprop_code == True:
        self.output_space = VectorSpace(self.dim)
    else:
        self.output_space = VectorSpace(self.input_dim)

    rng = self.mlp.rng
    W = rng.randn(self.input_dim, self.dim)
    self.W = sharedX(W.T, self.layer_name + '_W')
    self.transformer = MatrixMul(self.W)
    self.W, = self.transformer.get_params()
    b = np.zeros((self.input_dim,))
    self.b = sharedX(b, self.layer_name + '_b')
    # We need both to pass input_dim valid
    X = .001 * rng.randn(self.batch_size, self.dim)
    self.X = sharedX(X, self.layer_name + '_X')
    self._params = [self.W, self.b, self.X]
    self.state_below = T.zeros((self.batch_size, self.input_dim))
def _prepare_generator(self, generator, noise_space, condition_distribution,
                       new_W_irange, input_source):
    noise_dim = noise_space.get_total_dimension()
    condition_dim = self.condition_space.get_total_dimension()

    first_layer = generator.mlp.layers[0]
    pretrain_W, _ = first_layer.get_param_values()

    rng = generator.mlp.rng
    new_W = np.vstack((pretrain_W,
                       rng.uniform(-new_W_irange, new_W_irange,
                                   (condition_dim, pretrain_W.shape[1]))))
    new_W = sharedX(new_W)
    new_W.name = first_layer.get_params()[0].name + '_retrain'
    first_layer.transformer = MatrixMul(new_W)

    first_layer.input_space = CompositeSpace(
        components=[noise_space, self.condition_space])
    generator.mlp.input_space = first_layer.input_space

    # HACK!
    generator.mlp._input_source = input_source

    return ConditionalGenerator(
        generator.mlp,
        input_condition_space=self.condition_space,
        condition_distribution=condition_distribution,
        noise_dim=noise_dim)
def set_input_space(self, space):
    self.input_space = space

    if isinstance(space, VectorSpace):
        self.requires_reformat = False
        self.input_dim = space.dim
    else:
        self.requires_reformat = True
        self.input_dim = space.get_total_dimension()
        self.desired_space = VectorSpace(self.input_dim)

    self.output_space = VectorSpace(self.dim)

    self.rng = self.mlp.rng

    # sanity checking
    assert self.dictionary.input_dim == self.input_dim
    assert self.dictionary.size >= self.dim

    indices = self.rng.permutation(self.dictionary.size)
    indices = indices[:self.dim]
    indices.sort()
    W = self.dictionary.get_subdictionary(indices)

    # dictionary atoms are stored in rows but transformers expect them to
    # be in columns.
    W = sharedX(W.T)
    W.name = self.layer_name + "_W"
    self.transformer = MatrixMul(W)
def set_input_space(self, space): """ Note: this resets parameters! """ self.input_space = space if isinstance(space, VectorSpace): self.requires_reformat = False self.input_dim = space.dim else: self.requires_reformat = True self.input_dim = space.get_total_dimension() self.desired_space = VectorSpace(self.input_dim) if not (self.detector_layer_dim % self.pool_size == 0): raise ValueError( "detector_layer_dim = %d, pool_size = %d. Should be divisible but remainder is %d" % (self.detector_layer_dim, self.pool_size, self.detector_layer_dim % self.pool_size)) self.h_space = VectorSpace(self.detector_layer_dim) self.pool_layer_dim = self.detector_layer_dim / self.pool_size self.output_space = VectorSpace(self.pool_layer_dim) rng = self.dbm.rng if self.irange is not None: assert self.sparse_init is None W = rng.uniform(-self.irange, self.irange, (self.input_dim, self.detector_layer_dim)) * \ (rng.uniform(0.,1., (self.input_dim, self.detector_layer_dim)) < self.include_prob) else: assert self.sparse_init is not None W = np.zeros((self.input_dim, self.detector_layer_dim)) for i in xrange(self.detector_layer_dim): for j in xrange(self.sparse_init): idx = rng.randint(0, self.input_dim) while W[idx, i] != 0: idx = rng.randint(0, self.input_dim) W[idx, i] = rng.randn() W = sharedX(W) W.name = self.layer_name + '_W' self.transformer = MatrixMul(W) W, = self.transformer.get_params() assert W.name is not None
def set_input_space(self, space):
    self.input_space = space

    assert isinstance(space, CompositeSpace)
    self.input_dim = []
    self.desired_space = []
    for sp in space.components:
        if isinstance(sp, VectorSpace):
            self.requires_reformat = False
            self.input_dim.append(sp.dim)
        else:
            self.requires_reformat = True
            self.input_dim.append(sp.get_total_dimension())
            self.desired_space.append(VectorSpace(self.input_dim[-1]))

    if self.fprop_code == True:
        self.output_space = VectorSpace(self.dim)
    else:
        #self.output_space = VectorSpace(self.input_dim)
        # TODO: return composite space
        raise NotImplementedError

    rng = self.mlp.rng
    self.W = []
    self.S = []
    self.b = []
    self.transformer = []
    self._params = []

    X = .001 * rng.randn(self.batch_size, self.dim)
    self.X = sharedX(X, self.layer_name + '_X')

    for c in range(len(self.input_space.components)):
        W = rng.randn(self.input_dim[c], self.dim)
        self.W += [sharedX(W.T, self.layer_name + '_W' + str(c))]
        self.transformer += [MatrixMul(self.W[c])]
        self.W[-1], = self.transformer[-1].get_params()
        b = np.zeros((self.input_dim[c],))
        self.b += [sharedX(b, self.layer_name + '_b' + str(c))]
        # We need both to pass input_dim valid
        S = rng.normal(0, .001, size=(self.batch_size, self.input_dim[c]))
        self.S += [sharedX(S, self.layer_name + '_S' + str(c))]
        self._params += [self.W[-1], self.b[-1]]

    #self.state_below = T.zeros((self.batch_size, self.input_dim))

    cost = self.get_local_cost()
    self.opt = top.Optimizer(self.X, cost, method='rmsprop',
                             learning_rate=self.lr, momentum=.9)
def setUpClass(cls):
    cls.test_m = 2

    cls.rng = N.random.RandomState([1, 2, 3])
    cls.nv = 3
    cls.nh = 4

    cls.vW = cls.rng.randn(cls.nv, cls.nh)
    cls.W = sharedX(cls.vW)
    cls.vbv = as_floatX(cls.rng.randn(cls.nv))
    cls.bv = T.as_tensor_variable(cls.vbv)
    cls.bv.tag.test_value = cls.vbv
    cls.vbh = as_floatX(cls.rng.randn(cls.nh))
    cls.bh = T.as_tensor_variable(cls.vbh)
    # test_value must be the numpy array, not the symbolic variable
    cls.bh.tag.test_value = cls.vbh
    cls.vsigma = as_floatX(cls.rng.uniform(0.1, 5))
    cls.sigma = T.as_tensor_variable(cls.vsigma)
    cls.sigma.tag.test_value = cls.vsigma

    cls.E = GRBM_Type_1(transformer=MatrixMul(cls.W), bias_vis=cls.bv,
                        bias_hid=cls.bh, sigma=cls.sigma)

    cls.V = T.matrix()
    cls.V.tag.test_value = as_floatX(cls.rng.rand(cls.test_m, cls.nv))
    cls.H = T.matrix()
    cls.H.tag.test_value = as_floatX(cls.rng.rand(cls.test_m, cls.nh))

    cls.E_func = function([cls.V, cls.H], cls.E([cls.V, cls.H]))
    cls.F_func = function([cls.V], cls.E.free_energy(cls.V))
    cls.log_P_H_given_V_func = \
        function([cls.H, cls.V], cls.E.log_P_H_given_V(cls.H, cls.V))
    cls.score_func = function([cls.V], cls.E.score(cls.V))

    cls.F_of_V = cls.E.free_energy(cls.V)
    cls.dummy = T.sum(cls.F_of_V)
    cls.negscore = T.grad(cls.dummy, cls.V)
    cls.score = -cls.negscore

    cls.generic_score_func = function([cls.V], cls.score)
def set_input_space(self, space):
    self.input_space = space

    if isinstance(space, VectorSpace):
        self.requires_reformat = False
        self.input_dim = space.dim
    else:
        self.requires_reformat = True
        self.input_dim = space.get_total_dimension()
        self.desired_space = VectorSpace(self.input_dim)

    if self.fprop_code == True:
        self.output_space = VectorSpace(self.dim)
    else:
        self.output_space = VectorSpace(self.input_dim)

    rng = self.mlp.rng
    W = rng.randn(self.input_dim, self.dim)
    self.W = sharedX(W.T, self.layer_name + '_W')
    self.transformer = MatrixMul(self.W)
    self.W, = self.transformer.get_params()
    b = np.zeros((self.input_dim,))
    self.b = sharedX(b, self.layer_name + '_b')
    # We need both to pass input_dim valid
    X = .001 * rng.randn(self.batch_size, self.dim)
    self.X = sharedX(X, self.layer_name + '_X')
    S = rng.normal(0, .001, size=(self.batch_size, self.input_dim))
    self.S = sharedX(S, self.layer_name + '_S')
    self._params = [self.W, self.b]
    #self.state_below = T.zeros((self.batch_size, self.input_dim))

    cost = self.get_local_cost()
    self.opt = top.Optimizer(self.X, cost, method='rmsprop',
                             learning_rate=self.lr, momentum=.9)

    self._reconstruction = theano.function([], T.dot(self.X, self.W))
nv = 3
nh = 4

vW = rng.randn(nv, nh)
W = sharedX(vW)
vbv = as_floatX(rng.randn(nv))
bv = T.as_tensor_variable(vbv)
bv.tag.test_value = vbv
vbh = as_floatX(rng.randn(nh))
bh = T.as_tensor_variable(vbh)
bh.tag.test_value = vbh
vsigma = as_floatX(rng.uniform(0.1, 5))
sigma = T.as_tensor_variable(vsigma)
sigma.tag.test_value = vsigma

E = GRBM_Type_1(transformer=MatrixMul(W), bias_vis=bv,
                bias_hid=bh, sigma=sigma)

V = T.matrix()
V.tag.test_value = as_floatX(rng.rand(test_m, nv))
H = T.matrix()
H.tag.test_value = as_floatX(rng.rand(test_m, nh))

E_func = function([V, H], E([V, H]))
F_func = function([V], E.free_energy(V))
log_P_H_given_V_func = function([H, V], E.log_P_H_given_V(H, V))
score_func = function([V], E.score(V))

F_of_V = E.free_energy(V)
def set_input_space(self, space):
    self.input_space = space

    if not isinstance(space, Conv2DSpace):
        raise BadInputSpaceError("ConvRectifiedLinear.set_input_space "
                                 "expected a Conv2DSpace, got " +
                                 str(space) + " of type " + str(type(space)))

    rng = self.mlp.rng

    if self.border_mode == 'valid':
        output_shape = [
            (self.input_space.shape[0] - self.kernel_shape[0]) /
            self.kernel_stride[0] + 1,
            (self.input_space.shape[1] - self.kernel_shape[1]) /
            self.kernel_stride[1] + 1
        ]
    elif self.border_mode == 'full':
        output_shape = [
            (self.input_space.shape[0] + self.kernel_shape[0]) /
            self.kernel_stride[0] - 1,
            (self.input_space.shape[1] + self.kernel_shape[1]) /
            self.kernel_stride[1] - 1
        ]

    self.detector_space = Conv2DSpace(shape=output_shape,
                                      num_channels=self.output_channels,
                                      axes=('b', 'c', 0, 1))

    if self.irange is not None:
        assert self.sparse_init is None
        self.transformer = conv2d.make_random_conv2D(
            irange=self.irange,
            input_space=self.input_space,
            output_space=self.detector_space,
            kernel_shape=self.kernel_shape,
            batch_size=self.mlp.batch_size,
            subsample=self.kernel_stride,
            border_mode=self.border_mode,
            rng=rng)
    elif self.sparse_init is not None:
        self.transformer = conv2d.make_sparse_random_conv2D(
            num_nonzero=self.sparse_init,
            input_space=self.input_space,
            output_space=self.detector_space,
            kernel_shape=self.kernel_shape,
            batch_size=self.mlp.batch_size,
            subsample=self.kernel_stride,
            border_mode=self.border_mode,
            rng=rng)

    W, = self.transformer.get_params()
    W.name = 'W'

    self.b = sharedX(
        np.zeros(((self.num_pieces * self.output_channels),)) + self.init_bias)
    self.b.name = 'b'

    print 'Input shape: ', self.input_space.shape
    print 'Detector space: ', self.detector_space.shape

    assert self.pool_type in ['max', 'mean']

    dummy_batch_size = self.mlp.batch_size
    if dummy_batch_size is None:
        dummy_batch_size = 2
    dummy_detector = sharedX(
        self.detector_space.get_origin_batch(dummy_batch_size))
    #dummy_p = dummy_p.eval()

    self.output_space = Conv2DSpace(shape=[400, 1],
                                    num_channels=self.output_channels,
                                    axes=('b', 'c', 0, 1))

    W = rng.uniform(-self.irange, self.irange,
                    (426, (self.num_pieces * self.output_channels)))
    W = sharedX(W)
    W.name = self.layer_name + "_w"
    self.transformer = MatrixMul(W)

    print 'Output space: ', self.output_space.shape
def set_input_space(self, space): """ Note: this resets parameters! """ self.input_space = space assert self.gater.get_input_space() == space if isinstance(space, VectorSpace): self.requires_reformat = False self.input_dim = space.dim else: self.requires_reformat = True self.input_dim = space.get_total_dimension() self.desired_space = VectorSpace(self.input_dim) if not ((self.detector_layer_dim - self.pool_size) % self.pool_stride == 0): if self.pool_stride == self.pool_size: raise ValueError( "detector_layer_dim = %d, pool_size = %d. Should be divisible but remainder is %d" % (self.detector_layer_dim, self.pool_size, self.detector_layer_dim % self.pool_size)) raise ValueError() self.h_space = VectorSpace(self.detector_layer_dim) self.pool_layer_dim = (self.detector_layer_dim - self.pool_size) / self.pool_stride + 1 self.output_space = VectorSpace(self.pool_layer_dim) rng = self.mlp.rng if self.irange is not None: assert self.sparse_init is None W = rng.uniform(-self.irange, self.irange, (self.input_dim, self.detector_layer_dim)) * \ (rng.uniform(0.,1., (self.input_dim, self.detector_layer_dim)) < self.include_prob) else: assert self.sparse_init is not None W = np.zeros((self.input_dim, self.detector_layer_dim)) def mask_rejects(idx, i): if self.mask_weights is None: return False return self.mask_weights[idx, i] == 0. for i in xrange(self.detector_layer_dim): assert self.sparse_init <= self.input_dim for j in xrange(self.sparse_init): idx = rng.randint(0, self.input_dim) while W[idx, i] != 0 or mask_rejects(idx, i): idx = rng.randint(0, self.input_dim) W[idx, i] = rng.randn() W *= self.sparse_stdev W = sharedX(W) W.name = self.layer_name + '_W' self.transformer = MatrixMul(W) W, = self.transformer.get_params() assert W.name is not None if not hasattr(self, 'randomize_pools'): self.randomize_pools = False if self.randomize_pools: permute = np.zeros( (self.detector_layer_dim, self.detector_layer_dim)) for j in xrange(self.detector_layer_dim): i = rng.randint(self.detector_layer_dim) permute[i, j] = 1 self.permute = sharedX(permute) if self.mask_weights is not None: expected_shape = (self.input_dim, self.detector_layer_dim) if expected_shape != self.mask_weights.shape: raise ValueError("Expected mask with shape " + str(expected_shape) + " but got " + str(self.mask_weights.shape)) self.mask = sharedX(self.mask_weights)
def __init__(self, nvis=None, nhid=None, vis_space=None, hid_space=None,
             transformer=None, irange=0.5, rng=None, init_bias_vis=None,
             init_bias_vis_marginals=None, init_bias_hid=0.0,
             base_lr=1e-3, anneal_start=None, nchains=100,
             sml_gibbs_steps=1, random_patches_src=None,
             monitor_reconstruction=False):
    """
    Construct an RBM object.

    Parameters
    ----------
    nvis : int
        Number of visible units in the model. (Specifying this implies that
        the model acts on a vector, i.e. it sets
        vis_space = pylearn2.space.VectorSpace(nvis))
    nhid : int
        Number of hidden units in the model. (Specifying this implies that
        the model acts on a vector)
    vis_space : pylearn2.space.Space
        A Space object describing what kind of vector space the RBM acts on.
        Don't specify if you used nvis / nhid.
    hid_space : pylearn2.space.Space
        A Space object describing what kind of vector space the RBM's hidden
        units live in. Don't specify if you used nvis / nhid.
    init_bias_vis_marginals : Dataset or None
        Either None, or a Dataset to use to initialize the visible biases to
        the inverse sigmoid of the data marginals.
    irange : float, optional
        The size of the initial interval around 0 for weights.
    rng : RandomState object or seed
        NumPy RandomState object to use when initializing parameters of the
        model, or (integer) seed to use to create one.
    init_bias_vis : array_like, optional
        Initial value of the visible biases, broadcasted as necessary.
    init_bias_hid : array_like, optional
        Initial value of the hidden biases, broadcasted as necessary.
    monitor_reconstruction : bool
        If True, will request a monitoring channel to monitor reconstruction
        error.
    random_patches_src : Dataset or None
        Either None, or a Dataset from which to draw random patches in order
        to initialize the weights. Patches will be multiplied by irange.

    Parameters for the default SML learning rule:

    base_lr : float
        The base learning rate.
    anneal_start : int
        Number of steps after which to start annealing on a 1/t schedule.
    nchains : int
        Number of negative chains.
    sml_gibbs_steps : int
        Number of Gibbs steps to take per update.
    """
    Model.__init__(self)
    Block.__init__(self)

    if init_bias_vis_marginals is not None:
        assert init_bias_vis is None
        X = init_bias_vis_marginals.X
        assert X.min() >= 0.0
        assert X.max() <= 1.0

        marginals = X.mean(axis=0)

        # rescale the marginals a bit to avoid NaNs
        init_bias_vis = inverse_sigmoid_numpy(.01 + .98 * marginals)

    if init_bias_vis is None:
        init_bias_vis = 0.0

    if rng is None:
        # TODO: global rng configuration stuff.
        rng = numpy.random.RandomState(1001)
    self.rng = rng

    if vis_space is None:
        # if we don't specify things in terms of spaces and a transformer,
        # assume dense matrix multiplication and work off of nvis, nhid
        assert hid_space is None
        assert transformer is None or isinstance(transformer, MatrixMul)
        assert nvis is not None
        assert nhid is not None

        if transformer is None:
            if random_patches_src is None:
                W = rng.uniform(-irange, irange, (nvis, nhid))
            else:
                if hasattr(random_patches_src, '__array__'):
                    W = irange * random_patches_src.T
                    assert W.shape == (nvis, nhid)
                else:
                    #assert type(irange) == type(0.01)
                    #assert irange == 0.01
                    W = irange * random_patches_src.get_batch_design(nhid).T

            self.transformer = MatrixMul(sharedX(W, name='W', borrow=True))
        else:
            self.transformer = transformer

        self.vis_space = VectorSpace(nvis)
        self.hid_space = VectorSpace(nhid)
    else:
        assert hid_space is not None
        assert transformer is not None
        assert nvis is None
        assert nhid is None

        self.vis_space = vis_space
        self.hid_space = hid_space
        self.transformer = transformer

    try:
        b_vis = self.vis_space.get_origin()
        b_vis += init_bias_vis
    except ValueError:
        raise ValueError("bad shape or value for init_bias_vis")
    self.bias_vis = sharedX(b_vis, name='bias_vis', borrow=True)

    try:
        b_hid = self.hid_space.get_origin()
        b_hid += init_bias_hid
    except ValueError:
        raise ValueError('bad shape or value for init_bias_hid')
    self.bias_hid = sharedX(b_hid, name='bias_hid', borrow=True)

    self.random_patches_src = random_patches_src
    self.register_names_to_del(['random_patches_src'])

    self.__dict__.update(nhid=nhid, nvis=nvis)
    self._params = safe_union(self.transformer.get_params(),
                              [self.bias_vis, self.bias_hid])

    self.base_lr = base_lr
    self.anneal_start = anneal_start
    self.nchains = nchains
    self.sml_gibbs_steps = sml_gibbs_steps
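# A minimal, hedged construction example for the __init__ above, assuming it
# belongs to pylearn2.models.rbm.RBM (class name and import path are an
# assumption; the dimensions are illustrative, not from the original source).
# With nvis/nhid given, the constructor wraps a (nvis, nhid) shared weight
# matrix in a MatrixMul transformer and builds VectorSpace vis/hid spaces.
from pylearn2.models.rbm import RBM

rbm = RBM(nvis=784, nhid=500, irange=0.05)
W, = rbm.transformer.get_params()   # shared weight matrix named 'W'
assert rbm.vis_space.dim == 784
assert rbm.hid_space.dim == 500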
nv = 3
nh = 4

vW = rng.randn(nv, nh)
W = sharedX(vW)
vbv = as_floatX(rng.randn(nv))
bv = T.as_tensor_variable(vbv)
bv.tag.test_value = vbv
vbh = as_floatX(rng.randn(nh))
bh = T.as_tensor_variable(vbh)
bh.tag.test_value = vbh
vsigma = as_floatX(rng.uniform(0.1, 5))
sigma = T.as_tensor_variable(vsigma)
sigma.tag.test_value = vsigma

E = GRBM_Type_1(transformer=MatrixMul(W), bias_vis=bv,
                bias_hid=bh, sigma=sigma)

V = T.matrix()
V.tag.test_value = as_floatX(rng.rand(test_m, nv))
H = T.matrix()
H.tag.test_value = as_floatX(rng.rand(test_m, nh))

E_func = function([V, H], E([V, H]))
F_func = function([V], E.free_energy(V))
log_P_H_given_V_func = function([H, V], E.log_P_H_given_V(H, V))
score_func = function([V], E.score(V))

F_of_V = E.free_energy(V)
dummy = T.sum(F_of_V)
negscore = T.grad(dummy, V)
def test_matrixmul():
    """
    Tests matrix multiplication for a range of different dtypes. Checks both
    normal and transpose multiplication using randomly generated matrices.
    """
    rng = np.random.RandomState(222)
    dtypes = ['int16', 'int32', 'int64', 'float64', 'float32']
    tensor_x = [
        tensor.wmatrix(),
        tensor.imatrix(),
        tensor.lmatrix(),
        tensor.dmatrix(),
        tensor.fmatrix()
    ]
    np_W, np_x, np_x_T = [], [], []
    for dtype in dtypes:
        if 'int' in dtype:
            np_W.append(
                rng.randint(-10, 10,
                            rng.random_integers(5, size=2)).astype(dtype))
            np_x.append(
                rng.randint(
                    -10, 10,
                    (rng.random_integers(5), np_W[-1].shape[0])).astype(dtype))
            np_x_T.append(
                rng.randint(
                    -10, 10,
                    (rng.random_integers(5), np_W[-1].shape[1])).astype(dtype))
        elif 'float' in dtype:
            np_W.append(
                rng.uniform(-1, 1,
                            rng.random_integers(5, size=2)).astype(dtype))
            np_x.append(
                rng.uniform(
                    -10, 10,
                    (rng.random_integers(5), np_W[-1].shape[0])).astype(dtype))
            # the transposed test input belongs in np_x_T, not np_x
            np_x_T.append(
                rng.uniform(
                    -10, 10,
                    (rng.random_integers(5), np_W[-1].shape[1])).astype(dtype))
        else:
            assert False

    def sharedW(value, dtype):
        return theano.shared(theano._asarray(value, dtype=dtype))

    # build each shared weight with its own dtype
    tensor_W = [sharedW(W, dtype) for W, dtype in zip(np_W, dtypes)]
    matrixmul = [MatrixMul(W) for W in tensor_W]
    assert all(mm.get_params()[0] == W for mm, W in zip(matrixmul, tensor_W))

    fn = [
        theano.function([x], mm.lmul(x))
        for x, mm in zip(tensor_x, matrixmul)
    ]
    fn_T = [
        theano.function([x], mm.lmul_T(x))
        for x, mm in zip(tensor_x, matrixmul)
    ]
    for W, x, x_T, f, f_T in zip(np_W, np_x, np_x_T, fn, fn_T):
        np.testing.assert_allclose(f(x), np.dot(x, W))
        np.testing.assert_allclose(f_T(x_T), np.dot(x_T, W.T))