Beispiel #1
0
	def __init__(self, C = 1.0,
				kernel = 'rbf',
				gamma = 1.0,
				cache_size = 1024,
				params=None,
				verbose = False):
		"""
		Store the hyper-parameters and, optionally, previously learned parameters.

		:param C: stored as ``self.C`` (presumably the SVM penalty term --
			TODO confirm).
		:param kernel: kernel identifier, stored as ``self.kernel``.
		:param gamma: stored as ``self.gamma`` (presumably the kernel
			coefficient -- TODO confirm).
		:param cache_size: stored as ``self.cache_size``.
		:param params: ``None``, or an indexable whose item 0 becomes
			``self.bias`` and whose item 1 becomes ``self.alpha``.
		:param verbose: stored as ``self.verbose``.
		"""
		Model.__init__(self)

		self.C = C
		self.gamma = gamma
		self.cache_size = cache_size
		self.verbose = verbose
		self.kernel = kernel

		if params is None:
			# No learned parameters supplied: install placeholders.
			self.bias = 0
			self.alpha = numpy.zeros(1)
		else:
			self.bias = params[0]
			self.alpha = params[1]

		# No classifier object is kept at construction time, so predictions
		# can be queried without anything having been saved on the instance.
		self.classifier = None

		# Printed unconditionally (not gated by `verbose`), exactly as before.
		# Written as a call so the line also parses on Python 3; on Python 2
		# it prints the same text the former `print self` statement did.
		print(self)
    def __init__(self, n_vis_units, n_hidden_units):
        """
        Allocate the shared parameters and declare the model's input space.

        :param n_vis_units: number of visible units; row count of ``W``,
            length of ``b_reconstruction`` and dimension of ``input_space``.
        :param n_hidden_units: number of hidden units; column count of ``W``
            and length of ``b``.
        """
        Model.__init__(self)

        # Weights come from numpy's global RNG; with no bounds given,
        # numpy.random.uniform samples from [0, 1).
        initial_weights = np.random.uniform(size=(n_vis_units, n_hidden_units))
        self._W = sharedX(initial_weights, 'W')

        # Both bias vectors start at zero.
        hidden_bias = np.zeros(n_hidden_units)
        self._b = sharedX(hidden_bias, 'b')
        visible_bias = np.zeros(n_vis_units)
        self._b_reconstruction = sharedX(visible_bias, 'b_reconstruction')

        self.input_space = VectorSpace(dim=n_vis_units)
Beispiel #3
0
    def __init__(self, k, nvis, convergence_th=1e-6, max_iter=None, verbose=False):
        """
        Configure a k-means model.

        :type k: int
        :param k: number of clusters.

        :param nvis: value passed to ``VectorSpace`` to build
        ``self.input_space``.

        :type convergence_th: float
        :param convergence_th: threshold of distance to clusters under which
        kmeans stops iterating.

        :type max_iter: int
        :param max_iter: maximum number of iterations. ``None`` (or any other
        falsy value, e.g. 0) means "no limit" and is stored as
        ``float('inf')``.

        :param verbose: stored as ``self.verbose``.

        :raises ValueError: if ``max_iter`` is negative.
        """

        Block.__init__(self)
        Model.__init__(self)

        self.input_space = VectorSpace(nvis)

        self.k = k
        self.convergence_th = convergence_th
        if max_iter:
            if max_iter < 0:
                # ValueError is a subclass of Exception, so callers that
                # caught the former bare Exception keep working.
                raise ValueError('KMeans init: max_iter should be positive.')
            self.max_iter = max_iter
        else:
            self.max_iter = float('inf')

        self.verbose = verbose
    def __init__(self, rbms=None, max_updates=1e6, flags={}):
        """
        Build a stacked model on top of a sequence of already-built RBMs.

        :param rbms: sequence of RBM objects. Consecutive entries must satisfy
            ``rbm1.n_h == rbm2.n_v`` and share the same ``batch_size``.
            NOTE(review): the default ``None`` cannot work -- the slicing
            below fails on it -- so a non-empty sequence is effectively
            required.
        :param max_updates: copied onto the instance as ``self.max_updates``.
        :param flags: dictionary handed to ``self.validate_flags`` and then
            copied onto the instance as ``self.flags``.
            NOTE(review): mutable default argument; the same dict object is
            shared by every call that omits ``flags``.
        """
        Model.__init__(self)
        Block.__init__(self)
        self.jobman_channel = None
        self.jobman_state = {}
        self.validate_flags(flags)
        self.register_names_to_del(['jobman_channel'])

        # dump initialization parameters to object
        # (locals() is evaluated once, before k and v are bound, so what gets
        # copied here are the constructor arguments. `iteritems` is Python 2.)
        for (k,v) in locals().iteritems():
            if k!='self': setattr(self,k,v)

        # validate that RBMs have the same number of units.
        for (rbm1, rbm2) in zip(rbms[:-1], rbms[1:]):
            assert rbm1.n_h == rbm2.n_v
            assert rbm1.batch_size == rbm2.batch_size
            #assert rbm1.flags['enable_centering']
            #assert rbm2.flags['enable_centering']
        self.rbms = rbms
        self.depth = len(rbms)
        # the first RBM's random number generator is reused for the stack
        self.rng = self.rbms[0].rng

        # configure input-space (necessary evil)
        # input is the first RBM's visible layer, output the last RBM's hidden layer
        self.input_space = VectorSpace(self.rbms[0].n_v)
        self.output_space = VectorSpace(self.rbms[-1].n_h)

        self.batches_seen = 0  # incremented on every batch
        self.examples_seen = 0 # incremented on every training example
        self.batch_size = self.rbms[0].batch_size
        self.cpu_time = 0
        # both helpers are defined elsewhere in the class; they run last, once
        # every attribute above is in place
        self.init_train_sequence()
        self.do_theano()
Beispiel #5
0
    def __init__(self,
                 k,
                 nvis,
                 convergence_th=1e-6,
                 max_iter=None,
                 verbose=False):
        """
        Set up a k-means model.

        :type k: int
        :param k: number of clusters.

        :param nvis: value passed to ``VectorSpace`` to build
        ``self.input_space``.

        :type convergence_th: float
        :param convergence_th: threshold of distance to clusters under which
        kmeans stops iterating.

        :type max_iter: int
        :param max_iter: maximum number of iterations. A falsy value
        (``None``, 0) is stored as infinity; a negative one raises.

        :param verbose: stored as ``self.verbose``.
        """

        Block.__init__(self)
        Model.__init__(self)

        self.input_space = VectorSpace(nvis)

        self.k = k
        self.convergence_th = convergence_th

        # Resolve the iteration cap: no cap -> infinity, negative -> error.
        if not max_iter:
            self.max_iter = float('inf')
        elif max_iter < 0:
            raise Exception('KMeans init: max_iter should be positive.')
        else:
            self.max_iter = max_iter

        self.verbose = verbose
    def __init__(self, n_vis_units, n_classes):
        """
        Allocate float32 shared parameters and declare input/output spaces.

        :param n_vis_units: number of input units; row count of ``W`` and
            dimension of ``input_space``.
        :param n_classes: number of classes; column count of ``W``, length of
            ``b`` and dimension of ``output_space``.
        """
        Model.__init__(self)

        # Weights come from numpy's global RNG (uniform on [0, 1) by numpy's
        # defaults) and are cast to float32; the bias starts at zero.
        weight_shape = (n_vis_units, n_classes)
        initial_weights = np.random.uniform(size=weight_shape).astype(dtype=np.float32)
        initial_bias = np.zeros(n_classes, dtype=np.float32)

        self._W = sharedX(initial_weights, 'W')
        self._b = sharedX(initial_bias, 'b')

        # Attributes expected by the base class.
        self.input_space = VectorSpace(dim=n_vis_units)
        self.output_space = VectorSpace(dim=n_classes)
Beispiel #7
0
    def __init__(self, n_vis, n_hid, sigma=0.4, W=None, h_bias=None, v_bias=None, numpy_rng=None,theano_rng=None):
        """
        Allocate (or adopt) the parameters of the model.

        :param n_vis: number of visible units; dimension of ``input_space``.
        :param n_hid: number of hidden units; dimension of ``output_space``.
        :param sigma: stored as ``self.sigma``; ``self.coeff`` is set to
            ``1 / sigma**2``.
        :param W: optional 2-d theano tensor shared variable. When ``None``,
            an ``(n_vis, n_hid)`` matrix is sampled uniformly within
            ``+/- 4 * sqrt(6 / (n_hid + n_vis))``.
        :param h_bias: optional 1-d theano tensor shared variable; zeros of
            length ``n_hid`` when ``None``.
        :param v_bias: optional 1-d theano tensor shared variable; zeros of
            length ``n_vis`` when ``None``.
        :param numpy_rng: ``numpy.random.RandomState``; one seeded with
            19920130 is created when ``None``.
        :param theano_rng: ``RandomStreams`` instance; when ``None`` one is
            created with a seed drawn from ``numpy_rng``.
        """
        # Original author's notes on what the base initialisers set up:
        Model.__init__(self) # self.names_to_del = set(); self._test_batch_size = 2
        Block.__init__(self) # self.fn = None; self.cpu_only = False

        self.n_vis = n_vis
        self.n_hid = n_hid
        self.sigma = sigma
        self.coeff = 1. / (self.sigma**2) # inverse variance coefficient

        self.input_space = VectorSpace(dim=self.n_vis)
        self.output_space = VectorSpace(dim=self.n_hid)

        if numpy_rng is None:
            # create a number generator
            numpy_rng = numpy.random.RandomState(seed=19920130)
        self.numpy_rng = numpy_rng

        if theano_rng is None:
            # drawn before the weights so the RNG stream order is unchanged
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
        self.theano_rng = theano_rng

        shared_tensor_type = theano.tensor.sharedvar.TensorSharedVariable

        if W is None:
            init_W = numpy.asarray(numpy_rng.uniform(
                      low=-4 * numpy.sqrt(6. / (n_hid + n_vis)),
                      high=4 * numpy.sqrt(6. / (n_hid + n_vis)),
                      size=(n_vis, n_hid)),
                      dtype=theano.config.floatX)
            # theano shared variables for weights and biases
            W = theano.shared(value=init_W, name='W', borrow=True)
        else:
            assert isinstance(W, shared_tensor_type)
            assert W.get_value().ndim == 2

        if h_bias is None:
            # create shared variable for hidden units bias
            h_bias = theano.shared(value=numpy.zeros(n_hid, dtype=theano.config.floatX), name='h_bias', borrow=True)
        else:
            assert isinstance(h_bias, shared_tensor_type)
            assert h_bias.get_value().ndim == 1

        if v_bias is None:
            # create shared variable for visible units bias
            v_bias = theano.shared(value=numpy.zeros(n_vis, dtype=theano.config.floatX), name='v_bias', borrow=True)
        else:
            # Fixed: this branch used to type-check `W` (copy-paste slip), so
            # a wrongly typed v_bias was never caught here.
            assert isinstance(v_bias, shared_tensor_type)
            assert v_bias.get_value().ndim == 1

        self.W = W
        self.h_bias = h_bias
        self.v_bias = v_bias

        self._params = [self.W, self.h_bias, self.v_bias]
Beispiel #8
0
    def __init__(self, n_vis, n_hid, corruptor=None, W=None, b_enc=None, b_dec=None, numpy_rng=None, dec_f=True, extra_cost=None,theano_rng=None):
        """Constructor.

        n_vis: number of visible units; dimension of ``input_space``.
        n_hid: number of hidden units; dimension of ``output_space``.
        corruptor: stored as ``self.corruptor`` only when it is not None.
        W, b_enc, b_dec: optional pre-built parameters; each one left as None
                is created here (random weights, zero biases).
        numpy_rng: RandomState; one seeded with 19900418 is created when None.
        dec_f: whether the decoding units include a non-linear function.
        extra_cost: penalty terms of the cost function beyond the basic MSE
                cost and CE cost, e.g. a sparsity penalty, weight decay, etc.
                Used in self.get_default_cost(). This way the desired penalty
                terms only need to be supplied when the model is initialised.
        theano_rng: RandomStreams; created from ``numpy_rng`` when None.
        """
        # Original author's notes on what the base initialisers set up:
        Model.__init__(self) # self.names_to_del = set(); self._test_batch_size = 2
        Block.__init__(self) # self.fn = None; self.cpu_only = False

        self.n_vis = n_vis
        self.n_hid = n_hid
        self.extra_cost = extra_cost
        self.dec_f = dec_f

        # The attribute is deliberately left untouched when no corruptor is given.
        if corruptor is not None:
            self.corruptor = corruptor

        self.input_space = VectorSpace(dim=n_vis)
        self.output_space = VectorSpace(dim=n_hid)

        # Random number generators (the numpy one seeds the theano one).
        if numpy_rng is None:
            numpy_rng = numpy.random.RandomState(seed=19900418)
        self.numpy_rng = numpy_rng

        if theano_rng is None:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
        self.theano_rng = theano_rng

        # Weight matrix: uniform within a symmetric bound when not supplied.
        if W is None:
            bound = 4 * numpy.sqrt(6. / (n_hid + n_vis + 1.))
            sampled = numpy_rng.uniform(low=-bound, high=bound, size=(n_vis, n_hid))
            W = theano.shared(
                value=numpy.asarray(sampled, dtype=theano.config.floatX),
                name='W',
                borrow=True)

        # Encoder (hidden) bias, zero-initialised when not supplied.
        if b_enc is None:
            zeros_hid = numpy.zeros(n_hid, dtype=theano.config.floatX)
            b_enc = theano.shared(value=zeros_hid, name='b_enc', borrow=True)

        # Decoder (visible) bias, zero-initialised when not supplied.
        if b_dec is None:
            zeros_vis = numpy.zeros(n_vis, dtype=theano.config.floatX)
            b_dec = theano.shared(value=zeros_vis, name='b_dec', borrow=True)

        self.W = W
        self.b_enc = b_enc
        self.b_dec = b_dec

        self._params = [self.W, self.b_enc, self.b_dec]
Beispiel #9
0
    def __init__(self, k, nvis, convergence_th=1e-6, max_iter=None, verbose=False):
        """
        Set up a k-means model.

        :param k: stored as ``self.k`` (number of clusters).
        :param nvis: value passed to ``VectorSpace`` for ``self.input_space``.
        :param convergence_th: stored as ``self.convergence_th``.
        :param max_iter: iteration cap; a falsy value (``None``, 0) becomes
            infinity, a negative value raises ``Exception``.
        :param verbose: stored as ``self.verbose``.
        """
        Block.__init__(self)
        Model.__init__(self)

        self.input_space = VectorSpace(nvis)

        self.k = k
        self.convergence_th = convergence_th

        # Reject negative caps first, then fall back to "no cap".
        if max_iter and max_iter < 0:
            raise Exception("KMeans init: max_iter should be positive.")
        self.max_iter = max_iter if max_iter else float("inf")

        self.verbose = verbose
Beispiel #10
0
    def __init__(self, k, nvis, convergence_th=1e-6, max_iter=None,
                 verbose=False):
        """
        Set up a k-means model.

        :param k: stored as ``self.k`` (number of clusters).
        :param nvis: value passed to ``VectorSpace`` for ``self.input_space``.
        :param convergence_th: stored as ``self.convergence_th``.
        :param max_iter: iteration cap; a falsy value (``None``, 0) becomes
            infinity, a negative value raises ``Exception``.
        :param verbose: stored as ``self.verbose``.
        """
        Block.__init__(self)
        Model.__init__(self)

        self.input_space = VectorSpace(nvis)

        self.k = k
        self.convergence_th = convergence_th

        # Start from "unbounded" and only narrow it for a usable cap.
        iteration_cap = float('inf')
        if max_iter:
            if max_iter < 0:
                raise Exception('KMeans init: max_iter should be positive.')
            iteration_cap = max_iter
        self.max_iter = iteration_cap

        self.verbose = verbose
Beispiel #11
0
    def __init__(self, n_vis, n_hid, W=None, h_bias=None, v_bias=None, numpy_rng=None,theano_rng=None):
        """
        Allocate (or adopt) the weights and biases of the model.

        :param n_vis: number of visible units; dimension of ``input_space``.
        :param n_hid: number of hidden units; dimension of ``output_space``.
        :param W: optional weight shared variable; when ``None`` an
            ``(n_vis, n_hid)`` matrix is sampled uniformly within
            ``+/- 4 * sqrt(6 / (n_hid + n_vis))``.
        :param h_bias: optional hidden bias; zeros of length ``n_hid`` when ``None``.
        :param v_bias: optional visible bias; zeros of length ``n_vis`` when ``None``.
        :param numpy_rng: RandomState; one seeded with 19900418 is created when ``None``.
        :param theano_rng: RandomStreams; created from ``numpy_rng`` when ``None``.
        """
        # Original author's notes on what the base initialisers set up:
        Model.__init__(self) # self.names_to_del = set(); self._test_batch_size = 2
        Block.__init__(self) # self.fn = None; self.cpu_only = False

        self.n_vis = n_vis
        self.n_hid = n_hid

        self.input_space = VectorSpace(dim=n_vis)
        self.output_space = VectorSpace(dim=n_hid)

        # Random number generators; the theano seed is the first draw taken
        # from the numpy generator.
        if numpy_rng is None:
            numpy_rng = numpy.random.RandomState(seed=19900418)
        self.numpy_rng = numpy_rng

        if theano_rng is None:
            theano_seed = numpy_rng.randint(2 ** 30)
            theano_rng = RandomStreams(theano_seed)
        self.theano_rng = theano_rng

        if W is None:
            limit = 4 * numpy.sqrt(6. / (n_hid + n_vis))
            init_W = numpy.asarray(
                numpy_rng.uniform(low=-limit, high=limit, size=(n_vis, n_hid)),
                dtype=theano.config.floatX)
            W = theano.shared(value=init_W, name='W', borrow=True)

        if h_bias is None:
            init_h = numpy.zeros(n_hid, dtype=theano.config.floatX)
            h_bias = theano.shared(value=init_h, name='h_bias', borrow=True)

        if v_bias is None:
            init_v = numpy.zeros(n_vis, dtype=theano.config.floatX)
            v_bias = theano.shared(value=init_v, name='v_bias', borrow=True)

        self.W = W
        self.h_bias = h_bias
        self.v_bias = v_bias

        self._params = [self.W, self.h_bias, self.v_bias]
Beispiel #12
0
    def __init__(self, nvis, nhid, irange=0.5, rng=None, init_bias_hid=0.0):
        """
        Construct an RBM object.

        Parameters
        ----------
        nvis : int
            Number of visible units in the model.
        nhid : int
            Number of hidden units in the model.
        irange : float, optional
            The size of the initial interval around 0 for weights: they are
            drawn uniformly between ``-irange`` and ``irange``.
        rng : RandomState object or seed, optional
            NumPy RandomState object to use when initializing parameters
            of the model, or (integer) seed to use to create one. When
            omitted, a generator seeded with 1001 is used.
        init_bias_hid : float, optional
            Value added to the zero vector that initializes the hidden bias.
        """

        Model.__init__(self)

        if rng is None:
            # TODO: global rng configuration stuff.
            rng = numpy.random.RandomState(1001)
        elif not hasattr(rng, 'uniform'):
            # The docstring allows a bare seed; it used to crash on
            # `rng.uniform` below, so wrap it in a generator first.
            rng = numpy.random.RandomState(rng)

        self.visbias = sharedX(
            numpy.zeros(nvis),
            name='vb',
            borrow=True
        )
        self.hidbias = sharedX(
            numpy.zeros(nhid) + init_bias_hid,
            name='hb',
            borrow=True
        )
        self.weights = sharedX(
            rng.uniform(-irange, irange, (nvis, nhid)),
            name='W',
            borrow=True
        )
        # Written straight into the instance dict, as in the original
        # (presumably to bypass attribute hooks -- TODO confirm).
        self.__dict__.update(nhid=nhid, nvis=nvis)
        self._params = [self.visbias, self.hidbias, self.weights]
Beispiel #13
0
    def __init__(self,
                 numpy_rng=None,
                 theano_rng=None,
                 n_h=99,
                 n_v=100,
                 init_from=None,
                 neg_sample_steps=1,
                 lr_spec=None,
                 lr_mults={},
                 iscales={},
                 clip_min={},
                 clip_max={},
                 l1={},
                 l2={},
                 sp_weight={},
                 sp_targ={},
                 batch_size=13,
                 compile=True,
                 debug=False,
                 seed=1241234,
                 my_save_path=None,
                 save_at=None,
                 save_every=None,
                 flags={},
                 max_updates=5e5):
        """
        Validate the hyper-parameters, copy them onto the instance, allocate
        the parameters and chains, build the learning-rate expression and
        (optionally) compile the theano functions.

        :param numpy_rng: optional numpy RandomState; when None one is created from `seed`.
        :param theano_rng: optional RandomStreams; when None one is seeded from the numpy RNG.
        :param n_h: number of h-hidden units
        :param n_v: number of visible units
        :param init_from: any truthy value raises NotImplementedError at the
               end of the constructor.
        :param iscales: optional dictionary containing initialization scale for each parameter
        :param neg_sample_steps: number of sampling updates to perform in negative phase.
        :param lr_spec: required dictionary. lr_spec['type'] must be 'anneal'
               (uses keys 'init', 'start', 'slope', 'floor') or 'linear'
               (uses keys 'start', 'end', together with max_updates).
        :param l1: hyper-parameter controlling amount of L1 regularization
        :param l2: hyper-parameter controlling amount of L2 regularization
        :param sp_weight: dictionary that must contain the key 'h'.
        :param sp_targ: dictionary that must contain the key 'h'.
        :param batch_size: size of positive and negative phase minibatch
        :param compile: compile sampling and learning functions
        :param seed: seed used to initialize numpy and theano RNGs.
        :param flags: dictionary passed to self.validate_flags.
        :param max_updates: denominator of the 'linear' learning-rate schedule.

        NOTE(review): several defaults are mutable dicts shared between calls,
        and the dicts passed for l1, l2, sp_weight, sp_targ, clip_min and
        clip_max are converted to floatX in place.
        """
        Model.__init__(self)
        Block.__init__(self)
        assert lr_spec is not None
        # sparsity settings are mandatory for 'h'
        for k in ['h']:
            assert k in sp_weight.keys()
        for k in ['h']:
            assert k in sp_targ.keys()
        self.validate_flags(flags)

        self.jobman_channel = None
        self.jobman_state = {}
        self.register_names_to_del(['jobman_channel'])

        ### make sure all parameters are floatX ###
        # (values are replaced in place; keys are unchanged, so iterating
        # while assigning is safe)
        for (k, v) in l1.iteritems():
            l1[k] = npy_floatX(v)
        for (k, v) in l2.iteritems():
            l2[k] = npy_floatX(v)
        for (k, v) in sp_weight.iteritems():
            sp_weight[k] = npy_floatX(v)
        for (k, v) in sp_targ.iteritems():
            sp_targ[k] = npy_floatX(v)
        for (k, v) in clip_min.iteritems():
            clip_min[k] = npy_floatX(v)
        for (k, v) in clip_max.iteritems():
            clip_max[k] = npy_floatX(v)

        # dump initialization parameters to object
        # (every local except `self` is copied: the constructor arguments and
        # any other local bound at this point, e.g. the loop variables k and v
        # from above; this is why self.max_updates exists further down)
        for (k, v) in locals().iteritems():
            if k != 'self': setattr(self, k, v)

        # allocate random number generators
        self.rng = numpy.random.RandomState(
            seed) if numpy_rng is None else numpy_rng
        self.theano_rng = RandomStreams(self.rng.randint(
            2**30)) if theano_rng is None else theano_rng

        ############### ALLOCATE PARAMETERS #################
        # allocate symbolic variable for input
        self.input = T.matrix('input')
        self.init_parameters()
        self.init_chains()

        # learning rate, with deferred 1./t annealing
        # (self.iter is the shared update counter the schedules depend on)
        self.iter = sharedX(0.0, name='iter')

        if lr_spec['type'] == 'anneal':
            # lr = max(floor, init * start / max(start, slope * iter))
            num = lr_spec['init'] * lr_spec['start']
            denum = T.maximum(lr_spec['start'], lr_spec['slope'] * self.iter)
            self.lr = T.maximum(lr_spec['floor'], num / denum)
        elif lr_spec['type'] == 'linear':
            # lr moves linearly from start towards end as iter approaches max_updates
            lr_start = npy_floatX(lr_spec['start'])
            lr_end = npy_floatX(lr_spec['end'])
            self.lr = lr_start + self.iter * (lr_end - lr_start) / npy_floatX(
                self.max_updates)
        else:
            raise ValueError('Incorrect value for lr_spec[type]')

        # configure input-space (new pylearn2 feature?)
        self.input_space = VectorSpace(n_v)
        self.output_space = VectorSpace(n_h)

        self.batches_seen = 0  # incremented on every batch
        self.examples_seen = 0  # incremented on every training example
        self.force_batch_size = batch_size  # force minibatch size

        self.error_record = []

        if compile: self.do_theano()

        # loading from a previous model is not supported; note this is only
        # reported after all of the set-up (and compilation) above has run
        if init_from:
            raise NotImplementedError()
Beispiel #14
0
    def __init__(self,
                 input=None,
                 n_u=[100, 100],
                 enable={},
                 load_from=None,
                 iscales=None,
                 clip_min={},
                 clip_max={},
                 pos_mf_steps=1,
                 pos_sample_steps=0,
                 neg_sample_steps=1,
                 lr_spec={},
                 lr_mults={},
                 l1={},
                 l2={},
                 l1_inf={},
                 flags=None,
                 momentum_lambda=0,
                 cg_params={},
                 batch_size=13,
                 computational_bs=0,
                 compile=True,
                 seed=1241234,
                 sp_targ_h=None,
                 sp_weight_h=None,
                 sp_pos_k=5,
                 my_save_path=None,
                 save_at=None,
                 save_every=None,
                 max_updates=1e6):
        """
        Validate the hyper-parameters, copy them onto the instance, allocate
        parameters and samples, build the learning-rate expression and
        (optionally) compile the theano functions.

        :param n_u: list, containing number of units per layer. n_u[0] contains number
         of visible units, while n_u[i] (with i > 0) contains number of hid. units at layer i.
         Must have more than one entry.
        :param enable: dictionary of flags with on/off behavior
        :param load_from: if truthy, passed to self.load_parameters(fname=...).
        :param iscales: optional dictionary containing initialization scale for each parameter.
               Key of dictionary should match the name of the associated shared variable.
               Missing 'bias%i' entries default to 0. and missing 'W%i' entries to 0.1;
               None is treated as an empty dictionary.
        :param pos_mf_steps: number of mean-field iterations to perform in positive phase
        :param neg_sample_steps: number of sampling updates to perform in negative phase.
        :param lr_spec: dictionary describing the learning-rate schedule.
               lr_spec['type'] must be 'anneal' (uses keys 'init', 'start',
               'slope', 'floor') or 'linear' (uses keys 'start', 'end',
               together with max_updates).
        :param lr_mults: dictionary, optionally containing learning rate multipliers
               for parameters of the model. Keys should match the name of the
               shared variable whose learning rate is to be adjusted.
        :param l1: dictionary, whose keys are model parameter names, and values are
               hyper-parameters controlling degree of L1-regularization.
        :param l2: same as l1, but for L2 regularization.
        :param l1_inf: same as l1, but the L1 penalty is centered at -infinity instead of 0.
        :param flags: optional dictionary of behaviour switches; missing
               entries are filled in with defaults (in place when a dict is
               passed). The string 'None' for flags['precondition'] is
               normalised to None.
        :param cg_params: dictionary with keys ['rtol','damp','maxiter']
        :param batch_size: size of positive and negative phase minibatch
        :param computational_bs: batch size used internally by natural
               gradient to reduce memory consumption
        :param seed: seed used to initialize numpy and theano RNGs.
        :param my_save_path: if None, do not save model. Otherwise, contains stem of filename
               to which we will save the model (everything but the extension).
        :param save_at: list containing iteration counts at which to save model
        :param save_every: scalar value. Save model every `save_every` iterations.
        :param max_updates: denominator of the 'linear' learning-rate schedule.
        """
        Model.__init__(self)
        Block.__init__(self)
        ### VALIDATE PARAMETERS AND SET DEFAULT VALUES ###
        assert lr_spec is not None
        # `iscales` defaults to None, which used to crash on `.setdefault`
        # below; `flags` used to default to one dict shared by all calls and
        # polluted by the `setdefault` calls. Both now get a fresh dict.
        if iscales is None:
            iscales = {}
        if flags is None:
            flags = {}
        # values are replaced in place; keys are unchanged, so iterating while
        # assigning is safe
        for (k, v) in clip_min.iteritems():
            clip_min[k] = npy_floatX(v)
        for (k, v) in clip_max.iteritems():
            clip_max[k] = npy_floatX(v)
        for i in xrange(len(n_u)):
            iscales.setdefault('bias%i' % i, 0.)
        for i in xrange(len(n_u)):
            iscales.setdefault('W%i' % i, 0.1)
        flags.setdefault('enable_centering', False)
        flags.setdefault('enable_natural', False)
        flags.setdefault('enable_warm_start', False)
        flags.setdefault('mlbiases', False)
        flags.setdefault('precondition', None)
        flags.setdefault('minres', False)
        flags.setdefault('minresQLP', False)
        if flags['precondition'] == 'None': flags['precondition'] = None

        self.jobman_channel = None
        self.jobman_state = {}
        self.register_names_to_del(['jobman_channel'])

        ### DUMP INITIALIZATION PARAMETERS TO OBJECT ###
        # Every local except `self` is copied (constructor arguments plus any
        # loop variable bound above); this is why self.max_updates exists below.
        for (k, v) in locals().iteritems():
            if k != 'self': setattr(self, k, v)

        assert len(n_u) > 1
        self.n_v = n_u[0]
        self.depth = len(n_u)

        # allocate random number generators
        self.rng = numpy.random.RandomState(seed)
        self.theano_rng = RandomStreams(self.rng.randint(2**30))

        # allocate bilinear-weight matrices
        self.input = T.matrix()
        self.init_parameters()
        self.init_dparameters()
        self.init_centering()
        self.init_samples()

        # learning rate, with deferred 1./t annealing
        self.iter = sharedX(0.0, name='iter')

        if lr_spec['type'] == 'anneal':
            # lr = max(floor, init * start / max(start, slope * iter))
            num = lr_spec['init'] * lr_spec['start']
            denum = T.maximum(lr_spec['start'], lr_spec['slope'] * self.iter)
            self.lr = T.maximum(lr_spec['floor'], num / denum)
        elif lr_spec['type'] == 'linear':
            # lr moves linearly from start towards end as iter approaches max_updates
            lr_start = npy_floatX(lr_spec['start'])
            lr_end = npy_floatX(lr_spec['end'])
            self.lr = lr_start + self.iter * (lr_end - lr_start) / npy_floatX(
                self.max_updates)
        else:
            raise ValueError('Incorrect value for lr_spec[type]')

        # counter for CPU-time
        self.cpu_time = 0.

        if load_from:
            self.load_parameters(fname=load_from)

        # configure input-space (?new pylearn2 feature?)
        self.input_space = VectorSpace(n_u[0])
        self.output_space = VectorSpace(n_u[-1])
        self.batches_seen = 0  # incremented on every batch
        self.examples_seen = 0  # incremented on every training example
        self.force_batch_size = batch_size  # force minibatch size
        self.error_record = []

        if compile: self.do_theano()
Beispiel #15
0
    def __init__(self, model_type=None, alpha=0.2,
                n_vis_img=None, n_vis_txt=None, n_hid_img=None, n_hid_txt=None, corruptor_img=None, corruptor_txt=None,
                W_img=None, W_txt=None, b_enc_img=None, b_enc_txt=None, b_dec_img=None, b_dec_txt=None, dec_f_img=True, dec_f_txt=True,
                img_AE=None, txt_AE=None, numpy_rng=None, theano_rng=None):
        """
        Combine an image autoencoder and a text autoencoder into one model.

        model_type: String selecting the model type; it lets
                    get_default_cost() pick the desired training cost.
                    Options: 'Combine', 'CrossModal', 'FullModal'
        param: alpha, weighting coefficient between the standard cost and the
                    correlation cost; the larger alpha is, the larger the
                    share of the standard cost in the total cost.
        param: img_AE, autoencoder used on the image side; built here from
                    the ``*_img`` arguments when None.
        param: txt_AE, autoencoder used on the text side; built here from
                    the ``*_txt`` arguments when None.
        """
        # Original author's notes on what the base initialisers set up:
        Model.__init__(self) # self.names_to_del = set(); self._test_batch_size = 2
        Block.__init__(self) # self.fn = None; self.cpu_only = False
        assert model_type in ['Combine', 'CrossModal', 'FullModal']
        self.model_type = model_type
        self.alpha = alpha

        if numpy_rng is None:
            numpy_rng = numpy.random.RandomState(seed=19900418)
        self.numpy_rng = numpy_rng

        if theano_rng is None:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
        self.theano_rng = theano_rng

        # The only thing the two autoencoders share is the random number
        # generators.
        if img_AE is None:
            assert n_vis_img is not None
            assert n_hid_img is not None
            img_settings = dict(
                n_vis=n_vis_img, n_hid=n_hid_img, corruptor=corruptor_img,
                W=W_img, b_enc=b_enc_img, b_dec=b_dec_img, dec_f=dec_f_img,
                numpy_rng=self.numpy_rng, theano_rng=self.theano_rng)
            img_AE = MyAutoEncoder(**img_settings)

        if txt_AE is None:
            assert n_vis_txt is not None
            assert n_hid_txt is not None
            txt_settings = dict(
                n_vis=n_vis_txt, n_hid=n_hid_txt, corruptor=corruptor_txt,
                W=W_txt, b_enc=b_enc_txt, b_dec=b_dec_txt, dec_f=dec_f_txt,
                numpy_rng=self.numpy_rng, theano_rng=self.theano_rng)
            txt_AE = MyAutoEncoder(**txt_settings)

        # For now the model only accepts code spaces of equal dimension on
        # both sides.
        assert img_AE.n_hid == txt_AE.n_hid

        self.img_AE = img_AE
        self.txt_AE = txt_AE

        # Convenience aliases onto the sub-models' parameters (marked
        # "not used" by the original author).
        self.W_img = img_AE.W
        self.W_txt = txt_AE.W
        self.b_enc_img = img_AE.b_enc
        self.b_dec_img = img_AE.b_dec
        self.b_enc_txt = txt_AE.b_enc
        self.b_dec_txt = txt_AE.b_dec

        # Per-modality sizes, then the concatenated totals.
        self.n_vis_img = img_AE.n_vis
        self.n_vis_txt = txt_AE.n_vis
        self.n_hid_img = img_AE.n_hid
        self.n_hid_txt = txt_AE.n_hid
        self.n_vis = img_AE.n_vis + txt_AE.n_vis
        self.n_hid = img_AE.n_hid + txt_AE.n_hid

        self.input_space = VectorSpace(dim=self.n_vis)
        self.output_space = VectorSpace(dim=self.n_hid)

        # Parameter order: image weight matrix, image encoding bias, image
        # decoding bias, text weight matrix, text encoding bias, text
        # decoding bias.
        self._params = [
            img_AE.W, img_AE.b_enc, img_AE.b_dec,
            txt_AE.W, txt_AE.b_enc, txt_AE.b_dec,
        ]
Beispiel #16
0
    def __init__(self, numpy_rng = None, theano_rng = None,
            n_h=100, bw_s=1, n_v=100, init_from=None,
            neg_sample_steps=1,
            lr_spec=None, lr_timestamp=None, lr_mults = {},
            iscales={}, clip_min={}, clip_max={}, truncation_bound={},
            l1 = {}, l2 = {}, orth_lambda=0.,
            var_param_alpha='exp', var_param_lambd='linear',
            sp_type='kl', sp_weight={}, sp_targ={},
            batch_size = 13,
            compile=True,
            debug=False,
            seed=1241234,
            my_save_path=None, save_at=None, save_every=None,
            flags = {},
            max_updates = 5e5):
        """
        Validate the hyper-parameters, copy them onto the instance, allocate
        the parameters and chains, build the learning-rate expression,
        optionally compile the theano functions and optionally load
        parameters from `init_from`.

        :param numpy_rng: optional numpy RandomState; when None one is created from `seed`.
        :param theano_rng: optional RandomStreams; when None one is seeded from the numpy RNG.
        :param n_h: number of h-hidden units
        :param n_v: number of visible units
        :param init_from: if truthy, passed to self.load_params at the end of the constructor.
        :param iscales: dictionary containing initialization scale for each parameter.
               Must contain 'Wv' and 'hbias'; missing 'mu', 'alpha' and 'lambd'
               entries are filled in (in place) with 1., 0. and 0.
        :param neg_sample_steps: number of sampling updates to perform in negative phase.
        :param lr_spec: required dictionary. lr_spec['type'] must be 'anneal'
               (uses keys 'init', 'start', 'slope', 'floor') or 'linear'
               (uses keys 'start', 'end', together with max_updates).
        :param l1: hyper-parameter controlling amount of L1 regularization
        :param l2: hyper-parameter controlling amount of L2 regularization
        :param sp_weight: dictionary that must contain the key 'h'.
        :param sp_targ: dictionary that must contain the key 'h'.
        :param batch_size: size of positive and negative phase minibatch
        :param compile: compile sampling and learning functions
        :param seed: seed used to initialize numpy and theano RNGs.
        :param flags: dictionary passed to self.validate_flags.
        :param max_updates: denominator of the 'linear' learning-rate schedule.

        NOTE(review): several defaults are mutable dicts shared between calls;
        with the default (empty) iscales, sp_weight or sp_targ the assertions
        below fail, so those arguments are effectively required.
        """
        Model.__init__(self)
        Block.__init__(self)
        assert lr_spec is not None
        for k in ['Wv', 'hbias']: assert k in iscales.keys()
        iscales.setdefault('mu', 1.)
        iscales.setdefault('alpha', 0.)
        iscales.setdefault('lambd', 0.)
        for k in ['h']: assert k in sp_weight.keys()
        for k in ['h']: assert k in sp_targ.keys()
        self.validate_flags(flags)

        self.jobman_channel = None
        self.jobman_state = {}
        self.register_names_to_del(['jobman_channel'])

        ### make sure all parameters are floatX ###
        # (values are replaced in place; keys are unchanged, so iterating
        # while assigning is safe)
        for (k,v) in l1.iteritems(): l1[k] = npy_floatX(v)
        for (k,v) in l2.iteritems(): l2[k] = npy_floatX(v)
        for (k,v) in sp_weight.iteritems(): sp_weight[k] = npy_floatX(v)
        for (k,v) in sp_targ.iteritems(): sp_targ[k] = npy_floatX(v)
        for (k,v) in clip_min.iteritems(): clip_min[k] = npy_floatX(v)
        for (k,v) in clip_max.iteritems(): clip_max[k] = npy_floatX(v)

        # dump initialization parameters to object
        # (every local except `self` is copied: the constructor arguments and
        # any other local bound at this point, e.g. the loop variables k and v
        # from above; this is why self.max_updates exists further down)
        for (k,v) in locals().iteritems():
            if k!='self': setattr(self,k,v)

        # allocate random number generators
        self.rng = numpy.random.RandomState(seed) if numpy_rng is None else numpy_rng
        self.theano_rng = RandomStreams(self.rng.randint(2**30)) if theano_rng is None else theano_rng

        # allocate symbolic variable for input
        self.input = T.matrix('input')
        self.init_parameters()
        self.init_chains()

        # learning rate, with deferred 1./t annealing
        # (self.iter is the shared update counter the schedules depend on)
        self.iter = sharedX(0.0, name='iter')

        if lr_spec['type'] == 'anneal':
            # lr = max(floor, init * start / max(start, slope * iter))
            num = lr_spec['init'] * lr_spec['start'] 
            denum = T.maximum(lr_spec['start'], lr_spec['slope'] * self.iter)
            self.lr = T.maximum(lr_spec['floor'], num/denum) 
        elif lr_spec['type'] == 'linear':
            # lr moves linearly from start towards end as iter approaches max_updates
            lr_start = npy_floatX(lr_spec['start'])
            lr_end   = npy_floatX(lr_spec['end'])
            self.lr = lr_start + self.iter * (lr_end - lr_start) / npy_floatX(self.max_updates)
        else:
            raise ValueError('Incorrect value for lr_spec[type]')

        # configure input-space (new pylearn2 feature?)
        self.input_space = VectorSpace(n_v)
        self.output_space = VectorSpace(n_h)

        self.batches_seen = 0                    # incremented on every batch
        self.examples_seen = 0                   # incremented on every training example
        self.force_batch_size = batch_size  # force minibatch size

        self.error_record = []
 
        if compile: self.do_theano()

        #### load layer 1 parameters from file ####
        if init_from:
            self.load_params(init_from)
Beispiel #17
0
    def __init__(self,
                 input=None,
                 Wv=None,
                 vbias=None,
                 hbias=None,
                 numpy_rng=None,
                 theano_rng=None,
                 n_h=100,
                 bw_s=1,
                 n_v=100,
                 init_from=None,
                 neg_sample_steps=1,
                 lr=None,
                 lr_timestamp=None,
                 lr_mults={},
                 iscales={},
                 clip_min={},
                 clip_max={},
                 vbound=5.,
                 l1={},
                 l2={},
                 orth_lambda=0.,
                 var_param_alpha='exp',
                 var_param_beta='linear',
                 sp_type='kl',
                 sp_weight={},
                 sp_targ={},
                 batch_size=13,
                 scalar_b=False,
                 compile=True,
                 debug=False,
                 seed=1241234,
                 my_save_path=None,
                 save_at=None,
                 save_every=None,
                 flags={},
                 max_updates=5e5):
        """
        Validate the hyper-parameters, allocate every shared variable of the
        model and, if requested, compile it and load saved parameters.

        Every argument is also copied onto ``self`` under its own name; some
        of those attributes (``Wv``, ``hbias``, ``lr``, ``input``, ``vbound``)
        are replaced further down by the objects built from them.

        :param input: symbolic input; a new ``T.matrix('input')`` is created when None
        :param Wv: ready-made weight variable; when None a random
               ``(n_v, n_h * bw_s)`` matrix scaled by ``iscales['Wv']`` is allocated
        :param vbias: only stored on the object by this constructor
        :param hbias: ready-made bias variable; when None a vector of ``n_h``
               entries equal to ``iscales['hbias']`` is allocated
        :param numpy_rng: numpy RandomState to use; built from ``seed`` when None
        :param theano_rng: theano RandomStreams to use; when None one is
               seeded from the numpy generator
        :param n_h: number of h-hidden units
        :param bw_s: number of consecutive s units attached to each h unit
               (``n_s = n_h * bw_s``)
        :param n_v: number of visible units
        :param init_from: when truthy, passed to ``self.load_params`` as the
               very last step
        :param neg_sample_steps: number of sampling updates to perform in negative phase.
        :param lr: required dictionary describing the learning rate schedule.
               ``lr['type']`` is ``'anneal'`` (keys ``init``, ``start``,
               ``slope``, ``floor``) or ``'linear'`` (keys ``start``, ``end``)
        :param lr_timestamp: passed with each entry of ``lr_mults`` to
               ``tools.HyperParamIterator``
        :param lr_mults: dictionary of per-parameter learning rate multiplier
               specifications
        :param iscales: dictionary containing initialization scale for each parameter.
               Must contain 'Wv', 'vbias' and 'hbias'; 'mu', 'alpha' and
               'beta' default to 1., 0. and 0. The dictionary is modified in place.
        :param clip_min: dictionary whose values are converted to floatX in place
        :param clip_max: dictionary whose values are converted to floatX in place
        :param vbound: initial value of the shared variable ``vbound``
        :param l1: hyper-parameter controlling amount of L1 regularization
        :param l2: hyper-parameter controlling amount of L2 regularization
        :param var_param_alpha: 'exp', 'softplus' or 'linear'; function applied
               to ``alpha`` to obtain ``alpha_prec``
        :param var_param_beta: same choices, applied to ``beta`` to obtain ``beta_prec``
        :param sp_weight: dictionary that must contain the key 'h'
        :param sp_targ: dictionary that must contain the key 'h'
        :param batch_size: size of positive and negative phase minibatch
        :param compile: compile sampling and learning functions
        :param seed: seed used to initialize numpy and theano RNGs.
        :param max_updates: denominator of the 'linear' learning rate schedule

        ``orth_lambda``, ``sp_type``, ``scalar_b``, ``debug``, ``my_save_path``,
        ``save_at``, ``save_every`` and ``flags`` are only stored by this
        constructor.

        :raises AssertionError: if ``lr`` is None or a required key is missing
        :raises ValueError: if ``lr['type']`` is not a supported schedule
        """
        Model.__init__(self)
        Block.__init__(self)
        assert lr is not None
        for k in ['Wv', 'vbias', 'hbias']:
            assert k in iscales.keys()
        # optional scales: filled in on the caller's dictionary itself
        iscales.setdefault('mu', 1.)
        iscales.setdefault('alpha', 0.)
        iscales.setdefault('beta', 0.)
        for k in ['h']:
            assert k in sp_weight.keys()
        for k in ['h']:
            assert k in sp_targ.keys()

        self.jobman_channel = None
        self.jobman_state = {}
        self.register_names_to_del(['jobman_channel'])

        ### make sure all parameters are floatX ###
        # values are replaced in place, so the caller's dictionaries change too
        for (k, v) in l1.iteritems():
            l1[k] = npy_floatX(v)
        for (k, v) in l2.iteritems():
            l2[k] = npy_floatX(v)
        for (k, v) in sp_weight.iteritems():
            sp_weight[k] = npy_floatX(v)
        for (k, v) in sp_targ.iteritems():
            sp_targ[k] = npy_floatX(v)
        for (k, v) in clip_min.iteritems():
            clip_min[k] = npy_floatX(v)
        for (k, v) in clip_max.iteritems():
            clip_max[k] = npy_floatX(v)

        # dump initialization parameters to object
        # NOTE: this copies *every* local except self, so the loop variables
        # k and v left over from above become attributes as well. Adding or
        # renaming a local above this point changes the object's attributes.
        for (k, v) in locals().iteritems():
            if k != 'self': setattr(self, k, v)

        # allocate random number generators
        # NOTE: all random initial values below are drawn from self.rng in
        # sequence, so reordering the draws changes the result for a given seed.
        self.rng = numpy.random.RandomState(
            seed) if numpy_rng is None else numpy_rng
        self.theano_rng = RandomStreams(self.rng.randint(
            2**30)) if theano_rng is None else theano_rng

        ############### ALLOCATE PARAMETERS #################
        # total number of s units
        self.n_s = self.n_h * self.bw_s

        self.wv_norms = sharedX(1.0 * numpy.ones(self.n_s), name='wv_norms')
        if Wv is None:
            wv_val = self.rng.randn(n_v, self.n_s) * iscales['Wv']
            self.Wv = sharedX(wv_val, name='Wv')
        else:
            self.Wv = Wv

        # fixed 0/1 matrix (plain numpy array, not a shared variable):
        # column i is 1 on the bw_s rows belonging to h unit i
        self.Wh = numpy.zeros((self.n_s, self.n_h), dtype=floatX)
        for i in xrange(self.n_h):
            self.Wh[i * bw_s:(i + 1) * bw_s, i] = 1.

        # allocate shared variables for bias parameters
        if hbias is None:
            self.hbias = sharedX(iscales['hbias'] * numpy.ones(n_h),
                                 name='hbias')
        else:
            self.hbias = hbias

        # mean (mu) and precision (alpha) parameters on s
        self.mu = sharedX(iscales['mu'] * numpy.ones(self.n_s), name='mu')
        self.alpha = sharedX(iscales['alpha'] * numpy.ones(self.n_s),
                             name='alpha')
        # available transforms from a raw parameter to the value actually used
        var_param_func = {
            'exp': T.exp,
            'softplus': T.nnet.softplus,
            'linear': lambda x: x
        }
        self.alpha_prec = var_param_func[self.var_param_alpha](self.alpha)

        # diagonal of precision matrix of visible units
        self.vbound = sharedX(vbound, name='vbound')
        self.beta = sharedX(iscales['beta'] * numpy.ones(n_v), name='beta')
        self.beta_prec = var_param_func[self.var_param_beta](self.beta)

        # allocate shared variable for persistent chain
        # (one row per minibatch example, initialised with uniform noise)
        self.neg_v = sharedX(self.rng.rand(batch_size, n_v), name='neg_v')
        self.neg_ev = sharedX(self.rng.rand(batch_size, n_v), name='neg_ev')
        self.neg_s = sharedX(self.rng.rand(batch_size, self.n_s), name='neg_s')
        self.neg_h = sharedX(self.rng.rand(batch_size, n_h), name='neg_h')

        # moving average values for sparsity
        # NOTE(review): the second variable is named 'sp_pog_h', which looks
        # like a typo for 'sp_pos_h' -- confirm nothing looks it up by name
        # before renaming.
        self.sp_pos_v = sharedX(self.rng.rand(1, self.n_v), name='sp_pos_v')
        self.sp_pos_h = sharedX(self.rng.rand(1, self.n_h), name='sp_pog_h')

        # learning rate, with deferred 1./t annealing
        self.iter = sharedX(0.0, name='iter')

        # self.lr is a symbolic expression of self.iter; it replaces the lr
        # dictionary stored on the object by the dump above
        if lr['type'] == 'anneal':
            # init * start / max(start, slope * iter), never below floor
            num = lr['init'] * lr['start']
            denum = T.maximum(lr['start'], lr['slope'] * self.iter)
            self.lr = T.maximum(lr['floor'], num / denum)
        elif lr['type'] == 'linear':
            # straight line from start (iter = 0) to end (iter = max_updates)
            lr_start = npy_floatX(lr['start'])
            lr_end = npy_floatX(lr['end'])
            self.lr = lr_start + self.iter * (lr_end - lr_start) / npy_floatX(
                self.max_updates)
        else:
            raise ValueError('Incorrect value for lr[type]')

        # learning rate - implemented as shared parameter for GPU
        self.lr_mults_it = {}
        self.lr_mults_shrd = {}
        for (k, v) in lr_mults.iteritems():
            # one iterator per multiplier, plus a shared variable holding the
            # iterator's current value
            self.lr_mults_it[k] = tools.HyperParamIterator(
                lr_timestamp, lr_mults[k])
            self.lr_mults_shrd[k] = sharedX(self.lr_mults_it[k].value,
                                            name='lr_mults_shrd' + k)

        # allocate symbolic variable for input
        self.input = T.matrix('input') if input is None else input

        # configure input-space (new pylearn2 feature?)
        self.input_space = VectorSpace(n_v)
        self.output_space = VectorSpace(n_h)

        self.batches_seen = 0  # incremented on every batch
        self.examples_seen = 0  # incremented on every training example
        self.force_batch_size = batch_size  # force minibatch size

        self.error_record = []

        if compile: self.do_theano()

        #### load layer 1 parameters from file ####
        if init_from:
            self.load_params(init_from)
Beispiel #18
0
    def __init__(self, input_space, output_channels, pool_shape, batch_size=None, detector_axes=('b', 'c', 0, 1),
                    kernel_shape=(2,2), kernel_stride=(1, 1), border_mode='valid',
                    transformer=None, h_bias=None, v_bias=None, numpy_rng=None,theano_rng=None):
        """
        Set up a convolutional RBM.

        For a convolutional RBM the hidden layer corresponds to the detector
        layer obtained after the convolution, while the output corresponds to
        the pooling layer. So where an ordinary RBM only has an input and an
        output space, this model has three: input, detector and output.

        There is no separate ``pool_stride`` argument: following Honglak
        Lee's original paper the pooling regions do not overlap, which
        requires ``pool_stride == pool_shape``.

        :param input_space: Conv2DSpace describing the visible layer
        :param output_channels: number of feature maps of the detector and
               output spaces
        :param pool_shape: shape of a pooling region; also used as the pooling stride
        :param batch_size: stored as ``batch_size`` and ``force_batch_size``
               and forwarded to ``make_random_conv2D``
        :param detector_axes: axes of the detector and output spaces
        :param kernel_shape: shape of the convolution kernel
        :param kernel_stride: stride (subsampling factor) of the convolution
        :param border_mode: 'valid' or 'full'
        :param transformer: pylearn2.linear.Conv2D instance; a random one is
               created when None
        :param h_bias: shared vector with one component per output feature
               map; zeros when None
        :param v_bias: shared vector with one component per input feature
               map; zeros when None
        :param numpy_rng: numpy RandomState; a fixed-seed one is created when None
        :param theano_rng: theano RandomStreams; seeded from ``numpy_rng`` when None

        :raises AssertionError: if ``input_space`` is not a Conv2DSpace or
                ``transformer`` is given but is not a Conv2D
        :raises ValueError: if ``border_mode`` is neither 'valid' nor 'full'
        """
        Model.__init__(self)
        Block.__init__(self)

        # Validate up front, before any attribute of input_space is read and
        # before detector_shape is needed.
        assert isinstance(input_space, Conv2DSpace)
        if border_mode not in ('valid', 'full'):
            raise ValueError("border_mode must be 'valid' or 'full', got %r"
                             % (border_mode,))

        self.kernel_shape = kernel_shape
        self.kernel_stride = kernel_stride
        self.pool_shape = pool_shape
        self.pool_stride = pool_shape  # non-overlapping pooling
        self.border_mode = border_mode

        self.batch_size = batch_size
        self.force_batch_size = batch_size

        input_shape = input_space.shape
        if border_mode == 'valid':
            # kernel positions that fit entirely inside the input, every
            # stride-th one kept
            detector_shape = [
                (input_shape[0] - kernel_shape[0]) // int(kernel_stride[0]) + 1,
                (input_shape[1] - kernel_shape[1]) // int(kernel_stride[1]) + 1]
        else:
            # A full convolution has (in + k - 1) positions at stride 1;
            # subsampling keeps ceil((in + k - 1) / stride) of them, i.e.
            # (in + k - 2) // stride + 1. Same as before for stride 1.
            detector_shape = [
                (input_shape[0] + kernel_shape[0] - 2) // int(kernel_stride[0]) + 1,
                (input_shape[1] + kernel_shape[1] - 2) // int(kernel_stride[1]) + 1]

        self.input_space = input_space
        self.detector_space = Conv2DSpace(shape=detector_shape, num_channels=output_channels, axes=detector_axes)

        # Only the case where each detector feature map can be tiled exactly
        # by non-overlapping pool_shape regions is handled for now.
        # TODO: handle padding at the borders.
        output_shape = (detector_shape[0] // pool_shape[0], detector_shape[1] // pool_shape[1])
        self.output_space = Conv2DSpace(shape=output_shape, num_channels=output_channels, axes=detector_axes)

        # total unit counts, used below to scale the random initial weights
        self.n_vis = numpy.prod(input_space.shape) * input_space.num_channels
        self.n_hid = detector_shape[0] * detector_shape[1] * output_channels

        if numpy_rng is None:
            # create a number generator
            numpy_rng = numpy.random.RandomState(seed=19900418)
        self.numpy_rng = numpy_rng

        if theano_rng is None:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
        self.theano_rng = theano_rng

        if transformer is None:
            irange = 4 * numpy.sqrt(6. / (self.n_hid + self.n_vis))
            transformer = make_random_conv2D(irange=irange, input_space=self.input_space, output_space=self.detector_space,
                                             kernel_shape = self.kernel_shape, batch_size = self.batch_size,
                                             subsample = kernel_stride,border_mode = self.border_mode, rng=self.numpy_rng)
        else:
            assert isinstance(transformer, Conv2D)

        if h_bias is None:
            # create shared variable for hidden units bias
            h_bias = theano.shared(value=numpy.zeros(self.detector_space.num_channels, dtype=theano.config.floatX), name='h_bias', borrow=True)

        if v_bias is None:
            # create shared variable for visible units bias
            v_bias = theano.shared(value=numpy.zeros(self.input_space.num_channels, dtype=theano.config.floatX), name='v_bias', borrow=True)

        self.transformer = transformer
        self.h_bias = h_bias
        self.v_bias = v_bias

        self._params = safe_union(self.transformer.get_params(), [self.h_bias, self.v_bias])
Beispiel #19
0
    def __init__(self, 
            input=None, Wv=None, vbias=None, hbias=None,
            numpy_rng = None, theano_rng = None,
            n_h=100, bw_s=1, n_v=100, init_from=None,
            neg_sample_steps=1,
            lr=None, lr_timestamp=None, lr_mults = {},
            iscales={}, clip_min={}, clip_max={}, l1 = {}, l2 = {},
            sp_type='kl', sp_weight={}, sp_targ={},
            batch_size = 13,
            compile=True,
            debug=False,
            seed=1241234,
            my_save_path=None, save_at=None, save_every=None,
            flags = {},
            max_updates = 5e5):
        """
        Validate the hyper-parameters, allocate every shared variable of the
        model and, if requested, compile it.

        Every argument is also copied onto ``self`` under its own name; some
        of those attributes (``Wv``, ``vbias``, ``hbias``, ``lr``, ``input``)
        are replaced further down by the objects built from them.

        :param input: symbolic input; a new ``T.matrix('input')`` is created when None
        :param Wv: ready-made weight variable; when None a random
               ``(n_v, n_h * bw_s)`` matrix scaled by ``iscales['Wv']`` is allocated
        :param vbias: ready-made bias variable; when None a vector of ``n_v``
               entries equal to ``iscales['vbias']`` is allocated
        :param hbias: ready-made bias variable; when None a vector of ``n_h``
               entries equal to ``iscales['hbias']`` is allocated
        :param numpy_rng: numpy RandomState to use; built from ``seed`` when None
        :param theano_rng: theano RandomStreams to use; when None one is
               seeded from the numpy generator
        :param n_h: number of h-hidden units
        :param bw_s: number of consecutive s units attached to each h unit
               (``n_s = n_h * bw_s``)
        :param n_v: number of visible units
        :param init_from: when truthy, passed to ``self.load_params`` right
               after the parameters are allocated
        :param lr: required dictionary describing the learning rate schedule.
               ``lr['type']`` is ``'anneal'`` (keys ``init``, ``start``,
               ``slope``, ``floor``) or ``'linear'`` (keys ``start``, ``end``)
        :param lr_timestamp: passed with each entry of ``lr_mults`` to
               ``tools.HyperParamIterator``
        :param lr_mults: dictionary of per-parameter learning rate multiplier
               specifications
        :param iscales: dictionary containing initialization scale for each parameter.
               Must contain 'Wv', 'vbias' and 'hbias'; 'mu' and 'alpha'
               default to 1. and 0. The dictionary is modified in place.
        :param clip_min: dictionary whose values are converted to floatX in place
        :param clip_max: dictionary whose values are converted to floatX in place
        :param neg_sample_steps: number of sampling updates to perform in negative phase.
        :param l1: hyper-parameter controlling amount of L1 regularization
        :param l2: hyper-parameter controlling amount of L2 regularization
        :param sp_weight: dictionary that must contain the key 'h'
        :param sp_targ: dictionary that must contain the key 'h'
        :param batch_size: size of positive and negative phase minibatch
        :param compile: compile sampling and learning functions
        :param seed: seed used to initialize numpy and theano RNGs.
        :param max_updates: denominator of the 'linear' learning rate schedule

        ``sp_type``, ``debug``, ``my_save_path``, ``save_at``, ``save_every``
        and ``flags`` are only stored by this constructor.

        :raises AssertionError: if ``lr`` is None or a required key is missing
        :raises ValueError: if ``lr['type']`` is not a supported schedule
        """
        Model.__init__(self)
        Block.__init__(self)
        assert lr is not None
        for k in ['Wv', 'vbias', 'hbias']: assert k in iscales.keys()
        # optional scales: filled in on the caller's dictionary itself
        iscales.setdefault('mu', 1.)
        iscales.setdefault('alpha', 0.)
        for k in ['h']: assert k in sp_weight.keys()
        for k in ['h']: assert k in sp_targ.keys()

        self.jobman_channel = None
        self.jobman_state = {}
        self.register_names_to_del(['jobman_channel'])

        ### make sure all parameters are floatX ###
        # values are replaced in place, so the caller's dictionaries change too
        for (k,v) in l1.iteritems(): l1[k] = npy_floatX(v)
        for (k,v) in l2.iteritems(): l2[k] = npy_floatX(v)
        for (k,v) in sp_weight.iteritems(): sp_weight[k] = npy_floatX(v)
        for (k,v) in sp_targ.iteritems(): sp_targ[k] = npy_floatX(v)
        for (k,v) in clip_min.iteritems(): clip_min[k] = npy_floatX(v)
        for (k,v) in clip_max.iteritems(): clip_max[k] = npy_floatX(v)

        # dump initialization parameters to object
        # NOTE: this copies *every* local except self, so the loop variables
        # k and v left over from above become attributes as well. Adding or
        # renaming a local above this point changes the object's attributes.
        for (k,v) in locals().iteritems():
            if k!='self': setattr(self,k,v)

        # allocate random number generators
        # NOTE: all random initial values below are drawn from self.rng in
        # sequence, so reordering the draws changes the result for a given seed.
        self.rng = numpy.random.RandomState(seed) if numpy_rng is None else numpy_rng
        self.theano_rng = RandomStreams(self.rng.randint(2**30)) if theano_rng is None else theano_rng

        ############### ALLOCATE PARAMETERS #################
        # total number of s units
        self.n_s = self.n_h * self.bw_s

        if Wv is None:
            wv_val =  self.rng.randn(n_v, self.n_s) * iscales['Wv']
            self.Wv = sharedX(wv_val, name='Wv')
        else:
            self.Wv = Wv

        # fixed 0/1 matrix (plain numpy array, not a shared variable):
        # column i is 1 on the bw_s rows belonging to h unit i
        self.Wh = numpy.zeros((self.n_s, self.n_h), dtype=floatX)
        for i in xrange(self.n_h):
            self.Wh[i*bw_s:(i+1)*bw_s, i] = 1.

        # allocate shared variables for bias parameters
        if vbias is None:
            self.vbias = sharedX(iscales['vbias'] * numpy.ones(n_v), name='vbias') 
        else:
            self.vbias = vbias

        # allocate shared variables for bias parameters
        if hbias is None:
            self.hbias = sharedX(iscales['hbias'] * numpy.ones(n_h), name='hbias') 
        else:
            self.hbias = hbias

        # mean (mu) and precision (alpha) parameters on s
        self.mu = sharedX(iscales['mu'] * numpy.ones(self.n_s), name='mu')
        self.alpha = sharedX(iscales['alpha'] * numpy.ones(self.n_s), name='alpha')
        self.alpha_prec = T.exp(self.alpha)

        #### load layer 1 parameters from file ####
        # NOTE(review): this runs before the chains, the learning rate and the
        # input spaces exist, so load_params can only rely on the attributes
        # set above -- confirm against load_params.
        if init_from:
            self.load_params(init_from)

        # allocate shared variable for persistent chain
        # (one row per minibatch example, initialised with uniform noise)
        self.neg_v  = sharedX(self.rng.rand(batch_size, n_v), name='neg_v')
        self.neg_ev = sharedX(self.rng.rand(batch_size, n_v), name='neg_ev')
        self.neg_s  = sharedX(self.rng.rand(batch_size, self.n_s), name='neg_s')
        self.neg_h  = sharedX(self.rng.rand(batch_size, n_h), name='neg_h')

        # moving average values for sparsity
        # NOTE(review): the second variable is named 'sp_pog_h', which looks
        # like a typo for 'sp_pos_h' -- confirm nothing looks it up by name
        # before renaming.
        self.sp_pos_v = sharedX(self.rng.rand(1,self.n_v), name='sp_pos_v')
        self.sp_pos_h = sharedX(self.rng.rand(1,self.n_h), name='sp_pog_h')

        # learning rate, with deferred 1./t annealing
        self.iter = sharedX(0.0, name='iter')

        # self.lr is a symbolic expression of self.iter; it replaces the lr
        # dictionary stored on the object by the dump above
        if lr['type'] == 'anneal':
            # init * start / max(start, slope * iter), never below floor
            num = lr['init'] * lr['start'] 
            denum = T.maximum(lr['start'], lr['slope'] * self.iter)
            self.lr = T.maximum(lr['floor'], num/denum) 
        elif lr['type'] == 'linear':
            # straight line from start (iter = 0) to end (iter = max_updates)
            lr_start = npy_floatX(lr['start'])
            lr_end   = npy_floatX(lr['end'])
            self.lr = lr_start + self.iter * (lr_end - lr_start) / npy_floatX(self.max_updates)
        else:
            raise ValueError('Incorrect value for lr[type]')

        # learning rate - implemented as shared parameter for GPU
        self.lr_mults_it = {}
        self.lr_mults_shrd = {}
        for (k,v) in lr_mults.iteritems():
            # one iterator per multiplier, plus a shared variable holding the
            # iterator's current value
            self.lr_mults_it[k] = tools.HyperParamIterator(lr_timestamp, lr_mults[k])
            self.lr_mults_shrd[k] = sharedX(self.lr_mults_it[k].value, 
                                            name='lr_mults_shrd'+k)

        # allocate symbolic variable for input
        self.input = T.matrix('input') if input is None else input
        
        # configure input-space (new pylearn2 feature?)
        self.input_space = VectorSpace(n_v)
        self.output_space = VectorSpace(n_h)

        self.batches_seen = 0                    # incremented on every batch
        self.examples_seen = 0                   # incremented on every training example
        self.force_batch_size = batch_size  # force minibatch size

        self.error_record = []
        
        if compile: self.do_theano()
Beispiel #20
0
    def __init__(self, 
            numpy_rng = None, theano_rng = None,
            n_h=99, n_v=100, init_from=None,
            min_beta=0.9, num_beta=20, gamma=10, cratio=1, cdelay=0,
            neg_sample_steps=1,
            lr_spec=None, lr_mults = {},
            iscales={}, clip_min={}, clip_max={},
            l1 = {}, l2 = {},
            sp_weight={}, sp_targ={},
            batch_size = 13,
            compile=True, debug=False, seed=1241234,
            flags = {},
            max_updates = 5e5, **kwargs):
        """
        Validate the hyper-parameters, build the parameters, chains and
        learning rate expression and, if requested, compile the model.

        Every argument, including the ``kwargs`` dictionary of extra keyword
        arguments, is also copied onto ``self`` under its own name.

        :param numpy_rng: numpy RandomState to use; built from ``seed`` when None
        :param theano_rng: theano RandomStreams to use; when None one is
               seeded from the numpy generator
        :param n_h: number of h-hidden units
        :param n_v: number of visible units
        :param init_from: not supported; a truthy value raises
               NotImplementedError at the end of construction
        :param iscales: optional dictionary containing initialization scale for each parameter
        :param neg_sample_steps: number of sampling updates to perform in negative phase.
        :param lr_spec: required dictionary describing the learning rate.
               ``lr_spec['type']`` is ``'anneal'`` (keys ``init``, ``start``,
               ``slope``, ``floor``), ``'1_t'`` (keys ``num``, ``denum``),
               ``'linear'`` (keys ``start``, ``end``) or ``'constant'``
               (key ``value``)
        :param clip_min: dictionary whose values are converted to floatX in place
        :param clip_max: dictionary whose values are converted to floatX in place
        :param l1: hyper-parameter controlling amount of L1 regularization
        :param l2: hyper-parameter controlling amount of L2 regularization
        :param sp_weight: dictionary that must contain the key 'h'
        :param sp_targ: dictionary that must contain the key 'h'
        :param batch_size: size of positive and negative phase minibatch
        :param compile: compile sampling and learning functions
        :param seed: seed used to initialize numpy and theano RNGs.
        :param flags: checked with ``self.validate_flags``
        :param max_updates: denominator of the 'linear' learning rate schedule

        ``min_beta``, ``num_beta``, ``gamma``, ``cratio``, ``cdelay``,
        ``lr_mults`` and ``debug`` are only stored by this constructor.

        :raises AssertionError: if ``lr_spec`` is None or 'h' is missing from
                ``sp_weight`` or ``sp_targ``
        :raises ValueError: if ``lr_spec['type']`` is not a supported schedule
        :raises NotImplementedError: if ``init_from`` is truthy
        """
        Model.__init__(self)
        Block.__init__(self)
        assert lr_spec is not None
        for k in ['h']: assert k in sp_weight.keys()
        for k in ['h']: assert k in sp_targ.keys()
        self.validate_flags(flags)

        self.jobman_channel = None
        self.jobman_state = {}
        self.register_names_to_del(['jobman_channel'])

        ### make sure all parameters are floatX ###
        # values are replaced in place, so the caller's dictionaries change too
        for (k,v) in l1.iteritems(): l1[k] = npy_floatX(v)
        for (k,v) in l2.iteritems(): l2[k] = npy_floatX(v)
        for (k,v) in sp_weight.iteritems(): sp_weight[k] = npy_floatX(v)
        for (k,v) in sp_targ.iteritems(): sp_targ[k] = npy_floatX(v)
        for (k,v) in clip_min.iteritems(): clip_min[k] = npy_floatX(v)
        for (k,v) in clip_max.iteritems(): clip_max[k] = npy_floatX(v)

        # dump initialization parameters to object
        # NOTE: this copies *every* local except self, so the loop variables
        # k and v left over from above (and kwargs) become attributes as well.
        # Adding or renaming a local above this point changes the object's
        # attributes.
        for (k,v) in locals().iteritems():
            if k!='self': setattr(self,k,v)

        # allocate random number generators
        self.rng = numpy.random.RandomState(seed) if numpy_rng is None else numpy_rng
        self.theano_rng = RandomStreams(self.rng.randint(2**30)) if theano_rng is None else theano_rng

        ############### ALLOCATE PARAMETERS #################
        # allocate symbolic variable for input
        self.input = T.matrix('input')
        # both helpers are defined elsewhere in the class and run after the
        # attributes above are in place
        self.init_parameters()
        self.init_chains()

        # learning rate, with deferred 1./t annealing
        self.iter = sharedX(0.0, name='iter')

        # self.lr is built from self.iter, except for the 'constant' type
        # where it is a shared variable
        if lr_spec['type'] == 'anneal':
            # init * start / max(start, slope * iter), never below floor
            num = lr_spec['init'] * lr_spec['start'] 
            denum = T.maximum(lr_spec['start'], lr_spec['slope'] * self.iter)
            self.lr = T.maximum(lr_spec['floor'], num/denum) 
        elif lr_spec['type'] == '1_t':
            # num / (iter + denum)
            self.lr = npy_floatX(lr_spec['num']) / (self.iter + npy_floatX(lr_spec['denum']))
        elif lr_spec['type'] == 'linear':
            # straight line from start (iter = 0) to end (iter = max_updates)
            lr_start = npy_floatX(lr_spec['start'])
            lr_end   = npy_floatX(lr_spec['end'])
            self.lr = lr_start + self.iter * (lr_end - lr_start) / npy_floatX(self.max_updates)
        elif lr_spec['type'] == 'constant':
            self.lr = sharedX(lr_spec['value'], name='lr')
        else:
            raise ValueError('Incorrect value for lr_spec[type]')

        # configure input-space (new pylearn2 feature?)
        self.input_space = VectorSpace(n_v)
        self.output_space = VectorSpace(n_h)

        self.batches_seen = 0               # incremented on every batch
        self.examples_seen = 0              # incremented on every training example
        self.logz = sharedX(0.0, name='logz')
        self.cpu_time = 0

        self.error_record = []
 
        if compile: self.do_theano()

        # NOTE(review): loading is rejected only here, after the whole model
        # has been built and possibly compiled; checking at the top would
        # fail faster.
        if init_from:
            raise NotImplementedError()
Beispiel #21
0
    def __init__(self, input = None, n_u=[100,100], enable={}, load_from=None,
            iscales=None, clip_min={}, clip_max={},
            pos_mf_steps=1, pos_sample_steps=0, neg_sample_steps=1, 
            lr_spec={}, lr_mults = {},
            l1 = {}, l2 = {}, l1_inf={}, flags={}, momentum_lambda=0,
            cg_params = {},
            batch_size = 13,
            computational_bs = 0,
            compile=True,
            seed=1241234,
            sp_targ_h = None, sp_weight_h=None, sp_pos_k = 5,
            my_save_path=None, save_at=None, save_every=None,
            max_updates=1e6):
        """
        Validate the hyper-parameters, build the parameters, samples and
        learning rate expression and, if requested, compile the model.

        Every argument is also copied onto ``self`` under its own name; the
        ``input`` attribute is then replaced by a new ``T.matrix()``.

        :param n_u: list, containing number of units per layer. n_u[0] contains number
         of visible units, while n_u[i] (with i > 0) contains number of hid. units at layer i.
         Must have at least two entries.
        :param enable: dictionary of flags with on/off behavior
        :param load_from: when truthy, passed to ``self.load_parameters`` as ``fname``
        :param iscales: optional dictionary containing initialization scale for each parameter.
               Key of dictionary should match the name of the associated shared variable.
               Missing 'bias<i>' entries default to 0. and missing 'W<i>'
               entries to 0.1, for every layer index i. A supplied dictionary
               is modified in place; None means an empty one.
        :param clip_min: dictionary whose values are converted to floatX in place
        :param clip_max: dictionary whose values are converted to floatX in place
        :param pos_mf_steps: number of mean-field iterations to perform in positive phase
        :param neg_sample_steps: number of sampling updates to perform in negative phase.
        :param lr_spec: dictionary describing the learning rate schedule.
               ``lr_spec['type']`` is ``'anneal'`` (keys ``init``, ``start``,
               ``slope``, ``floor``) or ``'linear'`` (keys ``start``, ``end``)
        :param lr_mults: dictionary of learning rate multipliers for parameters
               of the model; keys should match the name of the shared variable
               whose learning rate is to be adjusted. Only stored by this
               constructor.
        :param l1: dictionary, whose keys are model parameter names, and values are
               hyper-parameters controlling degree of L1-regularization.
        :param l2: same as l1, but for L2 regularization.
        :param l1_inf: same as l1, but the L1 penalty is centered at minus infinity instead of 0.
        :param flags: dictionary of options. Missing entries get these defaults:
               'enable_centering', 'enable_natural', 'enable_warm_start',
               'mlbiases', 'minres', 'minresQLP' -> False; 'precondition' -> None
               (the string 'None' is also turned into None). The object keeps
               its own copy; the caller's dictionary is left untouched.
        :param cg_params: dictionary with keys ['rtol','damp','maxiter']
        :param batch_size: size of positive and negative phase minibatch
        :param computational_bs: batch size used internaly by natural
               gradient to reduce memory consumption
        :param compile: when true, call ``self.do_theano()`` at the end
        :param seed: seed used to initialize numpy and theano RNGs.
        :param my_save_path: if None, do not save model. Otherwise, contains stem of filename
               to which we will save the model (everything but the extension).
        :param save_at: list containing iteration counts at which to save model
        :param save_every: scalar value. Save model every `save_every` iterations.
        :param max_updates: denominator of the 'linear' learning rate schedule

        ``pos_sample_steps``, ``momentum_lambda``, ``sp_targ_h``,
        ``sp_weight_h`` and ``sp_pos_k`` are only stored by this constructor.

        :raises AssertionError: if ``lr_spec`` is None or ``n_u`` has fewer
                than two entries
        :raises ValueError: if ``lr_spec['type']`` is not a supported schedule
        """
        Model.__init__(self)
        Block.__init__(self)
        ### VALIDATE PARAMETERS AND SET DEFAULT VALUES ###
        assert lr_spec is not None
        for (k,v) in clip_min.iteritems(): clip_min[k] = npy_floatX(v)
        for (k,v) in clip_max.iteritems(): clip_max[k] = npy_floatX(v)

        # The declared default is None, which has no setdefault(): start from
        # an empty dictionary in that case instead of crashing.
        if iscales is None:
            iscales = {}
        for i in xrange(len(n_u)):
            iscales.setdefault('bias%i' % i, 0.)
        for i in xrange(len(n_u)):
            iscales.setdefault('W%i' % i, 0.1)

        # Work on a private copy: the default {} is a single object shared by
        # every call, so filling it in place would leak settings from one
        # instance to the next (and would modify a caller-supplied dictionary).
        flags = dict(flags)
        flags.setdefault('enable_centering', False)
        flags.setdefault('enable_natural', False)
        flags.setdefault('enable_warm_start', False)
        flags.setdefault('mlbiases', False)
        flags.setdefault('precondition', None)
        flags.setdefault('minres', False)
        flags.setdefault('minresQLP', False)
        # accept the string 'None' (e.g. from a text configuration) as None
        if flags['precondition'] == 'None': flags['precondition'] = None

        self.jobman_channel = None
        self.jobman_state = {}
        self.register_names_to_del(['jobman_channel'])

        ### DUMP INITIALIZATION PARAMETERS TO OBJECT ###
        # NOTE: this copies *every* local except self, so the loop variables
        # i, k and v left over from above become attributes as well. Do not
        # introduce new locals above this point.
        for (k,v) in locals().iteritems():
            if k!='self': setattr(self,k,v)

        assert len(n_u) > 1
        self.n_v = n_u[0]
        self.depth = len(n_u)

        # allocate random number generators
        self.rng = numpy.random.RandomState(seed)
        self.theano_rng = RandomStreams(self.rng.randint(2**30))

        # allocate bilinear-weight matrices
        # (replaces the ``input`` argument stored by the dump above)
        self.input = T.matrix()
        self.init_parameters()
        self.init_dparameters()
        self.init_centering()
        self.init_samples()

        # learning rate, with deferred 1./t annealing
        self.iter = sharedX(0.0, name='iter')

        # self.lr is a symbolic expression of self.iter
        if lr_spec['type'] == 'anneal':
            # init * start / max(start, slope * iter), never below floor
            num = lr_spec['init'] * lr_spec['start'] 
            denum = T.maximum(lr_spec['start'], lr_spec['slope'] * self.iter)
            self.lr = T.maximum(lr_spec['floor'], num/denum) 
        elif lr_spec['type'] == 'linear':
            # straight line from start (iter = 0) to end (iter = max_updates)
            lr_start = npy_floatX(lr_spec['start'])
            lr_end   = npy_floatX(lr_spec['end'])
            self.lr = lr_start + self.iter * (lr_end - lr_start) / npy_floatX(self.max_updates)
        else:
            raise ValueError('Incorrect value for lr_spec[type]')

        # counter for CPU-time
        self.cpu_time = 0.

        if load_from:
            self.load_parameters(fname=load_from)

        # configure input-space (?new pylearn2 feature?)
        self.input_space = VectorSpace(n_u[0])
        self.output_space = VectorSpace(n_u[-1])
        self.batches_seen = 0                    # incremented on every batch
        self.examples_seen = 0                   # incremented on every training example
        self.force_batch_size = batch_size  # force minibatch size
        self.error_record = []

        if compile: self.do_theano()