def init_parameters(self):

        # marginal precision on visible units 
        self.lambd = sharedX(self.iscales['lambd'] * numpy.ones(self.n_v), name='lambd')

        # init scalar norm for each entry of Wv
        sn_val = self.iscales['scalar_norms'] * numpy.ones(self.n_f)
        self.scalar_norms = sharedX(sn_val, name='scalar_norms')

        # init weight matrices
        self.Wv = self.init_weight(1.0, (self.n_v, self.n_f), 'Wv')
        if self.sparse_gmask or self.sparse_hmask:
            assert self.sparse_gmask and self.sparse_hmask
            self.Wg = sharedX(self.sparse_gmask.mask * self.iscales.get('Wg', 1.0), name='Wg')
            self.Wh = sharedX(self.sparse_hmask.mask * self.iscales.get('Wh', 1.0), name='Wh')
        else:
            self.Wg = self.init_weight(1.0, (self.n_g, self.n_f), 'Wg')
            self.Wh = self.init_weight(1.0, (self.n_h, self.n_f), 'Wh')

        # bias parameters of g, h
        self.gbias = sharedX(self.iscales['gbias'] * numpy.ones(self.n_g), name='gbias')
        self.hbias = sharedX(self.iscales['hbias'] * numpy.ones(self.n_h), name='hbias')
        # mean (mu) and precision (alpha) parameters on s
        self.mu = sharedX(self.iscales['mu']  * numpy.ones(self.n_g), name='mu')
        self.alpha = sharedX(self.iscales['alpha'] * numpy.ones(self.n_g), name='alpha')
        # mean (eta) and precision (beta) parameters on t
        self.eta = sharedX(self.iscales['eta'] * numpy.ones(self.n_h), name='eta')
        self.beta  = sharedX(self.iscales['beta'] * numpy.ones(self.n_h), name='beta')

        # optional reparametrization of precision parameters
        self.lambd_prec = T.nnet.softplus(self.lambd)
        self.alpha_prec = T.nnet.softplus(self.alpha)
        self.beta_prec = T.nnet.softplus(self.beta)
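The softplus reparametrization above keeps the effective precisions strictly positive while the underlying shared variables (lambd, alpha, beta) remain unconstrained; init_chains below converts the same quantity into a standard deviation via sqrt(1/softplus(.)). A standalone numpy sketch of that conversion (not part of the original code; the test values are arbitrary):

import numpy

def softplus(x):
    # plain log(1 + exp(x)); adequate for the small test values used here
    return numpy.log1p(numpy.exp(x))

lambd_val = numpy.array([-2.0, 0.0, 3.0])
prec = softplus(lambd_val)         # strictly positive, so usable as a precision
sigma = numpy.sqrt(1. / prec)      # std. dev. used when seeding the chains
print prec                         # ~[ 0.127  0.693  3.049]
print sigma                        # ~[ 2.807  1.201  0.573]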
 def init_chains(self):
     """ Allocate shared variable for persistent chain """
     # initialize buffers to store inference state
     self.pos_g  = sharedX(numpy.zeros((self.batch_size, self.n_g)), name='pos_g')
     self.pos_h  = sharedX(numpy.zeros((self.batch_size, self.n_h)), name='pos_h')
     self.pos_s1 = sharedX(numpy.zeros((self.batch_size, self.n_s)), name='pos_s1')
     self.pos_s0 = sharedX(numpy.zeros((self.batch_size, self.n_s)), name='pos_s0')
     # initialize visible unit chains
     scale = numpy.sqrt(1./softplus(self.lambd.get_value()))
     neg_v  = self.rng.normal(loc=0, scale=scale, size=(self.batch_size, self.n_v))
     self.neg_v  = sharedX(neg_v, name='neg_v')
     # initialize s-chain
     loc = self.mu.get_value()
     scale = numpy.sqrt(1./softplus(self.alpha.get_value()))
     neg_s  = self.rng.normal(loc=loc, scale=scale, size=(self.batch_size, self.n_s))
     self.neg_s  = sharedX(neg_s, name='neg_s')
     # initialize binary g-h chains
     pval_g = sigm(self.gbias.get_value())
     pval_h = sigm(self.hbias.get_value())
     neg_g = self.rng.binomial(n=1, p=pval_g, size=(self.batch_size, self.n_g))
     neg_h = self.rng.binomial(n=1, p=pval_h, size=(self.batch_size, self.n_h))
     self.neg_h  = sharedX(neg_h, name='neg_h')
     self.neg_g  = sharedX(neg_g, name='neg_g')
     # other misc.
     self.pos_counter  = sharedX(0., name='pos_counter')
     self.odd_even = sharedX(0., name='odd_even')
Example #3
    def init_parameters(self):
        # init scalar norm for each entry of Wv
        sn_val = self.iscales['scalar_norms'] * numpy.ones(self.n_s)
        self.scalar_norms = sharedX(sn_val, name='scalar_norms')

        # init weight matrices
        normalize_wv = self.flags['wv_norm'] == 'unit' 
        self.Wv = self.init_weight(self.iscales['Wv'], (self.n_v, self.n_s), 'Wv', normalize=normalize_wv)
        if self.sparse_gmask or self.sparse_hmask:
            assert self.sparse_gmask and self.sparse_hmask
            self.Wg = sharedX(self.sparse_gmask.mask * self.iscales.get('Wg', 1.0), name='Wg')
            self.Wh = sharedX(self.sparse_hmask.mask * self.iscales.get('Wh', 1.0), name='Wh')
        else:
            normalize_wg = self.flags['wg_norm'] == 'unit'
            normalize_wh = self.flags['wh_norm'] == 'unit'
            self.Wg = self.init_weight(self.iscales['Wg'], (self.n_g, self.n_s), 'Wg', normalize=normalize_wg)
            self.Wh = self.init_weight(self.iscales['Wh'], (self.n_h, self.n_s), 'Wh', normalize=normalize_wh)

        # avg norm (for wgh_norm='roland')
        norm_wg = numpy.sqrt(numpy.sum(self.Wg.get_value()**2, axis=0)).mean()
        norm_wh = numpy.sqrt(numpy.sum(self.Wh.get_value()**2, axis=0)).mean()
        self.avg_norm_wg = sharedX(norm_wg, name='avg_norm_wg')
        self.avg_norm_wh = sharedX(norm_wh, name='avg_norm_wh')

        # allocate shared variables for bias parameters
        self.gbias = sharedX(self.iscales['gbias'] * numpy.ones(self.n_g), name='gbias')
        self.hbias = sharedX(self.iscales['hbias'] * numpy.ones(self.n_h), name='hbias')
        self.vbias = sharedX(self.iscales['vbias'] * numpy.ones(self.n_v), name='vbias')

        # mean (mu) and precision (alpha) parameters on s
        self.mu = sharedX(self.iscales['mu'] * numpy.ones(self.n_s), name='mu')
        self.alpha = sharedX(self.iscales['alpha'] * numpy.ones(self.n_s), name='alpha')
        self.alpha_prec = T.nnet.softplus(self.alpha)
Example #4
File: rbm.py Project: jaberg/ssrbm
    def __init__(self, conf, numpy_rng, W, Lambda):
        """
        :param W: a LinearTransform instance for the weights.

        :param Lambda: a LinearTransform instance, parametrizing the h-dependent
        precision information regarding visibles.
        """
        self.conf = conf
        self.W = W
        self.Lambda = Lambda
        if Lambda:
            if W.col_shape() != Lambda.col_shape():
                raise ValueError('col_shape mismatch',
                        (W.col_shape(), Lambda.col_shape()))
            if W.row_shape() != Lambda.row_shape():
                raise ValueError('row_shape mismatch',
                        (W.row_shape(), Lambda.row_shape()))

        # Energy term has vW(sh), so...
        h_shp = self.h_shp = W.col_shape()
        s_shp = self.s_shp = W.col_shape()
        v_shp = self.v_shp = W.row_shape()
        logger.info("RBM Shapes h_shp=%s, s_shp=%s, v_shp=%s" %(h_shp, s_shp, v_shp))

        # alpha (precision on slab variables)
        alpha_init = numpy.zeros(s_shp)+conf['alpha0']
        if conf['alpha_irange']:
            alpha_init += (2 * numpy_rng.rand(*s_shp) - 1)*conf['alpha_irange']

        if conf['alpha_logdomain']:
            self.alpha = sharedX(numpy.log(alpha_init), name='alpha')
        else:
            self.alpha = sharedX(alpha_init, name='alpha')

        # mu (mean of slab vars)

        self.mu = sharedX(
                conf['mu0'] + numpy_rng.uniform(size=s_shp,
                    low=-conf['mu_irange'],
                    high=conf['mu_irange']),
                name='mu')

        # b (bias of spike vars)
        self.b = sharedX(
                conf['b0'] + numpy_rng.uniform(size=h_shp,
                    low=-conf['b_irange'],
                    high=conf['b_irange']),
                name='b')

        # B (precision on visible vars)
        if conf['B_full_diag']:
            B_init = numpy.zeros(v_shp) + conf['B0']
        else:
            B_init = numpy.zeros(()) + conf['B0']
        if conf['B_logdomain']:
            B_init = numpy.log(B_init)
        self.B = sharedX(B_init, name='B')

        self._params = [self.mu, self.B, self.b, self.alpha]
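The constructor above only calls col_shape() and row_shape() on W and Lambda before storing them; a hypothetical stand-in (not from the jaberg/ssrbm project) illustrating that minimal interface:

class DenseTransformStub(object):
    """Hypothetical stand-in exposing only the two methods exercised above."""
    def __init__(self, row_shape, col_shape):
        self._row_shape = tuple(row_shape)   # visible-side shape
        self._col_shape = tuple(col_shape)   # hidden/slab-side shape
    def row_shape(self):
        return self._row_shape
    def col_shape(self):
        return self._col_shape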
Example #5
 def __init__(self, name, hidden_dim, input_dim, init_std):
     super(RNN, self).__init__(name, trainable=True)
     self.hidden_dim = hidden_dim
     self.Wx = sharedX(np.random.randn(input_dim, hidden_dim) * init_std,
                       name=name + '/Wx')
     self.Wh = sharedX(np.random.randn(hidden_dim, hidden_dim) * init_std,
                       name=name + '/Wh')
     self.b = sharedX(np.zeros((hidden_dim)), name=name + '/b')
Example #6
 def init_parameters(self):
     # init weight matrices
     self.Wv = self.init_weight(self.iscales.get('Wv', 1.0), (self.n_v, self.n_h), 'Wv', normalize=False)
     # allocate shared variables for bias parameters
     self.hbias = sharedX(self.iscales['hbias'] * numpy.ones(self.n_h), name='hbias')
     # diagonal of precision matrix of visible units
     self.lambd = sharedX(self.iscales['lambd'] * numpy.ones(self.n_v), name='lambd')
     self.lambd_prec = T.nnet.softplus(self.lambd)
Example #7
    def __init__(self, conf, numpy_rng, W, Lambda):
        """
        :param W: a LinearTransform instance for the weights.

        :param Lambda: a LinearTransform instance, parametrizing the h-dependent
        precision information regarding visibles.
        """
        self.conf = conf
        self.W = W
        self.Lambda = Lambda
        if Lambda:
            if W.col_shape() != Lambda.col_shape():
                raise ValueError('col_shape mismatch',
                                 (W.col_shape(), Lambda.col_shape()))
            if W.row_shape() != Lambda.row_shape():
                raise ValueError('row_shape mismatch',
                                 (W.row_shape(), Lambda.row_shape()))

        # Energy term has vW(sh), so...
        h_shp = self.h_shp = W.col_shape()
        s_shp = self.s_shp = W.col_shape()
        v_shp = self.v_shp = W.row_shape()
        logger.info("RBM Shapes h_shp=%s, s_shp=%s, v_shp=%s" %
                    (h_shp, s_shp, v_shp))

        # alpha (precision on slab variables)
        alpha_init = numpy.zeros(s_shp) + conf['alpha0']
        if conf['alpha_irange']:
            alpha_init += (2 * numpy_rng.rand(*s_shp) -
                           1) * conf['alpha_irange']

        if conf['alpha_logdomain']:
            self.alpha = sharedX(numpy.log(alpha_init), name='alpha')
        else:
            self.alpha = sharedX(alpha_init, name='alpha')

        # mu (mean of slab vars)

        self.mu = sharedX(conf['mu0'] + numpy_rng.uniform(
            size=s_shp, low=-conf['mu_irange'], high=conf['mu_irange']),
                          name='mu')

        # b (bias of spike vars)
        self.b = sharedX(conf['b0'] + numpy_rng.uniform(
            size=h_shp, low=-conf['b_irange'], high=conf['b_irange']),
                         name='b')

        # B (precision on visible vars)
        if conf['B_full_diag']:
            B_init = numpy.zeros(v_shp) + conf['B0']
        else:
            B_init = numpy.zeros(()) + conf['B0']
        if conf['B_logdomain']:
            B_init = numpy.log(B_init)
        self.B = sharedX(B_init, name='B')

        self._params = [self.mu, self.B, self.b, self.alpha]
Example #8
 def init_parameters(self):
     # init weight matrices
     self.Wv = self.init_weight(self.iscales.get('Wv', 1.0), (self.n_v, self.n_h), 'Wv')
     # allocate shared variables for bias parameters
     self.vbias = sharedX(self.iscales['vbias'] * numpy.ones(self.n_v), name='vbias')
     self.hbias = sharedX(self.iscales['hbias'] * numpy.ones(self.n_h), name='hbias')
     self.cv = sharedX(numpy.zeros(self.n_v), name='cv')
     ch = numpy.ones(self.n_h) * (0.5 if self.flags['enable_centering'] else 0.)
     self.ch = sharedX(ch, name='ch')
Example #9
 def init_parameters(self):
     # init weight matrices
     self.Wv = self.init_weight(self.iscales.get('Wv', 1.0),
                                (self.n_v, self.n_h), 'Wv')
     # allocate shared variables for bias parameters
     self.vbias = sharedX(self.iscales['vbias'] * numpy.ones(self.n_v),
                          name='vbias')
     self.hbias = sharedX(self.iscales['hbias'] * numpy.ones(self.n_h),
                          name='hbias')
Example #10
  def __init__(self):
     rng = numpy.random.RandomState(123)
     self.Wv = sharedX(0.1 * rng.randn(14*14, 10), name='Wv' )
     self.hbias = sharedX(-1 * numpy.ones(10), name='hbias')
     self.alpha = sharedX(0.1 * rng.rand(10), name='alpha')
     self.mu = sharedX(0.1 * numpy.ones(10), name='mu')
     self.lambd = sharedX(1.0 * numpy.ones(10), name='lambd')
     self.bw_s = 1
     self.n_h = 10
     self.input = T.matrix('input')
Example #11
def init_params(options):
    params = OrderedDict()
    params['W_users'] = sharedX(normal((options['n_users'],options['n_factors'])),
                                name='W_users')
    params['W_items'] = sharedX(normal((options['n_items'],options['n_factors'])),
                                name='W_items')
    params['b_users'] = sharedX(np.zeros((options['n_users'], 1)), name='b_users')
    params['b_items'] = sharedX(np.zeros((options['n_items'], 1)), name='b_items')
    params['b'] = sharedX(np.zeros(1), name='b')
    return params
Example #12
 def __init__(self, conf, rbm, sampler, visible_batch, clippers):
     self.conf = conf
     self.rbm = rbm
     self.sampler = sampler
     self.visible_batch = visible_batch
     self.iter = sharedX(0, 'iter')
     self.annealing_coef = sharedX(0.0, 'annealing_coef')
     self.lr_dict = lr_dict = {}
     for p in rbm.params():
         lrname = '%s_lr' % p.name
         lr_dict[p] = sharedX(conf.get(lrname, 1.0), lrname)
     self.clippers = clippers
Example #13
File: rbm.py Project: jaberg/ssrbm
 def __init__(self, conf, rbm, sampler, visible_batch, clippers):
     self.conf = conf
     self.rbm = rbm
     self.sampler = sampler
     self.visible_batch = visible_batch
     self.iter=sharedX(0, 'iter')
     self.annealing_coef=sharedX(0.0, 'annealing_coef')
     self.lr_dict = lr_dict = {}
     for p in rbm.params():
         lrname = '%s_lr'%p.name
         lr_dict[p] = sharedX(conf.get(lrname, 1.0), lrname)
     self.clippers = clippers
Example #14
 def init_parameters(self):
     # init weight matrices
     self.Wv = self.init_weight(self.iscales.get('Wv', 1.0),
                                (self.n_v, self.n_h),
                                'Wv',
                                normalize=False)
     # allocate shared variables for bias parameters
     self.hbias = sharedX(self.iscales['hbias'] * numpy.ones(self.n_h),
                          name='hbias')
     # diagonal of precision matrix of visible units
     self.lambd = sharedX(self.iscales['lambd'] * numpy.ones(self.n_v),
                          name='lambd')
     self.lambd_prec = T.nnet.softplus(self.lambd)
Example #15
 def init_chains(self):
     """ Allocate shared variable for persistent chain """
     # initialize visible unit chains
     scale = numpy.sqrt(1./softplus(self.lambd.get_value()))
     neg_v  = self.rng.normal(loc=0, scale=scale, size=(self.batch_size, self.n_v))
     self.neg_v  = sharedX(neg_v, name='neg_v')
     # initialize s-chain
     scale = numpy.sqrt(1./softplus(self.alpha.get_value()))
     neg_s  = self.rng.normal(loc=0., scale=scale, size=(self.batch_size, self.n_s))
     self.neg_s  = sharedX(neg_s, name='neg_s')
     # initialize binary g-h chains
     pval_h = sigm(self.hbias.get_value())
     neg_h = self.rng.binomial(n=1, p=pval_h, size=(self.batch_size, self.n_h))
     self.neg_h  = sharedX(neg_h, name='neg_h')
Example #16
def get_updates(grads, momentum_lambda=None):
    """
    Returns an updates dictionary corresponding to a single step of SGD. Each key of
    `grads` is a parameter (shared variable) and each value is its full update step,
    i.e. the gradient already scaled by the learning rate (and any per-parameter multiplier).
    :param grads: dictionary mapping parameters to their scaled gradients.
    :param momentum_lambda: optional momentum coefficient; when given, the applied step is
                            an exponential moving average of past steps.
    """

    updates = OrderedDict()
    momentum = OrderedDict()

    for (param, gparam) in grads.iteritems():

        if momentum_lambda:
            # create storage for momentum term
            momentum[param] = sharedX(numpy.zeros_like(param.get_value()),
                                      name=param.name + '_old')
            new_grad = (1. - momentum_lambda
                        ) * gparam + momentum_lambda * momentum[param]
            updates[param] = param - new_grad
            updates[momentum[param]] = new_grad
        else:
            updates[param] = param - gparam

    return updates
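A minimal call-site sketch for get_updates (hypothetical toy cost; assumes this module's sharedX and imports are in scope). The gradients are pre-scaled by the learning rate before being handed in, and the parameter must be named so the momentum buffer can be named after it:

import numpy
import theano
import theano.tensor as T
from collections import OrderedDict

x = T.matrix('x')
W = theano.shared(numpy.zeros((5, 3), dtype=theano.config.floatX), name='W')  # toy parameter
cost = T.sum((T.dot(x, W) - 1.0) ** 2)                                        # toy cost

lr = numpy.asarray(0.01, dtype=theano.config.floatX)   # folded into the gradient itself
grads = OrderedDict([(W, lr * T.grad(cost, W))])
train_fn = theano.function([x], cost,
                           updates=get_updates(grads, momentum_lambda=0.9))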
Example #17
def load_penn_treebank(path, sequence_length=100, return_raw=False):
    ''' Loads the Penn Treebank dataset

    Parameters
    ----------
    path : str
        The path to the dataset file (.npz).
    sequence_length : int, optional
        All sequences of characters will have the same length.

    References
    ----------
    This dataset comes from https://github.com/GabrielPereyra/norm-rnn/tree/master/data.

    '''
    if not os.path.isfile(path):
        # Download the dataset.
        data_dir, data_file = os.path.split(path)
        ptb_zipfile = os.path.join(data_dir, 'ptb.zip')

        if not os.path.isfile(ptb_zipfile):
            import urllib
            origin = 'https://www.dropbox.com/s/9hwo2392mfgnnlu/ptb.zip?dl=1'  # Marc's dropbox, TODO: put that somewhere else.
            print("Downloading data (2 Mb) from {} ...".format(origin))
            urllib.urlretrieve(origin, ptb_zipfile)

        # Load the dataset and process it.
        print("Processing data ...")
        with zipfile.ZipFile(ptb_zipfile) as f:
            train = "\n".join((l.lstrip() for l in f.read('ptb.train.txt').split('\n')))
            valid = "\n".join((l.lstrip() for l in f.read('ptb.valid.txt').split('\n')))
            test = "\n".join((l.lstrip() for l in f.read('ptb.test.txt').split('\n')))

        chars = list(set(train) | set(valid) | set(test))
        data_size = len(train) + len(valid) + len(test)
        vocab_size = len(chars)
        print("Dataset has {:,} characters ({:,} | {:,} | {:,}), {:,} unique.".format(data_size, len(train), len(valid), len(test), vocab_size))

        words = train.split(), valid.split(), test.split()
        n_words = len(words[0]) + len(words[1]) + len(words[2])
        print("Dataset has {:,} words ({:,} | {:,} | {:,}), {:,} unique.".format(n_words, len(words[0]), len(words[1]), len(words[2]), len(set(words[0]) | set(words[1]) | set(words[2]))))
        chr2idx = {c: i for i, c in enumerate(chars)}
        idx2chr = {i: c for i, c in enumerate(chars)}

        train = np.array([chr2idx[c] for c in train], dtype=np.int8)
        valid = np.array([chr2idx[c] for c in valid], dtype=np.int8)
        test = np.array([chr2idx[c] for c in test], dtype=np.int8)

        np.savez(path,
                 train=train, valid=valid, test=test,
                 chr2idx=chr2idx, idx2chr=idx2chr)

    print("Loading data ...")
    ptb = np.load(path)
    if return_raw:
        return (ptb['train'], ptb['valid'], ptb['test']), ptb['idx2chr'].item()

    # datasets = [_shared_dataset(_split_into_sequences(d, sequence_length)) for d in [ptb['train'], ptb['valid'], ptb['test']]]
    datasets = [utils.sharedX(_split_into_sequences(d, sequence_length)) for d in [ptb['train'], ptb['valid'], ptb['test']]]
    return datasets, ptb['idx2chr'].item()
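A hypothetical call (the path and sequence length are placeholders); the returned datasets are Theano shared variables built with utils.sharedX:

datasets, idx2chr = load_penn_treebank('data/ptb.npz', sequence_length=100)
train_set, valid_set, test_set = datasets
print train_set.get_value(borrow=True).shape    # (n_sequences, sequence_length), assuming _split_into_sequences reshapes per sequence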
Example #18
File: rbm.py Project: jaberg/ssrbm
 def cd_updates(self, pos_v, neg_v, lr, other_cost=0):
     grads = contrastive_grad(self.free_energy_given_v,
             pos_v, neg_v,
             wrt=self.params(),
             other_cost=other_cost)
     stepsizes=lr
     if self.conf.get('momentum', 0.0):
         logger.info('Using momentum %s'%self.conf['momentum'])
         rval = dict(
                 sgd_momentum_updates(
                     self.params(),
                     grads,
                     stepsizes=stepsizes,
                     momentum=self.conf['momentum']))
     else:
         rval = dict(
                 sgd_updates(
                     self.params(),
                     grads,
                     stepsizes=stepsizes))
     #DEBUG STORE GRADS
     grad_shared_vars = [sharedX(0*p.get_value(),'') for p in self.params()]
     self.grad_shared_vars = grad_shared_vars
     rval.update(dict(zip(grad_shared_vars, grads)))
     return rval
Example #19
def orthogonal(shape, scale=1.1):
    """ benanne lasagne ortho init (faster than qr approach)"""
    flat_shape = (shape[0], np.prod(shape[1:]))
    a = np.random.normal(0.0, 1.0, flat_shape)
    u, _, v = np.linalg.svd(a, full_matrices=False)
    q = u if u.shape == flat_shape else v  # pick the one with the correct shape
    q = q.reshape(shape)
    return sharedX(scale * q[:shape[0], :shape[1]])
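A quick numeric check of the initializer above (hypothetical shape and scale): the SVD of a Gaussian matrix yields an orthonormal factor, so the returned weights are orthogonal up to the applied scale.

import numpy as np

w = orthogonal((128, 128), scale=1.1).get_value()
print w.shape                                                             # (128, 128)
print np.allclose(np.dot(w.T, w), (1.1 ** 2) * np.eye(128), atol=1e-4)   # True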
Example #20
  def __call__(self, shape, name=None):
     if len(shape) == 2:
         scale = np.sqrt(2./shape[0])
     elif len(shape) == 4:
         scale = np.sqrt(2./np.prod(shape[1:]))
     else:
         raise NotImplementedError
     return sharedX(np_rng.normal(size=shape, scale=scale), name=name)
Example #21
  def __call__(self, shape, name=None):
     print 'called orthogonal init with shape', shape
     flat_shape = (shape[0], np.prod(shape[1:]))
     a = np_rng.normal(0.0, 1.0, flat_shape)
     u, _, v = np.linalg.svd(a, full_matrices=False)
     q = u if u.shape == flat_shape else v # pick the one with the correct shape
     q = q.reshape(shape)
     return sharedX(self.scale * q[:shape[0], :shape[1]], name=name)
Example #22
def train(options, train_data, valid_data, test_data):
    np.random.seed(12345)

    if not os.path.exists(options['saveto']):
        os.makedirs(options['saveto'])

    print 'Building the model...'
    params = init_params(options)
    users_id, items_id, bow, y, y_pred, bow_pred, mse, nll, cost = build_model(options, params)

    print 'Computing gradients...'
    lrt = sharedX(options['lr'])
    grads = T.grad(cost, params.values())
    updates = sgd(params.values(), grads, lrt)

    print 'Compiling theano functions...'
    eval_fn = theano.function([users_id, items_id, y], mse)
    train_fn = theano.function([users_id, items_id, bow, y], [cost, mse, nll],
                               updates=updates)

    print "Training..."
    train_iter = MultiFixDimIterator(*train_data, batch_size=options['batch_size'],
                                     shuffle=True)
    valid_iter = MultiFixDimIterator(*valid_data, batch_size=100)
    test_iter  = MultiFixDimIterator(*test_data,  batch_size=100)
    best_valid = float('inf')
    best_test  = float('inf')

    n_batches = np.ceil(train_data[0].shape[0]*1./options['batch_size']).astype('int')
    disp_str = ['Train COST', 'Train MSE', 'Train NLL']

    for eidx in range(options['n_epochs']):
        accum_cost, accum_mse, accum_nll = 0., 0., 0.
        for batch in train_iter:
            batch = prepare_batch_data(options, batch)
            b_cost, b_mse, b_nll = train_fn(*batch)
            accum_cost += b_cost
            accum_mse  += b_mse
            accum_nll  += b_nll

        disp_val = [val/n_batches for val in [accum_cost, accum_mse, accum_nll]]
        res_str = ('[%d] ' % eidx) + ", ".join("%s: %.4f" %(s,v) for s,v in
                                               zip(disp_str, disp_val))
        print res_str

        if (eidx+1) % options['valid_freq'] == 0:
            disp_val = [np.mean([eval_fn(*vbatch) for vbatch in valid_iter]),
                        np.mean([eval_fn(*tbatch) for tbatch in test_iter])]
            res_str = ", ".join("%s: %.4f" %(s,v) for s,v in
                                zip(['Valid MSE', 'Test MSE'], disp_val))
            print res_str

            if best_valid > disp_val[0]:
                best_valid, best_test = disp_val
                dump_params(options['saveto'], eidx, "best_params", params)

    print "Done training..."
    print "Best Valid MSE: %.4f and Test MSE: %.4f" % (best_valid, best_test)
Example #23
  def init_chains(self):
     """ Allocate shared variable for persistent chain """
     # initialize s-chain
     loc = self.mu.get_value()
     scale = numpy.sqrt(1./softplus(self.alpha.get_value()))
     neg_s  = self.rng.normal(loc=loc, scale=scale, size=(self.batch_size, self.n_s))
     self.neg_s  = sharedX(neg_s, name='neg_s')
     # initialize binary v chains
     pval_v = sigm(self.vbias.get_value())
     neg_v = self.rng.binomial(n=1, p=pval_v, size=(self.batch_size, self.n_v))
     self.neg_v  = sharedX(neg_v, name='neg_v')
     # initialize binary h chains
     pval_h = sigm(self.hbias.get_value())
     neg_h = self.rng.binomial(n=1, p=pval_h, size=(self.batch_size, self.n_h))
     self.neg_h  = sharedX(neg_h, name='neg_h')
     # moving average values for sparsity
     self.sp_pos_v = sharedX(neg_v, name='sp_pos_v')
     self.sp_pos_h = sharedX(neg_h, name='sp_pos_h')
Example #24
  def __call__(self, shape, name=None):
     if shape[0] != shape[1]:
         w = np.zeros(shape)
         o_idxs = np.arange(shape[0])
         i_idxs = np.random.permutation(np.tile(np.arange(shape[1]), shape[0]/shape[1]+1))[:shape[0]]
         w[o_idxs, i_idxs] = self.scale
     else:
         w = np.identity(shape[0]) * self.scale
     return sharedX(w, name=name)
Example #25
 def __init__(self, rbm, particles, rng):
     if not hasattr(rng, 'randn'):
         rng = numpy.random.RandomState(rng)
     seed = int(rng.randint(2**30))
     self.rbm = rbm
     self.n_particles = particles.shape[0]
     assert particles.shape[1:] == rbm.v_shp
     self.particles = sharedX(particles, name='particles')
     self.s_rng = RandomStreams(seed)
Example #26
    def get_updates(self, cost, params):
        grads = T.grad(cost=cost, wrt=params)
        updates = []
        for p, g in zip(params, grads):
            d = sharedX(p.get_value() * 0.0)
            new_d = self.mm * d - self.lr * (g + self.wd * p)
            updates.append((d, new_d))
            updates.append((p, p + new_d))

        return updates
Example #27
    def get_updates(self, cost, params):
        grads = T.grad(cost=cost, wrt=params)
        updates = []
        for p, dx in zip(params, grads):
            cache = sharedX(0)
            new_d = self.lr * dx / (T.sqrt(cache) + self.eps)
            updates.append((cache, T.sum(dx * dx)))
            updates.append((p, p - new_d))

        return updates
Example #28
    def get_updates(self, cost, params):
        grads = T.grad(cost=cost, wrt=params)
        updates = []
        for p, dx in zip(params, grads):
            cache = sharedX(0)
            delta = self.lr * dx / (T.sqrt(cache) + self.eps)
            updates.append((cache, dx.norm(2)))
            updates.append((p, p - delta))

        return updates
Example #29
 def init_chains(self):
     """ Allocate shared variable for persistent chain """
     # initialize visible unit chains
     scale = numpy.sqrt(1. / softplus(self.lambd.get_value()))
     neg_v = self.rng.normal(loc=0,
                             scale=scale,
                             size=(self.batch_size, self.n_v))
     self.neg_v = sharedX(neg_v, name='neg_v')
     # initialize s-chain
     scale = numpy.sqrt(1. / softplus(self.alpha.get_value()))
     neg_s = self.rng.normal(loc=0.,
                             scale=scale,
                             size=(self.batch_size, self.n_s))
     self.neg_s = sharedX(neg_s, name='neg_s')
     # initialize binary g-h chains
     pval_h = sigm(self.hbias.get_value())
     neg_h = self.rng.binomial(n=1,
                               p=pval_h,
                               size=(self.batch_size, self.n_h))
     self.neg_h = sharedX(neg_h, name='neg_h')
Example #30
    def init_chains(self):
        """ Allocate shared variable for persistent chain """
        self.neg_ev = sharedX(self.rng.rand(self.batch_size, self.n_v), name='neg_ev')
        self.neg_h  = sharedX(self.rng.rand((self.cratio+1)*self.batch_size, self.n_h), name='neg_h')
        self.neg_v  = sharedX(self.rng.rand((self.cratio+1)*self.batch_size, self.n_v), name='neg_v')
        self.beta = sharedX(numpy.ones((self.cratio+1)*self.batch_size), name='betas')
        self.beta_mat = T.shape_padright(self.beta)

        ### CAST is mostly implemented in numpy ###
        # Generate range of possible temperatures
        self._betas = numpy.linspace(1.0, self.min_beta, self.num_beta).astype(floatX)
        # Chain i is at inverse temperatures betas[beta_idx[i]].
        self.beta_idx = self.rng.random_integers(low=0,
                high=self.num_beta-1,
                size=(self.cratio * self.batch_size))
        self.beta_logw = numpy.zeros(self.num_beta)
        self.swap_timer = 1

        # Beta weights (adaptive weights for WL)
        self.update_temperatures()
Example #31
File: rbm.py Project: jaberg/ssrbm
 def __init__(self, rbm, particles, rng):
     if not hasattr(rng, 'randn'):
         rng = numpy.random.RandomState(rng)
     seed=int(rng.randint(2**30))
     self.rbm = rbm
     self.n_particles = particles.shape[0]
     assert particles.shape[1:] == rbm.v_shp
     self.particles = sharedX(
         particles,
         name='particles')
     self.s_rng = RandomStreams(seed)
Example #32
 def init_centering(self):
     self.avg_pos_g = sharedX(0.5 * numpy.ones(self.n_g), name='avg_pos_g')
     self.avg_pos_h = sharedX(0.5 * numpy.ones(self.n_h), name='avg_pos_h')
     self.avg_pos_v = sharedX(numpy.zeros(self.n_v), name='avg_pos_v')
     self.avg_pos_g_tm1 = sharedX(0. * numpy.ones(self.n_g), name='avg_pos_g_tm1')
     self.avg_pos_h_tm1 = sharedX(0. * numpy.ones(self.n_h), name='avg_pos_h_tm1')
     self.avg_pos_v_tm1 = sharedX(numpy.zeros(self.n_v), name='avg_pos_v_tm1')
Example #33
    def init_parameters(self):
        assert self.sparse_hmask

        # init scalar norm for each entry of Wv
        sn_val = self.iscales['scalar_norms'] * numpy.ones(self.n_s)
        self.scalar_norms = sharedX(sn_val, name='scalar_norms')

        if self.flags['igo_init']:
            print 'Overriding iscales initialization with 1./sqrt(nv x nh)'
            self.iscales['Wv'] = 1./numpy.sqrt(max(self.n_v, self.n_s))
            self.iscales['Wg'] = 1./numpy.sqrt(max(self.n_g, self.n_s))
            self.iscales['Wh'] = 1./numpy.sqrt(max(self.n_h, self.n_s))

        # Init (visible, slabs) weight matrix.
        self.Wv = self.init_weight(self.iscales['Wv'], (self.n_v, self.n_s), 'Wv',
                        normalize= (self.flags['wv_norm'] == 'unit'))

        # Initialize (slab, hidden) pooling matrix
        self.Wh = sharedX(self.sparse_hmask.mask.T * self.iscales.get('Wh', 1.0), name='Wh')

        # Initialize (slabs, g-unit) weight matrix.
        if self.sparse_gmask:
            self.Wg = sharedX(self.sparse_gmask.mask.T * self.iscales.get('Wg', 1.0), name='Wg')
        else:
            self.Wg = self.init_weight(self.iscales['Wg'], (self.n_s, self.n_g), 'Wg')

        # allocate shared variables for bias parameters
        self.gbias = sharedX(self.iscales['gbias'] * numpy.ones(self.n_g), name='gbias')
        self.hbias = sharedX(self.iscales['hbias'] * numpy.ones(self.n_h), name='hbias')
        self.cg = sharedX(0.5 * numpy.ones(self.n_g), name='cg')
        self.ch = sharedX(0.5 * numpy.ones(self.n_h), name='ch')

        # mean (mu) and precision (alpha) parameters on s
        self.mu = sharedX(self.iscales['mu'] * numpy.ones(self.n_s), name='mu')
        self.alpha = sharedX(self.iscales['alpha'] * numpy.ones(self.n_s), name='alpha')
        self.alpha_prec = T.nnet.softplus(self.alpha)

        # diagonal of precision matrix of visible units
        self.lambd = sharedX(self.iscales['lambd'] * numpy.ones(self.n_v), name='lambd')
        self.lambd_prec = T.nnet.softplus(self.lambd)
Example #34
    def init_parameters(self):
        assert self.sparse_hmask

        # Init (visible, slabs) weight matrix.
        self.Wv = self.init_weight(self.iscales['Wv'], (self.n_v, self.n_s),
                                   'Wv',
                                   normalize=True)
        self.norm_wv = T.sqrt(T.sum(self.Wv**2, axis=0))
        self.mu = sharedX(self.iscales['mu'] * numpy.ones(self.n_s), name='mu')

        # Initialize (slab, hidden) pooling matrix
        self.Wh = sharedX(self.sparse_hmask.mask.T *
                          self.iscales.get('Wh', 1.0),
                          name='Wh')

        # allocate shared variables for bias parameters
        self.hbias = sharedX(self.iscales['hbias'] * numpy.ones(self.n_h),
                             name='hbias')
        self.ch = sharedX(0.5 * numpy.ones(self.n_h), name='ch')

        # precision (alpha) parameters on s
        self.alpha = sharedX(self.iscales['alpha'] * numpy.ones(self.n_s),
                             name='alpha')
        self.alpha_prec = T.nnet.softplus(self.alpha)

        # diagonal of precision matrix of visible units
        self.lambd = sharedX(self.iscales['lambd'] * numpy.ones(self.n_v),
                             name='lambd')
        self.lambd_prec = T.nnet.softplus(self.lambd)
Example #35
 def init_parameters_from_model(self, model):
     self.scalar_norms = model.scalar_norms
     self.Wv = model.Wv
     self.Wg = model.Wg
     self.Wh = model.Wh
     self.avg_norm_wg = model.avg_norm_wg
     self.avg_norm_wh = model.avg_norm_wh
     self.gbias = model.gbias
     self.hbias = sharedX(self.iscales['hbias'] * numpy.ones(self.n_h), name='hbias')
     self.vbias = model.vbias
     self.mu = model.mu
     self.alpha = model.alpha
     self.alpha_prec = model.alpha_prec
Example #36
    def __call__(self, shape, name=None):
        w = np.zeros(shape)
        ycenter = shape[2]//2
        xcenter = shape[3]//2

        if shape[0] == shape[1]:
            o_idxs = np.arange(shape[0])
            i_idxs = np.arange(shape[1])
        elif shape[1] < shape[0]:
            o_idxs = np.arange(shape[0])
            i_idxs = np.random.permutation(np.tile(np.arange(shape[1]), shape[0]/shape[1]+1))[:shape[0]]
        w[o_idxs, i_idxs, ycenter, xcenter] = self.scale
        return sharedX(w, name=name)
Example #37
  def get_updates(self, cost, params):
      # RMSProp-style updates: per-parameter running average of squared gradients
      grads = T.grad(cost=cost, wrt=params)
      updates = []
      for p, g in zip(params, grads):
          cache = sharedX(p.get_value() * 0.0)
          new_cache = self.rho * cache + (1 - self.rho) * g**2
          new_p = p - self.lr * g / (T.sqrt(new_cache) + self.eps)
          updates.append((cache, new_cache))
          updates.append((p, new_p))
      return updates
Example #38
 def __setattr__(self, name, array):
     params = self.get_dict()
     if name not in params:
         params[name] = sharedX(array, name=name)
     else:
         print "%s already assigned" % name
         if array.shape != params[name].get_value().shape:
             raise ValueError(
                  'Shape mismatch for the new value you want to assign '
                  'to %s' % name)
         params[name].set_value(np.asarray(array,
                                           dtype=theano.config.floatX),
                                borrow=True)
Example #39
  def init_chains(self):
     """ Allocate shared variable for persistent chain """
     self.neg_g  = sharedX(self.rng.rand(self.batch_size, self.n_g), name='neg_g')
     self.neg_s  = sharedX(self.rng.rand(self.batch_size, self.n_g), name='neg_s')
     self.neg_h  = sharedX(self.rng.rand(self.batch_size, self.n_h), name='neg_h')
     self.neg_t  = sharedX(self.rng.rand(self.batch_size, self.n_h), name='neg_t')
     self.neg_v  = sharedX(self.rng.rand(self.batch_size, self.n_v), name='neg_v')
     self.neg_ev = sharedX(self.rng.rand(self.batch_size, self.n_v), name='neg_ev')
Example #40
 def updates(self, with_s_mu=False):
     new_particles, _locals = self.rbm.gibbs_step_for_v(self.particles,
                                                        self.s_rng,
                                                        return_locals=True)
     if with_s_mu:
         if not hasattr(self.rbm, 's_sample'):
             shp = (self.n_particles, ) + self.rbm.s_shp
             self.rbm.s_sample = sharedX(numpy.zeros(shp), 's_sample')
         return {
             self.particles: new_particles,
             self.rbm.s_sample: _locals['s_mu']
         }
     else:
         return {self.particles: new_particles}
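The dictionary returned by updates() follows Theano's standard updates pattern: each shared variable maps to the symbolic expression that replaces its value on every call. A self-contained toy sketch of that pattern (the AR(1)-style step below is only a stand-in for gibbs_step_for_v):

import numpy
import theano
from theano.tensor.shared_randomstreams import RandomStreams

s_rng = RandomStreams(123)
particles = theano.shared(numpy.zeros((10, 5)), name='particles')   # persistent state
new_particles = 0.5 * particles + s_rng.normal(size=(10, 5))        # stand-in transition
step = theano.function([], [], updates={particles: new_particles})
for _ in range(100):
    step()                       # each call advances the persistent chain in place
print particles.get_value().mean()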
Example #41
File: rbm.py Project: jaberg/ssrbm
 def updates(self, with_s_mu=False):
     new_particles, _locals  = self.rbm.gibbs_step_for_v(
             self.particles,
             self.s_rng,
             return_locals=True)
     if with_s_mu:
         if not hasattr(self.rbm, 's_sample'):
             shp = (self.n_particles,)+self.rbm.s_shp
             self.rbm.s_sample = sharedX(numpy.zeros(shp), 's_sample')
         return {self.particles: new_particles,
                 self.rbm.s_sample: _locals['s_mu']
                 }
     else:
         return {self.particles: new_particles}
Example #42
 def __setattr__(self, name, array):
     params = self.__dict__['params']
     if name not in params:
         params[name] = sharedX(array,
                                name=name)
     else:
         print "%s already assigned" % name
         if array.shape != params[name].get_value().shape:
              raise ValueError('Shape mismatch for the new value you want to assign '
                               'to %s' % name)
         params[name].set_value(np.asarray(
                 array,
                 dtype = theano.config.floatX
             ), borrow=True)
Example #43
    def init_parameters(self):
        assert self.sparse_hmask

        # Init (visible, slabs) weight matrix.
        self.Wv = self.init_weight(self.iscales['Wv'], (self.n_v, self.n_s), 'Wv',
                normalize = self.flags['wv_norm'] == 'unit')
        self.gamma = sharedX(numpy.ones(self.n_s), 'gamma')
        self._Wv = 1./self.gamma * self.Wv

        self.norm_wv = T.sqrt(T.sum(self.Wv**2, axis=0))
        self.mu = sharedX(self.iscales['mu'] * numpy.ones(self.n_s), name='mu')
        self._mu = self.gamma * self.mu

        # Initialize (slab, hidden) pooling matrix
        self.Wh = sharedX(self.sparse_hmask.mask.T * self.iscales.get('Wh', 1.0), name='Wh')

        # Initialize (slabs, g-unit) weight matrix.
        self.Ug = self.init_weight(self.iscales['Ug'], (self.n_s, self.n_s), 'Ug')
        if self.sparse_gmask:
            self.Wg = sharedX(self.sparse_gmask.mask.T * self.iscales.get('Wg', 1.0), name='Wg')
        else:
            self.Wg = self.init_weight(self.iscales['Wg'], (self.n_s, self.n_g), 'Wg')
        self._Wg = T.dot(self.Ug, self.Wg)

        # allocate shared variables for bias parameters
        self.gbias = sharedX(self.iscales['gbias'] * numpy.ones(self.n_g), name='gbias')
        self.hbias = sharedX(self.iscales['hbias'] * numpy.ones(self.n_h), name='hbias')
        self.cg = sharedX(0.5 * numpy.ones(self.n_g), name='cg')
        self.ch = sharedX(0.5 * numpy.ones(self.n_h), name='ch')

        # precision (alpha) parameters on s
        self.alpha = sharedX(self.iscales['alpha'] * numpy.ones(self.n_s), name='alpha')
        self.alpha_prec = T.nnet.softplus(self.alpha)

        # diagonal of precision matrix of visible units
        self.lambd = sharedX(self.iscales['lambd'] * numpy.ones(self.n_v), name='lambd')
        self.lambd_prec = T.nnet.softplus(self.lambd)
Example #44
    def init_samples(self):

        # allocate shared variable for persistent chain
        self.neg_v  = sharedX(self.rng.rand(self.batch_size, self.n_v), name='neg_v')
        self.neg_ev = sharedX(self.rng.rand(self.batch_size, self.n_v), name='neg_ev')
        self.neg_s  = sharedX(self.rng.rand(self.batch_size, self.n_s), name='neg_s')
        self.neg_h  = sharedX(self.rng.rand(self.batch_size, self.n_h), name='neg_h')
       
        # moving average values for sparsity
        self.sp_pos_v = sharedX(self.rng.rand(1,self.n_v), name='sp_pos_v')
      self.sp_pos_h = sharedX(self.rng.rand(1,self.n_h), name='sp_pos_h')
Example #45
def init_params(options):
    params = OrderedDict()
    # LF model params
    params['W_users'] = sharedX(normal((options['n_users'],options['n_factors'])),
                                name='W_users')
    params['W_items'] = sharedX(normal((options['n_items'],options['n_factors'])),
                                name='W_items')
    params['b_users'] = sharedX(np.zeros((options['n_users'],)), name='b_users')
    params['b_items'] = sharedX(np.zeros((options['n_items'],)), name='b_items')
    params['b'] = sharedX(0., name='b')

    # distributed BOW params
    params['W_bow'] = sharedX(normal((options['n_factors'],options['vocab_size'])),
                              name='W_bow')
    params['b_bow'] = sharedX(np.zeros((options['vocab_size'],)), name='b_bow')
    return params
Example #46
    def init_parameters(self):
        self.n_s = self.n_h * self.bw_s
        self.scalar_norms = sharedX(1.0 * numpy.ones(self.n_s), name='scalar_norms')
        wv_val =  self.rng.randn(self.n_v, self.n_s) * self.iscales['Wv']
        self.Wv = sharedX(wv_val, name='Wv')
        self.Wh = numpy.zeros((self.n_h, self.n_s), dtype=floatX)
        for i in xrange(self.n_h):
            self.Wh[i, i*self.bw_s:(i+1)*self.bw_s] = 1.

        # allocate shared variables for bias parameters
        self.hbias = sharedX(self.iscales['hbias'] * numpy.ones(self.n_h), name='hbias') 
        self.vbias = sharedX(self.iscales['vbias'] * numpy.ones(self.n_v), name='vbias') 

        # mean (mu) and precision (alpha) parameters on s
        self.mu = sharedX(self.iscales['mu'] * numpy.ones(self.n_s), name='mu')
        self.alpha = sharedX(self.iscales['alpha'] * numpy.ones(self.n_s), name='alpha')
        self.alpha_prec = T.nnet.softplus(self.alpha)
Example #47
 def cd_updates(self, pos_v, neg_v, lr, other_cost=0):
     grads = contrastive_grad(self.free_energy_given_v,
                              pos_v,
                              neg_v,
                              wrt=self.params(),
                              other_cost=other_cost)
     stepsizes = lr
     if self.conf.get('momentum', 0.0):
         logger.info('Using momentum %s' % self.conf['momentum'])
         rval = dict(
             sgd_momentum_updates(self.params(),
                                  grads,
                                  stepsizes=stepsizes,
                                  momentum=self.conf['momentum']))
     else:
         rval = dict(sgd_updates(self.params(), grads, stepsizes=stepsizes))
     #DEBUG STORE GRADS
     grad_shared_vars = [
         sharedX(0 * p.get_value(), '') for p in self.params()
     ]
     self.grad_shared_vars = grad_shared_vars
     rval.update(dict(zip(grad_shared_vars, grads)))
     return rval
Example #48
 def init_chains(self):
     """ Allocate shared variable for persistent chain """
     # initialize s-chain
     loc = self.mu.get_value()
     scale = numpy.sqrt(1./softplus(self.alpha.get_value()))
     neg_s  = self.rng.normal(loc=loc, scale=scale, size=(self.batch_size, self.n_s))
     self.neg_s  = sharedX(neg_s, name='neg_s')
     # initialize binary g-h-v chains
     pval_g = sigm(self.gbias.get_value())
     pval_h = sigm(self.hbias.get_value())
      pval_v = sigm(self.vbias.get_value())  # not in the original snippet; assumed by analogy with pval_g/pval_h
      pval_l = softmax(self.lbias.get_value())
     neg_g = self.rng.binomial(n=1, p=pval_g, size=(self.batch_size, self.n_g))
     neg_h = self.rng.binomial(n=1, p=pval_h, size=(self.batch_size, self.n_h))
     neg_v = self.rng.binomial(n=1, p=pval_v, size=(self.batch_size, self.n_v))
     neg_l = self.rng.multinomial(n=1, pvals=pval_l, size=(self.batch_size))
     self.neg_h  = sharedX(neg_h, name='neg_h')
     self.neg_g  = sharedX(neg_g, name='neg_g')
     self.neg_v  = sharedX(neg_v, name='neg_v')
     self.neg_l  = sharedX(neg_l, name='neg_l')
     # other misc.
     self.pos_counter  = sharedX(0., name='pos_counter')
     self.odd_even = sharedX(0., name='odd_even')
Example #49
def get_updates(grads, momentum_lambda=None):
    """
    Returns an updates dictionary corresponding to a single step of SGD. Each key of
    `grads` is a parameter (shared variable) and each value is its full update step,
    i.e. the gradient already scaled by the learning rate (and any per-parameter multiplier).
    :param grads: dictionary mapping parameters to their scaled gradients.
    :param momentum_lambda: optional momentum coefficient; when given, the applied step is
                            an exponential moving average of past steps.
    """

    updates = OrderedDict()
    momentum = OrderedDict()

    for (param, gparam) in grads.iteritems():

        if momentum_lambda:
            # create storage for momentum term
            momentum[param] = sharedX(numpy.zeros_like(param.get_value()), name=param.name + '_old')
            new_grad = (1.-momentum_lambda) * gparam + momentum_lambda * momentum[param]
            updates[param] = param - new_grad
            updates[momentum[param]] = new_grad
        else:
            updates[param] = param - gparam

    return updates
Example #50
    def init_parameters(self):
        assert self.sparse_hmask

        # Init (visible, slabs) weight matrix.
        self.Wv = self.init_weight(self.iscales['Wv'], (self.n_v, self.n_s), 'Wv', normalize=True)
        self.norm_wv = T.sqrt(T.sum(self.Wv**2, axis=0))
        self.mu = sharedX(self.iscales['mu'] * numpy.ones(self.n_s), name='mu')

        # Initialize (slab, hidden) pooling matrix
        self.Wh = sharedX(self.sparse_hmask.mask.T * self.iscales.get('Wh', 1.0), name='Wh')

        # allocate shared variables for bias parameters
        self.hbias = sharedX(self.iscales['hbias'] * numpy.ones(self.n_h), name='hbias')
        self.ch = sharedX(0.5 * numpy.ones(self.n_h), name='ch')

        # precision (alpha) parameters on s
        self.alpha = sharedX(self.iscales['alpha'] * numpy.ones(self.n_s), name='alpha')
        self.alpha_prec = T.nnet.softplus(self.alpha)

        # diagonal of precision matrix of visible units
        self.lambd = sharedX(self.iscales['lambd'] * numpy.ones(self.n_v), name='lambd')
        self.lambd_prec = T.nnet.softplus(self.lambd)
Example #51
    def init_parameters(self):
        # init scalar norm for each entry of Wv
        sn_val = self.iscales['scalar_norms'] * numpy.ones(self.n_s)
        self.scalar_norms = sharedX(sn_val, name='scalar_norms')

        # init weight matrices
        self.Wv = self.init_weight(self.iscales.get('Wv', 1.0),
                                   (self.n_v, self.n_s), 'Wv',
                                   normalize = self.flags['split_norm'])
        if self.sparse_gmask or self.sparse_hmask:
            assert self.sparse_gmask and self.sparse_hmask
            self.Wg = sharedX(self.sparse_gmask.mask * self.iscales.get('Wg', 1.0), name='Wg')
            self.Wh = sharedX(self.sparse_hmask.mask * self.iscales.get('Wh', 1.0), name='Wh')
        else:
            self.Wg = self.init_weight(1.0, (self.n_g, self.n_s), 'Wg')
            self.Wh = self.init_weight(1.0, (self.n_h, self.n_s), 'Wh')

        # allocate shared variables for bias parameters
        self.gbias = sharedX(self.iscales['gbias'] * numpy.ones(self.n_g), name='gbias')
        self.hbias = sharedX(self.iscales['hbias'] * numpy.ones(self.n_h), name='hbias')

        # diagonal of precision matrix of visible units
        self.beta = sharedX(self.iscales['beta'] * numpy.ones(self.n_v), name='beta')
        self.beta_prec = T.nnet.softplus(self.beta)
Example #52
    def __init__(self, 
            numpy_rng = None, theano_rng = None,
            n_h=99, n_v=100, init_from=None,
            min_beta=0.9, num_beta=20, gamma=10, cratio=1, cdelay=0,
            neg_sample_steps=1,
            lr_spec=None, lr_mults = {},
            iscales={}, clip_min={}, clip_max={},
            l1 = {}, l2 = {},
            sp_weight={}, sp_targ={},
            batch_size = 13,
            compile=True, debug=False, seed=1241234,
            flags = {},
            max_updates = 5e5, **kwargs):
        """
        :param n_h: number of h-hidden units
        :param n_v: number of visible units
        :param iscales: optional dictionary containing initialization scale for each parameter
        :param neg_sample_steps: number of sampling updates to perform in negative phase.
        :param l1: hyper-parameter controlling amount of L1 regularization
        :param l2: hyper-parameter controlling amount of L2 regularization
        :param batch_size: size of positive and negative phase minibatch
        :param compile: compile sampling and learning functions
        :param seed: seed used to initialize numpy and theano RNGs.
        """
        Model.__init__(self)
        Block.__init__(self)
        assert lr_spec is not None
        for k in ['h']: assert k in sp_weight.keys()
        for k in ['h']: assert k in sp_targ.keys()
        self.validate_flags(flags)

        self.jobman_channel = None
        self.jobman_state = {}
        self.register_names_to_del(['jobman_channel'])

        ### make sure all parameters are floatX ###
        for (k,v) in l1.iteritems(): l1[k] = npy_floatX(v)
        for (k,v) in l2.iteritems(): l2[k] = npy_floatX(v)
        for (k,v) in sp_weight.iteritems(): sp_weight[k] = npy_floatX(v)
        for (k,v) in sp_targ.iteritems(): sp_targ[k] = npy_floatX(v)
        for (k,v) in clip_min.iteritems(): clip_min[k] = npy_floatX(v)
        for (k,v) in clip_max.iteritems(): clip_max[k] = npy_floatX(v)

        # dump initialization parameters to object
        for (k,v) in locals().iteritems():
            if k!='self': setattr(self,k,v)

        # allocate random number generators
        self.rng = numpy.random.RandomState(seed) if numpy_rng is None else numpy_rng
        self.theano_rng = RandomStreams(self.rng.randint(2**30)) if theano_rng is None else theano_rng

        ############### ALLOCATE PARAMETERS #################
        # allocate symbolic variable for input
        self.input = T.matrix('input')
        self.init_parameters()
        self.init_chains()

        # learning rate, with deferred 1./t annealing
        self.iter = sharedX(0.0, name='iter')

        if lr_spec['type'] == 'anneal':
            num = lr_spec['init'] * lr_spec['start'] 
            denum = T.maximum(lr_spec['start'], lr_spec['slope'] * self.iter)
            self.lr = T.maximum(lr_spec['floor'], num/denum) 
        elif lr_spec['type'] == '1_t':
            self.lr = npy_floatX(lr_spec['num']) / (self.iter + npy_floatX(lr_spec['denum']))
        elif lr_spec['type'] == 'linear':
            lr_start = npy_floatX(lr_spec['start'])
            lr_end   = npy_floatX(lr_spec['end'])
            self.lr = lr_start + self.iter * (lr_end - lr_start) / npy_floatX(self.max_updates)
        elif lr_spec['type'] == 'constant':
            self.lr = sharedX(lr_spec['value'], name='lr')
        else:
            raise ValueError('Incorrect value for lr_spec[type]')

        # configure input-space (new pylearn2 feature?)
        self.input_space = VectorSpace(n_v)
        self.output_space = VectorSpace(n_h)

        self.batches_seen = 0               # incremented on every batch
        self.examples_seen = 0              # incremented on every training example
        self.logz = sharedX(0.0, name='logz')
        self.cpu_time = 0

        self.error_record = []
 
        if compile: self.do_theano()

        if init_from:
            raise NotImplementedError()
Example #53
 def init_weight(self, iscale, shape, name, normalize=False, axis=0):
     value =  self.rng.normal(size=shape) * iscale
     if normalize:
         value /= numpy.sqrt(numpy.sum(value**2, axis=axis))
     return sharedX(value, name=name)
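A standalone numpy check of the normalize branch above (a RandomState stands in for self.rng): dividing by the per-column norms leaves every column of the weight matrix with unit L2 norm.

import numpy

rng = numpy.random.RandomState(0)
value = rng.normal(size=(6, 4)) * 0.1
value /= numpy.sqrt(numpy.sum(value ** 2, axis=0))   # same normalization as above (axis=0)
print numpy.sqrt(numpy.sum(value ** 2, axis=0))      # [ 1.  1.  1.  1.]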
Example #54
    def init_parameters(self):
        assert self.sparse_hmask

        # init scalar norm for each entry of Wv
        sn_val = self.iscales['scalar_norms'] * numpy.ones(self.n_s)
        self.scalar_norms = sharedX(sn_val, name='scalar_norms')

        if self.flags['igo_init']:
            print 'Overriding iscales initialization with 1./sqrt(nv x nh)'
            self.iscales['Wv'] = 1./numpy.sqrt(max(self.n_v, self.n_s))
            self.iscales['Wg'] = 1./numpy.sqrt(max(self.n_g, self.n_s))
            self.iscales['Wh'] = 1./numpy.sqrt(max(self.n_h, self.n_s))

        # init weight matrices
        self.Wv = self.init_weight(self.iscales['Wv'], (self.n_v, self.n_s), 'Wv')
        self.Wh = sharedX(self.sparse_hmask.mask.T * self.iscales.get('Wh', 1.0), name='Wh')
        if self.sparse_gmask:
            self.Wg = sharedX(self.sparse_gmask.mask.T * self.iscales.get('Wg', 1.0), name='Wg')
        else:
            self.Wg = self.init_weight(self.iscales['Wg'], (self.n_s, self.n_g), 'Wg')

        # avg norm (for wgh_norm='roland')
        norm_wg = numpy.sqrt(numpy.sum(self.Wg.get_value()**2, axis=1)).mean()
        norm_wh = numpy.sqrt(numpy.sum(self.Wh.get_value()**2, axis=0)).mean()
        norm_wv = numpy.sqrt(numpy.sum(self.Wv.get_value()**2, axis=0)).mean()
        self.avg_norm_wg = sharedX(norm_wg, name='avg_norm_wg')
        self.avg_norm_wh = sharedX(norm_wh, name='avg_norm_wh')
        self.avg_norm_wv = sharedX(norm_wv, name='avg_norm_wv')

        # allocate shared variables for bias parameters
        self.gbias = sharedX(self.iscales['gbias'] * numpy.ones(self.n_g), name='gbias')
        self.hbias = sharedX(self.iscales['hbias'] * numpy.ones(self.n_h), name='hbias')

        # mean (mu) and precision (alpha) parameters on s
        self.mu = sharedX(self.iscales['mu'] * numpy.ones(self.n_s), name='mu')
        self.alpha = sharedX(self.iscales['alpha'] * numpy.ones(self.n_s), name='alpha')
        self.alpha_prec = T.nnet.softplus(self.alpha)

        # diagonal of precision matrix of visible units
        self.lambd = sharedX(self.iscales['lambd'] * numpy.ones(self.n_v), name='lambd')
        self.lambd_prec = T.nnet.softplus(self.lambd)
Example #55
def uniform(shape, scale=0.1, name=None):
    return sharedX(np.random.uniform(low=-scale, high=scale, size=shape),
                   name=name)
Example #56
    def __init__(self,
                 input=None,
                 Wv=None,
                 vbias=None,
                 hbias=None,
                 numpy_rng=None,
                 theano_rng=None,
                 n_h=100,
                 bw_s=1,
                 n_v=100,
                 init_from=None,
                 neg_sample_steps=1,
                 lr=None,
                 lr_timestamp=None,
                 lr_mults={},
                 iscales={},
                 clip_min={},
                 clip_max={},
                 vbound=5.,
                 l1={},
                 l2={},
                 orth_lambda=0.,
                 var_param_alpha='exp',
                 var_param_beta='linear',
                 sp_type='kl',
                 sp_weight={},
                 sp_targ={},
                 batch_size=13,
                 scalar_b=False,
                 compile=True,
                 debug=False,
                 seed=1241234,
                 my_save_path=None,
                 save_at=None,
                 save_every=None,
                 flags={},
                 max_updates=5e5):
        """
        :param n_h: number of h-hidden units
        :param n_v: number of visible units
        :param iscales: optional dictionary containing initialization scale for each parameter
        :param neg_sample_steps: number of sampling updates to perform in negative phase.
        :param l1: hyper-parameter controlling amount of L1 regularization
        :param l2: hyper-parameter controlling amount of L2 regularization
        :param batch_size: size of positive and negative phase minibatch
        :param compile: compile sampling and learning functions
        :param seed: seed used to initialize numpy and theano RNGs.
        """
        Model.__init__(self)
        Block.__init__(self)
        assert lr is not None
        for k in ['Wv', 'vbias', 'hbias']:
            assert k in iscales.keys()
        iscales.setdefault('mu', 1.)
        iscales.setdefault('alpha', 0.)
        iscales.setdefault('beta', 0.)
        for k in ['h']:
            assert k in sp_weight.keys()
        for k in ['h']:
            assert k in sp_targ.keys()

        self.jobman_channel = None
        self.jobman_state = {}
        self.register_names_to_del(['jobman_channel'])

        ### make sure all parameters are floatX ###
        for (k, v) in l1.iteritems():
            l1[k] = npy_floatX(v)
        for (k, v) in l2.iteritems():
            l2[k] = npy_floatX(v)
        for (k, v) in sp_weight.iteritems():
            sp_weight[k] = npy_floatX(v)
        for (k, v) in sp_targ.iteritems():
            sp_targ[k] = npy_floatX(v)
        for (k, v) in clip_min.iteritems():
            clip_min[k] = npy_floatX(v)
        for (k, v) in clip_max.iteritems():
            clip_max[k] = npy_floatX(v)

        # dump initialization parameters to object
        for (k, v) in locals().iteritems():
            if k != 'self': setattr(self, k, v)

        # allocate random number generators
        self.rng = numpy.random.RandomState(
            seed) if numpy_rng is None else numpy_rng
        self.theano_rng = RandomStreams(self.rng.randint(
            2**30)) if theano_rng is None else theano_rng

        ############### ALLOCATE PARAMETERS #################
        self.n_s = self.n_h * self.bw_s

        self.wv_norms = sharedX(1.0 * numpy.ones(self.n_s), name='wv_norms')
        if Wv is None:
            wv_val = self.rng.randn(n_v, self.n_s) * iscales['Wv']
            self.Wv = sharedX(wv_val, name='Wv')
        else:
            self.Wv = Wv

        # fixed block-diagonal pooling matrix: column i (hidden unit h_i) is 1 on
        # rows [i*bw_s, (i+1)*bw_s), so each h unit pools over bw_s slab units s
        self.Wh = numpy.zeros((self.n_s, self.n_h), dtype=floatX)
        for i in xrange(self.n_h):
            self.Wh[i * bw_s:(i + 1) * bw_s, i] = 1.

        # allocate shared variables for bias parameters
        if hbias is None:
            self.hbias = sharedX(iscales['hbias'] * numpy.ones(n_h),
                                 name='hbias')
        else:
            self.hbias = hbias

        # mean (mu) and precision (alpha) parameters on s
        self.mu = sharedX(iscales['mu'] * numpy.ones(self.n_s), name='mu')
        self.alpha = sharedX(iscales['alpha'] * numpy.ones(self.n_s),
                             name='alpha')
        var_param_func = {
            'exp': T.exp,
            'softplus': T.nnet.softplus,
            'linear': lambda x: x
        }
        self.alpha_prec = var_param_func[self.var_param_alpha](self.alpha)

        # diagonal of precision matrix of visible units
        self.vbound = sharedX(vbound, name='vbound')
        self.beta = sharedX(iscales['beta'] * numpy.ones(n_v), name='beta')
        self.beta_prec = var_param_func[self.var_param_beta](self.beta)

        # allocate shared variable for persistent chain
        self.neg_v = sharedX(self.rng.rand(batch_size, n_v), name='neg_v')
        self.neg_ev = sharedX(self.rng.rand(batch_size, n_v), name='neg_ev')
        self.neg_s = sharedX(self.rng.rand(batch_size, self.n_s), name='neg_s')
        self.neg_h = sharedX(self.rng.rand(batch_size, n_h), name='neg_h')

        # moving average values for sparsity
        self.sp_pos_v = sharedX(self.rng.rand(1, self.n_v), name='sp_pos_v')
        self.sp_pos_h = sharedX(self.rng.rand(1, self.n_h), name='sp_pos_h')

        # learning rate, with deferred 1./t annealing
        self.iter = sharedX(0.0, name='iter')

        if lr['type'] == 'anneal':
            num = lr['init'] * lr['start']
            denum = T.maximum(lr['start'], lr['slope'] * self.iter)
            self.lr = T.maximum(lr['floor'], num / denum)
        elif lr['type'] == 'linear':
            lr_start = npy_floatX(lr['start'])
            lr_end = npy_floatX(lr['end'])
            self.lr = lr_start + self.iter * (lr_end - lr_start) / npy_floatX(
                self.max_updates)
        else:
            raise ValueError('Incorrect value for lr[type]')

        # learning rate multipliers - implemented as shared parameters for GPU
        self.lr_mults_it = {}
        self.lr_mults_shrd = {}
        for (k, v) in lr_mults.iteritems():
            # schedule each multiplier over its timestamps; current value mirrored in a shared variable
            self.lr_mults_it[k] = tools.HyperParamIterator(
                lr_timestamp, lr_mults[k])
            self.lr_mults_shrd[k] = sharedX(self.lr_mults_it[k].value,
                                            name='lr_mults_shrd' + k)

        # allocate symbolic variable for input
        self.input = T.matrix('input') if input is None else input

        # configure input-space (new pylearn2 feature?)
        self.input_space = VectorSpace(n_v)
        self.output_space = VectorSpace(n_h)

        self.batches_seen = 0  # incremented on every batch
        self.examples_seen = 0  # incremented on every training example
        self.force_batch_size = batch_size  # force minibatch size

        self.error_record = []

        if compile: self.do_theano()

        #### load layer 1 parameters from file ####
        if init_from:
            self.load_params(init_from)
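A hedged instantiation sketch for the constructor above. The enclosing class name is not shown in this excerpt, so `SSRBM` below is a placeholder; the dictionary keys follow the asserts and the `lr` handling in the code, while the numeric values are purely illustrative:

# placeholder class name and illustrative values; only the dict keys required
# by the asserts above are spelled out
model = SSRBM(
    n_v=784, n_h=400, bw_s=1,
    iscales={'Wv': 0.01, 'vbias': 0.0, 'hbias': 0.0},
    sp_weight={'h': 0.1},                # sparsity weight on h units
    sp_targ={'h': 0.05},                 # sparsity target on h units
    lr={'type': 'anneal', 'init': 1e-3, 'start': 1e4, 'slope': 1., 'floor': 1e-6},
    batch_size=13,
    compile=False)                       # skip do_theano() in this sketch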
Example #57
0
    def __init__(self, numpy_rng = None, theano_rng = None,
            n_h=100, bw_s=1, n_v=100, init_from=None,
            neg_sample_steps=1,
            lr_spec=None, lr_timestamp=None, lr_mults = {},
            iscales={}, clip_min={}, clip_max={}, truncation_bound={},
            l1 = {}, l2 = {}, orth_lambda=0.,
            var_param_alpha='exp', var_param_lambd='linear',
            sp_type='kl', sp_weight={}, sp_targ={},
            batch_size = 13,
            compile=True,
            debug=False,
            seed=1241234,
            my_save_path=None, save_at=None, save_every=None,
            flags = {},
            max_updates = 5e5):
        """
        :param n_h: number of h-hidden units
        :param n_v: number of visible units
        :param iscales: optional dictionary containing initialization scale for each parameter
        :param neg_sample_steps: number of sampling updates to perform in negative phase.
        :param l1: hyper-parameter controlling amount of L1 regularization
        :param l2: hyper-parameter controlling amount of L2 regularization
        :param batch_size: size of positive and negative phase minibatch
        :param compile: compile sampling and learning functions
        :param seed: seed used to initialize numpy and theano RNGs.
        """
        Model.__init__(self)
        Block.__init__(self)
        assert lr_spec is not None
        for k in ['Wv', 'hbias']: assert k in iscales.keys()
        iscales.setdefault('mu', 1.)
        iscales.setdefault('alpha', 0.)
        iscales.setdefault('lambd', 0.)
        for k in ['h']: assert k in sp_weight.keys()
        for k in ['h']: assert k in sp_targ.keys()
        self.validate_flags(flags)

        self.jobman_channel = None
        self.jobman_state = {}
        self.register_names_to_del(['jobman_channel'])

        ### make sure all parameters are floatX ###
        for (k,v) in l1.iteritems(): l1[k] = npy_floatX(v)
        for (k,v) in l2.iteritems(): l2[k] = npy_floatX(v)
        for (k,v) in sp_weight.iteritems(): sp_weight[k] = npy_floatX(v)
        for (k,v) in sp_targ.iteritems(): sp_targ[k] = npy_floatX(v)
        for (k,v) in clip_min.iteritems(): clip_min[k] = npy_floatX(v)
        for (k,v) in clip_max.iteritems(): clip_max[k] = npy_floatX(v)

        # dump initialization parameters to object
        for (k,v) in locals().iteritems():
            if k!='self': setattr(self,k,v)

        # allocate random number generators
        self.rng = numpy.random.RandomState(seed) if numpy_rng is None else numpy_rng
        self.theano_rng = RandomStreams(self.rng.randint(2**30)) if theano_rng is None else theano_rng

        # allocate symbolic variable for input
        self.input = T.matrix('input')
        self.init_parameters()
        self.init_chains()

        # learning rate, with deferred 1./t annealing
        self.iter = sharedX(0.0, name='iter')

        if lr_spec['type'] == 'anneal':
            num = lr_spec['init'] * lr_spec['start'] 
            denum = T.maximum(lr_spec['start'], lr_spec['slope'] * self.iter)
            self.lr = T.maximum(lr_spec['floor'], num/denum) 
        elif lr_spec['type'] == 'linear':
            lr_start = npy_floatX(lr_spec['start'])
            lr_end   = npy_floatX(lr_spec['end'])
            self.lr = lr_start + self.iter * (lr_end - lr_start) / npy_floatX(self.max_updates)
        else:
            raise ValueError('Incorrect value for lr_spec[type]')

        # configure input-space (new pylearn2 feature?)
        self.input_space = VectorSpace(n_v)
        self.output_space = VectorSpace(n_h)

        self.batches_seen = 0                    # incremented on every batch
        self.examples_seen = 0                   # incremented on every training example
        self.force_batch_size = batch_size  # force minibatch size

        self.error_record = []
 
        if compile: self.do_theano()

        #### load layer 1 parameters from file ####
        if init_from:
            self.load_params(init_from)
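For reference, a sketch of the two `lr_spec` formats accepted by this constructor, with keys taken from the branches above and illustrative values:

# deferred 1/t annealing: lr = max(floor, init*start / max(start, slope*iter))
lr_spec = {'type': 'anneal', 'init': 1e-3, 'start': 1e4, 'slope': 1., 'floor': 1e-6}
# linear schedule from start to end over max_updates iterations
lr_spec = {'type': 'linear', 'start': 1e-3, 'end': 1e-5}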
Example #58
0
    def __init__(self, name, kernel_size, num_input, num_output, init_std):
        super(Convolution, self).__init__(name, trainable=True)
        W_shape = (num_output, num_input, kernel_size, kernel_size)
        self.W = sharedX(np.random.randn(*W_shape) * init_std,
                         name=name + '/W')
        self.b = sharedX(np.zeros(num_output), name=name + '/b')
Example #59
0
    def __init__(self, name, inputs_dim, num_output, init_std):
        super(Linear, self).__init__(name, trainable=True)
        self.W = sharedX(np.random.randn(inputs_dim, num_output) * init_std,
                         name=name + '/W')
        self.b = sharedX(np.zeros(num_output), name=name + '/b')
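A minimal usage sketch for these two layers, assuming a base `Layer` class that only records `name` and `trainable`, and that `sharedX` returns a Theano shared variable (so parameters expose `get_value()`); argument values are illustrative:

# hypothetical usage of the Convolution and Linear constructors above
conv = Convolution('conv1', kernel_size=5, num_input=1, num_output=8, init_std=0.01)
fc = Linear('fc1', inputs_dim=128, num_output=10, init_std=0.01)
print(conv.W.get_value().shape)   # (8, 1, 5, 5) = (num_output, num_input, k, k)
print(fc.W.get_value().shape)     # (128, 10)
print(fc.b.get_value().shape)     # (10,)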