Example #1
    def pt_grad(self, params, inpts, **kwargs):
        g = gzeros(params.shape)
        m, _ = inpts.shape

        hddn = logistic(
            gpu.dot(inpts, params[: self.m_end].reshape(self.shape)) + params[self.m_end : self.m_end + self.shape[1]]
        )
        Z = gdot(hddn, params[: self.m_end].reshape(self.shape).T) + params[-self.shape[0] :]

        w = params[: self.m_end].reshape(self.shape)
        cae = gpu.sum(gpu.mean(Dsigmoid(hddn) ** 2, axis=0) * gpu.sum(w ** 2, axis=0))
        cae *= self.cae

        _, delta = self.score(Z, inpts, error=True, addon=cae)

        g[: self.m_end] = gdot(delta.T, hddn).ravel()
        g[-self.shape[0] :] = delta.sum(axis=0)

        cae_grad = gpu.mean(Dsigmoid(hddn) ** 2, axis=0) * w
        cae_grad += gdot(inpts.T, (Dsigmoid(hddn) ** 2 * (1 - 2 * hddn))) / m * gpu.sum(w ** 2, axis=0)
        g[: self.m_end] += self.cae * 2 * cae_grad.ravel()

        dsc_dha = Dsigmoid(hddn) * gdot(delta, params[: self.m_end].reshape(self.shape))

        g[: self.m_end] += gdot(inpts.T, dsc_dha).ravel()

        g[self.m_end : -self.shape[0]] = dsc_dha.sum(axis=0)
        # clean up
        del delta, hddn, Z
        return g
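The penalty built above works out to the sample-averaged squared Frobenius norm of the encoder Jacobian for a logistic hidden layer (with Dsigmoid(h) = h * (1 - h)). A minimal NumPy-only sketch of the same quantity, useful for checking the formula on the CPU; the function and variable names here are illustrative, not part of the original code:

import numpy as np

def logistic(a):
    return 1.0 / (1.0 + np.exp(-a))

def contractive_penalty(inpts, W, bias):
    # encoder activations: h = sigmoid(x W + b)
    hddn = logistic(np.dot(inpts, W) + bias)
    dh = hddn * (1 - hddn)  # Dsigmoid(hddn)
    # mean over samples of sum_j (h_j (1 - h_j))^2 * sum_i W_ij^2
    return np.sum(np.mean(dh ** 2, axis=0) * np.sum(W ** 2, axis=0))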
Example #2
    def pt_grad(self, params, inpts, **kwargs):
        g = gzeros(params.shape)
        m, _ = inpts.shape

        hddn = logistic(
            gpu.dot(inpts, params[:self.m_end].reshape(self.shape)) +
            params[self.m_end:self.m_end + self.shape[1]])
        Z = gdot(hddn, params[:self.m_end].reshape(
            self.shape).T) + params[-self.shape[0]:]

        w = params[:self.m_end].reshape(self.shape)
        cae = gpu.sum(
            gpu.mean(Dsigmoid(hddn)**2, axis=0) * gpu.sum(w**2, axis=0))
        cae *= self.cae

        _, delta = self.score(Z, inpts, error=True, addon=cae)

        g[:self.m_end] = gdot(delta.T, hddn).ravel()
        g[-self.shape[0]:] = delta.sum(axis=0)

        cae_grad = gpu.mean(Dsigmoid(hddn)**2, axis=0) * w
        cae_grad += (gdot(inpts.T, (Dsigmoid(hddn)**2 * (1 - 2 * hddn))) / m *
                     gpu.sum(w**2, axis=0))
        g[:self.m_end] += self.cae * 2 * cae_grad.ravel()

        dsc_dha = Dsigmoid(hddn) * gdot(
            delta, params[:self.m_end].reshape(self.shape))

        g[:self.m_end] += gdot(inpts.T, dsc_dha).ravel()

        g[self.m_end:-self.shape[0]] = dsc_dha.sum(axis=0)
        # clean up
        del delta, hddn, Z
        return g
Example #3
    def updateParams(self, scale, update, log=False):
        if log:
            for w, u in zip(self.stack, update):
                wrms = gp.sqrt(gp.mean(w[0]**2))
                urms = gp.sqrt(gp.mean((scale*u[0])**2))
                print "weight rms=%f -- update rms=%f" % (wrms, urms)

        self.stack = [[ws[0]+scale*wsDelta[0], ws[1]+scale*wsDelta[1]]
                      for ws, wsDelta in zip(self.stack, update)]
Example #4
    def get_drop_masks(self, mask_count, in_drop=0, hd_drop=0):
        """Get mask_count dropout masks shaped for each layer in self.layers.

        Dropout masks are computed based on drop rates self.drop_input and
        self.drop_hidden, and self.drop_undrop. Masks are scaled so that the
        sum of each mask for a given layer is the same. If in_drop == 1, we do
        dropping on input layer and if hd_drop == 1, we also drop hiddens.
        """
        M = []
        # Generate an 'undrop' mask, which sets some masks to be dropless
        u_mask = (gp.rand(mask_count,1) < self.drop_undrop)
        for i in range(self.layer_count):
            # Set drop_rate based on layer and in_drop/hd_drop
            drop_rate = 0.0
            if ((i == 0) and (in_drop == 1)):
                drop_rate = self.drop_input
            elif (hd_drop == 1):
                drop_rate = self.drop_hidden
            # Get mask dimension for this layer
            mask_dim = self.layers[i].dim_input
            # Generate random 'bit' mask
            d_mask = (gp.rand(mask_count, mask_dim) > drop_rate)
            # Compute bootleg 'or' with the undrop mask
            mask = ((d_mask + u_mask) > 0.1)
            # Rescale mask entries to have unit mean
            scales = 1.0 / gp.mean(mask, axis=1)
            scales = scales[:,gp.newaxis]
            mask = mask * scales
            # Record the generated mask
            M.append(mask)
        return M
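A NumPy-only sketch of the same masking scheme for a single layer; drop_rate and undrop_rate stand in for the class attributes used above, and the helper name is made up. It shows the 'bootleg or' with the undrop rows and the unit-mean rescaling:

import numpy as np

def make_drop_masks(mask_count, mask_dim, drop_rate=0.5, undrop_rate=0.1):
    # rows flagged here are exempted from dropout entirely
    u_mask = np.random.rand(mask_count, 1) < undrop_rate
    # per-entry keep mask at the requested drop rate
    d_mask = np.random.rand(mask_count, mask_dim) > drop_rate
    # 'or' of the two: an entry survives if kept or if its row is undropped
    mask = ((d_mask + u_mask) > 0.1).astype(float)
    # rescale each row to unit mean, as in get_drop_masks above
    mask *= 1.0 / mask.mean(axis=1, keepdims=True)
    return mask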
Example #5
def cov(x):
    # row-wise means, kept as an (n, 1) array for broadcasting
    y = gpu.mean(x, axis=1)[:, None]
    # center the data (as numpy arrays on the CPU)
    x = x.as_numpy_array() - y.as_numpy_array()
    # sample covariance: X_c X_c^T / (n_observations - 1)
    result = gpu.dot(x, x.T.conj())
    return result / (x.shape[1] - 1)
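The function above should agree with numpy.cov on the same data (rows are variables, columns are observations). A small CPU-only sanity check, using plain NumPy in place of gnumpy:

import numpy as np

def cov_numpy(x):
    # center each row variable, then X_c X_c^T / (n_observations - 1)
    xc = x - x.mean(axis=1, keepdims=True)
    return np.dot(xc, xc.T.conj()) / (x.shape[1] - 1)

x = np.random.randn(5, 200)
print(np.allclose(cov_numpy(x), np.cov(x)))  # True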
Example #6
    def constrain_weights(self):
        for i, rms_limit in enumerate(self.rms_limits):
            if not rms_limit:
                continue
            W = self.weights[i]
            rms_scale = rms_limit / gnp.sqrt(gnp.mean(W*W, axis=0))
            limit_rms = W * (1+(rms_scale < 1) * (rms_scale - 1))
            self.weights[i] = limit_rms
Example #7
    def reconstruction_cross_entropy(self, vis):
        """Returns the cross entropy between vis and its reconstruction
        obtained by one step of Gibbs sampling."""
        _, sampled_p_vis = self.gibbs_sample(vis, 1)
        cross_entropy = -gp.mean(vis * gp.log(sampled_p_vis) +
                                 (1 - vis) * gp.log(1 - sampled_p_vis),
                                 axis=1)
        return cross_entropy
Example #8
def limitColumnRMS(W, rmsLim):
    """
    All columns of W with rms entry above the limit are scaled to equal the limit.
    The limit can either be a row vector or a scalar.
    Apply to 2-d array W.
    """
    columnRMS = lambda W: gnp.sqrt(gnp.mean(W * W, axis=0))
    rmsScale = rmsLim / columnRMS(W)
    return W * (1 + (rmsScale < 1) * (rmsScale - 1))
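A quick NumPy check of the same column-RMS clipping, assuming nothing beyond NumPy: columns whose RMS exceeds the limit are scaled down to it, and the rest are left untouched.

import numpy as np

def limit_column_rms(W, rms_lim):
    rms_scale = rms_lim / np.sqrt(np.mean(W * W, axis=0))
    # factor is 1 where rms_scale >= 1, otherwise rms_scale
    return W * (1 + (rms_scale < 1) * (rms_scale - 1))

W = np.random.randn(1000, 4) * np.array([0.1, 0.5, 2.0, 5.0])
W_lim = limit_column_rms(W, 1.0)
print(np.sqrt(np.mean(W_lim ** 2, axis=0)))  # no column RMS above 1.0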
Example #9
    def pt_score(self, params, inpts, **kwargs):
        hddn = logistic(
            gpu.dot(inpts, params[: self.m_end].reshape(self.shape)) + params[self.m_end : self.m_end + self.shape[1]]
        )
        Z = gdot(hddn, params[: self.m_end].reshape(self.shape).T) + params[-self.shape[0] :]

        w = params[: self.m_end].reshape(self.shape)
        cae = gpu.sum(gpu.mean(Dsigmoid(hddn) ** 2, axis=0) * gpu.sum(w ** 2, axis=0))
        cae *= self.cae

        sc = self.score(Z, inpts, addon=cae)
        return np.array([sc, cae])
Example #10
    def pt_score(self, params, inpts, **kwargs):
        hddn = logistic(gpu.dot(inpts, params[:self.m_end].reshape(self.shape)) + params[self.m_end:self.m_end+self.shape[1]])
        Z = gdot(hddn, params[:self.m_end].reshape(self.shape).T) + params[-self.shape[0]:]

        if self.rho_hat is None:
            self.rho_hat = hddn.mean(axis=0)
        else:
            self.rho_hat *= 0.9
            self.rho_hat += 0.1*hddn.mean(axis=0)

        sparsity = self.beta * gpu.sum(bKL(self.rho, self.rho_hat))
        sc = self.score(Z, inpts, addon=sparsity)

        return np.array([sc, sc-sparsity, sparsity, gpu.mean(self.rho_hat)])
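bKL above is presumably the element-wise KL divergence between a Bernoulli with mean rho and one with mean rho_hat; a NumPy sketch of the sparsity penalty under exactly that assumption (beta and rho values are arbitrary):

import numpy as np

def bKL_numpy(rho, rho_hat):
    # KL divergence between Bernoulli(rho) and Bernoulli(rho_hat), element-wise
    return (rho * np.log(rho / rho_hat) +
            (1 - rho) * np.log((1 - rho) / (1 - rho_hat)))

rho_hat = np.clip(np.random.rand(256), 1e-6, 1 - 1e-6)  # toy mean activations
sparsity = 0.1 * np.sum(bKL_numpy(0.05, rho_hat))        # beta = 0.1, rho = 0.05
print(sparsity)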
Example #11
    def train(self):
        self.time_interval = 0
        t1 = time.time()
        cd = 1
        for current_epochs, weight_size in zip(self.epochs,
                                               self.weights_to_do):
            self.initialize_weights(weight_size)
            for epoch in xrange(current_epochs):
                error = 0
                for start_idx in range(0, self.X.shape[0], self.batch_size):
                    self.w_updt = gpu.zeros((self.input, weight_size))
                    self.bias_h_updt = gpu.zeros((1, weight_size))
                    self.bias_v_updt = gpu.zeros((1, self.input))

                    self.allocate_batch(start_idx)
                    self.input_original = self.get_visible_vector(self.batch)
                    self.input_dropped = self.input_original
                    self.positive_phase()
                    self.gibbs_updates(weight_size)
                    for j in range(cd):
                        self.negative_phase()

                    self.w += self.alpha * self.w_updt / float(
                        self.current_batch_size)
                    self.bias_h += self.alpha * self.bias_h_updt / float(
                        self.current_batch_size)
                    self.bias_v += self.alpha * self.bias_v_updt / float(
                        self.current_batch_size)
                    t0 = time.time()
                    error += gpu.mean(
                        (self.input_dropped - self.input_original)**2)
                    self.time_interval += time.time() - t0

                s = 'EPOCH: ' + str(epoch + 1)
                self.log_message(s)
                s = 'Reconstruction error: ' + str(
                    error / (self.X.shape[0] / float(self.batch_size)))
                self.log_message(s)

            self.trained_weights.append(
                [self.w.as_numpy_array(),
                 self.bias_h.as_numpy_array()])
            self.input = self.w.shape[1]

        print 'Time interval: ' + str(self.time_interval)
        print 'Training time: ' + str(time.time() - t1)

        self.free_GPU_memory()

        return self.trained_weights
Example #12
    def pt_score(self, params, inpts, **kwargs):
        hddn = logistic(
            gpu.dot(inpts, params[:self.m_end].reshape(self.shape)) +
            params[self.m_end:self.m_end + self.shape[1]])
        Z = gdot(hddn, params[:self.m_end].reshape(
            self.shape).T) + params[-self.shape[0]:]

        w = params[:self.m_end].reshape(self.shape)
        cae = gpu.sum(
            gpu.mean(Dsigmoid(hddn)**2, axis=0) * gpu.sum(w**2, axis=0))
        cae *= self.cae

        sc = self.score(Z, inpts, addon=cae)
        return np.array([sc, cae])
Example #13
def xe(z, targets, predict=False, error=False, addon=0):
    """
    Cross entropy error.
    """
    if predict:
        return gpu.argmax(z, axis=1)

    _xe = z - logsumexp(z, axis=1)
    n, _ = _xe.shape
    xe = -gpu.mean(_xe[np.arange(n), targets])
    if error:
        err = gpu.exp(_xe)
        err[np.arange(n), targets] -= 1
        return xe + addon, err / n
    else:
        return xe + addon
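The same multiclass cross entropy can be checked with plain NumPy, writing the log-sum-exp inline instead of using the logsumexp helper the example assumes. The returned error term is softmax(z) with 1 subtracted at the target entries, divided by the batch size:

import numpy as np

def xe_numpy(z, targets):
    zmax = z.max(axis=1, keepdims=True)
    # log-softmax via the log-sum-exp trick
    log_p = z - (zmax + np.log(np.sum(np.exp(z - zmax), axis=1, keepdims=True)))
    n = z.shape[0]
    loss = -np.mean(log_p[np.arange(n), targets])
    err = np.exp(log_p)
    err[np.arange(n), targets] -= 1
    return loss, err / n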
Example #14
  def train(self):
      epochs = 50
      batches = self.data.shape[0]
      alpha = 0.1         
      self.time_interval = 0
      t1 = time.time()
      cd = 1
      cd3 = 10
      cd10 = 15   
 
   
      for epoch in xrange(epochs):    
          error = 0   
          for i in xrange(batches):
              self.w_updt = gpu.zeros((784, 800))
              self.bias_h_updt = gpu.zeros((1,800))
              self.bias_v_updt = gpu.zeros((1,784)) 
                        
              for j in range(cd):
                  self.v_original = gpu.garray(self.data[i])
                  self.v = self.v_original                  
                  self.positive_phase()
                  self.gibbs_updates()
                  self.negative_phase()                
          
              self.w += alpha*self.w_updt/100.
              self.bias_h += alpha*self.bias_h_updt/100.
              self.bias_v += alpha*self.bias_v_updt/100.
              t0 = time.time()
              error += gpu.mean((self.v-self.v_original)**2)
              self.time_interval += time.time() - t0
              
          print 'EPOCH: ' + str(epoch + 1)
          print 'Reconstruction error: ' + str(error/batches)
          
          if epoch == cd10:
              cd = 10
          elif epoch == cd3:
              cd = 3                   
        
      print 'Time interval: ' + str(self.time_interval)
      print 'Training time: ' + str(time.time() - t1)
      np.save('/home/tim/development/RBM_w1.npy',self.w.as_numpy_array())
Example #15
    def train(self):
        self.time_interval = 0
        t1 = time.time()
        cd = 1
        for current_epochs, weight_size in zip(self.epochs, self.weights_to_do):
            self.initialize_weights(weight_size)
            for epoch in xrange(current_epochs):
                error = 0
                for start_idx in range(0, self.X.shape[0], self.batch_size):
                    self.w_updt = gpu.zeros((self.input, weight_size))
                    self.bias_h_updt = gpu.zeros((1, weight_size))
                    self.bias_v_updt = gpu.zeros((1, self.input))

                    self.allocate_batch(start_idx)
                    self.input_original = self.get_visible_vector(self.batch)
                    self.input_dropped = self.input_original
                    self.positive_phase()
                    self.gibbs_updates(weight_size)
                    for j in range(cd):
                        self.negative_phase()

                    self.w += self.alpha * self.w_updt / float(self.current_batch_size)
                    self.bias_h += self.alpha * self.bias_h_updt / float(self.current_batch_size)
                    self.bias_v += self.alpha * self.bias_v_updt / float(self.current_batch_size)
                    t0 = time.time()
                    error += gpu.mean((self.input_dropped - self.input_original) ** 2)
                    self.time_interval += time.time() - t0

                s = "EPOCH: " + str(epoch + 1)
                self.log_message(s)
                s = "Reconstruction error: " + str(error / (self.X.shape[0] / float(self.batch_size)))
                self.log_message(s)

            self.trained_weights.append([self.w.as_numpy_array(), self.bias_h.as_numpy_array()])
            self.input = self.w.shape[1]

        print "Time interval: " + str(self.time_interval)
        print "Training time: " + str(time.time() - t1)

        self.free_GPU_memory()

        return self.trained_weights
Example #16
File: dbn.py Project: evolu8/gdbn
def columnRMS(W):
    return gnp.sqrt(gnp.mean(W*W,axis=0))
Example #17
    # train rbm
    print "Training ml.rbm..."
    rbm = mnist_rbm.train_rbm(seed=seed, plot_samples=False)

    # estimate PF using AIS
    print "Estimating partition function using %d AIS runs with %d intermediate "\
          "RBMs and %d Gibbs steps..." % (ais_runs, len(ais_betas), ais_gibbs_steps)
    ais = AnnealedImportanceSampler(rbm, ais_base_samples, ais_base_chains,
                                    ais_base_gibbs_steps_between_samples)    
    lpf, lpf_m_3s, lpf_p_3s = ais.log_partition_function(ais_betas, ais_runs, 
                                                         ais_gibbs_steps)    
    rbm.log_pf = lpf

    # calculate log probability of training and test set
    tr_lp = gp.mean(rbm.normalized_log_p_vis(mnist_rbm.X))
    tst_lp = gp.mean(rbm.normalized_log_p_vis(mnist_rbm.TX))
    print "Average log p(x from training set) =  %f" % tr_lp
    print "Average log p(x from test set) =      %f" % tst_lp

    # accumulate statistics
    tr_lps.append(tr_lp)
    tst_lps.append(tst_lp)

    # save statistics
    rbmutil.leave_rbm_plot_directory()
    if cfg.use_pcd:
        pcd_str = "p"
    else:
        pcd_str = ""
    np.savez_compressed("mnist-rbm-%03d-%scd%02d-performance.npz" % 
Example #18
 def mean(A, axis):
     return gp.mean(A, axis=axis)
Example #19
                                 "prob.txt", clean=False)

# Build RBM
rbm = RestrictedBoltzmannMachine(0, cfg.n_vis, cfg.n_hid, 0) 

# load Ruslan's RBM
if use_ruslan:
    print "Loading Ruslan's ml.rbm..."
    mdata = scipy.io.loadmat("matlab_epoch%d.mat" % (epoch + 1))
    rbm.bias_vis = gp.as_garray(mdata['visbiases'][0,:])
    rbm.bias_hid = gp.as_garray(mdata['hidbiases'][0,:])
    rbm.weights = gp.as_garray(mdata['vishid'])
else:
    rbmutil.load_parameters(rbm, "weights-%02i.npz" % epoch)

# load partition function
if use_ruslan:
    filename = "matlab-lpf-%02d.npz" % (epoch+1)
else:
    filename = "lpf-%02d.npz" % epoch
print "Loading partition function %s" % filename
lpf = np.load(filename)
rbm.log_pf = lpf['lpf']

# calculate log probability of training set
tr_lp = gp.mean(rbm.normalized_log_p_vis(X))
tst_lp = gp.mean(rbm.normalized_log_p_vis(TX))

print "Average log p(x from training set) =  %f" % tr_lp
print "Average log p(x from test set) =      %f" % tst_lp
Example #20
File: ais.py Project: surban/ml
    def init_using_dataset(self, vis_samples):
        "Calculates the biases of the base rate RBM using the given samples"
        epsilon = 1e-2
        vis_mean = gp.mean(vis_samples, axis=0)
        self.base_bias_vis = gp.log((vis_mean + epsilon) / (1 - vis_mean + epsilon))
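The bias computed above is the logit of the (epsilon-smoothed) per-unit mean, so the logistic of the bias gives that smoothed mean back. A small NumPy check of this identity on toy data:

import numpy as np

vis_samples = (np.random.rand(500, 784) > 0.7).astype(float)  # toy binary data
epsilon = 1e-2
vis_mean = np.mean(vis_samples, axis=0)
base_bias_vis = np.log((vis_mean + epsilon) / (1 - vis_mean + epsilon))
# the logistic of the bias returns the epsilon-smoothed mean activation
recovered = 1.0 / (1.0 + np.exp(-base_bias_vis))
print(np.max(np.abs(recovered - (vis_mean + epsilon) / (1.0 + 2 * epsilon))))  # ~0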
Example #21
def JointBayesian_Train(trainingset, label, fold="./"):
    if fold[-1] != '/':
        fold += '/'
    print trainingset.shape
    print trainingset[0]
    # total number of images
    n_image = len(label)
    # dimensionality of the features
    n_dim = trainingset.shape[1]
    # find the unique labels, to count the total number of people
    classes, labels = np.unique(label, return_inverse=True)
    # total number of people
    n_class = len(classes)
    # print classes
    # print labels
    # store the samples belonging to each person
    cur = {}
    withinCount = 0
    # flags which per-person sample counts occur
    numberBuff = np.zeros(n_image, np.float32)
    maxNumberInOneClass = 0
    for i in range(n_class):
        # gather the samples of person i
        cur[i] = trainingset[labels == i]
        # cur_gpu = shared(cur[i])
        # number of samples sharing this label
        n_same_label = cur[i].shape[0]

        if n_same_label > 1:
            withinCount += n_same_label
        if numberBuff[n_same_label] == 0:
            numberBuff[n_same_label] = 1
            maxNumberInOneClass = max(maxNumberInOneClass, n_same_label)
    utils.print_info("prepare done, maxNumberInOneClass=" +
                     str(maxNumberInOneClass))

    u = np.zeros([n_dim, n_class], np.float32)
    u_gpu = gpu.garray(u)
    ep = np.zeros([n_dim, withinCount], np.float32)
    ep_gpu = gpu.garray(ep)
    nowp = 0
    for i in range(n_class):
        # the mean of cur[i]
        cur_gpu = gpu.garray(cur[i])
        u_gpu[:, i] = gpu.mean(cur_gpu, 0)
        b = u_gpu[:, i].reshape(n_dim, 1)
        n_same_label = cur[i].shape[0]
        if n_same_label > 1:
            ep_gpu[:, nowp:nowp + n_same_label] = cur_gpu.T - b
            nowp += n_same_label
    utils.print_info("stage1 done")

    Su = cov(u_gpu)
    gpu.status()
    Sw = cov(ep_gpu)
    oldSw = Sw
    SuFG = {}
    SwG = {}
    convergence = 1
    min_convergence = 1
    for l in range(500):
        F = np.linalg.pinv(Sw.as_numpy_array())
        F_gpu = gpu.garray(F)
        u = np.zeros([n_dim, n_class], np.float32)
        u_gpu = gpu.garray(u)
        ep = np.zeros([n_dim, n_image], np.float32)
        ep_gpu = gpu.garray(ep)
        nowp = 0
        for mi in range(maxNumberInOneClass + 1):
            if numberBuff[mi] == 1:
                # G = -(m*Su + Sw)^-1 * Su * Sw^-1
                temp = np.linalg.pinv(mi * Su.as_numpy_array() +
                                      Sw.as_numpy_array())
                temp2 = gpu.dot(gpu.garray(temp), Su)
                G = -gpu.dot(temp2, F_gpu)
                # Su*(F+mi*G) for u
                SuFG[mi] = gpu.dot(Su, (F_gpu + mi * G))
                # Sw*G for e
                SwG[mi] = gpu.dot(Sw, G)
        utils.print_info('stage2 done')
        # print SuFG
        for i in range(n_class):
            # print l, i
            nn_class = cur[i].shape[0]
            # print nn_class
            cur_gpu = gpu.garray(cur[i])
            # formula 7 in suppl_760
            temp = gpu.dot(SuFG[nn_class], cur_gpu.T)
            u_gpu[:, i] = gpu.sum(temp, 1)
            # formula 8 in suppl_760
            ep_gpu[:, nowp:nowp + nn_class] = cur_gpu.T + \
                gpu.sum(gpu.dot(SwG[nn_class], cur_gpu.T), 1).reshape(n_dim, 1)
            nowp = nowp + nn_class
        print 'stage2 done'

        Su = cov(u_gpu)
        Sw = cov(ep_gpu)
        convergence = np.linalg.norm(
            (Sw - oldSw).as_numpy_array()) / np.linalg.norm(
                Sw.as_numpy_array())
        utils.print_info("Iterations-" + str(l) + ": " + str(convergence))
        if convergence < 1e-6:
            print "Convergence: ", l, convergence
            break
        oldSw = Sw

        if convergence < min_convergence:
            min_convergence = convergence
            F = np.linalg.pinv(Sw.as_numpy_array())
            F_gpu = gpu.garray(F)
            G = -gpu.dot(
                gpu.dot(np.linalg.pinv((2 * Su + Sw).as_numpy_array()),
                        Su.as_numpy_array()), F_gpu)
            A = np.linalg.pinv((Su + Sw).as_numpy_array()) - \
                (F + G.as_numpy_array())
            utils.data_to_pkl(G, fold + "G.pkl")
            utils.data_to_pkl(A, fold + "A.pkl")

    F = np.linalg.pinv(Sw.as_numpy_array())
    F_gpu = gpu.garray(F)
    temp = gpu.garray(np.linalg.pinv((2 * Su + Sw).as_numpy_array()))
    G = -gpu.dot(gpu.dot(temp, Su), F_gpu).as_numpy_array()
    A = np.linalg.pinv((Su + Sw).as_numpy_array()) - (F + G)
    utils.data_to_pkl(G, fold + "G_con.pkl")
    utils.data_to_pkl(A, fold + "A_con.pkl")

    return A, G
Example #22
def test_compare_rbm_with_ruslan():
    """Trains own RBM and compares average likelihood on training and test set 
    with RBM trained by Ruslan"""
    ref_file = "test/rbm-for-ais-test.mat"
    iterations = 10
    alpha = 0.10

    mdata = scipy.io.loadmat(ref_file)
    ref_logpf = mdata['logZZ_est'][0,0]
    ref_logpf_low = mdata['logZZ_est_down'][0,0]
    ref_logpf_high = mdata['logZZ_est_up'][0,0]
    ref_ll_training = mdata['loglik_training_est'][0,0]
    ref_ll_test = mdata['loglik_test_est'][0,0]
    n_hid = int(mdata['numhid'][0,0])
    cd = int(mdata['CD'][0,0])
    epochs = int(mdata['maxepoch'][0,0])

    os.chdir("test-tmp")   

    ll_trainings = []
    ll_tests = []
    print "Running %d iterations" % iterations
    for i in range(iterations):
        tcfg = rbm.config.TrainingConfiguration(dataset='rmnist',
                                                n_vis=784, n_hid=n_hid,
                                                batch_size=100,
                                                n_gibbs_steps=cd,
                                                epochs=epochs,
                                                step_rate=0.05,
                                                use_pcd=False,
                                                binarize_data=True,
                                                initial_momentum=0.5, final_momentum=0.9, 
                                                use_final_momentum_from_epoch=5,
                                                weight_cost=0.0002,
                                                init_weight_sigma=0.01, init_bias_sigma=0,
                                                seed=random.randint(0, 100000))
        myrbm = rbm.rbm.train_rbm(tcfg)
   
        ais = rbm.ais.AnnealedImportanceSampler(myrbm)
        ais.init_using_dataset(tcfg.X)
        betas = np.concatenate((np.linspace(0.0, 0.5,   500, endpoint=False),
                                np.linspace(0.5, 0.9,  4000, endpoint=False),
                                np.linspace(0.9, 1.0, 10000)))
        logpf, logpf_low, logpf_high = ais.log_partition_function(betas=betas,
                                                                  ais_runs=100)
        myrbm.log_pf = logpf

        print "Test:      log Z = %g (%g, %g)" % (logpf, logpf_low, logpf_high)
        print "Reference: log Z = %g (%g, %g)" % (ref_logpf, ref_logpf_low, ref_logpf_high)

        ll_training = gp.mean(myrbm.normalized_log_p_vis(tcfg.X))
        ll_test = gp.mean(myrbm.normalized_log_p_vis(tcfg.TX))
        print "Test:      Average log p(x from training set) =  %f" % ll_training
        print "Test:      Average log p(x from test set)     =  %f" % ll_test

        ll_trainings.append(ll_training)
        ll_tests.append(ll_test)


    ll_training_mean, ll_training_pm = common.stats.normal_mean(ll_trainings, alpha)
    ll_test_mean, ll_test_pm = common.stats.normal_mean(ll_tests, alpha)
    print
    print "Reference: Average log p(x from training set)   =  %f" % ref_ll_training
    print "Test:      <Average log p(x from training set)> =  %f +/- %f" % \
        (ll_training_mean, ll_training_pm)
    print "Reference: Average log p(x from test set)       =  %f" % ref_ll_test
    print "Test:      <Average log p(x from test set)>     =  %f +/- %f" % \
        (ll_test_mean, ll_test_pm)

    assert common.util.interval_contains(common.stats.normal_mean_confint(ll_trainings, 
                                                                          alpha),
                                         ref_ll_training) or ll_training_mean > ref_ll_training
    assert common.util.interval_contains(common.stats.normal_mean_confint(ll_tests, 
                                                                          alpha),
                                         ref_ll_test) or ll_test_mean > ref_ll_test
Example #23
def test_compare_rbm_with_lisa():
    """Trains own RBM and compares average likelihood on training and test set 
    with RBM trained by the Deep Learning Tutorials"""
    ref_file = "test/lisa-rbm.npz"
    iterations = 10
    #iterations = 2
    alpha = 0.10

    common.show_progress = True

    n_vis=784
    n_hid=500
    init_weight_sigma = 4 * np.sqrt(6. / (n_hid + n_vis))

    refrbm = rbm.rbm.RestrictedBoltzmannMachine(20, n_vis, n_hid, 0)
    rbm.util.load_parameters(refrbm, ref_file)

    os.chdir("test-tmp")

    lls = []
    ref_lls = []
    print "Running %d iterations" % iterations
    for i in range(iterations):
        tcfg = rbm.config.TrainingConfiguration(dataset='mnistv',
                                                n_vis=n_vis, n_hid=n_hid,
                                                batch_size=20,
                                                n_gibbs_steps=15,
                                                epochs=15,
                                                step_rate=0.1,
                                                use_pcd=True,
                                                binarize_data=False,
                                                initial_momentum=0, final_momentum=0, 
                                                use_final_momentum_from_epoch=0,
                                                weight_cost=0,
                                                init_method='uniform', init_weight_sigma=init_weight_sigma, init_bias_sigma=0,
                                                seed=random.randint(0, 100000))
        rbm.util.enter_rbm_plot_directory(tcfg)
        myrbm = rbm.rbm.train_rbm(tcfg)
   
        # AIS on my RBM
        ais = rbm.ais.AnnealedImportanceSampler(myrbm)
        ais.init_using_dataset(tcfg.X)
        betas = np.concatenate((np.linspace(0.0, 0.5,   500, endpoint=False),
                                np.linspace(0.5, 0.9,  4000, endpoint=False),
                                np.linspace(0.9, 1.0, 10000)))
        logpf, logpf_low, logpf_high = ais.log_partition_function(betas=betas,
                                                                  ais_runs=100)
        myrbm.log_pf = logpf
        print "Test:      log Z = %g (%g, %g)" % (logpf, logpf_low, logpf_high)

        # log likelihood of my RBM
        ll = gp.mean(myrbm.normalized_log_p_vis(tcfg.X))
        lls.append(ll)
        print "Test:      Average log p(x from training set) =  %f" % ll

        # AIS on reference RBM
        refais = rbm.ais.AnnealedImportanceSampler(refrbm)
        refais.init_using_dataset(tcfg.X)
        betas = np.concatenate((np.linspace(0.0, 0.5,   500, endpoint=False),
                                np.linspace(0.5, 0.9,  4000, endpoint=False),
                                np.linspace(0.9, 1.0, 10000)))
        logpf, logpf_low, logpf_high = refais.log_partition_function(betas=betas,
                                                                     ais_runs=100)
        refrbm.log_pf = logpf
        print "Reference: log Z = %g (%g, %g)" % (logpf, logpf_low, logpf_high)

        # log likelihood of the reference RBM
        ref_ll = gp.mean(refrbm.normalized_log_p_vis(tcfg.X))
        ref_lls.append(ref_ll)
        print "Reference: Average log p(x from training set) =  %f" % ref_ll
     
        # print statistics
        ll_mean, ll_pm = common.stats.normal_mean(lls, alpha)
        ref_ll_mean, ref_ll_pm = common.stats.normal_mean(ref_lls, alpha)
        print "############################################################"
        print "After %d iterations:" % i
        print "Test:      <Average log p(x from training set)> =  %g +/- %g" % \
            (ll_mean, ll_pm)
        print "Reference: <Average log p(x from training set)> =  %g +/- %g" % \
            (ref_ll_mean, ref_ll_pm)

    assert common.util.interval_contains(common.stats.normal_mean_confint(lls, 
                                                                          alpha),
                                         ref_ll_mean) or ll_mean > ref_ll_mean
Example #24
def reject_outliers(data, m=3):
    data = np.array(data)
    # flag entries more than m standard deviations away from the mean
    outlier_idx = np.where(abs(data - gpu.mean(data)) >= m * np.std(data))
    data[outlier_idx] = np.inf
    return data
Example #25
def rbm_train(dataset, H, batch_size, epoch_count, epsilon, momentum, return_hidden=True, verbose=True):
    """
    Train a (binary) restricted boltzmann machine.
    
    dataset: Input data. DataSet instance or matrix of size N (number of data points) x D (input dimension)
    H: Number of hidden units
    batch_size: Number of data points in each batch
    epoch_count: Number of training epochs
    epsilon: Learning rate, either a scalar or an array (one value for each epoch)
    momentum: Momentum parameter, either a scalar or an array (one value for each epoch)
    return_hidden: If True, returns hidden unit activations for training data. 
    verbose: If True, prints progress information
    Returns w_vh (weights between visible-hidden units), w_v (visible unit
    biases), w_h (hidden unit biases), h (hidden unit activations for input data),
    error (reconstruction error at each epoch)
    """
    if isinstance(dataset, ds.DataSet):
        train_x = dataset.train.x
        N = dataset.train.N
        D = dataset.train.D
    else:
        train_x = dataset
        N = train_x.shape[0]
        D = train_x.shape[1]

    batch_count = int(np.ceil(N / float(batch_size)))

    # if momentum is a scalar, create a list with the same value for all epochs
    if not isinstance(momentum, list):
        momentum = [momentum] * epoch_count
    if not isinstance(epsilon, list):
        epsilon = [epsilon] * epoch_count

    # initialize weights
    w_vh = gnp.randn((D, H)) * 0.1
    w_v = gnp.zeros((D, 1))
    w_h = gnp.zeros((H, 1))

    # weight updates
    dw_vh = gnp.zeros((D, H))
    dw_v = gnp.zeros((D, 1))
    dw_h = gnp.zeros((H, 1))

    # hidden unit activations
    if return_hidden:
        h = np.zeros((N, H))  # keep this a numpy array to save memory
    else:
        h = []

    start_time = time.time()
    # reconstruction errors over epochs
    error = []
    batch_order = range(batch_count)
    for e in range(epoch_count):
        if verbose:
            print("Epoch " + repr(e + 1))

        batch_error = []
        processed_batch = 0
        for b in range(batch_count):
            processed_batch += 1
            if verbose:
                print("\r%d/%d" % (processed_batch, batch_count)),

            start = b * batch_size
            end = (b + 1) * batch_size if (b + 1) * batch_size < N else N
            x = train_x[start:end, :].T

            # apply momentum
            dw_vh *= momentum[e]
            dw_v *= momentum[e]
            dw_h *= momentum[e]

            # positive phase
            ahp = gnp.dot(w_vh.T, x) + w_h
            hp = gnp.logistic(ahp)

            # if it is the last epoch, store hidden unit activations
            if return_hidden and e == epoch_count - 1:
                h[start:end, :] = gnp.as_numpy_array(hp.T)

            # add positive gradient term
            dw_vh += gnp.dot(x, hp.T)
            dw_v += gnp.sum(x, axis=1)[:, gnp.newaxis]
            dw_h += gnp.sum(hp, axis=1)[:, gnp.newaxis]

            # sample hiddens
            hs = hp > gnp.rand(hp.shape[0], hp.shape[1])

            # negative phase
            avn = gnp.dot(w_vh, hs) + w_v
            vn = gnp.logistic(avn)
            ahn = gnp.dot(w_vh.T, vn) + w_h
            hn = gnp.logistic(ahn)

            dw_vh -= gnp.dot(vn, hn.T)
            dw_v -= gnp.sum(vn, axis=1)[:, gnp.newaxis]
            dw_h -= gnp.sum(hn, axis=1)[:, gnp.newaxis]

            # update weights
            w_vh += epsilon[e] / (end - start) * dw_vh
            w_v += epsilon[e] / (end - start) * dw_v
            w_h += epsilon[e] / (end - start) * dw_h

            batch_error.append(gnp.mean((vn - x) ** 2))

        # shuffle batch order
        np.random.shuffle(batch_order)

        error.append(np.mean(batch_error))
        if verbose:
            print("\nReconstruction error: " + repr(error[-1]))
            print("Elapsed time: " + str(time.time() - start_time))

    return w_vh, w_v, w_h, h, error
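A hypothetical call to rbm_train on random binary data. It assumes gnumpy is installed and that the function above is importable; the module name rbm_gpu is made up for this sketch:

import numpy as np
# from rbm_gpu import rbm_train  # hypothetical module containing the function above

train_x = (np.random.rand(1000, 784) > 0.5).astype(np.float64)  # toy binary data
w_vh, w_v, w_h, h, error = rbm_train(train_x, H=128, batch_size=100,
                                     epoch_count=5, epsilon=0.05, momentum=0.9,
                                     return_hidden=True, verbose=True)
print("final reconstruction error: %g" % error[-1])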
Example #26
    def train(self, fulldata, num_epochs, eta=0.01, hidden=None, sample=False, early_stop=True, verbose = True):
        ''' 
        Method to learn the weights of the RBM.

        args: 
            array fulldata: the training data
            int num_epochs: the number of times to run through the training data
            float eta:      the learning rate, default 0.01
            array hidden:   optional array specifying the hidden representation
                            to learn (for use in a translational-RBM)
            bool sample:    specifies whether training should use sampling, 
                            default False
            bool early_stop: whether to use early stopping, default True

        '''
        if len(fulldata) == 0:
            return
        
        if type(fulldata) != self.np_array_type  or type(fulldata[0]) != self.np_array_type:
            fulldata = np.array([np.array(r) for r in fulldata])
        
        if hidden is not None:
            # check that there is a hidden rep for each data row
            assert hidden.shape[0] == fulldata.shape[0]
            # check that we have the right number of hidden units
            assert hidden.shape[1] == self.n_hidden

        # these parameters control momentum changes
        initial_momentum = 0.5
        final_momentum = 0.9
        momentum_iter = 5

        # when dealing with large arrays, we have to break the data into
        # manageable chunks to avoid out-of-memory errors
        num_rows = fulldata.shape[0]
        
        err_hist = [] # keep track of the errors for early stopping
        
        for epoch in range(num_epochs):
            if epoch <= momentum_iter:
                momentum = initial_momentum
            else:
                momentum = final_momentum
            
            mae = []
            if verbose:
                print "Training epoch %d of %d," %(epoch+1, num_epochs),
            
            num_batches = num_rows/self.batch_size + 1
                    
            xs = gp.garray(fulldata)
            if hidden is not None:
                hid_chunk = gp.garray(hidden)

            for batch in range(num_batches):
                # positive phase
                if num_batches == 1:
                    v1 = xs
                else:
                    v1 = xs[batch*self.batch_size:(batch+1)*self.batch_size]
                
                if len(v1) == 0:
                    continue
                
                if hidden is None:
                    h1 = self.prop_up(v1)
                else:
                    if num_batches == 1:
                        h1 = hid_chunk
                    else:
                        h1 = hid_chunk[batch*self.batch_size:(batch+1)*self.batch_size]

                # negative phase
                if sample:
                    hSampled = h1.rand() < h1
                    v2 = self.prop_down(hSampled)
                else:
                    v2 = self.prop_down(h1)
                h2 = self.prop_up(v2)
                
                # update weights
                self.wu_vh = self.wu_vh * momentum + gp.dot(v1.T, h1) - gp.dot(v2.T, h2)
                self.wu_v = self.wu_v * momentum + v1.sum(0) - v2.sum(0)
                self.wu_h = self.wu_h * momentum + h1.sum(0) - h2.sum(0)

                self.W += self.wu_vh * (eta/self.batch_size)
                self.vbias += self.wu_v * (eta/self.batch_size)
                self.hbias += self.wu_h * (eta/self.batch_size)
                
                # calculate reconstruction error
                error = gp.abs(v2 - v1)
                
                #mae.append(error.euclid_norm()**2/(self.n_visible*self.batch_size))
                mae.append(gp.mean(error))
              
            err_hist.append(np.mean(mae))
            if verbose:
                print " mean absolute error: "+ str(np.mean(mae))
                
            # early stopping
            if early_stop:
                recent_err = np.mean(err_hist[epoch-50:epoch])
                early_err = np.mean(err_hist[epoch-200:epoch-150])
                if (epoch > 250) and ((recent_err * 1.2) > early_err):
                    break
Example #27
def columnRMS(W):
    return gnp.sqrt(gnp.mean(W * W, axis=0))