Example #1
 def visActProb(self):
     # negative phase
     super(DiscriminativeRBM,self).visActProb()
     self.v.apply_sigmoid()
     cm.dot(self.cW, self.h, target = self.c)
     self.c.add_col_vec(self.cb)
     softmax(self.c)
Example #2
    def _sample_h(self, v, x, sample=False, x_is_bias=False):
        # updates self.h
        #

        self.h = cm.empty((v.shape[0], self.output_dim))

        if x_is_bias: # Bias is precalculated
            self.h.assign(x)
        else:
            cm.dot(x, self.bg, self.h)

        self.h.add_dot(v, self.wg)

        # This is about 100 times faster than calling 'add_row_vec' to add biases.
        ones_cut = self._ones.get_col_slice(0, v.shape[0])
        self.h.add_dot(ones_cut.T, self.bhg)

        self.h.apply_sigmoid2(self.h)

        if sample:
            # Sample random values
            sampled = cm.empty((v.shape[0], self.output_dim))
            sampled.fill_with_rand()
            # Sample values of hiddens
            sampled.less_than(self.h, self.h)
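Note: the comment above adds the hidden biases through add_dot with a column of ones rather than add_row_vec. A minimal standalone sketch of that bias trick, assuming an initialized cudamat context; the names below are illustrative, not taken from the original project:

import numpy as np
import cudamat as cm

cm.cublas_init()
n, k = 128, 64                              # minibatch size, number of hidden units
H = cm.CUDAMatrix(np.zeros((n, k)))         # activations that should receive the bias
b = cm.CUDAMatrix(np.random.rand(1, k))     # bias row vector
ones = cm.CUDAMatrix(np.ones((n, 1)))       # column of ones

H.add_dot(ones, b)                          # adds b to every row of H via the outer product ones * b
# H.add_row_vec(b)                          # same result; reportedly slower for large minibatches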
Example #3
    def acceleration(self):
        #this sets self.hActProbs and self.normalizedVisMB and self.sqColLens
        self.hidActProbs(vis = self.negVis)
        
        cm.dot(self.factToHid, self.hActProbs, target = self.tempFactMB)
        self.tempFactMB.mult(-1)
        self.tempFactMB.mult(self.factResponses)
        cm.dot(self.visToFact, self.tempFactMB, target = self.normalizedAccel)

        #rename some things to be like Marc'Aurelio's code:
        normcoeff = self.tempRow2
        lengthsq = self.tempRow
        
        #these next few lines repeat some work, but it is too confusing to cache all this stuff at the moment
        self.sqColLens.mult(1.0/self.numVis, target = lengthsq)
        lengthsq.add(small) #self.tempRow is what Marc'Aurelio calls lengthsq
        cm.sqrt(lengthsq, target = normcoeff)
        normcoeff.mult(lengthsq) #now self.tempRow2 has what Marc'Aurelio calls normcoeff
        normcoeff.reciprocal()
        
        self.normalizedAccel.mult(self.negVis, target = self.tempVisMB)
        self.tempVisMB.sum(axis=0, target = self.tempRow3) #this tempRow stuff is getting absurd
        self.tempRow3.mult(-1.0/self.numVis)
        self.negVis.mult_by_row(self.tempRow3, target = self.tempVisMB)
        self.normalizedAccel.mult_by_row(lengthsq, target = self.accel)
        self.accel.add(self.tempVisMB)
        self.accel.mult_by_row(normcoeff)
        
        #quadratic in v term contribution to gradient
        self.accel.add(self.negVis)
        
        self.accel.mult(2) #all parts before this point have a 2 show up because of differentiation
        
        #vis bias contribution
        self.accel.add_col_mult(self.visBias, -1)
Example #4
def get_specrad(Ac):
        """Get spectral radius of A using the power method."""

        m_size = Ac.shape[0]

        x = np.random.normal(0, 1, (m_size, 1))

        x = x / np.linalg.norm(x)
        x = cm.CUDAMatrix(x)

        y = cm.empty((m_size, 1))
        diff = 200
        eps = 1e-3
        b = 1e10
        c = 1e9
        max_its = 1e6

        n_its = 0

        while diff > eps and n_its < max_its:
            cm.dot(Ac, x, target=y)
            norm = y.euclid_norm()
            y.divide(norm, target=x)
            a = cm.dot(y.T, x).asarray()
            c = cm.dot(x.T, x).asarray()
            diff = np.abs(a - b)
            b = float(a)
            n_its += 1

        specrad = float(a / c)
        print 'Spectral radius:', specrad, 'Number of iterations:', n_its
        return float(a / c)
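A quick host-side cross-check of the power-method estimate above on a small symmetric matrix; a sketch only, assuming cudamat has been initialized:

import numpy as np
import cudamat as cm

cm.cublas_init()
A = np.random.randn(100, 100).astype(np.float32)
A = np.dot(A, A.T)                  # symmetric PSD, so the spectral radius equals the largest eigenvalue
rho_np = float(np.max(np.linalg.eigvalsh(A)))
rho_gpu = get_specrad(cm.CUDAMatrix(A))
print(abs(rho_np - rho_gpu))        # expected to be small (float32 arithmetic on the GPU)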
Example #5
def rbmHtoV(m, X) :
    """convey data fron hidden layer to visible layer"""
    cm.cublas_init()

    # copy data to GPU
    data = cm.CUDAMatrix(cm.reformat(X))
    weight = cm.CUDAMatrix(cm.reformat(m.weight))
    biasV = cm.CUDAMatrix(cm.reformat(m.biasV))

    nCase = X.shape[0]
    nVis = biasV.asarray().size
    VisActP = cm.CUDAMatrix(np.zeros((nCase, nVis)))

    if m.type == "BB" :
        cm.dot(data, weight.T, target = VisActP)
        VisActP.add_row_vec(biasV)
        VisActP.apply_sigmoid()
    elif m.type == "BG" :
        cm.dot(data, weight.T, target = VisActP)
        VisActP.add_row_vec(biasV)
    elif m.type == "GB" :
        pass

    result = VisActP.asarray()

    #free device memory
    data.free_device_memory()

    weight.free_device_memory()
    biasV.free_device_memory()
    VisActP.free_device_memory()

    cm.shutdown()

    return result
Example #6
File: ais.py Project: ANB2/deepnet
def ExactZ_binary_binary(model):
  assert len(model.layer) == 2, 'Only implemented for RBMs.'
  steps = len(schedule)
  input_layer = model.layer[0]
  hidden_layer = model.layer[1]
  edge = model.edge[0]
  w = edge.params['weight']
  a = hidden_layer.params['bias']
  b = input_layer.params['bias']
  numvis, numhid = w.shape
  batchsize = 2**numvis
  input_layer.AllocateBatchsizeDependentMemory(batchsize)
  hidden_layer.AllocateBatchsizeDependentMemory(batchsize)
  all_inputs = GetAll(numvis)
  w_ais = cm.CUDAMatrix(np.zeros((1, batchsize)))
  input_layer.sample.overwrite(all_inputs)
  cm.dot(w.T, input_layer.sample, target=hidden_layer.state)
  hidden_layer.state.add_col_vec(a)
  cm.log_1_plus_exp(hidden_layer.state)
  w_ais.add_sums(hidden_layer.state, axis=0)
  w_ais.add_dot(b.T, input_layer.state)
  offset = float(w_ais.asarray().max())
  w_ais.subtract(offset)
  cm.exp(w_ais)
  z = offset + np.log(w_ais.asarray().sum())
  return z
Example #7
def tests():
    a = np.random.rand(300,500)
    b = np.random.rand(500,300)

    start = timer()
    c = np.dot(a,b)
    nptime = timer()-start
    print('nptime',nptime)

    x = np.array(np.random.rand(600,1500),dtype='float32',order='F')
    y = np.array(np.random.rand(1500,300),dtype='float32',order='F')
    z = np.zeros((1000,1000),order='F',dtype='float32')

    stream = cuda.stream()

    dx = cuda.to_device(x)
    dy = cuda.to_device(y)
    dz = cuda.to_device(z)

    start = timer()
    blas.gemm('N','N',1000,1500,1000,1.0,dx,dy,0.0,dz)
    cutime = timer()-start
    print('cutime',cutime)

    #dz.copy_to_host(z)
    print(dz[0])

    c = np.ones((1000,1000),order='F',dtype='float32')
    print(c.shape)
    dc = cuda.to_device(c)

   # blockDim = (256,256)
    #gridDim = (((1000 + blockDim[0]-1)/blockDim[0]),((1000 + blockDim[1]-1)/blockDim[1]))

    blockDim = (30,30)
    gridDim = ((((c.shape[0] + blockDim[0]) - 1) / blockDim[0]), (((c.shape[1] + blockDim[1]) - 1) / blockDim[1]))

    start = timer()
    mtanh[gridDim,blockDim,stream](dc)
    tantime = timer() - start
    print('tantime',tantime)

    dc.copy_to_host(c,stream=stream)
    stream.synchronize()
    print(c)

    y = cm.CUDAMatrix(np.ones((1000,1000)))

    start = timer()
    cm.tanh(y)
    cmtan = timer()-start
    print('cmtan',cmtan)

    x = cm.CUDAMatrix(np.random.rand(1000,1500))
    y = cm.CUDAMatrix(np.random.rand(1500,1000))

    start = timer()
    cm.dot(x,y)
    cmtime = timer()-start
    print('cmtime',cmtime)
Example #8
 def visActProbs(self, recomputeDynamicBias):
     
     if recomputeDynamicBias:
         self.updateDynamicVisBias()
     
     cm.dot( self.visToHid, self.hActs, target = self.negVis)
     self.negVis.add(self.dynamicVisBias)
     self.negVis.add_col_vec(self.visBias)
Example #9
    def hidActProb(self,vis, target):
        # positive phase
#        print self.W.shape
#        print vis.shape
#        print target.shape
        cm.dot(self.W.T, vis, target = target)
        target.add_col_vec(self.hb)
        target.apply_sigmoid()
Example #10
 def transform(self, v, h):
     """
     Parameters:
     v : the visible input activation
     h : the target to write the hidden activation
     """
     cm.dot(self.W.T, v, target = h)
     h.add_col_vec(self.hidden_bias)
     h.apply_sigmoid()
Example #11
 def reverse_transform(self, h, v):
     """
     Parameters:
     h : the hidden activation
     v : the target to write the visible activation
     """
     cm.dot(self.W, h, target = v)
     v.add_col_vec(self.visible_bias)
     v.apply_sigmoid()
Example #12
 def rmatvec(x):
     if isinstance(x, np.ndarray):
         x.resize((x.size, 1))
         x_gpu = CUDAMatrix(x)
         return cudamat.dot(a_gpu.transpose(), x_gpu).asarray()
     elif isinstance(x, CUDAMatrix):
         x_gpu = x
         return cudamat.dot(a_gpu.transpose(), x_gpu)
     else:
         raise ValueError('Expected CUDAMatrix or ndarray')
Example #13
 def hidActProbsRBM(self, vis = None):
     """
     targ had better be on the gpu or None
     """
     if vis == None:
         vis = self.vis
     targ = self.hActProbsRBM
     
     cm.dot( self.visToHid.T, vis, target = targ)
     targ.add_col_vec(self.hidBiasRBM)
     self.hNetInputsRBM.assign(targ) #needed later for Hamiltonian computation
     targ.apply_sigmoid()
Example #14
 def hidActProbs(self, targ = None, vis = None):
     """
     targ had better be on the gpu or None
     """
     if targ == None:
         targ = self.hActProbs
     if vis == None:
         vis = self.vis
     
     cm.dot( self.visToHid.T, vis, target = targ)
     targ.add_col_vec(self.hidBias)
     targ.apply_sigmoid()
Example #15
    def forward_p_single(self, single_z):
        self.single_z = single_z
        self.activation_func.apply(self.single_z)

        cm.dot(self.single_z, self.weights, self.next_single_z)

        if self.use_bias:
            self.biases.mult(
                self.activation_func.apply_scalar(1),
                self.active_biases
            )
            self.next_single_z.add_row_vec(self.active_biases)
        return self.next_single_z
Example #16
    def backward_p(self, next_delta):
        # Compute weights grad.
        cm.dot(self.z.T, next_delta, self.weights_grad)

        # Compute biases grad.
        if self.use_bias:
            next_delta.sum(0, self.biases_grad)

        if self.level != 1:
            cm.dot(next_delta, self.weights.T, self.my_delta)
            self.activation_func.mult_with_derivative(self.my_delta, self.z)

        return self.my_delta
Example #17
  def ComputeUp(self, train=False, step=0, maxsteps=0):
    """
    Computes the state of a layer, given the state of its incoming neighbours.

    Args:
      train: True if this computation is happening during training, False during
        evaluation.
      step: Training step.
      maxsteps: Maximum number of steps that will be taken (Some hyperparameters
        may depend on this.)
    """
    logging.debug('ComputeUp in %s', self.name)
    self.dirty = False
    if self.is_input:
      self.GetData()
    else:
      for i, edge in enumerate(self.incoming_edge):
        if edge in self.outgoing_edge:
          continue
        inputs = self.incoming_neighbour[i].state
        if edge.conv:
          if i == 0:
            self.ConvolveUp(inputs, edge, self.state)
          else:
            self.AddConvoleUp(inputs, edge, self.state)
        else:
          w = edge.params['weight']
          factor = edge.proto.up_factor
          if i == 0:
            cm.dot(w.T, inputs, target=self.state)
            if factor != 1:
              self.state.mult(factor)
          else:
            self.state.add_dot(w.T, inputs, mult=factor)
      b = self.params['bias']
      if self.replicated_neighbour is None:
        self.state.add_col_vec(b)
      else:
        self.state.add_dot(b, self.replicated_neighbour.NN)
      self.ApplyActivation()

    if self.hyperparams.dropout:
      if train and maxsteps - step >= self.hyperparams.stop_dropout_for_last:
        # Randomly set states to zero.
        self.mask.fill_with_rand()
        self.mask.greater_than(self.hyperparams.dropout_prob)
        self.state.mult(self.mask)
      else:
        # Produce expected output.
        self.state.mult(1.0 - self.hyperparams.dropout_prob)
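Note: the two dropout branches above (random mask during training, scaling by 1 - dropout_prob at evaluation) agree in expectation. A small NumPy illustration of this, not part of the original code:

import numpy as np

p_drop = 0.5
state = np.random.rand(1000, 100).astype(np.float32)
mask = np.random.rand(*state.shape) > p_drop    # keep each unit with probability 1 - p_drop
train_mean = (state * mask).mean()
eval_mean = (state * (1.0 - p_drop)).mean()
print(train_mean, eval_mean)                    # approximately equal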
Example #18
    def UpdateStatesGPU(self, sType, _raaW, _raaB, _raaX, _raaY, _baaY, rDropout=0, bSample=False):

        # Compute the scale factor to compensate for dropout so that
        # average activations remain the same
        rScale = 1/(1-rDropout)
        
        # Compute activations
        cudamat.dot(_raaX, _raaW, target=_raaY)
        _raaY = _raaY.mult(rScale)
        _raaY.add_row_vec(_raaB)
            
        # Depending on the activation type...
        if (sType=="Logistic"):

            # Compute the logistic function
            _raaY.apply_sigmoid(_raaY)

        elif (sType=="Linear"):

            # Compute output layer states
            pass

        elif (sType=="HyperbolicTangent"):

            # Compute output layer states
            _raaY.apply_tanh(_raaY)
                                          
        # If stochastic binary states are required...
        if(bSample):

            # Depending on the activation type...
            if (sType=="Logistic"):

                # Sample output layer states
                _baaY.fill_with_rand()
                _baaY.less_than(_raaY)

            elif (sType=="Linear"):

                # Sample output layer states
                _baaY.fill_with_randn()
                _baaY.add(_raaY)

            elif (sType=="HyperbolicTangent"):

                # Sample output layer states
                _baaY.fill_with_rand()
                _baaY.mult(2)
                _baaY.sub(1)
                _baaY.less_than(_raaY)
Example #19
    def hidNetInpts(self, recomputeDynamicBias = True, targ = None, vis = None):
        """
        targ had better be on the gpu or None
        """
        if recomputeDynamicBias:
            self.updateDynamicHidBias()

        if targ == None:
            targ = self.hActProbs
        if vis == None:
            vis = self.vis
        
        cm.dot( self.visToHid.T, vis, target = targ)
        targ.add(self.dynamicHidBias)
        targ.add_col_vec(self.hidBias)
Example #20
def test_T_field():
    m = 256
    n = 128
    cm1 = np.array(np.random.rand(n, m)*10, dtype=np.float32, order='F')
    cm2 = np.array(np.random.rand(m, 1)*10, dtype=np.float32, order='F')
    gm1 = cm.CUDAMatrix(cm1)
    gm2 = cm.CUDAMatrix(cm2)

    # test dot
    gm = cm.dot(gm2.T, gm1.T)
    c = np.dot(cm2.T, cm1.T)
    gm.copy_to_host()

    assert np.max(np.abs(gm.numpy_array - c)) < 10**-2, "Error in CUDAMatrix.dot with TransposedCUDAMatrix exceeded threshold"

    # test add_dot
    cm3 = np.array(np.random.rand(1, n)*10, dtype=np.float32, order='F')
    gm3 = cm.CUDAMatrix(cm3)
    gm3.add_dot(gm2.T, gm1.T)
    c = cm3 + np.dot(cm2.T, cm1.T)
    gm3.copy_to_host()

    assert np.max(np.abs(gm3.numpy_array - c)) < 10**-2, "Error in CUDAMatrix.add_dot TransposedCUDAMatrix exceeded threshold"

    # test add_sums
    gm2.add_sums(gm1.T, axis = 1)
    c = cm2 + np.atleast_2d(cm1.sum(0)).T
    gm2.copy_to_host()

    assert np.max(np.abs(gm2.numpy_array - c)) < 10**-2, "Error in CUDAMatrix.add_sums TransposedCUDAMatrix exceeded threshold"
Example #21
    def negative_free_energy(self,gpu_data):
        """
        Computes the negative free-energy.
        Outputs a reference to a pre-allocated GPU variable
        containing the result.
        """

        cm.dot(self.W,gpu_data,self.gpu_h)
        self.gpu_h.add_col_vec(self.c)
        # to avoid memory creation, using gpu_h
        # and gpu_h_sample for these computations
        cm.exp(self.gpu_h,self.gpu_h_sample)
        self.gpu_h_sample.add(1.)
        cm.log(self.gpu_h_sample,self.gpu_h)
        self.gpu_h.sum(axis=0,target=self.gpu_negative_free_energy)
        self.gpu_negative_free_energy.add_dot(self.b.T,gpu_data)
        return self.gpu_negative_free_energy
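The snippet computes the binary-RBM negative free energy -F(x) = b^T x + sum_j log(1 + exp(c_j + W_j x)), one column per sample. A NumPy reference for the same quantity, as a sketch (shapes follow the GPU code, with samples on columns):

import numpy as np

def negative_free_energy_np(W, b, c, data):
    # W: (n_hidden, n_visible), b: (n_visible, 1), c: (n_hidden, 1), data: (n_visible, n_samples)
    h_act = np.dot(W, data) + c                 # hidden pre-activations
    return np.log1p(np.exp(h_act)).sum(axis=0) + np.dot(b.T, data).ravel()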
Example #22
	def test(self, dev_test, dev_lbl):

		# forward pass
		cm.dot(self.w_w1.T, dev_test, target = self.h)

		self.h.add_col_vec(self.w_b1)
		self.h.apply_sigmoid()

		cm.dot(self.w_w2.T, self.h, target = self.out)

		self.out.add_col_vec(self.w_b2)
		self.out.apply_sigmoid()

		# compute error
		self.out.subtract(dev_lbl)

		print "Testing misclassification rate: " + str(np.mean(np.abs(self.out.asarray())>0.5))
Example #23
    def rbm_update(self,gpu_data):

        # Positive phase
        cm.dot(self.W,gpu_data,self.gpu_h)
        self.gpu_h.add_col_vec(self.c)
        self.gpu_h.apply_sigmoid()

        self.dW.mult(self.momentum)
        self.dc.mult(self.momentum)
        self.db.mult(self.momentum)
        self.dW.add_dot(self.gpu_h,gpu_data.T)
        self.dc.add_sums(self.gpu_h,axis=1,mult=1.)
        self.db.add_sums(gpu_data,axis=1,mult=1.)

        if self.use_persistent_chain:
            cm.dot(self.W,self.gpu_x_sample,self.gpu_h)
            self.gpu_h.add_col_vec(self.c)
            self.gpu_h.apply_sigmoid()

        for it in range(self.n_gibbs_steps):
            self.gpu_h_sample.fill_with_rand()
            self.gpu_h_sample.less_than(self.gpu_h)

            # Down pass
            cm.dot(self.W.T,self.gpu_h_sample,self.gpu_x)
            self.gpu_x.add_col_vec(self.b)
            self.gpu_x.apply_sigmoid()
            self.gpu_x_sample.fill_with_rand()
            self.gpu_x_sample.less_than(self.gpu_x)

            # Up pass
            cm.dot(self.W,self.gpu_x_sample,self.gpu_h)
            self.gpu_h.add_col_vec(self.c)
            self.gpu_h.apply_sigmoid()
        
        self.dW.subtract_dot(self.gpu_h,self.gpu_x_sample.T)
        self.dc.add_sums(self.gpu_h,axis=1,mult=-1.)
        self.db.add_sums(self.gpu_x_sample,axis=1,mult=-1.)

        # Update RBM
        self.W.add_mult(self.dW,alpha=self.learning_rate/self.minibatch_size)
        self.c.add_mult(self.dc,alpha=self.learning_rate/self.minibatch_size)
        self.b.add_mult(self.db,alpha=self.learning_rate/self.minibatch_size)

        #if self.print_first_row:
        #    gpu_data.copy_to_host()
        #    print gpu_data.numpy_array[:,0]
        #    self.gpu_x.copy_to_host()
        #    print self.gpu_x.numpy_array[:,0]

        # Compute reconstruction error
        self.gpu_x.subtract(gpu_data)
        err = self.gpu_x.euclid_norm()
        err = err**2
        err /= self.gpu_x.shape[1]
        return err
Example #24
    def CDStats(self, vis, normalizedVis, hid, posPhase):
        multiplier = 1.0 if posPhase else -1.0
        
        self.dhidBias.add_sums(hid, 1, mult = multiplier)
        self.dvisBias.add_sums(vis, 1, mult = multiplier)
        
        cm.dot(self.factToHid, hid, target = self.tempFactMB)
        self.tempFactMB.mult(self.factResponses)

        #I modified cudamat's add_dot to take a multiplier
        #need to multiply by 0.5 to make finite diffs agree
        #
        self.dfactToHid.add_dot(self.factResponsesSq, hid.T, mult = 0.5*multiplier)
        if posPhase:
            self.dvisToFact.add_dot(normalizedVis, self.tempFactMB.T)
        else:
            self.dvisToFact.subtract_dot(normalizedVis, self.tempFactMB.T)
Example #25
	def calculate_snprank(self, gamma, usegpu):
		"""Runs the SNPrank algorithm on the input data, using gamma as the damping factor.
		   usegpu enables GPU computing (using the CUDAMat library) for the matrix multiplication.
		   Returns the SNPrank scores and diagonal (main effect) of original GAIN matrix."""

		# A GAIN matrix is an NxN matrix
		m,n = self.GAIN.shape
		if m != n:
			raise ValueError("Input is not an NxN matrix")

		# Vector of column sums
		colsum = self.GAIN.sum(axis=0)
		
		# Get indices of c vector that are not zero
		colsum_nzidx = colsum.nonzero()[0]
		
		D = zeros((n,n))
		T_nz = ones(n)
		
		# Where a column doesn't sum to 0, the diagonal in D
		# ought to be the reciprocal of the column sum.
		# Likewise T_nz ought to be 1-gamma rather than 1.
		for i in colsum_nzidx:
			D[i][i] = 1/colsum[i]
			T_nz[i] = 1 - gamma
	
		T = zeros((n,n))
		if usegpu:
			import cudamat as cm	
			# initialize CUDAMat
			cm.init()
			
			# Copy GAIN and D matrices to GPU
			G_gpu = cm.CUDAMatrix(self.GAIN)
			D_gpu = cm.CUDAMatrix(D)
			
			# Do matrix multiplication on the GPU
			GD_prod = cm.dot(G_gpu,D_gpu)

			# Transition matrix
			T = (gamma * GD_prod.asarray() ) + (self.GAIN.diagonal().reshape(n,1) * T_nz) / self.GAIN.trace()
		else:
			# Transition matrix
			T = (gamma * dot(self.GAIN,D) ) + (self.GAIN.diagonal().reshape(n,1) * T_nz) / self.GAIN.trace()
		
		# r is an arbitrary vector, which we initialize to 1/n
		r = (ones(n)).reshape(n,1)/n;
		
		# Cutoff for matrix convergence
		threshold = 10**(-4)
		
		# Multiply r by T until r converges to within the threshold
		while True:
			r_old, r = r, normalize(dot(T,r))
			if all( abs(r-r_old) < threshold ):
				break
		        
		return r.reshape(1,n)[0], self.GAIN.diagonal()
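The convergence loop at the end is a plain power iteration on the transition matrix T. A standalone NumPy sketch of that loop (the original normalize helper is defined elsewhere in the module; unit 2-norm scaling is assumed here):

import numpy as np

def power_iterate(T, threshold=1e-4):
    n = T.shape[0]
    r = np.ones((n, 1)) / n                     # arbitrary start vector
    while True:
        r_new = np.dot(T, r)
        r_new /= np.linalg.norm(r_new)          # stand-in for the module's normalize()
        if np.all(np.abs(r_new - r) < threshold):
            return r_new
        r = r_new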
Example #26
    def AccumulateDeriv(self, edge, deriv):
        """Accumulate the derivative w.r.t the outputs of this layer.

    A layer needs to compute derivatives w.r.t its outputs. These outputs may
    have been connected to lots of other nodes through outgoing edges.
    This method adds up the derivatives contributed by each outgoing edge.
    It gets derivatives w.r.t the inputs at the other end of its outgoing edge.
    Args:
      edge: The edge which is sending the derivative.
      deriv: The derivative w.r.t the inputs at the other end of this edge.
    """
        if self.is_input:
            return
        if self.dirty:  # If some derivatives have already been received.
            self.deriv.add_dot(edge.params["weight"], deriv)
        else:  # Receiving derivative for the first time.
            cm.dot(edge.params["weight"], deriv, target=self.deriv)
            self.dirty = True
Example #27
	def ComputeGradientGPU(self, raaE):

		# For each layer...
		for iLayer in range(self.iLayers-1,-1,-1):

			# Measure the layer input
			# (iSamples, iFeatures) = self.oaStates[iLayer].raaX.shape
			(iFeatures, iSamples) = self.oaStates[iLayer].raaX.shape			

			# Compute the gradient of error with respect to weight
			# self.oaStates[iLayer].raaWg = numpy.dot(self.oaStates[iLayer].raaX.T, raaE)
			self.oaStates[iLayer].raaWg = cudamat.dot(self.oaStates[iLayer].raaX, raaE.T)

			# Compute gradient of error with respect to bias
			# self.oaStates[iLayer].raBg = numpy.sum(raaE,0)
			self.oaStates[iLayer].raBg = raaE.sum(1)

			# If error is needed for next layer...
			if(iLayer>0):

				# Backpropagate the error
				# raaE = numpy.dot(raaE,self.oaLayers[iLayer].raaW.T)
				raaE = cudamat.dot(self.oaLayers[iLayer].raaW.T, raaE)

				# Compute the sample count for prior layer
				# iSamples = raaE.shape[0]*self.oaLayers[iLayer].iDecimation
				iSamples = raaE.shape[1]*self.oaLayers[iLayer].iDecimation

				# Undecimate error
				# raaE = numpy.reshape(raaE,(iSamples,-1))
				iSize = numpy.prod(raaE.shape)
				iN = iSize//iSamples
				raaE.reshape((iN,iSamples))

				# Compute deferred hadamard product with derivative so shapes match
				# raaE = raaE*self.oaStates[iLayer].raaD
				raaE.mult(self.oaStates[iLayer].raaD)				

		# Get the serialized gradient vector
		raG = self.GetGradientVector()

		# Return gradient and error metrics
		# return((raG, rError, rRmse))
		return(raG)
Example #28
File: da.py Project: HelenLiGit/POT
def pairwiseEuclideanGPU(a, b, returnAsGPU=False, squared=False):
    """
    Compute the pairwise euclidean distance between matrices a and b.


    Parameters
    ----------
    a : np.ndarray (n, f)
        first matrix
    b : np.ndarray (m, f)
        second matrix
    returnAsGPU : boolean, optional (default False)
        if True, returns cudamat matrix still on GPU, else return np.ndarray
    squared : boolean, optional (default False)
        if True, return squared euclidean distance matrix


    Returns
    -------
    c : (n x m) np.ndarray or cudamat.CUDAMatrix
        pairwise euclidean distance matrix
    """
    # a is shape (n, f) and b shape (m, f). Return matrix c of shape (n, m).
    # First compute in c_GPU the squared euclidean distance. And return its
    # square root. At each cell [i,j] of c, we want to have
    # sum{k in range(f)} ( (a[i,k] - b[j,k])^2 ). We know that
    # (a-b)^2 = a^2 -2ab +b^2. Thus we want to have in each cell of c:
    # sum{k in range(f)} ( a[i,k]^2 -2a[i,k]b[j,k] +b[j,k]^2).

    a_GPU = cudamat.CUDAMatrix(a)
    b_GPU = cudamat.CUDAMatrix(b)

    # Multiply a by b transpose to obtain in each cell [i,j] of c the
    # value sum{k in range(f)} ( a[i,k]b[j,k] )
    c_GPU = cudamat.dot(a_GPU, b_GPU.transpose())
    # multiply by -2 to have sum{k in range(f)} ( -2a[i,k]b[j,k] )
    c_GPU.mult(-2)

    # Compute the vectors of the sum of squared elements.
    a_GPU = cudamat.pow(a_GPU, 2).sum(axis=1)
    b_GPU = cudamat.pow(b_GPU, 2).sum(axis=1)

    # Add the vectors in each column (respectively row) of c.
    # sum{k in range(f)} ( a[i,k]^2 -2a[i,k]b[j,k] )
    c_GPU.add_col_vec(a_GPU)
    # sum{k in range(f)} ( a[i,k]^2 -2a[i,k]b[j,k] +b[j,k]^2)
    c_GPU.add_row_vec(b_GPU.transpose())

    if not squared:
        c_GPU = cudamat.sqrt(c_GPU)

    if returnAsGPU:
        return c_GPU
    else:
        return c_GPU.asarray()
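A quick check of pairwiseEuclideanGPU against SciPy on small random inputs; a sketch, assuming cudamat has already been initialized as the surrounding module expects:

import numpy as np
from scipy.spatial.distance import cdist

a = np.random.rand(50, 8)
b = np.random.rand(60, 8)
d_gpu = pairwiseEuclideanGPU(a, b, returnAsGPU=False, squared=False)
d_cpu = cdist(a, b, metric='euclidean')
print(np.max(np.abs(d_gpu - d_cpu)))    # expected to be small; the GPU path runs in float32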
Example #29
    def backProp(self, error):
#        print 'back propagation'
        self.dW[self.H-1].add_dot(self.vis[self.H-1],error.T)
#        print 'self.vis'
#        self.vis[self.H-1].copy_to_host()
#        print self.vis[self.H-1].numpy_array
#        print 'self.dW'
#        self.dW[self.H-1].copy_to_host()
#        print self.dW[self.H-1].numpy_array
#        print 'error 2'
#        error.copy_to_host()
#        print error.numpy_array
        self.db[self.H-1].add_sums(error,axis =1 )
        for i in list(reversed(range(self.H-1))):
            delta = cm.empty((self.W[i+1].shape[0],error.shape[1]))
            cm.dot(self.W[i+1],error,target = delta)# delta : 2000*256
            learn.mult_by_sigmoid_deriv(delta, self.vis[i+1])
            self.dW[i].add_dot(self.vis[i], delta.T)
            self.db[i].add_sums(delta, axis = 1)
            error = delta
Example #30
 def hidActProbs(self, targ = None, vis = None):
     """
     targ had better be on the gpu or None
     """
     if targ == None:
         targ = self.hActProbs
     if vis == None:
         vis = self.vis
     
     #recall that self.acceleration calls self.hidActProbs
     normalizeInputData(vis, self.tempVisMB, self.sqColLens, self.tempRow, self.normalizedVisMB)
     
     #cm.dot(self.visToFact.T, vis, target = self.factResponses)
     cm.dot(self.visToFact.T, self.normalizedVisMB, target = self.factResponses)
     self.factResponses.mult(self.factResponses, target = self.factResponsesSq)
     cm.dot(self.factToHid.T, self.factResponsesSq, target = targ)
     
     targ.add_col_vec(self.hidBias)
     self.hNetInputs.assign(targ) #needed later in Hamiltonian computation
     targ.apply_sigmoid()
Example #31
def test_dot():
    m = 128
    k = 256
    n = 64
    a = np.array(np.random.randn(m, k) * 10, dtype=np.float32, order='F')
    b = np.array(np.random.randn(k, n) * 10, dtype=np.float32, order='F')
    c = np.array(np.random.randn(m, n) * 10, dtype=np.float32, order='F')

    alpha = 2.
    beta = 0.3
    r = beta * c + alpha * np.dot(a, b)

    m1 = cm.CUDAMatrix(a)
    m2 = cm.CUDAMatrix(b)
    m3 = cm.CUDAMatrix(c)
    m3 = cm.dot(m1, m2, target=m3, alpha=alpha, beta=beta)
    m3.copy_to_host()

    assert np.max(np.abs(r - m3.numpy_array)
                  ) < 10**-2, "Error in CUDAMatrix.dot exceeded threshold"
Example #32
def Test():

    A = np.float32(np.random.randn(*(2000, 2000)))
    A = np.complex64(np.ones((2000, 2000)) + 1j * np.ones((2000, 2000)))
    AT = A.T.copy()

    A_32 = A  #np.float32(A)
    AT_32 = AT  #np.float32(AT)

    T = ClassTimeIt.ClassTimeIt()
    # create two random matrices and copy them to the GPU
    g_A0 = cm.CUDAMatrix(A)
    g_AT0 = cm.CUDAMatrix(AT)

    # perform calculations on the GPU
    P0 = cm.dot(g_AT0, g_A0).asarray()
    #d = cm.sum(axis = 0)
    T.timeit("GPU0")
    del (g_AT0, g_A0)
    #T.reinit()

    # copy d back to the host (CPU) and print

    g_A1 = gpuarray.to_gpu(A)
    g_AT1 = gpuarray.to_gpu(AT)
    #time.sleep(5)

    #T.timeit("tranf0")
    g_P1 = culinalg.dot(g_AT1, g_A1)

    P1 = g_P1.get()

    #T.timeit("tranf1")
    T.timeit("GPU1")

    np_P = np.dot(AT, A)
    T.timeit("np")
    #print g_P-np_P

    print(np.max(np_P - P0))
    print(np.max(np_P - P1))
Example #33
def search(indices, feat, feature_map, ID_map):
    feature_table = None
    ID_table = None
    indices = indices[0][0:2]
    #print indices
    #print feature_map[0][0],ID_map[0][0:4]
    for category in indices:
        if feature_table is None:
            #print category
            if (feature_map[category]):
                feature_table = np.copy(feature_map[category])
            if (ID_map[category]):
                ID_table = np.copy(ID_map[category])
        else:
            if (feature_map[category]):
                feature_table = np.vstack(
                    (feature_table, feature_map[category]))
            if (ID_map[category]):
                ID_table = np.hstack((ID_table, ID_map[category]))
    #print feature_table[1]
    a = cm.CUDAMatrix(feat)
    #print feat
    c = cm.CUDAMatrix(feature_table)
    d = cm.dot(c, a)
    e = d.asarray()
    #print e
    ind = np.argsort(-e, axis=0)
    ind = ind[0:100]
    #print ind
    ID_result = ID_table[ind]
    '''
    for index in ind:
        if ID_result is None:
            ID_result = np.copy(ID_map[index]) 
        else:
            ID_result = np.hstack((ID_result, ID_map[index]))
    '''
    return ID_result
Example #34
    def setVariables(self):
        n, m, r = self.n, self.m, self.rank

        self.G_gpu = cm.CUDAMatrix(self.G)
        self.W_gpu = cm.CUDAMatrix(self.W)
        self.X_gpu = cm.CUDAMatrix(self.X)

        self.XTX_gpu= cm.dot(self.X_gpu.T, self.X_gpu)
        self.XTXpos_gpu = cm.empty((m,m))
        self.XTX_gpu.greater_than(0, target=self.XTXpos_gpu)
        self.XTXpos_gpu.mult(self.XTX_gpu)
        self.XTXneg_gpu = cm.empty((m,m))
        self.XTXpos_gpu.subtract(self.XTX_gpu, target=self.XTXneg_gpu)

        self.XTXnegW_gpu = cm.empty((m,r))
        self.XTXposW_gpu = cm.empty((m,r))
        self.GWT_gpu = cm.empty((m,m))
        self.update1_gpu = cm.empty((m,r))
        self.update2_gpu = cm.empty((m,r))

        self.GTG_gpu = cm.empty((r,r))
        self.XTXnegG_gpu = cm.empty((m,r))
        self.XTXposG_gpu = cm.empty((m,r))
Example #35
	def feed_forward(self,input=None):
		#optionally allow passing input as an argument
		if input is not None:
			self.input = input

		for index,l in enumerate(self.layer):
			if(index == 0):
				input = self.input
			else:
				input = self.layer[index-1].output

			l.input = input
			#print(str(index) + " " + str(l.weights.shape) + " " + str(l.input.shape))
			l.weighted_sums = cm.dot(l.weights,l.input)
			
			#apply activation function
			if(l.activation == 'squash'):
				pass
				#TODO: write kernel for this
				#l.output = l.weighted_sums / (1+np.abs(l.weighted_sums))
			elif(l.activation == 'sigmoid'):
				l.output = l.weighted_sums.apply_sigmoid()
			#elif(l.activation == 'linear_rectifier'):
			#	l.output = np.maximum(0,l.weighted_sums)
			else: #base case is linear
				l.output = l.weighted_sums
			#if(l.dropout is not None and self.train == True):
			#	if(l.dropout == 0.5):
			#		l.output = l.output*np.random.randint(0,2,l.output.shape);
			#	else:
			#		l.output = l.output*np.random.binomial(1,l.dropout,l.output.shape);
			#elif(l.dropout is not None and self.train == False):
			#	l.output = l.output*(1.0 - l.dropout);
		self.output = self.layer[len(self.layer)-1].output
		self.output.copy_to_host()
		self.output = self.output.numpy_array
		self.output = self.output[0:-1,:]
Example #36
def test_T_field():
    m = 256
    n = 128
    cm1 = np.array(np.random.rand(n, m) * 10, dtype=np.float32, order='F')
    cm2 = np.array(np.random.rand(m, 1) * 10, dtype=np.float32, order='F')
    gm1 = cm.CUDAMatrix(cm1)
    gm2 = cm.CUDAMatrix(cm2)

    # test dot
    gm = cm.dot(gm2.T, gm1.T)
    c = np.dot(cm2.T, cm1.T)
    gm.copy_to_host()

    assert np.max(
        np.abs(gm.numpy_array - c)
    ) < 10**-2, "Error in CUDAMatrix.dot with TransposedCUDAMatrix exceeded threshold"

    # test add_dot
    cm3 = np.array(np.random.rand(1, n) * 10, dtype=np.float32, order='F')
    gm3 = cm.CUDAMatrix(cm3)
    gm3.add_dot(gm2.T, gm1.T)
    c = cm3 + np.dot(cm2.T, cm1.T)
    gm3.copy_to_host()

    assert np.max(
        np.abs(gm3.numpy_array - c)
    ) < 10**-2, "Error in CUDAMatrix.add_dot TransposedCUDAMatrix exceeded threshold"

    # test add_sums
    gm2.add_sums(gm1.T, axis=1)
    c = cm2 + np.atleast_2d(cm1.sum(0)).T
    gm2.copy_to_host()

    assert np.max(
        np.abs(gm2.numpy_array - c)
    ) < 10**-2, "Error in CUDAMatrix.add_sums TransposedCUDAMatrix exceeded threshold"
Example #37
 def compute_energy_mcRBM_visual(self, data, normdata, energy, VF, FH,
                                 bias_cov, bias_vis, w_mean, bias_mean, t1,
                                 t2, t6, feat, featsq, feat_mean, length,
                                 lengthsq, normcoeff, small, num_vis):
     # normalize input data vectors
     data.mult(data, target=t6)  # DxP (nr input dims x nr samples)
     t6.sum(axis=0, target=lengthsq)  # 1xP
     lengthsq.mult(0.5,
                   target=energy)  # energy of quadratic regularization term
     lengthsq.mult(1. /
                   num_vis)  # normalize by number of components (like std)
     lengthsq.add(small)  # small prevents division by 0
     cmt.sqrt(lengthsq, target=length)
     length.reciprocal(target=normcoeff)  # 1xP
     data.mult_by_row(normcoeff, target=normdata)  # normalized data
     ## potential
     # covariance contribution
     cmt.dot(VF.T, normdata, target=feat)  # HxP (nr factors x nr samples)
     feat.mult(feat, target=featsq)  # HxP
     cmt.dot(FH.T, featsq, target=t1)  # OxP (nr cov hiddens x nr samples)
     t1.mult(-0.5)
     t1.add_col_vec(bias_cov)  # OxP
     cmt.exp(t1)  # OxP
     t1.add(1, target=t2)  # OxP
     cmt.log(t2)
     t2.mult(-1)
     energy.add_sums(t2, axis=0)
     # mean contribution
     cmt.dot(w_mean.T, data,
             target=feat_mean)  # HxP (nr mean hiddens x nr samples)
     feat_mean.add_col_vec(bias_mean)  # HxP
     cmt.exp(feat_mean)
     feat_mean.add(1)
     cmt.log(feat_mean)
     feat_mean.mult(-1)
     energy.add_sums(feat_mean, axis=0)
     # visible bias term
     data.mult_by_col(bias_vis, target=t6)
     t6.mult(-1)  # DxP
     energy.add_sums(t6, axis=0)  # 1xP
     # kinetic
     data.mult(data, target=t6)
     energy.add_sums(t6, axis=0, mult=.5)
Example #38
    def compute_output(self, gpu_data):
        """
        Computes p(y|x). Puts the result in self.gpu_p_y_given_x.
        """

        cm.dot(self.W, gpu_data, self.gpu_act_from_x)
        self.gpu_act_from_x.add_col_vec(self.c)
        for c in range(self.n_classes):
            cm.dot(self.U, self.gpu_target_vectors.slice(c, c + 1),
                   self.gpu_act_from_y)
            # to avoid memory creation, using gpu_h
            # and gpu_h_sample for these computations
            self.gpu_act_from_x.add_col_vec(self.gpu_act_from_y,
                                            target=self.gpu_h)
            cm.exp(self.gpu_h, self.gpu_h_sample)
            self.gpu_h_sample.add(1.)
            cm.log(self.gpu_h_sample, self.gpu_h)
            self.gpu_h.sum(axis=0, target=self.gpu_negative_free_energy_for_y)
            cm.dot(self.d.T,
                   self.gpu_target_vectors.slice(c, c + 1),
                   target=self.gpu_bias_from_y)
            self.gpu_negative_free_energy_for_y.add_col_vec(
                self.gpu_bias_from_y)
            self.gpu_negative_free_energy_for_y.transpose(
                target=self.gpu_negative_free_energy.slice(c, c + 1))
        # Subtracting mean for more stable softmax computation
        self.gpu_negative_free_energy.sum(
            axis=1, target=self.gpu_mean_negative_free_energy)
        self.gpu_mean_negative_free_energy.divide(-self.n_classes)
        self.gpu_negative_free_energy.add_col_vec(
            self.gpu_mean_negative_free_energy)

        cm.exp(self.gpu_negative_free_energy,
               target=self.gpu_negative_free_energy)
        self.gpu_negative_free_energy.sum(axis=1,
                                          target=self.gpu_p_y_given_x_norm)
        for c in range(self.n_classes):
            self.gpu_negative_free_energy.slice(c, c + 1).divide(
                self.gpu_p_y_given_x_norm,
                target=self.gpu_p_y_given_x.slice(c, c + 1))
        self.gpu_p_y_given_x.transpose(target=self.gpu_p_y_given_x_trans)
Example #39
def ff(x0_cpu):
    data_size = x0_cpu.shape[1]
    x_l0 = cm.empty((num_input, data_size))
    x_l0.assign(cm.CUDAMatrix(x0_cpu))

    x_l1 = cm.empty((num_hid, data_size))

    cm.dot(w1.T, x_l0, target=x_l1)
    x_l1.add_col_vec(b1)
    x_l1.apply_sigmoid()

    x_l2 = cm.empty((num_hid, data_size))
    del x_l0

    cm.dot(w2.T, x_l1, target=x_l2)
    x_l2.add_col_vec(b2)
    x_l2.apply_sigmoid()

    x_l3 = cm.empty((num_hid, data_size))
    del x_l1

    cm.dot(w3.T, x_l2, target=x_l3)
    x_l3.add_col_vec(b3)
    x_l3.apply_sigmoid()

    x_l4 = cm.empty((num_hid, data_size))
    del x_l2

    cm.dot(w4.T, x_l3, target=x_l4)
    x_l4.add_col_vec(b4)
    x_l4.apply_sigmoid()

    x_l5 = cm.empty((num_hid, data_size))
    del x_l3

    cm.dot(w5.T, x_l4, target=x_l5)
    x_l5.add_col_vec(b5)
    x_l5.apply_sigmoid()

    x_output = cm.empty((num_output, data_size))
    del x_l4

    tmp_x_output = cm.empty((num_output, data_size))
    tmp_x_output_sums = cm.empty((1, data_size))

    cm.dot(wo.T, x_l5, target=tmp_x_output)
    tmp_x_output.add_col_vec(bo)
    cm.exp(tmp_x_output)
    tmp_x_output.sum(axis=0, target=tmp_x_output_sums)
    tmp_x_output_sums.reciprocal()
    tmp_x_output.mult_by_row(tmp_x_output_sums)
    x_output.assign(tmp_x_output)

    x_output.mult_by_col(state_prior_gpu_rec)
    cm.log(x_output)

    x_output.mult(1. / np.log(10))

    xo = x_output.asarray()

    return xo
Example #40
import cudamat as cm
import numpy as np
cm.cuda_set_device(0)
cm.cublas_init()
t = np.load('/home/scw4750/frelam_20161027/get_feature/data/feature_0w-5w.npy')
t.dtype = '<f'
feat = t[0:40000]

print t
a = cm.CUDAMatrix(feat)
c = cm.dot(a, a.T)
e = cm.sqrt(c)
e = e.asarray()
#e.dtype = 'float'
print len(e)
dioa = None
for index, item in enumerate(e):
    if dioa is None:
        temp = np.array(item[index])
        dioa = np.copy(temp)
    else:
        temp = np.array(item[index])
        dioa = np.vstack((dioa, temp))
feat = t[40000:50000]

a = cm.CUDAMatrix(feat)
c = cm.dot(a, a.T)
e_2 = cm.sqrt(c)
e_2 = e_2.asarray()
print len(e_2)
for index, item in enumerate(e_2):
Example #41
def heatup(duration):
    """Heat-up the GPU for a while so it enters full-performance mode"""
    t1 = time.time()
    while time.time() - t1 < duration:
        cmt.dot(cmt.empty((200, 200)), cmt.empty((200, 200)))
Example #42
def bench_dot(X, Y, col, row):
    cmt.dot(X.T, Y)
Example #43
 def visActProbs(self):
     cm.dot(self.visToHid, self.hActs, target=self.negVis)
     self.negVis.add_col_vec(self.visBias)
Example #44
    def run(self, iterations):

        for i in range(0,iterations):
            # update H
            cm.dot(self.W_gpu.T, self.X_gpu, target=self.WTX_gpu)
            cm.dot(self.W_gpu.T, self.W_gpu, target=self.WTW_gpu)
            cm.dot(self.WTW_gpu, self.H_gpu, target=self.WTWH_gpu)
            self.H_gpu.mult(self.WTX_gpu).divide(self.WTWH_gpu)

            # update W
            cm.dot(self.X_gpu, self.H_gpu.T, target=self.XHT_gpu)
            cm.dot(self.W_gpu, self.H_gpu, target=self.WH_gpu)
            cm.dot(self.WH_gpu, self.H_gpu.T, target=self.WHHT_gpu)
            self.W_gpu.mult(self.XHT_gpu).divide(self.WHHT_gpu)

            # test for convergence
            if (i % self.niter_test_conv == 0) and self.checkConvergence():
                print "NMF converged after %i iterations" % i
                break
Example #45
    def run(self, iterations):

        for i in range(0,iterations):
            # F = XG(G.T G)^-1
            cm.dot(self.G_gpu.T, self.G_gpu, target=self.GTG_gpu)
            try:
                self.GTGpinv_gpu = cm.CUDAMatrix(np.linalg.inv(
                                                    self.GTG_gpu.asarray()))
            except LinAlgError:
                self.GTGpinv_gpu = cm.CUDAMatrix(np.linalg.pinv(
                                                    self.GTG_gpu.asarray()))
            cm.dot(self.X_gpu, self.G_gpu, target=self.XG_gpu)
            cm.dot(self.XG_gpu, self.GTGpinv_gpu, target=self.F_gpu)

            # preparation and calculation of the matrix separations
            cm.dot(self.X_gpu.T, self.F_gpu, target=self.XTF_gpu)
            cm.dot(self.F_gpu.T, self.F_gpu, target=self.FTF_gpu)

            self.XTF_gpu.greater_than(0, target=self.XTFgreater_gpu)
            self.XTF_gpu.mult(self.XTFgreater_gpu, target=self.XTFpos_gpu)
            self.XTFpos_gpu.subtract(self.XTF_gpu, target=self.XTFneg_gpu)

            self.FTF_gpu.greater_than(0, target=self.FTFgreater_gpu)
            self.FTF_gpu.mult(self.FTFgreater_gpu, target=self.FTFpos_gpu)
            self.FTFpos_gpu.subtract(self.FTF_gpu, target=self.FTFneg_gpu)

            # compute the G update
            cm.dot(self.G_gpu, self.FTFpos_gpu, target=self.GFTFpos_gpu)
            cm.dot(self.G_gpu, self.FTFneg_gpu, target=self.GFTFneg_gpu)

            self.XTFpos_gpu.add(self.GFTFneg_gpu)
            self.XTFneg_gpu.add(self.GFTFpos_gpu)
            self.XTFpos_gpu.add_scalar(10**-9)
            self.XTFneg_gpu.add_scalar(10**-9)
            self.XTFpos_gpu.divide(self.XTFneg_gpu)
            cm.sqrt(self.XTFpos_gpu)

            self.G_gpu.mult(self.XTFpos_gpu)

            # test for convergence
            if (i % self.niter_test_conv == 0) and self.checkConvergence():
                print "NMF converged after %i iterations" % i
                break
Example #46
    def get_gradient(self, x, n_updates=1):
        """Use Gibbs sampling to estimate the contrastive divergence gradient.

            - x: a cuda matrix having different variables on different columns and observations on the rows (context)
            - n_updates: number of CD iterations. Default value: 1

        Returns a tuple (dw, dbv, dbh, da, db) that contains the gradients of the
        weights and the biases of the visibles and the hidden respectively and
        the autoregressive gradients da and db.


        This is not the true gradient anymore as I didn't explicitly divide by
        n for the gradients that are based on sums over n datapoints.

        The BPTT gradient with respect to the reservoir recurrent and input
        weight is computed as well.
        """

        # useful quantities
        n = x.shape[0]

        w, a, b, bv, bh = self.wg, self.ag, self.bg, self.bvg, self.bhg

        # Pre-calculate dynamic biases.
        dynamic_h = cm.empty((n, self.output_dim))
        dynamic_v = cm.empty((n, self.visible_dim))

        cm.dot(x, self.ag, dynamic_v)
        cm.dot(x, self.bg, dynamic_h)

        # first update of the hidden units for the data term
        self._sample_h(self.v, dynamic_h, sample=False, x_is_bias=True)
        # n updates of both v and h for the model term
        self.h_data = cm.empty(self.h.shape)
        self.v_data = cm.empty(self.v.shape)
        self.h_data.assign(self.h)
        self.v_data.assign(self.v)
        #self._sample_h(self.v, dynamic_h, sample=True, x_is_bias=True)
        for i in range(n_updates):
            self._stochastic_h()
            self._sample_v(self.h, dynamic_v, x_is_bias=True)
            self._sample_h(self.v, dynamic_h, sample=False, x_is_bias=True)

        # Is preallocating really that "bad" for, for example, data_term?
        # find dw
        dw = cm.empty(self.w.shape)
        cm.dot(self.v_data.T, self.h_data, dw)
        dw.subtract_dot(self.v.T, self.h)

        # find da
        d_v = cm.empty(self.v.shape)  # TODO: perhaps this is inefficient...
        da = cm.empty(self.a.shape)
        self.v_data.subtract(self.v, d_v)
        cm.dot(x.T, d_v, da)

        # find db
        d_h = cm.empty(self.h.shape)  # TODO: perhaps this is inefficient...
        # TODO: I should probably just compute the gradient with respect to the
        # biases once and use that for both updating matrix b and the biases
        # itself.
        db = cm.empty(self.b.shape)
        self.h_data.subtract(self.h, d_h)
        cm.dot(x.T, d_h, db)

        # find dbv
        dbv = cm.empty((1, self.visible_dim))
        self.v_data.sum(axis=0, target=dbv)
        dbv.add_sums(self.v, axis=0, mult=-1.0)  # Subtract sum

        # find dbh
        dbh = cm.empty((1, self.output_dim))
        self.h_data.sum(axis=0, target=dbh)
        dbh.add_sums(self.h, axis=0, mult=-1.0)  # Subtract sum

        #### BPTT code ####
        # TODO: Some of the computations above should be combined with the
        # gradient calculation here.

        d_reservoir = cm.empty((self.context_dim, n))

        # Do some transposes because get_col_slice is faster than get_row_slice.
        x_T = x.transpose()
        d_h_T = d_h.transpose()
        d_v_T = d_v.transpose()

        # Pre-calculate the tanh derivatives
        dtanh = cm.empty(x_T.shape)
        x_T.apply_dtanh(target=dtanh)

        # Last state gets no gradient information from the future
        drt = d_reservoir.get_col_slice(n - 1, n)
        drt.assign(0)

        # Main BPTT loop
        for i in range(n - 1, 0, -1):
            drt = d_reservoir.get_col_slice(i, i + 1)
            dr_pre_t = d_reservoir.get_col_slice(i - 1, i)
            d_vt = d_v_T.get_col_slice(i, i + 1)
            d_ht = d_h_T.get_col_slice(i, i + 1)

            # Add visible component
            # TODO: I could actually pre-calculate this outside the loop
            drt.add_dot(self.ag, d_vt)

            # Add hidden component
            drt.add_dot(self.bg, d_ht)

            # Mult with derivative
            drt.mult(dtanh.get_col_slice(i, i + 1))

            # Backpropagate
            cm.dot(self.reservoir.w.T, drt, dr_pre_t)

        d_vt = d_v_T.get_col_slice(0, 1)
        d_ht = d_h_T.get_col_slice(0, 1)
        dr_pre_t = d_reservoir.get_col_slice(0, 1)

        # Add visible component
        dr_pre_t.add_dot(self.ag, d_vt)

        # Add hidden component
        dr_pre_t.add_dot(self.bg, d_ht)

        # Mult with derivative
        dr_pre_t.mult(dtanh.get_col_slice(0, 1))

        # Compute weight derivatives
        dw_res = cm.empty(self.reservoir.w.shape)
        dw_res_in = cm.empty(self.reservoir.w_in.shape)

        # dw_res <- d_reservoir * x(t-1)
        # The first state has obviously no previous state so we can ignore it.
        cm.dot(d_reservoir.get_col_slice(1, n),
               x_T.get_col_slice(0, n - 1).T,
               target=dw_res)
        # dw_res_in <- d_reservoir * v
        cm.dot(d_reservoir, self.v_data, target=dw_res_in)

        ###################

        return (dw, dbv, dbh, da, db, dw_res, dw_res_in)
Example #47
    def get_CD_gradient(self, x, n_updates=1):
        """Use Gibbs sampling to estimate the contrastive divergence gradient.

            - x: a cuda matrix having different variables on different columns and observations on the rows (context)
            - n_updates: number of CD iterations. Default value: 1

        Returns a tuple (dw, dbv, dbh, da, db) that contains the gradients of the
        weights and the biases of the visibles and the hidden respectively and
        the autoregressive gradients da and db.


        This is not the true gradient anymore as I didn't explicitly divide by
        n for the gradients that are based on sums over n datapoints.
        """

        # useful quantities
        n = x.shape[0]

        w, a, b, bv, bh = self.wg, self.ag, self.bg, self.bvg, self.bhg

        # Pre-calculate dynamic biases.
        dynamic_h = cm.empty((n, self.output_dim))
        dynamic_v = cm.empty((n, self.visible_dim))

        cm.dot(x, self.ag, dynamic_v)
        cm.dot(x, self.bg, dynamic_h)

        # first update of the hidden units for the data term
        self._sample_h(self.v, dynamic_h, sample=False, x_is_bias=True)
        # n updates of both v and h for the model term

        # TODO: I set things back to sutskever's way of sampling but should
        # really compare it to Ben's method some time.
        self.h_data = cm.empty(self.h.shape)
        self.v_data = cm.empty(self.v.shape)
        self.h_data.assign(self.h)
        self.v_data.assign(self.v)
        for i in range(n_updates):
            self._stochastic_h()
            self._sample_v(self.h, dynamic_v, x_is_bias=True)
            self._sample_h(self.v, dynamic_h, sample=False, x_is_bias=True)

        # Is preallocating really that "bad" for, for example, data_term?
        # find dw
        dw = cm.empty(self.w.shape)
        cm.dot(self.v_data.T, self.h_data, dw)
        dw.subtract_dot(self.v.T, self.h)

        # find da
        temp = cm.empty(self.v.shape)  # TODO: perhaps this is inefficient...
        da = cm.empty(self.a.shape)
        self.v_data.subtract(self.v, temp)
        cm.dot(x.T, temp, da)

        # find db
        temp = cm.empty(self.h.shape)  # TODO: perhaps this is inefficient...
        db = cm.empty(self.b.shape)
        self.h_data.subtract(self.h, temp)
        cm.dot(x.T, temp, db)

        # find dbv
        dbv = cm.empty((1, self.visible_dim))
        self.v_data.sum(axis=0, target=dbv)
        dbv.add_sums(self.v, axis=0, mult=-1.0)  # Subtract sum

        # find dbh
        dbh = cm.empty((1, self.output_dim))
        self.h_data.sum(axis=0, target=dbh)
        dbh.add_sums(self.h, axis=0, mult=-1.0)  # Subtract sum

        return (dw, dbv, dbh, da, db)
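For comparison, a compact NumPy sketch of the plain-RBM CD-1 gradient that the conditional version above extends with dynamic (autoregressive) biases; illustrative only, not the project's code:

import numpy as np

def cd1_gradient(v0, W, bv, bh, rng=np.random):
    # v0: (n, n_visible) data minibatch; W: (n_visible, n_hidden); bv, bh: bias row vectors
    sigmoid = lambda z: 1.0 / (1.0 + np.exp(-z))
    h0 = sigmoid(np.dot(v0, W) + bh)                          # data-driven hidden probabilities
    h_sample = (rng.rand(*h0.shape) < h0).astype(v0.dtype)    # stochastic hidden states
    v1 = sigmoid(np.dot(h_sample, W.T) + bv)                  # reconstruction
    h1 = sigmoid(np.dot(v1, W) + bh)
    dw = np.dot(v0.T, h0) - np.dot(v1.T, h1)                  # positive minus negative phase
    dbv = v0.sum(axis=0) - v1.sum(axis=0)
    dbh = h0.sum(axis=0) - h1.sum(axis=0)
    return dw, dbv, dbh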
Example #48
for epoch in range(epochs):
    for xt, yt in mdp.utils.progressinfo(zip(x[0:n_train_samples],
                                                y[0:n_train_samples])):
        batch_size = xt.shape[0] / 2
        state = crbm.reservoir.simulate(cm.CUDAMatrix(xt))

        crbm.v = cm.CUDAMatrix(xt[:batch_size, :])
        crbm.train(state.get_row_slice(0, batch_size), decay=0, epsilon=.001, momentum=.9)
        crbm.v = cm.CUDAMatrix(xt[batch_size:, :])
        crbm.train(state.get_row_slice(batch_size, xt.shape[0]), decay=0, epsilon=.001, momentum=.9)
        
    print 'epoch', epoch, 'finished'
    error = 0
    for xt, yt in mdp.utils.progressinfo(zip(x[0:n_train_samples],
                                                y[0:n_train_samples])):
        state = crbm.reservoir.simulate(cm.CUDAMatrix(xt))
        v = cm.CUDAMatrix(sp.random.normal(0, 1, (xt.shape)))
        crbm.v = v
        n = xt.shape[0]
        dynamic_h = cm.empty((n, crbm.output_dim))
        dynamic_v = cm.empty((n, crbm.visible_dim))
        cm.dot(state, crbm.ag, dynamic_v)
        cm.dot(state, crbm.bg, dynamic_h)
        for i in range(25):
            crbm._sample_h(crbm.v, dynamic_h, sample=True, x_is_bias=True)
            crbm._sample_v(crbm.h, dynamic_v, x_is_bias=True)
        error += sp.mean((crbm.v.asarray() - xt) ** 2)
    print error / n_train_samples

    # Evaluate reconstruction error
Example #49
 def encode(self):
     cm.dot(self.visToHid.T, self.inp, target = self.hid)
Example #50
 def decode(self):
     cm.dot(self.hidToVis.T, self.hid, target = self.out)
Example #51
import numpy as np
import cudamat as cm

cm.cublas_init()

# create two random matrices and copy them to the GPU
a = cm.CUDAMatrix(np.random.rand(32, 256))
b = cm.CUDAMatrix(np.random.rand(256, 32))

# perform calculations on the GPU
c = cm.dot(a, b)
d = c.sum(axis = 0)

# copy d back to the host (CPU) and print
print d.asarray()
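Continuing the snippet above, a host-side cross-check of the GPU result against NumPy (reusing a, b and d; a sketch only):

a_host = a.asarray()
b_host = b.asarray()
d_np = np.dot(a_host, b_host).sum(axis = 0)
print np.max(np.abs(d_np - d.asarray()))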
Example #52
    def forward_prop(self, x, thetas):
        num_thetas = len(thetas)
        # add ones to end
        cm.dot(x, self.layer_expand_mask[0], self.activ_layers_temp[0])
        cm.dot(self.activ_layers[0], self.clear_vec, self.activ_layers[0])
        self.activ_layers[0].add(self.activ_layers_temp[0])
        cm.dot(self.activ_layers[0], thetas[0].T, self.z[1])
        for i in range(1, num_thetas):
            cm.dot(self.z[i].apply_sigmoid(), self.layer_expand_mask[i],
                   self.activ_layers_temp[i])
            cm.dot(self.activ_layers[i], self.clear_vec2, self.activ_layers[i])

            self.activ_layers[i].add(self.activ_layers_temp[i])
            cm.dot(self.activ_layers[i], thetas[i].T, self.z[i + 1])
        self.z[num_thetas].apply_sigmoid(self.activ_layers[num_thetas])
        #print self.activ_layers[num_thetas].asarray()
        return self.activ_layers[num_thetas], self.activ_layers
Example #53
    
    W1 = np.array(W1, order="F")
    #a = np.array(a, order="C")

    W2=hfl_data['W2'] 
    #W2 = np.array(W2, order="F")

    W3=hfl_data['W3'] 
    #W3 = np.array(W3, order="F")


    #a=cm.CUDAMatrix(feat_mat)
    b=cm.CUDAMatrix(W1.T)

    
    a1=cm.dot(a.transpose(),b)
    #a1=cm.dot(a,b)
    a1.mult(1.7159)
    a1.mult(2.0/3.0)

    a1=a1.asarray()



    a1=np.tanh(a1)
    a1=a1*0.5 # accounting for dropout
    a1=np.c_[np.ones(num_candidates),a1] #adding bias


    a=cm.CUDAMatrix(a1.T)
    b=cm.CUDAMatrix(W2)
Example #54
    def _calculate_moments_ns(self, x, ws, quick=False):
        """Calculate moments based on the weights and samples. We also calculate and save MI, TC, additivity, and
        the value of the objective. Note it is assumed that <X_i^2> = 1! """
        m = {}  # Dictionary of moments
        eps = 10**-8
        if self.gpu:
            y = cm.empty((self.n_samples, self.m))
            wc = cm.CUDAMatrix(ws)
            cm.dot(x, wc.T,
                   target=y)  # + noise, but it is included analytically
            del wc
            tmp_sum = np.einsum(
                'lj,lj->j', y.asarray(),
                y.asarray())  # TODO: Should be able to do on gpu...
        else:
            y = x.dot(ws.T)
            tmp_sum = np.einsum('lj,lj->j', y, y)
        m["uj"] = (
            1 - self.eps**2) * tmp_sum / self.n_samples + self.eps**2 * np.sum(
                ws**2, axis=1)
        #if quick and np.max(m["uj"]) >= 1.:
        #    return False
        if self.gpu:
            tmp = cm.empty((self.nv, self.m))
            cm.dot(x.T, y, target=tmp)
            tmp_dot = tmp.asarray()
            del tmp
            del y
        else:
            tmp_dot = x.T.dot(y)
        m["rho"] = (
            1 - self.eps**
            2) * tmp_dot.T / self.n_samples + self.eps**2 * ws  # m by nv
        m["ry"] = ws.dot(m["rho"].T)  # normalized covariance of Y
        m["Y_j^2"] = self.yscale**2 / (1. - m["uj"] + eps)
        np.fill_diagonal(m["ry"], 1)
        m["invrho"] = 1. / (1. - m["rho"]**2 + eps)
        m["rhoinvrho"] = m["rho"] * m["invrho"]
        m["Qij"] = np.dot(m['ry'], m["rhoinvrho"])
        m["Qi"] = np.einsum('ki,ki->i', m["rhoinvrho"], m["Qij"])
        #m["Qi-Si^2"] = np.einsum('ki,ki->i', m["rhoinvrho"], m["Qij"])
        m["Si"] = np.sum(m["rho"] * m["rhoinvrho"], axis=0)

        # This is the objective, a lower bound for TC
        m["TC"] = np.sum(np.log(1 + m["Si"])) \
                     - 0.5 * np.sum(np.log(1 - m["Si"]**2 + m["Qi"]+eps)) \
                     + 0.5 * np.sum(np.log(1 - m["uj"]+eps))

        if not quick:
            m["MI"] = -0.5 * np.log1p(-m["rho"]**2)
            m["X_i Y_j"] = m["rho"].T * np.sqrt(m["Y_j^2"])
            m["X_i Z_j"] = np.linalg.solve(m["ry"], m["rho"]).T
            m["X_i^2 | Y"] = (
                1. - np.einsum('ij,ji->i', m["X_i Z_j"], m["rho"])).clip(1e-6)
            m['I(Y_j ; X)'] = 0.5 * np.log(m["Y_j^2"]) - 0.5 * np.log(
                self.yscale**2)
            m['I(X_i ; Y)'] = -0.5 * np.log(m["X_i^2 | Y"])
            m["TCs"] = m["MI"].sum(axis=1) - m['I(Y_j ; X)']
            m["TC_no_overlap"] = m["MI"].max(axis=0).sum(
            ) - m['I(Y_j ; X)'].sum(
            )  # A direct calculation of TC where each variable is in exactly one group.
            m["TC_direct"] = m['I(X_i ; Y)'].sum() - m[
                'I(Y_j ; X)']  # A direct calculation of TC. Should be upper bound for "TC", "TC_no_overlap"
            m["additivity"] = (m["MI"].sum(axis=0) - m['I(X_i ; Y)']).sum()
        return m
Example #55
 def getW(self):
     return(cm.dot(self.X_gpu, self.W_gpu).asarray())
Example #56
    def costAndGrad(self, data, labels=None):

        T = data.shape[1]
        self.setViews(T)

        if self.temporalLayer > 0:
            stack = self.stack[:-1]
            wt, _ = self.stack[-1]
            if self.train:
                grad = self.grad[:-1]
                dwt, _ = self.grad[-1]
        else:
            stack = self.stack
            if self.train:
                grad = self.grad

        # forward prop
        self.hActs[0].assign(cm.CUDAMatrix(data))

        i = 1
        for w, b in stack:
            cm.dot(w, self.hActs[i - 1], self.hActs[i])
            self.hActs[i].add_col_vec(b)

            # forward prop through time
            if i == self.temporalLayer:
                for t in xrange(1, T):
                    self.hActs[i].minmax(0.0, self.maxAct, col=t - 1)
                    cm.mvdot_col_slice(wt,
                                       self.hActs[i],
                                       t - 1,
                                       self.hActs[i],
                                       t,
                                       beta=1.0)
                self.hActs[i].minmax(0.0, self.maxAct, col=T - 1)

            if i <= self.numLayers and i != self.temporalLayer:
                # hard relu
                self.hActs[i].maximum(0.0)
            i += 1

        # Subtract max activation
        self.hActs[-1].max(axis=0, target=self.rowVec)
        self.hActs[-1].add_row_mult(self.rowVec, -1.0, target=self.probs)

        # Softmax
        cm.exp(self.probs)
        self.probs.sum(axis=0, target=self.rowVec)
        cm.pow(self.rowVec, -1.0, target=self.rowVec)
        self.probs.mult_by_row(self.rowVec)

        self.probs.copy_to_host()
        if not self.train:
            return ctc.decode_best_path(
                self.probs.numpy_array.astype(np.float64))

        cost, deltas, skip = ctc.ctc_loss(self.probs.numpy_array.astype(
            np.float64),
                                          labels,
                                          blank=0)

        if skip:
            return cost, self.grad, skip

        self.deltasC.assign(cm.CUDAMatrix(deltas))

        # back prop
        i = self.numLayers
        deltasIn, deltasOut = self.deltasC, self.deltasOut
        for w, b in reversed(stack):
            # compute gradient
            cm.dot(deltasIn, self.hActs[i].T, target=grad[i][0])
            deltasIn.sum(axis=1, target=grad[i][1])

            # compute next layer deltas
            if i > 0:
                cm.dot(w.T, deltasIn, target=deltasOut)

            # backprop through time
            if i == self.temporalLayer:
                self.hActs[i].within(0.0, self.maxAct, target=self.tmpGrad)
                self.deltaTemp.assign(0.0)
                for t in xrange(T - 1, 0, -1):
                    # Add in temporal delta
                    cm.mvdot_col_slice(wt.T,
                                       self.deltaTemp,
                                       t,
                                       deltasOut,
                                       t,
                                       beta=1.0)

                    # Push through activation fn
                    deltasOut.mult_slice(t, self.tmpGrad, t)
                    self.deltaTemp.set_single_col(t - 1, deltasOut, t)

                # Accumulate temporal gradient
                cm.dot(self.deltaTemp, self.hActs[i].T, target=dwt)

                cm.mvdot_col_slice(wt.T,
                                   self.deltaTemp,
                                   0,
                                   deltasOut,
                                   0,
                                   beta=1.0)
                deltasOut.mult_slice(0, self.tmpGrad, 0)

            if i > 0 and i != self.temporalLayer:
                self.hActs[i].sign(target=self.tmpGrad)
                deltasOut.mult(self.tmpGrad)

            if i == self.numLayers:
                deltasIn = self.deltasIn

            deltasIn, deltasOut = deltasOut, deltasIn
            i -= 1

        return cost, self.grad, skip
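In the forward pass above the per-column maximum is subtracted before exponentiating, which is the standard trick for a numerically stable softmax. A minimal NumPy sketch of the same computation (on a hypothetical activation matrix `acts` whose columns are time steps):

    import numpy as np

    def stable_softmax_columns(acts):
        # Subtract the per-column max so exp() cannot overflow,
        # then normalize each column to sum to one.
        shifted = acts - acts.max(axis=0, keepdims=True)
        e = np.exp(shifted)
        return e / e.sum(axis=0, keepdims=True)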
Example #57
    def run(self, iterations):

        for i in range(iterations):

            cm.dot(self.XTXneg_gpu, self.W_gpu, target=self.XTXnegW_gpu)
            cm.dot(self.XTXpos_gpu, self.W_gpu, target=self.XTXposW_gpu)

            # Update G
            cm.dot(self.G_gpu, self.W_gpu.T, target=self.GWT_gpu)
            # G *= np.sqrt((XTXposW + np.dot(GWT, XTXnegW))
            #              /(XTXnegW+np.dot(GWT, XTXposW)))
            cm.dot(self.GWT_gpu, self.XTXnegW_gpu, target=self.update1_gpu)
            cm.dot(self.GWT_gpu, self.XTXposW_gpu, target=self.update2_gpu)
            self.update1_gpu.add(self.XTXposW_gpu)
            self.update2_gpu.add(self.XTXnegW_gpu)
            self.update2_gpu.add_scalar(10**-9)
            self.update1_gpu.divide(self.update2_gpu)
            cm.sqrt(self.update1_gpu)
            self.G_gpu.mult(self.update1_gpu)

            # Update W
            cm.dot(self.G_gpu.T, self.G_gpu, target=self.GTG_gpu)
            #W *= np.sqrt((np.dot(XTXpos, G) + np.dot(XTXnegW, GTG))
            #                                  / (np.dot(XTXneg, G)
            #                                + np.dot(XTXposW, GTG)))
            cm.dot(self.XTXpos_gpu, self.G_gpu, target=self.XTXposG_gpu)
            cm.dot(self.XTXneg_gpu, self.G_gpu, target=self.XTXnegG_gpu)
            cm.dot(self.XTXnegW_gpu, self.GTG_gpu, target=self.update1_gpu)
            cm.dot(self.XTXposW_gpu, self.GTG_gpu, target=self.update2_gpu)
            self.update1_gpu.add(self.XTXposG_gpu)
            self.update2_gpu.add(self.XTXnegG_gpu)
            self.update2_gpu.add_scalar(10**-9)
            self.update1_gpu.divide(self.update2_gpu)
            cm.sqrt(self.update1_gpu)
            self.W_gpu.mult(self.update1_gpu)

            # test for convergence
            if (i % self.niter_test_conv == 0) and self.checkConvergence():
                print "NMF converged after %i iterations" % i
                break
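For reference, the commented formulas above correspond to this CPU-only sketch of one multiplicative update (assuming dense NumPy float arrays; eps plays the role of the 10**-9 added on the GPU):

    import numpy as np

    def seminmf_update(XTXpos, XTXneg, W, G, eps=1e-9):
        # One multiplicative update of the factors, mirroring the GPU loop.
        XTXposW = XTXpos.dot(W)
        XTXnegW = XTXneg.dot(W)

        # Update G.
        GWT = G.dot(W.T)
        G = G * np.sqrt((XTXposW + GWT.dot(XTXnegW)) /
                        (XTXnegW + GWT.dot(XTXposW) + eps))

        # Update W (GTG uses the freshly updated G, as in the GPU code).
        GTG = G.T.dot(G)
        W = W * np.sqrt((XTXpos.dot(G) + XTXnegW.dot(GTG)) /
                        (XTXneg.dot(G) + XTXposW.dot(GTG) + eps))
        return W, G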
Example #58
    def train(self):
        '''
        Main train function: a modified version of the original train function.
        Additions: GPU selection (useful for multi-GPU machines);
                   the sum of squares of the data is saved for post-processing;
                   the visible data are saved;
                   data samples are permuted for training;
                   weights are saved every 100 training epochs;
                   the training energy is plotted every 100 training epochs.
        NOTE: the annealed learning rate used in the original code is NOT used here!
        '''
        #plt.ion()
        f1 = plt.figure()
        ax1 = f1.add_subplot(111)
        #ax2 = f1.add_subplot(122)
        #plt.show()

        cmt.cuda_set_device(self.gpuId)
        cmt.cublas_init()
        cmt.CUDAMatrix.init_random(1)

        np.random.seed(self.npRandSeed)
        prng = RandomState(self.npRandState)

        ################################################################
        ##################### CHANGE PATH ##############################
        # Move to current experiment path:
        os.chdir(self.saveDir)
        # Get current path:
        os.getcwd()

        self.plotsDir = 'plots'
        #self.probabilitiesDir = 'p_all'
        if not os.path.isdir(self.plotsDir):
            os.makedirs(self.plotsDir)
        if not os.path.isdir(self.plotsDir + '/energy'):
            os.makedirs(self.plotsDir + '/energy')
        #if not os.path.isdir(self.probabilitiesDir):
        #	os.makedirs(self.probabilitiesDir)
        if not os.path.isdir('weights'):
            os.makedirs('weights')

        d = self.d.astype(np.float32)
        print("visible size: ", d.shape)

        dsq = np.square(d)
        lsq = np.sum(dsq, axis=0)
        with open('lsqComplete.pkl', 'wb') as pklFile:
            cPickle.dump(lsq, pklFile)

        del dsq, lsq

        # Save visible data :
        visData = d
        np.savez('visData.npz',
                 data=d,
                 obsKeys=self.obsKeys,
                 epochTime=self.epochTime)

        with open('visData.txt', 'w') as f:
            f.write("\n Dataset : %s" % (self.dataFilename))
            f.write("\n visData size: %s " % str(visData.shape))
            f.write("\n visData type: %s " % str(visData.dtype))
            f.write("\n \n visData Range: %s " %
                    str(np.max(visData, axis=0) - np.min(visData, axis=0)))
            f.write("\n \n visData min: %s " % str(np.min(visData, axis=0)))
            f.write("\n \n visData max: %s " % str(np.max(visData, axis=0)))
            f.write("\n \n visData mean: %s " % str(np.mean(visData, axis=0)))
            f.write("\n \n visData std: %s " % str(np.std(visData, axis=0)))

        del visData  #if not needed for computing the latent states

        permIdx = prng.permutation(d.shape[0])

        d = d[permIdx, :]

        #subsetting train and test datasets
        #trainPerc = 0.7
        #trainSampNum = int(np.ceil(trainPerc*d.shape[0]))
        #trainSampNum = int(np.floor(trainSampNum/self.batch_size)*self.batch_size)
        #testSampNum = int(d.shape[0]-trainSampNum-1)

        # The test dataset is not used at the moment, it can be used as
        # a validation set to check for overfitting. To use it, uncomment
        # all the variables with 'test' in their name

        #~ d_test = d[trainSampNum+1:,:]
        #d = d[:trainSampNum,:]
        #obsKeys = self.obsKeys[:trainSampNum]

        totnumcases = d.shape[0]
        num_vis = d.shape[1]

        num_batches = int(totnumcases / self.batch_size)
        print("num_batches: ", num_batches)
        dev_dat = cmt.CUDAMatrix(d.T)  # VxP
        #~ test_dat = cmt.CUDAMatrix(d_test.T)

        del d, self.d, self.epochTime, self.obsKeys

        # training parameters (as in the original code by Ranzato)
        epsilon = self.epsilon
        epsilonVF = 2 * epsilon
        epsilonFH = 0.02 * epsilon
        epsilonb = 0.02 * epsilon
        epsilonw_mean = 0.2 * epsilon
        epsilonb_mean = 0.1 * epsilon
        weightcost_final = self.weightcost_final

        # HMC setting
        hmc_step_nr = self.hmc_step_nr
        hmc_step = 0.01
        hmc_target_ave_rej = self.hmc_target_ave_rej
        hmc_ave_rej = hmc_target_ave_rej

        # initialize weights
        VF = cmt.CUDAMatrix(
            np.array(0.02 * prng.randn(num_vis, self.num_fac),
                     dtype=np.float32,
                     order='F'))  # VxH
        if self.apply_mask == 0:
            FH = cmt.CUDAMatrix(
                np.array(np.eye(self.num_fac, self.num_hid_cov),
                         dtype=np.float32,
                         order='F'))  # HxO
        else:
            dd = loadmat(
                'your_FHinit_mask_file.mat'
            )  # see CVPR2010paper_material/topo2D_3x3_stride2_576filt.mat for an example
            FH = cmt.CUDAMatrix(np.array(dd["FH"], dtype=np.float32,
                                         order='F'))
        bias_cov = cmt.CUDAMatrix(
            np.array(2.0 * np.ones((self.num_hid_cov, 1)),
                     dtype=np.float32,
                     order='F'))
        bias_vis = cmt.CUDAMatrix(
            np.array(np.zeros((num_vis, 1)), dtype=np.float32, order='F'))
        w_mean = cmt.CUDAMatrix(
            np.array(0.05 * prng.randn(num_vis, self.num_hid_mean),
                     dtype=np.float32,
                     order='F'))  # VxH
        bias_mean = cmt.CUDAMatrix(
            np.array(-2.0 * np.ones((self.num_hid_mean, 1)),
                     dtype=np.float32,
                     order='F'))

        # initialize variables to store derivatives
        VFinc = cmt.CUDAMatrix(
            np.array(np.zeros((num_vis, self.num_fac)),
                     dtype=np.float32,
                     order='F'))
        FHinc = cmt.CUDAMatrix(
            np.array(np.zeros((self.num_fac, self.num_hid_cov)),
                     dtype=np.float32,
                     order='F'))
        bias_covinc = cmt.CUDAMatrix(
            np.array(np.zeros((self.num_hid_cov, 1)),
                     dtype=np.float32,
                     order='F'))
        bias_visinc = cmt.CUDAMatrix(
            np.array(np.zeros((num_vis, 1)), dtype=np.float32, order='F'))
        w_meaninc = cmt.CUDAMatrix(
            np.array(np.zeros((num_vis, self.num_hid_mean)),
                     dtype=np.float32,
                     order='F'))
        bias_meaninc = cmt.CUDAMatrix(
            np.array(np.zeros((self.num_hid_mean, 1)),
                     dtype=np.float32,
                     order='F'))

        # initialize temporary storage
        data = cmt.CUDAMatrix(
            np.array(np.empty((num_vis, self.batch_size)),
                     dtype=np.float32,
                     order='F'))  # VxP
        normdata = cmt.CUDAMatrix(
            np.array(np.empty((num_vis, self.batch_size)),
                     dtype=np.float32,
                     order='F'))  # VxP
        negdataini = cmt.CUDAMatrix(
            np.array(np.empty((num_vis, self.batch_size)),
                     dtype=np.float32,
                     order='F'))  # VxP
        feat = cmt.CUDAMatrix(
            np.array(np.empty((self.num_fac, self.batch_size)),
                     dtype=np.float32,
                     order='F'))
        featsq = cmt.CUDAMatrix(
            np.array(np.empty((self.num_fac, self.batch_size)),
                     dtype=np.float32,
                     order='F'))
        negdata = cmt.CUDAMatrix(
            np.array(prng.randn(num_vis, self.batch_size),
                     dtype=np.float32,
                     order='F'))
        old_energy = cmt.CUDAMatrix(
            np.array(np.zeros((1, self.batch_size)),
                     dtype=np.float32,
                     order='F'))
        new_energy = cmt.CUDAMatrix(
            np.array(np.zeros((1, self.batch_size)),
                     dtype=np.float32,
                     order='F'))
        energy = cmt.CUDAMatrix(
            np.array(np.zeros((1, self.batch_size)),
                     dtype=np.float32,
                     order='F'))
        gradient = cmt.CUDAMatrix(
            np.array(np.empty((num_vis, self.batch_size)),
                     dtype=np.float32,
                     order='F'))  # VxP
        normgradient = cmt.CUDAMatrix(
            np.array(np.empty((num_vis, self.batch_size)),
                     dtype=np.float32,
                     order='F'))  # VxP
        thresh = cmt.CUDAMatrix(
            np.array(np.zeros((1, self.batch_size)),
                     dtype=np.float32,
                     order='F'))
        feat_mean = cmt.CUDAMatrix(
            np.array(np.empty((self.num_hid_mean, self.batch_size)),
                     dtype=np.float32,
                     order='F'))
        vel = cmt.CUDAMatrix(
            np.array(prng.randn(num_vis, self.batch_size),
                     dtype=np.float32,
                     order='F'))
        length = cmt.CUDAMatrix(
            np.array(np.zeros((1, self.batch_size)),
                     dtype=np.float32,
                     order='F'))  # 1xP
        lengthsq = cmt.CUDAMatrix(
            np.array(np.zeros((1, self.batch_size)),
                     dtype=np.float32,
                     order='F'))  # 1xP
        normcoeff = cmt.CUDAMatrix(
            np.array(np.zeros((1, self.batch_size)),
                     dtype=np.float32,
                     order='F'))  # 1xP

        # commented to avoid computing the energy on test data
        #~ data_test = cmt.CUDAMatrix( np.array(np.empty((num_vis, testSampNum)), dtype=np.float32, order='F')) # Vxtest_batch
        #~ normdata_test = cmt.CUDAMatrix( np.array(np.empty((num_vis, testSampNum)), dtype=np.float32, order='F')) # Vxtest_batch
        #~ length_test = cmt.CUDAMatrix( np.array(np.zeros((1, testSampNum)), dtype=np.float32, order='F')) # 1xtest_batch
        #~ lengthsq_test = cmt.CUDAMatrix( np.array(np.zeros((1, testSampNum)), dtype=np.float32, order='F')) # 1xtest_batch
        #~ normcoeff_test = cmt.CUDAMatrix( np.array(np.zeros((1, testSampNum)), dtype=np.float32, order='F')) # 1xtest_batch
        #~ vel_test = cmt.CUDAMatrix( np.array(prng.randn(num_vis, testSampNum), dtype=np.float32, order='F'))
        #~ feat_test = cmt.CUDAMatrix( np.array(np.empty((self.num_fac, testSampNum)), dtype=np.float32, order='F'))
        #~ featsq_test = cmt.CUDAMatrix( np.array(np.empty((self.num_fac, testSampNum)), dtype=np.float32, order='F'))
        #~ feat_mean_test = cmt.CUDAMatrix( np.array(np.empty((self.num_hid_mean, testSampNum)), dtype=np.float32, order='F'))
        #~ energy_test = cmt.CUDAMatrix( np.array(np.zeros((1, testSampNum)), dtype=np.float32, order='F'))

        if self.apply_mask == 1:  # this used to constrain very large FH matrices only allowing to change values in a neighborhood
            dd = loadmat('your_FHinit_mask_file.mat')
            mask = cmt.CUDAMatrix(
                np.array(dd["mask"], dtype=np.float32, order='F'))
        normVF = 1
        small = 0.5

        # other temporary vars
        t1 = cmt.CUDAMatrix(
            np.array(np.empty((self.num_hid_cov, self.batch_size)),
                     dtype=np.float32,
                     order='F'))
        t2 = cmt.CUDAMatrix(
            np.array(np.empty((self.num_hid_cov, self.batch_size)),
                     dtype=np.float32,
                     order='F'))
        t3 = cmt.CUDAMatrix(
            np.array(np.empty((self.num_fac, self.batch_size)),
                     dtype=np.float32,
                     order='F'))
        t4 = cmt.CUDAMatrix(
            np.array(np.empty((1, self.batch_size)),
                     dtype=np.float32,
                     order='F'))
        t5 = cmt.CUDAMatrix(
            np.array(np.empty((1, 1)), dtype=np.float32, order='F'))
        t6 = cmt.CUDAMatrix(
            np.array(np.empty((num_vis, self.batch_size)),
                     dtype=np.float32,
                     order='F'))
        t7 = cmt.CUDAMatrix(
            np.array(np.empty((num_vis, self.batch_size)),
                     dtype=np.float32,
                     order='F'))
        t8 = cmt.CUDAMatrix(
            np.array(np.empty((num_vis, self.num_fac)),
                     dtype=np.float32,
                     order='F'))
        t9 = cmt.CUDAMatrix(
            np.array(np.zeros((self.num_fac, self.num_hid_cov)),
                     dtype=np.float32,
                     order='F'))
        t10 = cmt.CUDAMatrix(
            np.array(np.empty((1, self.num_fac)), dtype=np.float32, order='F'))
        t11 = cmt.CUDAMatrix(
            np.array(np.empty((1, self.num_hid_cov)),
                     dtype=np.float32,
                     order='F'))

        # commented to avoid computing the energy on test data
        #~ t1_test = cmt.CUDAMatrix( np.array(np.empty((self.num_hid_cov, testSampNum)), dtype=np.float32, order='F'))
        #~ t2_test = cmt.CUDAMatrix( np.array(np.empty((self.num_hid_cov, testSampNum)), dtype=np.float32, order='F'))
        #~ t3_test = cmt.CUDAMatrix( np.array(np.empty((self.num_fac, testSampNum)), dtype=np.float32, order='F'))
        #~ t4_test = cmt.CUDAMatrix( np.array(np.empty((1,testSampNum)), dtype=np.float32, order='F'))
        #~ t5_test = cmt.CUDAMatrix( np.array(np.empty((1,1)), dtype=np.float32, order='F'))
        #~ t6_test = cmt.CUDAMatrix( np.array(np.empty((num_vis, testSampNum)), dtype=np.float32, order='F'))

        meanEnergy = np.zeros(self.num_epochs)
        minEnergy = np.zeros(self.num_epochs)
        maxEnergy = np.zeros(self.num_epochs)
        #~ meanEnergy_test = np.zeros(self.num_epochs)
        #~ minEnergy_test = np.zeros(self.num_epochs)
        #~ maxEnergy_test = np.zeros(self.num_epochs)

        # start training
        for epoch in range(self.num_epochs):

            print "Epoch " + str(epoch)

            # anneal learning rates as found in the original code -
            # uncomment if you wish to use annealing!
            #~ epsilonVFc    = epsilonVF/max(1,epoch/20)
            #~ epsilonFHc    = epsilonFH/max(1,epoch/20)
            #~ epsilonbc    = epsilonb/max(1,epoch/20)
            #~ epsilonw_meanc = epsilonw_mean/max(1,epoch/20)
            #~ epsilonb_meanc = epsilonb_mean/max(1,epoch/20)

            # no annealing is used in our experiments because learning
            # was stopping too early
            epsilonVFc = epsilonVF
            epsilonFHc = epsilonFH
            epsilonbc = epsilonb
            epsilonw_meanc = epsilonw_mean
            epsilonb_meanc = epsilonb_mean

            weightcost = weightcost_final

            if epoch <= self.startFH:
                epsilonFHc = 0
            if epoch <= self.startwd:
                weightcost = 0

            # commented to avoid computing the energy on test data
            #~ data_test = test_dat

            #~ data_test.mult(data_test, target = t6_test) # DxP
            #~ t6_test.sum(axis = 0, target = lengthsq_test) # 1xP
            #~ lengthsq_test.mult(1./num_vis) # normalize by number of components (like std)
            #~ lengthsq_test.add(small) # small avoids division by 0
            #~ cmt.sqrt(lengthsq_test, target = length_test)
            #~ length_test.reciprocal(target = normcoeff_test) # 1xP
            #~ data_test.mult_by_row(normcoeff_test, target = normdata_test) # normalized data

            for batch in range(num_batches):

                # get current minibatch
                data = dev_dat.slice(
                    batch * self.batch_size, (batch + 1) *
                    self.batch_size)  # DxP (nr dims x nr samples)

                # normalize input data
                data.mult(data, target=t6)  # DxP
                t6.sum(axis=0, target=lengthsq)  # 1xP
                lengthsq.mult(1. / num_vis)  # normalize by number of components (like std)
                lengthsq.add(small)  # small avoids division by 0
                cmt.sqrt(lengthsq, target=length)
                length.reciprocal(target=normcoeff)  # 1xP
                data.mult_by_row(normcoeff, target=normdata)  # normalized data
                ## compute positive sample derivatives
                # covariance part
                cmt.dot(VF.T, normdata,
                        target=feat)  # HxP (nr facs x nr samples)
                feat.mult(feat, target=featsq)  # HxP
                cmt.dot(FH.T, featsq,
                        target=t1)  # OxP (nr cov hiddens x nr samples)
                t1.mult(-0.5)
                t1.add_col_vec(bias_cov)  # OxP
                t1.apply_sigmoid(target=t2)  # OxP
                cmt.dot(featsq, t2.T, target=FHinc)  # HxO
                cmt.dot(FH, t2, target=t3)  # HxP
                t3.mult(feat)
                cmt.dot(normdata, t3.T, target=VFinc)  # VxH
                t2.sum(axis=1, target=bias_covinc)
                bias_covinc.mult(-1)
                # visible bias
                data.sum(axis=1, target=bias_visinc)
                bias_visinc.mult(-1)
                # mean part
                cmt.dot(w_mean.T, data,
                        target=feat_mean)  # HxP (nr mean hiddens x nr samples)
                feat_mean.add_col_vec(bias_mean)  # HxP
                feat_mean.apply_sigmoid()  # HxP
                feat_mean.mult(-1)
                cmt.dot(data, feat_mean.T, target=w_meaninc)
                feat_mean.sum(axis=1, target=bias_meaninc)

                # HMC sampling: draw an approximate sample from the model
                if self.doPCD == 0:  # CD-1 (set negative data to current training samples)
                    hmc_step, hmc_ave_rej = self.draw_HMC_samples(
                        data, negdata, normdata, vel, gradient, normgradient,
                        new_energy, old_energy, VF, FH, bias_cov, bias_vis,
                        w_mean, bias_mean, hmc_step, hmc_step_nr, hmc_ave_rej,
                        hmc_target_ave_rej, t1, t2, t3, t4, t5, t6, t7, thresh,
                        feat, featsq, self.batch_size, feat_mean, length,
                        lengthsq, normcoeff, small, num_vis)
                else:  # PCD-1 (use previous negative data as starting point for chain)
                    negdataini.assign(negdata)
                    hmc_step, hmc_ave_rej = self.draw_HMC_samples(
                        negdataini, negdata, normdata, vel, gradient,
                        normgradient, new_energy, old_energy, VF, FH, bias_cov,
                        bias_vis, w_mean, bias_mean, hmc_step, hmc_step_nr,
                        hmc_ave_rej, hmc_target_ave_rej, t1, t2, t3, t4, t5,
                        t6, t7, thresh, feat, featsq, self.batch_size,
                        feat_mean, length, lengthsq, normcoeff, small, num_vis)

                # compute derivatives at the negative samples
                # normalize input data
                negdata.mult(negdata, target=t6)  # DxP
                t6.sum(axis=0, target=lengthsq)  # 1xP
                lengthsq.mult(1. / num_vis)  # normalize by number of components (like std)
                lengthsq.add(small)
                cmt.sqrt(lengthsq, target=length)
                length.reciprocal(target=normcoeff)  # 1xP
                negdata.mult_by_row(normcoeff,
                                    target=normdata)  # normalized data
                # covariance part
                cmt.dot(VF.T, normdata, target=feat)  # HxP
                feat.mult(feat, target=featsq)  # HxP
                cmt.dot(FH.T, featsq, target=t1)  # OxP
                t1.mult(-0.5)
                t1.add_col_vec(bias_cov)  # OxP
                t1.apply_sigmoid(target=t2)  # OxP
                FHinc.subtract_dot(featsq, t2.T)  # HxO
                FHinc.mult(0.5)
                cmt.dot(FH, t2, target=t3)  # HxP
                t3.mult(feat)
                VFinc.subtract_dot(normdata, t3.T)  # VxH
                bias_covinc.add_sums(t2, axis=1)
                # visible bias
                bias_visinc.add_sums(negdata, axis=1)
                # mean part
                cmt.dot(w_mean.T, negdata, target=feat_mean)  # HxP
                feat_mean.add_col_vec(bias_mean)  # HxP
                feat_mean.apply_sigmoid()  # HxP
                w_meaninc.add_dot(negdata, feat_mean.T)
                bias_meaninc.add_sums(feat_mean, axis=1)

                # update parameters
                VFinc.add_mult(VF.sign(), weightcost)  # L1 regularization
                VF.add_mult(VFinc, -epsilonVFc / self.batch_size)
                # normalize columns of VF: normalize by running average of their norm
                VF.mult(VF, target=t8)
                t8.sum(axis=0, target=t10)
                cmt.sqrt(t10)
                t10.sum(axis=1, target=t5)
                t5.copy_to_host()
                normVF = .95 * normVF + (
                    .05 / self.num_fac) * t5.numpy_array[0, 0]  # estimate norm
                t10.reciprocal()
                VF.mult_by_row(t10)
                VF.mult(normVF)
                bias_cov.add_mult(bias_covinc, -epsilonbc / self.batch_size)
                bias_vis.add_mult(bias_visinc, -epsilonbc / self.batch_size)

                if epoch > self.startFH:
                    FHinc.add_mult(FH.sign(), weightcost)  # L1 regularization
                    FH.add_mult(FHinc, -epsilonFHc / self.batch_size)  # update
                    # set to 0 negative entries in FH
                    FH.greater_than(0, target=t9)
                    FH.mult(t9)
                    if self.apply_mask == 1:
                        FH.mult(mask)
                    # normalize columns of FH: L1 norm set to 1 in each column
                    FH.sum(axis=0, target=t11)
                    t11.reciprocal()
                    FH.mult_by_row(t11)
                w_meaninc.add_mult(w_mean.sign(), weightcost)
                w_mean.add_mult(w_meaninc, -epsilonw_meanc / self.batch_size)
                bias_mean.add_mult(bias_meaninc,
                                   -epsilonb_meanc / self.batch_size)

            if self.verbose == 1:
                print "VF: " + '%3.2e' % VF.euclid_norm(
                ) + ", DVF: " + '%3.2e' % (
                    VFinc.euclid_norm() * (epsilonVFc / self.batch_size)
                ) + ", FH: " + '%3.2e' % FH.euclid_norm(
                ) + ", DFH: " + '%3.2e' % (
                    FHinc.euclid_norm() * (epsilonFHc / self.batch_size)
                ) + ", bias_cov: " + '%3.2e' % bias_cov.euclid_norm(
                ) + ", Dbias_cov: " + '%3.2e' % (
                    bias_covinc.euclid_norm() * (epsilonbc / self.batch_size)
                ) + ", bias_vis: " + '%3.2e' % bias_vis.euclid_norm(
                ) + ", Dbias_vis: " + '%3.2e' % (
                    bias_visinc.euclid_norm() * (epsilonbc / self.batch_size)
                ) + ", wm: " + '%3.2e' % w_mean.euclid_norm(
                ) + ", Dwm: " + '%3.2e' % (
                    w_meaninc.euclid_norm() *
                    (epsilonw_meanc / self.batch_size)
                ) + ", bm: " + '%3.2e' % bias_mean.euclid_norm(
                ) + ", Dbm: " + '%3.2e' % (
                    bias_meaninc.euclid_norm() *
                    (epsilonb_meanc / self.batch_size)
                ) + ", step: " + '%3.2e' % hmc_step + ", rej: " + '%3.2e' % hmc_ave_rej
                with open('terminal.txt', 'a') as f:
                    f.write('\n' + "epoch: %s" % str(epoch) + ", VF: " +
                            '%3.2e' % VF.euclid_norm() + ", DVF: " + '%3.2e' %
                            (VFinc.euclid_norm() *
                             (epsilonVFc / self.batch_size)) + ", FH: " +
                            '%3.2e' % FH.euclid_norm() + ", DFH: " + '%3.2e' %
                            (FHinc.euclid_norm() *
                             (epsilonFHc / self.batch_size)) + ", bias_cov: " +
                            '%3.2e' % bias_cov.euclid_norm() +
                            ", Dbias_cov: " + '%3.2e' %
                            (bias_covinc.euclid_norm() *
                             (epsilonbc / self.batch_size)) + ", bias_vis: " +
                            '%3.2e' % bias_vis.euclid_norm() +
                            ", Dbias_vis: " + '%3.2e' %
                            (bias_visinc.euclid_norm() *
                             (epsilonbc / self.batch_size)) + ", wm: " +
                            '%3.2e' % w_mean.euclid_norm() + ", Dwm: " +
                            '%3.2e' % (w_meaninc.euclid_norm() *
                                       (epsilonw_meanc / self.batch_size)) +
                            ", bm: " + '%3.2e' % bias_mean.euclid_norm() +
                            ", Dbm: " + '%3.2e' %
                            (bias_meaninc.euclid_norm() *
                             (epsilonb_meanc / self.batch_size)) + ", step: " +
                            '%3.2e' % hmc_step + ", rej: " +
                            '%3.2e' % hmc_ave_rej)
                sys.stdout.flush()

            # compute the energy on the training data (comment this block out to skip it)
            self.compute_energy_mcRBM_visual(data, normdata, energy, VF, FH,
                                             bias_cov, bias_vis, w_mean,
                                             bias_mean, t1, t2, t6, feat,
                                             featsq, feat_mean, length,
                                             lengthsq, normcoeff, small,
                                             num_vis)
            energy.copy_to_host()
            meanEnergy[epoch] = np.mean(energy.numpy_array)
            minEnergy[epoch] = np.min(energy.numpy_array)
            maxEnergy[epoch] = np.max(energy.numpy_array)

            # commented to avoid computing the energy on test data
            #~ self.compute_energy_mcRBM_visual(data_test,normdata_test,energy_test,VF,FH,bias_cov,bias_vis,w_mean,bias_mean,t1_test,t2_test,t6_test,feat_test,featsq_test,feat_mean_test,length_test,lengthsq_test,normcoeff_test,small,num_vis)
            #~ energy_test.copy_to_host()
            #~ meanEnergy_test[epoch] = np.mean(energy_test.numpy_array)
            #~ minEnergy_test[epoch] = np.min(energy_test.numpy_array)
            #~ maxEnergy_test[epoch] = np.max(energy_test.numpy_array)

            ax1.cla()
            ax1.plot(range(epoch), meanEnergy[0:epoch])
            ax1.plot(range(epoch), maxEnergy[0:epoch])
            ax1.plot(range(epoch), minEnergy[0:epoch])

            if np.mod(epoch, 100) == 0:
                #f1.savefig(output_folder + str(epoch)+'_'+'fig.png')
                f1.savefig(self.plotsDir +
                           '/energy/energyAt_%s.png' % str(epoch))

            # back-up every once in a while
            if np.mod(epoch, 100) == 0:
                VF.copy_to_host()
                FH.copy_to_host()
                bias_cov.copy_to_host()
                w_mean.copy_to_host()
                bias_mean.copy_to_host()
                bias_vis.copy_to_host()
                savemat(
                    "./weights/ws_temp%s" % str(epoch), {
                        'VF': VF.numpy_array,
                        'FH': FH.numpy_array,
                        'bias_cov': bias_cov.numpy_array,
                        'bias_vis': bias_vis.numpy_array,
                        'w_mean': w_mean.numpy_array,
                        'bias_mean': bias_mean.numpy_array,
                        'epoch': epoch
                    })

                # uncomment if computing the energy in order to store its evolution throughout training
                #~ savemat(self.refDir + '/' + "training_energy_" + str(self.num_fac) + "_cov" + str(self.num_hid_cov) + "_mean" + str(self.num_hid_mean), {'meanEnergy':meanEnergy,'meanEnergy_test':meanEnergy_test,'maxEnergy': maxEnergy, 'maxEnergy_test': maxEnergy_test, 'minEnergy': minEnergy, 'minEnergy_test': minEnergy_test, 'epoch':epoch})
                #savemat("training_energy_" + str(self.num_fac) + "_cov" + str(self.num_hid_cov) + "_mean" + str(self.num_hid_mean), {'meanEnergy':meanEnergy, 'maxEnergy': maxEnergy, 'minEnergy': minEnergy, 'epoch':epoch})

            # in order to stop the training gracefully, create an empty file
            # named 'stop_now' in the folder containing the experiment
            # configuration file
            if os.path.isfile('stop_now'):
                break

        # final back-up
        VF.copy_to_host()
        FH.copy_to_host()
        bias_cov.copy_to_host()
        bias_vis.copy_to_host()
        w_mean.copy_to_host()
        bias_mean.copy_to_host()
        savemat(
            "ws_fac%s" % str(self.num_fac) + "_cov%s" % str(self.num_hid_cov) +
            "_mean%s" % str(self.num_hid_mean), {
                'VF': VF.numpy_array,
                'FH': FH.numpy_array,
                'bias_cov': bias_cov.numpy_array,
                'bias_vis': bias_vis.numpy_array,
                'w_mean': w_mean.numpy_array,
                'bias_mean': bias_mean.numpy_array,
                'epoch': epoch
            })

        # uncomment if computing the energy in order to store its evolution throughout training
        #~ savemat(self.refDir + '/' + "training_energy_" + str(self.num_fac) + "_cov" + str(self.num_hid_cov) + "_mean" + str(self.num_hid_mean), {'meanEnergy':meanEnergy,'meanEnergy_test':meanEnergy_test,'maxEnergy': maxEnergy, 'maxEnergy_test': maxEnergy_test, 'minEnergy': minEnergy, 'minEnergy_test': minEnergy_test, 'epoch':epoch})
        savemat(
            "training_energy_" + str(self.num_fac) + "_cov" +
            str(self.num_hid_cov) + "_mean" + str(self.num_hid_mean), {
                'meanEnergy': meanEnergy,
                'maxEnergy': maxEnergy,
                'minEnergy': minEnergy,
                'epoch': epoch
            })

        # Compute states if desired:
        # normalise data for covariance hidden:
        #dsq = np.square(visData)
        #lsq = np.sum(dsq, axis=0)
        #lsq /= visData.shape[1]
        #lsq += np.spacing(1)
        #l = np.sqrt(lsq)
        #normD = visData/l

        #logisticArg_c = (-0.5*np.dot(FH.numpy_array.T, np.square(np.dot(VF.numpy_array.T, normD.T))) + bias_cov.numpy_array).T
        #p_hc = logisticFunc(logisticArg_c)

        #logisticArg_m = np.dot(visData, w_mean.numpy_array) + bias_mean.numpy_array.T
        #p_hm = logisticFunc(logisticArg_m)

        #p_all = np.concatenate((p_hc, p_hm), axis=1)
        #savemat(self.probabilitiesDir + '/pAll_%i.mat' % epoch, mdict={'p_all':p_all})

        with open('done', 'w') as doneFile:
            doneFile.write(
                datetime.strftime(datetime.now(), '%d/%m/%Y %H:%M:%S'))
Example #59
    def run(self, iterations):

        for i in range(iterations):
            # update W
            cm.dot(self.W_gpu, self.H_gpu, target=self.WH_gpu)
            cm.dot(self.X_gpu, self.H_gpu.T, target=self.XHT_gpu)
            cm.dot(self.WH_gpu, self.H_gpu.T, target=self.WHHT_gpu)
            self.WHHT_gpu.add(self.sparseW)
            self.W_gpu.mult(self.XHT_gpu).divide(self.WHHT_gpu)

            # normalize W
            cm.dot(self.nones_gpu, self.W_gpu, target=self.Wrowsum_gpu) # slower correct version: W_gpu.sum(0, target=rowsum_gpu)
            self.W_gpu.div_by_row(self.Wrowsum_gpu)

            # update H
            cm.dot(self.W_gpu.T, self.X_gpu, target=self.WTX_gpu)
            cm.dot(self.W_gpu.T, self.WH_gpu, target=self.WTWH_gpu)
            self.WTWH_gpu.add(self.sparseH)
            self.H_gpu.mult(self.WTX_gpu).divide(self.WTWH_gpu)

            # test for convergence
            if (i % self.niter_test_conv == 0) and self.checkConvergence():
                print "NMF converged after %i iterations" % i
                break
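For reference, a CPU-only sketch of one iteration of the sparsity-penalized multiplicative updates used above (assuming dense NumPy arrays; eps is added here only to guard against division by zero and is not in the GPU code):

    import numpy as np

    def sparse_nmf_update(X, W, H, sparse_w=0.0, sparse_h=0.0, eps=1e-9):
        # WH is computed once with the old W, as in the GPU loop above.
        WH = W.dot(H)

        # Update W, then normalize its columns to sum to one.
        W = W * X.dot(H.T) / (WH.dot(H.T) + sparse_w + eps)
        W = W / W.sum(axis=0, keepdims=True)

        # Update H with the updated W but the old WH (matching the GPU code).
        H = H * W.T.dot(X) / (W.T.dot(WH) + sparse_h + eps)
        return W, H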
Example #60
out = cm.empty((dim_out, batch_size))
delta = cm.empty((num_hid, batch_size))

# Train neural network.
start_time = time.time()
for epoch in range(num_epochs):
    print("Epoch %i" % (epoch + 1))
    err = []

    for batch in range(num_batches):
        # get current minibatch
        inp = dev_train.slice(batch * batch_size, (batch + 1) * batch_size)
        target = dev_lbl.slice(batch * batch_size, (batch + 1) * batch_size)

        # forward pass
        cm.dot(w_w1.T, inp, target=h)

        h.add_col_vec(w_b1)
        h.apply_sigmoid()

        cm.dot(w_w2.T, h, target=out)

        out.add_col_vec(w_b2)
        out.apply_sigmoid()

        # back prop errors
        out.subtract(target)  # compute error

        # gradients for w_w2 and w_b2
        wu_w2.add_dot(h, out.T, beta=momentum)
        wu_b2.add_sums(out, axis=1, beta=momentum)