Example #1
    def TrainAutoencoderGPU(self, _raaX, oOptions):

        # Copy to device
        raaX = cudamat.CUDAMatrix(_raaX)

        # Count the number of training samples
        iSamples = raaX.shape[0]
                  
        # For each layer pair...
        for iLayer in range(len(self.oaLayer)):

            # Clone layer weights on device
            raaW = cudamat.CUDAMatrix(self.oaLayer[iLayer].raaW)
            raV  = cudamat.CUDAMatrix(numpy.atleast_2d(self.oaLayer[iLayer].raV))
            raH  = cudamat.CUDAMatrix(numpy.atleast_2d(self.oaLayer[iLayer].raH))

            # Measure this layer
            iVs = self.oaLayer[iLayer].iV
            iHs = self.oaLayer[iLayer].iH

            # Create a delta array to retain momentum state
            raaDelta = cudamat.zeros((iVs,iHs))
            raDeltaV = cudamat.zeros((iVs,1))
            raDeltaH = cudamat.zeros((iHs,1))

            # Create a diff array to retain current update
            raaDiff  = cudamat.empty((iVs,iHs))
            raDiffV  = cudamat.empty((1,iVs))
            raDiffH  = cudamat.empty((1,iHs))
            
            # Create an array to retain the layer output for 
            # training the next layer
            raaY = cudamat.empty((iSamples, iHs))
            
            # Get short references to layer parameters
            sActivationUp = self.oaLayer[iLayer].sActivationUp
            sActivationDn = self.oaLayer[iLayer].sActivationDn

            # Placeholder for unused sampled-state outputs
            junk = None
            
            # For each training epoch...
            for iEpoch in range(oOptions.iEpochs):

                # Get short references to epoch parameters
                (rRate, rMomentum, rDropV, rDropH, bSample, rDecay) = oOptions.fTrainingParameters(iLayer,iEpoch)

                # Clear the sample index
                iIndex   = 0
                
                # Clear error accumulators for this layer
                rTotalSe = 0
                rTotalE  = 0

                # While training samples remain...
                while (iIndex<iSamples):

                    # Number of samples to process in this batch
                    iBatch = min(self.iBatchSamples, iSamples-iIndex)

                    # Create working arrays on the device
                    baaH   = cudamat.empty((iBatch,iHs))
                    raaH1d = cudamat.empty((iBatch,iHs))
                    raaH1s = cudamat.empty((iBatch,iHs))
                    raaH3  = cudamat.empty((iBatch,iHs))

                    baaV   = cudamat.empty((iBatch,iVs))
                    raaV0  = cudamat.empty((iBatch,iVs))
                    raaV2  = cudamat.empty((iBatch,iVs))

                    # Get a batch of inputs in raaV0
                    raaX.get_row_slice(iIndex, iIndex+iBatch, target=raaV0)
                    
                    # If we need to drop visible units...
                    if(rDropV>0):
                    
                        # Compute a keep mask (1 = keep): each visible unit
                        # survives with probability 1-rDropV
                        baaV.fill_with_rand()
                        baaV.greater_than(rDropV)
                        raaV0.mult(baaV)

                    # Advance the markov chain V0->H1
                    # raaH1d, raaH1s = self.UpdateStatesGPU(sActivationUp, raaW, raH, raaV0, rDropV, True)
                    self.UpdateStatesGPU(sActivationUp, raaW, raH, raaV0, raaH1d, raaH1s, rDropV, True)

                    # If stochastic sampling is enabled...
                    if (bSample):

                        # Use sampled states
                        raaH1 = raaH1s

                    else:

                        # Use deterministic states
                        raaH1 = raaH1d

                    # If we need to drop hidden units...
                    if(rDropH>0):
                        
                        # Compute a keep mask (1 = keep): each hidden unit
                        # survives with probability 1-rDropH
                        baaH.fill_with_rand()
                        baaH.greater_than(rDropH)
                        raaH1.mult(baaH)

                    # Advance the markov chain H1->V2
                    # raaV2, junk  = self.UpdateStatesGPU(sActivationDn, raaW.T, raV, raaH1, rDropH)
                    self.UpdateStatesGPU(sActivationDn, raaW.T, raV, raaH1, raaV2, junk, rDropH)

                    # If we need to drop visible units...
                    if(rDropV>0):
                        
                        # Clear dropped states
                        raaV2.mult(baaV)

                    # Advance the markov chain V2->H3
                    # raaH3, junk  = self.UpdateStatesGPU(sActivationUp, raaW, raH, raaV2, rDropV)
                    self.UpdateStatesGPU(sActivationUp, raaW, raH, raaV2, raaH3, junk, rDropV)

                    # If we need to drop hidden units...
                    if(rDropH>0):
                        
                        # Clear dropped states
                        raaH3.mult(baaH)

                    # Scale factor to average the gradient estimates
                    rScale = 1.0/iBatch
                                           
                    # Scale all weights uniformly
                    #raaDiff = ( numpy.dot(raaV0.T,raaH1) - numpy.dot(raaV2.T,raaH3) )*rScale 
                    cudamat.dot(raaV0.T,raaH1,raaDiff)
                    raaDiff.subtract_dot(raaV2.T,raaH3)
                    raaDiff.mult(rScale)

                    # Update the weight delta array using the current momentum and
                    # learning rate
                    # raaDelta = raaDelta*rMomentum + raaDiff*rRate
                    raaDelta.mult(rMomentum)
                    raaDiff.mult(rRate)
                    raaDelta.add(raaDiff)

                    # Update the weights
                    #self.oaLayer[iLayer].raaW = self.oaLayer[iLayer].raaW + raaDelta
                    raaW.add(raaDelta)

                    # Compute bias gradients
                    #raDiffV = numpy.sum(raaV0-raaV2,axis=0)*rScale              
                    #raDiffH = numpy.sum(raaH1-raaH3,axis=0)*rScale
                    raaV0.sum(axis=0,mult=rScale).subtract(raaV2.sum(axis=0,mult=rScale),target=raDiffV)
                    raaH1.sum(axis=0,mult=rScale).subtract(raaH3.sum(axis=0,mult=rScale),target=raDiffH)

                    # Update the biases
                    #raV += raDiffV*rRate
                    #raH += raDiffH*rRate
                    raV.add_mult(raDiffV, rRate)
                    raH.add_mult(raDiffH, rRate)

                    # Apply weight decay
                    raaW.mult(rDecay)
                    raV.mult(rDecay)
                    raH.mult(rDecay)

                    # Advance to the next minibatch
                    iIndex = iIndex + iBatch

                # Create storage for reconstruction
                raaXr = cudamat.empty((iSamples, iVs))

                # Compute hidden layer
                self.UpdateStatesGPU(sActivationUp, raaW, raH, raaX, raaY, junk)

                # Compute visible layer
                self.UpdateStatesGPU(sActivationDn, raaW.T, raV, raaY, raaXr, junk)

                # Compute error metrics
                rTotalSe, rTotalE = self.GetErrorsGPU(raaX, raaXr, sActivationDn)
                    
                # Finish the rmse calculation
                rRmse = math.sqrt(rTotalSe/(raaX.shape[0]*raaX.shape[1]))
                
                # Finish the mean error calculation
                rError = rTotalE/(raaX.shape[0]*raaX.shape[1])

                # Report training progress
                oOptions.fEpochReport(iLayer, iEpoch, bSample, rDropV, rDropH, rRate, rMomentum, rRmse)
            
            # Current layer outputs are the next layer inputs
            raaX = raaY

            # Copy the trained weights back to the host
            self.oaLayer[iLayer].raaW = raaW.asarray()
            self.oaLayer[iLayer].raV  = raV.asarray()
            self.oaLayer[iLayer].raH  = raH.asarray()
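
Both examples call an UpdateStatesGPU / _UpdateStates helper that is not shown on this page. As a rough guide only, here is a minimal sketch of what the propagation step plausibly looks like for a logistic activation, with the signature inferred from the call sites above; the inverted-dropout rescaling and the activation dispatch are assumptions, not taken from the original source:

    def UpdateStatesGPU(self, sActivation, raaW, raB, raaX, raaY, raaYs,
                        rDrop=0, bSample=False):

        # Affine propagation: Y = X*W + b
        cudamat.dot(raaX, raaW, raaY)
        raaY.add_row_vec(raB)

        # Rescale to compensate for inputs zeroed by dropout
        # (assumed inverted-dropout convention)
        if (rDrop>0):
            raaY.mult(1.0/(1.0-rDrop))

        # Apply the activation function (only the logistic case is sketched)
        if (sActivation=="Logistic"):
            raaY.apply_sigmoid()

        # Optionally draw Bernoulli samples from the activation probabilities
        if (bSample):
            raaYs.fill_with_rand()
            raaYs.less_than(raaY)

At the call sites, raaY receives the deterministic activations (raaH1d) and raaYs the stochastic samples (raaH1s); passing junk for raaYs with bSample left False skips the sampling step entirely.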
Example #2
    def TrainAutoencoder(self, _raaX, oOptions):

        # Initialize cudamat and seed its random number generator
        cudamat.init()
        cudamat.CUDAMatrix.init_random(seed = 42)

        # Copy to device
        raaX = cudamat.CUDAMatrix(_raaX)

        # Count the number of training samples
        iSamples = raaX.shape[0]
                  
        # For each layer pair...
        for iLayer in range(len(self.oaLayer)-1):

            # Clone layer weights on device
            raaW = cudamat.CUDAMatrix(self.oaLayer[iLayer].raaW)
            raV  = cudamat.CUDAMatrix(numpy.atleast_2d(self.oaLayer[iLayer].raV))
            raH  = cudamat.CUDAMatrix(numpy.atleast_2d(self.oaLayer[iLayer].raH))

            # Measure this layer
            iVs = self.oaLayer[iLayer].raaW.shape[0]
            iHs = self.oaLayer[iLayer].raaW.shape[1]

            # Create a delta array to retain momentum state
            raaDelta = cudamat.zeros((iVs,iHs))
            raDeltaV = cudamat.zeros((iVs,1))
            raDeltaH = cudamat.zeros((iHs,1))

            # Create a diff array to retain current update
            raaDiff  = cudamat.empty((iVs,iHs))
            raDiffV  = cudamat.empty((1,iVs))
            raDiffH  = cudamat.empty((1,iHs))
            
            # Create an array to retain the layer output for 
            # training the next layer
            raaY = cudamat.empty((iSamples, iHs))
            
            # Get short references to layer parameters
            sActivationUp = self.oaLayer[iLayer].sActivationUp
            sActivationDn = self.oaLayer[iLayer].sActivationDn

            # Placeholder for unused sampled-state outputs
            junk = None
            
            # For each training epoch...
            for iEpoch in range(oOptions.iEpochs):

                # Get short references to epoch parameters
                rDropV    = oOptions.oaLayer[iLayer].raDropV[iEpoch]
                rDropH    = oOptions.oaLayer[iLayer].raDropH[iEpoch]
                rMomentum = oOptions.oaLayer[iLayer].raMomentum[iEpoch]
                rRate     = oOptions.oaLayer[iLayer].raRate[iEpoch]
                bSample   = oOptions.oaLayer[iLayer].baSample[iEpoch]

                # Clear the sample index
                iIndex   = 0
                
                # Clear error accumulators for this layer
                rTotalSe = 0
                rTotalE  = 0

                # While training samples remain...
                while (iIndex<iSamples):

                    # Number of samples to process in this batch
                    iBatch = min(self.iBatchSamples, iSamples-iIndex)

                    # Create working arrays on the device
                    baaH   = cudamat.empty((iBatch,iHs))
                    raaH1d = cudamat.empty((iBatch,iHs))
                    raaH1s = cudamat.empty((iBatch,iHs))
                    raaH3  = cudamat.empty((iBatch,iHs))

                    baaV   = cudamat.empty((iBatch,iVs))
                    raaV0  = cudamat.empty((iBatch,iVs))
                    raaV2  = cudamat.empty((iBatch,iVs))

                    # Get a batch of inputs in raaV0
                    raaX.get_row_slice(iIndex, iIndex+iBatch, target=raaV0)
                    
                    # If we need to drop visible units...
                    if(rDropV>0):
                    
                        # Compute a keep mask (1 = keep): each visible unit
                        # survives with probability 1-rDropV
                        baaV.fill_with_rand()
                        baaV.greater_than(rDropV)
                        raaV0.mult(baaV)

                    # Advance the markov chain V0->H1
                    # raaH1d, raaH1s = self._UpdateStates(sActivationUp, raaW, raH, raaV0, rDropV, True)
                    self._UpdateStates(sActivationUp, raaW, raH, raaV0, raaH1d, raaH1s, rDropV, True)

                    # If stochastic sampling is enabled...
                    if (bSample):

                        # Use sampled states
                        raaH1 = raaH1s

                    else:

                        # Use deterministic states
                        raaH1 = raaH1d

                    # If we need to drop hidden units...
                    if(rDropH>0):
                        
                        # Compute a keep mask (1 = keep): each hidden unit
                        # survives with probability 1-rDropH
                        baaH.fill_with_rand()
                        baaH.greater_than(rDropH)
                        raaH1.mult(baaH)

                    # Advance the markov chain H1->V2
                    # raaV2, junk  = self._UpdateStates(sActivationDn, raaW.T, raV, raaH1, rDropH)
                    self._UpdateStates(sActivationDn, raaW.T, raV, raaH1, raaV2, junk, rDropH)

                    # If we need to drop visible units...
                    if(rDropV>0):
                        
                        # Clear dropped states
                        raaV2.mult(baaV)

                    # Advance the markov chain V2->H3
                    # raaH3, junk  = self._UpdateStates(sActivationUp, raaW, raH, raaV2, rDropV)
                    self._UpdateStates(sActivationUp, raaW, raH, raaV2, raaH3, junk, rDropV)

                    # If we need to drop hidden units...
                    if(rDropH>0):
                        
                        # Clear dropped states
                        raaH3.mult(baaH)

                    # Scale factor to average this batch
                    rScale = 1.0/iBatch
                    
                    # If normalizing the dropout gradient by the number
                    # of weight updates rather than the number of batch
                    # samples...
                    if (self.bNormalizeDropoutGradient):
                        
                        # If no visible layer dropout...
                        if (not rDropV):
                            
                            # Construct a null dropout matrix
                            baaV.assign(1)
                        
                        # If no hidden layer dropout...
                        if (not rDropH):
                            
                            # Construct a null dropout matrix 
                            baaH.assign(1)   
                        
                        # Compute the normalizer matrix: raaN[i,j] is the
                        # reciprocal of the number of batch samples in which
                        # both visible unit i and hidden unit j survived
                        # dropout
                        # raaN = 1./numpy.dot(baaV.T, baaH)
                        raaN = cudamat.empty((iVs,iHs))
                        cudamat.dot(baaV.T,baaH,raaN)
                        raaN.reciprocal()

                        # Compute the average difference between positive phase 
                        # up(0,1) and negative phase up(2,3) correlations
                        # raaDiff = numpy.multiply( numpy.dot(raaV0.T,raaH1) - numpy.dot(raaV2.T,raaH3) , raaN)
                        cudamat.dot(raaV0.T,raaH1,raaDiff)
                        raaDiff.subtract_dot(raaV2.T,raaH3)
                        raaDiff.mult(raaN)

                    else:
                        
                        # Scale all weights uniformly
                        #raaDiff = ( numpy.dot(raaV0.T,raaH1) - numpy.dot(raaV2.T,raaH3) )*rScale 
                        cudamat.dot(raaV0.T,raaH1,raaDiff)
                        raaDiff.subtract_dot(raaV2.T,raaH3)
                        raaDiff.mult(rScale)

                    # Compute bias gradients
                    #raDiffV = numpy.sum(raaV0-raaV2,axis=0)*rScale              
                    #raDiffH = numpy.sum(raaH1-raaH3,axis=0)*rScale

                    raaV0.sum(axis=0,mult=rScale).subtract(raaV2.sum(axis=0,mult=rScale),target=raDiffV)
                    raaH1.sum(axis=0,mult=rScale).subtract(raaH3.sum(axis=0,mult=rScale),target=raDiffH)

                    # Update the weight delta array using the current momentum and
                    # learning rate
                    # raaDelta = raaDelta*rMomentum + raaDiff*rRate
                    raaDelta.mult(rMomentum)
                    raaDiff.mult(rRate)
                    raaDelta.add(raaDiff)

                    # Update the weights
                    #self.oaLayer[iLayer].raaW = self.oaLayer[iLayer].raaW + raaDelta
                    raaW.add(raaDelta)
                    
                    # Advance to the next minibatch
                    iIndex = iIndex + iBatch

                # Create storage for reconstruction
                raaXr = cudamat.empty((iSamples, iVs))

                # Compute hidden layer
                self._UpdateStates(sActivationUp, raaW, raH, raaX, raaY, junk, 0)
                
                # Compute visible layer
                self._UpdateStates(sActivationDn, raaW.T, raV, raaY, raaXr, junk, 0)

                # Compute error metrics
                rTotalSe, rTotalE = self.GetErrors(raaX, raaXr, sActivationDn)
                    
                # Finish the rmse calculation
                rRmse = math.sqrt(rTotalSe/(raaX.shape[0]*raaX.shape[1]))
                
                # Finish the mean error calculation
                rError = rTotalE/(raaX.shape[0]*raaX.shape[1])

                # Report training progress
                oOptions.fEvent(iLayer, iEpoch, bSample, rDropV, rDropH, rRate, rMomentum, rRmse, rError)
            
            # Current layer outputs are the next layer inputs
            raaX = raaY

            # Copy the trained weights back to the host
            self.oaLayer[iLayer].raaW = raaW.asarray()
            self.oaLayer[iLayer].raV  = raV.asarray()
            self.oaLayer[iLayer].raH  = raH.asarray()
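
For reference, the commented NumPy lines scattered through both examples describe the per-batch CD-1 update that the cudamat calls implement. Collected into one self-contained function (the name Cd1BatchUpdate and the standalone packaging are ours for illustration; the multiplicative decay step appears only in Example #1):

import numpy

def Cd1BatchUpdate(raaV0, raaH1, raaV2, raaH3,
                   raaW, raV, raH, raaDelta,
                   rRate, rMomentum, rDecay):

    # Scale factor to average the gradient estimates over the batch
    rScale = 1.0/raaV0.shape[0]

    # Positive-phase minus negative-phase correlations
    raaDiff = (numpy.dot(raaV0.T, raaH1) - numpy.dot(raaV2.T, raaH3))*rScale

    # Momentum-smoothed weight step
    raaDelta *= rMomentum
    raaDelta += raaDiff*rRate
    raaW += raaDelta

    # Bias gradients and updates
    raV += numpy.sum(raaV0 - raaV2, axis=0)*rScale*rRate
    raH += numpy.sum(raaH1 - raaH3, axis=0)*rScale*rRate

    # Multiplicative weight decay, as in Example #1
    raaW *= rDecay
    raV *= rDecay
    raH *= rDecay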