def TrainAutoencoderGPU(self, _raaX, oOptions):
    """Greedy layerwise pretraining of the autoencoder on the GPU.

    Each layer pair is trained as an RBM using one step of contrastive
    divergence (V0->H1->V2->H3), with optional dropout on visible and
    hidden units, stochastic hidden sampling, momentum, and weight decay.
    After each layer is trained its outputs become the inputs for the
    next layer, and the trained parameters are copied back to the host.

    * _raaX    - training samples, one sample per row (numpy array)
    * oOptions - training options: iEpochs, fTrainingParameters(iLayer,
                 iEpoch) schedule accessor, and fEpochReport callback
    """

    # Copy the training data to the device
    raaX = cudamat.CUDAMatrix(_raaX)

    # Count the number of training samples
    iSamples = raaX.shape[0]

    # For each layer pair...
    for iLayer in range(len(self.oaLayer)):

        # Clone layer weights and biases on the device (biases are
        # promoted to 2d row vectors for cudamat)
        raaW = cudamat.CUDAMatrix(self.oaLayer[iLayer].raaW)
        raV = cudamat.CUDAMatrix(numpy.atleast_2d(self.oaLayer[iLayer].raV))
        raH = cudamat.CUDAMatrix(numpy.atleast_2d(self.oaLayer[iLayer].raH))

        # Measure this layer
        iVs = self.oaLayer[iLayer].iV
        iHs = self.oaLayer[iLayer].iH

        # Create a delta array to retain momentum state
        raaDelta = cudamat.zeros((iVs, iHs))

        # Create diff arrays to retain the current update
        raaDiff = cudamat.empty((iVs, iHs))
        raDiffV = cudamat.empty((1, iVs))
        raDiffH = cudamat.empty((1, iHs))

        # Create an array to retain the layer output for
        # training the next layer
        raaY = cudamat.empty((iSamples, iHs))

        # Get short references to layer parameters
        sActivationUp = self.oaLayer[iLayer].sActivationUp
        sActivationDn = self.oaLayer[iLayer].sActivationDn

        # Placeholder target for update outputs we do not need
        junk = None

        # For each training epoch...
        for iEpoch in range(oOptions.iEpochs):

            # Get short references to epoch parameters
            (rRate, rMomentum, rDropV, rDropH, bSample, rDecay) = \
                oOptions.fTrainingParameters(iLayer, iEpoch)

            # Clear the sample index
            iIndex = 0

            # Clear error accumulators for this layer
            rTotalSe = 0
            rTotalE = 0

            # While training samples remain...
            while (iIndex < iSamples):

                # Number of samples to process in this batch
                iBatch = min(self.iBatchSamples, iSamples - iIndex)

                # Create working arrays on the device
                baaH = cudamat.empty((iBatch, iHs))
                raaH1d = cudamat.empty((iBatch, iHs))
                raaH1s = cudamat.empty((iBatch, iHs))
                raaH3 = cudamat.empty((iBatch, iHs))
                baaV = cudamat.empty((iBatch, iVs))
                raaV0 = cudamat.empty((iBatch, iVs))
                raaV2 = cudamat.empty((iBatch, iVs))

                # Get a batch of inputs in raaV0
                raaX.get_row_slice(iIndex, iIndex + iBatch, target=raaV0)

                # If we need to drop visible units...
                if (rDropV > 0):

                    # Compute a random dropout mask and apply it
                    baaV.fill_with_rand()
                    baaV.greater_than(rDropV)
                    raaV0.mult(baaV)

                # Advance the markov chain V0->H1
                # (fills raaH1d deterministic and raaH1s sampled states)
                self.UpdateStatesGPU(sActivationUp, raaW, raH, raaV0, raaH1d, raaH1s, rDropV, True)

                # If stochastic sampling is enabled...
                if (bSample):
                    # Use sampled states
                    raaH1 = raaH1s
                else:
                    # Use deterministic states
                    raaH1 = raaH1d

                # If we need to drop hidden units...
                if (rDropH > 0):

                    # Compute a random dropout mask and apply it
                    baaH.fill_with_rand()
                    baaH.greater_than(rDropH)
                    raaH1.mult(baaH)

                # Advance the markov chain H1->V2
                self.UpdateStatesGPU(sActivationDn, raaW.T, raV, raaH1, raaV2, junk, rDropH)

                # If we need to drop visible units...
                if (rDropV > 0):

                    # Clear dropped states
                    raaV2.mult(baaV)

                # Advance the markov chain V2->H3
                self.UpdateStatesGPU(sActivationUp, raaW, raH, raaV2, raaH3, junk, rDropV)

                # If we need to drop hidden units...
                if (rDropH > 0):

                    # Clear dropped states
                    raaH3.mult(baaH)

                # Scale factor to average the gradient estimates
                # (force float division so the scale is never truncated
                # to zero under Python 2 integer semantics)
                rScale = 1.0 / iBatch

                # Scale all weights uniformly:
                # raaDiff = (dot(raaV0.T,raaH1) - dot(raaV2.T,raaH3)) * rScale
                cudamat.dot(raaV0.T, raaH1, raaDiff)
                raaDiff.subtract_dot(raaV2.T, raaH3)
                raaDiff.mult(rScale)

                # Update the weight delta array using the current momentum
                # and learning rate:
                # raaDelta = raaDelta*rMomentum + raaDiff*rRate
                raaDelta.mult(rMomentum)
                raaDiff.mult(rRate)
                raaDelta.add(raaDiff)

                # Update the weights
                raaW.add(raaDelta)

                # Compute bias gradients:
                # raDiffV = sum(raaV0-raaV2, axis=0) * rScale
                # raDiffH = sum(raaH1-raaH3, axis=0) * rScale
                raaV0.sum(axis=0, mult=rScale).subtract(raaV2.sum(axis=0, mult=rScale), target=raDiffV)
                raaH1.sum(axis=0, mult=rScale).subtract(raaH3.sum(axis=0, mult=rScale), target=raDiffH)

                # Update the biases
                raV.add_mult(raDiffV, rRate)
                raH.add_mult(raDiffH, rRate)

                # Apply weight decay
                raaW.mult(rDecay)
                raV.mult(rDecay)
                raH.mult(rDecay)

                # Advance to the next minibatch
                iIndex = iIndex + iBatch

            # Create storage for the reconstruction
            raaXr = cudamat.empty((iSamples, iVs))

            # Compute hidden layer states for the full training set
            self.UpdateStatesGPU(sActivationUp, raaW, raH, raaX, raaY, junk)

            # Reconstruct the visible layer
            self.UpdateStatesGPU(sActivationDn, raaW.T, raV, raaY, raaXr, junk)

            # Compute error metrics
            rTotalSe, rTotalE = self.GetErrorsGPU(raaX, raaXr, sActivationDn)

            # Finish the rmse calculation
            rRmse = math.sqrt(rTotalSe / (raaX.shape[0] * raaX.shape[1]))

            # Finish the mean error calculation
            rError = rTotalE / (raaX.shape[0] * raaX.shape[1])

            # Report training progress
            oOptions.fEpochReport(iLayer, iEpoch, bSample, rDropV, rDropH, rRate, rMomentum, rRmse)

        # Current layer outputs are the next layer inputs
        raaX = raaY

        # Copy the trained parameters back to the host
        self.oaLayer[iLayer].raaW = raaW.asarray()
        self.oaLayer[iLayer].raV = raV.asarray()
        self.oaLayer[iLayer].raH = raH.asarray()
def TrainAutoencoder(self, _raaX, oOptions):
    """Greedy layerwise pretraining of the autoencoder using cudamat.

    Initializes cudamat, then trains each layer pair as an RBM with one
    step of contrastive divergence (V0->H1->V2->H3), with optional
    dropout on visible and hidden units, stochastic hidden sampling,
    and momentum.  The dropout gradient may optionally be normalized by
    the per-weight update count (self.bNormalizeDropoutGradient).
    Trained parameters are copied back to the host after each layer.

    * _raaX    - training samples, one sample per row (numpy array)
    * oOptions - training options: iEpochs, per-layer per-epoch schedules
                 (raDropV, raDropH, raMomentum, raRate, baSample) and
                 the fEvent progress callback
    """

    # Initialize cudamat with a fixed random seed for reproducibility
    cudamat.init()
    cudamat.CUDAMatrix.init_random(seed=42)

    # Copy the training data to the device
    raaX = cudamat.CUDAMatrix(_raaX)

    # Count the number of training samples
    iSamples = raaX.shape[0]

    # For each layer pair...
    for iLayer in range(len(self.oaLayer) - 1):

        # Clone layer weights and biases on the device (biases are
        # promoted to 2d row vectors for cudamat)
        raaW = cudamat.CUDAMatrix(self.oaLayer[iLayer].raaW)
        raV = cudamat.CUDAMatrix(numpy.atleast_2d(self.oaLayer[iLayer].raV))
        raH = cudamat.CUDAMatrix(numpy.atleast_2d(self.oaLayer[iLayer].raH))

        # Measure this layer
        iVs = self.oaLayer[iLayer].raaW.shape[0]
        iHs = self.oaLayer[iLayer].raaW.shape[1]

        # Create a delta array to retain momentum state
        raaDelta = cudamat.zeros((iVs, iHs))

        # Create diff arrays to retain the current update
        raaDiff = cudamat.empty((iVs, iHs))
        raDiffV = cudamat.empty((1, iVs))
        raDiffH = cudamat.empty((1, iHs))

        # Normalizer matrix for the dropout-gradient path
        # (fix: raaN was referenced below without ever being allocated)
        raaN = cudamat.empty((iVs, iHs))

        # Create an array to retain the layer output for
        # training the next layer
        raaY = cudamat.empty((iSamples, iHs))

        # Create storage for the reconstruction used in error reporting
        # (fix: this allocation was commented out although raaXr is used
        # as a target below)
        raaXr = cudamat.empty((iSamples, iVs))

        # Get short references to layer parameters
        sActivationUp = self.oaLayer[iLayer].sActivationUp
        sActivationDn = self.oaLayer[iLayer].sActivationDn

        # Placeholder target for update outputs we do not need
        junk = None

        # For each training epoch...
        for iEpoch in range(oOptions.iEpochs):

            # Get short references to epoch parameters
            rDropV = oOptions.oaLayer[iLayer].raDropV[iEpoch]
            rDropH = oOptions.oaLayer[iLayer].raDropH[iEpoch]
            rMomentum = oOptions.oaLayer[iLayer].raMomentum[iEpoch]
            rRate = oOptions.oaLayer[iLayer].raRate[iEpoch]
            bSample = oOptions.oaLayer[iLayer].baSample[iEpoch]

            # Clear the sample index
            iIndex = 0

            # Clear error accumulators for this layer
            rTotalSe = 0
            rTotalE = 0

            # While training samples remain...
            while (iIndex < iSamples):

                # Number of samples to process in this batch
                iBatch = min(self.iBatchSamples, iSamples - iIndex)

                # Create working arrays on the device
                baaH = cudamat.empty((iBatch, iHs))
                raaH1d = cudamat.empty((iBatch, iHs))
                raaH1s = cudamat.empty((iBatch, iHs))
                raaH3 = cudamat.empty((iBatch, iHs))
                baaV = cudamat.empty((iBatch, iVs))
                raaV0 = cudamat.empty((iBatch, iVs))
                raaV2 = cudamat.empty((iBatch, iVs))

                # Get a batch of inputs in raaV0
                raaX.get_row_slice(iIndex, iIndex + iBatch, target=raaV0)

                # If we need to drop visible units...
                if (rDropV > 0):

                    # Compute a random dropout mask and apply it
                    baaV.fill_with_rand()
                    baaV.greater_than(rDropV)
                    raaV0.mult(baaV)

                # Advance the markov chain V0->H1
                # (fills raaH1d deterministic and raaH1s sampled states)
                self._UpdateStates(sActivationUp, raaW, raH, raaV0, raaH1d, raaH1s, rDropV, True)

                # If stochastic sampling is enabled...
                if (bSample):
                    # Use sampled states
                    raaH1 = raaH1s
                else:
                    # Use deterministic states
                    raaH1 = raaH1d

                # If we need to drop hidden units...
                if (rDropH > 0):

                    # Compute a random dropout mask and apply it
                    baaH.fill_with_rand()
                    baaH.greater_than(rDropH)
                    raaH1.mult(baaH)

                # Advance the markov chain H1->V2
                self._UpdateStates(sActivationDn, raaW.T, raV, raaH1, raaV2, junk, rDropH)

                # If we need to drop visible units...
                if (rDropV > 0):

                    # Clear dropped states
                    raaV2.mult(baaV)

                # Advance the markov chain V2->H3
                self._UpdateStates(sActivationUp, raaW, raH, raaV2, raaH3, junk, rDropV)

                # If we need to drop hidden units...
                if (rDropH > 0):

                    # Clear dropped states
                    raaH3.mult(baaH)

                # Scale factor to average this batch
                # (force float division so the scale is never truncated
                # to zero under Python 2 integer semantics)
                rScale = 1.0 / iBatch

                # If normalizing the dropout gradient by the number
                # of weight updates rather than the number of batch
                # samples...
                if (self.bNormalizeDropoutGradient):

                    # If no visible layer dropout...
                    if (not rDropV):

                        # Construct a null dropout matrix
                        baaV.assign(1)

                    # If no hidden layer dropout...
                    if (not rDropH):

                        # Construct a null dropout matrix
                        baaH.assign(1)

                    # Compute normalizer matrix:
                    # raaN = 1 ./ (baaV.T * baaH)
                    cudamat.dot(baaV.T, baaH, raaN)
                    raaN.reciprocal()

                    # Compute the average difference between positive phase
                    # up(0,1) and negative phase up(2,3) correlations
                    cudamat.dot(raaV0.T, raaH1, raaDiff)
                    raaDiff.subtract_dot(raaV2.T, raaH3)
                    raaDiff.mult(raaN)

                else:

                    # Scale all weights uniformly
                    cudamat.dot(raaV0.T, raaH1, raaDiff)
                    raaDiff.subtract_dot(raaV2.T, raaH3)
                    raaDiff.mult(rScale)

                # Compute bias gradients:
                # raDiffV = sum(raaV0-raaV2, axis=0) * rScale
                # raDiffH = sum(raaH1-raaH3, axis=0) * rScale
                raaV0.sum(axis=0, mult=rScale).subtract(raaV2.sum(axis=0, mult=rScale), target=raDiffV)
                raaH1.sum(axis=0, mult=rScale).subtract(raaH3.sum(axis=0, mult=rScale), target=raDiffH)

                # Update the weight delta array using the current momentum
                # and learning rate:
                # raaDelta = raaDelta*rMomentum + raaDiff*rRate
                raaDelta.mult(rMomentum)
                raaDiff.mult(rRate)
                raaDelta.add(raaDiff)

                # Update the weights
                raaW.add(raaDelta)

                # Update the biases (fix: the bias gradients above were
                # computed every batch but never applied, so the biases
                # never trained; this mirrors the GPU training path)
                raV.add_mult(raDiffV, rRate)
                raH.add_mult(raDiffH, rRate)

                # Advance to the next minibatch
                iIndex = iIndex + iBatch

            # Compute hidden layer states for the full training set
            self._UpdateStates(sActivationUp, raaW, raH, raaX, raaY, junk, 0)

            # Reconstruct the visible layer
            self._UpdateStates(sActivationDn, raaW.T, raV, raaY, raaXr, junk, 0)

            # Compute error metrics
            rTotalSe, rTotalE = self.GetErrors(raaX, raaXr, sActivationDn)

            # Finish the rmse calculation
            rRmse = math.sqrt(rTotalSe / (raaX.shape[0] * raaX.shape[1]))

            # Finish the mean error calculation
            rError = rTotalE / (raaX.shape[0] * raaX.shape[1])

            # Report training progress
            oOptions.fEvent(iLayer, iEpoch, bSample, rDropV, rDropH, rRate, rMomentum, rRmse, rError)

        # Current layer outputs are the next layer inputs
        raaX = raaY

        # Copy the trained parameters back to the host
        self.oaLayer[iLayer].raaW = raaW.asarray()
        self.oaLayer[iLayer].raV = raV.asarray()
        self.oaLayer[iLayer].raH = raH.asarray()