class MlpManager(QObject): '''This class gets the data extracted from the UI and pass it to multi-layer perceptron, then gets and stores the result. ''' updateGraph = pyqtSignal(float, float) # Train error, val. error updateMinValErr = pyqtSignal(float) # Min validation error updateDeltaRMS = pyqtSignal( float) # Delta of RMS: min(valError) - currentValError updateKappa = pyqtSignal(float) # Kappa value processFinished = pyqtSignal() processInterrupted = pyqtSignal() logMessage = pyqtSignal(str) errorReport = pyqtSignal(str) rangeChanged = pyqtSignal(str, int) updateProgress = pyqtSignal() def __init__(self, ns=0, MLP=None): QObject.__init__(self) self.MLP = MLP self.interrupted = False self.layers = None if self.MLP: self.layers = self.getMlpTopology() self.ns = ns # Neighbourhood size of training rasters. self.data = None # Training data self.catlist = None # List of unique output values of the output raster self.train_error = None # Error on training set self.val_error = None # Error on validation set self.minValError = None # The minimum error that is achieved on the validation set self.valKappa = 0 # Kappa on on the validation set self.sampler = None # Sampler # Results of the MLP prediction self.prediction = None # Raster of the MLP prediction results self.confidence = None # Raster of the MLP results confidence (1 = the maximum confidence, 0 = the least confidence) self.transitionPotentials = None # Dictionary of transition potencial maps: {category1: map1, category2: map2, ...} # Outputs of the activation function for small and big numbers self.sigmax, self.sigmin = sigmoid(100), sigmoid( -100) # Max and Min of the sigmoid function self.sigrange = self.sigmax - self.sigmin # Range of the sigmoid def computeMlpError(self, sample): '''Get MLP error on the sample''' input = np.hstack((sample['state'], sample['factors'])) out = self.getOutput(input) err = ((sample['output'] - out)**2).sum() / len(out) return err def computePerformance(self, train_indexes, val_ind): '''Check errors of training and validation sets @param train_indexes Tuple that contains indexes of the first and last elements of the training set. @param val_ind Tuple that contains indexes of the first and last elements of the validation set. ''' train_error = 0 train_sampl = train_indexes[1] - train_indexes[ 0] # Count of training samples for i in range(train_indexes[0], train_indexes[1]): train_error = train_error + self.computeMlpError( sample=self.data[i]) self.setTrainError(train_error / train_sampl) if val_ind: val_error = 0 val_sampl = val_ind[1] - val_ind[0] answers = np.ma.zeros(val_sampl) out = np.ma.zeros(val_sampl) for i in xrange(val_ind[0], val_ind[1]): sample = self.data[i] val_error = val_error + self.computeMlpError( sample=self.data[i]) input = np.hstack((sample['state'], sample['factors'])) output = self.getOutput(input) out[i - val_ind[0]] = self.outCategory(output) answers[i - val_ind[0]] = self.outCategory(sample['output']) self.setValError(val_error / val_sampl) depCoef = DependenceCoef(out, answers, expand=True) self.valKappa = depCoef.kappa(mode=None) def copyWeights(self): '''Deep copy of the MLP weights''' return copy.deepcopy(self.MLP.weights) def createMlp(self, state, factors, output, hidden_layers): ''' @param state Raster of the current state (categories) values. @param factors List of the factor rasters (predicting variables). @param hidden_layers List of neuron counts in hidden layers. @param ns Neighbourhood size. ''' if output.getBandsCount() != 1: raise MlpManagerError('Output layer must have one band!') input_neurons = 0 for raster in factors: input_neurons = input_neurons + raster.getNeighbourhoodSize( self.ns) # state raster contains categories. We need use n-1 dummy variables (where n = number of categories) input_neurons = input_neurons + (len(state.getBandGradation(1)) - 1) * state.getNeighbourhoodSize( self.ns) # Output category's (neuron) list and count self.catlist = output.getBandGradation(1) categories = len(self.catlist) # set neuron counts in the MLP layers self.layers = hidden_layers self.layers.insert(0, input_neurons) self.layers.append(categories) self.MLP = MLP(*self.layers) def getConfidence(self): return self.confidence def getInputVectLen(self): '''Length of input data vector of the MLP''' shape = self.getMlpTopology() return shape[0] def getOutput(self, input_vector): out = self.MLP.propagate_forward(input_vector) return out def getOutputVectLen(self): '''Length of input data vector of the MLP''' shape = self.getMlpTopology() return shape[-1] def getOutputVector(self, val): '''Convert a number val into vector, for example, let self.catlist = [1, 3, 4] then if val = 1, result = [ 1, -1, -1] if val = 3, result = [-1, 1, -1] if val = 4, result = [-1, -1, 1] where -1 is minimum of the sigmoid, 1 is max of the sigmoid ''' size = self.getOutputVectLen() res = np.ones(size) * (self.sigmin) ind = np.where(self.catlist == val) res[ind] = self.sigmax return res def getMinValError(self): return self.minValError def getMlpTopology(self): return self.MLP.shape def getKappa(self): return self.valKappa def getPrediction(self, state, factors, calcTransitions=False): self._predict(state, factors, calcTransitions) return self.prediction def getTrainError(self): return self.train_error def getTransitionPotentials(self): return self.transitionPotentials def getValError(self): return self.val_error def outCategory(self, out_vector): # Get index of the biggest output value as the result biggest = max(out_vector) res = list(out_vector).index(biggest) res = self.catlist[res] return res def outputConfidence(self, output, scale=True): ''' Return confidence (difference between 2 biggest values) of the MLP output. @param output: The confidence @param scale: If True, then scale the confidence to int [0, 1, ..., 100] percent ''' out_scl = self.scaleOutput(output, percent=scale) out_scl.sort() return out_scl[-1] - out_scl[-2] def outputTransitions(self, output, scale=True): ''' Return transition potencial of the outputs scaled to [0,1] or 1-100 @param output: The output of MLP @param scale: If True, then scale the transitions to int ([0, 1, ..., 100]) percent ''' out_scl = self.scaleOutput(output, percent=scale) result = {} for r, v in enumerate(out_scl): cat = self.catlist[r] result[cat] = v return result def scaleOutput(self, output, percent=True): ''' Scale the output to range [0,1] or 1-100 @param output: Output of a MLP @param percent: If True, then scale the output to int [0, 1, ..., 100] percent ''' res = 1.0 * (output - self.sigmin) / self.sigrange if percent: res = [int(100 * x) for x in res] return res def _predict(self, state, factors, calcTransitions=False): ''' Calculate output and confidence rasters using MLP model and input rasters @param state Raster of the current state (categories) values. @param factors List of the factor rasters (predicting variables). ''' try: self.rangeChanged.emit(self.tr("Initialize model %p%"), 1) geodata = state.getGeodata() rows, cols = geodata['ySize'], geodata['xSize'] for r in factors: if not state.geoDataMatch(r): raise MlpManagerError( 'Geometries of the input rasters are different!') self.transitionPotentials = None # Reset tr.potentials if they exist # Normalize factors before prediction: for f in factors: f.normalize(mode='mean') predicted_band = np.zeros([rows, cols], dtype=np.uint8) confidence_band = np.zeros([rows, cols], dtype=np.uint8) if calcTransitions: self.transitionPotentials = {} for cat in self.catlist: self.transitionPotentials[cat] = np.zeros([rows, cols], dtype=np.uint8) self.sampler = Sampler(state, factors, ns=self.ns) mask = state.getBand(1).mask.copy() if mask.shape == (): mask = np.zeros([rows, cols], dtype=np.bool) self.updateProgress.emit() self.rangeChanged.emit(self.tr("Prediction %p%"), rows) for i in xrange(rows): for j in xrange(cols): if not mask[i, j]: input = self.sampler.get_inputs(state, i, j) if input != None: out = self.getOutput(input) res = self.outCategory(out) predicted_band[i, j] = res confidence = self.outputConfidence(out) confidence_band[i, j] = confidence if calcTransitions: potentials = self.outputTransitions(out) for cat in self.catlist: map = self.transitionPotentials[cat] map[i, j] = potentials[cat] else: # Input sample is incomplete => mask this pixel mask[i, j] = True self.updateProgress.emit() predicted_bands = [ np.ma.array(data=predicted_band, mask=mask, dtype=np.uint8) ] confidence_bands = [ np.ma.array(data=confidence_band, mask=mask, dtype=np.uint8) ] self.prediction = Raster() self.prediction.create(predicted_bands, geodata) self.confidence = Raster() self.confidence.create(confidence_bands, geodata) if calcTransitions: for cat in self.catlist: band = [ np.ma.array(data=self.transitionPotentials[cat], mask=mask, dtype=np.uint8) ] self.transitionPotentials[cat] = Raster() self.transitionPotentials[cat].create(band, geodata) except MemoryError: self.errorReport.emit( self.tr("The system out of memory during ANN prediction")) raise except: self.errorReport.emit( self.tr("An unknown error occurs during ANN prediction")) raise def readMlp(self): pass def resetErrors(self): self.val_error = np.finfo(np.float).max self.train_error = np.finfo(np.float).max def resetMlp(self): self.MLP.reset() self.resetErrors() def saveMlp(self): pass def saveSamples(self, fileName): self.sampler.saveSamples(fileName) def setMlpWeights(self, w): '''Set weights of the MLP''' self.MLP.weights = w def setTrainingData(self, state, factors, output, shuffle=True, mode='All', samples=None): ''' @param state Raster of the current state (categories) values. @param factors List of the factor rasters (predicting variables). @param output Raster that contains categories to predict. @param shuffle Perform random shuffle. @param mode Type of sampling method: All Get all pixels Random Get samples. Count of samples in the data=samples. Stratified Undersampling of major categories and/or oversampling of minor categories. @samples Sample count of the training data (doesn't used in 'All' mode). ''' if not self.MLP: raise MlpManagerError('You must create a MLP before!') # Normalize factors before sampling: for f in factors: f.normalize(mode='mean') self.sampler = Sampler(state, factors, output, self.ns) self.sampler.setTrainingData(state=state, output=output, shuffle=shuffle, mode=mode, samples=samples) outputVecLen = self.getOutputVectLen() stateVecLen = self.sampler.stateVecLen factorVectLen = self.sampler.factorVectLen size = len(self.sampler.data) self.data = np.zeros(size, dtype=[('coords', float, 2), ('state', float, stateVecLen), ('factors', float, factorVectLen), ('output', float, outputVecLen)]) self.data['coords'] = self.sampler.data['coords'] self.data['state'] = self.sampler.data['state'] self.data['factors'] = self.sampler.data['factors'] self.data['output'] = [ self.getOutputVector(sample['output']) for sample in self.sampler.data ] def setTrainError(self, error): self.train_error = error def setValError(self, error): self.val_error = error def setEpochs(self, epochs): self.epochs = epochs def setValPercent(self, value=20): self.valPercent = value def setLRate(self, value=0.1): self.lrate = value def setMomentum(self, value=0.01): self.momentum = value def setContinueTrain(self, value=False): self.continueTrain = value def startTrain(self): self.train(self.epochs, self.valPercent, self.lrate, self.momentum, self.continueTrain) def stopTrain(self): self.interrupted = True def train(self, epochs, valPercent=20, lrate=0.1, momentum=0.01, continue_train=False): '''Perform the training procedure on the MLP and save the best neural net @param epoch Max iteration count. @param valPercent Percent of the validation set. @param lrate Learning rate. @param momentum Learning momentum. @param continue_train If False then it is new training cycle, reset weights training and validation error. If True, then continue training. ''' try: samples_count = len(self.data) val_sampl_count = samples_count * valPercent / 100 apply_validation = True if val_sampl_count > 0 else False # Use or not use validation set train_sampl_count = samples_count - val_sampl_count # Set first train_sampl_count as training set, the other as validation set train_indexes = (0, train_sampl_count) val_indexes = (train_sampl_count, samples_count) if apply_validation else None if not continue_train: self.resetMlp() self.minValError = self.getValError( ) # The minimum error that is achieved on the validation set last_train_err = self.getTrainError() best_weights = self.copyWeights( ) # The MLP weights when minimum error that is achieved on the validation set self.rangeChanged.emit(self.tr("Train model %p%"), epochs) for epoch in range(epochs): self.trainEpoch(train_indexes, lrate, momentum) self.computePerformance(train_indexes, val_indexes) self.updateGraph.emit(self.getTrainError(), self.getValError()) self.updateDeltaRMS.emit(self.getMinValError() - self.getValError()) self.updateKappa.emit(self.getKappa()) QCoreApplication.processEvents() if self.interrupted: self.processInterrupted.emit() break last_train_err = self.getTrainError() self.setTrainError(last_train_err) if apply_validation and (self.getValError() < self.getMinValError()): self.minValError = self.getValError() best_weights = self.copyWeights() self.updateMinValErr.emit(self.getMinValError()) self.updateProgress.emit() self.setMlpWeights(best_weights) except MemoryError: self.errorReport.emit( self.tr("The system out of memory during ANN training")) raise except: self.errorReport.emit( self.tr("An unknown error occurs during ANN trainig")) raise finally: self.processFinished.emit() def trainEpoch(self, train_indexes, lrate=0.1, momentum=0.01): '''Perform a training epoch on the MLP @param train_ind Tuple of the min&max indexes of training samples in the samples data. @param val_ind Tuple of the min&max indexes of validation samples in the samples data. @param lrate Learning rate. @param momentum Learning momentum. ''' train_sampl = train_indexes[1] - train_indexes[0] for i in range(train_sampl): n = np.random.randint(*train_indexes) sample = self.data[n] input = np.hstack((sample['state'], sample['factors'])) self.getOutput(input) # Forward propagation self.MLP.propagate_backward(sample['output'], lrate, momentum)
class MlpManager(QObject): '''This class gets the data extracted from the UI and pass it to multi-layer perceptron, then gets and stores the result. ''' updateGraph = pyqtSignal(float, float) # Train error, val. error updateMinValErr = pyqtSignal(float) # Min validation error updateDeltaRMS = pyqtSignal(float) # Delta of RMS: min(valError) - currentValError processFinished = pyqtSignal() logMessage = pyqtSignal(str) def __init__(self, ns=0, MLP=None): QObject.__init__(self) self.MLP = MLP self.layers = None if self.MLP: self.layers = self.getMlpTopology() self.ns = ns # Neighbourhood size of training rasters. self.data = None # Training data self.classlist = None # List of unique output values of the output raster self.train_error = None # Error on training set self.val_error = None # Error on validation set self.minValError = None # The minimum error that is achieved on the validation set # Results of the MLP prediction self.prediction = None # Raster of the MLP prediction results self.confidence = None # Raster of the MLP results confidence # Outputs of the activation function for small and big numbers self.sigmax, self.sigmin = sigmoid(100), sigmoid(-100) # Max and Min of the sigmoid function self.sigrange = self.sigmax - self.sigmin # Range of the sigmoid def computeMlpError(self, sample): '''Get MLP error on the sample''' input = np.hstack( (sample['state'], sample['factors']) ) out = self.getOutput( input ) err = ((sample['output'] - out)**2).sum()/len(out) return err def computePerformance(self, train_indexes, val_ind): '''Check errors of training and validation sets @param train_indexes Tuple that contains indexes of the first and last elements of the training set. @param val_ind Tuple that contains indexes of the first and last elements of the validation set. ''' train_error = 0 train_sampl = train_indexes[1] - train_indexes[0] # Count of training samples for i in range(train_indexes[0], train_indexes[1]): train_error = train_error + self.computeMlpError(sample = self.data[i]) self.setTrainError(train_error/train_sampl) if val_ind: val_error = 0 val_sampl = val_ind[1] - val_ind[0] for i in xrange(val_ind[0], val_ind[1]): val_error = val_error + self.computeMlpError(sample = self.data[i]) self.setValError(val_error/val_sampl) def copyWeights(self): '''Deep copy of the MLP weights''' return copy.deepcopy(self.MLP.weights) def createMlp(self, state, factors, output, hidden_layers): ''' @param state Raster of the current state (classes) values. @param factors List of the factor rasters (predicting variables). @param hidden_layers List of neuron counts in hidden layers. @param ns Neighbourhood size. ''' if output.getBandsCount() != 1: raise MplManagerError('Output layer must have one band!') input_neurons = 0 for raster in [state] + factors: input_neurons = input_neurons+ raster.getNeighbourhoodSize(self.ns) # Output class (neuron) count band = output.getBand(1) self.classlist = np.unique(band.compressed()) classes = len(self.classlist) # set neuron counts in the MLP layers self.layers = hidden_layers self.layers.insert(0, input_neurons) self.layers.append(classes) self.MLP = MLP(*self.layers) def getConfidence(self): return self.confidence def getInputVectLen(self): '''Length of input data vector of the MLP''' shape = self.getMlpTopology() return shape[0] def getOutput(self, input_vector): out = self.MLP.propagate_forward( input_vector ) return out def getOutputVectLen(self): '''Length of input data vector of the MLP''' shape = self.getMlpTopology() return shape[-1] def getOutputVector(self, val): '''Convert a number val into vector, for example, let self.classlist = [1, 3, 4] then if val = 1, result = [ 1, -1, -1] if val = 3, result = [-1, 1, -1] if val = 4, result = [-1, -1, 1] where -1 is minimum of the sigmoid, 1 is max of the sigmoid ''' size = self.getOutputVectLen() res = np.ones(size) * (self.sigmin) ind = np.where(self.classlist==val) res[ind] = self.sigmax return res def getMinValError(self): return self.minValError def getMlpTopology(self): return self.MLP.shape def getPrediction(self, state, factors): self._predict(state, factors) return self.prediction def getTrainError(self): return self.train_error def getValError(self): return self.val_error def outputConfidence(self, output): ''' Return confidence (difference between 2 biggest values) of the MLP output. ''' # Scale the output to range [0,1] out_scl = 1.0 * (output - self.sigmin) / self.sigrange # Calculate the confidence: out_scl.sort() return out_scl[-1] - out_scl[-2] def _predict(self, state, factors): ''' Calculate output and confidence rasters using MLP model and input rasters @param state Raster of the current state (classes) values. @param factors List of the factor rasters (predicting variables). ''' geodata = state.getGeodata() rows, cols = geodata['ySize'], geodata['xSize'] for r in factors: if not state.geoDataMatch(r): raise MlpManagerError('Geometries of the input rasters are different!') # Normalize factors before prediction: for f in factors: f.normalize(mode = 'mean') predicted_band = np.zeros([rows, cols]) confidence_band = np.zeros([rows, cols]) sampler = Sampler(state, factors, ns=self.ns) mask = state.getBand(1).mask.copy() for i in xrange(rows): for j in xrange(cols): if not mask[i,j]: input = sampler.get_inputs(state, factors, i,j) if input != None: out = self.getOutput(input) # Get index of the biggest output value as the result biggest = max(out) res = list(out).index(biggest) predicted_band[i, j] = self.classlist[res] confidence = self.outputConfidence(out) confidence_band[i, j] = confidence else: # Input sample is incomplete => mask this pixel mask[i, j] = True predicted_bands = [np.ma.array(data = predicted_band, mask = mask)] confidence_bands = [np.ma.array(data = confidence_band, mask = mask)] self.prediction = Raster() self.prediction.create(predicted_bands, geodata) self.confidence = Raster() self.confidence.create(confidence_bands, geodata) def readMlp(self): pass def resetErrors(self): self.val_error = np.finfo(np.float).max self.train_error = np.finfo(np.float).max def resetMlp(self): self.MLP.reset() self.resetErrors() def saveMlp(self): pass def setMlpWeights(self, w): '''Set weights of the MLP''' self.MLP.weights = w def setTrainingData(self, state, factors, output, shuffle=True, mode='All', samples=None): ''' @param state Raster of the current state (classes) values. @param factors List of the factor rasters (predicting variables). @param output Raster that contains classes to predict. @param shuffle Perform random shuffle. @param mode Type of sampling method: All Get all pixels Normal Get samples. Count of samples in the data=samples. Balanced Undersampling of major classes and/or oversampling of minor classes. @samples Sample count of the training data (doesn't used in 'All' mode). ''' if not self.MLP: raise MlpManagerError('You must create a MLP before!') # Normalize factors before sampling: for f in factors: f.normalize(mode = 'mean') sampler = Sampler(state, factors, output, self.ns) sampler.setTrainingData(state, factors, output, shuffle, mode, samples) outputVecLen = self.getOutputVectLen() stateVecLen = sampler.stateVecLen factorVectLen = sampler.factorVectLen size = len(sampler.data) self.data = np.zeros(size, dtype=[('state', float, stateVecLen), ('factors', float, factorVectLen), ('output', float, outputVecLen)]) self.data['state'] = sampler.data['state'] self.data['factors'] = sampler.data['factors'] self.data['output'] = [self.getOutputVector(sample['output']) for sample in sampler.data] def setTrainError(self, error): self.train_error = error def setValError(self, error): self.val_error = error def setEpochs(self, epochs): self.epochs = epochs def setValPercent(self, value=20): self.valPercent = value def setLRate(self, value=0.1): self.lrate = value def setMomentum(self, value=0.01): self.momentum = value def setContinueTrain(self, value=False): self.continueTrain = value def startTrain(self): self.train(self.epochs, self.valPercent, self.lrate, self.momentum, self.continueTrain) def train(self, epochs, valPercent=20, lrate=0.1, momentum=0.01, continue_train=False): '''Perform the training procedure on the MLP and save the best neural net @param epoch Max iteration count. @param valPercent Percent of the validation set. @param lrate Learning rate. @param momentum Learning momentum. @param continue_train If False then it is new training cycle, reset weights training and validation error. If True, then continue training. ''' samples_count = len(self.data) val_sampl_count = samples_count*valPercent/100 apply_validation = True if val_sampl_count>0 else False # Use or not use validation set train_sampl_count = samples_count - val_sampl_count # Set first train_sampl_count as training set, the other as validation set train_indexes = (0, train_sampl_count) val_indexes = (train_sampl_count, samples_count) if apply_validation else None if not continue_train: self.resetMlp() self.minValError = self.getValError() # The minimum error that is achieved on the validation set last_train_err = self.getTrainError() best_weights = self.copyWeights() # The MLP weights when minimum error that is achieved on the validation set for epoch in range(epochs): self.trainEpoch(train_indexes, lrate, momentum) self.computePerformance(train_indexes, val_indexes) self.updateGraph.emit(self.getTrainError(), self.getValError()) self.updateDeltaRMS.emit(self.getMinValError() - self.getValError()) last_train_err = self.getTrainError() self.setTrainError(last_train_err) if apply_validation and (self.getValError() < self.getMinValError()): self.minValError = self.getValError() best_weights = self.copyWeights() self.updateMinValErr.emit(self.getMinValError()) self.setMlpWeights(best_weights) self.processFinished.emit() def trainEpoch(self, train_indexes, lrate=0.1, momentum=0.01): '''Perform a training epoch on the MLP @param train_ind Tuple of the min&max indexes of training samples in the samples data. @param val_ind Tuple of the min&max indexes of validation samples in the samples data. @param lrate Learning rate. @param momentum Learning momentum. ''' train_sampl = train_indexes[1] - train_indexes[0] for i in range(train_sampl): n = np.random.randint( *train_indexes ) sample = self.data[n] input = np.hstack( (sample['state'],sample['factors']) ) self.getOutput( input ) self.MLP.propagate_backward( sample['output'], lrate, momentum )
class MlpManager(QObject): '''This class gets the data extracted from the UI and pass it to multi-layer perceptron, then gets and stores the result. ''' updateGraph = pyqtSignal(float, float) # Train error, val. error updateMinValErr = pyqtSignal(float) # Min validation error updateDeltaRMS = pyqtSignal(float) # Delta of RMS: min(valError) - currentValError updateKappa = pyqtSignal(float) # Kappa value processFinished = pyqtSignal() processInterrupted = pyqtSignal() logMessage = pyqtSignal(str) errorReport = pyqtSignal(str) rangeChanged = pyqtSignal(str, int) updateProgress = pyqtSignal() def __init__(self, ns=0, MLP=None): QObject.__init__(self) self.MLP = MLP self.interrupted = False self.layers = None if self.MLP: self.layers = self.getMlpTopology() self.ns = ns # Neighbourhood size of training rasters. self.data = None # Training data self.catlist = None # List of unique output values of the output raster self.train_error = None # Error on training set self.val_error = None # Error on validation set self.minValError = None # The minimum error that is achieved on the validation set self.valKappa = 0 # Kappa on on the validation set self.sampler = None # Sampler # Results of the MLP prediction self.prediction = None # Raster of the MLP prediction results self.confidence = None # Raster of the MLP results confidence (1 = the maximum confidence, 0 = the least confidence) self.transitionPotentials = None # Dictionary of transition potencial maps: {category1: map1, category2: map2, ...} # Outputs of the activation function for small and big numbers self.sigmax, self.sigmin = sigmoid(100), sigmoid(-100) # Max and Min of the sigmoid function self.sigrange = self.sigmax - self.sigmin # Range of the sigmoid def computeMlpError(self, sample): '''Get MLP error on the sample''' input = np.hstack( (sample['state'], sample['factors']) ) out = self.getOutput( input ) err = ((sample['output'] - out)**2).sum()/len(out) return err def computePerformance(self, train_indexes, val_ind): '''Check errors of training and validation sets @param train_indexes Tuple that contains indexes of the first and last elements of the training set. @param val_ind Tuple that contains indexes of the first and last elements of the validation set. ''' train_error = 0 train_sampl = train_indexes[1] - train_indexes[0] # Count of training samples for i in range(train_indexes[0], train_indexes[1]): train_error = train_error + self.computeMlpError(sample = self.data[i]) self.setTrainError(train_error/train_sampl) if val_ind: val_error = 0 val_sampl = val_ind[1] - val_ind[0] answers = np.ma.zeros(val_sampl) out = np.ma.zeros(val_sampl) for i in xrange(val_ind[0], val_ind[1]): sample = self.data[i] val_error = val_error + self.computeMlpError(sample = self.data[i]) input = np.hstack( (sample['state'],sample['factors']) ) output = self.getOutput(input) out[i-val_ind[0]] = self.outCategory(output) answers[i-val_ind[0]] = self.outCategory(sample['output']) self.setValError(val_error/val_sampl) depCoef = DependenceCoef(out, answers, expand=True) self.valKappa = depCoef.kappa(mode=None) def copyWeights(self): '''Deep copy of the MLP weights''' return copy.deepcopy(self.MLP.weights) def createMlp(self, state, factors, output, hidden_layers): ''' @param state Raster of the current state (categories) values. @param factors List of the factor rasters (predicting variables). @param hidden_layers List of neuron counts in hidden layers. @param ns Neighbourhood size. ''' if output.getBandsCount() != 1: raise MlpManagerError('Output layer must have one band!') input_neurons = 0 for raster in factors: input_neurons = input_neurons+ raster.getNeighbourhoodSize(self.ns) # state raster contains categories. We need use n-1 dummy variables (where n = number of categories) input_neurons = input_neurons + (len(state.getBandGradation(1))-1) * state.getNeighbourhoodSize(self.ns) # Output category's (neuron) list and count self.catlist = output.getBandGradation(1) categories = len(self.catlist) # set neuron counts in the MLP layers self.layers = hidden_layers self.layers.insert(0, input_neurons) self.layers.append(categories) self.MLP = MLP(*self.layers) def getConfidence(self): return self.confidence def getInputVectLen(self): '''Length of input data vector of the MLP''' shape = self.getMlpTopology() return shape[0] def getOutput(self, input_vector): out = self.MLP.propagate_forward( input_vector ) return out def getOutputVectLen(self): '''Length of input data vector of the MLP''' shape = self.getMlpTopology() return shape[-1] def getOutputVector(self, val): '''Convert a number val into vector, for example, let self.catlist = [1, 3, 4] then if val = 1, result = [ 1, -1, -1] if val = 3, result = [-1, 1, -1] if val = 4, result = [-1, -1, 1] where -1 is minimum of the sigmoid, 1 is max of the sigmoid ''' size = self.getOutputVectLen() res = np.ones(size) * (self.sigmin) ind = np.where(self.catlist==val) res[ind] = self.sigmax return res def getMinValError(self): return self.minValError def getMlpTopology(self): return self.MLP.shape def getKappa(self): return self.valKappa def getPrediction(self, state, factors, calcTransitions=False): self._predict(state, factors, calcTransitions) return self.prediction def getTrainError(self): return self.train_error def getTransitionPotentials(self): return self.transitionPotentials def getValError(self): return self.val_error def outCategory(self, out_vector): # Get index of the biggest output value as the result biggest = max(out_vector) res = list(out_vector).index(biggest) res = self.catlist[res] return res def outputConfidence(self, output, scale=True): ''' Return confidence (difference between 2 biggest values) of the MLP output. @param output: The confidence @param scale: If True, then scale the confidence to int [0, 1, ..., 100] percent ''' out_scl = self.scaleOutput(output, percent=scale) out_scl.sort() return out_scl[-1] - out_scl[-2] def outputTransitions(self, output, scale=True): ''' Return transition potencial of the outputs scaled to [0,1] or 1-100 @param output: The output of MLP @param scale: If True, then scale the transitions to int ([0, 1, ..., 100]) percent ''' out_scl = self.scaleOutput(output, percent=scale) result = {} for r, v in enumerate(out_scl): cat = self.catlist[r] result[cat] = v return result def scaleOutput(self, output, percent=True): ''' Scale the output to range [0,1] or 1-100 @param output: Output of a MLP @param percent: If True, then scale the output to int [0, 1, ..., 100] percent ''' res = 1.0 * (output - self.sigmin) / self.sigrange if percent: res = [ int(100 * x) for x in res] return res def _predict(self, state, factors, calcTransitions=False): ''' Calculate output and confidence rasters using MLP model and input rasters @param state Raster of the current state (categories) values. @param factors List of the factor rasters (predicting variables). ''' try: self.rangeChanged.emit(self.tr("Initialize model %p%"), 1) geodata = state.getGeodata() rows, cols = geodata['ySize'], geodata['xSize'] for r in factors: if not state.geoDataMatch(r): raise MlpManagerError('Geometries of the input rasters are different!') self.transitionPotentials = None # Reset tr.potentials if they exist # Normalize factors before prediction: for f in factors: f.normalize(mode = 'mean') predicted_band = np.zeros([rows, cols], dtype=np.uint8) confidence_band = np.zeros([rows, cols], dtype=np.uint8) if calcTransitions: self.transitionPotentials = {} for cat in self.catlist: self.transitionPotentials[cat] = np.zeros([rows, cols], dtype=np.uint8) self.sampler = Sampler(state, factors, ns=self.ns) mask = state.getBand(1).mask.copy() if mask.shape == (): mask = np.zeros([rows, cols], dtype=np.bool) self.updateProgress.emit() self.rangeChanged.emit(self.tr("Prediction %p%"), rows) for i in xrange(rows): for j in xrange(cols): if not mask[i,j]: input = self.sampler.get_inputs(state, i,j) if input != None: out = self.getOutput(input) res = self.outCategory(out) predicted_band[i, j] = res confidence = self.outputConfidence(out) confidence_band[i, j] = confidence if calcTransitions: potentials = self.outputTransitions(out) for cat in self.catlist: map = self.transitionPotentials[cat] map[i, j] = potentials[cat] else: # Input sample is incomplete => mask this pixel mask[i, j] = True self.updateProgress.emit() predicted_bands = [np.ma.array(data = predicted_band, mask = mask, dtype=np.uint8)] confidence_bands = [np.ma.array(data = confidence_band, mask = mask, dtype=np.uint8)] self.prediction = Raster() self.prediction.create(predicted_bands, geodata) self.confidence = Raster() self.confidence.create(confidence_bands, geodata) if calcTransitions: for cat in self.catlist: band = [np.ma.array(data=self.transitionPotentials[cat], mask=mask, dtype=np.uint8)] self.transitionPotentials[cat] = Raster() self.transitionPotentials[cat].create(band, geodata) except MemoryError: self.errorReport.emit(self.tr("The system out of memory during ANN prediction")) raise except: self.errorReport.emit(self.tr("An unknown error occurs during ANN prediction")) raise def readMlp(self): pass def resetErrors(self): self.val_error = np.finfo(np.float).max self.train_error = np.finfo(np.float).max def resetMlp(self): self.MLP.reset() self.resetErrors() def saveMlp(self): pass def saveSamples(self, fileName): self.sampler.saveSamples(fileName) def setMlpWeights(self, w): '''Set weights of the MLP''' self.MLP.weights = w def setTrainingData(self, state, factors, output, shuffle=True, mode='All', samples=None): ''' @param state Raster of the current state (categories) values. @param factors List of the factor rasters (predicting variables). @param output Raster that contains categories to predict. @param shuffle Perform random shuffle. @param mode Type of sampling method: All Get all pixels Random Get samples. Count of samples in the data=samples. Stratified Undersampling of major categories and/or oversampling of minor categories. @samples Sample count of the training data (doesn't used in 'All' mode). ''' if not self.MLP: raise MlpManagerError('You must create a MLP before!') # Normalize factors before sampling: for f in factors: f.normalize(mode = 'mean') self.sampler = Sampler(state, factors, output, self.ns) self.sampler.setTrainingData(state=state, output=output, shuffle=shuffle, mode=mode, samples=samples) outputVecLen = self.getOutputVectLen() stateVecLen = self.sampler.stateVecLen factorVectLen = self.sampler.factorVectLen size = len(self.sampler.data) self.data = np.zeros(size, dtype=[('coords', float, 2), ('state', float, stateVecLen), ('factors', float, factorVectLen), ('output', float, outputVecLen)]) self.data['coords'] = self.sampler.data['coords'] self.data['state'] = self.sampler.data['state'] self.data['factors'] = self.sampler.data['factors'] self.data['output'] = [self.getOutputVector(sample['output']) for sample in self.sampler.data] def setTrainError(self, error): self.train_error = error def setValError(self, error): self.val_error = error def setEpochs(self, epochs): self.epochs = epochs def setValPercent(self, value=20): self.valPercent = value def setLRate(self, value=0.1): self.lrate = value def setMomentum(self, value=0.01): self.momentum = value def setContinueTrain(self, value=False): self.continueTrain = value def startTrain(self): self.train(self.epochs, self.valPercent, self.lrate, self.momentum, self.continueTrain) def stopTrain(self): self.interrupted = True def train(self, epochs, valPercent=20, lrate=0.1, momentum=0.01, continue_train=False): '''Perform the training procedure on the MLP and save the best neural net @param epoch Max iteration count. @param valPercent Percent of the validation set. @param lrate Learning rate. @param momentum Learning momentum. @param continue_train If False then it is new training cycle, reset weights training and validation error. If True, then continue training. ''' try: samples_count = len(self.data) val_sampl_count = samples_count*valPercent/100 apply_validation = True if val_sampl_count>0 else False # Use or not use validation set train_sampl_count = samples_count - val_sampl_count # Set first train_sampl_count as training set, the other as validation set train_indexes = (0, train_sampl_count) val_indexes = (train_sampl_count, samples_count) if apply_validation else None if not continue_train: self.resetMlp() self.minValError = self.getValError() # The minimum error that is achieved on the validation set last_train_err = self.getTrainError() best_weights = self.copyWeights() # The MLP weights when minimum error that is achieved on the validation set self.rangeChanged.emit(self.tr("Train model %p%"), epochs) for epoch in range(epochs): self.trainEpoch(train_indexes, lrate, momentum) self.computePerformance(train_indexes, val_indexes) self.updateGraph.emit(self.getTrainError(), self.getValError()) self.updateDeltaRMS.emit(self.getMinValError() - self.getValError()) self.updateKappa.emit(self.getKappa()) QCoreApplication.processEvents() if self.interrupted: self.processInterrupted.emit() break last_train_err = self.getTrainError() self.setTrainError(last_train_err) if apply_validation and (self.getValError() < self.getMinValError()): self.minValError = self.getValError() best_weights = self.copyWeights() self.updateMinValErr.emit(self.getMinValError()) self.updateProgress.emit() self.setMlpWeights(best_weights) except MemoryError: self.errorReport.emit(self.tr("The system out of memory during ANN training")) raise except: self.errorReport.emit(self.tr("An unknown error occurs during ANN trainig")) raise finally: self.processFinished.emit() def trainEpoch(self, train_indexes, lrate=0.1, momentum=0.01): '''Perform a training epoch on the MLP @param train_ind Tuple of the min&max indexes of training samples in the samples data. @param val_ind Tuple of the min&max indexes of validation samples in the samples data. @param lrate Learning rate. @param momentum Learning momentum. ''' train_sampl = train_indexes[1] - train_indexes[0] for i in range(train_sampl): n = np.random.randint( *train_indexes ) sample = self.data[n] input = np.hstack( (sample['state'],sample['factors']) ) self.getOutput( input ) # Forward propagation self.MLP.propagate_backward( sample['output'], lrate, momentum )