Example #1
    def setTrainingData(self, state, factors, output, mode='All', samples=None):
        '''
        @param state            Raster of the current state (classes) values.
        @param factors          List of the factor rasters (predicting variables).
        @param output           Raster that contains classes to predict.
        @param mode             Type of sampling method:
                                    All             Get all pixels
                                    Normal          Random sampling; the count of samples in the data equals `samples`.
                                    Balanced        Undersampling of major classes and/or oversampling of minor classes.
        @param samples          Sample count of the training data (not used in 'All' mode).
        '''
        if not self.logreg:
            raise LRError('You must create a Logistic Regression model first!')

        # Normalize factors before sampling:
        for f in factors:
            f.normalize(mode = 'mean')

        sampler = Sampler(state, factors, output, ns=self.ns)
        sampler.setTrainingData(state, factors, output, shuffle=False, mode=mode, samples=samples)

        outputVecLen  = sampler.outputVecLen
        stateVecLen   = sampler.stateVecLen
        factorVectLen = sampler.factorVectLen
        size = len(sampler.data)

        self.data = sampler.data
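A minimal usage sketch for this variant (the raster file names and the LR instance are assumptions; the mode and samples keywords follow the docstring above):

    state = Raster('init_state.tif')                 # current categories (hypothetical path)
    factors = [Raster('slope.tif'), Raster('roads.tif')]
    output = Raster('final_state.tif')               # categories to predict
    lr = LR(ns=0)                                    # neighbourhood size of 0 pixels
    lr.setTrainingData(state, factors, output, mode='Balanced', samples=1000)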
Example #2
    def setTrainingData(self, state, factors, output, shuffle=True, mode='All', samples=None):
        '''
        @param state            Raster of the current state (classes) values.
        @param factors          List of the factor rasters (predicting variables).
        @param output           Raster that contains classes to predict.
        @param shuffle          Perform random shuffle.
        @param mode             Type of sampling method:
                                    All             Get all pixels
                                    Normal          Random sampling; the count of samples in the data equals `samples`.
                                    Balanced        Undersampling of major classes and/or oversampling of minor classes.
        @param samples          Sample count of the training data (not used in 'All' mode).
        '''
        if not self.MLP:
            raise MlpManagerError('You must create an MLP first!')

        # Normalize factors before sampling:
        for f in factors:
            f.normalize(mode = 'mean')

        sampler = Sampler(state, factors, output, self.ns)
        sampler.setTrainingData(state, factors, output, shuffle, mode, samples)

        outputVecLen  = self.getOutputVectLen()
        stateVecLen   = sampler.stateVecLen
        factorVectLen = sampler.factorVectLen
        size = len(sampler.data)

        self.data = np.zeros(size, dtype=[('state', float, stateVecLen), ('factors',  float, factorVectLen), ('output', float, outputVecLen)])
        self.data['state'] = sampler.data['state']
        self.data['factors'] = sampler.data['factors']
        self.data['output'] = [self.getOutputVector(sample['output']) for sample in sampler.data]
Example #3
    def setTrainingData(self):
        state, factors, output, mode, samples = self.state, self.factors, self.output, self.mode, self.samples
        if not self.logreg:
            raise LRError(
                'You must create a Logistic Regression model first!')

        # Normalize factors before sampling:
        for f in factors:
            f.normalize(mode='mean')

        self.sampler = Sampler(state, factors, output, ns=self.ns)
        self.__propagateSamplerSignals()
        self.sampler.setTrainingData(state,
                                     output,
                                     shuffle=False,
                                     mode=mode,
                                     samples=samples)

        outputVecLen = self.sampler.outputVecLen
        stateVecLen = self.sampler.stateVecLen
        factorVectLen = self.sampler.factorVectLen
        size = len(self.sampler.data)

        self.data = self.sampler.data
        self.catlist = np.unique(self.data['output'])
Example #4
    def test_get_state(self):
        smp = Sampler(self.state, self.factors, self.output, ns=0)
        assert_array_equal(smp.get_state(self.state, 1, 1), [0, 0])
        assert_array_equal(smp.get_state(self.state, 0, 0), [0, 1])

        smp = Sampler(self.state, self.factors, self.output, ns=1)
        res = [
            0, 1, 0, 0, 0, 1,
            0, 1, 0, 0, 0, 1,
            1, 0, 0, 1, 0, 0,
        ]
        assert_array_equal(smp.get_state(self.state, 1, 1), res)

        inputRast = Raster('../../examples/sites.tif')
        inputRast.resetMask([0])
        smp = Sampler(inputRast, self.factors, self.output, ns=0)
        assert_array_equal(smp.get_state(self.state, 1, 1), [0])
        assert_array_equal(smp.get_state(self.state, 0, 0), [1])
Example #5
    def test_cat2vect(self):
        smp = Sampler(self.state, self.factors,  self.output, ns=0)
        assert_array_equal(smp.cat2vect(0), [1, 0])
        assert_array_equal(smp.cat2vect(1), [0, 1])
        assert_array_equal(smp.cat2vect(2), [0, 0])

        inputRast = Raster('../../examples/sites.tif')
        inputRast.resetMask([0])
        smp = Sampler(inputRast, self.factors,  self.output, ns=0)
        assert_array_equal(smp.cat2vect(1), [1])
        assert_array_equal(smp.cat2vect(2), [0])
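The assertions above suggest cat2vect performs n-1 dummy coding over the raster's category list, mapping the last category to the all-zero vector. A self-contained sketch of that logic under this assumption (the helper name is hypothetical):

    def cat2vect_sketch(categories, val):
        # n-1 dummy coding: one slot per category except the last one
        vec = [0] * (len(categories) - 1)
        if val in categories[:-1]:
            vec[categories.index(val)] = 1
        return vec

    cat2vect_sketch([0, 1, 2], 0)  # -> [1, 0]
    cat2vect_sketch([0, 1, 2], 2)  # -> [0, 0]
    cat2vect_sketch([1, 2], 2)     # -> [0]   (two categories after masking)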
Example #6
    def setTrainingData(self,
                        state,
                        factors,
                        output,
                        shuffle=True,
                        mode='All',
                        samples=None):
        '''
        @param state            Raster of the current state (categories) values.
        @param factors          List of the factor rasters (predicting variables).
        @param output           Raster that contains categories to predict.
        @param shuffle          Perform random shuffle.
        @param mode             Type of sampling method:
                                    All             Get all pixels
                                    Random          Random sampling; the count of samples in the data equals `samples`.
                                    Stratified      Undersampling of major categories and/or oversampling of minor categories.
        @param samples          Sample count of the training data (not used in 'All' mode).
        '''
        if not self.MLP:
            raise MlpManagerError('You must create an MLP first!')

        # Normalize factors before sampling:
        for f in factors:
            f.normalize(mode='mean')

        self.sampler = Sampler(state, factors, output, self.ns)
        self.sampler.setTrainingData(state=state,
                                     output=output,
                                     shuffle=shuffle,
                                     mode=mode,
                                     samples=samples)

        outputVecLen = self.getOutputVectLen()
        stateVecLen = self.sampler.stateVecLen
        factorVectLen = self.sampler.factorVectLen
        size = len(self.sampler.data)

        self.data = np.zeros(size,
                             dtype=[('coords', float, 2),
                                    ('state', float, stateVecLen),
                                    ('factors', float, factorVectLen),
                                    ('output', float, outputVecLen)])
        self.data['coords'] = self.sampler.data['coords']
        self.data['state'] = self.sampler.data['state']
        self.data['factors'] = self.sampler.data['factors']
        self.data['output'] = [
            self.getOutputVector(sample['output'])
            for sample in self.sampler.data
        ]
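After this call the manager holds a NumPy structured array; a short sketch of how its fields could be inspected (the field names come from the dtype above, the manager instance is hypothetical):

    sample = manager.data[0]
    print(sample['coords'])   # pixel coordinates of the sample
    print(sample['state'])    # dummy-coded state neighbourhood
    print(sample['factors'])  # normalized factor values
    print(sample['output'])   # sigmoid-coded target vector (see getOutputVector)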
Example #7
    def _predict(self, state, factors):
        '''
        Calculate output and confidence rasters using MLP model and input rasters
        @param state            Raster of the current state (classes) values.
        @param factors          List of the factor rasters (predicting variables).
        '''
        geodata = state.getGeodata()
        rows, cols = geodata['ySize'], geodata['xSize']
        for r in factors:
            if not state.geoDataMatch(r):
                raise MlpManagerError('Geometries of the input rasters are different!')

        # Normalize factors before prediction:
        for f in factors:
            f.normalize(mode = 'mean')

        predicted_band  = np.zeros([rows, cols])
        confidence_band = np.zeros([rows, cols])

        sampler = Sampler(state, factors, ns=self.ns)
        mask = state.getBand(1).mask.copy()
        for i in range(rows):
            for j in range(cols):
                if not mask[i,j]:
                    input = sampler.get_inputs(state, factors, i,j)
                    if input is not None:
                        out = self.getOutput(input)
                        # Get index of the biggest output value as the result
                        biggest = max(out)
                        res = list(out).index(biggest)
                        predicted_band[i, j] = self.classlist[res]

                        confidence = self.outputConfidence(out)
                        confidence_band[i, j] = confidence
                    else: # Input sample is incomplete => mask this pixel
                        mask[i, j] = True
        predicted_bands  = [np.ma.array(data = predicted_band, mask = mask)]
        confidence_bands = [np.ma.array(data = confidence_band, mask = mask)]

        self.prediction = Raster()
        self.prediction.create(predicted_bands, geodata)
        self.confidence = Raster()
        self.confidence.create(confidence_bands, geodata)
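The max-plus-index idiom used above to pick the winning class is the hand-rolled equivalent of NumPy's argmax; a small sketch of the equivalence (the output vector is made up):

    import numpy as np

    out = np.array([0.1, 0.7, 0.2])          # hypothetical MLP output vector
    res = int(np.argmax(out))                 # index of the biggest value
    assert res == list(out).index(max(out))   # same result as the code above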
Example #8
    def test_cat2vect(self):
        smp = Sampler(self.state, self.factors, self.output, ns=0)
        assert_array_equal(smp.cat2vect(0), [1, 0])
        assert_array_equal(smp.cat2vect(1), [0, 1])
        assert_array_equal(smp.cat2vect(2), [0, 0])

        inputRast = Raster('../../examples/sites.tif')
        inputRast.resetMask([0])
        smp = Sampler(inputRast, self.factors, self.output, ns=0)
        assert_array_equal(smp.cat2vect(1), [1])
        assert_array_equal(smp.cat2vect(2), [0])
Example #9
    def setTrainingData(self):
        state, factors, output, mode, samples = self.state, self.factors, self.output, self.mode, self.samples
        if not self.logreg:
            raise LRError('You must create a Logistic Regression model first!')

        # Normalize factors before sampling:
        for f in factors:
            f.normalize(mode = 'mean')

        self.sampler = Sampler(state, factors, output, ns=self.ns)
        self.__propagateSamplerSignals()
        self.sampler.setTrainingData(state, output, shuffle=False, mode=mode, samples=samples)

        outputVecLen  = self.sampler.outputVecLen
        stateVecLen   = self.sampler.stateVecLen
        factorVectLen = self.sampler.factorVectLen
        size = len(self.sampler.data)

        self.data = self.sampler.data
        self.catlist = np.unique(self.data['output'])
Example #10
    def test_get_state(self):
        smp = Sampler(self.state, self.factors,  self.output, ns=0)
        assert_array_equal(smp.get_state(self.state, 1,1), [0, 0])
        assert_array_equal(smp.get_state(self.state, 0,0), [0, 1])

        smp = Sampler(self.state, self.factors,  self.output, ns=1)
        res = [ 0, 1, 0, 0, 0, 1,
                0, 1, 0, 0, 0, 1,
                1, 0, 0, 1, 0, 0,
            ]
        assert_array_equal(smp.get_state(self.state, 1,1), res)

        inputRast = Raster('../../examples/sites.tif')
        inputRast.resetMask([0])
        smp = Sampler(inputRast, self.factors,  self.output, ns=0)
        assert_array_equal(smp.get_state(self.state, 1,1), [0])
        assert_array_equal(smp.get_state(self.state, 0,0), [1])
Example #11
class MlpManager(QObject):
    '''This class gets the data extracted from the UI,
    passes it to the multi-layer perceptron, then gets and stores the result.
    '''

    updateGraph = pyqtSignal(float, float)  # Train error, val. error
    updateMinValErr = pyqtSignal(float)  # Min validation error
    updateDeltaRMS = pyqtSignal(
        float)  # Delta of RMS: min(valError) - currentValError
    updateKappa = pyqtSignal(float)  # Kappa value
    processFinished = pyqtSignal()
    processInterrupted = pyqtSignal()
    logMessage = pyqtSignal(str)
    errorReport = pyqtSignal(str)
    rangeChanged = pyqtSignal(str, int)
    updateProgress = pyqtSignal()

    def __init__(self, ns=0, MLP=None):

        QObject.__init__(self)

        self.MLP = MLP
        self.interrupted = False

        self.layers = None
        if self.MLP:
            self.layers = self.getMlpTopology()

        self.ns = ns  # Neighbourhood size of training rasters.
        self.data = None  # Training data
        self.catlist = None  # List of unique output values of the output raster
        self.train_error = None  # Error on training set
        self.val_error = None  # Error on validation set
        self.minValError = None  # The minimum error that is achieved on the validation set
        self.valKappa = 0  # Kappa on the validation set
        self.sampler = None  # Sampler

        # Results of the MLP prediction
        self.prediction = None  # Raster of the MLP prediction results
        self.confidence = None  # Raster of the MLP results confidence (1 = the maximum confidence, 0 = the least confidence)
        self.transitionPotentials = None  # Dictionary of transition potential maps: {category1: map1, category2: map2, ...}

        # Outputs of the activation function for small and big numbers
        self.sigmax, self.sigmin = sigmoid(100), sigmoid(
            -100)  # Max and Min of the sigmoid function
        self.sigrange = self.sigmax - self.sigmin  # Range of the sigmoid

    def computeMlpError(self, sample):
        '''Get MLP error on the sample'''
        input = np.hstack((sample['state'], sample['factors']))
        out = self.getOutput(input)
        err = ((sample['output'] - out)**2).sum() / len(out)
        return err

    def computePerformance(self, train_indexes, val_ind):
        '''Check errors of training and validation sets
        @param train_indexes     Tuple that contains indexes of the first and last elements of the training set.
        @param val_ind           Tuple that contains indexes of the first and last elements of the validation set.
        '''
        train_error = 0
        train_sampl = train_indexes[1] - train_indexes[
            0]  # Count of training samples
        for i in range(train_indexes[0], train_indexes[1]):
            train_error = train_error + self.computeMlpError(
                sample=self.data[i])
        self.setTrainError(train_error / train_sampl)

        if val_ind:
            val_error = 0
            val_sampl = val_ind[1] - val_ind[0]
            answers = np.ma.zeros(val_sampl)
            out = np.ma.zeros(val_sampl)
            for i in range(val_ind[0], val_ind[1]):
                sample = self.data[i]
                val_error = val_error + self.computeMlpError(
                    sample=self.data[i])

                input = np.hstack((sample['state'], sample['factors']))
                output = self.getOutput(input)
                out[i - val_ind[0]] = self.outCategory(output)
                answers[i - val_ind[0]] = self.outCategory(sample['output'])
            self.setValError(val_error / val_sampl)
            depCoef = DependenceCoef(out, answers, expand=True)
            self.valKappa = depCoef.kappa(mode=None)

    def copyWeights(self):
        '''Deep copy of the MLP weights'''
        return copy.deepcopy(self.MLP.weights)

    def createMlp(self, state, factors, output, hidden_layers):
        '''
        @param state            Raster of the current state (categories) values.
        @param factors          List of the factor rasters (predicting variables).
        @param hidden_layers    List of neuron counts in hidden layers.
        @param ns               Neighbourhood size.
        '''

        if output.getBandsCount() != 1:
            raise MlpManagerError('Output layer must have one band!')

        input_neurons = 0
        for raster in factors:
            input_neurons = input_neurons + raster.getNeighbourhoodSize(
                self.ns)

        # state raster contains categories. We need to use n-1 dummy variables (where n = number of categories)
        input_neurons = input_neurons + (len(state.getBandGradation(1)) -
                                         1) * state.getNeighbourhoodSize(
                                             self.ns)

        # Output category's (neuron) list and count
        self.catlist = output.getBandGradation(1)
        categories = len(self.catlist)

        # set neuron counts in the MLP layers
        self.layers = hidden_layers
        self.layers.insert(0, input_neurons)
        self.layers.append(categories)

        self.MLP = MLP(*self.layers)

    def getConfidence(self):
        return self.confidence

    def getInputVectLen(self):
        '''Length of input data vector of the MLP'''
        shape = self.getMlpTopology()
        return shape[0]

    def getOutput(self, input_vector):
        out = self.MLP.propagate_forward(input_vector)
        return out

    def getOutputVectLen(self):
        '''Length of the output data vector of the MLP'''
        shape = self.getMlpTopology()
        return shape[-1]

    def getOutputVector(self, val):
        '''Convert a number val into a vector,
        for example, let self.catlist = [1, 3, 4] then
        if val = 1, result = [ 1, -1, -1]
        if val = 3, result = [-1,  1, -1]
        if val = 4, result = [-1, -1,  1]
        where -1 is minimum of the sigmoid, 1 is max of the sigmoid
        '''
        size = self.getOutputVectLen()
        res = np.ones(size) * (self.sigmin)
        ind = np.where(self.catlist == val)
        res[ind] = self.sigmax
        return res

    def getMinValError(self):
        return self.minValError

    def getMlpTopology(self):
        return self.MLP.shape

    def getKappa(self):
        return self.valKappa

    def getPrediction(self, state, factors, calcTransitions=False):
        self._predict(state, factors, calcTransitions)
        return self.prediction

    def getTrainError(self):
        return self.train_error

    def getTransitionPotentials(self):
        return self.transitionPotentials

    def getValError(self):
        return self.val_error

    def outCategory(self, out_vector):
        # Get index of the biggest output value as the result
        biggest = max(out_vector)
        res = list(out_vector).index(biggest)
        res = self.catlist[res]
        return res

    def outputConfidence(self, output, scale=True):
        '''
        Return the confidence (difference between the two biggest values) of the MLP output.
        @param output: The output vector of the MLP
        @param scale: If True, scale the confidence to an int percent in [0, 100]
        '''
        out_scl = self.scaleOutput(output, percent=scale)
        out_scl.sort()
        return out_scl[-1] - out_scl[-2]

    def outputTransitions(self, output, scale=True):
        '''
        Return transition potentials of the outputs scaled to [0, 1] or to an int percent
        @param output: The output of the MLP
        @param scale: If True, scale the transitions to an int percent in [0, 100]
        '''
        out_scl = self.scaleOutput(output, percent=scale)
        result = {}
        for r, v in enumerate(out_scl):
            cat = self.catlist[r]
            result[cat] = v
        return result

    def scaleOutput(self, output, percent=True):
        '''
        Scale the output to the range [0, 1] or to an int percent
        @param output: Output of an MLP
        @param percent: If True, scale the output to an int percent in [0, 100]
        '''
        res = 1.0 * (output - self.sigmin) / self.sigrange
        if percent:
            res = [int(100 * x) for x in res]
        return res

    def _predict(self, state, factors, calcTransitions=False):
        '''
        Calculate output and confidence rasters using MLP model and input rasters
        @param state            Raster of the current state (categories) values.
        @param factors          List of the factor rasters (predicting variables).
        '''
        try:
            self.rangeChanged.emit(self.tr("Initialize model %p%"), 1)
            geodata = state.getGeodata()
            rows, cols = geodata['ySize'], geodata['xSize']
            for r in factors:
                if not state.geoDataMatch(r):
                    raise MlpManagerError(
                        'Geometries of the input rasters are different!')

            self.transitionPotentials = None  # Reset tr.potentials if they exist

            # Normalize factors before prediction:
            for f in factors:
                f.normalize(mode='mean')

            predicted_band = np.zeros([rows, cols], dtype=np.uint8)
            confidence_band = np.zeros([rows, cols], dtype=np.uint8)
            if calcTransitions:
                self.transitionPotentials = {}
                for cat in self.catlist:
                    self.transitionPotentials[cat] = np.zeros([rows, cols],
                                                              dtype=np.uint8)

            self.sampler = Sampler(state, factors, ns=self.ns)
            mask = state.getBand(1).mask.copy()
            if mask.shape == ():
                mask = np.zeros([rows, cols], dtype=bool)
            self.updateProgress.emit()
            self.rangeChanged.emit(self.tr("Prediction %p%"), rows)
            for i in range(rows):
                for j in range(cols):
                    if not mask[i, j]:
                        input = self.sampler.get_inputs(state, i, j)
                        if input is not None:
                            out = self.getOutput(input)
                            res = self.outCategory(out)
                            predicted_band[i, j] = res

                            confidence = self.outputConfidence(out)
                            confidence_band[i, j] = confidence

                            if calcTransitions:
                                potentials = self.outputTransitions(out)
                                for cat in self.catlist:
                                    map = self.transitionPotentials[cat]
                                    map[i, j] = potentials[cat]
                        else:  # Input sample is incomplete => mask this pixel
                            mask[i, j] = True
                self.updateProgress.emit()
            predicted_bands = [
                np.ma.array(data=predicted_band, mask=mask, dtype=np.uint8)
            ]
            confidence_bands = [
                np.ma.array(data=confidence_band, mask=mask, dtype=np.uint8)
            ]

            self.prediction = Raster()
            self.prediction.create(predicted_bands, geodata)
            self.confidence = Raster()
            self.confidence.create(confidence_bands, geodata)

            if calcTransitions:
                for cat in self.catlist:
                    band = [
                        np.ma.array(data=self.transitionPotentials[cat],
                                    mask=mask,
                                    dtype=np.uint8)
                    ]
                    self.transitionPotentials[cat] = Raster()
                    self.transitionPotentials[cat].create(band, geodata)
        except MemoryError:
            self.errorReport.emit(
                self.tr("The system out of memory during ANN prediction"))
            raise
        except:
            self.errorReport.emit(
                self.tr("An unknown error occurs during ANN prediction"))
            raise

    def readMlp(self):
        pass

    def resetErrors(self):
        self.val_error = np.finfo(float).max
        self.train_error = np.finfo(float).max

    def resetMlp(self):
        self.MLP.reset()
        self.resetErrors()

    def saveMlp(self):
        pass

    def saveSamples(self, fileName):
        self.sampler.saveSamples(fileName)

    def setMlpWeights(self, w):
        '''Set weights of the MLP'''
        self.MLP.weights = w

    def setTrainingData(self,
                        state,
                        factors,
                        output,
                        shuffle=True,
                        mode='All',
                        samples=None):
        '''
        @param state            Raster of the current state (categories) values.
        @param factors          List of the factor rasters (predicting variables).
        @param output           Raster that contains categories to predict.
        @param shuffle          Perform random shuffle.
        @param mode             Type of sampling method:
                                    All             Get all pixels
                                    Random          Random sampling; the count of samples in the data equals `samples`.
                                    Stratified      Undersampling of major categories and/or oversampling of minor categories.
        @param samples          Sample count of the training data (not used in 'All' mode).
        '''
        if not self.MLP:
            raise MlpManagerError('You must create an MLP first!')

        # Normalize factors before sampling:
        for f in factors:
            f.normalize(mode='mean')

        self.sampler = Sampler(state, factors, output, self.ns)
        self.sampler.setTrainingData(state=state,
                                     output=output,
                                     shuffle=shuffle,
                                     mode=mode,
                                     samples=samples)

        outputVecLen = self.getOutputVectLen()
        stateVecLen = self.sampler.stateVecLen
        factorVectLen = self.sampler.factorVectLen
        size = len(self.sampler.data)

        self.data = np.zeros(size,
                             dtype=[('coords', float, 2),
                                    ('state', float, stateVecLen),
                                    ('factors', float, factorVectLen),
                                    ('output', float, outputVecLen)])
        self.data['coords'] = self.sampler.data['coords']
        self.data['state'] = self.sampler.data['state']
        self.data['factors'] = self.sampler.data['factors']
        self.data['output'] = [
            self.getOutputVector(sample['output'])
            for sample in self.sampler.data
        ]

    def setTrainError(self, error):
        self.train_error = error

    def setValError(self, error):
        self.val_error = error

    def setEpochs(self, epochs):
        self.epochs = epochs

    def setValPercent(self, value=20):
        self.valPercent = value

    def setLRate(self, value=0.1):
        self.lrate = value

    def setMomentum(self, value=0.01):
        self.momentum = value

    def setContinueTrain(self, value=False):
        self.continueTrain = value

    def startTrain(self):
        self.train(self.epochs, self.valPercent, self.lrate, self.momentum,
                   self.continueTrain)

    def stopTrain(self):
        self.interrupted = True

    def train(self,
              epochs,
              valPercent=20,
              lrate=0.1,
              momentum=0.01,
              continue_train=False):
        '''Perform the training procedure on the MLP and save the best neural net
        @param epochs           Max iteration count.
        @param valPercent       Percent of the validation set.
        @param lrate            Learning rate.
        @param momentum         Learning momentum.
        @param continue_train   If False, start a new training cycle (reset the weights and the training/validation errors); if True, continue training.
        '''
        try:
            samples_count = len(self.data)
            val_sampl_count = samples_count * valPercent // 100
            apply_validation = val_sampl_count > 0  # Use the validation set or not
            train_sampl_count = samples_count - val_sampl_count

            # Set first train_sampl_count as training set, the other as validation set
            train_indexes = (0, train_sampl_count)
            val_indexes = (train_sampl_count,
                           samples_count) if apply_validation else None

            if not continue_train: self.resetMlp()
            self.minValError = self.getValError(
            )  # The minimum error that is achieved on the validation set
            last_train_err = self.getTrainError()
            best_weights = self.copyWeights(
            )  # The MLP weights when minimum error that is achieved on the validation set

            self.rangeChanged.emit(self.tr("Train model %p%"), epochs)
            for epoch in range(epochs):
                self.trainEpoch(train_indexes, lrate, momentum)
                self.computePerformance(train_indexes, val_indexes)
                self.updateGraph.emit(self.getTrainError(), self.getValError())
                self.updateDeltaRMS.emit(self.getMinValError() -
                                         self.getValError())
                self.updateKappa.emit(self.getKappa())

                QCoreApplication.processEvents()
                if self.interrupted:
                    self.processInterrupted.emit()
                    break

                last_train_err = self.getTrainError()
                self.setTrainError(last_train_err)
                if apply_validation and (self.getValError() <
                                         self.getMinValError()):
                    self.minValError = self.getValError()
                    best_weights = self.copyWeights()
                    self.updateMinValErr.emit(self.getMinValError())
                self.updateProgress.emit()

            self.setMlpWeights(best_weights)
        except MemoryError:
            self.errorReport.emit(
                self.tr("The system out of memory during ANN training"))
            raise
        except:
            self.errorReport.emit(
                self.tr("An unknown error occurs during ANN trainig"))
            raise
        finally:
            self.processFinished.emit()

    def trainEpoch(self, train_indexes, lrate=0.1, momentum=0.01):
        '''Perform a training epoch on the MLP
        @param train_indexes    Tuple of the min & max indexes of training samples in the samples data.
        @param lrate            Learning rate.
        @param momentum         Learning momentum.
        '''
        train_sampl = train_indexes[1] - train_indexes[0]

        for i in range(train_sampl):
            n = np.random.randint(*train_indexes)
            sample = self.data[n]
            input = np.hstack((sample['state'], sample['factors']))
            self.getOutput(input)  # Forward propagation
            self.MLP.propagate_backward(sample['output'], lrate, momentum)
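Putting the pieces of MlpManager together, a hedged end-to-end sketch (the raster paths and hidden-layer size are assumptions; every method name appears in the class above):

    state = Raster('init_state.tif')
    factors = [Raster('factor1.tif'), Raster('factor2.tif')]
    output = Raster('final_state.tif')

    manager = MlpManager(ns=1)
    manager.createMlp(state, factors, output, hidden_layers=[10])
    manager.setTrainingData(state, factors, output, mode='Stratified', samples=1000)
    manager.setEpochs(100)
    manager.setValPercent(20)
    manager.setLRate(0.1)
    manager.setMomentum(0.01)
    manager.setContinueTrain(False)
    manager.startTrain()              # trains and keeps the best weights
    prediction = manager.getPrediction(state, factors, calcTransitions=True)
    confidence = manager.getConfidence()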
Example #12
    def test_setTrainingData(self):
        smp = Sampler(self.output, self.factors,  self.output, ns=0)
        smp.setTrainingData(self.output, self.factors, self.output, shuffle=False)
        
        data = np.array(
            [
                (1.0, 1.0, 1.0),
                (2.0, 1.0, 2.0),
                (1.0, 3.0, 1.0),
                (1.0, 3.0, 1.0),
                (2.0, 2.0, 2.0),
                (1.0, 1.0, 1.0),
                (0.0, 0.0, 0.0),
                (1.0, 3.0, 1.0),
                (2.0, 1.0, 2.0),
            ], 
            dtype=[('state', float, (1,)), ('factors', float, (1,)), ('output', float, 1)]
        )
        for i in range(len(data)):
            assert_array_equal(data[i]['factors'], smp.data[i]['factors'])
            assert_array_equal(data[i]['output'], smp.data[i]['output'])
            assert_array_equal(data[i]['state'],  smp.data[i]['state'])

        # two factor_rasters
        smp = Sampler(self.output, self.factors2, self.output, ns=1)
        smp.setTrainingData(self.output, self.factors2, self.output)
        
        data = np.array(
            [
                ([1, 2, 1,   1, 2, 1,   0, 1, 2],
                 [1., 1., 3., 3., 2., 1., 0., 3., 1.,
                  1., 1., 3., 3., 2., 1., 0., 3., 1.],
                 2.0)
            ],
            dtype=[('state', float, (9,)), ('factors', float, (18,)), ('output', float, 1)]
        )
        assert_array_equal(data[0]['factors'], smp.data[0]['factors'])
        assert_array_equal(data[0]['output'], smp.data[0]['output'])
        assert_array_equal(data[0]['state'],  smp.data[0]['state'])
        
        # Multiband factors
        smp = Sampler(self.output, self.factors3, self.output, ns=1)
        smp.setTrainingData(self.output, self.factors3, self.output)
        
        data = np.array(
            [
                ([1, 2, 1,   1, 2, 1,   0, 1, 2],
                 [1., 2., 1., 1., 2., 1., 0., 1., 2.,
                  1., 1., 3., 3., 2., 1., 0., 3., 1.],
                 2.0)
            ],
            dtype=[('state', float, (9,)), ('factors', float, (18,)), ('output', float, 1)]
        )
        
        assert_array_equal(data[0]['factors'], smp.data[0]['factors'])
        assert_array_equal(data[0]['output'], smp.data[0]['output'])
        assert_array_equal(data[0]['state'],  smp.data[0]['state'])

        # Several factor bands, several factor rasters
        smp = Sampler(self.output, self.factors4, self.output, ns=1)
        smp.setTrainingData(self.output, self.factors4, self.output)
        data = np.array(
            [
                ([1, 2, 1,   1, 2, 1,   0, 1, 2],
                 [1., 2., 1., 1., 2., 1., 0., 1., 2.,
                  1., 1., 3., 3., 2., 1., 0., 3., 1.,
                  1., 1., 3., 3., 2., 1., 0., 3., 1.],
                 2.0)
            ],
            dtype=[('state', float, (9,)), ('factors', float, (27,)), ('output', float, 1)]
        )
        assert_array_equal(data[0]['factors'], smp.data[0]['factors'])
        assert_array_equal(data[0]['output'], smp.data[0]['output'])
        assert_array_equal(data[0]['state'],  smp.data[0]['state'])
        
        # Mode = Normal
        # As the Multiband factors example, but 10 samples:
        data = np.array(
            [
                ([1, 2, 1,   1, 2, 1,   0, 1, 2],
                 [1., 2., 1., 1., 2., 1., 0., 1., 2.,
                  1., 1., 3., 3., 2., 1., 0., 3., 1.],
                 2.0)
            ],
            dtype=[('state', float, (9,)), ('factors', float, (18,)), ('output', float, 1)]
        )
        smp = Sampler(self.output, self.factors3, self.output, ns=1)
        smp.setTrainingData(self.output, self.factors3, self.output, mode='Normal', samples=10)
        for i in range(10):
            assert_array_equal(data[0]['factors'], smp.data[i]['factors'])
            assert_array_equal(data[0]['output'], smp.data[i]['output'])
            assert_array_equal(data[0]['state'],  smp.data[i]['state'])
            
        # Mode = Balanced
        smp = Sampler(self.output, self.factors, self.output, ns=0)
        smp.setTrainingData(self.output, self.factors, self.output, mode='Balanced', samples=15)
        out =  smp.data['output']
        out.sort()
        self.assertEqual(out[0],  0)
        self.assertEqual(out[4],  0)
        self.assertEqual(out[5],  1)
        self.assertEqual(out[9],  1)
        self.assertEqual(out[10], 2)
Example #13
class LR(QObject):
    """
    Implements Logistic Regression model definition and calibration
    (maximum likelihood parameter estimation).
    """

    rangeChanged = pyqtSignal(str, int)
    updateProgress = pyqtSignal()
    processFinished = pyqtSignal()
    samplingFinished = pyqtSignal()
    finished = pyqtSignal()
    logMessage = pyqtSignal(str)
    errorReport = pyqtSignal(str)

    def __init__(self, ns=0, logreg=None):

        QObject.__init__(self)

        if logreg:
            self.logreg = logreg
        else:
            self.logreg = mlr.MLR()

        self.state = None
        self.factors = None
        self.output = None
        self.mode = "All"
        self.samples = None
        self.catlist = None

        self.ns = ns            # Neighbourhood size of training rasters.
        self.data = None        # Training data
        self.maxiter = 100      # Maximum of fitting iterations

        self.sampler = None     # Sampler

        # Results of the LR prediction
        self.prediction = None  # Raster of the LR prediction results
        self.confidence = None  # Raster of the LR results confidence (1 = the maximum confidence, 0 = the least confidence)
        self.Kappa      = 0     # Kappa value
        self.pseudoR    = 0     # Pseudo R-squared (Count) (http://www.ats.ucla.edu/stat/mult_pkg/faq/general/Psuedo_RSquareds.htm)
        self.transitionPotentials = None # Dictionary of transition potential maps: {category1: map1, category2: map2, ...}

    def getCoef(self):
        return self.logreg.get_weights().T

    def getConfidence(self):
        return self.confidence

    def getIntercept(self):
        return self.logreg.get_intercept()

    def getKappa(self):
        return self.Kappa

    def getStdErrIntercept(self):
        X = np.column_stack( (self.data['state'], self.data['factors']) )
        return self.logreg.get_stderr_intercept(X)

    def getStdErrWeights(self):
        X = np.column_stack( (self.data['state'], self.data['factors']) )
        return self.logreg.get_stderr_weights(X).T

    def get_PvalIntercept(self):
        X = np.column_stack( (self.data['state'], self.data['factors']) )
        return self.logreg.get_pval_intercept(X)

    def get_PvalWeights(self):
        X = np.column_stack( (self.data['state'], self.data['factors']) )
        return self.logreg.get_pval_weights(X).T

    def getPrediction(self, state, factors, calcTransitions=False):
        self._predict(state, factors, calcTransitions)
        return self.prediction

    def getPseudoR(self):
        return self.pseudoR

    def getTransitionPotentials(self):
        return self.transitionPotentials

    def _outputConfidence(self, input):
        '''
        Return the confidence (difference between the two biggest probabilities) of the LR output.
        1 = the maximum confidence, 0 = the least confidence
        '''
        out_scl = self.logreg.predict_proba(input)[0]
        # Calculate the confidence:
        out_scl.sort()
        return int(100 * (out_scl[-1] - out_scl[-2]) )

    def outputTransitions(self, input):
        '''
        Return transition potential of the outputs
        '''
        out_scl = self.logreg.predict_proba(input)[0]
        out_scl = [int(100 * x) for x in out_scl]
        result = {}
        for r, v in enumerate(out_scl):
            cat = self.catlist[r]
            result[cat] = v
        return result

    def _predict(self, state, factors, calcTransitions=False):
        '''
        Calculate output and confidence rasters using LR model and input rasters
        @param state            Raster of the current state (categories) values.
        @param factors          List of the factor rasters (predicting variables).
        '''
        try:
            self.rangeChanged.emit(self.tr("Initialize model %p%"), 1)
            geodata = state.getGeodata()
            rows, cols = geodata['ySize'], geodata['xSize']
            for r in factors:
                if not state.geoDataMatch(r):
                    raise LRError('Geometries of the input rasters are different!')

            self.transitionPotentials = None    # Reset tr.potentials if they exist

            # Normalize factors before prediction:
            for f in factors:
                f.normalize(mode = 'mean')

            predicted_band  = np.zeros([rows, cols], dtype=np.uint8)
            confidence_band = np.zeros([rows, cols], dtype=np.uint8)
            if calcTransitions:
                self.transitionPotentials = {}
                for cat in self.catlist:
                    self.transitionPotentials[cat] = np.zeros([rows, cols], dtype=np.uint8)

            self.sampler = Sampler(state, factors, ns=self.ns)
            mask = state.getBand(1).mask.copy()
            if mask.shape == ():
                mask = np.zeros([rows, cols], dtype=bool)
            self.updateProgress.emit()
            self.rangeChanged.emit(self.tr("Prediction %p%"), rows)
            for i in range(rows):
                for j in range(cols):
                    if not mask[i,j]:
                        input = self.sampler.get_inputs(state, i,j)
                        if input is not None:
                            input = np.array([input])
                            out = self.logreg.predict(input)
                            predicted_band[i,j] = out
                            confidence = self._outputConfidence(input)
                            confidence_band[i, j] = confidence

                            if calcTransitions:
                                potentials = self.outputTransitions(input)
                                for cat in self.catlist:
                                    map = self.transitionPotentials[cat]
                                    map[i, j] = potentials[cat]
                        else: # Input sample is incomplete => mask this pixel
                            mask[i, j] = True
                self.updateProgress.emit()
            predicted_bands  = [np.ma.array(data = predicted_band,  mask = mask, dtype=np.uint8)]
            confidence_bands = [np.ma.array(data = confidence_band, mask = mask, dtype=np.uint8)]

            self.prediction = Raster()
            self.prediction.create(predicted_bands, geodata)
            self.confidence = Raster()
            self.confidence.create(confidence_bands, geodata)

            if calcTransitions:
                for cat in self.catlist:
                    band = [np.ma.array(data=self.transitionPotentials[cat], mask=mask, dtype=np.uint8)]
                    self.transitionPotentials[cat] = Raster()
                    self.transitionPotentials[cat].create(band, geodata)
        except MemoryError:
            self.errorReport.emit(self.tr("The system out of memory during LR prediction"))
            raise
        except:
            self.errorReport.emit(self.tr("An unknown error occurs during LR prediction"))
            raise
        finally:
            self.processFinished.emit()

    def __propagateSamplerSignals(self):
        self.sampler.rangeChanged.connect(self.__samplerProgressRangeChanged)
        self.sampler.updateProgress.connect(self.__samplerProgressChanged)
        self.sampler.samplingFinished.connect(self.__samplerFinished)

    def __samplerFinished(self):
        self.sampler.rangeChanged.disconnect(self.__samplerProgressRangeChanged)
        self.sampler.updateProgress.disconnect(self.__samplerProgressChanged)
        self.sampler.samplingFinished.disconnect(self.__samplerFinished)
        self.samplingFinished.emit()

    def __samplerProgressRangeChanged(self, message, maxValue):
        self.rangeChanged.emit(message, maxValue)

    def __samplerProgressChanged(self):
        self.updateProgress.emit()

    def save(self):
        pass

    def saveSamples(self, fileName):
        self.sampler.saveSamples(fileName)

    def setMaxIter(self, maxiter):
        self.maxiter = maxiter

    def setTrainingData(self):
        state, factors, output, mode, samples = self.state, self.factors, self.output, self.mode, self.samples
        if not self.logreg:
            raise LRError('You must create a Logistic Regression model first!')

        # Normalize factors before sampling:
        for f in factors:
            f.normalize(mode = 'mean')

        self.sampler = Sampler(state, factors, output, ns=self.ns)
        self.__propagateSamplerSignals()
        self.sampler.setTrainingData(state, output, shuffle=False, mode=mode, samples=samples)

        outputVecLen  = self.sampler.outputVecLen
        stateVecLen   = self.sampler.stateVecLen
        factorVectLen = self.sampler.factorVectLen
        size = len(self.sampler.data)

        self.data = self.sampler.data
        self.catlist = np.unique(self.data['output'])

    def train(self):
        X = np.column_stack( (self.data['state'], self.data['factors']) )
        Y = self.data['output']
        self.labelCodes = np.unique(Y)
        self.logreg.fit(X, Y, maxiter=self.maxiter)
        out = self.logreg.predict(X)
        depCoef = DependenceCoef(np.ma.array(out), np.ma.array(Y), expand=True)
        self.Kappa = depCoef.kappa(mode=None)
        self.pseudoR = depCoef.correctness(percent = False)

    def setState(self, state):
        self.state = state

    def setFactors(self, factors):
        self.factors = factors

    def setOutput(self, output):
        self.output = output

    def setMode(self, mode):
        self.mode = mode

    def setSamples(self, samples):
        self.samples = samples

    def startTrain(self):
        try:
            self.setTrainingData()
            self.train()
        except MemoryError:
            self.errorReport.emit(self.tr("The system out of memory during LR training"))
            raise
        except:
            self.errorReport.emit(self.tr("An unknown error occurs during LR trainig"))
            raise
        finally:
            self.finished.emit()
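A hedged usage sketch for the LR manager (raster paths are assumptions; the setters and startTrain are defined in the class above):

    lr = LR(ns=0)
    lr.setState(Raster('init_state.tif'))
    lr.setFactors([Raster('factor1.tif')])
    lr.setOutput(Raster('final_state.tif'))
    lr.setMode('All')
    lr.setMaxIter(100)
    lr.startTrain()                   # samples the data, then fits the model
    print(lr.getKappa(), lr.getPseudoR())
    prediction = lr.getPrediction(lr.state, lr.factors)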
Example #14
    def _predict(self, state, factors, calcTransitions=False):
        '''
        Calculate output and confidence rasters using LR model and input rasters
        @param state            Raster of the current state (categories) values.
        @param factors          List of the factor rasters (predicting variables).
        '''
        try:
            self.rangeChanged.emit(self.tr("Initialize model %p%"), 1)
            geodata = state.getGeodata()
            rows, cols = geodata['ySize'], geodata['xSize']
            for r in factors:
                if not state.geoDataMatch(r):
                    raise LRError('Geometries of the input rasters are different!')

            self.transitionPotentials = None    # Reset tr.potentials if they exist

            # Normalize factors before prediction:
            for f in factors:
                f.normalize(mode = 'mean')

            predicted_band  = np.zeros([rows, cols], dtype=np.uint8)
            confidence_band = np.zeros([rows, cols], dtype=np.uint8)
            if calcTransitions:
                self.transitionPotentials = {}
                for cat in self.catlist:
                    self.transitionPotentials[cat] = np.zeros([rows, cols], dtype=np.uint8)

            self.sampler = Sampler(state, factors, ns=self.ns)
            mask = state.getBand(1).mask.copy()
            if mask.shape == ():
                mask = np.zeros([rows, cols], dtype=bool)
            self.updateProgress.emit()
            self.rangeChanged.emit(self.tr("Prediction %p%"), rows)
            for i in range(rows):
                for j in range(cols):
                    if not mask[i,j]:
                        input = self.sampler.get_inputs(state, i,j)
                        if input is not None:
                            input = np.array([input])
                            out = self.logreg.predict(input)
                            predicted_band[i,j] = out
                            confidence = self._outputConfidence(input)
                            confidence_band[i, j] = confidence

                            if calcTransitions:
                                potentials = self.outputTransitions(input)
                                for cat in self.catlist:
                                    map = self.transitionPotentials[cat]
                                    map[i, j] = potentials[cat]
                        else: # Input sample is incomplete => mask this pixel
                            mask[i, j] = True
                self.updateProgress.emit()
            predicted_bands  = [np.ma.array(data = predicted_band,  mask = mask, dtype=np.uint8)]
            confidence_bands = [np.ma.array(data = confidence_band, mask = mask, dtype=np.uint8)]

            self.prediction = Raster()
            self.prediction.create(predicted_bands, geodata)
            self.confidence = Raster()
            self.confidence.create(confidence_bands, geodata)

            if calcTransitions:
                for cat in self.catlist:
                    band = [np.ma.array(data=self.transitionPotentials[cat], mask=mask, dtype=np.uint8)]
                    self.transitionPotentials[cat] = Raster()
                    self.transitionPotentials[cat].create(band, geodata)
        except MemoryError:
            self.errorReport.emit(self.tr("The system out of memory during LR prediction"))
            raise
        except:
            self.errorReport.emit(self.tr("An unknown error occurs during LR prediction"))
            raise
        finally:
            self.processFinished.emit()
Example #15
    def test_setTrainingData(self):
        smp = Sampler(self.state, self.factors, self.output, ns=0)
        smp.setTrainingData(self.state, self.output, shuffle=False)

        data = np.array([
            ([0, 3], [0, 1], 1.0, 1.0),
            ([1, 3], [0, 0], 1.0, 2.0),
            ([2, 3], [0, 1], 3.0, 1.0),
            ([0, 2], [0, 1], 3.0, 1.0),
            ([1, 2], [0, 0], 2.0, 2.0),
            ([2, 2], [0, 1], 1.0, 1.0),
            ([0, 1], [1, 0], 0.0, 0.0),
            ([1, 1], [0, 1], 3.0, 1.0),
            ([2, 1], [0, 0], 1.0, 2.0),
        ],
                        dtype=[('coords', float, 2), ('state', float, (2, )),
                               ('factors', float, (1, )),
                               ('output', float, 1)])
        for i in range(len(data)):
            assert_array_equal(data[i]['coords'], smp.data[i]['coords'])
            assert_array_equal(data[i]['factors'], smp.data[i]['factors'])
            assert_array_equal(data[i]['output'], smp.data[i]['output'])
            assert_array_equal(data[i]['state'], smp.data[i]['state'])

        # two factor_rasters
        smp = Sampler(self.state, self.factors2, self.output, ns=1)
        smp.setTrainingData(self.state, self.output)

        # Check all except coords
        data = np.array(
            [(  #State categories: [1,2,1,   1,2,1,  0,1,2],
                [0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0],
                # Factors:
                [
                    1., 1., 3., 3., 2., 1., 0., 3., 1., 1., 1., 3., 3., 2., 1.,
                    0., 3., 1.
                ],
                # Output:
                2.0)],
            dtype=[('state', float, (18, )), ('factors', float, (18, )),
                   ('output', float, 1)])
        assert_array_equal(data[0]['factors'], smp.data[0]['factors'])
        assert_array_equal(data[0]['output'], smp.data[0]['output'])
        assert_array_equal(data[0]['state'], smp.data[0]['state'])

        # Multiband factors
        smp = Sampler(self.state, self.factors3, self.output, ns=1)
        smp.setTrainingData(self.state, self.output)

        # Check all except coords
        data = np.array(
            [([0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0], [
                1., 2., 1., 1., 2., 1., 0., 1., 2., 1., 1., 3., 3., 2., 1., 0.,
                3., 1.
            ], 2.0)],
            dtype=[('state', float, (18, )), ('factors', float, (18, )),
                   ('output', float, 1)])

        assert_array_equal(data[0]['factors'], smp.data[0]['factors'])
        assert_array_equal(data[0]['output'], smp.data[0]['output'])
        assert_array_equal(data[0]['state'], smp.data[0]['state'])

        # Several factor bands, several factor rasters
        smp = Sampler(self.state, self.factors4, self.output, ns=1)
        smp.setTrainingData(self.state, self.output)
        # Check all except coords
        data = np.array([
            ([0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0], [
                1., 2., 1., 1., 2., 1., 0., 1., 2., 1., 1., 3., 3., 2., 1., 0.,
                3., 1., 1., 1., 3., 3., 2., 1., 0., 3., 1.
            ], 2.0)
        ],
                        dtype=[('state', float, (18, )),
                               ('factors', float, (27, )),
                               ('output', float, 1)])
        assert_array_equal(data[0]['factors'], smp.data[0]['factors'])
        assert_array_equal(data[0]['output'], smp.data[0]['output'])
        assert_array_equal(data[0]['state'], smp.data[0]['state'])

        # Mode = Random
        # As the Multiband factors example, but 10 samples:
        data = np.array(
            [([0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0], [
                1., 2., 1., 1., 2., 1., 0., 1., 2., 1., 1., 3., 3., 2., 1., 0.,
                3., 1.
            ], 2.0)],
            dtype=[('state', float, (18, )), ('factors', float, (18, )),
                   ('output', float, 1)])
        smp = Sampler(self.state, self.factors3, self.output, ns=1)
        smp.setTrainingData(self.state, self.output, mode='Random', samples=10)
        for i in range(10):
            assert_array_equal(data[0]['factors'], smp.data[i]['factors'])
            assert_array_equal(data[0]['output'], smp.data[i]['output'])
            assert_array_equal(data[0]['state'], smp.data[i]['state'])

        # Mode = Stratified
        smp = Sampler(self.state, self.factors, self.output, ns=0)
        smp.setTrainingData(self.state,
                            self.output,
                            mode='Stratified',
                            samples=15)
        out = smp.data['output']
        out.sort()
        self.assertEqual(out[0], 0)
        self.assertEqual(out[4], 0)
        self.assertEqual(out[5], 1)
        self.assertEqual(out[9], 1)
        self.assertEqual(out[10], 2)
Example #16
    def _predict(self, state, factors, calcTransitions=False):
        '''
        Calculate output and confidence rasters using LR model and input rasters
        @param state            Raster of the current state (categories) values.
        @param factors          List of the factor rasters (predicting variables).
        '''
        try:
            self.rangeChanged.emit(self.tr("Initialize model %p%"), 1)
            geodata = state.getGeodata()
            rows, cols = geodata['ySize'], geodata['xSize']
            for r in factors:
                if not state.geoDataMatch(r):
                    raise LRError(
                        'Geometries of the input rasters are different!')

            self.transitionPotentials = None  # Reset tr.potentials if they exist

            # Normalize factors before prediction:
            for f in factors:
                f.normalize(mode='mean')

            predicted_band = np.zeros([rows, cols], dtype=np.uint8)
            confidence_band = np.zeros([rows, cols], dtype=np.uint8)
            if calcTransitions:
                self.transitionPotentials = {}
                for cat in self.catlist:
                    self.transitionPotentials[cat] = np.zeros([rows, cols],
                                                              dtype=np.uint8)

            self.sampler = Sampler(state, factors, ns=self.ns)
            mask = state.getBand(1).mask.copy()
            if mask.shape == ():
                mask = np.zeros([rows, cols], dtype=bool)
            self.updateProgress.emit()
            self.rangeChanged.emit(self.tr("Prediction %p%"), rows)
            for i in range(rows):
                for j in range(cols):
                    if not mask[i, j]:
                        input = self.sampler.get_inputs(state, i, j)
                        if input is not None:
                            input = np.array([input])
                            out = self.logreg.predict(input)
                            predicted_band[i, j] = out
                            confidence = self._outputConfidence(input)
                            confidence_band[i, j] = confidence

                            if calcTransitions:
                                potentials = self.outputTransitions(input)
                                for cat in self.catlist:
                                    map = self.transitionPotentials[cat]
                                    map[i, j] = potentials[cat]
                        else:  # Input sample is incomplete => mask this pixel
                            mask[i, j] = True
                self.updateProgress.emit()
            predicted_bands = [
                np.ma.array(data=predicted_band, mask=mask, dtype=np.uint8)
            ]
            confidence_bands = [
                np.ma.array(data=confidence_band, mask=mask, dtype=np.uint8)
            ]

            self.prediction = Raster()
            self.prediction.create(predicted_bands, geodata)
            self.confidence = Raster()
            self.confidence.create(confidence_bands, geodata)

            if calcTransitions:
                for cat in self.catlist:
                    band = [
                        np.ma.array(data=self.transitionPotentials[cat],
                                    mask=mask,
                                    dtype=np.uint8)
                    ]
                    self.transitionPotentials[cat] = Raster()
                    self.transitionPotentials[cat].create(band, geodata)
        except MemoryError:
            self.errorReport.emit(
                self.tr("The system is out of memory during LR prediction"))
            raise
        except Exception:
            self.errorReport.emit(
                self.tr("An unknown error occurred during LR prediction"))
            raise
        finally:
            self.processFinished.emit()
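
A standalone sketch of the masked-raster pattern used by _predict above: pixels whose neighbourhood sample is incomplete are added to the mask, and the result band is wrapped in np.ma.array so downstream code ignores them. Only NumPy is assumed; get_inputs below is a hypothetical stand-in for Sampler.get_inputs:

    import numpy as np

    rows, cols = 3, 3
    predicted = np.zeros([rows, cols], dtype=np.uint8)
    mask = np.zeros([rows, cols], dtype=bool)

    def get_inputs(i, j):
        # Pretend the sample at (1, 1) is incomplete
        return None if (i, j) == (1, 1) else np.array([i, j], dtype=float)

    for i in range(rows):
        for j in range(cols):
            sample = get_inputs(i, j)
            if sample is not None:
                predicted[i, j] = int(sample.sum()) % 4  # dummy stand-in "model"
            else:
                mask[i, j] = True  # incomplete input => masked pixel

    band = np.ma.array(data=predicted, mask=mask, dtype=np.uint8)
    print(band)  # the (1, 1) cell prints as '--'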
Exemple #17
0
class LR(QObject):
    """
    Implements Logistic Regression model definition and calibration
    (maximum likelihood parameter estimation).
    """

    rangeChanged = pyqtSignal(str, int)
    updateProgress = pyqtSignal()
    processFinished = pyqtSignal()
    samplingFinished = pyqtSignal()
    finished = pyqtSignal()
    logMessage = pyqtSignal(str)
    errorReport = pyqtSignal(str)

    def __init__(self, ns=0, logreg=None):

        QObject.__init__(self)

        if logreg:
            self.logreg = logreg
        else:
            self.logreg = mlr.MLR()

        self.state = None
        self.factors = None
        self.output = None
        self.mode = "All"
        self.samples = None
        self.catlist = None

        self.ns = ns  # Neighbourhood size of training rasters.
        self.data = None  # Training data
        self.maxiter = 100  # Maximum number of fitting iterations

        self.sampler = None  # Sampler

        # Results of the LR prediction
        self.prediction = None  # Raster of the LR prediction results
        self.confidence = None  # Raster of the LR results confidence (100 = the maximum confidence, 0 = the least confidence)
        self.Kappa = 0  # Kappa value
        self.pseudoR = 0  # Pseudo R-squared (Count) (http://www.ats.ucla.edu/stat/mult_pkg/faq/general/Psuedo_RSquareds.htm)
        self.transitionPotentials = None  # Dictionary of transition potential maps: {category1: map1, category2: map2, ...}

    def getCoef(self):
        return self.logreg.get_weights().T

    def getConfidence(self):
        return self.confidence

    def getIntercept(self):
        return self.logreg.get_intercept()

    def getKappa(self):
        return self.Kappa

    def getStdErrIntercept(self):
        X = np.column_stack((self.data['state'], self.data['factors']))
        return self.logreg.get_stderr_intercept(X)

    def getStdErrWeights(self):
        X = np.column_stack((self.data['state'], self.data['factors']))
        return self.logreg.get_stderr_weights(X).T

    def get_PvalIntercept(self):
        X = np.column_stack((self.data['state'], self.data['factors']))
        return self.logreg.get_pval_intercept(X)

    def get_PvalWeights(self):
        X = np.column_stack((self.data['state'], self.data['factors']))
        return self.logreg.get_pval_weights(X).T

    def getPrediction(self, state, factors, calcTransitions=False):
        self._predict(state, factors, calcTransitions)
        return self.prediction

    def getPseudoR(self):
        return self.pseudoR

    def getTransitionPotentials(self):
        return self.transitionPotentials

    def _outputConfidence(self, input_vec):
        '''
        Return the confidence (difference between the 2 biggest probabilities) of the LR output,
        scaled to integer percent: 100 = the maximum confidence, 0 = the least confidence.
        '''
        out_scl = self.logreg.predict_proba(input_vec)[0]
        # Calculate the confidence:
        out_scl.sort()
        return int(100 * (out_scl[-1] - out_scl[-2]))
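
    # Worked example with assumed probabilities (not produced by the model above):
    # if predict_proba returns [0.1, 0.2, 0.7], the two largest values after the
    # sort are 0.7 and 0.2, so the method returns int(100 * (0.7 - 0.2)) == 50.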

    def outputTransitions(self, input_vec):
        '''
        Return the transition potential of each output category
        '''
        out_scl = self.logreg.predict_proba(input_vec)[0]
        out_scl = [int(100 * x) for x in out_scl]
        result = {}
        for r, v in enumerate(out_scl):
            cat = self.catlist[r]
            result[cat] = v
        return result

    def _predict(self, state, factors, calcTransitions=False):
        '''
        Calculate output and confidence rasters using LR model and input rasters
        @param state            Raster of the current state (categories) values.
        @param factors          List of the factor rasters (predicting variables).
        '''
        try:
            self.rangeChanged.emit(self.tr("Initialize model %p%"), 1)
            geodata = state.getGeodata()
            rows, cols = geodata['ySize'], geodata['xSize']
            for r in factors:
                if not state.geoDataMatch(r):
                    raise LRError(
                        'Geometries of the input rasters are different!')

            self.transitionPotentials = None  # Reset tr.potentials if they exist

            # Normalize factors before prediction:
            for f in factors:
                f.normalize(mode='mean')

            predicted_band = np.zeros([rows, cols], dtype=np.uint8)
            confidence_band = np.zeros([rows, cols], dtype=np.uint8)
            if calcTransitions:
                self.transitionPotentials = {}
                for cat in self.catlist:
                    self.transitionPotentials[cat] = np.zeros([rows, cols],
                                                              dtype=np.uint8)

            self.sampler = Sampler(state, factors, ns=self.ns)
            mask = state.getBand(1).mask.copy()
            if mask.shape == ():
                mask = np.zeros([rows, cols], dtype=bool)
            self.updateProgress.emit()
            self.rangeChanged.emit(self.tr("Prediction %p%"), rows)
            for i in range(rows):
                for j in range(cols):
                    if not mask[i, j]:
                        input_vec = self.sampler.get_inputs(state, i, j)
                        if input_vec is not None:
                            input_vec = np.array([input_vec])
                            out = self.logreg.predict(input_vec)
                            predicted_band[i, j] = out
                            confidence = self._outputConfidence(input_vec)
                            confidence_band[i, j] = confidence

                            if calcTransitions:
                                potentials = self.outputTransitions(input_vec)
                                for cat in self.catlist:
                                    cat_map = self.transitionPotentials[cat]
                                    cat_map[i, j] = potentials[cat]
                        else:  # Input sample is incomplete => mask this pixel
                            mask[i, j] = True
                self.updateProgress.emit()
            predicted_bands = [
                np.ma.array(data=predicted_band, mask=mask, dtype=np.uint8)
            ]
            confidence_bands = [
                np.ma.array(data=confidence_band, mask=mask, dtype=np.uint8)
            ]

            self.prediction = Raster()
            self.prediction.create(predicted_bands, geodata)
            self.confidence = Raster()
            self.confidence.create(confidence_bands, geodata)

            if calcTransitions:
                for cat in self.catlist:
                    band = [
                        np.ma.array(data=self.transitionPotentials[cat],
                                    mask=mask,
                                    dtype=np.uint8)
                    ]
                    self.transitionPotentials[cat] = Raster()
                    self.transitionPotentials[cat].create(band, geodata)
        except MemoryError:
            self.errorReport.emit(
                self.tr("The system is out of memory during LR prediction"))
            raise
        except Exception:
            self.errorReport.emit(
                self.tr("An unknown error occurred during LR prediction"))
            raise
        finally:
            self.processFinished.emit()

    def __propagateSamplerSignals(self):
        self.sampler.rangeChanged.connect(self.__samplerProgressRangeChanged)
        self.sampler.updateProgress.connect(self.__samplerProgressChanged)
        self.sampler.samplingFinished.connect(self.__samplerFinished)

    def __samplerFinished(self):
        self.sampler.rangeChanged.disconnect(
            self.__samplerProgressRangeChanged)
        self.sampler.updateProgress.disconnect(self.__samplerProgressChanged)
        self.sampler.samplingFinished.disconnect(self.__samplerFinished)
        self.samplingFinished.emit()

    def __samplerProgressRangeChanged(self, message, maxValue):
        self.rangeChanged.emit(message, maxValue)

    def __samplerProgressChanged(self):
        self.updateProgress.emit()

    def save(self):
        pass

    def saveSamples(self, fileName):
        self.sampler.saveSamples(fileName)

    def setMaxIter(self, maxiter):
        self.maxiter = maxiter

    def setTrainingData(self):
        state, factors, output, mode, samples = self.state, self.factors, self.output, self.mode, self.samples
        if not self.logreg:
            raise LRError(
                'You must create a Logistic Regression model before!')

        # Normalize factors before sampling:
        for f in factors:
            f.normalize(mode='mean')

        self.sampler = Sampler(state, factors, output, ns=self.ns)
        self.__propagateSamplerSignals()
        self.sampler.setTrainingData(state,
                                     output,
                                     shuffle=False,
                                     mode=mode,
                                     samples=samples)

        self.data = self.sampler.data
        self.catlist = np.unique(self.data['output'])

    def train(self):
        X = np.column_stack((self.data['state'], self.data['factors']))
        Y = self.data['output']
        self.labelCodes = np.unique(Y)
        self.logreg.fit(X, Y, maxiter=self.maxiter)
        out = self.logreg.predict(X)
        depCoef = DependenceCoef(np.ma.array(out), np.ma.array(Y), expand=True)
        self.Kappa = depCoef.kappa(mode=None)
        self.pseudoR = depCoef.correctness(percent=False)
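
    # The Count pseudo R-squared is simply the share of correctly classified
    # training samples. Worked sketch with assumed labels: if Y = [0, 0, 1, 1, 2, 2]
    # and the model predicts [0, 1, 1, 1, 2, 0], 4 of the 6 samples match, so
    # pseudoR = 4/6 ~ 0.67 (DependenceCoef.correctness is assumed to compute this ratio).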

    def setState(self, state):
        self.state = state

    def setFactors(self, factors):
        self.factors = factors

    def setOutput(self, output):
        self.output = output

    def setMode(self, mode):
        self.mode = mode

    def setSamples(self, samples):
        self.samples = samples

    def startTrain(self):
        try:
            self.setTrainingData()
            self.train()
        except MemoryError:
            self.errorReport.emit(
                self.tr("The system is out of memory during LR training"))
            raise
        except Exception:
            self.errorReport.emit(
                self.tr("An unknown error occurred during LR training"))
            raise
        finally:
            self.finished.emit()
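
A hedged usage sketch of the LR class above (the raster objects are assumptions; any Raster instances with matching geometry would do). The call sequence mirrors startTrain() and getPrediction():

    # Hypothetical rasters: state_raster (current categories), next_raster
    # (observed future categories), and factor rasters slope and roads.
    model = LR(ns=1)                    # neighbourhood size 1 (assumed 3x3 window)
    model.setState(state_raster)
    model.setFactors([slope, roads])
    model.setOutput(next_raster)
    model.setMode('All')                # sample every pixel
    model.startTrain()                  # setTrainingData() + train()

    prediction = model.getPrediction(state_raster, [slope, roads])
    confidence = model.getConfidence()  # uint8 percent, 100 = most confident
    print(model.getKappa(), model.getPseudoR())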
Exemple #18
0
    def test_setTrainingData(self):
        smp = Sampler(self.state, self.factors,  self.output, ns=0)
        smp.setTrainingData(self.state, self.output, shuffle=False)

        data = np.array(
            [
                ([0,3], [0, 1], 1.0, 1.0),
                ([1,3], [0, 0], 1.0, 2.0),
                ([2,3], [0, 1], 3.0, 1.0),
                ([0,2], [0, 1], 3.0, 1.0),
                ([1,2], [0, 0], 2.0, 2.0),
                ([2,2], [0, 1], 1.0, 1.0),
                ([0,1], [1, 0], 0.0, 0.0),
                ([1,1], [0, 1], 3.0, 1.0),
                ([2,1], [0, 0], 1.0, 2.0),
            ],
            dtype=[('coords', float, 2), ('state', float, (2,)), ('factors', float, (1,)), ('output', float, 1)]
        )
        for i in range(len(data)):
            assert_array_equal(data[i]['coords'], smp.data[i]['coords'])
            assert_array_equal(data[i]['factors'], smp.data[i]['factors'])
            assert_array_equal(data[i]['output'], smp.data[i]['output'])
            assert_array_equal(data[i]['state'],  smp.data[i]['state'])

        # two factor_rasters
        smp = Sampler(self.state, self.factors2, self.output, ns=1)
        smp.setTrainingData(self.state, self.output)

        # Check all except coords
        data = np.array(
            [
                (#State categories: [1,2,1,   1,2,1,  0,1,2],
                 [0,1,  0,0,  0,1,   0,1,  0,0,  0,1,   1,0,  0,1,  0,0],
                 # Factors:
                 [ 1.,  1.,  3.,  3.,  2.,  1.,  0.,  3.,  1.,
                            1.,  1.,  3.,  3.,  2.,  1.,  0.,  3.,  1.],
                 # Output:
                 2.0)
            ],
            dtype=[('state', float, (18,)), ('factors', float, (18,)), ('output', float, 1)]
        )
        assert_array_equal(data[0]['factors'], smp.data[0]['factors'])
        assert_array_equal(data[0]['output'], smp.data[0]['output'])
        assert_array_equal(data[0]['state'],  smp.data[0]['state'])

        # Multiband factors
        smp = Sampler(self.state, self.factors3, self.output, ns=1)
        smp.setTrainingData(self.state, self.output)

        # Check all except coords
        data = np.array(
            [
                ([0,1,  0,0,  0,1,   0,1,  0,0,  0,1,   1,0,  0,1,  0,0],
                 [ 1.,  2.,  1.,  1.,  2.,  1.,  0.,  1.,  2.,
                                1.,  1.,  3.,  3.,  2.,  1.,  0.,  3.,  1.],
                 2.0)
            ],
            dtype=[('state', float, (18,)), ('factors', float, (18,)), ('output', float, 1)]
        )

        assert_array_equal(data[0]['factors'], smp.data[0]['factors'])
        assert_array_equal(data[0]['output'], smp.data[0]['output'])
        assert_array_equal(data[0]['state'],  smp.data[0]['state'])

        # Several factor bands, several factor rasters
        smp = Sampler(self.state, self.factors4, self.output, ns=1)
        smp.setTrainingData(self.state, self.output)
        # Check all except coords
        data = np.array(
            [
                ([0,1,  0,0,  0,1,   0,1,  0,0,  0,1,   1,0,  0,1,  0,0],
                 [ 1.,  2.,  1.,  1.,  2.,  1.,  0.,  1.,  2.,
                                1.,  1.,  3.,  3.,  2.,  1.,  0.,  3.,  1.,
                                     1.,  1.,  3.,  3.,  2.,  1.,  0.,  3.,  1.],
                 2.0)
            ],
            dtype=[('state', float, (18,)), ('factors', float, (27,)), ('output', float, 1)]
        )
        assert_array_equal(data[0]['factors'], smp.data[0]['factors'])
        assert_array_equal(data[0]['output'], smp.data[0]['output'])
        assert_array_equal(data[0]['state'],  smp.data[0]['state'])

        # Mode = Random
        # As the Multiband factors example, but 10 samples:
        data = np.array(
            [
                ([0,1,  0,0,  0,1,   0,1,  0,0,  0,1,   1,0,  0,1,  0,0],
                 [ 1.,  2.,  1.,  1.,  2.,  1.,  0.,  1.,  2.,
                                1.,  1.,  3.,  3.,  2.,  1.,  0.,  3.,  1.],
                 2.0)
            ],
            dtype=[('state', float, (18,)), ('factors', float, (18,)), ('output', float, 1)]
        )
        smp = Sampler(self.state, self.factors3, self.output, ns=1)
        smp.setTrainingData(self.state, self.output, mode='Random', samples=10)
        for i in range(10):
            assert_array_equal(data[0]['factors'], smp.data[i]['factors'])
            assert_array_equal(data[0]['output'], smp.data[i]['output'])
            assert_array_equal(data[0]['state'],  smp.data[i]['state'])

        # Mode = Stratified
        smp = Sampler(self.state, self.factors, self.output, ns=0)
        smp.setTrainingData(self.state, self.output, mode='Stratified', samples=15)
        out =  smp.data['output']
        out.sort()
        self.assertEqual(out[0],  0)
        self.assertEqual(out[4],  0)
        self.assertEqual(out[5],  1)
        self.assertEqual(out[9],  1)
        self.assertEqual(out[10], 2)
Exemple #19
0
class MlpManager(QObject):
    '''This class gets the data extracted from the UI and
    pass it to multi-layer perceptron, then gets and stores the result.
    '''

    updateGraph = pyqtSignal(float, float)      # Train error, val. error
    updateMinValErr = pyqtSignal(float)         # Min validation error
    updateDeltaRMS  = pyqtSignal(float)         # Delta of RMS: min(valError) - currentValError
    updateKappa     = pyqtSignal(float)         # Kappa value
    processFinished = pyqtSignal()
    processInterrupted = pyqtSignal()
    logMessage = pyqtSignal(str)
    errorReport = pyqtSignal(str)
    rangeChanged = pyqtSignal(str, int)
    updateProgress = pyqtSignal()

    def __init__(self, ns=0, MLP=None):

        QObject.__init__(self)

        self.MLP = MLP
        self.interrupted = False

        self.layers = None
        if self.MLP:
            self.layers = self.getMlpTopology()

        self.ns = ns            # Neighbourhood size of training rasters.
        self.data = None        # Training data
        self.catlist     = None # List of unique output values of the output raster
        self.train_error = None # Error on training set
        self.val_error   = None # Error on validation set
        self.minValError = None # The minimum error that is achieved on the validation set
        self.valKappa    = 0     # Kappa on the validation set
        self.sampler     = None # Sampler

        # Results of the MLP prediction
        self.prediction = None  # Raster of the MLP prediction results
        self.confidence = None  # Raster of the MLP results confidence (100 = the maximum confidence, 0 = the least confidence)
        self.transitionPotentials = None # Dictionary of transition potential maps: {category1: map1, category2: map2, ...}

        # Outputs of the activation function for small and big numbers
        self.sigmax, self.sigmin = sigmoid(100), sigmoid(-100)  # Max and Min of the sigmoid function
        self.sigrange = self.sigmax - self.sigmin               # Range of the sigmoid

    def computeMlpError(self, sample):
        '''Get MLP error on the sample'''
        input_vec = np.hstack((sample['state'], sample['factors']))
        out = self.getOutput(input_vec)
        err = ((sample['output'] - out)**2).sum()/len(out)
        return err
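
    # Worked example with assumed vectors: for sample['output'] == [1., -1., -1.]
    # and a net output of [0.8, -0.9, -0.5], the squared differences are
    # [0.04, 0.01, 0.25], so err = 0.30 / 3 = 0.1 (mean squared error per output neuron).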

    def computePerformance(self, train_indexes, val_ind):
        '''Check errors of training and validation sets
        @param train_indexes     Tuple that contains indexes of the first and last elements of the training set.
        @param val_ind           Tuple that contains indexes of the first and last elements of the validation set.
        '''
        train_error = 0
        train_sampl = train_indexes[1] - train_indexes[0]       # Count of training samples
        for i in range(train_indexes[0], train_indexes[1]):
            train_error = train_error + self.computeMlpError(sample = self.data[i])
        self.setTrainError(train_error/train_sampl)

        if val_ind:
            val_error = 0
            val_sampl = val_ind[1] - val_ind[0]
            answers   = np.ma.zeros(val_sampl)
            out       = np.ma.zeros(val_sampl)
            for i in range(val_ind[0], val_ind[1]):
                sample = self.data[i]
                val_error = val_error + self.computeMlpError(sample = self.data[i])

                input_vec = np.hstack((sample['state'], sample['factors']))
                output = self.getOutput(input_vec)
                out[i-val_ind[0]]     = self.outCategory(output)
                answers[i-val_ind[0]] = self.outCategory(sample['output'])
            self.setValError(val_error/val_sampl)
            depCoef = DependenceCoef(out, answers, expand=True)
            self.valKappa = depCoef.kappa(mode=None)

    def copyWeights(self):
        '''Deep copy of the MLP weights'''
        return copy.deepcopy(self.MLP.weights)

    def createMlp(self, state, factors, output, hidden_layers):
        '''
        @param state            Raster of the current state (categories) values.
        @param factors          List of the factor rasters (predicting variables).
        @param output           Raster that contains categories to predict.
        @param hidden_layers    List of neuron counts in hidden layers.
        The neighbourhood size is taken from self.ns.
        '''

        if output.getBandsCount() != 1:
            raise MlpManagerError('Output layer must have one band!')

        input_neurons = 0
        for raster in factors:
            input_neurons = input_neurons + raster.getNeighbourhoodSize(self.ns)

        # The state raster contains categories. We need to use n-1 dummy variables (where n = number of categories)
        input_neurons = input_neurons + (len(state.getBandGradation(1))-1) * state.getNeighbourhoodSize(self.ns)

        # Output category's (neuron) list and count
        self.catlist = output.getBandGradation(1)
        categories = len(self.catlist)

        # set neuron counts in the MLP layers
        self.layers = hidden_layers
        self.layers.insert(0, input_neurons)
        self.layers.append(categories)

        self.MLP = MLP(*self.layers)
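
    # Worked example of the topology arithmetic under assumed counts: with ns = 1
    # (assume a 3x3 window, i.e. 9 pixels per band), one single-band factor raster,
    # a state raster with 3 categories and hidden_layers = [10], the input layer
    # gets 1*9 + (3-1)*9 = 27 neurons; with 4 output categories the resulting
    # topology is [27, 10, 4].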

    def getConfidence(self):
        return self.confidence

    def getInputVectLen(self):
        '''Length of input data vector of the MLP'''
        shape = self.getMlpTopology()
        return shape[0]

    def getOutput(self, input_vector):
        out = self.MLP.propagate_forward( input_vector )
        return out

    def getOutputVectLen(self):
        '''Length of the output data vector of the MLP'''
        shape = self.getMlpTopology()
        return shape[-1]

    def getOutputVector(self, val):
        '''Convert a number val into vector,
        for example, let self.catlist = [1, 3, 4] then
        if val = 1, result = [ 1, -1, -1]
        if val = 3, result = [-1,  1, -1]
        if val = 4, result = [-1, -1,  1]
        where -1 is minimum of the sigmoid, 1 is max of the sigmoid
        '''
        size = self.getOutputVectLen()
        res = np.ones(size) * (self.sigmin)
        ind = np.where(self.catlist==val)
        res[ind] = self.sigmax
        return res

    def getMinValError(self):
        return self.minValError

    def getMlpTopology(self):
        return self.MLP.shape

    def getKappa(self):
        return self.valKappa

    def getPrediction(self, state, factors, calcTransitions=False):
        self._predict(state, factors, calcTransitions)
        return self.prediction

    def getTrainError(self):
        return self.train_error

    def getTransitionPotentials(self):
        return self.transitionPotentials

    def getValError(self):
        return self.val_error

    def outCategory(self, out_vector):
        # Get index of the biggest output value as the result
        biggest = max(out_vector)
        res = list(out_vector).index(biggest)
        res = self.catlist[res]
        return res

    def outputConfidence(self, output, scale=True):
        '''
        Return the confidence (difference between the 2 biggest values) of the MLP output.
        @param output: The output of the MLP
        @param scale: If True, scale the confidence to integer percent [0, 1, ..., 100]
        '''
        out_scl = self.scaleOutput(output, percent=scale)
        out_scl.sort()
        return out_scl[-1] - out_scl[-2]

    def outputTransitions(self, output, scale=True):
        '''
        Return the transition potential of the outputs scaled to [0, 1] or integer percent [0, 100]
        @param output: The output of the MLP
        @param scale: If True, scale the transitions to integer percent [0, 1, ..., 100]
        '''
        out_scl = self.scaleOutput(output, percent=scale)
        result = {}
        for r, v in enumerate(out_scl):
            cat = self.catlist[r]
            result[cat] = v
        return result

    def scaleOutput(self, output, percent=True):
        '''
        Scale the output to the range [0, 1] or integer percent [0, 100]
        @param output: Output of the MLP
        @param percent: If True, scale the output to integer percent [0, 1, ..., 100]
        '''
        res = 1.0 * (output - self.sigmin) / self.sigrange
        if percent:
            res = [ int(100 * x) for x in res]
        return res
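
    # Worked example with assumed outputs: sigmin ~ 0 and sigmax ~ 1 for the
    # logistic sigmoid, so output = [0.1, 0.9] scales to roughly [0.1, 0.9],
    # or [10, 90] after conversion to integer percent.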

    def _predict(self, state, factors, calcTransitions=False):
        '''
        Calculate output and confidence rasters using MLP model and input rasters
        @param state            Raster of the current state (categories) values.
        @param factors          List of the factor rasters (predicting variables).
        '''
        try:
            self.rangeChanged.emit(self.tr("Initialize model %p%"), 1)
            geodata = state.getGeodata()
            rows, cols = geodata['ySize'], geodata['xSize']
            for r in factors:
                if not state.geoDataMatch(r):
                    raise MlpManagerError('Geometries of the input rasters are different!')

            self.transitionPotentials = None    # Reset tr.potentials if they exist

            # Normalize factors before prediction:
            for f in factors:
                f.normalize(mode = 'mean')

            predicted_band  = np.zeros([rows, cols], dtype=np.uint8)
            confidence_band = np.zeros([rows, cols], dtype=np.uint8)
            if calcTransitions:
                self.transitionPotentials = {}
                for cat in self.catlist:
                    self.transitionPotentials[cat] = np.zeros([rows, cols], dtype=np.uint8)

            self.sampler = Sampler(state, factors, ns=self.ns)
            mask = state.getBand(1).mask.copy()
            if mask.shape == ():
                mask = np.zeros([rows, cols], dtype=bool)
            self.updateProgress.emit()
            self.rangeChanged.emit(self.tr("Prediction %p%"), rows)
            for i in range(rows):
                for j in range(cols):
                    if not mask[i, j]:
                        input_vec = self.sampler.get_inputs(state, i, j)
                        if input_vec is not None:
                            out = self.getOutput(input_vec)
                            res = self.outCategory(out)
                            predicted_band[i, j] = res

                            confidence = self.outputConfidence(out)
                            confidence_band[i, j] = confidence

                            if calcTransitions:
                                potentials = self.outputTransitions(out)
                                for cat in self.catlist:
                                    cat_map = self.transitionPotentials[cat]
                                    cat_map[i, j] = potentials[cat]
                        else: # Input sample is incomplete => mask this pixel
                            mask[i, j] = True
                self.updateProgress.emit()
            predicted_bands  = [np.ma.array(data = predicted_band,  mask = mask, dtype=np.uint8)]
            confidence_bands = [np.ma.array(data = confidence_band, mask = mask, dtype=np.uint8)]

            self.prediction = Raster()
            self.prediction.create(predicted_bands, geodata)
            self.confidence = Raster()
            self.confidence.create(confidence_bands, geodata)

            if calcTransitions:
                for cat in self.catlist:
                    band = [np.ma.array(data=self.transitionPotentials[cat], mask=mask, dtype=np.uint8)]
                    self.transitionPotentials[cat] = Raster()
                    self.transitionPotentials[cat].create(band, geodata)
        except MemoryError:
            self.errorReport.emit(self.tr("The system is out of memory during ANN prediction"))
            raise
        except Exception:
            self.errorReport.emit(self.tr("An unknown error occurred during ANN prediction"))
            raise

    def readMlp(self):
        pass

    def resetErrors(self):
        self.val_error = np.finfo(float).max
        self.train_error = np.finfo(float).max

    def resetMlp(self):
        self.MLP.reset()
        self.resetErrors()

    def saveMlp(self):
        pass

    def saveSamples(self, fileName):
        self.sampler.saveSamples(fileName)

    def setMlpWeights(self, w):
        '''Set weights of the MLP'''
        self.MLP.weights = w

    def setTrainingData(self, state, factors, output, shuffle=True, mode='All', samples=None):
        '''
        @param state            Raster of the current state (categories) values.
        @param factors          List of the factor rasters (predicting variables).
        @param output           Raster that contains categories to predict.
        @param shuffle          Perform random shuffle.
        @param mode             Type of sampling method:
                                    All             Get all pixels
                                    Random          Get samples; the count of samples in the data equals `samples`.
                                    Stratified      Undersampling of major categories and/or oversampling of minor categories.
        @samples                Sample count of the training data (not used in 'All' mode).
        '''
        if not self.MLP:
            raise MlpManagerError('You must create a MLP before!')

        # Normalize factors before sampling:
        for f in factors:
            f.normalize(mode = 'mean')

        self.sampler = Sampler(state, factors, output, self.ns)
        self.sampler.setTrainingData(state=state, output=output, shuffle=shuffle, mode=mode, samples=samples)

        outputVecLen  = self.getOutputVectLen()
        stateVecLen   = self.sampler.stateVecLen
        factorVectLen = self.sampler.factorVectLen
        size = len(self.sampler.data)

        self.data = np.zeros(size, dtype=[('coords', float, 2), ('state', float, stateVecLen), ('factors',  float, factorVectLen), ('output', float, outputVecLen)])
        self.data['coords']   = self.sampler.data['coords']
        self.data['state']    = self.sampler.data['state']
        self.data['factors']  = self.sampler.data['factors']
        self.data['output']   = [self.getOutputVector(sample['output']) for sample in self.sampler.data]
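
        # The training set is a NumPy structured array. A minimal sketch of the
        # layout under assumed vector lengths (stateVecLen=2, factorVectLen=3,
        # outputVecLen=4):
        #   np.zeros(1, dtype=[('coords', float, 2), ('state', float, 2),
        #                      ('factors', float, 3), ('output', float, 4)])
        # where each 'output' row is the sigmoid-coded vector from getOutputVector.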

    def setTrainError(self, error):
        self.train_error = error

    def setValError(self, error):
        self.val_error = error

    def setEpochs(self, epochs):
        self.epochs = epochs

    def setValPercent(self, value=20):
        self.valPercent = value

    def setLRate(self, value=0.1):
        self.lrate = value

    def setMomentum(self, value=0.01):
        self.momentum = value

    def setContinueTrain(self, value=False):
        self.continueTrain = value

    def startTrain(self):
        self.train(self.epochs, self.valPercent, self.lrate, self.momentum, self.continueTrain)

    def stopTrain(self):
        self.interrupted = True

    def train(self, epochs, valPercent=20, lrate=0.1, momentum=0.01, continue_train=False):
        '''Perform the training procedure on the MLP and save the best neural net
        @param epochs           Max iteration count.
        @param valPercent       Percent of the validation set.
        @param lrate            Learning rate.
        @param momentum         Learning momentum.
        @param continue_train   If False, start a new training cycle: reset the weights and the training and validation errors. If True, continue training.
        '''
        try:
            samples_count = len(self.data)
            val_sampl_count = samples_count * valPercent // 100
            apply_validation = val_sampl_count > 0  # Whether to use a validation set
            train_sampl_count = samples_count - val_sampl_count

            # Set first train_sampl_count as training set, the other as validation set
            train_indexes = (0, train_sampl_count)
            val_indexes = (train_sampl_count, samples_count) if apply_validation else None

            if not continue_train:
                self.resetMlp()
            self.minValError = self.getValError()  # The minimum error achieved on the validation set
            last_train_err = self.getTrainError()
            best_weights = self.copyWeights()   # The MLP weights at the minimum validation error

            self.rangeChanged.emit(self.tr("Train model %p%"), epochs)
            for epoch in range(epochs):
                self.trainEpoch(train_indexes, lrate, momentum)
                self.computePerformance(train_indexes, val_indexes)
                self.updateGraph.emit(self.getTrainError(), self.getValError())
                self.updateDeltaRMS.emit(self.getMinValError() - self.getValError())
                self.updateKappa.emit(self.getKappa())

                QCoreApplication.processEvents()
                if self.interrupted:
                    self.processInterrupted.emit()
                    break

                last_train_err = self.getTrainError()
                self.setTrainError(last_train_err)
                if apply_validation and (self.getValError() < self.getMinValError()):
                    self.minValError = self.getValError()
                    best_weights = self.copyWeights()
                    self.updateMinValErr.emit(self.getMinValError())
                self.updateProgress.emit()

            self.setMlpWeights(best_weights)
        except MemoryError:
            self.errorReport.emit(self.tr("The system is out of memory during ANN training"))
            raise
        except Exception:
            self.errorReport.emit(self.tr("An unknown error occurred during ANN training"))
            raise
        finally:
            self.processFinished.emit()

    def trainEpoch(self, train_indexes, lrate=0.1, momentum=0.01):
        '''Perform a training epoch on the MLP
        @param train_indexes    Tuple of the min & max indexes of training samples in the samples data.
        @param lrate            Learning rate.
        @param momentum         Learning momentum.
        '''
        train_sampl = train_indexes[1] - train_indexes[0]

        for i in range(train_sampl):
            n = np.random.randint( *train_indexes )
            sample = self.data[n]
            input_vec = np.hstack((sample['state'], sample['factors']))
            self.getOutput(input_vec)  # Forward propagation
            self.MLP.propagate_backward(sample['output'], lrate, momentum)
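
A hedged usage sketch of the MlpManager workflow above. The raster objects, layer sizes and hyperparameter values are assumptions for illustration, not part of the original code:

    # Hypothetical rasters: state_raster (current categories), next_raster
    # (categories to predict), and factor rasters slope and roads.
    manager = MlpManager(ns=1)
    manager.createMlp(state_raster, [slope, roads], next_raster, hidden_layers=[10])
    manager.setTrainingData(state_raster, [slope, roads], next_raster,
                            mode='Random', samples=1000)
    manager.setEpochs(100)
    manager.setValPercent(20)        # hold out 20% of samples for validation
    manager.setLRate(0.1)
    manager.setMomentum(0.01)
    manager.setContinueTrain(False)  # fresh training cycle
    manager.startTrain()             # runs train() with the settings above

    prediction = manager.getPrediction(state_raster, [slope, roads])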