Example #1
    def sim(self):
        """
        Perform one iteration of the simulation.
        """
        # TODO: eliminate AreaAnalyst.getChangeMap() from the process

        transition = self.crosstable.getCrosstable()

        prediction = self.getPrediction()
        state = self.getState()
        new_state = state.getBand(1).copy()  # New states (the result of the simulation) will be stored here.
        analyst = AreaAnalyst(state, prediction)
        classes = analyst.classes
        changes = analyst.getChangeMap().getBand(1)

        # Make transitions between classes according to the
        # number of moved pixels in the crosstable
        self.rangeChanged.emit(self.tr("Simulation process %p%"), len(classes) ** 2 - len(classes))
        for initClass in classes:
            for finalClass in classes:
                if initClass == finalClass:
                    continue

                # TODO: Calculate the number of pixels to be moved via the TransitionMatrix and the state raster
                n = transition.getTransition(
                    initClass, finalClass
                )  # Number of pixels to be moved (constant count now).
                # Find n appropriate places for transition initClass -> finalClass
                class_code = analyst.encode(initClass, finalClass)
                places = changes == class_code  # Array of places where transitions initClass -> finalClass occur
                placesCount = np.sum(places)
                if placesCount < n:
                    self.logMessage.emit(
                        self.tr("There are more transitions in the transition matrix, then the model have found")
                    )
                    n = placesCount

                confidence = self.getConfidence().getBand(1)
                confidence = (
                    confidence * places
                )  # The higher the value in a cell, the higher the probability of transition in that cell
                indices = []
                for i in range(n):
                    index = np.unravel_index(
                        confidence.argmax(), confidence.shape
                    )  # Select the cell with the highest probability
                    indices.append(index)
                    confidence[index] = -1  # Mark the cell so it cannot be selected again

                # Now "indices" contains indices of the appropriate places,
                # make transition initClass -> finalClass
                for index in indices:
                    new_state[index] = finalClass
                self.updateProgress.emit()

        result = Raster()
        result.create([new_state], state.getGeodata())
        self.state = result
        self.updatePrediction(result)
        self.processFinished.emit()
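
The core of the allocation step above is picking the n most confident candidate cells one at a time via argmax. A minimal standalone sketch of that selection, with hypothetical toy arrays standing in for the Raster bands:

import numpy as np

# Hypothetical inputs: per-cell confidence of the initClass -> finalClass
# transition, and a boolean array of candidate places (from the change map).
confidence = np.array([[0.9, 0.1, 0.4],
                       [0.2, 0.8, 0.3],
                       [0.5, 0.6, 0.7]])
places = np.array([[True, False, True],
                   [False, True, False],
                   [True, False, False]])

masked = confidence * places  # Zero out cells where the transition cannot occur
n = 2                         # Number of pixels to move (from the crosstable)
indices = []
for _ in range(n):
    index = np.unravel_index(masked.argmax(), masked.shape)  # Most confident cell
    indices.append(index)
    masked[index] = -1        # Exclude it from further selection

print(indices)  # The two most confident candidate cells: (0, 0), then (1, 1)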
Example #2
class TestSimulator(unittest.TestCase):
    def setUp(self):

        # Raster1:
        # ~ [1, 1, 3,],
        # ~ [3, 2, 1,],
        # ~ [0, 3, 1,]
        self.raster1 = Raster("../../examples/multifact.tif")
        self.raster1.resetMask([0])

        self.X = np.array([[1, 2, 3], [3, 2, 1], [0, 1, 1]])
        self.X = np.ma.array(self.X, mask=(self.X == 0))
        self.raster2 = Raster()
        self.raster2.create([self.X], self.raster1.getGeodata())

        self.aa = AreaAnalyst(self.raster1, self.raster2)

        self.crosstab = CrossTableManager(self.raster1, self.raster2)

        # Simple model
        self.model = Model(state=self.raster1)

    def test_compute_table(self):

        # print self.crosstab.getCrosstable().getCrosstable()
        # CrossTab:
        #  [[ 3.  1.  0.]
        #   [ 0.  1.  0.]
        #   [ 1.  0.  2.]]
        prediction = self.model.getPrediction(self.raster1)
        # print prediction.getBand(1)
        # prediction = [[1.0 1.0 6.0]
        #  [6.0 5.0 1.0]
        #  [-- 6.0 1.0]]
        # confidence = self.model.getConfidence()
        # print confidence.getBand(1)
        # confidence =     [[1.0 0.5  0.33]
        #  [0.5 0.33 0.25]
        #  [--  0.25 0.2]]
        result = np.array([[2.0, 1.0, 3.0], [1.0, 2.0, 1.0], [0, 3.0, 1.0]])
        result = np.ma.array(result, mask=(result == 0))

        simulator = Simulator(
            state=self.raster1, factors=None, model=self.model, crosstable=self.crosstab
        )  # The model doesn't use factors
        simulator.setIterationCount(1)
        simulator.simN()
        state = simulator.getState().getBand(1)
        assert_array_equal(result, state)

        result = np.array([[2.0, 1.0, 1.0], [2.0, 2.0, 1.0], [0, 3.0, 1.0]])
        result = np.ma.array(result, mask=(result == 0))

        simulator = Simulator(self.raster1, None, self.model, self.crosstab)
        simulator.setIterationCount(2)
        simulator.simN()
        state = simulator.getState().getBand(1)
        assert_array_equal(result, state)
Example #3
    def train(self):
        """
        Train the model
        """
        self.transitionPotentials = {}
        try:
            iterCount = len(self.codes) * len(self.factors)
            self.rangeChanged.emit(self.tr("Training WoE... %p%"), iterCount)
            changeMap = self.changeMap.getBand(1)
            for code in self.codes:
                sites = binaryzation(changeMap, [code])
                # Reclass factors (continuous factor -> ordinal factor)
                wMap = np.ma.zeros(changeMap.shape)  # Map of the summary weight of all factors
                self.weights[code] = {}  # Dictionary for storing the weights of every raster band
                for k in xrange(len(self.factors)):
                    fact = self.factors[k]
                    self.weights[code][k] = {}  # Weights of the factor
                    factorW = self.weights[code][k]
                    if self.bins:  # Get bins of the factor
                        bin = self.bins[k]
                        if (bin != None) and fact.getBandsCount() != len(bin):
                            raise WoeManagerError("Count of bins list for multiband factor is't equal to band count!")
                    else:
                        bin = None
                    for i in range(1, fact.getBandsCount() + 1):
                        band = fact.getBand(i)
                        if bin and bin[i - 1]:  #
                            band = reclass(band, bin[i - 1])
                        band, sites = masks_identity(band, sites, dtype=np.uint8)  # Combine masks of the rasters
                        woeRes = woe(
                            band, sites, self.unit_cell
                        )  # WoE for the 'code' (initState->finalState) transition and current 'factor'.
                        weights = woeRes["map"]
                        wMap = wMap + weights
                        factorW[i] = woeRes["weights"]
                    self.updateProgress.emit()

                # Reclassification finished => set WoE coefficients
                self.woe[code] = wMap  # WoE for all factors and the transition code.

                # Potentials are the WoE map rescaled to the 0--100 percent range
                band = (sigmoid(wMap) * 100).astype(np.uint8)
                p = Raster()
                p.create([band], self.geodata)
                self.transitionPotentials[code] = p
                gc.collect()
        except MemoryError:
            self.errorReport.emit("The system out of memory during WoE trainig")
            raise
        except:
            self.errorReport.emit(self.tr("An unknown error occurs during WoE trainig"))
            raise
        finally:
            self.processFinished.emit()
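
The transition potentials built at the end of train() are just the summed WoE map squashed through a sigmoid and rescaled to percents. A small sketch of that rescaling, assuming sigmoid() is the standard logistic function:

import numpy as np

def sigmoid(x):
    # Assumed to be the standard logistic function used by the model
    return 1.0 / (1.0 + np.exp(-x))

wMap = np.ma.array([[-2.0, 0.0], [1.5, 3.0]])      # Hypothetical summed WoE map
potential = (sigmoid(wMap) * 100).astype(np.uint8)
print(potential)  # 11, 50, 81, 95 -- percent-scaled transition potentials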
Example #4
 def errorMap(self, answer):
     '''
     Create a map of correct and incorrect predictions.
     This function compares the known answer with the result of the prediction
     procedure; a correct pixel is marked as 0.
     '''
     state = self.getState()
     b = state.getBand(1)
     a = answer.getBand(1)
     diff = (a - b).astype(np.int16)
     result = Raster()
     result.create([diff], state.getGeodata())
     return result
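
On plain arrays the idea reduces to a signed difference, so zeros mark correctly predicted pixels. A toy sketch with hypothetical values:

import numpy as np

answer    = np.array([[1, 2], [3, 1]], dtype=np.int16)  # Known classes
predicted = np.array([[1, 3], [3, 2]], dtype=np.int16)  # Model output
diff = answer - predicted
print(diff)  # [[ 0 -1] [ 0 -1]] -- zeros are correct predictions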
Example #5
    def train(self):
        '''
        Train the model
        '''
        self.transitionPotentials = {}
        try:
            iterCount = len(self.codes)*len(self.factors)
            self.rangeChanged.emit(self.tr("Training WoE... %p%"), iterCount)
            changeMap = self.changeMap.getBand(1)
            for code in self.codes:
                sites = binaryzation(changeMap, [code])
                # Reclass factors (continuous factor -> ordinal factor)
                wMap = np.ma.zeros(changeMap.shape) # Map of the summary weight of all factors
                self.weights[code] = {}             # Dictionary for storing the weights of every raster band
                for k in xrange(len(self.factors)):
                    fact = self.factors[k]
                    self.weights[code][k] = {}      # Weights of the factor
                    factorW = self.weights[code][k]
                    if self.bins: # Get bins of the factor
                        bin = self.bins[k]
                        if (bin != None) and fact.getBandsCount() != len(bin):
                            raise WoeManagerError("Count of bins list for multiband factor is't equal to band count!")
                    else: bin = None
                    for i in range(1, fact.getBandsCount()+1):
                        band = fact.getBand(i)
                        if bin and bin[i-1]: #
                            band = reclass(band, bin[i-1])
                        band, sites = masks_identity(band, sites, dtype=np.uint8)   # Combine masks of the rasters
                        woeRes = woe(band, sites, self.unit_cell)   # WoE for the 'code' (initState->finalState) transition and current 'factor'.
                        weights = woeRes['map']
                        wMap = wMap + weights
                        factorW[i] = woeRes['weights']
                    self.updateProgress.emit()

                # Reclassification finished => set WoE coefficients
                self.woe[code]=wMap             # WoE for all factors and the transition code.

                # Potentials are the WoE map rescaled to the 0--100 percent range
                band = (sigmoid(wMap)*100).astype(np.uint8)
                p = Raster()
                p.create([band], self.geodata)
                self.transitionPotentials[code] = p
                gc.collect()
        except MemoryError:
            self.errorReport.emit('The system ran out of memory during WoE training')
            raise
        except:
            self.errorReport.emit(self.tr("An unknown error occurs during WoE trainig"))
            raise
        finally:
            self.processFinished.emit()
Example #6
    def test_WoeManager(self):
        aa = AreaAnalyst(self.sites, self.sites)
        w1 = WoeManager([self.factor], aa)
        p = w1.getPrediction(self.sites).getBand(1)
        assert_array_equal(p, self.sites.getBand(1))

        initState = Raster("../../examples/data.tif")
        finalState = Raster("../../examples/data1.tif")
        aa = AreaAnalyst(initState, finalState)
        w = WoeManager([initState], aa)
        p = w.getPrediction(initState).getBand(1)

        # Calculate by hand:
        # 1->1 transition raster:
        r11 = [[1, 1, 0, 0], [0, 1, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]
        # 1->2 raster:
        r12 = [[0, 0, 1, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]
        # 1->3 raster:
        r13 = [[0, 0, 0, 1], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]
        # 2->1
        r21 = [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]
        # 2->2
        r22 = [[0, 0, 0, 0], [0, 0, 1, 0], [0, 0, 0, 0], [0, 0, 0, 0]]
        # 2->3
        r23 = [[0, 0, 0, 0], [0, 0, 0, 1], [1, 1, 1, 1], [0, 0, 0, 0]]
        # 3->1
        r31 = [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [1, 1, 0, 0]]
        # 3->2
        r32 = [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 1]]
        # 3->3
        r33 = [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 1, 0]]
        geodata = initState.getGeodata()
        sites = {"11": r11, "12": r12, "13": r13, "21": r21, "22": r22, "23": r23, "31": r31, "32": r32, "33": r33}
        woeDict = {}  # WoE of transitions
        for k in sites.keys():  #
            if k != "21":  # !!! r21 is zero
                x = Raster()
                x.create([np.ma.array(data=sites[k])], geodata)
                sites[k] = x
                woeDict[k] = woe(initState.getBand(1), x.getBand(1))
        # w1max = np.maximum(woeDict['11'], woeDict['12'], woeDict['13'])
        # w2max = np.maximum(woeDict['22'], woeDict['23'])
        # w3max = np.maximum(woeDict['31'], woeDict['32'], woeDict['33'])
        # Answer is the index of the finalClass that maximizes the weight of the transition initClass -> finalClass
        answer = [[1, 1, 1, 1], [1, 1, 3, 3], [3, 3, 3, 3], [1, 1, 1, 1]]
        assert_array_equal(p, answer)

        w = WoeManager([initState], aa, bins={0: [[2]]})
        p = w.getPrediction(initState).getBand(1)
Example #7
 def makeChangeMap(self):
     f, s = self.first, self.second
     rows, cols = self.geodata['ySize'], self.geodata['xSize']
     band = np.zeros([rows, cols])
     self.rangeChanged.emit(self.tr("Creating change map %p%"), rows)
     for i in xrange(rows):
         for j in xrange(cols):
             if not f.mask[i,j]:
                 r = f[i,j]
                 c = s[i,j]
                 band[i, j] = self.encode(r, c)
         self.updateProgress.emit()
     bands = [np.ma.array(data = band, mask = f.mask)]
     raster = Raster()
     raster.create(bands, self.geodata)
     self.processFinished.emit(raster)
     self.changeMap = raster
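
The nested loops above call encode() once per pixel. With NumPy, the same change map can be built in a single vectorized step; a sketch, where the pairing function is a hypothetical stand-in (the real AreaAnalyst.encode may differ):

import numpy as np

def encode(r, c, categoryCount=3):
    # Hypothetical pairing function; the real AreaAnalyst.encode may differ.
    return r * categoryCount + c

first  = np.ma.array([[1, 2], [0, 1]], mask=[[False, False], [True, False]])
second = np.array([[1, 0], [2, 2]])

band = encode(first.filled(0), second)          # Encode all pixels at once
changeMap = np.ma.array(band, mask=first.mask)  # Reapply the mask of the first raster
print(changeMap)  # [[4 6] [-- 5]]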
Example #8
class Model(object):
    """
    Simple predicting model for Simulator tests
    """

    def __init__(self, state):
        self._predict(state)

    def getConfidence(self):
        return self.confidence

    def getPrediction(self, state, factors=None, calcTransitions=False):
        self._predict(state, factors)
        return self.prediction

    def _predict(self, state, factors=None, calcTransitions=False):
        geodata = state.getGeodata()
        band = state.getBand(1)
        rows, cols = geodata["ySize"], geodata["xSize"]
        # Let the new state be: 1 -> 2, 2 -> 3, 3 -> 1; then
        # the prediction is 1->1, 2->5, 3->6

        predicted_band = np.copy(band)
        predicted_band[band == 1] = 1.0
        predicted_band[band == 2] = 5.0
        predicted_band[band == 3] = 6.0

        # Let the confidence be 1/(1+row+col), where row and col are the row and column numbers of the cell.
        confidence_band = np.zeros([rows, cols])
        for i in xrange(rows):
            for j in xrange(cols):
                confidence_band[i, j] = 1.0 / (1 + i + j)

        predicted_bands = [np.ma.array(data=predicted_band, mask=band.mask)]
        confidence_bands = [np.ma.array(data=confidence_band, mask=band.mask)]
        self.prediction = Raster()
        self.prediction.create(predicted_bands, state.geodata)
        self.confidence = Raster()
        self.confidence.create(confidence_bands, state.geodata)
Example #9
class Model(object):
    '''
    Simple predicting model for Simulator tests
    '''
    def __init__(self, state):
        self.state = state
        self._predict(state)
    
    def getConfidence(self):
        return self.confidence
        
    def getPrediction(self, state, factors=None):
        self._predict(state, factors)
        return self.prediction
        
    def _predict(self, state, factors = None):
        geodata = self.state.getGeodata()
        band = state.getBand(1)
        rows, cols = geodata['ySize'], geodata['xSize']
        # Let the prediction be: 1 -> 2, 2 -> 3, 3 -> 1
        
        predicted_band  = np.copy(band)
        predicted_band[band == 1] = 2
        predicted_band[band == 2] = 3
        predicted_band[band == 3] = 1
        
        # Let the confidence be 1/(1+row+col), where row and col are the row and column numbers of the cell.
        confidence_band = np.zeros([rows, cols])
        for i in xrange(rows):
            for j in xrange(cols):
                confidence_band[i,j] = 1.0/(1+i+j)
        
        predicted_bands  = [np.ma.array(data = predicted_band, mask = band.mask)]
        confidence_bands = [np.ma.array(data = confidence_band, mask = band.mask)]
        self.prediction = Raster()
        self.prediction.create(predicted_bands, state.geodata)
        self.confidence = Raster()
        self.confidence.create(confidence_bands, state.geodata)
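
The remapping in _predict uses a common NumPy pattern: boolean-mask assignment that tests against the original band but writes into a copy, so the rules 1 -> 2, 2 -> 3, 3 -> 1 do not cascade (a naive in-place remap would turn a 1 into a 2 and then that 2 into a 3). A toy sketch:

import numpy as np

band = np.array([[1, 2], [3, 1]])
predicted = np.copy(band)     # Test against 'band', write into the copy
predicted[band == 1] = 2
predicted[band == 2] = 3
predicted[band == 3] = 1
print(predicted)  # [[2 3] [1 2]]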
Example #10
class Model(object):
    '''
    Simple predicting model for Simulator tests
    '''
    def __init__(self, state):
        self._predict(state)

    def getConfidence(self):
        return self.confidence

    def getPrediction(self, state, factors=None, calcTransitions=False):
        self._predict(state, factors)
        return self.prediction

    def _predict(self, state, factors = None, calcTransitions=False):
        geodata = state.getGeodata()
        band = state.getBand(1)
        rows, cols = geodata['ySize'], geodata['xSize']
        # Let the new state be: 1 -> 2, 2 -> 3, 3 -> 1; then
        # the prediction is 1->1, 2->5, 3->6

        predicted_band  = np.copy(band)
        predicted_band[band == 1] = 1.0
        predicted_band[band == 2] = 5.0
        predicted_band[band == 3] = 6.0

        # Let the confidence be 1/(1+row+col), where row and col are the row and column numbers of the cell.
        confidence_band = np.zeros([rows, cols])
        for i in xrange(rows):
            for j in xrange(cols):
                confidence_band[i,j] = 1.0/(1+i+j)

        predicted_bands  = [np.ma.array(data = predicted_band, mask = band.mask)]
        confidence_bands = [np.ma.array(data = confidence_band, mask = band.mask)]
        self.prediction = Raster()
        self.prediction.create(predicted_bands, state.geodata)
        self.confidence = Raster()
        self.confidence.create(confidence_bands, state.geodata)
Example #11
    def makeChangeMap(self):
        rows, cols = self.geodata['ySize'], self.geodata['xSize']
        band = np.zeros([rows, cols], dtype=np.int16)

        f, s = self.first, self.second
        if self.initRaster == None:
            checkPersistent = False
        else:
            checkPersistent = True
            t = self.initRaster.getBand(1)
        raster = None
        try:
            self.rangeChanged.emit(self.tr("Creating change map %p%"), rows)
            for i in xrange(rows):
                for j in xrange(cols):
                    if (f.mask.shape == ()) or (not f.mask[i,j]):
                        r = f[i,j]
                        c = s[i,j]
                        # A persistent category is a category that is constant across all three rasters
                        if checkPersistent and (r==c) and (r==t[i,j]):
                            band[i, j] = self.persistentCategoryCode
                        else:
                            band[i, j] = self.encode(r, c)
                self.updateProgress.emit()
            bands = [np.ma.array(data = band, mask = f.mask, dtype=np.int16)]
            raster = Raster()
            raster.create(bands, self.geodata)
            self.changeMap = raster
        except MemoryError:
            self.errorReport.emit(self.tr("The system out of memory during change map creating"))
            raise
        except:
            self.errorReport.emit(self.tr("An unknown error occurs during change map creating"))
            raise
        finally:
            self.processFinished.emit(raster)
Example #12
 def test_create(self):
     raster = Raster()
     raster.create([self.data1], geodata=self.r1.getGeodata())
     self.assertTrue(raster.geoDataMatch(self.r1))
     self.assertEqual(raster.getBandsCount(), 1)
     self.assertEqual(set(raster.getBandGradation(1)), set([0, 1, 2, 3]))
Example #13
class LR(object):
    """
    Implements Logistic Regression model definition and calibration
    (maximum likelihood parameter estimation).
    """

    def __init__(self, ns=0, logreg=None):

        from sklearn import linear_model as lm


        if logreg:
            self.logreg = logreg
        else:
            self.logreg = lm.LogisticRegression()

        self.ns = ns            # Neighbourhood size of training rasters.
        self.data = None        # Training data
        self.classlist = None   # List of unique output values of the output raster

        # Results of the LR prediction
        self.prediction = None  # Raster of the LR prediction results
        self.confidence = None  # Raster of the LR results confidence


    def getCoef(self):
        return self.logreg.coef_

    def getConfidence(self):
        return self.confidence

    def getIntercept(self):
        return self.logreg.intercept_

    def getPrediction(self, state, factors):
        self._predict(state, factors)
        return self.prediction

    def _outputConfidence(self, input):
        '''
        Return the confidence (the difference between the two largest probabilities) of the LR output.
        '''
        out_scl = self.logreg.predict_proba(input)[0]
        # Calculate the confidence:
        out_scl.sort()
        return out_scl[-1] - out_scl[-2]

    def _predict(self, state, factors):
        '''
        Calculate output and confidence rasters using LR model and input rasters
        @param state            Raster of the current state (classes) values.
        @param factors          List of the factor rasters (predicting variables).
        '''
        geodata = state.getGeodata()
        rows, cols = geodata['ySize'], geodata['xSize']
        for r in factors:
            if not state.geoDataMatch(r):
                raise LRError('Geometries of the input rasters are different!')

        # Normalize factors before prediction:
        for f in factors:
            f.normalize(mode = 'mean')

        predicted_band  = np.zeros([rows, cols])
        confidence_band = np.zeros([rows, cols])

        sampler = Sampler(state, factors, ns=self.ns)
        mask = state.getBand(1).mask.copy()
        for i in xrange(rows):
            for j in xrange(cols):
                if not mask[i,j]:
                    input = sampler.get_inputs(state, factors, i,j)
                    if input != None:
                        out = self.logreg.predict(input)
                        predicted_band[i,j] = out
                        confidence = self._outputConfidence(input)
                        confidence_band[i, j] = confidence
                    else: # Input sample is incomplete => mask this pixel
                        mask[i, j] = True
        predicted_bands  = [np.ma.array(data = predicted_band, mask = mask)]
        confidence_bands = [np.ma.array(data = confidence_band, mask = mask)]

        self.prediction = Raster()
        self.prediction.create(predicted_bands, geodata)
        self.confidence = Raster()
        self.confidence.create(confidence_bands, geodata)

    def read(self):
        pass

    def save(self):
        pass

    def setTrainingData(self, state, factors, output, mode='All', samples=None):
        '''
        @param state            Raster of the current state (classes) values.
        @param factors          List of the factor rasters (predicting variables).
        @param output           Raster that contains classes to predict.
        @param mode             Type of sampling method:
                                    All             Get all pixels.
                                    Normal          Random sampling; the sample count is given by 'samples'.
                                    Balanced        Undersampling of major classes and/or oversampling of minor classes.
        @param samples          Sample count of the training data (not used in 'All' mode).
        '''
        if not self.logreg:
            raise LRError('You must create a Logistic Regression model first!')

        # Normalize factors before sampling:
        for f in factors:
            f.normalize(mode = 'mean')

        sampler = Sampler(state, factors, output, ns=self.ns)
        sampler.setTrainingData(state, factors, output, shuffle=False, mode=mode, samples=samples)

        outputVecLen  = sampler.outputVecLen
        stateVecLen   = sampler.stateVecLen
        factorVectLen = sampler.factorVectLen
        size = len(sampler.data)

        self.data = sampler.data

    def train(self):
        X = np.column_stack( (self.data['state'], self.data['factors']) )
        Y = self.data['output']
        self.logreg.fit(X, Y)
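
For reference, the training and confidence logic above boils down to a few lines of scikit-learn. A minimal sketch with hypothetical toy samples (in the class above, the data comes from the Sampler instead):

import numpy as np
from sklearn.linear_model import LogisticRegression

# Hypothetical samples: state value plus two factor values per pixel
state   = np.array([[1], [1], [2], [2]])
factors = np.array([[0.1, 0.9], [0.2, 0.8], [0.9, 0.1], [0.8, 0.3]])
output  = np.array([1, 1, 2, 2])       # Classes to predict

X = np.column_stack((state, factors))  # Same layout as in train() above
logreg = LogisticRegression()
logreg.fit(X, output)

proba = logreg.predict_proba(X[:1])[0]  # Class probabilities for one sample
proba.sort()
confidence = proba[-1] - proba[-2]      # Gap between the two largest, as in _outputConfidence()
print(logreg.predict(X[:1]), confidence)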
Example #14
class TestSimulator(unittest.TestCase):

    def setUp(self):

        # Raster1:
        #~ [1, 1, 3,],
        #~ [3, 2, 1,],
        #~ [0, 3, 1,]
        self.raster1 = Raster('../../examples/multifact.tif')
        self.raster1.resetMask([0])

        self.X = np.array([
            [1, 2, 3],
            [3, 2, 1],
            [0, 1, 1]
        ])
        self.X = np.ma.array(self.X, mask=(self.X == 0))
        self.raster2 = Raster()
        self.raster2.create([self.X], self.raster1.getGeodata())

        self.aa = AreaAnalyst(self.raster1, self.raster2)

        self.crosstab = CrossTableManager(self.raster1, self.raster2)

        # Simple model
        self.model = Model(state=self.raster1)

    def test_compute_table(self):

        # print self.crosstab.getCrosstable().getCrosstable()
        # CrossTab:
        #  [[ 3.  1.  0.]
        #   [ 0.  1.  0.]
        #   [ 1.  0.  2.]]
        prediction = self.model.getPrediction(self.raster1)
        # print prediction.getBand(1)
        # prediction = [[1.0 1.0 6.0]
        #               [6.0 5.0 1.0]
        #               [--  6.0 1.0]]
        # confidence = self.model.getConfidence()
        # print confidence.getBand(1)
        # confidence = [[1.0 0.5  0.33]
        #               [0.5 0.33 0.25]
        #               [--  0.25 0.2]]
        result = np.array([
            [2.0, 1.0, 3.0],
            [1.0, 2.0, 1.0],
            [0,   3.0, 1.0]
        ])
        result = np.ma.array(result, mask = (result==0))

        simulator = Simulator(state=self.raster1, factors=None, model=self.model, crosstable=self.crosstab)    # The model doesn't use factors
        simulator.setIterationCount(1)
        simulator.simN()
        state = simulator.getState().getBand(1)
        assert_array_equal(result, state)

        result = np.array([
            [2.0, 1.0, 1.0],
            [2.0, 2.0, 1.0],
            [0,   3.0, 1.0]
        ])
        result = np.ma.array(result, mask = (result==0))

        simulator = Simulator(self.raster1, None, self.model, self.crosstab)
        simulator.setIterationCount(2)
        simulator.simN()
        state = simulator.getState().getBand(1)
        assert_array_equal(result, state)
Example #15
    def test_WoeManager(self):
        aa = AreaAnalyst(self.sites, self.sites)
        w1 = WoeManager([self.factor], aa)
        w1.train()
        p = w1.getPrediction(self.sites).getBand(1)
        answer = [[0, 3, 0], [0, 3, 0], [9, 0, 3]]
        answer = ma.array(data=answer, mask=self.mask)
        assert_array_equal(p, answer)

        initState = Raster('../../examples/data.tif')
        #~ [1,1,1,1],
        #~ [1,1,2,2],
        #~ [2,2,2,2],
        #~ [3,3,3,3]
        finalState = Raster('../../examples/data1.tif')
        #~ [1,1,2,3],
        #~ [3,1,2,3],
        #~ [3,3,3,3],
        #~ [1,1,3,2]
        aa = AreaAnalyst(initState, finalState)
        w = WoeManager([initState], aa)
        w.train()
        #print w.woe
        p = w.getPrediction(initState).getBand(1)
        self.assertEquals(p.dtype, np.uint8)

        # Calculate by hand:
        #1->1 transition raster:
        r11 = [[1, 1, 0, 0], [0, 1, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]
        #1->2 raster:
        r12 = [[0, 0, 1, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]
        #1->3 raster:
        r13 = [[0, 0, 0, 1], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]
        # 2->1
        r21 = [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]
        # 2->2
        r22 = [[0, 0, 0, 0], [0, 0, 1, 0], [0, 0, 0, 0], [0, 0, 0, 0]]
        # 2->3
        r23 = [[0, 0, 0, 0], [0, 0, 0, 1], [1, 1, 1, 1], [0, 0, 0, 0]]
        # 3->1
        r31 = [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [1, 1, 0, 0]]
        # 3->2
        r32 = [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 1]]
        # 3->3
        r33 = [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 1, 0]]
        geodata = initState.getGeodata()
        sites = {
            '11': r11,
            '12': r12,
            '13': r13,
            '21': r21,
            '22': r22,
            '23': r23,
            '31': r31,
            '32': r32,
            '33': r33
        }
        woeDict = {}  # WoE of transitions
        for k in sites.keys():  #
            if k != '21':  # !!! r21 is zero
                x = Raster()
                x.create([np.ma.array(data=sites[k])], geodata)
                sites[k] = x
                woeDict[k] = woe(initState.getBand(1), x.getBand(1))
        #w1max = np.maximum(woeDict['11'], woeDict['12'], woeDict['13'])
        #w2max = np.maximum(woeDict['22'], woeDict['23'])
        #w3max = np.maximum(woeDict['31'], woeDict['32'], woeDict['33'])
        # Answer is a transition code with max weight
        answer = [[0, 0, 0, 0], [0, 0, 5, 5], [5, 5, 5, 5], [6, 6, 6, 6]]
        assert_array_equal(p, answer)

        w = WoeManager([initState], aa, bins={
            0: [
                [2],
            ],
        })
        w.train()
        p = w.getPrediction(initState).getBand(1)
        self.assertEquals(p.dtype, np.uint8)
        c = w.getConfidence().getBand(1)
        self.assertEquals(c.dtype, np.uint8)
Example #16
class WoeManager(QObject):
    '''This class gets the data extracted from the UI,
    passes it to the woe function, then gets and stores the result.
    '''

    rangeChanged = pyqtSignal(str, int)
    updateProgress = pyqtSignal()
    processFinished = pyqtSignal()
    logMessage = pyqtSignal(str)
    errorReport = pyqtSignal(str)

    def __init__(self, factors, areaAnalyst, unit_cell=1, bins = None):
        '''
        @param factors      List of the pattern rasters used for prediction of point objects (sites).
        @param areaAnalyst  AreaAnalyst that contains map of the changes, encodes and decodes category numbers.
        @param unit_cell    Method parameter, pixelsize of resampled rasters.
        @param bins         Dictionary of bins. Bins are binning boundaries that are used to reduce the count of categories.
                                For example, if factors = [f0, f1], then bins could be {0: [bins for f0], 1: [bins for f1]} = {0: [[10, 100, 250]], 1: [[0.2, 1, 1.5, 4]]}.
                                A list of lists is used because a factor can be a multiband raster, so we need a list of bins for every band. For example:
                                factors = [f0, 2-band-factor], bins = {0: [[10, 100, 250]], 1: [[0.2, 1, 1.5, 4], [3, 4, 7]]}
        '''

        QObject.__init__(self)

        self.factors = factors
        self.analyst = areaAnalyst
        self.changeMap  = areaAnalyst.getChangeMap()
        self.bins       = bins
        self.unit_cell  = unit_cell

        self.prediction = None      # Raster of the prediction results
        self.confidence = None      # Raster of the results confidence (1 = the maximum confidence, 0 = the least confidence)

        if (bins != None) and (len(self.factors) != len(bins.keys())):
            raise WoeManagerError('Lengths of bins and factors are different!')

        for r in self.factors:
            if not self.changeMap.geoDataMatch(r):
                raise WoeManagerError('Geometries of the input rasters are different!')

        if self.changeMap.getBandsCount() != 1:
            raise WoeManagerError('Change map must have one band!')

        self.geodata = self.changeMap.getGeodata()

        # Denormalize factors if they are normalized
        for r in self.factors:
            r.denormalize()

        # Get list of codes from the changeMap raster
        categories = self.changeMap.getBandGradation(1)

        self.codes = [int(c) for c in categories]    # Codes of transitions initState->finalState (see AreaAnalyst.encode)
        self.woe = {}       # Maps of WoE results of every transition code

        self.weights = {}   # Weights of WoE (of raster band code)
        #{ # The format is: {Transition_code: {factorNumber1: [list of the weights], factorNumber2: [list of the weights]}, ...}
        #  # for example:
        #   0: {0: {1: [...]}, 1: {1: [...]}},
        #   1: {0: {1: [...]}, 1: {1: [...]}},
        #   2: {0: {1: [...]}, 1: {1: [...]}},
        #   ...
        #}
        #
        self.transitionPotentials = None # Dictionary of transition potential maps: {category1: map1, category2: map2, ...}

    def checkBins(self):
        """
        Check if bins are applicable to the factors
        """
        if self.bins != None:
            for i, factor in enumerate(self.factors):
                factor.denormalize()
                bin = self.bins[i]
                if (bin != None) and (bin != [None]):
                    for j in range(factor.getBandsCount()):
                        b = bin[j]
                        tmp = b[:]
                        tmp.sort()
                        if b != tmp:  # Must be sorted
                            return False
                        b0, bMax = b[0], b[len(b)-1]
                        bandStat = factor.getBandStat(j+1)
                        if bandStat['min'] > b0 or bandStat['max'] < bMax:
                            return False
        return True

    def getConfidence(self):
        return self.confidence

    def getPrediction(self, state, factors=None, calcTransitions=False):
        '''
        Most models use factors for prediction, but WoE takes the list of factors only once (during initialization).
        '''
        self._predict(state, calcTransitions)
        return self.prediction

    def getTransitionPotentials(self):
        return self.transitionPotentials

    def getWoe(self):
        return self.woe

    def _predict(self, state, calcTransitions=False):
        '''
        Predict the changes.
        '''
        try:
            self.rangeChanged.emit(self.tr("Initialize model %p%"), 1)

            rows, cols = self.geodata['ySize'], self.geodata['xSize']
            if not self.changeMap.geoDataMatch(state):
                raise WoeManagerError('Geometries of the state and changeMap rasters are different!')

            prediction = np.zeros((rows,cols), dtype=np.uint8)
            confidence = np.zeros((rows,cols), dtype=np.uint8)
            mask = np.zeros((rows,cols), dtype=np.byte)

            stateBand = state.getBand(1)

            self.updateProgress.emit()
            self.rangeChanged.emit(self.tr("Prediction %p%"), rows)

            for r in xrange(rows):
                for c in xrange(cols):
                    oldMax, currMax = -1000, -1000  # Small numbers
                    indexMax = -1                   # Index of Max weight
                    initCat = stateBand[r,c]        # Init category (state before transition)
                    try:
                        codes = self.analyst.codes(initCat)   # Possible final states
                        for code in codes:
                            try:  # Not all possible transitions may be present in the changeMap
                                map = self.woe[code]     # Get WoE map of transition 'code'
                            except KeyError:
                                continue
                            w = map[r,c]        # The weight in the (r,c)-pixel
                            if w > currMax:
                                indexMax, oldMax, currMax = code, currMax, w
                        prediction[r,c] = indexMax
                        confidence[r,c] = int(100*(sigmoid(currMax) - sigmoid(oldMax)))
                    except ValueError:
                        mask[r,c] = 1
                self.updateProgress.emit()

            predicted_band = np.ma.array(data=prediction, mask=mask, dtype=np.uint8)
            self.prediction = Raster()
            self.prediction.create([predicted_band], self.geodata)
            confidence_band = np.ma.array(data=confidence, mask=mask, dtype=np.uint8)
            self.confidence = Raster()
            self.confidence.create([confidence_band], self.geodata)
        except MemoryError:
            self.errorReport.emit(self.tr("The system out of memory during WOE prediction"))
            raise
        except:
            self.errorReport.emit(self.tr("An unknown error occurs during WoE prediction"))
            raise
        finally:
            self.processFinished.emit()

    def train(self):
        '''
        Train the model
        '''
        self.transitionPotentials = {}
        try:
            iterCount = len(self.codes)*len(self.factors)
            self.rangeChanged.emit(self.tr("Training WoE... %p%"), iterCount)
            changeMap = self.changeMap.getBand(1)
            for code in self.codes:
                sites = binaryzation(changeMap, [code])
                # Reclass factors (continuous factor -> ordinal factor)
                wMap = np.ma.zeros(changeMap.shape) # Map of the summary weight of all factors
                self.weights[code] = {}             # Dictionary for storing the weights of every raster band
                for k in xrange(len(self.factors)):
                    fact = self.factors[k]
                    self.weights[code][k] = {}      # Weights of the factor
                    factorW = self.weights[code][k]
                    if self.bins: # Get bins of the factor
                        bin = self.bins[k]
                        if (bin != None) and fact.getBandsCount() != len(bin):
                            raise WoeManagerError("Count of bins list for multiband factor is't equal to band count!")
                    else: bin = None
                    for i in range(1, fact.getBandsCount()+1):
                        band = fact.getBand(i)
                        if bin and bin[i-1]: #
                            band = reclass(band, bin[i-1])
                        band, sites = masks_identity(band, sites, dtype=np.uint8)   # Combine masks of the rasters
                        woeRes = woe(band, sites, self.unit_cell)   # WoE for the 'code' (initState->finalState) transition and current 'factor'.
                        weights = woeRes['map']
                        wMap = wMap + weights
                        factorW[i] = woeRes['weights']
                    self.updateProgress.emit()

                # Reclassification finished => set WoE coefficients
                self.woe[code]=wMap             # WoE for all factors and the transition code.

                # Potentials are the WoE map rescaled to the 0--100 percent range
                band = (sigmoid(wMap)*100).astype(np.uint8)
                p = Raster()
                p.create([band], self.geodata)
                self.transitionPotentials[code] = p
                gc.collect()
        except MemoryError:
            self.errorReport.emit('The system ran out of memory during WoE training')
            raise
        except:
            self.errorReport.emit(self.tr("An unknown error occurs during WoE trainig"))
            raise
        finally:
            self.processFinished.emit()

    def weightsToText(self):
        '''
        Format self.weights as a text report.
        '''
        if self.weights == {}:
            return u""
        text = u""
        for code in self.codes:
            (initClass, finalClass) = self.analyst.decode(code)
            text = text + self.tr("Transition %s -> %s\n" % (int(initClass), int(finalClass)))
            try:
                factorW = self.weights[code]
                for factNum, factDict in factorW.iteritems():
                    name = self.factors[factNum].getFileName()
                    name = basename(name)
                    text = text + self.tr("\t factor: %s \n" % (name,) )
                    for bandNum, bandWeights in factDict.iteritems():
                        weights = ["%f" % (w,) for w in bandWeights]
                        text = text + self.tr("\t\t Weights of band %s: %s \n" % (bandNum, ", ".join(weights)) )
            except:
                text = text + self.tr('W for code %s (%s -> %s) caused an error' % (code, initClass, finalClass))
                raise
        return text
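
Taken together with the tests above, the intended call sequence looks like this. A minimal usage sketch (the file paths mirror the test fixtures and are assumptions):

initState  = Raster('../../examples/data.tif')
finalState = Raster('../../examples/data1.tif')

aa = AreaAnalyst(initState, finalState)       # Encodes/decodes transition codes
w  = WoeManager([initState], aa, unit_cell=1)
w.train()                                     # Computes WoE weights per transition code
prediction = w.getPrediction(initState)       # Raster of the most likely transition codes
confidence = w.getConfidence()                # 0--100 confidence raster
print(w.weightsToText())                      # Human-readable weights report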
Example #17
    def test_WoeManager(self):
        aa = AreaAnalyst(self.sites, self.sites)
        w1 = WoeManager([self.factor], aa)
        w1.train()
        p = w1.getPrediction(self.sites).getBand(1)
        answer = [[0,3,0], [0,3,0], [9,0,3]]
        answer = ma.array(data = answer, mask = self.mask)
        assert_array_equal(p, answer)

        initState = Raster('../../examples/data.tif')
        #~ [1,1,1,1],
        #~ [1,1,2,2],
        #~ [2,2,2,2],
        #~ [3,3,3,3]
        finalState = Raster('../../examples/data1.tif')
        #~ [1,1,2,3],
        #~ [3,1,2,3],
        #~ [3,3,3,3],
        #~ [1,1,3,2]
        aa = AreaAnalyst(initState, finalState)
        w = WoeManager([initState], aa)
        w.train()
        #print w.woe
        p = w.getPrediction(initState).getBand(1)
        self.assertEquals(p.dtype, np.uint8)

        # Calculate by hand:
        #1->1 transition raster:
        r11 = [
            [1, 1, 0, 0],
            [0, 1, 0, 0],
            [0, 0, 0, 0],
            [0, 0, 0, 0]
        ]
        #1->2 raster:
        r12 = [
            [0, 0, 1, 0],
            [0, 0, 0, 0],
            [0, 0, 0, 0],
            [0, 0, 0, 0]
        ]
        #1->3 raster:
        r13 = [
            [0, 0, 0, 1],
            [0, 0, 0, 0],
            [0, 0, 0, 0],
            [0, 0, 0, 0]
        ]
        # 2->1
        r21 = [
            [0, 0, 0, 0],
            [0, 0, 0, 0],
            [0, 0, 0, 0],
            [0, 0, 0, 0]
        ]
        # 2->2
        r22 = [
            [0, 0, 0, 0],
            [0, 0, 1, 0],
            [0, 0, 0, 0],
            [0, 0, 0, 0]
        ]
        # 2->3
        r23 = [
            [0, 0, 0, 0],
            [0, 0, 0, 1],
            [1, 1, 1, 1],
            [0, 0, 0, 0]
        ]
        # 3->1
        r31 = [
            [0, 0, 0, 0],
            [0, 0, 0, 0],
            [0, 0, 0, 0],
            [1, 1, 0, 0]
        ]
        # 3->2
        r32 = [
            [0, 0, 0, 0],
            [0, 0, 0, 0],
            [0, 0, 0, 0],
            [0, 0, 0, 1]
        ]
        # 3->3
        r33 = [
            [0, 0, 0, 0],
            [0, 0, 0, 0],
            [0, 0, 0, 0],
            [0, 0, 1, 0]
        ]
        geodata = initState.getGeodata()
        sites = {'11': r11, '12': r12, '13': r13, '21': r21, '22': r22, '23': r23, '31': r31, '32': r32, '33': r33}
        woeDict = {}    # WoE of transitions
        for k in sites.keys():
            if k != '21':  # !!! r21 is zero
                x = Raster()
                x.create([np.ma.array(data=sites[k])], geodata)
                sites[k] = x
                woeDict[k] = woe(initState.getBand(1), x.getBand(1))
        #w1max = np.maximum(woeDict['11'], woeDict['12'], woeDict['13'])
        #w2max = np.maximum(woeDict['22'], woeDict['23'])
        #w3max = np.maximum(woeDict['31'], woeDict['32'], woeDict['33'])
        # Answer is a transition code with max weight
        answer = [
            [0, 0, 0, 0],
            [0, 0, 5, 5],
            [5, 5, 5, 5],
            [6, 6, 6, 6]
        ]
        assert_array_equal(p, answer)

        w = WoeManager([initState], aa, bins = {0: [[2], ],})
        w.train()
        p = w.getPrediction(initState).getBand(1)
        self.assertEquals(p.dtype, np.uint8)
        c = w.getConfidence().getBand(1)
        self.assertEquals(c.dtype, np.uint8)
Example #18
class LR(QObject):
    """
    Implements Logistic Regression model definition and calibration
    (maximum likelihood parameter estimation).
    """

    rangeChanged = pyqtSignal(str, int)
    updateProgress = pyqtSignal()
    processFinished = pyqtSignal()
    samplingFinished = pyqtSignal()
    finished = pyqtSignal()
    logMessage = pyqtSignal(str)
    errorReport = pyqtSignal(str)

    def __init__(self, ns=0, logreg=None):

        QObject.__init__(self)

        if logreg:
            self.logreg = logreg
        else:
            self.logreg = mlr.MLR()

        self.state = None
        self.factors = None
        self.output = None
        self.mode = "All"
        self.samples = None
        self.catlist = None

        self.ns = ns            # Neighbourhood size of training rasters.
        self.data = None        # Training data
        self.maxiter = 100      # Maximum of fitting iterations

        self.sampler = None     # Sampler

        # Results of the LR prediction
        self.prediction = None  # Raster of the LR prediction results
        self.confidence = None  # Raster of the LR results confidence (1 = the maximum confidence, 0 = the least confidence)
        self.Kappa      = 0     # Kappa value
        self.pseudoR    = 0     # Pseudo R-squared (Count) (http://www.ats.ucla.edu/stat/mult_pkg/faq/general/Psuedo_RSquareds.htm)
        self.transitionPotentials = None # Dictionary of transition potential maps: {category1: map1, category2: map2, ...}

    def getCoef(self):
        return self.logreg.get_weights().T

    def getConfidence(self):
        return self.confidence

    def getIntercept(self):
        return self.logreg.get_intercept()

    def getKappa(self):
        return self.Kappa

    def getStdErrIntercept(self):
        X = np.column_stack( (self.data['state'], self.data['factors']) )
        return self.logreg.get_stderr_intercept(X)

    def getStdErrWeights(self):
        X = np.column_stack( (self.data['state'], self.data['factors']) )
        return self.logreg.get_stderr_weights(X).T

    def get_PvalIntercept(self):
        X = np.column_stack( (self.data['state'], self.data['factors']) )
        return self.logreg.get_pval_intercept(X)

    def get_PvalWeights(self):
        X = np.column_stack( (self.data['state'], self.data['factors']) )
        return self.logreg.get_pval_weights(X).T

    def getPrediction(self, state, factors, calcTransitions=False):
        self._predict(state, factors, calcTransitions)
        return self.prediction

    def getPseudoR(self):
        return self.pseudoR

    def getTransitionPotentials(self):
        return self.transitionPotentials

    def _outputConfidence(self, input):
        '''
        Return the confidence (the difference between the two largest probabilities) of the LR output.
        1 = the maximum confidence, 0 = the least confidence
        '''
        out_scl = self.logreg.predict_proba(input)[0]
        # Calculate the confidence:
        out_scl.sort()
        return int(100 * (out_scl[-1] - out_scl[-2]) )

    def outputTransitions(self, input):
        '''
        Return transition potential of the outputs
        '''
        out_scl = self.logreg.predict_proba(input)[0]
        out_scl = [int(100 * x) for x in out_scl]
        result = {}
        for r, v in enumerate(out_scl):
            cat = self.catlist[r]
            result[cat] = v
        return result

    def _predict(self, state, factors, calcTransitions=False):
        '''
        Calculate output and confidence rasters using LR model and input rasters
        @param state            Raster of the current state (categories) values.
        @param factors          List of the factor rasters (predicting variables).
        '''
        try:
            self.rangeChanged.emit(self.tr("Initialize model %p%"), 1)
            geodata = state.getGeodata()
            rows, cols = geodata['ySize'], geodata['xSize']
            for r in factors:
                if not state.geoDataMatch(r):
                    raise LRError('Geometries of the input rasters are different!')

            self.transitionPotentials = None    # Reset tr.potentials if they exist

            # Normalize factors before prediction:
            for f in factors:
                f.normalize(mode = 'mean')

            predicted_band  = np.zeros([rows, cols], dtype=np.uint8)
            confidence_band = np.zeros([rows, cols], dtype=np.uint8)
            if calcTransitions:
                self.transitionPotentials = {}
                for cat in self.catlist:
                    self.transitionPotentials[cat] = np.zeros([rows, cols], dtype=np.uint8)

            self.sampler = Sampler(state, factors, ns=self.ns)
            mask = state.getBand(1).mask.copy()
            if mask.shape == ():
                mask = np.zeros([rows, cols], dtype=np.bool)
            self.updateProgress.emit()
            self.rangeChanged.emit(self.tr("Prediction %p%"), rows)
            for i in xrange(rows):
                for j in xrange(cols):
                    if not mask[i,j]:
                        input = self.sampler.get_inputs(state, i,j)
                        if input != None:
                            input = np.array([input])
                            out = self.logreg.predict(input)
                            predicted_band[i,j] = out
                            confidence = self._outputConfidence(input)
                            confidence_band[i, j] = confidence

                            if calcTransitions:
                                potentials = self.outputTransitions(input)
                                for cat in self.catlist:
                                    map = self.transitionPotentials[cat]
                                    map[i, j] = potentials[cat]
                        else: # Input sample is incomplete => mask this pixel
                            mask[i, j] = True
                self.updateProgress.emit()
            predicted_bands  = [np.ma.array(data = predicted_band,  mask = mask, dtype=np.uint8)]
            confidence_bands = [np.ma.array(data = confidence_band, mask = mask, dtype=np.uint8)]

            self.prediction = Raster()
            self.prediction.create(predicted_bands, geodata)
            self.confidence = Raster()
            self.confidence.create(confidence_bands, geodata)

            if calcTransitions:
                for cat in self.catlist:
                    band = [np.ma.array(data=self.transitionPotentials[cat], mask=mask, dtype=np.uint8)]
                    self.transitionPotentials[cat] = Raster()
                    self.transitionPotentials[cat].create(band, geodata)
        except MemoryError:
            self.errorReport.emit(self.tr("The system out of memory during LR prediction"))
            raise
        except:
            self.errorReport.emit(self.tr("An unknown error occurs during LR prediction"))
            raise
        finally:
            self.processFinished.emit()

    def __propagateSamplerSignals(self):
        self.sampler.rangeChanged.connect(self.__samplerProgressRangeChanged)
        self.sampler.updateProgress.connect(self.__samplerProgressChanged)
        self.sampler.samplingFinished.connect(self.__samplerFinished)

    def __samplerFinished(self):
        self.sampler.rangeChanged.disconnect(self.__samplerProgressRangeChanged)
        self.sampler.updateProgress.disconnect(self.__samplerProgressChanged)
        self.sampler.samplingFinished.disconnect(self.__samplerFinished)
        self.samplingFinished.emit()

    def __samplerProgressRangeChanged(self, message, maxValue):
        self.rangeChanged.emit(message, maxValue)

    def __samplerProgressChanged(self):
        self.updateProgress.emit()

    def save(self):
        pass

    def saveSamples(self, fileName):
        self.sampler.saveSamples(fileName)

    def setMaxIter(self, maxiter):
        self.maxiter = maxiter

    def setTrainingData(self):
        state, factors, output, mode, samples = self.state, self.factors, self.output, self.mode, self.samples
        if not self.logreg:
            raise LRError('You must create a Logistic Regression model before!')

        # Normalize factors before sampling:
        for f in factors:
            f.normalize(mode = 'mean')

        self.sampler = Sampler(state, factors, output, ns=self.ns)
        self.__propagateSamplerSignals()
        self.sampler.setTrainingData(state, output, shuffle=False, mode=mode, samples=samples)

        outputVecLen  = self.sampler.outputVecLen
        stateVecLen   = self.sampler.stateVecLen
        factorVectLen = self.sampler.factorVectLen
        size = len(self.sampler.data)

        self.data = self.sampler.data
        self.catlist = np.unique(self.data['output'])

    def train(self):
        X = np.column_stack( (self.data['state'], self.data['factors']) )
        Y = self.data['output']
        self.labelCodes = np.unique(Y)
        self.logreg.fit(X, Y, maxiter=self.maxiter)
        out = self.logreg.predict(X)
        depCoef = DependenceCoef(np.ma.array(out), np.ma.array(Y), expand=True)
        self.Kappa = depCoef.kappa(mode=None)
        self.pseudoR = depCoef.correctness(percent = False)

    def setState(self, state):
        self.state = state

    def setFactors(self, factors):
        self.factors = factors

    def setOutput(self, output):
        self.output = output

    def setMode(self, mode):
        self.mode = mode

    def setSamples(self, samples):
        self.samples = samples

    def startTrain(self):
        try:
            self.setTrainingData()
            self.train()
        except MemoryError:
            self.errorReport.emit(self.tr("The system out of memory during LR training"))
            raise
        except:
            self.errorReport.emit(self.tr("An unknown error occurs during LR trainig"))
            raise
        finally:
            self.finished.emit()
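
The transition potentials produced by outputTransitions() are just the per-category probabilities rescaled to integer percents. A toy sketch of that mapping (hypothetical probabilities and category list):

import numpy as np

catlist = [1, 2, 3]                   # Hypothetical categories, as in self.catlist
proba = np.array([0.62, 0.30, 0.08])  # predict_proba output for one pixel
potentials = {cat: int(100 * p) for cat, p in zip(catlist, proba)}
print(potentials)  # {1: 62, 2: 30, 3: 8}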
Example #19
    def __sim(self):
        '''
        One iteration of the simulation.
        '''
        transition = self.crosstable.getCrosstable()

        self.updatePrediction(self.state)
        changes = self.getPrediction().getBand(1)   # Predicted change map
        changes = changes + 1                       # Filling nodata as 0 can be ambiguous:
        changes = np.ma.filled(changes, 0)          #   (cat_code can be 0; to avoid mixing it with no-data, add 1)
        state = self.getState()
        new_state = state.getBand(1).copy().astype(np.uint8)    # New states (the result of the simulation) will be stored here.

        self.rangeChanged.emit(self.tr("Area Change Analysis %p%"), 2)
        self.updateProgress.emit()
        QCoreApplication.processEvents()
        analyst = AreaAnalyst(state, second = None)
        self.updateProgress.emit()
        QCoreApplication.processEvents()

        categories = state.getBandGradation(1)

        # Make transitions between categories according to the
        # number of moved pixels in the crosstable
        self.rangeChanged.emit(self.tr("Simulation process %p%"), len(categories)**2 - len(categories))
        QCoreApplication.processEvents()
        for initClass in categories:
            for finalClass in categories:
                if initClass == finalClass: continue

                # TODO: Calculate number of pixels to be moved via TransitionMatrix and state raster
                n = transition.getTransition(initClass, finalClass)   # Number of pixels that have to
                                                                      # change category
                                                                      # (uses the TransitionMatrix only).
                if n == 0:
                    continue
                # Find n appropriate places for transition initClass -> finalClass
                cat_code = analyst.encode(initClass, finalClass)
                # Array of places where transitions initClass -> finalClass occur
                places = (changes == cat_code+1)  # cat_code can be 0; do not mix it with no-data in the 'changes' variable
                placesCount = np.sum(places)
                # print "cat_code, placesCount, n", cat_code, placesCount

                if placesCount < n:
                    self.logMessage.emit(self.tr("There are more transitions in the transition matrix, then the model have found"))
                    # print "There are more transitions in the transition matrix, then the model have found"
                    # print "cat_code, placesCount, n", cat_code, placesCount, n
                    QCoreApplication.processEvents()
                    n = placesCount
                if n > 0:
                    confidence = self.getConfidence().getBand(1)
                    # Add some random value
                    rnd = np.random.sample(size=confidence.shape)/1000 # A small random addition to break ties
                    confidence = np.ma.filled(confidence, 0) + rnd
                    confidence = confidence * places # The higher the number in a cell, the higher the probability of transition in that cell.

                    # Ensure n does not exceed the number of nonzero-confidence cells
                    placesCount = np.sum(confidence>0)
                    if placesCount < n: # Some cells where transitions have to appear have zero confidence; the transition count is cropped.
                        # print "Some confidence is zero. cat_code, nonzeroConf, wantedPixels", cat_code, placesCount, n
                        n = placesCount

                    ind = confidence.argsort(axis=None)[-n:]
                    indices = [np.unravel_index(i, confidence.shape) for i in ind]

                    # Now "indices" contains indices of the appropriate places,
                    # make transition initClass -> finalClass
                    for index in indices:
                        new_state[index] = finalClass

                self.updateProgress.emit()
                QCoreApplication.processEvents()

        result = Raster()
        result.create([new_state], state.getGeodata())
        self.state = result
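
A minimal sketch of the cell-selection step used above: sort the flattened confidence array, take the flat indices of the n biggest values, and map them back to (row, col) pairs (synthetic values, n = 3):

import numpy as np

confidence = np.array([[0.1, 0.9, 0.3],
                       [0.7, 0.2, 0.8],
                       [0.4, 0.6, 0.5]])
n = 3
flat = confidence.argsort(axis=None)[-n:]    # flat indices of the n biggest values
indices = [np.unravel_index(i, confidence.shape) for i in flat]
print(indices)   # [(1, 0), (1, 2), (0, 1)] -- the cells holding 0.7, 0.8 and 0.9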
Beispiel #21
0
    def test_create(self):
        raster = Raster()
        raster.create([self.data1], geodata=self.r1.getGeodata())
        self.assertTrue(raster.geoDataMatch(self.r1))
        self.assertEqual(raster.getBandsCount(), 1)
        self.assertEqual(set(raster.getBandGradation(1)), set([0, 1, 2, 3]))
Beispiel #22
0
class WoeManager(object):
    '''This class gets the data extracted from the UI and
    passes it to the woe function, then gets and stores the result.
    '''
    def __init__(self, factors, areaAnalyst, unit_cell=1, bins = None):
        '''
        @param factors      List of the pattern rasters used for prediction of point objects (sites).
        @param areaAnalyst  AreaAnalyst that contains the map of changes and encodes/decodes class numbers.
        @param unit_cell    Method parameter, pixel size of resampled rasters.
        @param bins         Dictionary of bins. Bins are binning boundaries used to reduce the number of classes.
                                For example, if factors = [f0, f1], then bins could be {0:[bins for f0], 1:[bins for f1]} = {0:[[10, 100, 250]],1:[[0.2, 1, 1.5, 4]]}.
                                A list of lists is used because a factor can be a multiband raster and we need a list of bins for every band. For example:
                                factors = [f0, 2-band-factor], bins = {0: [[10, 100, 250]], 1: [[0.2, 1, 1.5, 4], [3, 4, 7]]}
        '''

        self.factors = factors
        self.analyst = areaAnalyst
        self.changeMap   = areaAnalyst.getChangeMap()

        self.prediction = None
        self.confidence = None

        if (bins != None) and (len(factors) != len(bins.keys())):
            raise WoeManagerError('Lengths of bins and factors are different!')

        for r in self.factors:
            if not self.changeMap.geoDataMatch(r):
                raise WoeManagerError('Geometries of the input rasters are different!')

        if self.changeMap.getBandsCount() != 1:
            raise WoeManagerError('Change map must have one band!')

        # Get list of codes from the changeMap raster
        classes = self.changeMap.getBandStat(1)['gradation']
        cMap = self.changeMap.getBand(1)

        self.codes = [int(c) for c in classes]    # Codes of transitions initState->finalState (see AreaAnalyst.encode)

        self.woe = {}
        for code in self.codes:
            sites = binaryzation(cMap, [code])
            # TODO: reclass factors (continuous factor -> ordinal factor)
            wMap = np.ma.zeros(cMap.shape)
            for k in xrange(len(factors)):
                fact = factors[k]
                if bins: # Get bins of the factor
                    bin = bins[k]
                    if (bin != None) and fact.getBandsCount() != len(bin):
                        raise WoeManagerError("Count of bins list for multiband factor is't equal to band count!")
                else: bin = None
                for i in range(1, fact.getBandsCount()+1):
                    band = fact.getBand(i)
                    if bin:
                        band = reclass(band, bin[i-1])
                    band, sites = masks_identity(band, sites)   # Combine masks of the rasters
                    weights = woe(band, sites, unit_cell)       # WoE for the 'code' (initState->finalState) transition and current 'factor'.
                    wMap = wMap + weights
            self.woe[code]=wMap             # WoE for all factors and the transition.


    def getConfidence(self):
        return self.confidence

    def getPrediction(self, state, factors=None):
        '''
        Most of the models use factors for prediction, but WoE takes the list of factors only once (during initialization).
        '''
        self._predict(state)
        return self.prediction

    def getWoe(self):
        return self.woe

    def _predict(self, state):
        '''
        Predict the changes.
        '''
        geodata = self.changeMap.getGeodata()
        rows, cols = geodata['ySize'], geodata['xSize']
        if not self.changeMap.geoDataMatch(state):
            raise WoeManagerError('Geometries of the state and changeMap rasters are different!')

        prediction = np.zeros((rows,cols))
        confidence = np.zeros((rows,cols))
        mask = np.zeros((rows,cols))

        woe = self.getWoe()
        stateBand = state.getBand(1)

        for r in xrange(rows):
            for c in xrange(cols):
                oldMax, currMax = -1000, -1000  # Small numbers
                indexMax = -1                   # Index of Max weight
                initClass = stateBand[r,c]      # Init class (state before transition)
                try:
                    codes = self.analyst.codes(initClass)   # Possible final states
                    for code in codes:
                        try: # If not all possible transitions are present in the changeMap
                            map = woe[code]     # Get WoE map of transition 'code'
                        except KeyError:
                            continue
                        w = map[r,c]        # The weight in the (r,c)-pixel
                        if w > currMax:
                            indexMax, oldMax, currMax = code, currMax, w
                    decode = self.analyst.decode(indexMax)    # Get init & final classes (initState, finalState)
                    prediction[r,c] = decode[1]               # final class
                    confidence[r,c] = sigmoid(currMax) - sigmoid(oldMax)
                except ValueError:
                    mask[r,c] = 1

        predicted_band = np.ma.array(data=prediction, mask=mask)
        self.prediction = Raster()
        self.prediction.create([predicted_band], geodata)
        confidence_band = np.ma.array(data=confidence, mask=mask)
        self.confidence = Raster()
        self.confidence.create([confidence_band], geodata)
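
The bins dictionary described in the constructor docstring maps each factor index to one boundary list per band. A hedged sketch of that layout, with np.digitize standing in for the reclass() helper, whose implementation is not part of this listing:

import numpy as np

bins = {0: [[10, 100, 250]],                 # single-band factor f0
        1: [[0.2, 1, 1.5, 4], [3, 4, 7]]}    # two boundary lists for a 2-band factor f1

band = np.array([5.0, 42.0, 120.0, 300.0])   # a band of factor f0
classed = np.digitize(band, bins[0][0])
print(classed)   # [0 1 2 3]: four ordinal classes instead of a continuum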
Beispiel #23
0
class MCE(object):

    randomConsistencyIndex = {
        2:  0,
        3:  0.58,
        4:  0.90,
        5:  1.12,
        6:  1.24,
        7:  1.32,
        8:  1.41,
        9:  1.45,
        10: 1.49,
        11: 1.51,
        12: 1.48,
        13: 1.56,
        14: 1.57,
        15: 1.59,
        16: 1.60,
        17: 1.61,
        18: 1.62,
        19: 1.63,
        20: 1.63,
        21: 1.64,
        22: 1.65,
        23: 1.65,
        24: 1.66,
        25: 1.66,
        26: 1.67,
        27: 1.67,
        28: 1.67,
        29: 1.68,
        30: 1.68,
        31: 1.68,
        32: 1.69,
        33: 1.69,
        34: 1.69,
        35: 1.69,
        36: 1.70,
        37: 1.70,
        38: 1.70,
        39: 1.70
    }
    def __init__(self, factors, wMatr, initStateNum, finalStateNum):
        '''
        Multicriteria evaluation based on the Saaty method. It defines the transition probability between two classes (initStateNum, finalStateNum).
        @param factors          List of the factor rasters used for prediction.
        @param wMatr            List of lists -- NxN comparison matrix.
        @param initStateNum     Number of initial state (the state before transition).
        @param finalStateNum    Number of final state (the state after transition).
        '''

        self.factors = factors
        self.initStateNum  = initStateNum
        self.finalStateNum = finalStateNum

        # Check matrix dimension and factor count, apply normalization
        self.dim = 0
        for f in factors:
            self.dim = self.dim + f.getBandsCount()
            f.normalize(mode = 'maxmin')
        if self.dim != len(wMatr):
            raise MCEError('Matrix size is different from the number of variables!')

        # Check if the matrix is valid
        for i in xrange(self.dim):
            if len(wMatr[i]) != self.dim:
                raise MCEError('The weight matrix is not NxN!')
        EPSILON = 0.000001      # A small number
        for i in xrange(self.dim):
            if wMatr[i][i] != 1:
                raise MCEError('w[i,i] is not equal to 1!')
            for j in xrange(i+1, self.dim):
                if abs(wMatr[i][j] * wMatr[j][i] - 1) > EPSILON:
                    raise MCEError('w[i,j] * w[j,i] is not equal to 1!')

        self.wMatr = np.array(wMatr)

        self.weights = None     # Weights of the factors, calculated using wMatr
                                # It's a list, the length is self.dim;
                                # the first element is the weight of the first band of the first factor and so on:
                                # [W_f1, ... weights of 1-st factor ..., W_f2, ... weights of 2-nd factor ..., W_fn, ...]

        self.consistency = None # Consistency ratio of the comparison matrix.

        self.prediction = None
        self.confidence = None


    def getConsistency(self):
        if self.consistency is None:
            self.setWeights()
        return self.consistency

    def getConfidence(self):
        return self.confidence

    def getPrediction(self, state, factors=None):
        '''
        Most of the models use factors for prediction, but WoE takes the list of factors only once (during initialization).
        '''
        self._predict(state)
        return self.prediction

    def getWeights(self):
        if self.weights is None:
            self.setWeights()
        return self.weights

    def _predict(self, state):
        '''
        Predict the changes.
        '''
        geodata = state.getGeodata()
        rows, cols = geodata['ySize'], geodata['xSize']

        # Get locations where self.initStateNum occurs
        band = state.getBand(1)
        initStateMask = binaryzation(band, [self.initStateNum])
        mask = band.mask

        # Calculate summary map of factors weights
        # Confidence:
        #   confidence is the summary map of factors if current state == self.initState
        #   confidence is 0 if current state != self.initState
        # Prediction:
        #   predicted value is a constant (self.finalStateNum) if current state == self.initState
        #   predicted value is the current state if current state != self.initState
        confidence = np.zeros((rows,cols))
        weights = self.getWeights()
        weightNum = 0               # Number of processed weights
        for f in self.factors:
            if not f.geoDataMatch(state):
                raise MCEError('Geometries of the state and factor rasters are different!')
            f.normalize(mode = 'maxmin')
            for i in xrange(f.getBandsCount()):
                band = f.getBand(i+1)
                confidence = confidence + band*weights[weightNum]
                mask = np.ma.mask_or(mask, band.mask)
                weightNum = weightNum + 1
        confidence = confidence*initStateMask
        prediction = np.copy(state.getBand(1))
        prediction = np.logical_not(initStateMask) * prediction
        prediction = prediction + initStateMask*self.finalStateNum

        predicted_band = np.ma.array(data=prediction, mask=mask)
        self.prediction = Raster()
        self.prediction.create([predicted_band], geodata)
        confidence_band = np.ma.array(data=confidence, mask=mask)
        self.confidence = Raster()
        self.confidence.create([confidence_band], geodata)

    def setWeights(self):
        '''
        Calculate the weights and the consistency ratio.
        '''
        # Weights
        w, v = np.linalg.eig(self.wMatr)
        maxW = np.max(w)
        maxInd = list(w).index(maxW)    # Index of the biggest eigenvalue
        maxW = maxW.real
        v = v[:,maxInd]       # The eigenvector
        self.weights = [x.real for x in v]  # Matrix v can be complex
        self.weights = self.weights/sum(self.weights)

        # Consistency ratio
        if self.dim > 2:
            ci = (maxW - self.dim)/(self.dim - 1)
            try:
                ri = self.randomConsistencyIndex[self.dim]
                self.consistency = ci/ri
            except KeyError:
                self.consistency = -1
        else:
            self.consistency = 0
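
setWeights() follows the standard Saaty procedure: the factor weights are the normalized principal eigenvector of the comparison matrix, and the consistency ratio is CI/RI, where CI = (lambda_max - n)/(n - 1) and RI comes from the randomConsistencyIndex table. A standalone sketch with a small 3x3 comparison matrix:

import numpy as np

wMatr = np.array([[1.0,   3.0, 5.0],
                  [1/3.0, 1.0, 2.0],
                  [1/5.0, 0.5, 1.0]])
n = wMatr.shape[0]

w, v = np.linalg.eig(wMatr)
maxInd = np.argmax(w.real)              # index of the biggest eigenvalue
lambda_max = w[maxInd].real
weights = v[:, maxInd].real             # principal eigenvector (can be complex, take real part)
weights = weights / weights.sum()       # normalize so the weights sum to 1

ci = (lambda_max - n) / (n - 1)         # consistency index
ri = 0.58                               # random consistency index for n = 3 (see the table above)
print(weights, ci / ri)                 # CR well below 0.1 => acceptably consistent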
Beispiel #24
0
class MlpManager(QObject):
    '''This class gets the data extracted from the UI and
    passes it to a multi-layer perceptron, then gets and stores the result.
    '''

    updateGraph = pyqtSignal(float, float)      # Train error, val. error
    updateMinValErr = pyqtSignal(float)         # Min validation error
    updateDeltaRMS  = pyqtSignal(float)         # Delta of RMS: min(valError) - currentValError
    processFinished = pyqtSignal()
    logMessage = pyqtSignal(str)


    def __init__(self, ns=0, MLP=None):

        QObject.__init__(self)

        self.MLP = MLP

        self.layers = None
        if self.MLP:
            self.layers = self.getMlpTopology()

        self.ns = ns            # Neighbourhood size of training rasters.
        self.data = None        # Training data
        self.classlist   = None # List of unique output values of the output raster
        self.train_error = None # Error on training set
        self.val_error   = None # Error on validation set
        self.minValError = None # The minimum error that is achieved on the validation set

        # Results of the MLP prediction
        self.prediction = None  # Raster of the MLP prediction results
        self.confidence = None  # Raster of the MLP results confidence

        # Outputs of the activation function for small and big numbers
        self.sigmax, self.sigmin = sigmoid(100), sigmoid(-100)  # Max and Min of the sigmoid function
        self.sigrange = self.sigmax - self.sigmin               # Range of the sigmoid

    def computeMlpError(self, sample):
        '''Get MLP error on the sample'''
        input = np.hstack( (sample['state'], sample['factors']) )
        out = self.getOutput( input )
        err = ((sample['output'] - out)**2).sum()/len(out)
        return err

    def computePerformance(self, train_indexes, val_ind):
        '''Check errors of training and validation sets
        @param train_indexes     Tuple that contains indexes of the first and last elements of the training set.
        @param val_ind           Tuple that contains indexes of the first and last elements of the validation set.
        '''
        train_error = 0
        train_sampl = train_indexes[1] - train_indexes[0]       # Count of training samples
        for i in range(train_indexes[0], train_indexes[1]):
            train_error = train_error + self.computeMlpError(sample = self.data[i])
        self.setTrainError(train_error/train_sampl)

        if val_ind:
            val_error = 0
            val_sampl = val_ind[1] - val_ind[0]
            for i in xrange(val_ind[0], val_ind[1]):
                val_error = val_error + self.computeMlpError(sample = self.data[i])
            self.setValError(val_error/val_sampl)

    def copyWeights(self):
        '''Deep copy of the MLP weights'''
        return copy.deepcopy(self.MLP.weights)

    def createMlp(self, state, factors, output, hidden_layers):
        '''
        @param state            Raster of the current state (classes) values.
        @param factors          List of the factor rasters (predicting variables).
        @param output           Raster that contains classes to predict.
        @param hidden_layers    List of neuron counts in hidden layers.
        '''

        if output.getBandsCount() != 1:
            raise MlpManagerError('Output layer must have one band!')

        input_neurons = 0
        for raster in [state] + factors:
            input_neurons = input_neurons + raster.getNeighbourhoodSize(self.ns)


        # Output class (neuron) count
        band = output.getBand(1)
        self.classlist = np.unique(band.compressed())
        classes = len(self.classlist)

        # set neuron counts in the MLP layers
        self.layers = hidden_layers
        self.layers.insert(0, input_neurons)
        self.layers.append(classes)

        self.MLP = MLP(*self.layers)

    def getConfidence(self):
        return self.confidence

    def getInputVectLen(self):
        '''Length of input data vector of the MLP'''
        shape = self.getMlpTopology()
        return shape[0]

    def getOutput(self, input_vector):
        out = self.MLP.propagate_forward( input_vector )
        return out

    def getOutputVectLen(self):
        '''Length of the output data vector of the MLP'''
        shape = self.getMlpTopology()
        return shape[-1]

    def getOutputVector(self, val):
        '''Convert a number val into a vector,
        for example, let self.classlist = [1, 3, 4] then
        if val = 1, result = [ 1, -1, -1]
        if val = 3, result = [-1,  1, -1]
        if val = 4, result = [-1, -1,  1]
        where -1 is minimum of the sigmoid, 1 is max of the sigmoid
        '''
        size = self.getOutputVectLen()
        res = np.ones(size) * (self.sigmin)
        ind = np.where(self.classlist==val)
        res[ind] = self.sigmax
        return res

    def getMinValError(self):
        return self.minValError

    def getMlpTopology(self):
        return self.MLP.shape

    def getPrediction(self, state, factors):
        self._predict(state, factors)
        return self.prediction

    def getTrainError(self):
        return self.train_error

    def getValError(self):
        return self.val_error

    def outputConfidence(self, output):
        '''
        Return confidence (difference between 2 biggest values) of the MLP output.
        '''
        # Scale the output to range [0,1]
        out_scl = 1.0 * (output - self.sigmin) / self.sigrange

        # Calculate the confidence:
        out_scl.sort()
        return out_scl[-1] - out_scl[-2]


    def _predict(self, state, factors):
        '''
        Calculate output and confidence rasters using MLP model and input rasters
        @param state            Raster of the current state (classes) values.
        @param factors          List of the factor rasters (predicting variables).
        '''
        geodata = state.getGeodata()
        rows, cols = geodata['ySize'], geodata['xSize']
        for r in factors:
            if not state.geoDataMatch(r):
                raise MlpManagerError('Geometries of the input rasters are different!')

        # Normalize factors before prediction:
        for f in factors:
            f.normalize(mode = 'mean')

        predicted_band  = np.zeros([rows, cols])
        confidence_band = np.zeros([rows, cols])

        sampler = Sampler(state, factors, ns=self.ns)
        mask = state.getBand(1).mask.copy()
        for i in xrange(rows):
            for j in xrange(cols):
                if not mask[i,j]:
                    input = sampler.get_inputs(state, factors, i,j)
                    if input is not None:
                        out = self.getOutput(input)
                        # Get index of the biggest output value as the result
                        biggest = max(out)
                        res = list(out).index(biggest)
                        predicted_band[i, j] = self.classlist[res]

                        confidence = self.outputConfidence(out)
                        confidence_band[i, j] = confidence
                    else: # Input sample is incomplete => mask this pixel
                        mask[i, j] = True
        predicted_bands  = [np.ma.array(data = predicted_band, mask = mask)]
        confidence_bands = [np.ma.array(data = confidence_band, mask = mask)]

        self.prediction = Raster()
        self.prediction.create(predicted_bands, geodata)
        self.confidence = Raster()
        self.confidence.create(confidence_bands, geodata)


    def readMlp(self):
        pass

    def resetErrors(self):
        self.val_error = np.finfo(np.float).max
        self.train_error = np.finfo(np.float).max

    def resetMlp(self):
        self.MLP.reset()
        self.resetErrors()

    def saveMlp(self):
        pass

    def setMlpWeights(self, w):
        '''Set weights of the MLP'''
        self.MLP.weights = w

    def setTrainingData(self, state, factors, output, shuffle=True, mode='All', samples=None):
        '''
        @param state            Raster of the current state (classes) values.
        @param factors          List of the factor rasters (predicting variables).
        @param output           Raster that contains classes to predict.
        @param shuffle          Perform random shuffle.
        @param mode             Type of sampling method:
                                    All             Get all pixels
                                    Normal          Get samples. Count of samples in the data=samples.
                                    Balanced        Undersampling of major classes and/or oversampling of minor classes.
        @param samples          Sample count of the training data (not used in 'All' mode).
        '''
        if not self.MLP:
            raise MlpManagerError('You must create an MLP first!')

        # Normalize factors before sampling:
        for f in factors:
            f.normalize(mode = 'mean')

        sampler = Sampler(state, factors, output, self.ns)
        sampler.setTrainingData(state, factors, output, shuffle, mode, samples)

        outputVecLen  = self.getOutputVectLen()
        stateVecLen   = sampler.stateVecLen
        factorVectLen = sampler.factorVectLen
        size = len(sampler.data)

        self.data = np.zeros(size, dtype=[('state', float, stateVecLen), ('factors',  float, factorVectLen), ('output', float, outputVecLen)])
        self.data['state'] = sampler.data['state']
        self.data['factors'] = sampler.data['factors']
        self.data['output'] = [self.getOutputVector(sample['output']) for sample in sampler.data]


    def setTrainError(self, error):
        self.train_error = error

    def setValError(self, error):
        self.val_error = error

    def setEpochs(self, epochs):
        self.epochs = epochs

    def setValPercent(self, value=20):
        self.valPercent = value

    def setLRate(self, value=0.1):
        self.lrate = value

    def setMomentum(self, value=0.01):
        self.momentum = value

    def setContinueTrain(self, value=False):
        self.continueTrain = value

    def startTrain(self):
        self.train(self.epochs, self.valPercent, self.lrate, self.momentum, self.continueTrain)

    def train(self, epochs, valPercent=20, lrate=0.1, momentum=0.01, continue_train=False):
        '''Perform the training procedure on the MLP and save the best neural net
        @param epochs           Max iteration count.
        @param valPercent       Percent of the validation set.
        @param lrate            Learning rate.
        @param momentum         Learning momentum.
        @param continue_train   If False, start a new training cycle and reset the weights and the training and validation errors. If True, continue training.
        '''

        samples_count = len(self.data)
        val_sampl_count = samples_count*valPercent/100
        apply_validation = True if val_sampl_count>0 else False # Use or not use validation set
        train_sampl_count = samples_count - val_sampl_count

        # Set first train_sampl_count as training set, the other as validation set
        train_indexes = (0, train_sampl_count)
        val_indexes = (train_sampl_count, samples_count) if apply_validation else None

        if not continue_train: self.resetMlp()
        self.minValError = self.getValError()  # The minimum error that is achieved on the validation set
        last_train_err = self.getTrainError()
        best_weights = self.copyWeights()   # The MLP weights when minimum error that is achieved on the validation set

        for epoch in range(epochs):
            self.trainEpoch(train_indexes, lrate, momentum)
            self.computePerformance(train_indexes, val_indexes)
            self.updateGraph.emit(self.getTrainError(), self.getValError())
            self.updateDeltaRMS.emit(self.getMinValError() - self.getValError())

            last_train_err = self.getTrainError()
            self.setTrainError(last_train_err)
            if apply_validation and (self.getValError() < self.getMinValError()):
                self.minValError = self.getValError()
                best_weights = self.copyWeights()
                self.updateMinValErr.emit(self.getMinValError())

        self.setMlpWeights(best_weights)
        self.processFinished.emit()

    def trainEpoch(self, train_indexes, lrate=0.1, momentum=0.01):
        '''Perform a training epoch on the MLP
        @param train_indexes    Tuple of the min & max indexes of training samples in the samples data.
        @param lrate            Learning rate.
        @param momentum         Learning momentum.
        '''
        train_sampl = train_indexes[1] - train_indexes[0]

        for i in range(train_sampl):
            n = np.random.randint( *train_indexes )
            sample = self.data[n]
            input = np.hstack( (sample['state'],sample['factors']) )
            self.getOutput( input )
            self.MLP.propagate_backward( sample['output'], lrate, momentum )
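
getOutputVector() encodes a category as a one-of-n target vector: sigmax at the category's position in classlist, sigmin everywhere else. A minimal sketch of that encoding; a tanh-shaped activation is assumed here so that the -1/1 example from the docstring holds (the plugin's actual sigmoid function is imported elsewhere and not shown in this listing):

import numpy as np

sigmax, sigmin = np.tanh(100), np.tanh(-100)   # saturated activation: 1 and -1
classlist = np.array([1, 3, 4])

def output_vector(val):
    res = np.ones(len(classlist)) * sigmin
    res[np.where(classlist == val)] = sigmax
    return res

print(output_vector(3))   # [-1.  1. -1.]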
Beispiel #25
0
class LR(QObject):
    """
    Implements Logistic Regression model definition and calibration
    (maximum likelihood parameter estimation).
    """

    rangeChanged = pyqtSignal(str, int)
    updateProgress = pyqtSignal()
    processFinished = pyqtSignal()
    samplingFinished = pyqtSignal()
    finished = pyqtSignal()
    logMessage = pyqtSignal(str)
    errorReport = pyqtSignal(str)

    def __init__(self, ns=0, logreg=None):

        QObject.__init__(self)

        if logreg:
            self.logreg = logreg
        else:
            self.logreg = mlr.MLR()

        self.state = None
        self.factors = None
        self.output = None
        self.mode = "All"
        self.samples = None
        self.catlist = None

        self.ns = ns  # Neighbourhood size of training rasters.
        self.data = None  # Training data
        self.maxiter = 100  # Maximum of fitting iterations

        self.sampler = None  # Sampler

        # Results of the LR prediction
        self.prediction = None  # Raster of the LR prediction results
        self.confidence = None  # Raster of the LR results confidence (1 = the maximum confidence, 0 = the least confidence)
        self.Kappa = 0  # Kappa value
        self.pseudoR = 0  # Pseudo R-squared (Count) (http://www.ats.ucla.edu/stat/mult_pkg/faq/general/Psuedo_RSquareds.htm)
        self.transitionPotentials = None  # Dictionary of transition potential maps: {category1: map1, category2: map2, ...}

    def getCoef(self):
        return self.logreg.get_weights().T

    def getConfidence(self):
        return self.confidence

    def getIntercept(self):
        return self.logreg.get_intercept()

    def getKappa(self):
        return self.Kappa

    def getStdErrIntercept(self):
        X = np.column_stack((self.data['state'], self.data['factors']))
        return self.logreg.get_stderr_intercept(X)

    def getStdErrWeights(self):
        X = np.column_stack((self.data['state'], self.data['factors']))
        return self.logreg.get_stderr_weights(X).T

    def get_PvalIntercept(self):
        X = np.column_stack((self.data['state'], self.data['factors']))
        return self.logreg.get_pval_intercept(X)

    def get_PvalWeights(self):
        X = np.column_stack((self.data['state'], self.data['factors']))
        return self.logreg.get_pval_weights(X).T

    def getPrediction(self, state, factors, calcTransitions=False):
        self._predict(state, factors, calcTransitions)
        return self.prediction

    def getPseudoR(self):
        return self.pseudoR

    def getTransitionPotentials(self):
        return self.transitionPotentials

    def _outputConfidence(self, input):
        '''
        Return confidence (difference between 2 biggest probabilities) of the LR output.
        1 = the maximum confidence, 0 = the least confidence
        '''
        out_scl = self.logreg.predict_proba(input)[0]
        # Calculate the confidence:
        out_scl.sort()
        return int(100 * (out_scl[-1] - out_scl[-2]))

    def outputTransitions(self, input):
        '''
        Return transition potential of the outputs
        '''
        out_scl = self.logreg.predict_proba(input)[0]
        out_scl = [int(100 * x) for x in out_scl]
        result = {}
        for r, v in enumerate(out_scl):
            cat = self.catlist[r]
            result[cat] = v
        return result

    def _predict(self, state, factors, calcTransitions=False):
        '''
        Calculate output and confidence rasters using LR model and input rasters
        @param state            Raster of the current state (categories) values.
        @param factors          List of the factor rasters (predicting variables).
        '''
        try:
            self.rangeChanged.emit(self.tr("Initialize model %p%"), 1)
            geodata = state.getGeodata()
            rows, cols = geodata['ySize'], geodata['xSize']
            for r in factors:
                if not state.geoDataMatch(r):
                    raise LRError(
                        'Geometries of the input rasters are different!')

            self.transitionPotentials = None  # Reset tr.potentials if they exist

            # Normalize factors before prediction:
            for f in factors:
                f.normalize(mode='mean')

            predicted_band = np.zeros([rows, cols], dtype=np.uint8)
            confidence_band = np.zeros([rows, cols], dtype=np.uint8)
            if calcTransitions:
                self.transitionPotentials = {}
                for cat in self.catlist:
                    self.transitionPotentials[cat] = np.zeros([rows, cols],
                                                              dtype=np.uint8)

            self.sampler = Sampler(state, factors, ns=self.ns)
            mask = state.getBand(1).mask.copy()
            if mask.shape == ():
                mask = np.zeros([rows, cols], dtype=np.bool)
            self.updateProgress.emit()
            self.rangeChanged.emit(self.tr("Prediction %p%"), rows)
            for i in xrange(rows):
                for j in xrange(cols):
                    if not mask[i, j]:
                        input = self.sampler.get_inputs(state, i, j)
                        if input is not None:
                            input = np.array([input])
                            out = self.logreg.predict(input)
                            predicted_band[i, j] = out
                            confidence = self._outputConfidence(input)
                            confidence_band[i, j] = confidence

                            if calcTransitions:
                                potentials = self.outputTransitions(input)
                                for cat in self.catlist:
                                    map = self.transitionPotentials[cat]
                                    map[i, j] = potentials[cat]
                        else:  # Input sample is incomplete => mask this pixel
                            mask[i, j] = True
                self.updateProgress.emit()
            predicted_bands = [
                np.ma.array(data=predicted_band, mask=mask, dtype=np.uint8)
            ]
            confidence_bands = [
                np.ma.array(data=confidence_band, mask=mask, dtype=np.uint8)
            ]

            self.prediction = Raster()
            self.prediction.create(predicted_bands, geodata)
            self.confidence = Raster()
            self.confidence.create(confidence_bands, geodata)

            if calcTransitions:
                for cat in self.catlist:
                    band = [
                        np.ma.array(data=self.transitionPotentials[cat],
                                    mask=mask,
                                    dtype=np.uint8)
                    ]
                    self.transitionPotentials[cat] = Raster()
                    self.transitionPotentials[cat].create(band, geodata)
        except MemoryError:
            self.errorReport.emit(
                self.tr("The system out of memory during LR prediction"))
            raise
        except:
            self.errorReport.emit(
                self.tr("An unknown error occurs during LR prediction"))
            raise
        finally:
            self.processFinished.emit()

    def __propagateSamplerSignals(self):
        self.sampler.rangeChanged.connect(self.__samplerProgressRangeChanged)
        self.sampler.updateProgress.connect(self.__samplerProgressChanged)
        self.sampler.samplingFinished.connect(self.__samplerFinished)

    def __samplerFinished(self):
        self.sampler.rangeChanged.disconnect(
            self.__samplerProgressRangeChanged)
        self.sampler.updateProgress.disconnect(self.__samplerProgressChanged)
        self.sampler.samplingFinished.disconnect(self.__samplerFinished)
        self.samplingFinished.emit()

    def __samplerProgressRangeChanged(self, message, maxValue):
        self.rangeChanged.emit(message, maxValue)

    def __samplerProgressChanged(self):
        self.updateProgress.emit()

    def save(self):
        pass

    def saveSamples(self, fileName):
        self.sampler.saveSamples(fileName)

    def setMaxIter(self, maxiter):
        self.maxiter = maxiter

    def setTrainingData(self):
        state, factors, output, mode, samples = self.state, self.factors, self.output, self.mode, self.samples
        if not self.logreg:
            raise LRError(
                'You must create a Logistic Regression model first!')

        # Normalize factors before sampling:
        for f in factors:
            f.normalize(mode='mean')

        self.sampler = Sampler(state, factors, output, ns=self.ns)
        self.__propagateSamplerSignals()
        self.sampler.setTrainingData(state,
                                     output,
                                     shuffle=False,
                                     mode=mode,
                                     samples=samples)

        outputVecLen = self.sampler.outputVecLen
        stateVecLen = self.sampler.stateVecLen
        factorVectLen = self.sampler.factorVectLen
        size = len(self.sampler.data)

        self.data = self.sampler.data
        self.catlist = np.unique(self.data['output'])

    def train(self):
        X = np.column_stack((self.data['state'], self.data['factors']))
        Y = self.data['output']
        self.labelCodes = np.unique(Y)
        self.logreg.fit(X, Y, maxiter=self.maxiter)
        out = self.logreg.predict(X)
        depCoef = DependenceCoef(np.ma.array(out), np.ma.array(Y), expand=True)
        self.Kappa = depCoef.kappa(mode=None)
        self.pseudoR = depCoef.correctness(percent=False)

    def setState(self, state):
        self.state = state

    def setFactors(self, factors):
        self.factors = factors

    def setOutput(self, output):
        self.output = output

    def setMode(self, mode):
        self.mode = mode

    def setSamples(self, samples):
        self.samples = samples

    def startTrain(self):
        try:
            self.setTrainingData()
            self.train()
        except MemoryError:
            self.errorReport.emit(
                self.tr("The system out of memory during LR training"))
            raise
        except:
            self.errorReport.emit(
                self.tr("An unknown error occurs during LR trainig"))
            raise
        finally:
            self.finished.emit()
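
_outputConfidence() above reduces the per-category probabilities to a single score: the gap between the two largest probabilities, expressed in integer percent. A minimal sketch with a synthetic predict_proba row:

import numpy as np

proba = np.array([0.10, 0.55, 0.30, 0.05])   # class probabilities of one pixel
proba.sort()
confidence = int(100 * (proba[-1] - proba[-2]))
print(confidence)   # 25: the winner leads the runner-up by 25 points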
Beispiel #26
0
class MlpManager(QObject):
    '''This class gets the data extracted from the UI and
    passes it to a multi-layer perceptron, then gets and stores the result.
    '''

    updateGraph = pyqtSignal(float, float)      # Train error, val. error
    updateMinValErr = pyqtSignal(float)         # Min validation error
    updateDeltaRMS  = pyqtSignal(float)         # Delta of RMS: min(valError) - currentValError
    updateKappa     = pyqtSignal(float)         # Kappa value
    processFinished = pyqtSignal()
    processInterrupted = pyqtSignal()
    logMessage = pyqtSignal(str)
    errorReport = pyqtSignal(str)
    rangeChanged = pyqtSignal(str, int)
    updateProgress = pyqtSignal()

    def __init__(self, ns=0, MLP=None):

        QObject.__init__(self)

        self.MLP = MLP
        self.interrupted = False

        self.layers = None
        if self.MLP:
            self.layers = self.getMlpTopology()

        self.ns = ns            # Neighbourhood size of training rasters.
        self.data = None        # Training data
        self.catlist     = None # List of unique output values of the output raster
        self.train_error = None # Error on training set
        self.val_error   = None # Error on validation set
        self.minValError = None # The minimum error that is achieved on the validation set
        self.valKappa    = 0     # Kappa on the validation set
        self.sampler     = None # Sampler

        # Results of the MLP prediction
        self.prediction = None  # Raster of the MLP prediction results
        self.confidence = None  # Raster of the MLP results confidence (1 = the maximum confidence, 0 = the least confidence)
        self.transitionPotentials = None # Dictionary of transition potential maps: {category1: map1, category2: map2, ...}

        # Outputs of the activation function for small and big numbers
        self.sigmax, self.sigmin = sigmoid(100), sigmoid(-100)  # Max and Min of the sigmoid function
        self.sigrange = self.sigmax - self.sigmin               # Range of the sigmoid

    def computeMlpError(self, sample):
        '''Get MLP error on the sample'''
        input = np.hstack( (sample['state'], sample['factors']) )
        out = self.getOutput( input )
        err = ((sample['output'] - out)**2).sum()/len(out)
        return err

    def computePerformance(self, train_indexes, val_ind):
        '''Check errors of training and validation sets
        @param train_indexes     Tuple that contains indexes of the first and last elements of the training set.
        @param val_ind           Tuple that contains indexes of the first and last elements of the validation set.
        '''
        train_error = 0
        train_sampl = train_indexes[1] - train_indexes[0]       # Count of training samples
        for i in range(train_indexes[0], train_indexes[1]):
            train_error = train_error + self.computeMlpError(sample = self.data[i])
        self.setTrainError(train_error/train_sampl)

        if val_ind:
            val_error = 0
            val_sampl = val_ind[1] - val_ind[0]
            answers   = np.ma.zeros(val_sampl)
            out       = np.ma.zeros(val_sampl)
            for i in xrange(val_ind[0], val_ind[1]):
                sample = self.data[i]
                val_error = val_error + self.computeMlpError(sample = self.data[i])

                input = np.hstack( (sample['state'],sample['factors']) )
                output = self.getOutput(input)
                out[i-val_ind[0]]     = self.outCategory(output)
                answers[i-val_ind[0]] = self.outCategory(sample['output'])
            self.setValError(val_error/val_sampl)
            depCoef = DependenceCoef(out, answers, expand=True)
            self.valKappa = depCoef.kappa(mode=None)

    def copyWeights(self):
        '''Deep copy of the MLP weights'''
        return copy.deepcopy(self.MLP.weights)

    def createMlp(self, state, factors, output, hidden_layers):
        '''
        @param state            Raster of the current state (categories) values.
        @param factors          List of the factor rasters (predicting variables).
        @param output           Raster that contains categories to predict.
        @param hidden_layers    List of neuron counts in hidden layers.
        '''

        if output.getBandsCount() != 1:
            raise MlpManagerError('Output layer must have one band!')

        input_neurons = 0
        for raster in factors:
            input_neurons = input_neurons + raster.getNeighbourhoodSize(self.ns)

        # The state raster contains categories; we need to use n-1 dummy variables (where n = number of categories)
        input_neurons = input_neurons + (len(state.getBandGradation(1))-1) * state.getNeighbourhoodSize(self.ns)

        # Output category's (neuron) list and count
        self.catlist = output.getBandGradation(1)
        categories = len(self.catlist)

        # set neuron counts in the MLP layers
        self.layers = hidden_layers
        self.layers.insert(0, input_neurons)
        self.layers.append(categories)

        self.MLP = MLP(*self.layers)

    def getConfidence(self):
        return self.confidence

    def getInputVectLen(self):
        '''Length of input data vector of the MLP'''
        shape = self.getMlpTopology()
        return shape[0]

    def getOutput(self, input_vector):
        out = self.MLP.propagate_forward( input_vector )
        return out

    def getOutputVectLen(self):
        '''Length of the output data vector of the MLP'''
        shape = self.getMlpTopology()
        return shape[-1]

    def getOutputVector(self, val):
        '''Convert a number val into a vector,
        for example, let self.catlist = [1, 3, 4] then
        if val = 1, result = [ 1, -1, -1]
        if val = 3, result = [-1,  1, -1]
        if val = 4, result = [-1, -1,  1]
        where -1 is minimum of the sigmoid, 1 is max of the sigmoid
        '''
        size = self.getOutputVectLen()
        res = np.ones(size) * (self.sigmin)
        ind = np.where(self.catlist==val)
        res[ind] = self.sigmax
        return res

    def getMinValError(self):
        return self.minValError

    def getMlpTopology(self):
        return self.MLP.shape

    def getKappa(self):
        return self.valKappa

    def getPrediction(self, state, factors, calcTransitions=False):
        self._predict(state, factors, calcTransitions)
        return self.prediction

    def getTrainError(self):
        return self.train_error

    def getTransitionPotentials(self):
        return self.transitionPotentials

    def getValError(self):
        return self.val_error

    def outCategory(self, out_vector):
        # Get index of the biggest output value as the result
        biggest = max(out_vector)
        res = list(out_vector).index(biggest)
        res = self.catlist[res]
        return res

    def outputConfidence(self, output, scale=True):
        '''
        Return confidence (difference between 2 biggest values) of the MLP output.
        @param output: The output vector of the MLP
        @param scale: If True, then scale the confidence to int [0, 1, ..., 100] percent
        '''
        out_scl = self.scaleOutput(output, percent=scale)
        out_scl.sort()
        return out_scl[-1] - out_scl[-2]

    def outputTransitions(self, output, scale=True):
        '''
        Return transition potential of the outputs scaled to [0,1] or to 0-100 percent
        @param output: The output of MLP
        @param scale: If True, then scale the transitions to int ([0, 1, ..., 100]) percent
        '''
        out_scl = self.scaleOutput(output, percent=scale)
        result = {}
        for r, v in enumerate(out_scl):
            cat = self.catlist[r]
            result[cat] = v
        return result

    def scaleOutput(self, output, percent=True):
        '''
        Scale the output to the range [0,1] or to 0-100 percent
        @param output: Output of a MLP
        @param percent: If True, then scale the output to int [0, 1, ..., 100] percent
        '''
        res = 1.0 * (output - self.sigmin) / self.sigrange
        if percent:
            res = [ int(100 * x) for x in res]
        return res

    def _predict(self, state, factors, calcTransitions=False):
        '''
        Calculate output and confidence rasters using MLP model and input rasters
        @param state            Raster of the current state (categories) values.
        @param factors          List of the factor rasters (predicting variables).
        '''
        try:
            self.rangeChanged.emit(self.tr("Initialize model %p%"), 1)
            geodata = state.getGeodata()
            rows, cols = geodata['ySize'], geodata['xSize']
            for r in factors:
                if not state.geoDataMatch(r):
                    raise MlpManagerError('Geometries of the input rasters are different!')

            self.transitionPotentials = None    # Reset tr.potentials if they exist

            # Normalize factors before prediction:
            for f in factors:
                f.normalize(mode = 'mean')

            predicted_band  = np.zeros([rows, cols], dtype=np.uint8)
            confidence_band = np.zeros([rows, cols], dtype=np.uint8)
            if calcTransitions:
                self.transitionPotentials = {}
                for cat in self.catlist:
                    self.transitionPotentials[cat] = np.zeros([rows, cols], dtype=np.uint8)

            self.sampler = Sampler(state, factors, ns=self.ns)
            mask = state.getBand(1).mask.copy()
            if mask.shape == ():
                mask = np.zeros([rows, cols], dtype=np.bool)
            self.updateProgress.emit()
            self.rangeChanged.emit(self.tr("Prediction %p%"), rows)
            for i in xrange(rows):
                for j in xrange(cols):
                    if not mask[i,j]:
                        input = self.sampler.get_inputs(state, i,j)
                        if input is not None:
                            out = self.getOutput(input)
                            res = self.outCategory(out)
                            predicted_band[i, j] = res

                            confidence = self.outputConfidence(out)
                            confidence_band[i, j] = confidence

                            if calcTransitions:
                                potentials = self.outputTransitions(out)
                                for cat in self.catlist:
                                    map = self.transitionPotentials[cat]
                                    map[i, j] = potentials[cat]
                        else: # Input sample is incomplete => mask this pixel
                            mask[i, j] = True
                self.updateProgress.emit()
            predicted_bands  = [np.ma.array(data = predicted_band,  mask = mask, dtype=np.uint8)]
            confidence_bands = [np.ma.array(data = confidence_band, mask = mask, dtype=np.uint8)]

            self.prediction = Raster()
            self.prediction.create(predicted_bands, geodata)
            self.confidence = Raster()
            self.confidence.create(confidence_bands, geodata)

            if calcTransitions:
                for cat in self.catlist:
                    band = [np.ma.array(data=self.transitionPotentials[cat], mask=mask, dtype=np.uint8)]
                    self.transitionPotentials[cat] = Raster()
                    self.transitionPotentials[cat].create(band, geodata)
        except MemoryError:
            self.errorReport.emit(self.tr("The system out of memory during ANN prediction"))
            raise
        except:
            self.errorReport.emit(self.tr("An unknown error occurs during ANN prediction"))
            raise

    def readMlp(self):
        pass

    def resetErrors(self):
        self.val_error = np.finfo(np.float).max
        self.train_error = np.finfo(np.float).max

    def resetMlp(self):
        self.MLP.reset()
        self.resetErrors()

    def saveMlp(self):
        pass

    def saveSamples(self, fileName):
        self.sampler.saveSamples(fileName)

    def setMlpWeights(self, w):
        '''Set weights of the MLP'''
        self.MLP.weights = w

    def setTrainingData(self, state, factors, output, shuffle=True, mode='All', samples=None):
        '''
        @param state            Raster of the current state (categories) values.
        @param factors          List of the factor rasters (predicting variables).
        @param output           Raster that contains categories to predict.
        @param shuffle          Perform random shuffle.
        @param mode             Type of sampling method:
                                    All             Get all pixels
                                    Random          Get samples. Count of samples in the data=samples.
                                    Stratified      Undersampling of major categories and/or oversampling of minor categories.
        @param samples          Sample count of the training data (not used in 'All' mode).
        '''
        if not self.MLP:
            raise MlpManagerError('You must create an MLP first!')

        # Normalize factors before sampling:
        for f in factors:
            f.normalize(mode = 'mean')

        self.sampler = Sampler(state, factors, output, self.ns)
        self.sampler.setTrainingData(state=state, output=output, shuffle=shuffle, mode=mode, samples=samples)

        outputVecLen  = self.getOutputVectLen()
        stateVecLen   = self.sampler.stateVecLen
        factorVectLen = self.sampler.factorVectLen
        size = len(self.sampler.data)

        self.data = np.zeros(size, dtype=[('coords', float, 2), ('state', float, stateVecLen), ('factors',  float, factorVectLen), ('output', float, outputVecLen)])
        self.data['coords']   = self.sampler.data['coords']
        self.data['state']    = self.sampler.data['state']
        self.data['factors']  = self.sampler.data['factors']
        self.data['output']   = [self.getOutputVector(sample['output']) for sample in self.sampler.data]

    def setTrainError(self, error):
        self.train_error = error

    def setValError(self, error):
        self.val_error = error

    def setEpochs(self, epochs):
        self.epochs = epochs

    def setValPercent(self, value=20):
        self.valPercent = value

    def setLRate(self, value=0.1):
        self.lrate = value

    def setMomentum(self, value=0.01):
        self.momentum = value

    def setContinueTrain(self, value=False):
        self.continueTrain = value

    def startTrain(self):
        self.train(self.epochs, self.valPercent, self.lrate, self.momentum, self.continueTrain)

    def stopTrain(self):
        self.interrupted = True

    def train(self, epochs, valPercent=20, lrate=0.1, momentum=0.01, continue_train=False):
        '''Perform the training procedure on the MLP and save the best neural net
        @param epochs           Max iteration count.
        @param valPercent       Percent of the validation set.
        @param lrate            Learning rate.
        @param momentum         Learning momentum.
        @param continue_train   If False, start a new training cycle and reset the weights and the training and validation errors. If True, continue training.
        '''
        try:
            samples_count = len(self.data)
            val_sampl_count = samples_count*valPercent/100
            apply_validation = True if val_sampl_count>0 else False # Use or not use validation set
            train_sampl_count = samples_count - val_sampl_count

            # Set first train_sampl_count as training set, the other as validation set
            train_indexes = (0, train_sampl_count)
            val_indexes = (train_sampl_count, samples_count) if apply_validation else None

            if not continue_train: self.resetMlp()
            self.minValError = self.getValError()  # The minimum error that is achieved on the validation set
            last_train_err = self.getTrainError()
            best_weights = self.copyWeights()   # The MLP weights when minimum error that is achieved on the validation set

            self.rangeChanged.emit(self.tr("Train model %p%"), epochs)
            for epoch in range(epochs):
                self.trainEpoch(train_indexes, lrate, momentum)
                self.computePerformance(train_indexes, val_indexes)
                self.updateGraph.emit(self.getTrainError(), self.getValError())
                self.updateDeltaRMS.emit(self.getMinValError() - self.getValError())
                self.updateKappa.emit(self.getKappa())

                QCoreApplication.processEvents()
                if self.interrupted:
                    self.processInterrupted.emit()
                    break

                last_train_err = self.getTrainError()
                self.setTrainError(last_train_err)
                if apply_validation and (self.getValError() < self.getMinValError()):
                    self.minValError = self.getValError()
                    best_weights = self.copyWeights()
                    self.updateMinValErr.emit(self.getMinValError())
                self.updateProgress.emit()

            self.setMlpWeights(best_weights)
        except MemoryError:
            self.errorReport.emit(self.tr("The system out of memory during ANN training"))
            raise
        except:
            self.errorReport.emit(self.tr("An unknown error occurs during ANN trainig"))
            raise
        finally:
            self.processFinished.emit()

    def trainEpoch(self, train_indexes, lrate=0.1, momentum=0.01):
        '''Perform a training epoch on the MLP
        @param train_indexes    Tuple of the min & max indexes of training samples in the samples data.
        @param lrate            Learning rate.
        @param momentum         Learning momentum.
        '''
        train_sampl = train_indexes[1] - train_indexes[0]

        for i in range(train_sampl):
            n = np.random.randint( *train_indexes )
            sample = self.data[n]
            input = np.hstack( (sample['state'],sample['factors']) )
            self.getOutput( input )     # Forward propagation
            self.MLP.propagate_backward( sample['output'], lrate, momentum )
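
scaleOutput() and outputTransitions() map the raw MLP output from the sigmoid range onto integer percent and label each value with its category, giving the per-category transition potentials. A minimal sketch, again assuming a tanh-shaped activation (sigmin = -1, sigmax = 1):

import numpy as np

sigmax, sigmin = np.tanh(100), np.tanh(-100)
sigrange = sigmax - sigmin

catlist = [1, 3, 4]
output = np.array([-0.5, 0.5, 0.0])           # raw MLP output vector
scaled = [int(100 * (x - sigmin) / sigrange) for x in output]
potentials = dict(zip(catlist, scaled))
print(potentials)   # {1: 25, 3: 75, 4: 50}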
Beispiel #27
0
class WoeManager(QObject):
    """This class gets the data extracted from the UI and
    pass it to woe function, then gets and stores the result.
    """

    rangeChanged = pyqtSignal(str, int)
    updateProgress = pyqtSignal()
    processFinished = pyqtSignal()
    logMessage = pyqtSignal(str)
    errorReport = pyqtSignal(str)

    def __init__(self, factors, areaAnalyst, unit_cell=1, bins=None):
        """
        @param factors      List of the pattern rasters used for prediction of point objects (sites).
        @param areaAnalyst  AreaAnalyst that contains the map of changes and encodes/decodes category numbers.
        @param unit_cell    Method parameter, pixel size of resampled rasters.
        @param bins         Dictionary of bins. Bins are binning boundaries used to reduce the number of categories.
                                For example, if factors = [f0, f1], then bins could be {0:[bins for f0], 1:[bins for f1]} = {0:[[10, 100, 250]],1:[[0.2, 1, 1.5, 4]]}.
                                A list of lists is used because a factor can be a multiband raster and we need a list of bins for every band. For example:
                                factors = [f0, 2-band-factor], bins = {0: [[10, 100, 250]], 1: [[0.2, 1, 1.5, 4], [3, 4, 7]]}
        """

        QObject.__init__(self)

        self.factors = factors
        self.analyst = areaAnalyst
        self.changeMap = areaAnalyst.getChangeMap()
        self.bins = bins
        self.unit_cell = unit_cell

        self.prediction = None  # Raster of the prediction results
        self.confidence = None  # Raster of the results confidence (1 = the maximum confidence, 0 = the least confidence)

        if (bins is not None) and (len(self.factors) != len(bins.keys())):
            raise WoeManagerError("Lengths of bins and factors are different!")

        for r in self.factors:
            if not self.changeMap.geoDataMatch(r):
                raise WoeManagerError("Geometries of the input rasters are different!")

        if self.changeMap.getBandsCount() != 1:
            raise WoeManagerError("Change map must have one band!")

        self.geodata = self.changeMap.getGeodata()

        # Denormalize factors if they are normalized
        for r in self.factors:
            r.denormalize()

        # Get list of codes from the changeMap raster
        categories = self.changeMap.getBandGradation(1)

        self.codes = [int(c) for c in categories]  # Codes of transitions initState->finalState (see AreaAnalyst.encode)
        self.woe = {}  # Maps of WoE results of every transition code

        self.weights = {}  # WoE weights per transition code, factor and raster band.
        # The format is: {transition_code: {factorNumber: {bandNumber: [list of the weights]}}},
        # for example:
        # {
        #   0: {0: {1: [...]}, 1: {1: [...]}},
        #   1: {0: {1: [...]}, 1: {1: [...]}},
        #   2: {0: {1: [...]}, 1: {1: [...]}},
        #   ...
        # }
        self.transitionPotentials = None  # Dictionary of transition potential maps: {category1: map1, category2: map2, ...}

    def checkBins(self):
        """
        Check if the bins are applicable to the factors
        """
        if self.bins is not None:
            for i, factor in enumerate(self.factors):
                factor.denormalize()
                factorBins = self.bins[i]
                if (factorBins is not None) and (factorBins != [None]):
                    for j in range(factor.getBandsCount()):
                        b = factorBins[j]
                        if b != sorted(b):  # Boundaries must be sorted ascending
                            return False
                        b0, bMax = b[0], b[-1]
                        bandStat = factor.getBandStat(j + 1)
                        if bandStat["min"] > b0 or bandStat["max"] < bMax:
                            return False
        return True
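
    # Illustration: checkBins() accepts a bins dictionary only if every
    # per-band boundary list is sorted ascending and lies inside the band's
    # min-max range. E.g. bins = {0: [[10, 100, 250]], 1: [[0.2, 1, 1.5, 4], [3, 4, 7]]}
    # passes for suitable rasters, while {0: [[100, 10, 250]]} is rejected
    # because the boundaries are not sorted.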

    def getConfidence(self):
        return self.confidence

    def getPrediction(self, state, factors=None, calcTransitions=False):
        """
        Most models use the factors at prediction time, but WoE receives the factor list only once (during initialization).
        """
        self._predict(state, calcTransitions)
        return self.prediction

    def getTransitionPotentials(self):
        return self.transitionPotentials

    def getWoe(self):
        return self.woe

    def _predict(self, state, calcTransitions=False):
        """
        Predict the changes.
        """
        try:
            self.rangeChanged.emit(self.tr("Initialize model %p%"), 1)

            rows, cols = self.geodata["ySize"], self.geodata["xSize"]
            if not self.changeMap.geoDataMatch(state):
                raise WoeManagerError("Geometries of the state and changeMap rasters are different!")

            prediction = np.zeros((rows, cols), dtype=np.uint8)
            confidence = np.zeros((rows, cols), dtype=np.uint8)
            mask = np.zeros((rows, cols), dtype=np.byte)

            stateBand = state.getBand(1)

            self.updateProgress.emit()
            self.rangeChanged.emit(self.tr("Prediction %p%"), rows)

            for r in xrange(rows):
                for c in xrange(cols):
                    oldMax, currMax = -1000, -1000  # Small numbers
                    indexMax = -1  # Index of Max weight
                    initCat = stateBand[r, c]  # Init category (state before transition)
                    try:
                        codes = self.analyst.codes(initCat)  # Possible final states
                        for code in codes:
                            try:  # Not all possible transitions may be present in the changeMap
                                woeMap = self.woe[code]  # Get WoE map of transition 'code'
                            except KeyError:
                                continue
                            w = woeMap[r, c]  # The weight in the (r, c) pixel
                            if w > currMax:
                                indexMax, oldMax, currMax = code, currMax, w
                        prediction[r, c] = indexMax
                        confidence[r, c] = int(100 * (sigmoid(currMax) - sigmoid(oldMax)))
                    except ValueError:
                        mask[r, c] = 1
                self.updateProgress.emit()

            predicted_band = np.ma.array(data=prediction, mask=mask, dtype=np.uint8)
            self.prediction = Raster()
            self.prediction.create([predicted_band], self.geodata)
            confidence_band = np.ma.array(data=confidence, mask=mask, dtype=np.uint8)
            self.confidence = Raster()
            self.confidence.create([confidence_band], self.geodata)
        except MemoryError:
            self.errorReport.emit(self.tr("The system ran out of memory during WoE prediction"))
            raise
        except Exception:
            self.errorReport.emit(self.tr("An unknown error occurred during WoE prediction"))
            raise
        finally:
            self.processFinished.emit()

    def train(self):
        """
        Train the model
        """
        self.transitionPotentials = {}
        try:
            iterCount = len(self.codes) * len(self.factors)
            self.rangeChanged.emit(self.tr("Training WoE... %p%"), iterCount)
            changeMap = self.changeMap.getBand(1)
            for code in self.codes:
                sites = binaryzation(changeMap, [code])
                # Reclass factors (continuous factor -> ordinal factor)
                wMap = np.ma.zeros(changeMap.shape)  # The summary weight map over all factors
                self.weights[code] = {}  # Dictionary for storing weights of every raster's band
                for k in xrange(len(self.factors)):
                    fact = self.factors[k]
                    self.weights[code][k] = {}  # Weights of the factor
                    factorW = self.weights[code][k]
                    if self.bins:  # Get bins of the factor
                        factorBins = self.bins[k]
                        if (factorBins is not None) and fact.getBandsCount() != len(factorBins):
                            raise WoeManagerError("Count of bins lists for the multiband factor isn't equal to the band count!")
                    else:
                        factorBins = None
                    for i in range(1, fact.getBandsCount() + 1):
                        band = fact.getBand(i)
                        if factorBins and factorBins[i - 1]:
                            band = reclass(band, factorBins[i - 1])
                        band, sites = masks_identity(band, sites, dtype=np.uint8)  # Combine masks of the rasters
                        woeRes = woe(
                            band, sites, self.unit_cell
                        )  # WoE for the 'code' (initState->finalState) transition and current 'factor'.
                        weights = woeRes["map"]
                        wMap = wMap + weights
                        factorW[i] = woeRes["weights"]
                    self.updateProgress.emit()

                # Reclassification finished => set WoE coefficients
                self.woe[code] = wMap  # WoE for all factors and the transition code.

                # Potentials are the WoE map rescaled to 0-100 percent
                band = (sigmoid(wMap) * 100).astype(np.uint8)
                p = Raster()
                p.create([band], self.geodata)
                self.transitionPotentials[code] = p
                gc.collect()
        except MemoryError:
            self.errorReport.emit(self.tr("The system ran out of memory during WoE training"))
            raise
        except Exception:
            self.errorReport.emit(self.tr("An unknown error occurred during WoE training"))
            raise
        finally:
            self.processFinished.emit()
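
    # Note: in the classic WoE formulation the positive weight of a factor
    # class is W+ = ln(P(class | sites) / P(class | non-sites)), computed from
    # pixel counts of changed cells inside and outside the class; woe() is
    # assumed to follow this scheme, returning the per-class weights
    # (woeRes["weights"]) and a raster where every pixel carries the weight of
    # its class (woeRes["map"]).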

    def weightsToText(self):
        """
        Format self.weights as text report.
        """
        if self.weights == {}:
            return u""
        text = u""
        for code in self.codes:
            (initClass, finalClass) = self.analyst.decode(code)
            text = text + self.tr("Transition %s -> %s\n" % (int(initClass), int(finalClass)))
            try:
                factorW = self.weights[code]
                for factNum, factDict in factorW.iteritems():
                    name = self.factors[factNum].getFileName()
                    name = basename(name)
                    text = text + self.tr("\t factor: %s \n" % (name,))
                    for bandNum, bandWeights in factDict.iteritems():
                        weights = ["%f" % (w,) for w in bandWeights]
                        text = text + self.tr("\t\t Weights of band %s: %s \n" % (bandNum, ", ".join(weights)))
            except Exception:
                text = text + self.tr("W for code %s (%s -> %s) causes error" % (code, initClass, finalClass))
                raise
        return text
Beispiel #28
0
class MCE(QObject):
    logMessage = pyqtSignal(str)
    errorReport = pyqtSignal(str)

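    # Saaty's random consistency index (RI), keyed by matrix size;
    # used in setWeights() below as CR = CI / RI.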
    randomConsistencyIndex = {
        2:  0,
        3:  0.58,
        4:  0.90,
        5:  1.12,
        6:  1.24,
        7:  1.32,
        8:  1.41,
        9:  1.45,
        10: 1.49,
        11: 1.51,
        12: 1.48,
        13: 1.56,
        14: 1.57,
        15: 1.59,
        16: 1.60,
        17: 1.61,
        18: 1.62,
        19: 1.63,
        20: 1.63,
        21: 1.64,
        22: 1.65,
        23: 1.65,
        24: 1.66,
        25: 1.66,
        26: 1.67,
        27: 1.67,
        28: 1.67,
        29: 1.68,
        30: 1.68,
        31: 1.68,
        32: 1.69,
        33: 1.69,
        34: 1.69,
        35: 1.69,
        36: 1.70,
        37: 1.70,
        38: 1.70,
        39: 1.70
    }
    def __init__(self, factors, wMatr, initStateNum, finalStateNum, areaAnalyst):
        '''
        Multicriteria evaluation based on the Saaty method. It defines the transition probability between two categories (initStateNum, finalStateNum).
        @param factors          List of the factor rasters used for prediction.
        @param wMatr            List of lists -- NxN comparison matrix.
        @param initStateNum     Number of the initial state (the state before transition).
        @param finalStateNum    Number of the final state (the state after transition).
        @param areaAnalyst      AreaAnalyst that encodes/decodes transition categories.
        '''

        QObject.__init__(self)

        self.factors = factors
        self.initStateNum  = initStateNum
        self.finalStateNum = finalStateNum
        self.areaAnalyst   = areaAnalyst

        # Check matrix dimension and factor count, apply normalization
        self.dim = 0
        for f in factors:
            self.dim = self.dim + f.getBandsCount()
            f.normalize(mode = 'maxmin')
        if self.dim != len(wMatr):
            raise MCEError('Matrix size is different from the number of variables!')

        # Check if the matrix is valid
        for i in xrange(self.dim):
            if len(wMatr[i]) != self.dim:
                raise MCEError('The weight matrix is not NxN!')
        EPSILON = 0.000001      # A small number
        for i in xrange(self.dim):
            if wMatr[i][i] != 1:
                raise MCEError('w[i,i] is not equal to 1!')
            for j in xrange(i+1, self.dim):
                if abs(wMatr[i][j] * wMatr[j][i] - 1) > EPSILON:
                    raise MCEError('w[i,j] * w[j,i] is not equal to 1!')

        self.wMatr = np.array(wMatr)

        self.weights = None     # Weights of the factors, calculated from wMatr.
                                # It's a list of length self.dim;
                                # the first element is the weight of the first band of the first factor, and so on:
                                # [W_f1, ... weights of 1-st factor ..., W_f2, ... weights of 2-nd factor ..., W_fn, ...]

        self.consistency = None # Consistency ratio of the comparison matrix.

        self.prediction = None      # Raster of the prediction results
        self.confidence = None      # Raster of the results confidence (1 = the maximum confidence, 0 = the least confidence)
        self.transitionPotentials = None # Dictionary of transition potential maps: {category1: map1, category2: map2, ...}


    def getConsistency(self):
        if self.consistency is None:
            self.setWeights()
        return self.consistency

    def getConfidence(self):
        return self.confidence

    def getTransitionPotentials(self):
        return self.transitionPotentials

    def getPrediction(self, state, factors=None, calcTransitions=False):
        '''
        Most models use the factors at prediction time, but MCE receives the factor list only once (during initialization).
        '''
        self._predict(state, calcTransitions)
        return self.prediction

    def getWeights(self):
        if self.weights is None:
            self.setWeights()
        return self.weights

    def _predict(self, state, calcTransitions=False):
        '''
        Predict the changes.
        '''
        try:
            geodata = state.getGeodata()
            rows, cols = geodata['ySize'], geodata['xSize']

            self.transitionPotentials = None    # Reset tr.potentials if they exist

            # Get locations where self.initStateNum occurs
            band = state.getBand(1)
            initStateMask = binaryzation(band, [self.initStateNum])
            mask = band.mask

            # Calculate summary map of factors weights
            # Transition potentials:
            #   current implementation: potential and confidence are equal (two-class implementation)
            # Confidence:
            #   confidence is summary map of factors scaled to 0-100, if current state = self.initState
            #   confidence is 0, if current state != self.initState
            # Prediction:
            #   predicted value is a constant = areaAnalyst.encode(initStateNum, finalStateNum), if current state = self.initState
            #   predicted value is the transition code current_state -> current_state, if current state != self.initState
            confidence = np.zeros((rows,cols), dtype=np.uint8)
            weights = self.getWeights()
            weightNum = 0               # Number of processed weights
            for f in self.factors:
                if not f.geoDataMatch(state):
                    raise MCEError('Geometries of the state and factor rasters are different!')
                f.normalize(mode = 'maxmin')
                for i in xrange(f.getBandsCount()):
                    band = f.getBand(i+1)
                    confidence = confidence + (band*weights[weightNum]*100).astype(np.uint8)
                    mask = np.ma.mask_or(mask, band.mask)
                    weightNum = weightNum + 1
            confidence = confidence*initStateMask
            prediction = np.copy(state.getBand(1))
            for code in self.areaAnalyst.categories:
                if code != self.initStateNum:
                    prediction[prediction==code] = self.areaAnalyst.encode(code, code)
                else:
                    prediction[prediction==code] = self.areaAnalyst.encode(self.initStateNum, self.finalStateNum)

            predicted_band = np.ma.array(data=prediction, mask=mask, dtype=np.uint8)
            self.prediction = Raster()
            self.prediction.create([predicted_band], geodata)
            confidence_band = np.ma.array(data=confidence, mask=mask, dtype=np.uint8)
            self.confidence = Raster()
            self.confidence.create([confidence_band], geodata)

            code = self.areaAnalyst.encode(self.initStateNum, self.finalStateNum)
            self.transitionPotentials = {code: self.confidence}
        except MemoryError:
            self.errorReport.emit(self.tr("The system ran out of memory during MCE prediction"))
            raise
        except Exception:
            self.errorReport.emit(self.tr("An unknown error occurred during MCE prediction"))
            raise

    def setWeights(self):
        '''
        Calculate the weights and the consistency ratio.
        '''
        # Weights
        w, v = np.linalg.eig(self.wMatr)
        maxW = np.max(w)
        maxInd = list(w).index(maxW)    # Index of the biggest eigenvalue
        maxW = maxW.real
        v = v[:, maxInd]                # The corresponding eigenvector
        self.weights = [x.real for x in v]  # The eigenvector may be complex
        self.weights = self.weights/sum(self.weights)

        # Consistency ratio
        if self.dim > 2:
            ci = (maxW - self.dim)/(self.dim - 1)
            try:
                ri = self.randomConsistencyIndex[self.dim]
                self.consistency = ci/ri
            except KeyError:
                self.consistency = -1
        else:
            self.consistency = 0
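
# setWeights() follows the standard AHP eigenvector method: the priority weights
# are the normalized principal eigenvector of the pairwise comparison matrix and
# the consistency ratio is CR = CI / RI with CI = (lambda_max - n) / (n - 1).
# A standalone sketch with an illustrative 3x3 reciprocal matrix:

import numpy as np

wMatr = np.array([[1.0, 3.0, 5.0],
                  [1 / 3.0, 1.0, 2.0],
                  [1 / 5.0, 1 / 2.0, 1.0]])
w, v = np.linalg.eig(wMatr)
maxInd = np.argmax(w.real)            # Index of the principal eigenvalue
maxW = w[maxInd].real                 # lambda_max
weights = v[:, maxInd].real           # Principal eigenvector (dtype may be complex)
weights = weights / weights.sum()     # Normalize the weights to sum to 1
n = wMatr.shape[0]
ci = (maxW - n) / (n - 1)             # Consistency index
cr = ci / 0.58                        # RI for n = 3 from the table above
print(weights, cr)                    # CR < 0.1 is commonly considered acceptable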
Beispiel #29
0
    def __sim(self):
        '''
        Perform one iteration of the simulation.
        '''
        transition = self.crosstable.getCrosstable()

        self.updatePrediction(self.state)
        changes = self.getPrediction().getBand(1)  # Predicted change map
        changes = changes + 1  # Filling nodata as 0 can be ambiguous:
        changes = np.ma.filled(changes, 0)  # (cat_code can be 0; to avoid mixing it with no-data, add 1)
        state = self.getState()
        new_state = state.getBand(1).copy().astype(np.uint8)  # New states (the result of simulation) will be stored here.

        self.rangeChanged.emit(self.tr("Area Change Analysis %p%"), 2)
        self.updateProgress.emit()
        QCoreApplication.processEvents()
        analyst = AreaAnalyst(state, second=None)
        self.updateProgress.emit()
        QCoreApplication.processEvents()

        categories = state.getBandGradation(1)

        # Make transition between categories according to
        # number of moved pixel in crosstable
        self.rangeChanged.emit(self.tr("Simulation process %p%"),
                               len(categories)**2 - len(categories))
        QCoreApplication.processEvents()
        for initClass in categories:
            for finalClass in categories:
                if initClass == finalClass: continue

                # TODO: Calculate number of pixels to be moved via TransitionMatrix and state raster
                # Number of pixels that have to change category (taken from the transition matrix only):
                n = transition.getTransition(initClass, finalClass)
                if n == 0:
                    continue
                # Find n appropriate places for transition initClass -> finalClass
                cat_code = analyst.encode(initClass, finalClass)
                # Array of places where transitions initClass -> finalClass occur
                places = (changes == cat_code + 1)  # cat_code can be 0; do not mix it with no-data in the 'changes' variable
                placesCount = np.sum(places)
                # print "cat_code, placesCount, n", cat_code, placesCount

                if placesCount < n:
                    self.logMessage.emit(
                        self.tr("There are more transitions in the transition matrix than the model has found"))
                    # print "There are more transitions in the transition matrix, then the model have found"
                    # print "cat_code, placesCount, n", cat_code, placesCount, n
                    QCoreApplication.processEvents()
                    n = placesCount
                if n > 0:
                    confidence = self.getConfidence().getBand(1)
                    # Add a small random value to break ties between equal confidences
                    rnd = np.random.sample(size=confidence.shape) / 1000
                    confidence = np.ma.filled(confidence, 0) + rnd
                    confidence = confidence * places  # The higher the number in a cell, the higher the probability of transition in the cell.

                    # Ensure n is not bigger than the count of nonzero-confidence cells
                    placesCount = np.sum(confidence > 0)
                    if placesCount < n:  # Some cells where transitions have to appear have zero confidence; the transition count will be cropped.
                        # print "Some confidence is zero. cat_code, nonzeroConf, wantedPixels", cat_code, placesCount, n
                        n = placesCount

                    ind = confidence.argsort(axis=None)[-n:]
                    indices = [np.unravel_index(i, confidence.shape) for i in ind]

                    # Now "indices" contains indices of the appropriate places,
                    # make transition initClass -> finalClass
                    for index in indices:
                        new_state[index] = finalClass

                self.updateProgress.emit()
                QCoreApplication.processEvents()

        result = Raster()
        result.create([new_state], state.getGeodata())
        self.state = result
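
# The n most confident cells are picked by argsort over the flattened confidence
# array followed by unravel_index, avoiding a per-pixel argmax loop. A minimal
# demonstration of the selection idiom used above:

import numpy as np

confidence = np.array([[0.1, 0.9, 0.3],
                       [0.7, 0.2, 0.8]])
n = 2
flat_ind = confidence.argsort(axis=None)[-n:]  # Flat indices of the n largest values
indices = [np.unravel_index(i, confidence.shape) for i in flat_ind]
print(indices)  # [(1, 2), (0, 1)] -- the cells holding 0.8 and 0.9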
Beispiel #30
0
class MlpManager(QObject):
    '''This class gets the data extracted from the UI,
    passes it to the multi-layer perceptron, then retrieves and stores the result.
    '''

    updateGraph = pyqtSignal(float, float)  # Train error, val. error
    updateMinValErr = pyqtSignal(float)  # Min validation error
    updateDeltaRMS = pyqtSignal(
        float)  # Delta of RMS: min(valError) - currentValError
    updateKappa = pyqtSignal(float)  # Kappa value
    processFinished = pyqtSignal()
    processInterrupted = pyqtSignal()
    logMessage = pyqtSignal(str)
    errorReport = pyqtSignal(str)
    rangeChanged = pyqtSignal(str, int)
    updateProgress = pyqtSignal()

    def __init__(self, ns=0, MLP=None):

        QObject.__init__(self)

        self.MLP = MLP
        self.interrupted = False

        self.layers = None
        if self.MLP:
            self.layers = self.getMlpTopology()

        self.ns = ns  # Neighbourhood size of training rasters.
        self.data = None  # Training data
        self.catlist = None  # List of unique output values of the output raster
        self.train_error = None  # Error on training set
        self.val_error = None  # Error on validation set
        self.minValError = None  # The minimum error that is achieved on the validation set
        self.valKappa = 0  # Kappa on the validation set
        self.sampler = None  # Sampler

        # Results of the MLP prediction
        self.prediction = None  # Raster of the MLP prediction results
        self.confidence = None  # Raster of the MLP results confidence (1 = the maximum confidence, 0 = the least confidence)
        self.transitionPotentials = None  # Dictionary of transition potential maps: {category1: map1, category2: map2, ...}

        # Outputs of the activation function for small and big numbers
        self.sigmax, self.sigmin = sigmoid(100), sigmoid(-100)  # Max and min of the sigmoid function
        self.sigrange = self.sigmax - self.sigmin  # Range of the sigmoid

    def computeMlpError(self, sample):
        '''Get MLP error on the sample'''
        input = np.hstack((sample['state'], sample['factors']))
        out = self.getOutput(input)
        err = ((sample['output'] - out)**2).sum() / len(out)
        return err

    def computePerformance(self, train_indexes, val_ind):
        '''Check errors of training and validation sets
        @param train_indexes     Tuple that contains indexes of the first and last elements of the training set.
        @param val_ind           Tuple that contains indexes of the first and last elements of the validation set.
        '''
        train_error = 0
        train_sampl = train_indexes[1] - train_indexes[
            0]  # Count of training samples
        for i in range(train_indexes[0], train_indexes[1]):
            train_error = train_error + self.computeMlpError(
                sample=self.data[i])
        self.setTrainError(train_error / train_sampl)

        if val_ind:
            val_error = 0
            val_sampl = val_ind[1] - val_ind[0]
            answers = np.ma.zeros(val_sampl)
            out = np.ma.zeros(val_sampl)
            for i in xrange(val_ind[0], val_ind[1]):
                sample = self.data[i]
                val_error = val_error + self.computeMlpError(
                    sample=self.data[i])

                input = np.hstack((sample['state'], sample['factors']))
                output = self.getOutput(input)
                out[i - val_ind[0]] = self.outCategory(output)
                answers[i - val_ind[0]] = self.outCategory(sample['output'])
            self.setValError(val_error / val_sampl)
            depCoef = DependenceCoef(out, answers, expand=True)
            self.valKappa = depCoef.kappa(mode=None)

    def copyWeights(self):
        '''Deep copy of the MLP weights'''
        return copy.deepcopy(self.MLP.weights)

    def createMlp(self, state, factors, output, hidden_layers):
        '''
        @param state            Raster of the current state (categories) values.
        @param factors          List of the factor rasters (predicting variables).
        @param output           Raster that contains the categories to predict.
        @param hidden_layers    List of neuron counts in the hidden layers.
        '''

        if output.getBandsCount() != 1:
            raise MlpManagerError('Output layer must have one band!')

        input_neurons = 0
        for raster in factors:
            input_neurons = input_neurons + raster.getNeighbourhoodSize(
                self.ns)

        # The state raster contains categories. We need to use n-1 dummy variables (where n = number of categories)
        input_neurons = input_neurons + (len(state.getBandGradation(1)) -
                                         1) * state.getNeighbourhoodSize(
                                             self.ns)

        # Output category's (neuron) list and count
        self.catlist = output.getBandGradation(1)
        categories = len(self.catlist)

        # set neuron counts in the MLP layers
        self.layers = hidden_layers
        self.layers.insert(0, input_neurons)
        self.layers.append(categories)

        self.MLP = MLP(*self.layers)

    def getConfidence(self):
        return self.confidence

    def getInputVectLen(self):
        '''Length of input data vector of the MLP'''
        shape = self.getMlpTopology()
        return shape[0]

    def getOutput(self, input_vector):
        out = self.MLP.propagate_forward(input_vector)
        return out

    def getOutputVectLen(self):
        '''Length of the output data vector of the MLP'''
        shape = self.getMlpTopology()
        return shape[-1]

    def getOutputVector(self, val):
        '''Convert a number val into vector,
        for example, let self.catlist = [1, 3, 4] then
        if val = 1, result = [ 1, -1, -1]
        if val = 3, result = [-1,  1, -1]
        if val = 4, result = [-1, -1,  1]
        where -1 is minimum of the sigmoid, 1 is max of the sigmoid
        '''
        size = self.getOutputVectLen()
        res = np.ones(size) * (self.sigmin)
        ind = np.where(self.catlist == val)
        res[ind] = self.sigmax
        return res

    def getMinValError(self):
        return self.minValError

    def getMlpTopology(self):
        return self.MLP.shape

    def getKappa(self):
        return self.valKappa

    def getPrediction(self, state, factors, calcTransitions=False):
        self._predict(state, factors, calcTransitions)
        return self.prediction

    def getTrainError(self):
        return self.train_error

    def getTransitionPotentials(self):
        return self.transitionPotentials

    def getValError(self):
        return self.val_error

    def outCategory(self, out_vector):
        # Get index of the biggest output value as the result
        biggest = max(out_vector)
        res = list(out_vector).index(biggest)
        res = self.catlist[res]
        return res

    def outputConfidence(self, output, scale=True):
        '''
        Return the confidence (difference between the 2 biggest values) of the MLP output.
        @param output: The output of the MLP
        @param scale: If True, then scale the confidence to int [0, 1, ..., 100] percent
        '''
        out_scl = self.scaleOutput(output, percent=scale)
        out_scl.sort()
        return out_scl[-1] - out_scl[-2]
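
    # Example: with percent scaling, out_scl = [10, 60, 85] gives
    # confidence = 85 - 60 = 25; a near-tie between the two strongest
    # outputs yields a confidence near 0, an unambiguous prediction a
    # value near 100.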

    def outputTransitions(self, output, scale=True):
        '''
        Return transition potentials of the outputs, scaled to [0, 1] or 0-100
        @param output: The output of MLP
        @param scale: If True, then scale the transitions to int ([0, 1, ..., 100]) percent
        '''
        out_scl = self.scaleOutput(output, percent=scale)
        result = {}
        for r, v in enumerate(out_scl):
            cat = self.catlist[r]
            result[cat] = v
        return result

    def scaleOutput(self, output, percent=True):
        '''
        Scale the output to the range [0, 1] or 0-100
        @param output: Output of a MLP
        @param percent: If True, then scale the output to int [0, 1, ..., 100] percent
        '''
        res = 1.0 * (output - self.sigmin) / self.sigrange
        if percent:
            res = [int(100 * x) for x in res]
        return res

    def _predict(self, state, factors, calcTransitions=False):
        '''
        Calculate output and confidence rasters using MLP model and input rasters
        @param state            Raster of the current state (categories) values.
        @param factors          List of the factor rasters (predicting variables).
        '''
        try:
            self.rangeChanged.emit(self.tr("Initialize model %p%"), 1)
            geodata = state.getGeodata()
            rows, cols = geodata['ySize'], geodata['xSize']
            for r in factors:
                if not state.geoDataMatch(r):
                    raise MlpManagerError(
                        'Geometries of the input rasters are different!')

            self.transitionPotentials = None  # Reset tr.potentials if they exist

            # Normalize factors before prediction:
            for f in factors:
                f.normalize(mode='mean')

            predicted_band = np.zeros([rows, cols], dtype=np.uint8)
            confidence_band = np.zeros([rows, cols], dtype=np.uint8)
            if calcTransitions:
                self.transitionPotentials = {}
                for cat in self.catlist:
                    self.transitionPotentials[cat] = np.zeros([rows, cols],
                                                              dtype=np.uint8)

            self.sampler = Sampler(state, factors, ns=self.ns)
            mask = state.getBand(1).mask.copy()
            if mask.shape == ():
                mask = np.zeros([rows, cols], dtype=bool)
            self.updateProgress.emit()
            self.rangeChanged.emit(self.tr("Prediction %p%"), rows)
            for i in xrange(rows):
                for j in xrange(cols):
                    if not mask[i, j]:
                        input = self.sampler.get_inputs(state, i, j)
                        if input is not None:
                            out = self.getOutput(input)
                            res = self.outCategory(out)
                            predicted_band[i, j] = res

                            confidence = self.outputConfidence(out)
                            confidence_band[i, j] = confidence

                            if calcTransitions:
                                potentials = self.outputTransitions(out)
                                for cat in self.catlist:
                                    potMap = self.transitionPotentials[cat]
                                    potMap[i, j] = potentials[cat]
                        else:  # Input sample is incomplete => mask this pixel
                            mask[i, j] = True
                self.updateProgress.emit()
            predicted_bands = [
                np.ma.array(data=predicted_band, mask=mask, dtype=np.uint8)
            ]
            confidence_bands = [
                np.ma.array(data=confidence_band, mask=mask, dtype=np.uint8)
            ]

            self.prediction = Raster()
            self.prediction.create(predicted_bands, geodata)
            self.confidence = Raster()
            self.confidence.create(confidence_bands, geodata)

            if calcTransitions:
                for cat in self.catlist:
                    band = [
                        np.ma.array(data=self.transitionPotentials[cat],
                                    mask=mask,
                                    dtype=np.uint8)
                    ]
                    self.transitionPotentials[cat] = Raster()
                    self.transitionPotentials[cat].create(band, geodata)
        except MemoryError:
            self.errorReport.emit(
                self.tr("The system ran out of memory during ANN prediction"))
            raise
        except Exception:
            self.errorReport.emit(
                self.tr("An unknown error occurred during ANN prediction"))
            raise

    def readMlp(self):
        pass

    def resetErrors(self):
        self.val_error = np.finfo(float).max
        self.train_error = np.finfo(float).max

    def resetMlp(self):
        self.MLP.reset()
        self.resetErrors()

    def saveMlp(self):
        pass

    def saveSamples(self, fileName):
        self.sampler.saveSamples(fileName)

    def setMlpWeights(self, w):
        '''Set weights of the MLP'''
        self.MLP.weights = w

    def setTrainingData(self,
                        state,
                        factors,
                        output,
                        shuffle=True,
                        mode='All',
                        samples=None):
        '''
        @param state            Raster of the current state (categories) values.
        @param factors          List of the factor rasters (predicting variables).
        @param output           Raster that contains the categories to predict.
        @param shuffle          Perform random shuffle.
        @param mode             Type of sampling method:
                                    All             Get all pixels
                                    Random          Get samples; the sample count in the data equals `samples`.
                                    Stratified      Undersampling of major categories and/or oversampling of minor categories.
        @param samples          Sample count of the training data (not used in 'All' mode).
        '''
        if not self.MLP:
            raise MlpManagerError('You must create an MLP first!')

        # Normalize factors before sampling:
        for f in factors:
            f.normalize(mode='mean')

        self.sampler = Sampler(state, factors, output, self.ns)
        self.sampler.setTrainingData(state=state,
                                     output=output,
                                     shuffle=shuffle,
                                     mode=mode,
                                     samples=samples)

        outputVecLen = self.getOutputVectLen()
        stateVecLen = self.sampler.stateVecLen
        factorVectLen = self.sampler.factorVectLen
        size = len(self.sampler.data)

        self.data = np.zeros(size,
                             dtype=[('coords', float, 2),
                                    ('state', float, stateVecLen),
                                    ('factors', float, factorVectLen),
                                    ('output', float, outputVecLen)])
        self.data['coords'] = self.sampler.data['coords']
        self.data['state'] = self.sampler.data['state']
        self.data['factors'] = self.sampler.data['factors']
        self.data['output'] = [
            self.getOutputVector(sample['output'])
            for sample in self.sampler.data
        ]

    def setTrainError(self, error):
        self.train_error = error

    def setValError(self, error):
        self.val_error = error

    def setEpochs(self, epochs):
        self.epochs = epochs

    def setValPercent(self, value=20):
        self.valPercent = value

    def setLRate(self, value=0.1):
        self.lrate = value

    def setMomentum(self, value=0.01):
        self.momentum = value

    def setContinueTrain(self, value=False):
        self.continueTrain = value

    def startTrain(self):
        self.train(self.epochs, self.valPercent, self.lrate, self.momentum,
                   self.continueTrain)

    def stopTrain(self):
        self.interrupted = True

    def train(self,
              epochs,
              valPercent=20,
              lrate=0.1,
              momentum=0.01,
              continue_train=False):
        '''Perform the training procedure on the MLP and save the best neural net
        @param epochs           Max iteration count.
        @param valPercent       Percent of the validation set.
        @param lrate            Learning rate.
        @param momentum         Learning momentum.
        @param continue_train   If False, start a new training cycle: reset the weights and the training and validation errors. If True, continue training.
        '''
        try:
            samples_count = len(self.data)
            val_sampl_count = samples_count * valPercent // 100
            apply_validation = val_sampl_count > 0  # Whether to use a validation set
            train_sampl_count = samples_count - val_sampl_count

            # Set first train_sampl_count as training set, the other as validation set
            train_indexes = (0, train_sampl_count)
            val_indexes = (train_sampl_count,
                           samples_count) if apply_validation else None

            if not continue_train: self.resetMlp()
            self.minValError = self.getValError()  # The minimum error achieved on the validation set
            last_train_err = self.getTrainError()
            best_weights = self.copyWeights()  # The MLP weights at the minimum validation error

            self.rangeChanged.emit(self.tr("Train model %p%"), epochs)
            for epoch in range(epochs):
                self.trainEpoch(train_indexes, lrate, momentum)
                self.computePerformance(train_indexes, val_indexes)
                self.updateGraph.emit(self.getTrainError(), self.getValError())
                self.updateDeltaRMS.emit(self.getMinValError() -
                                         self.getValError())
                self.updateKappa.emit(self.getKappa())

                QCoreApplication.processEvents()
                if self.interrupted:
                    self.processInterrupted.emit()
                    break

                last_train_err = self.getTrainError()
                if apply_validation and (self.getValError() <
                                         self.getMinValError()):
                    self.minValError = self.getValError()
                    best_weights = self.copyWeights()
                    self.updateMinValErr.emit(self.getMinValError())
                self.updateProgress.emit()

            self.setMlpWeights(best_weights)
        except MemoryError:
            self.errorReport.emit(
                self.tr("The system ran out of memory during ANN training"))
            raise
        except Exception:
            self.errorReport.emit(
                self.tr("An unknown error occurred during ANN training"))
            raise
        finally:
            self.processFinished.emit()

    def trainEpoch(self, train_indexes, lrate=0.1, momentum=0.01):
        '''Perform a training epoch on the MLP
        @param train_indexes    Tuple of the min & max indexes of training samples in the samples data.
        @param lrate            Learning rate.
        @param momentum         Learning momentum.
        '''
        train_sampl = train_indexes[1] - train_indexes[0]

        for i in range(train_sampl):
            n = np.random.randint(*train_indexes)
            sample = self.data[n]
            input = np.hstack((sample['state'], sample['factors']))
            self.getOutput(input)  # Forward propagation
            self.MLP.propagate_backward(sample['output'], lrate, momentum)
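
# Putting the pieces together, a typical MlpManager session might look like the
# sketch below. The raster paths, layer sizes and parameter values are
# placeholders, not part of the class above; ns=1 is assumed to mean a 3x3
# neighbourhood.

state = Raster('init_state.tif')      # Categories at time t0
output = Raster('final_state.tif')    # Categories at time t1 (training target)
factors = [Raster('factor1.tif'), Raster('factor2.tif')]

manager = MlpManager(ns=1)
manager.createMlp(state, factors, output, hidden_layers=[10])
manager.setTrainingData(state, factors, output, mode='Random', samples=1000)
manager.setEpochs(100)
manager.setValPercent(20)
manager.setLRate(0.1)
manager.setMomentum(0.01)
manager.setContinueTrain(False)
manager.startTrain()                  # Keeps the weights with the lowest validation error

prediction = manager.getPrediction(state, factors, calcTransitions=True)
confidence = manager.getConfidence()
potentials = manager.getTransitionPotentials()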