def test_binarization(self):
    """Check binaryzation() on self.X and on boolean plain/masked input."""
    # Expected outcome of binarizing self.X against the value list [0, 2].
    expectedRows = [
        [False, True, False],
        [False, True, False],
        [True, False, True],
    ]
    firstExpected = np.array(expectedRows)
    secondExpected = np.array(expectedRows)

    result = binaryzation(self.X, [0, 2])
    assert_array_equal(result, firstExpected)
    assert_array_equal(result, secondExpected)

    mask = [[False, False, False],
            [False, False, False],
            [True, False, False]]
    data = [[False, True, False],
            [False, True, False],
            [False, False, True]]

    # Plain ndarray input compared against a masked expectation.
    plainResult = binaryzation(np.array(data), [True])
    assert_array_equal(plainResult, np.ma.array(data=data, mask=mask))

    # Same comparison when the input already carries the mask.
    maskedData = np.ma.array(data=data, mask=mask)
    maskedResult = binaryzation(maskedData, [True])
    assert_array_equal(maskedResult, np.ma.array(data=maskedData, mask=mask))
def _predict(self, state, calcTransitions=False):
    '''
    Predict the changes.

    Fills self.prediction, self.confidence and self.transitionPotentials.

    @param state            Raster of the current state; band 1 is used.
    @param calcTransitions  Not referenced by this implementation (kept for
                            interface compatibility).

    Any exception is reported through self.errorReport and then re-raised.
    '''
    try:
        geodata = state.getGeodata()
        rows, cols = geodata['ySize'], geodata['xSize']
        self.transitionPotentials = None    # Reset tr.potentials if they exist

        # Get locations where self.initStateNum occurs
        stateBand = state.getBand(1)
        initStateMask = binaryzation(stateBand, [self.initStateNum])
        mask = stateBand.mask

        # Calculate summary map of factors weights
        # Transition potentials:
        #       current implementation: potential and confidence are equal (two-class implementation)
        # Confidence:
        #       confidence is summary map of factors scaled to 0-100, if current state = self.initState
        #       confidence is 0, if current state != self.initState
        # Prediction:
        #       predicted value is a constant = areaAnalyst.encode(initStateNum, finalStateNum), if current state = self.initState
        #       predicted value is the transition code current_state -> current_state, if current state != self.initState
        confidence = np.zeros((rows, cols), dtype=np.uint8)
        weights = self.getWeights()
        weightNum = 0               # Number of processed weights
        for f in self.factors:
            if not f.geoDataMatch(state):
                raise MCEError('Geometries of the state and factor rasters are different!')
            f.normalize(mode='maxmin')
            for bandNum in range(1, f.getBandsCount() + 1):
                band = f.getBand(bandNum)
                confidence = confidence + (band*weights[weightNum]*100).astype(np.uint8)
                # A pixel masked in any factor band is masked in the outputs too.
                mask = np.ma.mask_or(mask, band.mask)
                weightNum = weightNum + 1
        confidence = confidence*initStateMask

        # Recode categories into transition codes. The comparison is made
        # against an untouched snapshot of the state: comparing against the
        # array being rewritten would recode a pixel a second time whenever an
        # already written transition code equals a category processed later.
        original = np.copy(state.getBand(1))
        prediction = np.copy(original)
        for code in self.areaAnalyst.categories:
            if code != self.initStateNum:
                newCode = self.areaAnalyst.encode(code, code)
            else:
                newCode = self.areaAnalyst.encode(self.initStateNum, self.finalStateNum)
            prediction[original == code] = newCode

        predicted_band = np.ma.array(data=prediction, mask=mask, dtype=np.uint8)
        self.prediction = Raster()
        self.prediction.create([predicted_band], geodata)

        confidence_band = np.ma.array(data=confidence, mask=mask, dtype=np.uint8)
        self.confidence = Raster()
        self.confidence.create([confidence_band], geodata)

        code = self.areaAnalyst.encode(self.initStateNum, self.finalStateNum)
        self.transitionPotentials = {code: self.confidence}
    except MemoryError:
        self.errorReport.emit(self.tr("The system out of memory during MCE prediction"))
        raise
    except:
        # Deliberately broad: the error is only reported here and always re-raised.
        self.errorReport.emit(self.tr("An unknown error occurs during MCE prediction"))
        raise
def woe(factor, sites, unit_cell=1):
    '''Weight of evidence method (multiclass form).

    @param factor     Multiclass pattern array (masked) used for prediction of point objects (sites).
    @param sites      Array layer (masked) consisting of the locations at which the point objects are known to occur.
    @param unit_cell  Method parameter, pixelsize of resampled rasters.
    @return dict with keys:
            'map'        - masked array (mask of the factor) holding the total weight of each pixel's category,
            'categories' - categories found in the factor,
            'weights'    - total weights, in the same order as 'categories'.
    Raises WoeError if the sites are not binary or the factor has fewer than two categories.
    '''
    # Categories present in the unmasked part of the factor raster
    categories = get_gradations(factor.compressed())

    # The sites raster must contain exactly two values; the second gradation
    # is the one marked as a site.
    sCategories = get_gradations(sites.compressed())
    if len(sCategories) != 2:
        raise WoeError('Site raster must be binary!')
    sites = binaryzation(sites, [sCategories[1]])

    if len(categories) < 2:
        raise WoeError('Wrong count of categories in the factor raster!')

    # weights[n] is the (wPlus, wMinus) pair of the n-th category
    weights = [
        _binary_woe(binaryzation(factor, [cat]), sites, unit_cell)
        for cat in categories
    ]

    # Total weight of a category: own wPlus plus wMinus of every other category
    wTotalMin = sum([pair[1] for pair in weights])
    wMap = [pair[0] + wTotalMin - pair[1] for pair in weights]

    # Per the original author's note, with exactly two categories the formula
    # above inflates the answer, so the totals are halved.
    if len(categories) == 2:
        wMap = [total/2 for total in wMap]

    # Paint every pixel with the total weight of its category
    resultMap = np.zeros(ma.shape(factor))
    for cat, total in zip(categories, wMap):
        resultMap[factor == cat] = total
    resultMap = ma.array(data=resultMap, mask=factor.mask)

    return {'map': resultMap, 'categories': categories, 'weights': wMap}
def woe(factor, sites, unit_cell=1):
    '''Weight of evidence method (multiclass form).

    @param factor     Multiclass pattern array (masked) used for prediction of point objects (sites).
    @param sites      Array layer (masked) with the locations at which the point objects are known to occur.
    @param unit_cell  Method parameter, pixelsize of resampled rasters.
    @return dict {'map': masked array of per-pixel total weights (mask taken from the factor),
                  'categories': categories of the factor,
                  'weights': total weight of every category, ordered as 'categories'}.
    Raises WoeError when the sites raster is not binary or the factor has
    fewer than two categories.
    '''
    factorCats = get_gradations(factor.compressed())

    # Binarize the sites: exactly two gradations are allowed and the second
    # one is taken as the "site" value.
    siteCats = get_gradations(sites.compressed())
    if len(siteCats) != 2:
        raise WoeError('Site raster must be binary!')
    binarySites = binaryzation(sites, [siteCats[1]])

    if not len(factorCats) >= 2:
        raise WoeError('Wrong count of categories in the factor raster!')

    # One (wPlus, wMinus) pair per factor category
    pairs = []
    for cat in factorCats:
        binaryFactor = binaryzation(factor, [cat])
        pairs.append(_binary_woe(binaryFactor, binarySites, unit_cell))

    minusTotal = sum(pair[1] for pair in pairs)
    # totals[n] = wPlus of category n + wMinus of all the other categories
    totals = [pair[0] + minusTotal - pair[1] for pair in pairs]
    if len(factorCats) == 2:
        # Two-category case: the sum above inflates the answer (original
        # author's note), hence the division by two.
        totals = [value / 2 for value in totals]

    grid = np.zeros(ma.shape(factor))
    for idx in range(len(factorCats)):
        grid[factor == factorCats[idx]] = totals[idx]

    return {
        'map': ma.array(data=grid, mask=factor.mask),
        'categories': factorCats,
        'weights': totals,
    }
def __init__(self, referenceMap, simulatedMap):
    """
    Prepare per-category indicator maps of the reference and simulated rasters.

    @param referenceMap     Reference raster (must have exactly one band)
    @param simulatedMap     Simulated raster (must have exactly one band)

    Raises EBError if a raster is not 1-band, if the geometries differ, or if
    the simulated raster contains a category absent from the reference raster.
    """
    QObject.__init__(self)

    # Check every raster separately: testing the sum of the band counts
    # against 2 would also accept a 0-band raster paired with a 2-band one.
    if referenceMap.getBandsCount() != 1 or simulatedMap.getBandsCount() != 1:
        raise EBError(
            'The reference and simulated rasters must be 1-band rasters!')
    if not referenceMap.geoDataMatch(simulatedMap):
        raise EBError(
            'Geometries of the reference and simulated rasters are different!'
        )

    self.categories = referenceMap.getBandGradation(1)
    for s in simulatedMap.getBandGradation(1):
        if s not in self.categories:
            raise EBError(
                'Categories in the reference and simulated rasters are different!'
            )

    R = referenceMap.getBand(1)
    S = simulatedMap.getBand(1)
    self.shape = R.shape
    # presumably gives both bands one common mask -- confirm against masks_identity
    R, S = masks_identity(R, S, dtype=np.uint8)

    # Array for weight: 1 for usable pixels, 0 for pixels masked in R
    self.W = np.ones(self.shape)
    self.W = self.W - np.ma.getmask(R)

    # Masked pixels are replaced by 0 in the plain arrays used below
    R = np.ma.filled(R, 0)
    S = np.ma.filled(S, 0)

    # Proportion of category j in pixel n at the beginning resolution of the reference map
    self.Rj = {}
    for j in self.categories:
        self.Rj[j] = 1.0 * binaryzation(R, [j])

    # Proportion of category j in pixel n at the beginning resolution of the simulated map
    self.Sj = {}
    for j in self.categories:
        self.Sj[j] = 1.0 * binaryzation(S, [j])
def train(self):
    """
    Train the model.

    For every transition code the WoE maps of all factor bands are summed
    into self.woe[code], the per-band weights are stored in
    self.weights[code][factor index][band number], and the summed map is
    rescaled through sigmoid() to 0--100 and stored as a Raster in
    self.transitionPotentials[code].

    Errors are reported through self.errorReport and re-raised;
    self.processFinished is emitted in every case.
    """
    self.transitionPotentials = {}
    try:
        iterCount = len(self.codes) * len(self.factors)
        self.rangeChanged.emit(self.tr("Training WoE... %p%"), iterCount)
        changeMap = self.changeMap.getBand(1)
        for code in self.codes:
            sites = binaryzation(changeMap, [code])
            # Reclass factors (continuous factor -> ordinal factor)
            wMap = np.ma.zeros(changeMap.shape)  # The map of summary weight of the all factors
            self.weights[code] = {}  # Dictionary for storing weights of every raster's band
            for k, fact in enumerate(self.factors):
                self.weights[code][k] = {}  # Weights of the factor
                factorW = self.weights[code][k]
                if self.bins:  # Get bins of the factor
                    factorBins = self.bins[k]
                    if factorBins is not None and fact.getBandsCount() != len(factorBins):
                        raise WoeManagerError("Count of bins list for multiband factor is't equal to band count!")
                else:
                    factorBins = None
                for i in range(1, fact.getBandsCount() + 1):
                    band = fact.getBand(i)
                    # Reclassify only the bands that have bins defined
                    if factorBins and factorBins[i - 1]:
                        band = reclass(band, factorBins[i - 1])
                    # Combine masks of the rasters
                    band, sites = masks_identity(band, sites, dtype=np.uint8)
                    # WoE for the 'code' (initState->finalState) transition and current 'factor'.
                    woeRes = woe(band, sites, self.unit_cell)
                    weights = woeRes["map"]
                    wMap = wMap + weights
                    factorW[i] = woeRes["weights"]
                # One progress step per (code, factor) pair, matching iterCount
                self.updateProgress.emit()

            # Reclassification finished => set WoE coefficients
            self.woe[code] = wMap  # WoE for all factors and the transition code.

            # Potentials are WoE map rescaled to 0--100 percents
            band = (sigmoid(wMap) * 100).astype(np.uint8)
            p = Raster()
            p.create([band], self.geodata)
            self.transitionPotentials[code] = p
            gc.collect()
    except MemoryError:
        self.errorReport.emit("The system out of memory during WoE trainig")
        raise
    except:
        # Deliberately broad: the error is only reported here and always re-raised.
        self.errorReport.emit(self.tr("An unknown error occurs during WoE trainig"))
        raise
    finally:
        self.processFinished.emit()
def __init__(self, factors, areaAnalyst, unit_cell=1, bins=None):
    '''
    Validate the inputs and compute the WoE map of every transition code.

    @param factors      List of the pattern rasters used for prediction of point objects (sites).
    @param areaAnalyst  AreaAnalyst that contains map of the changes, encodes and decodes class numbers.
    @param unit_cell    Method parameter, pixelsize of resampled rasters.
    @param bins         Dictionary of bins. Bins are binning boundaries that used for reduce count of classes.
                        For example if factors = [f0, f1], then bins could be (for example)
                        {0:[bins for f0], 1:[bins for f1]} = {0:[[10, 100, 250]],1:[[0.2, 1, 1.5, 4]]}.
                        List of list used because a factor can be a multiband raster, we need get a list of bins
                        for every band. For example:
                        factors = [f0, 2-band-factor], bins= {0: [[10, 100, 250]], 1:[[0.2, 1, 1.5, 4], [3, 4, 7]] }

    Raises WoeManagerError if the bins do not match the factors, if the raster
    geometries differ, or if the change map is not a 1-band raster.
    '''
    self.factors = factors
    self.analyst = areaAnalyst
    self.changeMap = areaAnalyst.getChangeMap()

    self.prediction = None
    self.confidence = None

    if bins is not None and len(factors) != len(bins):
        raise WoeManagerError('Lengths of bins and factors are different!')

    for r in self.factors:
        if not self.changeMap.geoDataMatch(r):
            raise WoeManagerError('Geometries of the input rasters are different!')

    if self.changeMap.getBandsCount() != 1:
        raise WoeManagerError('Change map must have one band!')

    # Get list of codes from the changeMap raster
    classes = self.changeMap.getBandStat(1)['gradation']
    cMap = self.changeMap.getBand(1)

    # Codes of transitions initState->finalState (see AreaAnalyst.encode)
    self.codes = [int(c) for c in classes]

    self.woe = {}
    for code in self.codes:
        sites = binaryzation(cMap, [code])
        # TODO: reclass factors (continuous factor -> ordinal factor)
        wMap = np.ma.zeros(cMap.shape)
        for k, fact in enumerate(factors):
            if bins:  # Get bins of the factor
                factorBins = bins[k]
                if factorBins is not None and fact.getBandsCount() != len(factorBins):
                    raise WoeManagerError("Count of bins list for multiband factor is't equal to band count!")
            else:
                factorBins = None
            for i in range(1, fact.getBandsCount() + 1):
                band = fact.getBand(i)
                if factorBins:
                    band = reclass(band, factorBins[i - 1])
                # Combine masks of the rasters
                band, sites = masks_identity(band, sites)
                # WoE for the 'code' (initState->finalState) transition and current 'factor'.
                weights = woe(band, sites, unit_cell)
                wMap = wMap + weights
        self.woe[code] = wMap  # WoE for all factors and the transition.
def woe(factor, sites, unit_cell=1):
    '''Weight of evidence method (multiclass form).

    @param factor     Multiclass pattern array (masked) used for prediction of point objects (sites).
    @param sites      Array layer (masked) consisting of the locations at which the point objects are known to occur.
    @param unit_cell  Method parameter, pixelsize of resampled rasters.
    @return masked array (mask of the factor) in which every pixel holds the
            total weight of the factor class found at that pixel.
    Raises WoeError if the sites are not binary or the factor has fewer than two classes.
    '''
    weightGrid = np.zeros(ma.shape(factor))

    # Classes present in the unmasked part of the factor raster
    classes = get_gradations(factor.compressed())

    # The sites raster must hold exactly two values; the second gradation is
    # used as the "site" value.
    sClasses = get_gradations(sites.compressed())
    if len(sClasses) != 2:
        raise WoeError('Site raster must be binary!')
    sites = binaryzation(sites, [sClasses[1]])

    if len(classes) < 2:
        raise WoeError('Wrong count of classes in the factor raster!')

    # One weights-of-evidence entry per class (items 0 and 1 are used below)
    weights = [
        _binary_woe(binaryzation(factor, [cl]), sites, unit_cell)
        for cl in classes
    ]

    wTotalMin = sum([entry[1] for entry in weights])
    wMap = [entry[0] + wTotalMin - entry[1] for entry in weights]
    # Per the original author's note, the formula above inflates the answer
    # when there are exactly two classes, so the totals are halved.
    if len(classes) == 2:
        wMap = [total/2 for total in wMap]

    for cl, total in zip(classes, wMap):
        weightGrid[factor == cl] = total

    return ma.array(data=weightGrid, mask=factor.mask)
def train(self):
    '''
    Train the model.

    For each transition code: sum the WoE maps of all factor bands into
    self.woe[code], keep the per-band weights in
    self.weights[code][factor index][band number], and store the summed map,
    rescaled by sigmoid() to 0--100, as a Raster in
    self.transitionPotentials[code].

    Errors are reported via self.errorReport and re-raised;
    self.processFinished is always emitted.
    '''
    self.transitionPotentials = {}
    try:
        iterCount = len(self.codes)*len(self.factors)
        self.rangeChanged.emit(self.tr("Training WoE... %p%"), iterCount)
        changeMap = self.changeMap.getBand(1)
        for code in self.codes:
            sites = binaryzation(changeMap, [code])
            # Reclass factors (continuous factor -> ordinal factor)
            wMap = np.ma.zeros(changeMap.shape)     # The map of summary weight of the all factors
            self.weights[code] = {}                 # Dictionary for storing weights of every raster's band
            for k, fact in enumerate(self.factors):
                self.weights[code][k] = {}          # Weights of the factor
                factorW = self.weights[code][k]
                if self.bins:                       # Get bins of the factor
                    factorBins = self.bins[k]
                    if factorBins is not None and fact.getBandsCount() != len(factorBins):
                        raise WoeManagerError("Count of bins list for multiband factor is't equal to band count!")
                else:
                    factorBins = None
                for i in range(1, fact.getBandsCount()+1):
                    band = fact.getBand(i)
                    # Only bands with bins defined are reclassified
                    if factorBins and factorBins[i-1]:
                        band = reclass(band, factorBins[i-1])
                    band, sites = masks_identity(band, sites, dtype=np.uint8)   # Combine masks of the rasters
                    woeRes = woe(band, sites, self.unit_cell)   # WoE for the 'code' (initState->finalState) transition and current 'factor'.
                    weights = woeRes['map']
                    wMap = wMap + weights
                    factorW[i] = woeRes['weights']
                # Progress advances once per (code, factor) pair, as counted by iterCount
                self.updateProgress.emit()

            # Reclassification finished => set WoE coefficients
            self.woe[code] = wMap               # WoE for all factors and the transition code.

            # Potentials are WoE map rescaled to 0--100 percents
            band = (sigmoid(wMap)*100).astype(np.uint8)
            p = Raster()
            p.create([band], self.geodata)
            self.transitionPotentials[code] = p
            gc.collect()
    except MemoryError:
        self.errorReport.emit('The system out of memory during WoE trainig')
        raise
    except:
        # Deliberately broad: the error is only reported here and always re-raised.
        self.errorReport.emit(self.tr("An unknown error occurs during WoE trainig"))
        raise
    finally:
        self.processFinished.emit()
def __init__(self, referenceMap, simulatedMap):
    """
    Build the weight array and the per-category indicator maps.

    @param referenceMap     Reference raster (exactly one band required)
    @param simulatedMap     Simulated raster (exactly one band required)

    Raises EBError when a raster is not 1-band, when the geometries differ,
    or when the simulated raster has a category missing from the reference.
    """
    QObject.__init__(self)

    # The band counts are validated one by one; comparing their sum with 2
    # would let a 0-band + 2-band combination through.
    referenceIsSingle = referenceMap.getBandsCount() == 1
    simulatedIsSingle = simulatedMap.getBandsCount() == 1
    if not (referenceIsSingle and simulatedIsSingle):
        raise EBError('The reference and simulated rasters must be 1-band rasters!')
    if not referenceMap.geoDataMatch(simulatedMap):
        raise EBError('Geometries of the reference and simulated rasters are different!')

    self.categories = referenceMap.getBandGradation(1)
    for s in simulatedMap.getBandGradation(1):
        if s not in self.categories:
            raise EBError('Categories in the reference and simulated rasters are different!')

    R = referenceMap.getBand(1)
    S = simulatedMap.getBand(1)
    self.shape = R.shape
    # presumably unifies the masks of both bands -- confirm against masks_identity
    R, S = masks_identity(R, S, dtype=np.uint8)

    # Array for weight: ones, minus the mask of R (masked pixels get 0)
    self.W = np.ones(self.shape)
    self.W = self.W - np.ma.getmask(R)

    # From here on plain arrays are used, with masked pixels set to 0
    R = np.ma.filled(R, 0)
    S = np.ma.filled(S, 0)

    # Proportion of category j in pixel n at the beginning resolution of the reference map
    self.Rj = {}
    for j in self.categories:
        self.Rj[j] = 1.0*binaryzation(R, [j])

    # Proportion of category j in pixel n at the beginning resolution of the simulated map
    self.Sj = {}
    for j in self.categories:
        self.Sj[j] = 1.0*binaryzation(S, [j])
def test_binarization(self):
    """binaryzation() must give the expected boolean maps for self.X and for boolean input."""
    binarized = binaryzation(self.X, [0, 2])

    # Two separately built (identical) expectations, both compared below.
    expectations = [
        np.array([
            [False, True, False],
            [False, True, False],
            [True, False, True],
        ])
        for _ in (1, 2)
    ]
    assert_array_equal(binarized, expectations[0])
    assert_array_equal(binarized, expectations[1])

    mask = [
        [False, False, False],
        [False, False, False],
        [True, False, False],
    ]
    data = [
        [False, True, False],
        [False, True, False],
        [False, False, True],
    ]

    # Unmasked input
    expectedMasked = np.ma.array(data=data, mask=mask)
    assert_array_equal(binaryzation(np.array(data), [True]), expectedMasked)

    # Input that is already a masked array
    data = np.ma.array(data=data, mask=mask)
    expectedMasked = np.ma.array(data=data, mask=mask)
    assert_array_equal(binaryzation(data, [True]), expectedMasked)
def _predict(self, state):
    '''
    Predict the changes.

    Stores the results as rasters in self.prediction and self.confidence.

    @param state    Raster of the current state; band 1 is used.

    Raises MCEError if a factor raster does not match the state geometry.
    '''
    geodata = state.getGeodata()
    rows = geodata['ySize']
    cols = geodata['xSize']

    # Locations where self.initStateNum occurs
    stateBand = state.getBand(1)
    initStateMask = binaryzation(stateBand, [self.initStateNum])
    mask = stateBand.mask

    # Summary map of the weighted factors.
    # Confidence:
    #       summary map of factors where current state = self.initState,
    #       0 elsewhere.
    # Prediction:
    #       self.finalStateNum where current state = self.initState,
    #       the current state elsewhere.
    confidence = np.zeros((rows, cols))
    weights = self.getWeights()
    weightNum = 0       # Number of processed weights
    for factor in self.factors:
        if not factor.geoDataMatch(state):
            raise MCEError('Geometries of the state and factor rasters are different!')
        factor.normalize(mode='maxmin')
        for bandNum in xrange(1, factor.getBandsCount() + 1):
            factorBand = factor.getBand(bandNum)
            confidence = confidence + factorBand*weights[weightNum]
            # Pixels masked in any factor band stay masked in the outputs
            mask = np.ma.mask_or(mask, factorBand.mask)
            weightNum += 1
    confidence = confidence*initStateMask

    # Zero the init-state pixels, then put the final state number there
    stateCopy = np.copy(state.getBand(1))
    prediction = np.logical_not(initStateMask) * stateCopy
    prediction = prediction + initStateMask*self.finalStateNum

    predicted_band = np.ma.array(data=prediction, mask=mask)
    self.prediction = Raster()
    self.prediction.create([predicted_band], geodata)

    confidence_band = np.ma.array(data=confidence, mask=mask)
    self.confidence = Raster()
    self.confidence.create([confidence_band], geodata)