def createStage3ImageSubsets(self, inputImage, s2BordersImage, s3BordersClumps, subsetImgsDIR, subsetImgsMaskedDIR, subImgBaseName, minSize):
    """
    Clump the stage 2 borders image and write one image subset per clump.

    For every clump except row 0, the bounding box stored in the RAT is used
    to subset ``inputImage`` into ``subsetImgsDIR``. The subset is then masked
    with ``s2BordersImage`` and written to ``subsetImgsMaskedDIR``. The masked
    file name ends in ``_masked.kea`` when the clump's Histogram value is
    greater than ``minSize`` and in ``_burn.kea`` otherwise.

    :param inputImage: image to be subset.
    :param s2BordersImage: borders image which is clumped and used as the mask.
    :param s3BordersClumps: output clumps image (KEA) created from the borders.
    :param subsetImgsDIR: directory receiving the unmasked subsets.
    :param subsetImgsMaskedDIR: directory receiving the masked subsets.
    :param subImgBaseName: base name used for every output file.
    :param minSize: Histogram threshold selecting the output file suffix.
    """
    # Label the border regions, then attach statistics and extents to the RAT.
    segmentation.clump(s2BordersImage, s3BordersClumps, 'KEA', True, 0)
    rastergis.populateStats(s3BordersClumps, True, True)
    rastergis.spatialExtent(s3BordersClumps, 'minXX', 'minXY', 'maxXX', 'maxXY', 'minYX', 'minYY', 'maxYX', 'maxYY')

    imgDataType = rsgislib.RSGISPyUtils().getRSGISLibDataTypeFromImg(inputImage)

    clumpsDS = gdal.Open(s3BordersClumps, gdal.GA_Update)
    xMins = rat.readColumn(clumpsDS, "minXX")
    xMaxs = rat.readColumn(clumpsDS, "maxXX")
    yMins = rat.readColumn(clumpsDS, "minYY")
    yMaxs = rat.readColumn(clumpsDS, "maxYY")
    clumpSizes = rat.readColumn(clumpsDS, "Histogram")

    # Row 0 is never processed, so start at clump 1.
    for clumpID in range(1, xMins.shape[0]):
        clumpBaseName = subImgBaseName + str(clumpID)
        subsetFile = os.path.join(subsetImgsDIR, clumpBaseName + '.kea')
        imageutils.subsetbbox(inputImage, subsetFile, 'KEA', imgDataType,
                              xMins[clumpID], xMaxs[clumpID],
                              yMins[clumpID], yMaxs[clumpID])

        suffix = '_masked.kea' if clumpSizes[clumpID] > minSize else '_burn.kea'
        maskedFile = os.path.join(subsetImgsMaskedDIR, clumpBaseName + suffix)
        imageutils.maskImage(subsetFile, s2BordersImage, maskedFile, 'KEA', imgDataType, 0, 0)
        rastergis.populateStats(maskedFile, True, False)

    # Dropping the reference closes the GDAL dataset.
    clumpsDS = None
def run(self, cmdargs):
    """
    Build an error matrix from two RAT columns and export it.

    The reference and classified columns are read from ``cmdargs.inputFile``.
    The matrix is always written as ASCII to ``cmdargs.outputFile`` and, when
    ``cmdargs.outputTexFile`` is not None, also as TeX.

    :param cmdargs: parsed command line arguments providing ``inputFile``,
                    ``referenceCol``, ``classifiedCol``, ``outputFile`` and
                    ``outputTexFile``.
    """
    refCol = rat.readColumn(cmdargs.inputFile, cmdargs.referenceCol)
    classCol = rat.readColumn(cmdargs.inputFile, cmdargs.classifiedCol)

    # The set of classes is taken from the reference column only.
    classes = np.unique(refCol)
    # A single pre-formatted argument prints the same text whether print is
    # the Python 2 statement or the Python 3 function.
    print("Classes ( {0} ):  {1}".format(classes.size, classes))

    errMatrix = np.zeros((classes.size, classes.size))
    self.buildErrorMatrix(errMatrix, classes, refCol, classCol)

    self.exportErrorMatrixAsASCII(cmdargs.outputFile, classes, errMatrix)
    if cmdargs.outputTexFile is not None:
        self.exportErrorMatrixAsTex(cmdargs.outputTexFile, classes, errMatrix)
def collapseClasses(inputFile):
    """
    Set the colour table entries of the first two RAT rows.

    Row 0 (water) is set to RGB (135, 206, 255) and row 1 (land) to
    RGB (34, 139, 34). The Red, Green and Blue columns are read from, and
    written back to, ``inputFile``.

    :param inputFile: path of the image whose RAT colours are updated.
    """
    ratDataset = gdal.Open(inputFile, gdal.GA_Update)

    # (column name, value for row 0 / water, value for row 1 / land)
    channelColours = (
        ("Red", 135, 34),
        ("Green", 206, 139),
        ("Blue", 255, 34),
    )

    # Read every channel before modifying or writing anything.
    channels = [rat.readColumn(ratDataset, colName)
                for colName, _, _ in channelColours]

    for channel, (_, waterValue, landValue) in zip(channels, channelColours):
        channel[0] = waterValue
        channel[1] = landValue

    for channel, (colName, _, _) in zip(channels, channelColours):
        rat.writeColumn(ratDataset, colName, channel)
def testOutputSameFile(imgfile):
    """
    Test ratapplier when the input and output RAT are the same file.

    Applies ``myFunc`` with a block length of 5 and then checks that the
    'sqrd' column equals the square of the 'Value' column.

    :param imgfile: image file used as both input and output.
    :return: True when the 'sqrd' column is correct, False otherwise.
    """
    sourceRats = ratapplier.RatAssociations()
    targetRats = ratapplier.RatAssociations()
    applierControls = ratapplier.RatApplierControls()

    # The very same handle object is used for reading and writing.
    sourceRats.img = ratapplier.RatHandle(imgfile)
    targetRats.img = sourceRats.img
    applierControls.setBlockLength(5)

    ratapplier.apply(myFunc, sourceRats, targetRats, controls=applierControls)

    values = rat.readColumn(imgfile, 'Value')
    squares = rat.readColumn(imgfile, 'sqrd')

    if (values**2 != squares).any():
        riostestutils.report(TESTNAME, "sqrd incorrect, in sameFile output")
        return False
    return True
def testDifferentOutput(imgfile, imgfile2):
    """
    Test ratapplier when the output RAT goes to a different file.

    A second test file is created without a RAT, ``myFuncDiffFile`` is
    applied with a block length of 3, and the 'sqrd' column of the output
    is compared with the square of the input 'Value' column.

    :param imgfile: input image file with a RAT.
    :param imgfile2: output image file, created here without a RAT.
    :return: True when the 'sqrd' column is correct, False otherwise.
    """
    makeTestFile(imgfile2, withRat=False)

    sourceRats = ratapplier.RatAssociations()
    targetRats = ratapplier.RatAssociations()
    applierControls = ratapplier.RatApplierControls()

    sourceRats.img = ratapplier.RatHandle(imgfile)
    targetRats.outimg = ratapplier.RatHandle(imgfile2)
    applierControls.setBlockLength(3)

    ratapplier.apply(myFuncDiffFile, sourceRats, targetRats,
                     controls=applierControls)

    values = rat.readColumn(imgfile, 'Value')
    squares = rat.readColumn(imgfile2, 'sqrd')

    if (values**2 != squares).any():
        riostestutils.report(TESTNAME, "sqrd incorrect, in differentFile output")
        return False
    return True
def readRATClassesColours(self, gdalRATInput, classNameCol):
    """
    Read the class names and colours of all populated RAT rows.

    Rows whose Histogram value is not greater than zero are skipped.

    :param gdalRATInput: path of the image holding the RAT.
    :param classNameCol: name of the column holding the class names.
    :return: list with one ``[name, red, green, blue]`` entry per populated
             row, where the name has surrounding whitespace stripped.

    On any error the message is printed and the process exits via
    ``sys.exit()``.
    """
    namesColoursList = list()
    try:
        ratDataset = gdal.Open(gdalRATInput, gdal.GA_ReadOnly)

        classNames = rat.readColumn(ratDataset, classNameCol)
        red = rat.readColumn(ratDataset, "Red")
        green = rat.readColumn(ratDataset, "Green")
        blue = rat.readColumn(ratDataset, "Blue")
        histo = rat.readColumn(ratDataset, "Histogram")

        for i in range(len(classNames)):
            if histo[i] > 0:
                namesColoursList.append(
                    [classNames[i].strip(), red[i], green[i], blue[i]])
    except Exception as e:
        # 'except X as e' and a single-argument print() are valid on both
        # Python 2.6+ and Python 3; the text matches the old statement form.
        print("Error:  " + str(e))
        sys.exit()
    # The list used to be built and then dropped; hand it back to the caller.
    return namesColoursList
def testReduceRat(imgfile, imgfile3):
    """
    Test copying a RAT to a new image while reducing its rows.

    A new output image is created with every odd pixel value replaced by
    the even number above it. The RAT is then copied across with the same
    reduction, so only the even numbered rows are written. The check
    compares every second entry of the input 'Value' column with the start
    of the output 'Value' column.

    :param imgfile: input image file with a RAT.
    :param imgfile3: output image file to create.
    :return: True when the reduced RAT is correct, False otherwise.
    """
    # Step 1: copy the raster, reducing the pixel values.
    rasterInputs = applier.FilenameAssociations()
    rasterOutputs = applier.FilenameAssociations()
    rasterInputs.inimg = imgfile
    rasterOutputs.outimg = imgfile3

    # The output format must be one which actually supports RATs.
    rasterControls = applier.ApplierControls()
    rasterControls.setOutputDriverName('HFA')
    applier.apply(rasterReduceFunc, rasterInputs, rasterOutputs,
                  controls=rasterControls)

    # Step 2: reduce the RAT with ratapplier.
    sourceRats = ratapplier.RatAssociations()
    targetRats = ratapplier.RatAssociations()
    ratControls = ratapplier.RatApplierControls()

    sourceRats.img = ratapplier.RatHandle(imgfile)
    targetRats.outimg = ratapplier.RatHandle(imgfile3)
    ratControls.setBlockLength(3)

    ratapplier.apply(ratReduceFunc, sourceRats, targetRats,
                     controls=ratControls)

    # Step 3: compare the even rows of the input with the output rows.
    colEven = rat.readColumn(imgfile, 'Value')[::2]
    colReduced = rat.readColumn(imgfile3, 'Value')[:len(colEven)]

    if (colEven != colReduced).any():
        riostestutils.report(
            TESTNAME,
            "Reduced RAT incorrect: %s, %s" % (colEven, colReduced))
        return False
    return True
def testNewRat(imgfile4):
    """
    Test creating a RAT from scratch, with no input RAT.

    The output file is created without a RAT, ``myFuncNewRat`` is applied
    with an explicit row count of 256 and a block length of 3, and the
    resulting 'newCol' column is compared with the sequence 0..255.

    :param imgfile4: output image file, created here without a RAT.
    :return: truth value of the comparison (result of ``.all()``).
    """
    expectedRows = 256
    makeTestFile(imgfile4, withRat=False)

    sourceRats = ratapplier.RatAssociations()
    targetRats = ratapplier.RatAssociations()
    applierControls = ratapplier.RatApplierControls()

    # There is no input RAT, so the row count is set explicitly.
    applierControls.setRowCount(expectedRows)
    targetRats.outimg = ratapplier.RatHandle(imgfile4)
    applierControls.setBlockLength(3)

    ratapplier.apply(myFuncNewRat, sourceRats, targetRats,
                     controls=applierControls)

    col = rat.readColumn(imgfile4, 'newCol')
    colIntended = numpy.arange(expectedRows, dtype=numpy.uint32)

    ok = (col == colIntended).all()
    if not ok:
        riostestutils.report(TESTNAME,
                             "New RAT incorrect: %s, %s" % (col, colIntended))
    return ok
def run(self, cmdargs):
    """
    Print the unique values found in one RAT column.

    :param cmdargs: parsed command line arguments providing ``inputFile``
                    (path of the image) and ``column`` (RAT column name);
                    both are stripped of surrounding whitespace.

    If the dataset cannot be opened a message is printed and the process
    exits via ``sys.exit()``.
    """
    # Get variables from command line
    inputFilePath = cmdargs.inputFile.strip()
    selectColumn = cmdargs.column.strip()

    # Open the GDAL dataset
    ratDataset = gdal.Open(inputFilePath, gdal.GA_ReadOnly)

    # Check the GDAL dataset was correctly opened
    if ratDataset is None:
        # print() with one argument behaves the same on Python 2 and 3.
        print("The image dataset could not opened.")
        sys.exit()

    # Read the selected column
    selectCol = rat.readColumn(ratDataset, selectColumn)

    # Find and print the unique class names, one per line
    classes = np.unique(selectCol)
    for className in classes:
        print(className)
def run():
    """
    Run tests of the rios.rat functions.

    The test values are written to a RAT column with each datatype under
    test, read back, and compared with the originals. The temporary image
    is removed afterwards.

    :return: True when every column round-trips correctly, False otherwise.
    """
    riostestutils.reportStart(TESTNAME)

    imgfile = 'test.img'
    ratValues = makeTestFile(imgfile)
    nValues = len(ratValues)

    columnList = [
        ("Int32", numpy.int32),
        ("Float32", numpy.float32),
        ("Unicode", numpy.dtype('U10')),
    ]
    # Only test old string type for python 2
    if sys.version_info.major < 3:
        columnList.append(("String", numpy.dtype('S10')))

    allOK = True
    for colName, arrayDtype in columnList:
        # Write the values with the datatype under test ...
        rat.writeColumn(imgfile, colName, ratValues.astype(arrayDtype))

        # ... then read them back and convert to the original datatype.
        column = rat.readColumn(imgfile, colName)
        ratValues_fromFile = column[:nValues].astype(ratValues.dtype)

        if not (ratValues_fromFile == ratValues).all():
            riostestutils.report(TESTNAME,
                                 "Value mis-match for column %s" % (colName))
            allOK = False

    if os.path.exists(imgfile):
        os.remove(imgfile)

    if allOK:
        riostestutils.report(TESTNAME, "Passed")

    return allOK
import sys
import os
import numpy as np
from rios import rat
from osgeo import gdal

# Script: add a uint8 mask column to the RAT of a segmentation image. The
# column is 1 for every row except row 0, which is set to 0.

SegImg = 'Segmentation.kea'
NewColName = 'Mask1'

#######################################################

if not os.path.exists(SegImg):
    sys.exit('Error: Could not find the segmented image.')

ratDataset = gdal.Open(SegImg, 1)  # read the image in read-write mode.
RefCol = rat.readColumn(
    ratDataset, 'Alpha')  # read an existing RAT column to get size.
Ones = np.ones_like(RefCol, dtype='uint8')  # create an array of ones.
del RefCol
Ones[0] = 0  # assign zero to the first clump because it contains no data.
rat.writeColumn(ratDataset, NewColName, Ones)  # write numpy array to RAT.
del Ones, ratDataset
print('Done.')
# NOTE(review): rsgislib is used below but is not imported in the visible
# part of this script - confirm it is imported elsewhere.
ref = 'S1B_IW_GRDH_1SDV_20170423T165655_Sigma0_stack_lee_clumps2_erf_clumptrain_mode_snapped.tif'
mask = 'S1B_IW_GRDH_1SDV_20170423T165655_Sigma0_stack_lee_clumps2_mean.kea'
mask_snap = 'S1B_IW_GRDH_1SDV_20170423T165655_Sigma0_stack_lee_clumps2_mean_snap.kea'
gdalFormat = 'KEA'
# NOTE(review): the call below is cut off in the visible source.
rsgislib.imageutils.resampleImage2Match(ref,
def apply_sklearn_classifer(classTrainInfo, skClassifier, imgMask, imgMaskVal, imgFileInfo, outputImg, gdalformat, classClrNames=True):
    """
    This function uses a trained classifier and applies it to the provided input image.

    :param classTrainInfo: dict (where the key is the class name) of rsgislib.classification.ClassSimpleInfoObj
                           objects which will be used to train the classifier (i.e., train_sklearn_classifier()),
                           provide pixel value id and RGB class values.
    :param skClassifier: a trained instance of a scikit-learn classifier
                         (e.g., use train_sklearn_classifier or train_sklearn_classifer_gridsearch)
    :param imgMask: is an image file providing a mask to specify where should be classified. Simplest mask is all the
                    valid data regions (rsgislib.imageutils.genValidMask)
    :param imgMaskVal: the pixel value within the imgMask to limit the region to which the classification is applied.
                       Can be used to create a hierarchical classification.
    :param imgFileInfo: a list of rsgislib.imageutils.ImageBandInfo objects (also used within
                        rsgislib.imageutils.extractZoneImageBandValues2HDF) to identify which images and bands are to
                        be used for the classification so it adheres to the training data.
    :param outputImg: output image file with the classification. Note. by default a colour table and class names column
                      is added to the image. If an error is produced use HFA or KEA formats.
    :param gdalformat: is the output image format - all GDAL supported formats are supported.
    :param classClrNames: default is True and therefore a colour table with the colours specified in classTrainInfo
                          and a ClassName column will be added to the output file. The class names written are the
                          keys of classTrainInfo; rows without a matching class get an empty name.

    """
    infiles = applier.FilenameAssociations()
    infiles.imageMask = imgMask
    numClassVars = 0
    for imgFile in imgFileInfo:
        infiles.__dict__[imgFile.name] = imgFile.fileName
        numClassVars = numClassVars + len(imgFile.bands)

    outfiles = applier.FilenameAssociations()
    outfiles.outimage = outputImg
    otherargs = applier.OtherInputs()
    otherargs.classifier = skClassifier
    otherargs.mskVal = imgMaskVal
    otherargs.numClassVars = numClassVars
    otherargs.imgFileInfo = imgFileInfo

    # Prefer the tqdm progress bar; fall back to the GDAL style one if tqdm
    # (or the rsgislib wrapper around it) is not usable. 'except Exception'
    # rather than a bare except so that KeyboardInterrupt/SystemExit propagate.
    try:
        import tqdm
        progress_bar = rsgislib.TQDMProgressBar()
    except Exception:
        progress_bar = cuiprogress.GDALProgressBar()

    aControls = applier.ApplierControls()
    aControls.progress = progress_bar
    aControls.drivername = gdalformat
    aControls.omitPyramids = True
    aControls.calcStats = False

    # RIOS function to apply classifer
    def _applySKClassifier(info, inputs, outputs, otherargs):
        """
        Internal function for rios applier. Used within applyClassifer.
        """
        outClassVals = numpy.zeros_like(inputs.imageMask, dtype=numpy.uint32)
        if numpy.any(inputs.imageMask == otherargs.mskVal):
            outClassVals = outClassVals.flatten()
            imgMaskVals = inputs.imageMask.flatten()
            # numpy.float was removed in NumPy 1.24; it was an alias of the
            # builtin float, i.e. numpy.float64.
            classVars = numpy.zeros(
                (outClassVals.shape[0], otherargs.numClassVars),
                dtype=numpy.float64)
            # Array index which can be used to populate the output array following masking etc.
            ID = numpy.arange(imgMaskVals.shape[0])
            classVarsIdx = 0
            for imgFile in otherargs.imgFileInfo:
                imgArr = inputs.__dict__[imgFile.name]
                for band in imgFile.bands:
                    # Band numbers are 1 based; array indexes are 0 based.
                    classVars[..., classVarsIdx] = imgArr[(band - 1)].flatten()
                    classVarsIdx = classVarsIdx + 1
            # Only the pixels within the mask are passed to the classifier.
            inMask = imgMaskVals == otherargs.mskVal
            classVars = classVars[inMask]
            ID = ID[inMask]
            predClass = otherargs.classifier.predict(classVars)
            outClassVals[ID] = predClass
            outClassVals = numpy.expand_dims(
                outClassVals.reshape((inputs.imageMask.shape[1],
                                      inputs.imageMask.shape[2])), axis=0)
        outputs.outimage = outClassVals

    print("Applying the Classifier")
    applier.apply(_applySKClassifier, infiles, outfiles, otherargs, controls=aControls)
    print("Completed")
    rsgislib.rastergis.populateStats(clumps=outputImg, addclrtab=True, calcpyramids=True, ignorezero=True)

    if classClrNames:
        ratDataset = gdal.Open(outputImg, gdal.GA_Update)
        red = rat.readColumn(ratDataset, 'Red')
        green = rat.readColumn(ratDataset, 'Green')
        blue = rat.readColumn(ratDataset, 'Blue')
        # zeros_like (not empty_like) so rows without a class hold an empty
        # string rather than uninitialised memory. 'S255' is the supported
        # spelling of the deprecated 'a255' dtype alias.
        ClassName = numpy.zeros_like(red, dtype=numpy.dtype('S255'))

        for classKey, classInfo in classTrainInfo.items():
            print("Apply Colour to class \'" + classKey + "\'")
            red[classInfo.id] = classInfo.red
            green[classInfo.id] = classInfo.green
            blue[classInfo.id] = classInfo.blue
            ClassName[classInfo.id] = classKey

        rat.writeColumn(ratDataset, "Red", red)
        rat.writeColumn(ratDataset, "Green", green)
        rat.writeColumn(ratDataset, "Blue", blue)
        rat.writeColumn(ratDataset, "ClassName", ClassName)
        # Dropping the reference closes the GDAL dataset.
        ratDataset = None
# Open RAT inRatFile = clumps ratDataset = gdal.Open(clumps, gdal.GA_Update) # Set column names x_col_names = ['VVAvg', 'VHAvg', 'VVdivVHAvg', 'VVStd', 'VHStd', 'VVdivVHStd'] # x_col_names = ['VVAvg','VHAvg', 'VVStd','VHStd'] y_col_name = 'ClassInt' # Set up list to hold data X = [] # Read in data from each column print('read data') for colName in x_col_names: X.append(rat.readColumn(ratDataset, colName)) # Read in training data print('read training data') y = rat.readColumn(ratDataset, y_col_name) # Set NA values to 0 y = np.where(y == b'NA', 0, y) y = y.astype(np.int16) X.append(y) X = np.array(X) X = X.transpose() # Remove rows with 0 (NA) for wetCode X_train = X[X[:, -1] != 0]
# Fragment: look up the chorizon column to join, open the SSURGO component and
# chorizon tables as '|' delimited CSV readers, and prepare one zero-filled
# output column per horizon layer, shaped like the 'mukey' RAT column.
outColNum = cHorizonFields[outColName.strip()]

# JOIN ATTRIBUTES FROM TEXT FILE
print('Adding ' + outColName + ' (column ' + str(outColNum) + ') to RAT')

# Open SSURGO text files
componentFileName = os.path.join(inDIRName, 'tabular', 'comp.txt')
chorizonFileName = os.path.join(inDIRName, 'tabular', 'chorizon.txt')

# The 'U' mode flag had no effect on Python 3 and was removed in Python 3.11,
# where 'rU' raises ValueError; plain text mode 'r' is the equivalent.
# The files are left open because the csv readers below consume them lazily.
componentFile = open(componentFileName, 'r')
chorizonFile = open(chorizonFileName, 'r')

componentTxt = csv.reader(componentFile, delimiter='|')
chorizonTxt = csv.reader(chorizonFile, delimiter='|')

# Get mukey column from input file
mukeyCol = rat.readColumn(outKEAFile, 'mukey')

# Set up blank columns for output (one for each layer)
outColH1 = numpy.zeros_like(mukeyCol)
outColH2 = numpy.zeros_like(mukeyCol)
outColH3 = numpy.zeros_like(mukeyCol)
outColH4 = numpy.zeros_like(mukeyCol)
outColH5 = numpy.zeros_like(mukeyCol)
outColH6 = numpy.zeros_like(mukeyCol)

# Set columns for mukey and cokey in componentTxt
compMUKEYCol = 107
compCOKEYCol = 108

# Columns for cokey and horizon name in chorizonTxt
chorizonCOKEYCol = 169
chorizonHZNAMECol = 0
def collapseClasses(inputFile, lcdbColName, outputColName):
    """
    Collapse the LCDB classes held in a RAT column into 13 broad classes.

    Every LCDB code listed in the lookup table below is replaced by its
    broad class code; any code not listed is left as 0 (undefined). The
    result is written to ``outputColName`` in the same RAT.

    :param inputFile: path of the image whose RAT is read and updated.
    :param lcdbColName: name of the column holding the LCDB class codes.
    :param outputColName: name of the column receiving the collapsed codes.
    """
    ratDataset = gdal.Open(inputFile, gdal.GA_Update)
    lcdbCol = rat.readColumn(ratDataset, lcdbColName)

    # Output (collapsed) class codes.
    UNDEFINED = 0
    HIGH_PRODUCING_EXOTIC_HERBACEOUS = 1
    TALL_TUSSOCK_GRASSLAND = 2
    OTHER_HERBACEOUS = 3
    SCRUB = 4
    INDIGENOUS_FOREST = 5
    EXOTIC_FOREST = 6
    OTHER_WOODY = 7
    SUB_ALPINE_SCRUBLAND = 8
    BUILT_UP = 9
    BARE_GROUND = 10
    WATER = 11
    PERMANENT_SNOW_ICE = 12

    # (LCDB code, collapsed class), applied in this order.
    lcdbToCollapsed = (
        (0, UNDEFINED),                          # Undefined
        (1, BUILT_UP),                           # Built-up Area (settlement)
        (2, OTHER_HERBACEOUS),                   # Urban Parkland/Open Space
        (5, BUILT_UP),                           # Transport Infrastructure
        (6, BARE_GROUND),                        # Surface Mines and Dumps
        (10, BARE_GROUND),                       # Coastal Sand and Gravel
        (11, BARE_GROUND),                       # River and Lakeshore Gravel and Rock
        (12, BARE_GROUND),                       # Landslide
        (13, BARE_GROUND),                       # Alpine Gravel and Rock
        (14, PERMANENT_SNOW_ICE),                # Permanent Snow and Ice
        (15, OTHER_HERBACEOUS),                  # Alpine Grass/Herbfield
        (20, WATER),                             # Lake and Pond
        (21, WATER),                             # River
        (22, WATER),                             # Estuarine Open Water
        (30, HIGH_PRODUCING_EXOTIC_HERBACEOUS),  # Short-rotation Cropland
        (31, BARE_GROUND),                       # Cultivation
        (33, HIGH_PRODUCING_EXOTIC_HERBACEOUS),  # Orchard Vineyard & Other Perennial Crops
        (40, HIGH_PRODUCING_EXOTIC_HERBACEOUS),  # High Producing Exotic Grassland
        (41, OTHER_HERBACEOUS),                  # Low Producing Grassland
        (43, TALL_TUSSOCK_GRASSLAND),            # Tall Tussock Grassland
        (44, OTHER_HERBACEOUS),                  # Depleted Grassland
        (45, OTHER_HERBACEOUS),                  # Herbaceous Freshwater Vegetation
        (46, OTHER_HERBACEOUS),                  # Herbaceous Saline Vegetation
        (47, OTHER_HERBACEOUS),                  # Flaxland
        (50, OTHER_HERBACEOUS),                  # Fernland
        (51, SCRUB),                             # Gorse and/or Broom
        (52, SCRUB),                             # Manuka and/or Kanuka
        (54, INDIGENOUS_FOREST),                 # Broadleaved Indigenous Hardwoods
        (55, SUB_ALPINE_SCRUBLAND),              # Sub Alpine Shrubland
        (56, SCRUB),                             # Mixed Exotic Shrubland
        (58, SCRUB),                             # Matagouri or Grey Scrub
        (64, BARE_GROUND),                       # Forest - Harvested
        (68, OTHER_WOODY),                       # Deciduous Hardwoods
        (69, INDIGENOUS_FOREST),                 # Indigenous Forest
        (70, OTHER_WOODY),                       # Mangroves
        (71, EXOTIC_FOREST),                     # Exotic Forest
    )

    outClassesCol = np.zeros_like(lcdbCol)
    for lcdbCode, collapsedClass in lcdbToCollapsed:
        outClassesCol = np.where(lcdbCol == lcdbCode, collapsedClass,
                                 outClassesCol)

    rat.writeColumn(ratDataset, outputColName, outClassesCol)
# print('clumps: ' + clumps) print('') ratutils.populateImageStats(sel, clumps, calcMax=True, calcMean=True, calcMin=True) # add SEL statistics to RAT ratutils.populateImageStats(guf, clumps, calcMax=True) # add SEL statistics to RAT #ratutils.populateImageStats(waterPerm,clumps,calcMean=True) # add water permanance statistics to RAT # Open RAT ratDataset = gdal.Open(clumps, gdal.GA_Update) data = [] # Read in data from class_cert and sel columns data.append(rat.readColumn(ratDataset, 'OutClass_mode_cert')) data.append(rat.readColumn(ratDataset, 'SELMax')) data.append(rat.readColumn(ratDataset, 'gufMax')) mode_cert = data[0] sel_d = data[1] guf_d = data[2] mode_cert_sel = mode_cert #where statement to make sel > 60 objects 'other' mode_cert_sel[numpy.where((mode_cert_sel == 1) & (sel_d >= 60))] = 2 mode_cert_sel[numpy.where((mode_cert_sel == 3) & (guf_d > 0))] = 2 names = [] for i in mode_cert_sel: if i == 1:
i, colcount - 2, int(max(np.count_nonzero(band == i, axis=0)))) # max length/height segment_ds.SetValueAsInt( i, colcount - 1, int( max(np.count_nonzero(band == i, axis=0)) / max(np.count_nonzero(band == i, axis=1)) * 100)) # length/width ratio # ------------------------------------------------- # # Hierarchical Classification print("Hierarchical Classification...") # Reading in segment RAT NDWIAvg = rat.readColumn(segment_rat, "NDWIAvg") SWIRratioAvg = rat.readColumn(segment_rat, "SWIRratioAvg") RVIAvg = rat.readColumn(segment_rat, "RVIAvg") # Creating and population parent class column based on rules segment_ds.CreateColumn('p_class', gdal.GFT_Integer, gdal.GFU_Generic) for i in range(int(segment_ds.GetRowCount( ))): # iterate down rows for number of rows in input stack if segment_ds.GetValueAsInt( i, (segment_ds.GetColumnCount() - 1)) == 0: # if no class (0) in p_class column if NDWIAvg[i] > 0.6: segment_ds.SetValueAsInt(i, (segment_ds.GetColumnCount() - 1), 1) # water elif SWIRratioAvg[i] < 0.4:
#!/usr/bin/env python
"""Print the 'floatstuff' RAT column of the image given as first argument."""
import sys
from rios import rat

fname = sys.argv[1]
# print() with a single argument works on both Python 2 and Python 3; the
# former print statement is a SyntaxError on Python 3.
print(rat.readColumn(fname, "floatstuff"))
def classifyWithinRATTiled(clumpsImg, classesIntCol, classesNameCol, variables, classifier=RandomForestClassifier(n_estimators=100, max_features=3, oob_score=True, n_jobs=-1), outColInt="OutClass", outColStr="OutClassName", roiCol=None, roiVal=1, classColours=None, scaleVarsRange=False, justFit=False):
    """
    A function which will perform a classification within the RAT using a classifier from scikit-learn using the rios ratapplier interface allowing very large RATs to be processed.

    :param clumpsImg: is the clumps image on which the classification is to be performed
    :param classesIntCol: is the column with the training data as int values
    :param classesNameCol: is the column with the training data as string class names
    :param variables: is an array of column names which are to be used for the classification
    :param classifier: is an instance of a scikit-learn classifier (e.g., RandomForests which is Default).
                       NOTE: the default instance is created once, when the function is defined, and is
                       therefore shared (and re-fitted) by every call which does not pass a classifier.
    :param outColInt: is the output column name for the int class representation (Default: 'OutClass')
    :param outColStr: is the output column name for the class names column (Default: 'OutClassName')
    :param roiCol: is a column name for a column which specifies the region to be classified. If None ignored (Default: None)
    :param roiVal: is a int value used within the roiCol to select a region to be classified (Default: 1)
    :param classColours: is a python dict using the class name as the key along with arrays of length 3 specifying the RGB colours for the class.
    :param scaleVarsRange: will rescale each variable independently to a range of 0-1 (default: False).
                           NOTE(review): this parameter is not referenced anywhere in this function body - confirm
                           whether it is meant to be passed on to the ratapplier functions.
    :param justFit: is a boolean specifying that the classifier should just be fitted to the data and not applied (Default: False; i.e., apply classification)

    Example::

        from sklearn.ensemble import ExtraTreesClassifier
        from rsgislib.classification import classratutils

        classifier = ExtraTreesClassifier(n_estimators=100, max_features=3, n_jobs=-1, verbose=0)

        classColours = dict()
        classColours['Forest'] = [0,138,0]
        classColours['NonForest'] = [200,200,200]

        variables = ['GreenAvg', 'RedAvg', 'NIR1Avg', 'NIR2Avg', 'NDVI']
        classifyWithinRATTiled(clumpsImg, classesIntCol, classesNameCol, variables, classifier=classifier, classColours=classColours)

        # With using range scaling.
        classifyWithinRATTiled(clumpsImg, classesIntCol, classesNameCol, variables, classifier=classifier, classColours=classColours, scaleVarsRange=True)

    """
    # Check gdal is available
    if not haveGDALPy:
        raise Exception(
            "The GDAL python bindings required for this function could not be imported\n\t"
            + gdalErr)
    # Check numpy is available
    if not haveNumpy:
        raise Exception(
            "The numpy module is required for this function could not be imported\n\t"
            + numErr)
    # Check rios rat is available
    if not haveRIOSRat:
        raise Exception(
            "The RIOS rat tools are required for this function could not be imported\n\t"
            + riosRatErr)
    # Check scikit-learn RF is available
    if not haveSKLearnRF:
        raise Exception(
            "The scikit-learn random forests tools are required for this function could not be imported\n\t"
            + sklearnRFErr)
    # Check scikit-learn pre-processing is available
    if not haveSKLearnPreProcess:
        raise Exception(
            "The scikit-learn pre-processing tools are required for this function could not be imported\n\t"
            + sklearnPreProcessErr)

    ratDataset = gdal.Open(clumpsImg, gdal.GA_Update)

    # Read in training classes
    classesInt = rat.readColumn(ratDataset, classesIntCol)
    classesStr = rat.readColumn(ratDataset, classesNameCol)
    ratDataset = None

    # Rows with a class id greater than zero are the training samples.
    isTraining = classesInt > 0
    validClassStr = classesStr[isTraining]
    validClassInt = classesInt[isTraining]

    # Build the class name -> class id lookup from the training rows.
    classNames = numpy.unique(validClassStr)
    classes = numpy.zeros_like(classNames, dtype=numpy.int16)

    i = 0
    classNameIDs = dict()
    for className in classNames:
        classNameStr = str(className.decode())
        # Compare by value: 'is' on a string literal tests identity, which
        # only works by accident of interning.
        if classNameStr != '':
            classes[i] = validClassInt[validClassStr == className][0]
            classNameIDs[classNameStr] = classes[i]
            i = i + 1

    trainLen = validClassInt.shape[0]
    numVars = len(variables)

    trainData = numpy.zeros((trainLen, numVars), numpy.float64)

    in_rats = ratapplier.RatAssociations()
    out_rats = ratapplier.RatAssociations()
    in_rats.inrat = ratapplier.RatHandle(clumpsImg)

    otherargs = ratapplier.OtherArguments()
    otherargs.vars = variables
    otherargs.classIntCol = classesIntCol
    otherargs.trainData = trainData
    otherargs.trainDataOff = 0

    print("Extract Training Data")
    ratapplier.apply(_extractTrainDataFromRAT, in_rats, out_rats,
                     otherargs=otherargs, controls=None)
    print("100%")

    # Drop every training sample with a non-finite variable. The mask is
    # computed once, before trainData itself is filtered, so the labels and
    # the data stay aligned.
    finiteRows = numpy.isfinite(trainData).all(axis=1)
    validClassInt = validClassInt[finiteRows]
    validClassStr = validClassStr[finiteRows]
    trainData = trainData[finiteRows]

    print("Training data size: {} x {}".format(trainData.shape[0],
                                               trainData.shape[1]))

    print('Training Classifier')
    classifier.fit(trainData, validClassInt)
    print("Completed")

    # The score is calculated on the training data itself.
    print('Calc Classifier Accuracy')
    accVal = classifier.score(trainData, validClassInt)
    print('Classifier Score = {}'.format(round(accVal * 100, 2)))

    if not justFit:
        print("Apply Classifier")
        in_rats = ratapplier.RatAssociations()
        out_rats = ratapplier.RatAssociations()
        in_rats.inrat = ratapplier.RatHandle(clumpsImg)
        out_rats.outrat = ratapplier.RatHandle(clumpsImg)

        otherargs = ratapplier.OtherArguments()
        otherargs.vars = variables
        otherargs.classifier = classifier
        otherargs.outColInt = outColInt
        otherargs.outColStr = outColStr
        otherargs.roiCol = roiCol
        otherargs.roiVal = roiVal
        otherargs.classColours = classColours
        otherargs.classNameIDs = classNameIDs

        ratapplier.apply(_applyClassifier, in_rats, out_rats,
                         otherargs=otherargs, controls=None)
        print("100%")
# Open RAT inRatFile = outputClumps ratDataset = gdal.Open(inRatFile, gdal.GA_Update) # define column names for output classifications runs=numpy.arange(1,51) x_col_names = [] for i in runs: # define output class column col_name='OutClass_'+str(i) x_col_names.append(col_name) X=[] # Read in data from each column for colName in x_col_names: X.append(rat.readColumn(ratDataset, colName)) mode = stats.mode(X) mode=numpy.asarray(mode[0][0]) rios.rat.writeColumn(outputClumps, 'OutClass_mode', mode, colType=gdal.GFT_Integer) # calc certainty from mode and count of mode X_arr=numpy.asarray(X) x_count=[] x_percent=[] for i, m in zip((range(X_arr.shape[1])),mode): b=X_arr[:,i] count=numpy.count_nonzero(b==m) x_percent.append(count/X_arr.shape[0]) x_percent=numpy.asarray(x_percent)
def clusterWithinRAT(clumpsImg, variables, clusterer=MiniBatchKMeans(n_clusters=8, init='k-means++', max_iter=100, batch_size=100), outColInt="OutCluster", roiCol=None, roiVal=1, clrClusters=True, clrSeed=10, addConnectivity=False, preProcessor=None):
    """
    A function which will perform a clustering within the RAT using a clustering algorithm from scikit-learn

    :param clumpsImg: is the clumps image on which the classification is to be performed.
    :param variables: is an array of column names which are to be used for the clustering.
    :param clusterer: is an instance of a scikit-learn clusterer (e.g., MiniBatchKMeans which is Default; Note with 8 clusters).
                      NOTE: the default instance is created once, when the function is defined, and is therefore
                      shared by every call which does not pass a clusterer.
    :param outColInt: is the output column name identifying the clusters (Default: 'OutCluster').
    :param roiCol: is a column name for a column which specifies the region to be clustered. If None (or an empty string) ignored (Default: None).
    :param roiVal: is a int value used within the roiCol to select a region to be clustered (Default: 1).
    :param clrClusters: is a boolean specifying whether the colour table should be updated to correspond to the clusters (Default: True).
    :param clrSeed: is an integer seeding the random generator used to generate the colours (Default=10; if None provided system time used).
    :param addConnectivity: is a boolean which adds a kneighbors_graph to the clusterer (just an option for the AgglomerativeClustering algorithm)
    :param preProcessor: is a scikit-learn processors such as sklearn.preprocessing.MaxAbsScaler() which can rescale the
                         input variables independently as read in (Define: None; i.e., not in use).

    Rows with a non-finite value in any variable, and rows outside the region of interest, are not clustered and
    keep the label 0. Cluster ids are shifted, where needed, so that the smallest id written is 1.

    Example::

        from rsgislib.classification import classratutils
        from sklearn.cluster import DBSCAN

        sklearnClusterer = DBSCAN(eps=1, min_samples=50)
        classratutils.clusterWithinRAT('MangroveClumps.kea', ['MinX', 'MinY'], clusterer=sklearnClusterer, outColInt="OutCluster", roiCol=None, roiVal=1, clrClusters=True, clrSeed=10, addConnectivity=False)

        # With pre-processor
        from sklearn.preprocessing import MaxAbsScaler

        classratutils.clusterWithinRAT('MangroveClumps.kea', ['MinX', 'MinY'], clusterer=sklearnClusterer, outColInt="OutCluster", roiCol=None, roiVal=1, clrClusters=True, clrSeed=10, addConnectivity=False, preProcessor=MaxAbsScaler())

    """
    # Check gdal is available
    if not haveGDALPy:
        raise Exception(
            "The GDAL python bindings required for this function could not be imported\n\t"
            + gdalErr)
    # Check numpy is available
    if not haveNumpy:
        raise Exception(
            "The numpy module is required for this function could not be imported\n\t"
            + numErr)
    # Check rios rat is available
    if not haveRIOSRat:
        raise Exception(
            "The RIOS rat tools are required for this function could not be imported\n\t"
            + riosRatErr)
    # Check scikit-learn Mini Batch KMeans is available
    if not haveSKLearnKM:
        raise Exception(
            "The scikit-learn Mini Batch KMeans tools are required for this function could not be imported\n\t"
            + sklearnMBKMErr)
    # Check scikit-learn pre-processing is available
    if not haveSKLearnPreProcess:
        raise Exception(
            "The scikit-learn pre-processing tools are required for this function could not be imported\n\t"
            + sklearnPreProcessErr)

    ratDataset = gdal.Open(clumpsImg, gdal.GA_Update)
    # Histogram is only used as a template for the size of the output column.
    Histogram = rat.readColumn(ratDataset, 'Histogram')
    numpyVars = []
    for var in variables:
        print("Reading " + var)
        tmpArr = rat.readColumn(ratDataset, var)
        if preProcessor is not None:
            # The pre-processor expects a 2-D (samples x features) array, so
            # each variable is scaled independently as a single column.
            tmpArr = tmpArr.reshape(-1, 1)
            tmpArr = preProcessor.fit_transform(tmpArr)
            tmpArr = tmpArr.reshape(-1)
        numpyVars.append(tmpArr)

    roi = None
    if not ((roiCol is None) or (roiCol == "")):
        roi = rat.readColumn(ratDataset, roiCol)

    # Set up output array
    outLabels = numpy.zeros_like(Histogram, dtype=numpy.int16)
    # ID maps each row of xData back to its RAT row as rows are filtered out.
    ID = numpy.arange(outLabels.shape[0])

    xData = numpy.array(numpyVars)
    xData = xData.transpose()

    # Remove rows with a non-finite value; the mask is computed once, before
    # xData itself is filtered, so ID, roi and xData stay aligned.
    finiteRows = numpy.isfinite(xData).all(axis=1)
    ID = ID[finiteRows]
    if roi is not None:
        roi = roi[finiteRows]
    xData = xData[finiteRows]

    # Restrict to the region of interest.
    if roi is not None:
        inROI = roi == roiVal
        xData = xData[inROI]
        ID = ID[inROI]

    print("Input Data Size: {} x {}".format(xData.shape[0], xData.shape[1]))

    if addConnectivity:
        from sklearn.neighbors import kneighbors_graph
        inConnectivity = kneighbors_graph(xData, n_neighbors=10,
                                          include_self=False)
        clusterer.set_params(connectivity=inConnectivity)

    print('Fit Clusterer')
    outClust = clusterer.fit_predict(xData)

    # Label 0 is reserved for rows which were not clustered, so shift the
    # cluster ids until the smallest one is 1.
    minClusterID = numpy.min(outClust)
    if minClusterID <= 0:
        minOff = 1 - minClusterID
        outClust = outClust + minOff

    outLabels[ID] = outClust

    print("Writing Columns")
    rat.writeColumn(ratDataset, outColInt, outLabels)

    # NOTE: despite the message no class names column is written here; only
    # the unique cluster ids are collected for the colouring step.
    print("Create and Write Output Class Names")
    clustersIDs = numpy.unique(outClust)

    if clrClusters:
        import random
        random.seed(clrSeed)

        print("Set Colours")

        red = rat.readColumn(ratDataset, "Red")
        green = rat.readColumn(ratDataset, "Green")
        blue = rat.readColumn(ratDataset, "Blue")

        # Set Background to black
        red[...] = 0
        green[...] = 0
        blue[...] = 0

        # Set colours; the random values are drawn in red, green, blue order
        # for each cluster so a given seed always gives the same colours.
        for clusterID in clustersIDs:
            print("Colouring cluster: " + str(clusterID))
            inCluster = outLabels == clusterID
            red = numpy.where(inCluster, random.randint(0, 255), red)
            green = numpy.where(inCluster, random.randint(0, 255), green)
            blue = numpy.where(inCluster, random.randint(0, 255), blue)

        rat.writeColumn(ratDataset, "Red", red)
        rat.writeColumn(ratDataset, "Green", green)
        rat.writeColumn(ratDataset, "Blue", blue)

    # Dropping the reference closes the GDAL dataset.
    ratDataset = None
# Fragment: look up the chorizon column to join, open the SSURGO component and
# chorizon tables as '|' delimited CSV readers, and prepare one zero-filled
# output column per horizon layer, shaped like the 'mukey' RAT column.
outColNum = cHorizonFields[outColName.strip()]

# JOIN ATTRIBUTES FROM TEXT FILE
print('Adding ' + outColName + ' (column ' + str(outColNum) + ') to RAT')

# Open SSURGO text files
componentFileName = os.path.join(inDIRName, 'tabular','comp.txt')
chorizonFileName = os.path.join(inDIRName, 'tabular','chorizon.txt')

# The 'U' mode flag had no effect on Python 3 and was removed in Python 3.11,
# where 'rU' raises ValueError; plain text mode 'r' is the equivalent.
# The files are left open because the csv readers below consume them lazily.
componentFile = open(componentFileName, 'r')
chorizonFile = open(chorizonFileName, 'r')

componentTxt = csv.reader(componentFile, delimiter='|')
chorizonTxt = csv.reader(chorizonFile, delimiter='|')

# Get mukey column from input file
mukeyCol = rat.readColumn(outKEAFile, 'mukey')

# Set up blank columns for output (one for each layer)
outColH1 = numpy.zeros_like(mukeyCol)
outColH2 = numpy.zeros_like(mukeyCol)
outColH3 = numpy.zeros_like(mukeyCol)
outColH4 = numpy.zeros_like(mukeyCol)
outColH5 = numpy.zeros_like(mukeyCol)
outColH6 = numpy.zeros_like(mukeyCol)

# Set columns for mukey and cokey in componentTxt
compMUKEYCol = 107
compCOKEYCol = 108

# Columns for cokey and horizon name in chorizonTxt
chorizonCOKEYCol = 169
chorizonHZNAMECol = 0
def findClassifierParameters(clumpsImg, classesIntCol, variables, preProcessor=None, gridSearch=None):
    """
    Find the optimal parameters for a classifier using a grid search and return
    a classifier instance with those optimal parameters.

    :param clumpsImg: is the clumps image on which the classification is to be performed
    :param classesIntCol: is the column with the training data as int values
                          (only rows with a value > 0 are used for training)
    :param variables: is an array of column names which are to be used for the classification
    :param preProcessor: is a scikit-learn processors such as sklearn.preprocessing.MaxAbsScaler()
                         which can rescale the input variables independently as read in
                         (Define: None; i.e., not in use).
    :param gridSearch: is an instance of GridSearchCV parameterised with a classifier and
                       parameters to be searched. If None (the default) a new
                       GridSearchCV(RandomForestClassifier(), {}) is created for this call.
    :return: Instance of the classifier with optimal parameters defined.
    :raises Exception: if a required module could not be imported or if the grid
                       search was created with refit disabled.

    Example::

        from rsgislib.classification import classratutils
        from sklearn.svm import SVC
        from sklearn.model_selection import GridSearchCV
        from sklearn.preprocessing import MaxAbsScaler

        clumpsImg = "./LS8_20150621_lat10lon652_r67p233_clumps.kea"
        classesIntCol = 'ClassInt'

        classParameters = {'kernel':['linear', 'rbf', 'poly', 'sigmoid'],
                           'C':[1, 2, 3, 4, 5, 10, 100, 400, 500, 1e3, 5e3, 1e4, 5e4, 1e5],
                           'gamma':[0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1, 'auto'],
                           'degree':[2, 3, 4, 5, 6, 7, 8],
                           'class_weight':['', 'balanced'],
                           'decision_function_shape':['ovo', 'ovr', None]}
        variables = ['BlueRefl', 'GreenRefl', 'RedRefl', 'NIRRefl', 'SWIR1Refl', 'SWIR2Refl']

        gSearch = GridSearchCV(SVC(), classParameters)
        classifier = classratutils.findClassifierParameters(clumpsImg, classesIntCol, variables,
                                                            preProcessor=MaxAbsScaler(),
                                                            gridSearch=gSearch)

    """
    # Check gdal is available
    if not haveGDALPy:
        raise Exception("The GDAL python bindings required for this function could not be imported\n\t" + gdalErr)
    # Check numpy is available
    if not haveNumpy:
        raise Exception("The numpy module is required for this function could not be imported\n\t" + numErr)
    # Check rios rat is available
    if not haveRIOSRat:
        raise Exception("The RIOS rat tools are required for this function could not be imported\n\t" + riosRatErr)
    # Check scikit-learn pre-processing is available
    if not haveSKLearnPreProcess:
        raise Exception("The scikit-learn pre-processing tools are required for this function could not be imported\n\t" + sklearnPreProcessErr)
    # Check scikit-learn Grid Search is available
    if not haveSKLearnGS:
        raise Exception("The scikit-learn grid search tools are required for this function could not be imported\n\t" + sklearnGSErr)

    # Build the default search per call: a default created in the signature is
    # evaluated at import time (bypassing the availability checks above) and the
    # single shared instance would be mutated by every call to fit().
    if gridSearch is None:
        gridSearch = GridSearchCV(RandomForestClassifier(), {})

    ratDataset = gdal.Open(clumpsImg, gdal.GA_Update)

    numpyVars = []
    for var in variables:
        print("Reading " + var)
        tmpArr = rat.readColumn(ratDataset, var)
        if preProcessor is not None:
            # The scikit-learn transformers expect a 2D (n_samples, 1) array.
            tmpArr = tmpArr.reshape(-1, 1)
            tmpArr = preProcessor.fit_transform(tmpArr)
            tmpArr = tmpArr.reshape(-1)
        numpyVars.append(tmpArr)

    # Read in training classes
    classesInt = rat.readColumn(ratDataset, classesIntCol)

    # Everything needed has been read; release the GDAL dataset.
    ratDataset = None

    xData = numpy.array(numpyVars)
    xData = xData.transpose()
    # Non-finite values are replaced with 0, so every row is finite from here
    # on and no rows need to be dropped for that reason.
    xData = numpy.where(numpy.isfinite(xData), xData, 0)

    print("Input data size: {} x {}".format(xData.shape[0], xData.shape[1]))

    # Only rows with a class value above zero are training samples.
    trainingMask = classesInt > 0
    trainingData = xData[trainingMask]
    classesInt = classesInt[trainingMask]

    print("Training data size: {} x {}".format(trainingData.shape[0], trainingData.shape[1]))
    print("Training data IDs size: {}".format(classesInt.shape[0]))

    classIDs, classCounts = numpy.unique(classesInt, return_counts=True)
    print(classIDs)
    for classID, classCount in zip(classIDs, classCounts):
        print("Class {} has {} samples.".format(classID, classCount))

    gridSearch.fit(trainingData, classesInt)
    # best_estimator_ is only populated when the search refits on the best parameters.
    if not gridSearch.refit:
        raise Exception("Grid Search did not find a fit therefore failed...")

    print("Best score was {} and has parameters {}.".format(gridSearch.best_score_, gridSearch.best_params_))

    return gridSearch.best_estimator_
def apply_keras_pixel_classifier(classTrainInfo, keras_cls_mdl, imgMask, imgMaskVal, imgFileInfo, outClassImg, gdalformat, pred_batch_size=32, classClrNames=True):
    """
    This function applies a trained single pixel keras model to an image.
    The function train_keras_pixel_classifer can be used to train such as model.
    The output image will contain the hard membership of the predicted class.

    :param classTrainInfo: dict (where the key is the class name) of rsgislib.classification.ClassInfoObj
                           objects which will be used to train the classifier
                           (i.e., train_keras_pixel_classifer()), provide pixel value id and RGB class values.
    :param keras_cls_mdl: a trained keras model object, with a input dimensions equivalent to the number of
                          image bands specified in the imgFileInfo input and output layer which provides an
                          output array of the length of the number of classes.
    :param imgMask: is an image file providing a mask to specify where should be classified. Simplest mask
                    is all the valid data regions (rsgislib.imageutils.genValidMask)
    :param imgMaskVal: the pixel value within the imgMask to limit the region to which the classification is
                       applied. Can be used to create a hierarchical classification.
    :param imgFileInfo: a list of rsgislib.imageutils.ImageBandInfo objects (also used within
                        rsgislib.imageutils.extractZoneImageBandValues2HDF) to identify which images and
                        bands are to be used for the classification so it adheres to the training data.
    :param outClassImg: Output image which will contain the hard classification.
    :param gdalformat: is the output image format - all GDAL supported formats are supported.
    :param pred_batch_size: the batch size used for the classification.
    :param classClrNames: default is True and therefore a colour table will the colours specified in
                          ClassInfoObj and a ClassName (from classTrainInfo) column will be added to
                          the output file.
    :raises Exception: if a ClassInfoObj id falls outside the range 0..(number of classes - 1).

    """
    def _applyKerasPxlClassifier(info, inputs, outputs, otherargs):
        """Applier callback: predict a class id for each masked pixel of one block."""
        outClassIdVals = numpy.zeros_like(inputs.imageMask, dtype=numpy.uint16)
        if numpy.any(inputs.imageMask == otherargs.mskVal):
            n_pxls = inputs.imageMask.shape[1] * inputs.imageMask.shape[2]
            outClassIdVals = outClassIdVals.flatten()
            imgMaskVals = inputs.imageMask.flatten()
            # numpy.float was removed from NumPy; the builtin float is the same dtype.
            classVars = numpy.zeros((n_pxls, otherargs.numClassVars), dtype=float)
            # Array index which can be used to populate the output array following masking etc.
            ID = numpy.arange(imgMaskVals.shape[0])
            classVarsIdx = 0
            for imgFile in otherargs.imgFileInfo:
                imgArr = inputs.__dict__[imgFile.name]
                for band in imgFile.bands:
                    # Band numbers are 1-based, array indexes 0-based.
                    classVars[..., classVarsIdx] = imgArr[(band - 1)].flatten()
                    classVarsIdx = classVarsIdx + 1
            pxlSelected = imgMaskVals == otherargs.mskVal
            classVars = classVars[pxlSelected]
            ID = ID[pxlSelected]
            preds_idxs = numpy.argmax(otherargs.classifier.predict(classVars, batch_size=otherargs.pred_batch_size), axis=1)
            # Translate the model output index into the output pixel value.
            preds_cls_ids = numpy.zeros_like(preds_idxs, dtype=numpy.uint16)
            for idx, cls_id in enumerate(otherargs.cls_id_lut):
                preds_cls_ids[preds_idxs == idx] = cls_id

            outClassIdVals[ID] = preds_cls_ids
            outClassIdVals = numpy.expand_dims(outClassIdVals.reshape((inputs.imageMask.shape[1], inputs.imageMask.shape[2])), axis=0)
        outputs.outclsimage = outClassIdVals

    infiles = applier.FilenameAssociations()
    infiles.imageMask = imgMask
    numClassVars = 0
    for imgFile in imgFileInfo:
        infiles.__dict__[imgFile.name] = imgFile.fileName
        numClassVars = numClassVars + len(imgFile.bands)

    # Look-up table: model output index (ClassInfoObj.id) -> output pixel value (out_id).
    n_classes = len(classTrainInfo)
    cls_id_lut = numpy.zeros(n_classes)
    for clsname in classTrainInfo:
        clsInfo = classTrainInfo[clsname]
        # A negative id would silently index the table from the end, so reject it too.
        if (clsInfo.id < 0) or (clsInfo.id >= n_classes):
            # Previously a bare string was raised, which itself fails with a TypeError.
            raise Exception("ClassInfoObj '{}' id ({}) is not consecutive starting from 0.".format(clsname, clsInfo.id))
        cls_id_lut[clsInfo.id] = clsInfo.out_id

    outfiles = applier.FilenameAssociations()
    outfiles.outclsimage = outClassImg
    otherargs = applier.OtherInputs()
    otherargs.classifier = keras_cls_mdl
    otherargs.pred_batch_size = pred_batch_size
    otherargs.mskVal = imgMaskVal
    otherargs.numClassVars = numClassVars
    otherargs.imgFileInfo = imgFileInfo
    otherargs.n_classes = n_classes
    otherargs.cls_id_lut = cls_id_lut

    # Use the tqdm based progress bar when it can be set up, otherwise fall back
    # to the GDAL style one. Best effort, but do not swallow KeyboardInterrupt.
    try:
        import tqdm
        progress_bar = rsgislib.TQDMProgressBar()
    except Exception:
        progress_bar = cuiprogress.GDALProgressBar()

    aControls = applier.ApplierControls()
    aControls.progress = progress_bar
    aControls.drivername = gdalformat
    aControls.omitPyramids = True
    aControls.calcStats = False
    print("Applying the Classifier")
    applier.apply(_applyKerasPxlClassifier, infiles, outfiles, otherargs, controls=aControls)
    print("Completed Classification")

    if classClrNames:
        rsgislib.rastergis.populateStats(outClassImg, addclrtab=True, calcpyramids=True, ignorezero=True)
        ratDataset = gdal.Open(outClassImg, gdal.GA_Update)
        red = rat.readColumn(ratDataset, 'Red')
        green = rat.readColumn(ratDataset, 'Green')
        blue = rat.readColumn(ratDataset, 'Blue')
        # 'S255' is the fixed-width byte-string dtype formerly spelt 'a255'.
        ClassName = numpy.empty_like(red, dtype=numpy.dtype('S255'))
        ClassName[...] = ""

        for classKey in classTrainInfo:
            print("Apply Colour to class \'" + classKey + "\'")
            clsInfo = classTrainInfo[classKey]
            red[clsInfo.out_id] = clsInfo.red
            green[clsInfo.out_id] = clsInfo.green
            blue[clsInfo.out_id] = clsInfo.blue
            ClassName[clsInfo.out_id] = classKey

        rat.writeColumn(ratDataset, "Red", red)
        rat.writeColumn(ratDataset, "Green", green)
        rat.writeColumn(ratDataset, "Blue", blue)
        rat.writeColumn(ratDataset, "ClassName", ClassName)
        ratDataset = None
def balanceSampleTrainingRandom(clumpsImg, trainCol, outTrainCol, minNoSamples, maxNoSamples):
    """
    Randomly sub-sample the training rows of a RAT column so that every retained
    class ends up with the same number of samples. Classes with fewer than
    minNoSamples rows are dropped (left as zero in the output). The per-class
    sample count is that of the smallest retained class, capped at maxNoSamples.

    :param clumpsImg: is a string with the file path to the input image with RAT
    :param trainCol: is a string for the name of the input column specifying the
                     training samples (zero is no data)
    :param outTrainCol: is a string with the name of the outputted training samples.
    :param minNoSamples: is an int specifying the minimum number of training samples
                         for a class (if below threshold class is removed).
    :param maxNoSamples: is an int specifying the maximum number of training samples
                         per class.

    """
    # Bail out early if any of the optional dependencies failed to import.
    if not haveGDALPy:
        raise Exception("The GDAL python bindings required for this function could not be imported\n\t" + gdalErr)
    if not haveNumpy:
        raise Exception("The numpy module is required for this function could not be imported\n\t" + numErr)
    if not haveRIOSRat:
        raise Exception("The RIOS rat tools are required for this function could not be imported\n\t" + riosRatErr)

    ratDataset = gdal.Open(clumpsImg, gdal.GA_Update)
    inTrainVals = rat.readColumn(ratDataset, trainCol)
    # Output starts as all zeros (no data); only sampled rows are filled in.
    balancedVals = numpy.zeros_like(inTrainVals)

    uniqueVals = numpy.unique(inTrainVals)
    classIDs = uniqueVals[uniqueVals != 0]

    print("Number of input samples:")
    sampleCounts = []
    for classID in classIDs:
        nClassSamples = numpy.count_nonzero(inTrainVals == classID)
        print("\tClass {} has {} samples.".format(classID, nClassSamples))
        sampleCounts.append(nClassSamples)

    # Smallest count amongst the classes which meet the minimum threshold
    # (zero when no class qualifies), limited to the requested maximum.
    retainedCounts = [nSamp for nSamp in sampleCounts if not (nSamp < minNoSamples)]
    nPerClass = min(retainedCounts) if retainedCounts else 0
    if nPerClass > maxNoSamples:
        nPerClass = maxNoSamples

    print("Number of output samples:")
    for classID, nSamp in zip(classIDs, sampleCounts):
        if not (nSamp >= minNoSamples):
            continue
        classRows = numpy.where(inTrainVals == classID)[0]
        chosenRows = numpy.random.choice(classRows, nPerClass, replace=False)
        balancedVals[chosenRows] = classID
        print("\tClass {} has {} samples.".format(classID, numpy.count_nonzero(balancedVals == classID)))

    rat.writeColumn(ratDataset, outTrainCol, balancedVals)
    ratDataset = None
print('') classesIntCol = 'ClassInt' rsgislib.classification.classratutils.balanceSampleTrainingRandom(outputClumps, classesIntCol, 'classesIntColBal', 50, 5000) # rebalance the training data classesIntCol='classesIntColBal' # run the classifier classratutils.classifyWithinRAT(outputClumps, classesIntCol, classesNameCol, variables, classifier=classifier, classColours=classColours,preProcessor=MaxAbsScaler(),outColInt=outColInt) ########################################################################################### # Read all results from RAT and extract mode, providing final result # Also, mask out nan values from the classification where vvMax==0 inRatFile = outputClumps ratDataset = gdal.Open(inRatFile, gdal.GA_Update) # Open RAT vvMax_val=[] vvMax_val.append(rat.readColumn(ratDataset, 'VVMax')) # read in urban footprint column vvMax_val=numpy.asarray(vvMax_val[0]) guf_val=[] guf_val.append(rat.readColumn(ratDataset, 'gufMax')) # read in urban footprint column guf_val=numpy.asarray(guf_val[0]) # define column names for output classifications #runs=numpy.arange(1,51) x_col_names = [] for i in runs: # define output class column col_name='OutClass_'+str(i) x_col_names.append(col_name) X=[]
def classifyWithinRAT(clumpsImg, classesIntCol, classesNameCol, variables, classifier=None, outColInt="OutClass", outColStr="OutClassName", roiCol=None, roiVal=1, classColours=None, preProcessor=None, justFit=False):
    """
    A function which will perform a classification within the RAT using a classifier from scikit-learn

    :param clumpsImg: is the clumps image on which the classification is to be performed
    :param classesIntCol: is the column with the training data as int values
    :param classesNameCol: is the column with the training data as string class names
    :param variables: is an array of column names which are to be used for the classification
    :param classifier: is an instance of a scikit-learn classifier. If None (the default) a new
                       RandomForestClassifier(n_estimators=100, max_features=3, oob_score=True, n_jobs=-1)
                       is created for this call.
    :param outColInt: is the output column name for the int class representation (Default: 'OutClass')
    :param outColStr: is the output column name for the class names column (Default: 'OutClassName')
    :param roiCol: is a column name for a column which specifies the region to be classified.
                   If None ignored (Default: None)
    :param roiVal: is a int value used within the roiCol to select a region to be classified (Default: 1)
    :param classColours: is a python dict using the class name as the key along with arrays of
                         length 3 specifying the RGB colours for the class.
    :param preProcessor: is a scikit-learn processors such as sklearn.preprocessing.MaxAbsScaler()
                         which can rescale the input variables independently as read in
                         (Define: None; i.e., not in use).
    :param justFit: is a boolean specifying that the classifier should just be fitted to the data
                    and not applied (Default: False; i.e., apply classification)
    :raises Exception: if a required module could not be imported.

    Example::

        from sklearn.ensemble import ExtraTreesClassifier
        from rsgislib.classification import classratutils

        classifier = ExtraTreesClassifier(n_estimators=100, max_features=3, n_jobs=-1, verbose=0)

        classColours = dict()
        classColours['Forest'] = [0,138,0]
        classColours['NonForest'] = [200,200,200]

        variables = ['GreenAvg', 'RedAvg', 'NIR1Avg', 'NIR2Avg', 'NDVI']
        classifyWithinRAT(clumpsImg, classesIntCol, classesNameCol, variables,
                          classifier=classifier, classColours=classColours)

        from sklearn.preprocessing import MaxAbsScaler

        # With pre-processor
        classifyWithinRAT(clumpsImg, classesIntCol, classesNameCol, variables,
                          classifier=classifier, classColours=classColours,
                          preProcessor=MaxAbsScaler())

    """
    # Check gdal is available
    if not haveGDALPy:
        raise Exception("The GDAL python bindings required for this function could not be imported\n\t" + gdalErr)
    # Check numpy is available
    if not haveNumpy:
        raise Exception("The numpy module is required for this function could not be imported\n\t" + numErr)
    # Check rios rat is available
    if not haveRIOSRat:
        raise Exception("The RIOS rat tools are required for this function could not be imported\n\t" + riosRatErr)
    # Check scikit-learn RF is available
    if not haveSKLearnRF:
        raise Exception("The scikit-learn random forests tools are required for this function could not be imported\n\t" + sklearnRFErr)
    # Check scikit-learn pre-processing is available
    if not haveSKLearnPreProcess:
        raise Exception("The scikit-learn pre-processing tools are required for this function could not be imported\n\t" + sklearnPreProcessErr)

    # Build the default classifier per call: a default created in the signature
    # is evaluated at import time (bypassing the availability checks above) and
    # the single shared instance would be re-fitted by every call.
    if classifier is None:
        classifier = RandomForestClassifier(n_estimators=100, max_features=3, oob_score=True, n_jobs=-1)

    ratDataset = gdal.Open(clumpsImg, gdal.GA_Update)

    numpyVars = []
    for var in variables:
        print("Reading " + var)
        tmpArr = rat.readColumn(ratDataset, var)
        if preProcessor is not None:
            # The scikit-learn transformers expect a 2D (n_samples, 1) array.
            tmpArr = tmpArr.reshape(-1, 1)
            tmpArr = preProcessor.fit_transform(tmpArr)
            tmpArr = tmpArr.reshape(-1)
        numpyVars.append(tmpArr)

    # Read in training classes
    classesInt = rat.readColumn(ratDataset, classesIntCol)
    classesStr = rat.readColumn(ratDataset, classesNameCol)

    roi = None
    if (roiCol is not None) and (roiCol != ""):
        roi = rat.readColumn(ratDataset, roiCol)

    # Set up output array
    outLabels = numpy.zeros_like(classesInt, dtype=numpy.int16)
    # 'S255' is the fixed-width byte-string dtype formerly spelt 'a255'.
    outClassNames = numpy.empty_like(classesInt, dtype=numpy.dtype('S255'))
    # Row index used to write predictions back to the right RAT rows.
    ID = numpy.arange(outLabels.shape[0])

    xData = numpy.array(numpyVars)
    xData = xData.transpose()
    # Non-finite values are replaced with 0, so every row is finite from here
    # on and no rows need to be dropped for that reason.
    xData = numpy.where(numpy.isfinite(xData), xData, 0)

    print("Input data size: {} x {}".format(xData.shape[0], xData.shape[1]))

    # Only rows with a class value above zero are training samples.
    trainingMask = classesInt > 0
    trainingData = xData[trainingMask]
    classesStr = classesStr[trainingMask]
    classesInt = classesInt[trainingMask]

    print("Training data size: {} x {}".format(trainingData.shape[0], trainingData.shape[1]))

    print('Training Classifier')
    classifier.fit(trainingData, classesInt)

    print('Calc Classifier Accuracy')
    accVal = classifier.score(trainingData, classesInt)
    print('Classifier Score = {}'.format(round(accVal * 100, 2)))

    if not justFit:
        if roi is not None:
            roiMask = roi == roiVal
            xData = xData[roiMask]
            ID = ID[roiMask]
            print("ROI Subsetted data size: {} x {}".format(xData.shape[0], xData.shape[1]))

        predClass = classifier.predict(xData)
        outLabels[ID] = predClass

        print("Writing Columns")
        rat.writeColumn(ratDataset, outColInt, outLabels)

        print("Create and Write Output Class Names")
        classNames = numpy.unique(classesStr)
        classes = numpy.zeros_like(classNames, dtype=numpy.int16)

        # Map each (non-empty) training class name to its numerical class id.
        i = 0
        classNameIDs = dict()
        for className in classNames:
            if isinstance(className, bytes):
                classNameStr = str(className.decode())
            else:
                classNameStr = str(className)
            # Compare by value; 'is' on a string literal tests identity and
            # only works by accident.
            if classNameStr != '':
                classes[i] = classesInt[classesStr == className][0]
                classNameIDs[classNameStr] = classes[i]
                print("Class \'" + classNameStr + "\' has numerical " + str(classes[i]))
                i = i + 1

        outClassNames[...] = ''
        for className in classNameIDs:
            classID = classNameIDs[className]
            outClassNames[outLabels == classID] = className

        rat.writeColumn(ratDataset, outColStr, outClassNames)

        if classColours is not None:
            print("Set Colours")

            red = rat.readColumn(ratDataset, "Red")
            green = rat.readColumn(ratDataset, "Green")
            blue = rat.readColumn(ratDataset, "Blue")

            # Set Background to black
            red[...] = 0
            green[...] = 0
            blue[...] = 0

            # Set colours
            for className in classNameIDs:
                print("Colouring class " + className)
                classID = classNameIDs[className]
                colours = classColours[className]

                red = numpy.where(outLabels == classID, colours[0], red)
                green = numpy.where(outLabels == classID, colours[1], green)
                blue = numpy.where(outLabels == classID, colours[2], blue)

            rat.writeColumn(ratDataset, "Red", red)
            rat.writeColumn(ratDataset, "Green", green)
            rat.writeColumn(ratDataset, "Blue", blue)

    ratDataset = None
def _apply_voting_classifier(inParams):
    """
    Internal function which is used by perform_voting_classification to train
    and apply a single classifier (one vote).

    It is defined at module level because multiprocessing pickles the function
    passed to Pool.map and functions nested inside another function cannot be
    pickled.

    :param inParams: dict with the keys 'skClassifier', 'cTmpDIR', 'classAreaMask',
                     'classMaskPxlVal', 'imgFileInfo', 'tmpClassImgOut', 'gdalformat',
                     'trainSamplesInfo' and 'rndSeed'.

    """
    skClassifier = inParams['skClassifier']
    cTmpDIR = inParams['cTmpDIR']
    classAreaMask = inParams['classAreaMask']
    classMaskPxlVal = inParams['classMaskPxlVal']
    imgFileInfo = inParams['imgFileInfo']
    tmpClassImgOut = inParams['tmpClassImgOut']
    gdalformat = inParams['gdalformat']
    trainSamplesInfo = inParams['trainSamplesInfo']
    rndSeed = inParams['rndSeed']

    classTrainInfo = dict()
    for trainSamples in trainSamplesInfo:
        sampImgFile = os.path.join(cTmpDIR, trainSamples.outSampImgFile)
        sampH5File = os.path.join(cTmpDIR, trainSamples.samplesH5File)
        # Draw a fresh random sample of the training region for this vote.
        rsgislib.imageutils.performRandomPxlSampleInMaskLowPxlCount(inputImage=trainSamples.maskImg,
                                                                    outputImage=sampImgFile,
                                                                    gdalformat=gdalformat,
                                                                    maskvals=[trainSamples.maskPxlVal],
                                                                    numSamples=trainSamples.numSamps,
                                                                    rndSeed=rndSeed)
        rsgislib.imageutils.extractZoneImageBandValues2HDF(imgFileInfo, sampImgFile, sampH5File, trainSamples.maskPxlVal)
        classTrainInfo[trainSamples.className] = ClassSimpleInfoObj(id=trainSamples.classID,
                                                                    fileH5=sampH5File,
                                                                    red=trainSamples.red,
                                                                    green=trainSamples.green,
                                                                    blue=trainSamples.blue)

    train_sklearn_classifier(classTrainInfo, skClassifier)
    apply_sklearn_classifer(classTrainInfo, skClassifier, classAreaMask, classMaskPxlVal, imgFileInfo, tmpClassImgOut, gdalformat)


def perform_voting_classification(skClassifiers, trainSamplesInfo, imgFileInfo, classAreaMask, classMaskPxlVal, tmpDIR, tmpImgBase, outClassImg, gdalformat='KEA', numCores=-1):
    """
    A function which will perform a number of classification creating a combined classification by a simple vote.
    The classifier parameters can be differed as a list of classifiers is provided (the length of the list is equal
    to the number of votes), where the training data is resampled for each classifier. The analysis can be performed
    using multiple processing cores.

    Where:

    :param skClassifiers: a list of classifiers (from scikit-learn), the number of classifiers defined
                          will be equal to the number of votes.
    :param trainSamplesInfo: a list of rsgislib.classification.classimgutils.SamplesInfoObj objects used to
                             parameters the classifier and extract training data.
    :param imgFileInfo: a list of rsgislib.imageutils.ImageBandInfo objects (also used within
                        rsgislib.imageutils.extractZoneImageBandValues2HDF) to identify which images and bands
                        are to be used for the classification so it adheres to the training data.
    :param classAreaMask: a mask image which is used to specified the areas of the scene which are to be classified.
    :param classMaskPxlVal: is the pixel value within the classAreaMask image for the areas of the image
                            which are to be classified.
    :param tmpDIR: a temporary file location which will be created and removed during processing
                   (if it already exists only the per-vote sub-directories are removed).
    :param tmpImgBase: the same name of files written to the tmpDIR
    :param outClassImg: the final output image file.
    :param gdalformat: the output file format for outClassImg
    :param numCores: is the number of processing cores to be used for the analysis
                     (if -1 then all cores on the machine will be used).
    :raises rsgislib.RSGISPyException: if skClassifiers is not a list.

    Example::

        classVoteTemp = os.path.join(imgTmp, 'ClassVoteTemp')

        imgFileInfo = [rsgislib.imageutils.ImageBandInfo(img2010dB, 'sardb', [1,2]),
                       rsgislib.imageutils.ImageBandInfo(imgSRTM, 'srtm', [1])]
        trainSamplesInfo = []
        trainSamplesInfo.append(SamplesInfoObj(className='Water', classID=1, maskImg=classTrainRegionsMask,
                                               maskPxlVal=1, outSampImgFile='WaterSamples.kea', numSamps=500,
                                               samplesH5File='WaterSamples_pxlvals.h5', red=0, green=0, blue=255))
        trainSamplesInfo.append(SamplesInfoObj(className='Land', classID=2, maskImg=classTrainRegionsMask,
                                               maskPxlVal=2, outSampImgFile='LandSamples.kea', numSamps=500,
                                               samplesH5File='LandSamples_pxlvals.h5', red=150, green=150, blue=150))
        trainSamplesInfo.append(SamplesInfoObj(className='Mangroves', classID=3, maskImg=classTrainRegionsMask,
                                               maskPxlVal=3, outSampImgFile='MangroveSamples.kea', numSamps=500,
                                               samplesH5File='MangroveSamples_pxlvals.h5', red=0, green=153, blue=0))

        skClassifiers = []
        for i in range(5):
            skClassifiers.append(ExtraTreesClassifier(n_estimators=50))
        for i in range(5):
            skClassifiers.append(ExtraTreesClassifier(n_estimators=100))
        for i in range(5):
            skClassifiers.append(ExtraTreesClassifier(n_estimators=50, max_depth=2))
        for i in range(5):
            skClassifiers.append(ExtraTreesClassifier(n_estimators=100, max_depth=2))

        mangroveRegionClassImg = 'MangroveRegionClass.kea'
        classsklearn.perform_voting_classification(skClassifiers, trainSamplesInfo, imgFileInfo,
                                                   classWithinMask, 1, classVoteTemp, 'ClassImgSample',
                                                   mangroveRegionClassImg, gdalformat='KEA', numCores=-1)

    """
    rsgisUtils = rsgislib.RSGISPyUtils()

    if not isinstance(skClassifiers, list):
        raise rsgislib.RSGISPyException("A list of classifiers must be provided")

    numOfVotes = len(skClassifiers)

    if numCores <= 0:
        numCores = rsgisUtils.numProcessCores()

    # Remember whether the temporary directory pre-existed so that only what
    # was created here is removed at the end.
    tmpPresent = True
    if not os.path.exists(tmpDIR):
        os.makedirs(tmpDIR)
        tmpPresent = False

    outClassImgs = []
    mCoreParams = []
    dirs2DEL = []
    # Re-seed the generator which provides the per-vote sampling seeds.
    random.seed()
    for i in range(numOfVotes):
        cTmpDIR = os.path.join(tmpDIR, str(i))
        if os.path.exists(cTmpDIR):
            shutil.rmtree(cTmpDIR, ignore_errors=True)
        os.makedirs(cTmpDIR)
        dirs2DEL.append(cTmpDIR)

        tmpClassImgOut = os.path.join(tmpDIR, tmpImgBase + '_' + str(i) + '.kea')
        outClassImgs.append(tmpClassImgOut)
        inParams = dict()
        inParams['skClassifier'] = skClassifiers[i]
        inParams['cTmpDIR'] = cTmpDIR
        inParams['classAreaMask'] = classAreaMask
        inParams['classMaskPxlVal'] = classMaskPxlVal
        inParams['imgFileInfo'] = imgFileInfo
        inParams['tmpClassImgOut'] = tmpClassImgOut
        # The per-vote images are temporary .kea files whatever the final format is.
        inParams['gdalformat'] = 'KEA'
        inParams['trainSamplesInfo'] = trainSamplesInfo
        inParams['rndSeed'] = random.randrange(1000)
        mCoreParams.append(inParams)

    # Run processing on multiple cores; the context manager releases the
    # worker processes once the (blocking) map call has returned.
    with Pool(numCores) as mProccesPool:
        mProccesPool.map(_apply_voting_classifier, mCoreParams)

    # Combine results using MODE.
    rsgislib.imagecalc.calcMultiImgBandStats(outClassImgs, outClassImg, rsgislib.SUMTYPE_MODE, gdalformat, rsgislib.TYPE_8UINT, 0, True)
    rsgislib.rastergis.populateStats(clumps=outClassImg, addclrtab=True, calcpyramids=True, ignorezero=True)

    # Colour output classification image.
    ratDataset = gdal.Open(outClassImg, gdal.GA_Update)
    red = rat.readColumn(ratDataset, 'Red')
    green = rat.readColumn(ratDataset, 'Green')
    blue = rat.readColumn(ratDataset, 'Blue')
    # 'S255' is the fixed-width byte-string dtype formerly spelt 'a255'.
    ClassName = numpy.empty_like(red, dtype=numpy.dtype('S255'))
    # empty_like does not initialise memory: blank every row so that rows
    # without a class do not get arbitrary bytes written to the RAT.
    ClassName[...] = ''

    for trainSample in trainSamplesInfo:
        print("Apply Colour to class \'" + trainSample.className + "\'")
        red[trainSample.classID] = trainSample.red
        green[trainSample.classID] = trainSample.green
        blue[trainSample.classID] = trainSample.blue
        ClassName[trainSample.classID] = trainSample.className

    rat.writeColumn(ratDataset, "Red", red)
    rat.writeColumn(ratDataset, "Green", green)
    rat.writeColumn(ratDataset, "Blue", blue)
    rat.writeColumn(ratDataset, "ClassName", ClassName)
    ratDataset = None

    if not tmpPresent:
        shutil.rmtree(tmpDIR, ignore_errors=True)
    else:
        for cDIR in dirs2DEL:
            shutil.rmtree(cDIR, ignore_errors=True)
outSelectCol='NDVISamplingMang', propOfSample=0.1, binWidth=0.01, classColumn='Class', classVal='2') rastergis.histoSampling(clumps=clumpsImg, varCol='NDVI', outSelectCol='NDVISamplingOther', propOfSample=0.05, binWidth=0.01, classColumn='Class', classVal='3') print("Open GDAL Dataset") ratDataset = gdal.Open(clumpsImg, gdal.GA_Update) HHSamplingWater = rat.readColumn(ratDataset, "HHSamplingWater") HHSamplingMang = rat.readColumn(ratDataset, "HHSamplingMang") HHSamplingOther = rat.readColumn(ratDataset, "HHSamplingOther") NDVISamplingWater = rat.readColumn(ratDataset, "NDVISamplingWater") NDVISamplingMang = rat.readColumn(ratDataset, "NDVISamplingMang") NDVISamplingOther = rat.readColumn(ratDataset, "NDVISamplingOther") Training = numpy.empty_like(HHSamplingWater, dtype=int) Training[...] = 0 Training = numpy.where(((HHSamplingWater == 1) | (HHSamplingMang == 1) | (HHSamplingOther == 1) | (NDVISamplingWater == 1) | (NDVISamplingMang == 1) | (NDVISamplingOther == 1)), 1, Training) # Export column to RAT rat.writeColumn(ratDataset, "Training", Training) ratDataset = None
def calcClearSkyRegions(cloudsImg, validAreaImg, outputClearSkyMask, outFormat, tmpPath='./tmpClearSky', deleteTmpFiles=True, initClearSkyRegionDist=5000, initClearSkyRegionMinSize=3000, finalClearSkyRegionDist=1000, morphSize=21):
    """
    Given a cloud mask, identify the larger extent regions of useful clear-sky regions.

    :param cloudsImg: An image with the input mask of the cloud (pixel == 1) and shadow (pixel == 2)
    :param validAreaImg: A mask of the image data area (1 = valid and 0 = not-valid; i.e., outside of the data area)
    :param outputClearSkyMask: The output mask of the clear sky areas
    :param outFormat: The output image format.
    :param tmpPath: The path for temporary images produced during the processing to be stored
                    (Default: './tmpClearSky'; Note. all temp files are generated as KEA files).
                    The directory is created if it does not exist.
    :param deleteTmpFiles: Boolean as to whether the intermediate files should be deleted following
                           processing (Default: True - delete files).
    :param initClearSkyRegionDist: The distance in metres from a cloud/shadow object for the initial
                                   identification of clear sky regions (Default: 5000)
    :param initClearSkyRegionMinSize: The minimum size (in pixels) of the initial clear sky regions
                                      (Default: 3000 pixels)
    :param finalClearSkyRegionDist: The distance in metres from a cloud/shadow object for the final
                                    boundaries of the clear sky regions (Default: 1000)
    :param morphSize: the size of the circular morphological operator used to tidy up the result
                      (Default: 21). Must be odd.
    :raises rsgislib.RSGISPyException: if morphSize is even.

    Example::

        import rsgislib.imagecalibration
        cloudsImg = "./Outputs/LS8_20160605_lat52lon261_r24p203_clouds.kea"
        validAreaImg = "./Outputs/LS8_20160605_lat52lon261_r24p203_valid.kea"
        outputMask = "./Outputs/LS8_20160605_lat52lon261_r24p203_openskyvalid.kea"
        tmpPath = "./temp"
        rsgislib.imagecalibration.calcClearSkyRegions(cloudsImg, validAreaImg, outputMask, 'KEA', tmpPath)

    """
    import rsgislib
    import rsgislib.imagecalc
    import rsgislib.imageutils
    import rsgislib.segmentation
    import rsgislib.rastergis
    import rsgislib.vectorutils
    import rsgislib.imagemorphology
    import os.path
    import osgeo.gdal as gdal
    from rios import rat
    import numpy

    if morphSize % 2 == 0:
        raise rsgislib.RSGISPyException("The size of the morphology operator must be odd.")

    # Create the temporary directory when needed, remembering that it was
    # created here so it can be removed again once the temp files are deleted.
    tmpPathCreated = False
    if not os.path.exists(tmpPath):
        os.makedirs(tmpPath)
        tmpPathCreated = True

    baseDataName = os.path.splitext(os.path.basename(cloudsImg))[0]
    tmpCloudsImgDist2Clouds = os.path.join(tmpPath, baseDataName+"_dist2clouds.kea")
    tmpCloudsImgDist2CloudsNoData = os.path.join(tmpPath, baseDataName+"_dist2clouds_masked.kea")
    tmpInitClearSkyRegions = os.path.join(tmpPath, baseDataName+"initclearsky.kea")
    tmpInitClearSkyRegionsClumps = os.path.join(tmpPath, baseDataName+"initclearskyClumps.kea")
    tmpInitClearSkyRegionsRmSmall = os.path.join(tmpPath, baseDataName+"initclearskyClumpsRMSmall.kea")
    tmpInitClearSkyRegionsFinal = os.path.join(tmpPath, baseDataName+"initclearskyClumpsFinal.kea")
    tmpClearSkyRegionsFullExtent = os.path.join(tmpPath, baseDataName+"clearskyClumpsFullExtent.kea")
    tmpClearSkyRegionsFullExtentClumps = os.path.join(tmpPath, baseDataName+"clearskyClumpsFullExtentClumps.kea")
    tmpClearSkyRegionsFullExtentSelectClumps = os.path.join(tmpPath, baseDataName+"clearskyClumpsFullExtentSelectClumps.kea")
    tmpClearSkyRegionsFullExtentSelectClumpsOpen = os.path.join(tmpPath, baseDataName+"clearskyClumpsFullExtentSelectClumpsOpen.kea")
    tmpClearSkyRegionsFullExtentSelectClumpsOpenClump = os.path.join(tmpPath, baseDataName+"clearskyClumpsFullExtentSelectClumpsOpenClump.kea")
    tmpClearSkyRegionsFullExtentSelectClumpsOpenClumpRMSmall = os.path.join(tmpPath, baseDataName+"clearskyClumpsFullExtentSelectClumpsOpenClumpRMSmall.kea")
    tmpMorphOperator = os.path.join(tmpPath, 'CircularMorphOp.gmtxt')

    # Distance of every pixel to the nearest cloud (1) or shadow (2) pixel,
    # with pixels outside the valid area set to -1.
    rsgislib.imagecalc.calcDist2ImgVals(cloudsImg, tmpCloudsImgDist2Clouds, pxlVals=[1,2])
    rsgislib.imageutils.maskImage(tmpCloudsImgDist2Clouds, validAreaImg, tmpCloudsImgDist2CloudsNoData, 'KEA', rsgislib.TYPE_32INT, -1, 0)

    # Initial (conservative) clear sky regions, with small clumps removed.
    # The intermediate images are .kea files so they are written as KEA and
    # not in the final output format.
    rsgislib.imagecalc.imageMath(tmpCloudsImgDist2CloudsNoData, tmpInitClearSkyRegions, 'b1 > '+str(initClearSkyRegionDist), 'KEA', rsgislib.TYPE_32UINT)
    rsgislib.segmentation.clump(tmpInitClearSkyRegions, tmpInitClearSkyRegionsClumps, 'KEA', False, 0.0, False)
    rsgislib.rastergis.populateStats(tmpInitClearSkyRegionsClumps, True, True)
    rsgislib.segmentation.rmSmallClumps(tmpInitClearSkyRegionsClumps, tmpInitClearSkyRegionsRmSmall, initClearSkyRegionMinSize, 'KEA')
    rsgislib.segmentation.relabelClumps(tmpInitClearSkyRegionsRmSmall, tmpInitClearSkyRegionsFinal, 'KEA', False)
    rsgislib.rastergis.populateStats(tmpInitClearSkyRegionsFinal, True, True)

    # Full extent regions using the final (smaller) distance threshold.
    rsgislib.imagecalc.imageMath(tmpCloudsImgDist2CloudsNoData, tmpClearSkyRegionsFullExtent, 'b1 > '+str(finalClearSkyRegionDist), 'KEA', rsgislib.TYPE_32UINT)
    rsgislib.segmentation.clump(tmpClearSkyRegionsFullExtent, tmpClearSkyRegionsFullExtentClumps, 'KEA', False, 0.0, False)
    rsgislib.rastergis.populateStats(tmpClearSkyRegionsFullExtentClumps, True, True)

    # Keep only the full extent clumps which intersect an initial region.
    rsgislib.rastergis.populateRATWithStats(tmpInitClearSkyRegionsFinal, tmpClearSkyRegionsFullExtentClumps, [rsgislib.rastergis.BandAttStats(band=1, maxField='InitRegionInter')])

    ratDataset = gdal.Open(tmpClearSkyRegionsFullExtentClumps, gdal.GA_Update)
    InitRegionInter = rat.readColumn(ratDataset, "InitRegionInter")
    ValidClumps = numpy.zeros_like(InitRegionInter, dtype=numpy.dtype('int'))
    ValidClumps[InitRegionInter > 0] = 1
    rat.writeColumn(ratDataset, "ValidClumps", ValidClumps)
    ratDataset = None

    rsgislib.rastergis.collapseRAT(tmpClearSkyRegionsFullExtentClumps, 'ValidClumps', tmpClearSkyRegionsFullExtentSelectClumps, 'KEA', 1)
    rsgislib.rastergis.populateStats(tmpClearSkyRegionsFullExtentSelectClumps, True, True)

    # Tidy up with a morphological opening; opSize follows morphSize so it
    # agrees with the operator file created from the same parameter.
    rsgislib.imagemorphology.createCircularOp(outputFile=tmpMorphOperator, opSize=morphSize)
    rsgislib.imagemorphology.imageOpening(inputImage=tmpClearSkyRegionsFullExtentSelectClumps, outputImage=tmpClearSkyRegionsFullExtentSelectClumpsOpen, tempImage="", morphOperator=tmpMorphOperator, useOpFile=True, opSize=morphSize, gdalformat='KEA', datatype=rsgislib.TYPE_32UINT)

    rsgislib.segmentation.clump(tmpClearSkyRegionsFullExtentSelectClumpsOpen, tmpClearSkyRegionsFullExtentSelectClumpsOpenClump, 'KEA', False, 0.0, False)
    rsgislib.rastergis.populateStats(tmpClearSkyRegionsFullExtentSelectClumpsOpenClump, True, True)
    rsgislib.segmentation.rmSmallClumps(tmpClearSkyRegionsFullExtentSelectClumpsOpenClump, tmpClearSkyRegionsFullExtentSelectClumpsOpenClumpRMSmall, initClearSkyRegionMinSize, 'KEA')

    # Final binary mask in the requested output format.
    rsgislib.imagecalc.imageMath(tmpClearSkyRegionsFullExtentSelectClumpsOpenClumpRMSmall, outputClearSkyMask, "b1>0?1:0", outFormat, rsgislib.TYPE_8UINT)

    if deleteTmpFiles:
        rsgisUtils = rsgislib.RSGISPyUtils()
        tmpFiles = [tmpCloudsImgDist2Clouds,
                    tmpCloudsImgDist2CloudsNoData,
                    tmpInitClearSkyRegions,
                    tmpInitClearSkyRegionsClumps,
                    tmpInitClearSkyRegionsRmSmall,
                    tmpInitClearSkyRegionsFinal,
                    tmpClearSkyRegionsFullExtent,
                    tmpClearSkyRegionsFullExtentClumps,
                    tmpClearSkyRegionsFullExtentSelectClumps,
                    tmpClearSkyRegionsFullExtentSelectClumpsOpen,
                    tmpClearSkyRegionsFullExtentSelectClumpsOpenClump,
                    tmpClearSkyRegionsFullExtentSelectClumpsOpenClumpRMSmall,
                    tmpMorphOperator]
        for tmpFile in tmpFiles:
            rsgisUtils.deleteFileWithBasename(tmpFile)

        if tmpPathCreated:
            try:
                # rmdir only removes an empty directory.
                os.rmdir(tmpPath)
            except OSError:
                # Other files remain in the directory, so leave it in place.
                pass
def applyClassifer(classTrainInfo, skClassifier, imgMask, imgMaskVal, imgFileInfo, outputImg, gdalformat, classClrNames=True):
    """
    This function uses a trained classifier and applies it to the provided input image.

    :param classTrainInfo: dict (where the key is the class name) of ClassInfoObj objects which
                           will be used to train the classifier (i.e., trainClassifier()), provide
                           pixel value id and RGB class values.
    :param skClassifier: a trained instance of a scikit-learn classifier (e.g., use trainClassifier
                         or findClassifierParametersAndTrain)
    :param imgMask: is an image file providing a mask to specify where should be classified.
                    Simplest mask is all the valid data regions (rsgislib.imageutils.genValidMask)
    :param imgMaskVal: the pixel value within the imgMask to limit the region to which the
                       classification is applied. Can be used to create a hierarchical classification.
    :param imgFileInfo: a list of rsgislib.imageutils.ImageBandInfo objects (also used within
                        rsgislib.imageutils.extractZoneImageBandValues2HDF) to identify which images
                        and bands are to be used for the classification so it adheres to the
                        training data.
    :param outputImg: output image file with the classification. Note. by default a colour table
                      and class names column is added to the image. If an error is produced use
                      HFA or KEA formats.
    :param gdalformat: is the output image format - all GDAL supported formats are supported.
    :param classClrNames: default is True and therefore a colour table with the colours specified
                          in classTrainInfo and a ClassName column (taken from the keys of
                          classTrainInfo) will be added to the output file.
    :raises Exception: if the rios module could not be imported.

    """
    if not haveRIOS:
        raise Exception(
            "The rios module is required for this function could not be imported\n\t" + riosErr)

    # Input images: the mask plus one attribute per ImageBandInfo, named after its 'name'.
    infiles = applier.FilenameAssociations()
    infiles.imageMask = imgMask
    numClassVars = 0
    for imgFile in imgFileInfo:
        setattr(infiles, imgFile.name, imgFile.fileName)
        # Total number of predictor variables is the sum of the selected bands.
        numClassVars += len(imgFile.bands)

    outfiles = applier.FilenameAssociations()
    outfiles.outimage = outputImg

    # Extra values handed through to the per-block function.
    otherargs = applier.OtherInputs()
    otherargs.classifier = skClassifier
    otherargs.mskVal = imgMaskVal
    otherargs.numClassVars = numClassVars
    otherargs.imgFileInfo = imgFileInfo

    # Pyramids and statistics are not produced by the applier; populateStats
    # below builds the colour table and pyramids on the finished output.
    aControls = applier.ApplierControls()
    aControls.progress = cuiprogress.CUIProgressBar()
    aControls.drivername = gdalformat
    aControls.omitPyramids = True
    aControls.calcStats = False

    print("Applying the Classifier")
    applier.apply(_applySKClassifier, infiles, outfiles, otherargs, controls=aControls)
    print("Completed")

    rsgislib.rastergis.populateStats(clumps=outputImg, addclrtab=True, calcpyramids=True, ignorezero=True)

    if classClrNames:
        ratDataset = gdal.Open(outputImg, gdal.GA_Update)
        try:
            red = rat.readColumn(ratDataset, 'Red')
            green = rat.readColumn(ratDataset, 'Green')
            blue = rat.readColumn(ratDataset, 'Blue')
            # Zero-initialised so that rows which do not correspond to a class
            # get an empty name; numpy.empty_like would leave arbitrary bytes
            # in those rows. 'S255' is the same dtype as the deprecated 'a255'.
            ClassName = numpy.zeros_like(red, dtype=numpy.dtype('S255'))

            for classKey, classInfo in classTrainInfo.items():
                print("Apply Colour to class \'" + classKey + "\'")
                # The class pixel value (id) is the row index in the attribute table.
                red[classInfo.id] = classInfo.red
                green[classInfo.id] = classInfo.green
                blue[classInfo.id] = classInfo.blue
                ClassName[classInfo.id] = classKey

            rat.writeColumn(ratDataset, "Red", red)
            rat.writeColumn(ratDataset, "Green", green)
            rat.writeColumn(ratDataset, "Blue", blue)
            rat.writeColumn(ratDataset, "ClassName", ClassName)
        finally:
            # Dropping the reference releases the GDAL dataset handle, even
            # when an error occurs while editing the attribute table.
            ratDataset = None