def createStage3ImageSubsets(self, inputImage, s2BordersImage, s3BordersClumps, subsetImgsDIR, subsetImgsMaskedDIR, subImgBaseName, minSize):
     """
     Clump the stage 2 borders image and write one image subset per clump.

     The borders image is clumped into ``s3BordersClumps`` (KEA), the clump
     statistics and spatial extent columns are populated, and then, for every
     RAT row except row 0, ``inputImage`` is subset to the bounding box of the
     clump and masked with ``s2BordersImage``.

     :param inputImage: image from which the subsets are cut.
     :param s2BordersImage: image which is clumped and used as the mask.
     :param s3BordersClumps: output clumps image (KEA).
     :param subsetImgsDIR: directory receiving the bounding box subsets.
     :param subsetImgsMaskedDIR: directory receiving the masked subsets.
     :param subImgBaseName: base name used for every output file.
     :param minSize: clumps whose 'Histogram' value is above this get a
                     '_masked.kea' file name, all others '_burn.kea'.
     """
     segmentation.clump(s2BordersImage, s3BordersClumps, 'KEA', True, 0)
     rastergis.populateStats(s3BordersClumps, True, True)

     rastergis.spatialExtent(s3BordersClumps, 'minXX', 'minXY', 'maxXX', 'maxXY', 'minYX', 'minYY', 'maxYX', 'maxYY')

     rsgisUtils = rsgislib.RSGISPyUtils()
     dataType = rsgisUtils.getRSGISLibDataTypeFromImg(inputImage)

     # The dataset is only needed while the columns are read, so it is
     # released straight away (also when a read fails) rather than being
     # held open for the whole of the subset loop.
     ratDS = gdal.Open(s3BordersClumps, gdal.GA_Update)
     try:
         minX = rat.readColumn(ratDS, "minXX")
         maxX = rat.readColumn(ratDS, "maxXX")
         minY = rat.readColumn(ratDS, "minYY")
         maxY = rat.readColumn(ratDS, "maxYY")
         Histogram = rat.readColumn(ratDS, "Histogram")
     finally:
         ratDS = None

     # Row 0 is skipped, as in the original loop.
     for i in range(1, minX.shape[0]):
         subImage = os.path.join(subsetImgsDIR, subImgBaseName + str(i) + '.kea')
         imageutils.subsetbbox(inputImage, subImage, 'KEA', dataType, minX[i], maxX[i], minY[i], maxY[i])
         # The file name suffix records whether the clump is above minSize.
         if Histogram[i] > minSize:
             maskedFile = os.path.join(subsetImgsMaskedDIR, subImgBaseName + str(i) + '_masked.kea')
         else:
             maskedFile = os.path.join(subsetImgsMaskedDIR, subImgBaseName + str(i) + '_burn.kea')
         imageutils.maskImage(subImage, s2BordersImage, maskedFile, 'KEA', dataType, 0, 0)
         rastergis.populateStats(maskedFile, True, False)
Exemple #2
0
    def run(self, cmdargs):
        """
        Build an error matrix from two RAT columns and export it.

        The reference and classified columns named in ``cmdargs`` are read
        from ``cmdargs.inputFile``; the unique values of the reference column
        define the classes. The matrix is always written as ASCII to
        ``cmdargs.outputFile`` and additionally as TeX when
        ``cmdargs.outputTexFile`` is not None.

        :param cmdargs: object providing inputFile, referenceCol,
                        classifiedCol, outputFile and outputTexFile.
        """
        refCol = rat.readColumn(cmdargs.inputFile, cmdargs.referenceCol)
        classCol = rat.readColumn(cmdargs.inputFile, cmdargs.classifiedCol)

        classes = np.unique(refCol)

        # Formatted so that the text matches the former Python 2 print
        # statement while also being valid on Python 3.
        print("Classes ( {0} ):  {1}".format(classes.size, classes))

        # Square matrix with one row and one column per reference class.
        errMatrix = np.zeros((classes.size, classes.size))

        self.buildErrorMatrix(errMatrix, classes, refCol, classCol)

        self.exportErrorMatrixAsASCII(cmdargs.outputFile, classes, errMatrix)

        if cmdargs.outputTexFile is not None:
            self.exportErrorMatrixAsTex(cmdargs.outputTexFile, classes,
                                        errMatrix)
Exemple #3
0
def collapseClasses(inputFile):
    """
    Overwrite the colour of the first two RAT rows of ``inputFile``.

    Row 0 (water) is set to RGB (135, 206, 255) and row 1 (land) to
    RGB (34, 139, 34); the 'Red', 'Green' and 'Blue' columns are then
    written back to the file.

    :param inputFile: image with a RAT, opened in update mode.
    """
    channelNames = ("Red", "Green", "Blue")
    # One RGB triple per RAT row, in row order.
    rowColours = (
        (135, 206, 255),  # Water
        (34, 139, 34),    # Land
    )

    ratDataset = gdal.Open(inputFile, gdal.GA_Update)
    channels = [rat.readColumn(ratDataset, name) for name in channelNames]

    for row, rgb in enumerate(rowColours):
        for channel, value in zip(channels, rgb):
            channel[row] = value

    for name, channel in zip(channelNames, channels):
        rat.writeColumn(ratDataset, name, channel)
Exemple #4
0
def testOutputSameFile(imgfile):
    """
    Run ratapplier with the same RAT handle as input and output.

    After applying ``myFunc`` with a block length of 5, the 'sqrd' column
    of ``imgfile`` must equal the square of its 'Value' column. A failure
    is reported through riostestutils.

    :return: True when the columns agree, otherwise False.
    """
    sourceRats = ratapplier.RatAssociations()
    targetRats = ratapplier.RatAssociations()
    applierControls = ratapplier.RatApplierControls()

    # The very same handle object is attached to both sides.
    sourceRats.img = ratapplier.RatHandle(imgfile)
    targetRats.img = sourceRats.img
    applierControls.setBlockLength(5)

    ratapplier.apply(myFunc, sourceRats, targetRats,
                     controls=applierControls)

    values = rat.readColumn(imgfile, 'Value')
    squares = rat.readColumn(imgfile, 'sqrd')
    if (values**2 != squares).any():
        riostestutils.report(TESTNAME, "sqrd incorrect, in sameFile output")
        return False
    return True
Exemple #5
0
def testDifferentOutput(imgfile, imgfile2):
    """
    Run ratapplier reading from one file and writing to another.

    ``imgfile2`` is first created without a RAT, then ``myFuncDiffFile``
    is applied with a block length of 3. The 'sqrd' column of ``imgfile2``
    must equal the square of the 'Value' column of ``imgfile``; a failure
    is reported through riostestutils.

    :return: True when the columns agree, otherwise False.
    """
    makeTestFile(imgfile2, withRat=False)

    sourceRats = ratapplier.RatAssociations()
    targetRats = ratapplier.RatAssociations()
    applierControls = ratapplier.RatApplierControls()

    sourceRats.img = ratapplier.RatHandle(imgfile)
    targetRats.outimg = ratapplier.RatHandle(imgfile2)
    applierControls.setBlockLength(3)

    ratapplier.apply(myFuncDiffFile, sourceRats, targetRats,
                     controls=applierControls)

    values = rat.readColumn(imgfile, 'Value')
    squares = rat.readColumn(imgfile2, 'sqrd')
    if (values**2 != squares).any():
        riostestutils.report(TESTNAME,
                             "sqrd incorrect, in differentFile output")
        return False
    return True
    def readRATClassesColours(self, gdalRATInput, classNameCol):
        """
        Read the class names and colours of the populated RAT rows.

        Only rows whose 'Histogram' value is greater than zero are used.
        If anything goes wrong while reading, the error is printed and the
        process exits via ``sys.exit()``.

        :param gdalRATInput: path of the image holding the RAT.
        :param classNameCol: name of the column with the class names.
        :return: list with one ``[name, red, green, blue]`` entry per used
                 row, the name being stripped of surrounding whitespace.
        """
        namesColoursList = list()
        try:
            ratDataset = gdal.Open(gdalRATInput, gdal.GA_ReadOnly)
            classNames = rat.readColumn(ratDataset, classNameCol)
            red = rat.readColumn(ratDataset, "Red")
            green = rat.readColumn(ratDataset, "Green")
            blue = rat.readColumn(ratDataset, "Blue")
            histo = rat.readColumn(ratDataset, "Histogram")

            for i in range(len(classNames)):
                if histo[i] > 0:
                    namesColoursList.append(
                        [classNames[i].strip(), red[i], green[i], blue[i]])
        except Exception as e:
            # Same text as the former Python 2 print statement produced.
            print("Error:  " + str(e))
            sys.exit()
        # The list used to be built and then dropped; hand it to the caller.
        return namesColoursList
Exemple #7
0
def testReduceRat(imgfile, imgfile3):
    """
    Check that a RAT can be reduced alongside its raster.

    A new output image is created in which every odd pixel value is
    replaced with the even number above it. The RAT then has to be copied
    across with the same reduction, so only the even numbered rows are
    written. The 'Value' column of the result is compared against every
    second entry of the input 'Value' column.

    :return: True when the reduced column matches, otherwise False.
    """
    # Step 1: copy the raster while reducing the pixel values. HFA is
    # used because the output format has to support RATs.
    rasterInputs = applier.FilenameAssociations()
    rasterOutputs = applier.FilenameAssociations()
    rasterInputs.inimg = imgfile
    rasterOutputs.outimg = imgfile3
    rasterControls = applier.ApplierControls()
    rasterControls.setOutputDriverName('HFA')
    applier.apply(rasterReduceFunc, rasterInputs, rasterOutputs,
                  controls=rasterControls)

    # Step 2: reduce the RAT with ratapplier.
    sourceRats = ratapplier.RatAssociations()
    targetRats = ratapplier.RatAssociations()
    ratControls = ratapplier.RatApplierControls()

    sourceRats.img = ratapplier.RatHandle(imgfile)
    targetRats.outimg = ratapplier.RatHandle(imgfile3)
    ratControls.setBlockLength(3)

    ratapplier.apply(ratReduceFunc, sourceRats, targetRats,
                     controls=ratControls)

    # Step 3: compare the even rows of the input with the output rows.
    colEven = rat.readColumn(imgfile, 'Value')[::2]
    colReduced = rat.readColumn(imgfile3, 'Value')[:len(colEven)]
    if (colEven != colReduced).any():
        riostestutils.report(
            TESTNAME, "Reduced RAT incorrect: %s, %s" % (colEven, colReduced))
        return False
    return True
Exemple #8
0
def testNewRat(imgfile4):
    """
    Create a RAT from scratch with ratapplier, using no input RAT.

    ``imgfile4`` is created without a RAT, the row count is fixed at 256
    through the controls, and ``myFuncNewRat`` is applied with a block
    length of 3. The resulting 'newCol' column must hold 0..255.

    :return: result of the element-wise comparison reduced with ``all()``.
    """
    makeTestFile(imgfile4, withRat=False)

    noInputs = ratapplier.RatAssociations()
    targetRats = ratapplier.RatAssociations()
    applierControls = ratapplier.RatApplierControls()
    # Without an input RAT the number of rows has to be given explicitly.
    applierControls.setRowCount(256)

    targetRats.outimg = ratapplier.RatHandle(imgfile4)
    applierControls.setBlockLength(3)

    ratapplier.apply(myFuncNewRat, noInputs, targetRats,
                     controls=applierControls)

    written = rat.readColumn(imgfile4, 'newCol')
    expected = numpy.arange(256, dtype=numpy.uint32)
    matches = (written == expected).all()
    if not matches:
        riostestutils.report(TESTNAME,
                             "New RAT incorrect: %s, %s" % (written, expected))
    return matches
Exemple #9
0
 def run(self, cmdargs):
     """
     Print every unique value found in one RAT column.

     The process exits via ``sys.exit()`` when the dataset cannot be opened.

     :param cmdargs: object providing ``inputFile`` (image path) and
                     ``column`` (RAT column name); both are stripped of
                     surrounding whitespace before use.
     """
     # Get variables from command line
     inputFilePath = cmdargs.inputFile.strip()
     selectColumn = cmdargs.column.strip()

     # Open the GDAL dataset
     ratDataset = gdal.Open(inputFilePath, gdal.GA_ReadOnly)

     # Check the GDAL dataset was correctly opened
     if ratDataset is None:
         print("The image dataset could not opened.")
         sys.exit()

     # Read the selected column
     selectCol = rat.readColumn(ratDataset, selectColumn)

     # Find the unique values and print them one per line
     classes = np.unique(selectCol)
     for className in classes:
         print(className)
Exemple #10
0
def run():
    """
    Run tests of the rios.rat functions

    For each tested datatype the test values are written to a column of a
    temporary image and read back; the values read must equal the values
    written. The temporary image is removed afterwards.

    :return: True when every column round-trips, otherwise False.
    """
    riostestutils.reportStart(TESTNAME)

    imgfile = 'test.img'
    ratValues = makeTestFile(imgfile)
    nValues = len(ratValues)

    # Column name and the array datatype used to write it.
    typesToTest = [
        ("Int32", numpy.int32),
        ("Float32", numpy.float32),
        ("Unicode", numpy.dtype('U10')),
    ]
    if sys.version_info.major < 3:
        # Only test old string type for python 2
        typesToTest.append(("String", numpy.dtype('S10')))

    allOK = True
    for colName, arrayDtype in typesToTest:
        # Write the values out using the datatype under test ...
        rat.writeColumn(imgfile, colName, ratValues.astype(arrayDtype))

        # ... then read them back, converted to the original datatype.
        columnRead = rat.readColumn(imgfile, colName)
        ratValues_fromFile = columnRead[:nValues].astype(ratValues.dtype)
        roundTripOK = (ratValues_fromFile == ratValues).all()
        if not roundTripOK:
            riostestutils.report(TESTNAME,
                                 "Value mis-match for column %s" % (colName))
            allOK = False

    if os.path.exists(imgfile):
        os.remove(imgfile)

    if allOK:
        riostestutils.report(TESTNAME, "Passed")

    return allOK
import sys
import os
import numpy as np
from rios import rat
from osgeo import gdal

SegImg = 'Segmentation.kea'
NewColName = 'Mask1'

#######################################################
# Nothing can be done without the segmented image.
if not os.path.exists(SegImg):
    sys.exit('Error: Could not find the segmented image.')

# Update mode is required because a column is written further down.
ratDataset = gdal.Open(SegImg, gdal.GA_Update)

# An existing column is read only to obtain the number of RAT rows.
RefCol = rat.readColumn(ratDataset, 'Alpha')

# Mask column: one everywhere ...
Ones = np.ones_like(RefCol, dtype='uint8')
del RefCol

# ... except for the first clump, which contains no data.
Ones[0] = 0

# Write the numpy array to the RAT, then drop the array and the dataset.
rat.writeColumn(ratDataset, NewColName, Ones)
del Ones, ratDataset
print('Done.')

ref = 'S1B_IW_GRDH_1SDV_20170423T165655_Sigma0_stack_lee_clumps2_erf_clumptrain_mode_snapped.tif'
mask = 'S1B_IW_GRDH_1SDV_20170423T165655_Sigma0_stack_lee_clumps2_mean.kea'
mask_snap = 'S1B_IW_GRDH_1SDV_20170423T165655_Sigma0_stack_lee_clumps2_mean_snap.kea'
gdalFormat = 'KEA'
rsgislib.imageutils.resampleImage2Match(ref,
Exemple #12
0
def apply_sklearn_classifer(classTrainInfo,
                            skClassifier,
                            imgMask,
                            imgMaskVal,
                            imgFileInfo,
                            outputImg,
                            gdalformat,
                            classClrNames=True):
    """
This function uses a trained classifier and applies it to the provided input image.

:param classTrainInfo: dict (where the key is the class name) of rsgislib.classification.ClassSimpleInfoObj
                       objects which will be used to train the classifier (i.e., train_sklearn_classifier()),
                       provide pixel value id and RGB class values.
:param skClassifier: a trained instance of a scikit-learn classifier
                     (e.g., use train_sklearn_classifier or train_sklearn_classifer_gridsearch)
:param imgMask: is an image file providing a mask to specify where should be classified. Simplest mask is all
                the valid data regions (rsgislib.imageutils.genValidMask)
:param imgMaskVal: the pixel value within the imgMask to limit the region to which the classification is applied.
                   Can be used to create a hierarchical classification.
:param imgFileInfo: a list of rsgislib.imageutils.ImageBandInfo objects (also used within
                    rsgislib.imageutils.extractZoneImageBandValues2HDF) to identify which images and bands are to
                    be used for the classification so it adheres to the training data.
:param outputImg: output image file with the classification. Note. by default a colour table and class names column
                  is added to the image. If an error is produced use HFA or KEA formats.
:param gdalformat: is the output image format - all GDAL supported formats are supported.
:param classClrNames: default is True and therefore a colour table with the colours specified in classTrainInfo
                      and a ClassName column (keyed by the class names of classTrainInfo) will be added to the
                      output file.

    """
    infiles = applier.FilenameAssociations()
    infiles.imageMask = imgMask
    numClassVars = 0
    # Every input image is registered under its own name; the total band
    # count gives the number of variables passed to the classifier.
    for imgFile in imgFileInfo:
        infiles.__dict__[imgFile.name] = imgFile.fileName
        numClassVars = numClassVars + len(imgFile.bands)

    outfiles = applier.FilenameAssociations()
    outfiles.outimage = outputImg
    otherargs = applier.OtherInputs()
    otherargs.classifier = skClassifier
    otherargs.mskVal = imgMaskVal
    otherargs.numClassVars = numClassVars
    otherargs.imgFileInfo = imgFileInfo

    # Prefer the tqdm progress bar; fall back to the GDAL style one when
    # tqdm (or the rsgislib wrapper) is unavailable. 'Exception' rather
    # than a bare except so KeyboardInterrupt/SystemExit still propagate.
    try:
        import tqdm
        progress_bar = rsgislib.TQDMProgressBar()
    except Exception:
        progress_bar = cuiprogress.GDALProgressBar()

    aControls = applier.ApplierControls()
    aControls.progress = progress_bar
    aControls.drivername = gdalformat
    aControls.omitPyramids = True
    aControls.calcStats = False

    # RIOS function to apply classifer
    def _applySKClassifier(info, inputs, outputs, otherargs):
        """
        Internal function for rios applier. Used within applyClassifer.

        Predicts a class for every pixel of the block whose mask value is
        otherargs.mskVal; all other pixels are written as 0.
        """
        outClassVals = numpy.zeros_like(inputs.imageMask, dtype=numpy.uint32)
        if numpy.any(inputs.imageMask == otherargs.mskVal):
            outClassVals = outClassVals.flatten()
            imgMaskVals = inputs.imageMask.flatten()
            # numpy.float64 is what the removed alias numpy.float resolved to.
            classVars = numpy.zeros(
                (outClassVals.shape[0], otherargs.numClassVars),
                dtype=numpy.float64)
            # Array index which can be used to populate the output array following masking etc.
            ID = numpy.arange(imgMaskVals.shape[0])
            classVarsIdx = 0
            for imgFile in otherargs.imgFileInfo:
                imgArr = inputs.__dict__[imgFile.name]
                for band in imgFile.bands:
                    # Band numbers are 1-based, array indices 0-based.
                    classVars[..., classVarsIdx] = imgArr[(band - 1)].flatten()
                    classVarsIdx = classVarsIdx + 1
            classVars = classVars[imgMaskVals == otherargs.mskVal]
            ID = ID[imgMaskVals == otherargs.mskVal]
            predClass = otherargs.classifier.predict(classVars)
            outClassVals[ID] = predClass
            # Back to the (1, rows, cols) layout of the mask block.
            outClassVals = numpy.expand_dims(outClassVals.reshape(
                (inputs.imageMask.shape[1], inputs.imageMask.shape[2])),
                                             axis=0)
        outputs.outimage = outClassVals

    print("Applying the Classifier")
    applier.apply(_applySKClassifier,
                  infiles,
                  outfiles,
                  otherargs,
                  controls=aControls)
    print("Completed")
    rsgislib.rastergis.populateStats(clumps=outputImg,
                                     addclrtab=True,
                                     calcpyramids=True,
                                     ignorezero=True)

    if classClrNames:
        ratDataset = gdal.Open(outputImg, gdal.GA_Update)
        red = rat.readColumn(ratDataset, 'Red')
        green = rat.readColumn(ratDataset, 'Green')
        blue = rat.readColumn(ratDataset, 'Blue')
        # 'S255' is the supported spelling of the byte string dtype that
        # the deprecated alias 'a255' stood for.
        ClassName = numpy.empty_like(red, dtype=numpy.dtype('S255'))

        # The class id is used as the RAT row index.
        for classKey in classTrainInfo:
            print("Apply Colour to class \'" + classKey + "\'")
            red[classTrainInfo[classKey].id] = classTrainInfo[classKey].red
            green[classTrainInfo[classKey].id] = classTrainInfo[classKey].green
            blue[classTrainInfo[classKey].id] = classTrainInfo[classKey].blue
            ClassName[classTrainInfo[classKey].id] = classKey

        rat.writeColumn(ratDataset, "Red", red)
        rat.writeColumn(ratDataset, "Green", green)
        rat.writeColumn(ratDataset, "Blue", blue)
        rat.writeColumn(ratDataset, "ClassName", ClassName)
        ratDataset = None
# Build the training matrix from the RAT of the clumps image: one row per
# RAT row, one column per predictor, with the class label as last column.

# Open RAT
inRatFile = clumps
ratDataset = gdal.Open(clumps, gdal.GA_Update)

# Set column names
x_col_names = ['VVAvg', 'VHAvg', 'VVdivVHAvg', 'VVStd', 'VHStd', 'VVdivVHStd']
# x_col_names = ['VVAvg','VHAvg', 'VVStd','VHStd']
y_col_name = 'ClassInt'

# Set up list to hold data
X = []

# Read in data from each column (one array per predictor)
print('read data')
for colName in x_col_names:
    X.append(rat.readColumn(ratDataset, colName))

# Read in training data
print('read training data')
y = rat.readColumn(ratDataset, y_col_name)
# Set NA values to 0
y = np.where(y == b'NA', 0, y)
y = y.astype(np.int16)

# The labels become the last entry, i.e. the last column after transposing
X.append(y)

# Stack to (columns, rows), then transpose to (rows, columns)
X = np.array(X)
X = X.transpose()

# Keep only the rows whose label (last column) is not 0, i.e. not NA
X_train = X[X[:, -1] != 0]
Exemple #14
0
    outColNum = cHorizonFields[outColName.strip()]

    # JOIN ATTRIBUTES FROM TEXT FILE
    print('Adding ' + outColName + ' (column ' + str(outColNum) + ') to RAT')
    # Open SSURGO text files
    componentFileName = os.path.join(inDIRName, 'tabular', 'comp.txt')
    chorizonFileName = os.path.join(inDIRName, 'tabular', 'chorizon.txt')

    componentFile = open(componentFileName, 'rU')
    chorizonFile = open(chorizonFileName,'rU')

    componentTxt = csv.reader(componentFile, delimiter='|')
    chorizonTxt = csv.reader(chorizonFile, delimiter='|')

    # Get mukey column from input file
    mukeyCol = rat.readColumn(outKEAFile, 'mukey')
 
    # Set up blank columns for output (one for each layer)
    outColH1 = numpy.zeros_like(mukeyCol)
    outColH2 = numpy.zeros_like(mukeyCol) 
    outColH3 = numpy.zeros_like(mukeyCol) 
    outColH4 = numpy.zeros_like(mukeyCol)
    outColH5 = numpy.zeros_like(mukeyCol)
    outColH6 = numpy.zeros_like(mukeyCol)
    
    # Set columns for mukey and cokey in componentTxt
    compMUKEYCol = 107
    compCOKEYCol = 108
    
    chorizonCOKEYCol = 169
    chorizonHZNAMECol = 0
Exemple #15
0
def collapseClasses(inputFile, lcdbColName, outputColName):
    """
    Collapse the LCDB classes of a RAT column into 13 broader classes.

    The column ``lcdbColName`` is read from ``inputFile``, every LCDB code
    listed in the recode table below is translated to its broader class,
    and the result is written to the column ``outputColName``. Codes which
    are not listed keep the initial value of 0 (undefined).

    :param inputFile: image with a RAT, opened in update mode.
    :param lcdbColName: name of the column holding the LCDB codes.
    :param outputColName: name of the column receiving the new classes.
    """
    # Output classes.
    UNDEFINED = 0
    HIGH_PRODUCING_EXOTIC_HERBACEOUS = 1
    TALL_TUSSOCK_GRASSLAND = 2
    OTHER_HERBACEOUS = 3
    SCRUB = 4
    INDIGENOUS_FOREST = 5
    EXOTIC_FOREST = 6
    OTHER_WOODY = 7
    SUB_ALPINE_SCRUBLAND = 8
    BUILT_UP = 9
    BARE_GROUND = 10
    WATER = 11
    PERMINANT_SNOW_ICE = 12  # Permanent Snow and Ice

    # (LCDB code, output class) pairs, applied in this order.
    recodeTable = (
        (0, UNDEFINED),                          # Undefined
        (1, BUILT_UP),                           # Built-up Area (settlement)
        (2, OTHER_HERBACEOUS),                   # Urban Parkland/Open Space
        (5, BUILT_UP),                           # Transport Infrastructure
        (6, BARE_GROUND),                        # Surface Mines and Dumps
        (10, BARE_GROUND),                       # Coastal Sand and Gravel
        (11, BARE_GROUND),                       # River and Lakeshore Gravel and Rock
        (12, BARE_GROUND),                       # Landslide
        (13, BARE_GROUND),                       # Alpine Gravel and Rock
        (14, PERMINANT_SNOW_ICE),                # Permanent Snow and Ice
        (15, OTHER_HERBACEOUS),                  # Alpine Grass/Herbfield
        (20, WATER),                             # Lake and Pond
        (21, WATER),                             # River
        (22, WATER),                             # Estuarine Open Water
        (30, HIGH_PRODUCING_EXOTIC_HERBACEOUS),  # Short-rotation Cropland
        (31, BARE_GROUND),                       # Cultivation
        (33, HIGH_PRODUCING_EXOTIC_HERBACEOUS),  # Orchard Vineyard & Other Perennial Crops
        (40, HIGH_PRODUCING_EXOTIC_HERBACEOUS),  # High Producing Exotic Grassland
        (41, OTHER_HERBACEOUS),                  # Low Producing Grassland
        (43, TALL_TUSSOCK_GRASSLAND),            # Tall Tussock Grassland
        (44, OTHER_HERBACEOUS),                  # Depleted Grassland
        (45, OTHER_HERBACEOUS),                  # Herbaceous Freshwater Vegetation
        (46, OTHER_HERBACEOUS),                  # Herbaceous Saline Vegetation
        (47, OTHER_HERBACEOUS),                  # Flaxland
        (50, OTHER_HERBACEOUS),                  # Fernland
        (51, SCRUB),                             # Gorse and/or Broom
        (52, SCRUB),                             # Manuka and/or Kanuka
        (54, INDIGENOUS_FOREST),                 # Broadleaved Indigenous Hardwoods
        (55, SUB_ALPINE_SCRUBLAND),              # Sub Alpine Shrubland
        (56, SCRUB),                             # Mixed Exotic Shrubland
        (58, SCRUB),                             # Matagouri or Grey Scrub
        (64, BARE_GROUND),                       # Forest - Harvested
        (68, OTHER_WOODY),                       # Deciduous Hardwoods
        (69, INDIGENOUS_FOREST),                 # Indigenous Forest
        (70, OTHER_WOODY),                       # Mangroves
        (71, EXOTIC_FOREST),                     # Exotic Forest
    )

    ratDataset = gdal.Open(inputFile, gdal.GA_Update)
    lcdbCol = rat.readColumn(ratDataset, lcdbColName)

    outClassesCol = np.zeros_like(lcdbCol)
    for lcdbCode, collapsedClass in recodeTable:
        outClassesCol = np.where(lcdbCol == lcdbCode, collapsedClass,
                                 outClassesCol)

    rat.writeColumn(ratDataset, outputColName, outClassesCol)
Exemple #16
0
        #	print('clumps: ' + clumps)
        print('')
        ratutils.populateImageStats(sel,
                                    clumps,
                                    calcMax=True,
                                    calcMean=True,
                                    calcMin=True)  # add SEL statistics to RAT
        ratutils.populateImageStats(guf, clumps,
                                    calcMax=True)  # add SEL statistics to RAT
        #ratutils.populateImageStats(waterPerm,clumps,calcMean=True) # add water permanance statistics to RAT

        # Open RAT
        ratDataset = gdal.Open(clumps, gdal.GA_Update)
        data = []
        # Read in data from class_cert and sel columns
        data.append(rat.readColumn(ratDataset, 'OutClass_mode_cert'))
        data.append(rat.readColumn(ratDataset, 'SELMax'))
        data.append(rat.readColumn(ratDataset, 'gufMax'))

        mode_cert = data[0]
        sel_d = data[1]
        guf_d = data[2]
        mode_cert_sel = mode_cert

        #where statement to make sel > 60 objects 'other'
        mode_cert_sel[numpy.where((mode_cert_sel == 1) & (sel_d >= 60))] = 2
        mode_cert_sel[numpy.where((mode_cert_sel == 3) & (guf_d > 0))] = 2

        names = []
        for i in mode_cert_sel:
            if i == 1:
Exemple #17
0
                i, colcount - 2,
                int(max(np.count_nonzero(band == i,
                                         axis=0))))  # max length/height
            segment_ds.SetValueAsInt(
                i, colcount - 1,
                int(
                    max(np.count_nonzero(band == i, axis=0)) /
                    max(np.count_nonzero(band == i, axis=1)) *
                    100))  # length/width ratio

        # ------------------------------------------------- #
        # Hierarchical Classification
        print("Hierarchical Classification...")

        # Reading in segment RAT
        NDWIAvg = rat.readColumn(segment_rat, "NDWIAvg")
        SWIRratioAvg = rat.readColumn(segment_rat, "SWIRratioAvg")
        RVIAvg = rat.readColumn(segment_rat, "RVIAvg")

        # Creating and population parent class column based on rules
        segment_ds.CreateColumn('p_class', gdal.GFT_Integer, gdal.GFU_Generic)
        for i in range(int(segment_ds.GetRowCount(
        ))):  # iterate down rows for number of rows in input stack
            if segment_ds.GetValueAsInt(
                    i, (segment_ds.GetColumnCount() -
                        1)) == 0:  # if no class (0) in p_class column
                if NDWIAvg[i] > 0.6:
                    segment_ds.SetValueAsInt(i,
                                             (segment_ds.GetColumnCount() - 1),
                                             1)  # water
                elif SWIRratioAvg[i] < 0.4:
Exemple #18
0
#!/usr/bin/env python

import sys
from rios import rat
# Image file whose RAT is read; taken from the first command line argument.
fname = sys.argv[1]

# Written as a call so that it is valid on both Python 2 and Python 3.
print(rat.readColumn(fname, "floatstuff"))
Exemple #19
0
def classifyWithinRATTiled(clumpsImg,
                           classesIntCol,
                           classesNameCol,
                           variables,
                           classifier=RandomForestClassifier(n_estimators=100,
                                                             max_features=3,
                                                             oob_score=True,
                                                             n_jobs=-1),
                           outColInt="OutClass",
                           outColStr="OutClassName",
                           roiCol=None,
                           roiVal=1,
                           classColours=None,
                           scaleVarsRange=False,
                           justFit=False):
    """
A function which will perform a classification within the RAT using a classifier from scikit-learn using the rios ratapplier interface allowing very large RATs to be processed. 

:param clumpsImg: is the clumps image on which the classification is to be performed
:param classesIntCol: is the column with the training data as int values
:param classesNameCol: is the column with the training data as string class names
:param variables: is an array of column names which are to be used for the classification
:param classifier: is an instance of a scikit-learn classifier (e.g., RandomForests which is Default)
:param outColInt: is the output column name for the int class representation (Default: 'OutClass')
:param outColStr: is the output column name for the class names column (Default: 'OutClassName')
:param roiCol: is a column name for a column which specifies the region to be classified. If None ignored (Default: None)
:param roiVal: is a int value used within the roiCol to select a region to be classified (Default: 1)
:param classColours: is a python dict using the class name as the key along with arrays of length 3 specifying the RGB colours for the class.
:param scaleVarsRange: will rescale each variable independently to a range of 0-1 (default: False).
:param justFit: is a boolean specifying that the classifier should just be fitted to the data and not applied (Default: False; i.e., apply classification)

Note: the default classifier instance is created once, when this function is defined, so successive calls
relying on the default re-fit that same object. Pass your own instance to avoid sharing it.

Example::

    from sklearn.ensemble import ExtraTreesClassifier
    from rsgislib.classification import classratutils
    
    classifier = ExtraTreesClassifier(n_estimators=100, max_features=3, n_jobs=-1, verbose=0)
    
    classColours = dict()
    classColours['Forest'] = [0,138,0]
    classColours['NonForest'] = [200,200,200]
    
    variables = ['GreenAvg', 'RedAvg', 'NIR1Avg', 'NIR2Avg', 'NDVI']
    classifyWithinRATTiled(clumpsImg, classesIntCol, classesNameCol, variables, classifier=classifier, classColours=classColours)
        
    # With using range scaling.
    classifyWithinRATTiled(clumpsImg, classesIntCol, classesNameCol, variables, classifier=classifier, classColours=classColours, scaleVarsRange=True)

"""
    # Check gdal is available
    if not haveGDALPy:
        raise Exception(
            "The GDAL python bindings required for this function could not be imported\n\t"
            + gdalErr)
    # Check numpy is available
    if not haveNumpy:
        raise Exception(
            "The numpy module is required for this function could not be imported\n\t"
            + numErr)
    # Check rios rat is available
    if not haveRIOSRat:
        raise Exception(
            "The RIOS rat tools are required for this function could not be imported\n\t"
            + riosRatErr)
    # Check scikit-learn RF is available
    if not haveSKLearnRF:
        raise Exception(
            "The scikit-learn random forests tools are required for this function could not be imported\n\t"
            + sklearnRFErr)
    # Check scikit-learn pre-processing is available
    if not haveSKLearnPreProcess:
        raise Exception(
            "The scikit-learn pre-processing tools are required for this function could not be imported\n\t"
            + sklearnPreProcessErr)

    ratDataset = gdal.Open(clumpsImg, gdal.GA_Update)

    # Read in training classes
    classesInt = rat.readColumn(ratDataset, classesIntCol)
    classesStr = rat.readColumn(ratDataset, classesNameCol)
    ratDataset = None

    # Rows with a class id above zero are the training samples.
    validClassStr = classesStr[classesInt > 0]
    validClassInt = classesInt[classesInt > 0]

    classNames = numpy.unique(validClassStr)
    classes = numpy.zeros_like(classNames, dtype=numpy.int16)

    # Map every non-empty class name to the numerical id of its first
    # training sample.
    i = 0
    classNameIDs = dict()
    for className in classNames:
        classNameStr = str(className.decode())
        # Compare by value: identity against a literal is not guaranteed
        # to hold for equal strings.
        if classNameStr != '':
            classes[i] = validClassInt[validClassStr == className][0]
            classNameIDs[classNameStr] = classes[i]
            i = i + 1

    trainLen = validClassInt.shape[0]
    numVars = len(variables)

    # One row per training sample, one column per variable; filled in by
    # _extractTrainDataFromRAT through otherargs.
    trainData = numpy.zeros((trainLen, numVars), numpy.float64)

    in_rats = ratapplier.RatAssociations()
    out_rats = ratapplier.RatAssociations()
    in_rats.inrat = ratapplier.RatHandle(clumpsImg)

    otherargs = ratapplier.OtherArguments()
    otherargs.vars = variables
    otherargs.classIntCol = classesIntCol
    otherargs.trainData = trainData
    otherargs.trainDataOff = 0

    print("Extract Training Data")
    ratapplier.apply(_extractTrainDataFromRAT,
                     in_rats,
                     out_rats,
                     otherargs=otherargs,
                     controls=None)
    print("100%")

    # Drop every sample holding a non-finite value; the mask is computed
    # once, before trainData itself is filtered.
    finiteRows = numpy.isfinite(trainData).all(axis=1)
    validClassInt = validClassInt[finiteRows]
    validClassStr = validClassStr[finiteRows]
    trainData = trainData[finiteRows]

    print("Training data size: {} x {}".format(trainData.shape[0],
                                               trainData.shape[1]))

    print('Training Classifier')
    classifier.fit(trainData, validClassInt)
    print("Completed")

    # Scored on the training samples themselves.
    print('Calc Classifier Accuracy')
    accVal = classifier.score(trainData, validClassInt)
    print('Classifier Score = {}'.format(round(accVal * 100, 2)))

    if not justFit:
        print("Apply Classifier")
        # The same clumps image is read from and written to.
        in_rats = ratapplier.RatAssociations()
        out_rats = ratapplier.RatAssociations()
        in_rats.inrat = ratapplier.RatHandle(clumpsImg)
        out_rats.outrat = ratapplier.RatHandle(clumpsImg)

        otherargs = ratapplier.OtherArguments()
        otherargs.vars = variables
        otherargs.classifier = classifier
        otherargs.outColInt = outColInt
        otherargs.outColStr = outColStr
        otherargs.roiCol = roiCol
        otherargs.roiVal = roiVal
        otherargs.classColours = classColours
        otherargs.classNameIDs = classNameIDs

        ratapplier.apply(_applyClassifier,
                         in_rats,
                         out_rats,
                         otherargs=otherargs,
                         controls=None)
        print("100%")
# Combine the 50 classification runs stored in the RAT: the per-row mode
# is written to 'OutClass_mode' and the share of runs agreeing with the
# mode is collected in x_percent.

# Open RAT
inRatFile = outputClumps
ratDataset = gdal.Open(inRatFile, gdal.GA_Update)

# define column names for output classifications
runs = numpy.arange(1, 51)
x_col_names = []
for i in runs:
    # define output class column
    col_name = 'OutClass_' + str(i)
    x_col_names.append(col_name)

X = []
# Read in data from each column
for colName in x_col_names:
    X.append(rat.readColumn(ratDataset, colName))

# Shape (number of runs, number of RAT rows).
X_arr = numpy.asarray(X)

# Mode over the runs for every RAT row. Depending on the SciPy version the
# result either keeps the reduced axis (shape (1, rows)) or drops it
# (shape (rows,)); flattening yields one value per row in both cases.
mode = stats.mode(X_arr, axis=0)
mode = numpy.asarray(mode[0]).reshape(-1)
rios.rat.writeColumn(outputClumps, 'OutClass_mode', mode, colType=gdal.GFT_Integer)

# calc certainty from mode and count of mode
x_count = []  # not filled in this section; retained in case later code refers to it
x_percent = []
for i, m in zip(range(X_arr.shape[1]), mode):
    b = X_arr[:, i]
    count = numpy.count_nonzero(b == m)
    # float() keeps this a true division on Python 2 as well.
    x_percent.append(count / float(X_arr.shape[0]))

x_percent = numpy.asarray(x_percent)
Exemple #21
0
def clusterWithinRAT(clumpsImg,
                     variables,
                     clusterer=MiniBatchKMeans(n_clusters=8,
                                               init='k-means++',
                                               max_iter=100,
                                               batch_size=100),
                     outColInt="OutCluster",
                     roiCol=None,
                     roiVal=1,
                     clrClusters=True,
                     clrSeed=10,
                     addConnectivity=False,
                     preProcessor=None):
    """
Cluster the rows of a raster attribute table (RAT) with a scikit-learn clusterer and
write the resulting cluster IDs back to the RAT. Rows with a non-finite value in any of
the variables, and rows outside the region of interest, are left with a cluster ID of 0.
Cluster IDs are shifted, when needed, so that the smallest ID written is 1.

:param clumpsImg: the clumps image whose RAT is clustered (opened in update mode).
:param variables: list of RAT column names used as the clustering variables.
:param clusterer: a scikit-learn clusterer instance providing fit_predict (Default: MiniBatchKMeans with 8 clusters).
:param outColInt: name of the output column receiving the cluster IDs (Default: 'OutCluster').
:param roiCol: name of a column defining the region to be clustered; ignored if None or '' (Default: None).
:param roiVal: value within roiCol selecting the rows to be clustered (Default: 1).
:param clrClusters: if True the Red/Green/Blue columns are rewritten with a random colour per cluster (Default: True).
:param clrSeed: seed passed to random.seed before the colours are drawn (Default: 10).
:param addConnectivity: if True a 10 nearest neighbour kneighbors_graph is set as the 'connectivity' parameter of the clusterer (e.g., for AgglomerativeClustering).
:param preProcessor: optional scikit-learn pre-processor (e.g., sklearn.preprocessing.MaxAbsScaler()) which is fitted to and applied on each variable independently as it is read (Default: None).

Example::

    from rsgislib.classification import classratutils
    from sklearn.cluster import DBSCAN

    sklearnClusterer = DBSCAN(eps=1, min_samples=50)
    classratutils.clusterWithinRAT('MangroveClumps.kea', ['MinX', 'MinY'], clusterer=sklearnClusterer, outColInt="OutCluster", roiCol=None, roiVal=1, clrClusters=True, clrSeed=10, addConnectivity=False)

    # With pre-processor
    from sklearn.preprocessing import MaxAbsScaler
    classratutils.clusterWithinRAT('MangroveClumps.kea', ['MinX', 'MinY'], clusterer=sklearnClusterer, outColInt="OutCluster", roiCol=None, roiVal=1, clrClusters=True, clrSeed=10, addConnectivity=False, preProcessor=MaxAbsScaler())

"""
    # The optional dependencies are verified one at a time; each message is only
    # built when the corresponding import flag reports a failure.
    if not haveGDALPy:
        raise Exception(
            "The GDAL python bindings required for this function could not be imported\n\t"
            + gdalErr)
    if not haveNumpy:
        raise Exception(
            "The numpy module is required for this function could not be imported\n\t"
            + numErr)
    if not haveRIOSRat:
        raise Exception(
            "The RIOS rat tools are required for this function could not be imported\n\t"
            + riosRatErr)
    # scikit-learn Mini Batch KMeans (the default clusterer)
    if not haveSKLearnKM:
        raise Exception(
            "The scikit-learn Mini Batch KMeans tools are required for this function could not be imported\n\t"
            + sklearnMBKMErr)
    if not haveSKLearnPreProcess:
        raise Exception(
            "The scikit-learn pre-processing tools are required for this function could not be imported\n\t"
            + sklearnPreProcessErr)

    ratDataset = gdal.Open(clumpsImg, gdal.GA_Update)
    # The Histogram column is only used as a template for the output array size.
    Histogram = rat.readColumn(ratDataset, 'Histogram')

    # Read every variable column, optionally rescaling each one on its own.
    columnData = []
    for varName in variables:
        print("Reading " + varName)
        colVals = rat.readColumn(ratDataset, varName)
        if preProcessor is not None:
            colVals = colVals.reshape(-1, 1)
            colVals = preProcessor.fit_transform(colVals)
            colVals = colVals.reshape(-1)
        columnData.append(colVals)

    roi = None
    if roiCol is not None and roiCol != "":
        roi = rat.readColumn(ratDataset, roiCol)

    # Output labels default to 0; rowIdx tracks which RAT row each sample came from.
    outLabels = numpy.zeros_like(Histogram, dtype=numpy.int16)
    rowIdx = numpy.arange(outLabels.shape[0])

    # One row per clump, one column per variable.
    xData = numpy.array(columnData).transpose()

    # Drop rows holding a non-finite value in any variable.
    finiteRows = numpy.isfinite(xData).all(axis=1)
    xData = xData[finiteRows]
    rowIdx = rowIdx[finiteRows]

    # Restrict to the region of interest, if one was requested.
    if roi is not None:
        inROI = roi[finiteRows] == roiVal
        xData = xData[inROI]
        rowIdx = rowIdx[inROI]

    print("Input Data Size: {} x {}".format(xData.shape[0], xData.shape[1]))

    if addConnectivity:
        from sklearn.neighbors import kneighbors_graph
        inConnectivity = kneighbors_graph(xData,
                                          n_neighbors=10,
                                          include_self=False)
        clusterer.set_params(connectivity=inConnectivity)

    print('Fit Clusterer')
    outClust = clusterer.fit_predict(xData)

    # Shift the IDs so the smallest becomes 1, keeping 0 for unclustered rows.
    lowestID = numpy.min(outClust)
    if lowestID <= 0:
        outClust = outClust + (1 - lowestID)

    outLabels[rowIdx] = outClust

    print("Writing Columns")
    rat.writeColumn(ratDataset, outColInt, outLabels)

    print("Create and Write Output Class Names")
    clustersIDs = numpy.unique(outClust)

    if clrClusters:
        import random
        random.seed(clrSeed)

        print("Set Colours")
        # Start from an all black colour table of the same shape/type as the existing one.
        red = numpy.zeros_like(rat.readColumn(ratDataset, "Red"))
        green = numpy.zeros_like(rat.readColumn(ratDataset, "Green"))
        blue = numpy.zeros_like(rat.readColumn(ratDataset, "Blue"))

        # One random colour per cluster; values are drawn in red, green, blue order.
        for clusterID in clustersIDs:
            print("Colouring cluster: " + str(clusterID))
            inCluster = outLabels == clusterID
            red = numpy.where(inCluster, random.randint(0, 255), red)
            green = numpy.where(inCluster, random.randint(0, 255), green)
            blue = numpy.where(inCluster, random.randint(0, 255), blue)

        for colourName, colourVals in (("Red", red), ("Green", green),
                                       ("Blue", blue)):
            rat.writeColumn(ratDataset, colourName, colourVals)

    # Dropping the reference closes the GDAL dataset.
    ratDataset = None
    outColNum = cHorizonFields[outColName.strip()]

    # JOIN ATTRIBUTES FROM TEXT FILE
    print('Adding ' + outColName + ' (column ' + str(outColNum) + ') to RAT')
    # Open SSURGO text files
    componentFileName = os.path.join(inDIRName, 'tabular','comp.txt')
    chorizonFileName = os.path.join(inDIRName, 'tabular','chorizon.txt')
    
    componentFile = open(componentFileName,'rU')
    chorizonFile = open(chorizonFileName,'rU')
    
    componentTxt = csv.reader(componentFile,delimiter='|')
    chorizonTxt = csv.reader(chorizonFile,delimiter='|')
    
    # Get mukey column from input file
    mukeyCol = rat.readColumn(outKEAFile, 'mukey')
    
    # Set up blank columns for output (one for each layer)
    outColH1 = numpy.zeros_like(mukeyCol)
    outColH2 = numpy.zeros_like(mukeyCol) 
    outColH3 = numpy.zeros_like(mukeyCol) 
    outColH4 = numpy.zeros_like(mukeyCol)
    outColH5 = numpy.zeros_like(mukeyCol)
    outColH6 = numpy.zeros_like(mukeyCol)
    
    # Set columns for mukey and cokey in componentTxt
    compMUKEYCol = 107
    compCOKEYCol = 108
    
    chorizonCOKEYCol = 169
    chorizonHZNAMECol = 0
Exemple #23
0
def findClassifierParameters(clumpsImg,
                             classesIntCol,
                             variables,
                             preProcessor=None,
                             gridSearch=GridSearchCV(RandomForestClassifier(),
                                                     {})):
    """
Find the optimal parameters for a classifier using a grid search and return a classifier instance with those optimal parameters.

Only rows with a training class value greater than zero are used. Non-finite variable values are replaced with 0
before training. The RAT is only read from, so the image is opened read-only.

:param clumpsImg: is the clumps image on which the classification is to be performed
:param classesIntCol: is the column with the training data as int values
:param variables: is an array of column names which are to be used for the classification
:param preProcessor: is a scikit-learn processors such as sklearn.preprocessing.MaxAbsScaler() which can rescale the input variables independently as read in (Define: None; i.e., not in use).
:param gridSearch: is an instance of GridSearchCV parameterised with a classifier and parameters to be searched. It must have refit enabled, as the refitted best estimator is what is returned.

:return: Instance of the classifier with optimal parameters defined.

:raises Exception: if a required module could not be imported, the image cannot be opened, or gridSearch has refit disabled.

Example::

    from rsgislib.classification import classratutils
    from sklearn.svm import SVC
    from sklearn.model_selection import GridSearchCV
    from sklearn.preprocessing import MaxAbsScaler
    
    clumpsImg = "./LS8_20150621_lat10lon652_r67p233_clumps.kea"
    classesIntCol = 'ClassInt'
    
    classParameters = {'kernel':['linear', 'rbf',  'poly', 'sigmoid'], 'C':[1, 2, 3, 4, 5, 10, 100, 400, 500, 1e3, 5e3, 1e4, 5e4, 1e5], 'gamma':[0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1, 'auto'], 'degree':[2, 3, 4, 5, 6, 7, 8], 'class_weight':[None, 'balanced'], 'decision_function_shape':['ovo', 'ovr']}
    variables = ['BlueRefl', 'GreenRefl', 'RedRefl', 'NIRRefl', 'SWIR1Refl', 'SWIR2Refl']
    
    gSearch = GridSearchCV(SVC(), classParameters)
    classifier = classratutils.findClassifierParameters(clumpsImg, classesIntCol, variables, preProcessor=MaxAbsScaler(), gridSearch=gSearch)

"""
    # Check gdal is available
    if not haveGDALPy:
        raise Exception(
            "The GDAL python bindings required for this function could not be imported\n\t"
            + gdalErr)
    # Check numpy is available
    if not haveNumpy:
        raise Exception(
            "The numpy module is required for this function could not be imported\n\t"
            + numErr)
    # Check rios rat is available
    if not haveRIOSRat:
        raise Exception(
            "The RIOS rat tools are required for this function could not be imported\n\t"
            + riosRatErr)
    # Check scikit-learn pre-processing is available
    if not haveSKLearnPreProcess:
        raise Exception(
            "The scikit-learn pre-processing tools are required for this function could not be imported\n\t"
            + sklearnPreProcessErr)
    # Check scikit-learn Grid Search is available
    if not haveSKLearnGS:
        raise Exception(
            "The scikit-learn grid search tools are required for this function could not be imported\n\t"
            + sklearnGSErr)

    # Nothing is written to the RAT here, so read-only access is sufficient
    # (and also works on files the user cannot modify).
    ratDataset = gdal.Open(clumpsImg, gdal.GA_ReadOnly)
    if ratDataset is None:
        raise Exception("Could not open the input image '{}'".format(clumpsImg))

    try:
        numpyVars = []
        for var in variables:
            print("Reading " + var)
            tmpArr = rat.readColumn(ratDataset, var)
            if preProcessor is not None:
                # Scikit-learn pre-processors expect a 2D (n_samples, 1) array.
                tmpArr = tmpArr.reshape(-1, 1)
                tmpArr = preProcessor.fit_transform(tmpArr)
                tmpArr = tmpArr.reshape(-1)
            numpyVars.append(tmpArr)

        # Read in training classes
        classesInt = rat.readColumn(ratDataset, classesIntCol)
    finally:
        # Release the GDAL dataset as soon as the columns are in memory,
        # including when reading a column fails.
        ratDataset = None

    # One row per clump, one column per variable.
    xData = numpy.array(numpyVars)
    xData = xData.transpose()
    # Non-finite values are replaced with zero, so every row is retained.
    xData = numpy.where(numpy.isfinite(xData), xData, 0)

    print("Input data size: {} x {}".format(xData.shape[0], xData.shape[1]))

    # Only rows with a class ID greater than zero are training samples.
    isTraining = classesInt > 0
    trainingData = xData[isTraining]
    classesInt = classesInt[isTraining]

    print("Training data size: {} x {}".format(trainingData.shape[0],
                                               trainingData.shape[1]))
    print("Training data IDs size: {}".format(classesInt.shape[0]))

    classIDs = numpy.unique(classesInt)
    print(classIDs)
    for classID in classIDs:
        print("Class {} has {} samples.".format(
            classID, classesInt[classesInt == classID].shape[0]))

    # best_estimator_ only exists when the search refits the best parameters,
    # so fail before (rather than after) running the potentially long search.
    if not gridSearch.refit:
        raise Exception(
            "Grid Search has refit disabled so a best estimator cannot be returned; "
            "create the GridSearchCV instance with refit enabled.")

    gridSearch.fit(trainingData, classesInt)

    print("Best score was {} and has parameters {}.".format(
        gridSearch.best_score_, gridSearch.best_params_))

    return gridSearch.best_estimator_
Exemple #24
0
def apply_keras_pixel_classifier(classTrainInfo,
                                 keras_cls_mdl,
                                 imgMask,
                                 imgMaskVal,
                                 imgFileInfo,
                                 outClassImg,
                                 gdalformat,
                                 pred_batch_size=32,
                                 classClrNames=True):
    """
This function applies a trained single pixel keras model to an image. The function train_keras_pixel_classifer
can be used to train such as model. The output image will contain the hard membership of the predicted class.

:param classTrainInfo: dict (where the key is the class name) of rsgislib.classification.ClassInfoObj
                       objects which will be used to train the classifier (i.e., train_keras_pixel_classifer()),
                       provide pixel value id and RGB class values. The id values must be consecutive
                       starting from 0 as they index the output of the model.
:param keras_cls_mdl: a trained keras model object, with a input dimensions equivalent to the number of image
                      bands specified in the imgFileInfo input and output layer which provides an output array
                      of the length of the number of classes.
:param imgMask: is an image file providing a mask to specify where should be classified. Simplest mask is all the
                valid data regions (rsgislib.imageutils.genValidMask)
:param imgMaskVal: the pixel value within the imgMask to limit the region to which the classification is applied.
                   Can be used to create a hierarchical classification.
:param imgFileInfo: a list of rsgislib.imageutils.ImageBandInfo objects (also used within
                    rsgislib.imageutils.extractZoneImageBandValues2HDF) to identify which images and bands are to
                    be used for the classification so it adheres to the training data.
:param outClassImg: Output image which will contain the hard classification.
:param gdalformat: is the output image format - all GDAL supported formats are supported.
:param pred_batch_size: the batch size used for the classification.
:param classClrNames: default is True and therefore a colour table will the colours specified in ClassInfoObj
                      and a ClassName (from classTrainInfo) column will be added to the output file.

:raises Exception: if a ClassInfoObj id is outside the range 0 to (number of classes - 1).

    """
    def _applyKerasPxlClassifier(info, inputs, outputs, otherargs):
        # Per-block callback for rios applier: pixels outside the mask stay 0.
        outClassIdVals = numpy.zeros_like(inputs.imageMask, dtype=numpy.uint16)
        if numpy.any(inputs.imageMask == otherargs.mskVal):
            n_pxls = inputs.imageMask.shape[1] * inputs.imageMask.shape[2]
            outClassIdVals = outClassIdVals.flatten()
            imgMaskVals = inputs.imageMask.flatten()
            # numpy.float was removed in NumPy 1.24; float64 is the same type.
            classVars = numpy.zeros((n_pxls, otherargs.numClassVars),
                                    dtype=numpy.float64)
            # Array index which can be used to populate the output array following masking etc.
            ID = numpy.arange(imgMaskVals.shape[0])
            classVarsIdx = 0
            for imgFile in otherargs.imgFileInfo:
                imgArr = inputs.__dict__[imgFile.name]
                for band in imgFile.bands:
                    # Band numbers are 1 based, array indexes are 0 based.
                    classVars[..., classVarsIdx] = imgArr[(band - 1)].flatten()
                    classVarsIdx = classVarsIdx + 1
            # Only the pixels within the mask are passed to the model.
            inMask = imgMaskVals == otherargs.mskVal
            classVars = classVars[inMask]
            ID = ID[inMask]
            # Index of the most probable class for each pixel.
            preds_idxs = numpy.argmax(otherargs.classifier.predict(
                classVars, batch_size=otherargs.pred_batch_size),
                                      axis=1)
            # Translate model output indexes into the output pixel values.
            preds_cls_ids = numpy.zeros_like(preds_idxs, dtype=numpy.uint16)
            for idx, cld_id in enumerate(otherargs.cls_id_lut):
                preds_cls_ids[preds_idxs == idx] = cld_id

            outClassIdVals[ID] = preds_cls_ids
            # Restore the (1, rows, cols) block shape expected by the applier.
            outClassIdVals = numpy.expand_dims(outClassIdVals.reshape(
                (inputs.imageMask.shape[1], inputs.imageMask.shape[2])),
                                               axis=0)
        outputs.outclsimage = outClassIdVals

    infiles = applier.FilenameAssociations()
    infiles.imageMask = imgMask
    numClassVars = 0
    for imgFile in imgFileInfo:
        infiles.__dict__[imgFile.name] = imgFile.fileName
        numClassVars = numClassVars + len(imgFile.bands)

    # Look up table: model output index (ClassInfoObj.id) -> output pixel value (out_id).
    n_classes = len(classTrainInfo)
    cls_id_lut = numpy.zeros(n_classes)
    for clsname in classTrainInfo:
        clsInfo = classTrainInfo[clsname]
        # A negative id would silently wrap around and overwrite another
        # entry of the look up table, so it is rejected as well.
        if clsInfo.id < 0 or clsInfo.id >= n_classes:
            # A bare string cannot be raised; it must be wrapped in an exception.
            raise Exception(
                "ClassInfoObj '{}' id ({}) is not consecutive starting from 0."
                .format(clsname, clsInfo.id))
        cls_id_lut[clsInfo.id] = clsInfo.out_id

    outfiles = applier.FilenameAssociations()
    outfiles.outclsimage = outClassImg
    otherargs = applier.OtherInputs()
    otherargs.classifier = keras_cls_mdl
    otherargs.pred_batch_size = pred_batch_size
    otherargs.mskVal = imgMaskVal
    otherargs.numClassVars = numClassVars
    otherargs.imgFileInfo = imgFileInfo
    otherargs.n_classes = n_classes
    otherargs.cls_id_lut = cls_id_lut

    # Prefer the tqdm progress bar; fall back to the GDAL style one if it
    # is unavailable. Exception (not a bare except) is caught so that
    # KeyboardInterrupt / SystemExit are not swallowed.
    try:
        import tqdm  # noqa: F401 - imported only to test availability
        progress_bar = rsgislib.TQDMProgressBar()
    except Exception:
        progress_bar = cuiprogress.GDALProgressBar()

    aControls = applier.ApplierControls()
    aControls.progress = progress_bar
    aControls.drivername = gdalformat
    aControls.omitPyramids = True
    aControls.calcStats = False
    print("Applying the Classifier")
    applier.apply(_applyKerasPxlClassifier,
                  infiles,
                  outfiles,
                  otherargs,
                  controls=aControls)
    print("Completed Classification")

    if classClrNames:
        rsgislib.rastergis.populateStats(outClassImg,
                                         addclrtab=True,
                                         calcpyramids=True,
                                         ignorezero=True)
        ratDataset = gdal.Open(outClassImg, gdal.GA_Update)
        red = rat.readColumn(ratDataset, 'Red')
        green = rat.readColumn(ratDataset, 'Green')
        blue = rat.readColumn(ratDataset, 'Blue')
        # 'S255' is the supported spelling of the deprecated 'a255' dtype code.
        ClassName = numpy.empty_like(red, dtype=numpy.dtype('S255'))
        ClassName[...] = ""

        # The RAT row index corresponds to the output pixel value (out_id).
        for classKey in classTrainInfo:
            print("Apply Colour to class \'" + classKey + "\'")
            clsInfo = classTrainInfo[classKey]
            red[clsInfo.out_id] = clsInfo.red
            green[clsInfo.out_id] = clsInfo.green
            blue[clsInfo.out_id] = clsInfo.blue
            ClassName[clsInfo.out_id] = classKey

        rat.writeColumn(ratDataset, "Red", red)
        rat.writeColumn(ratDataset, "Green", green)
        rat.writeColumn(ratDataset, "Blue", blue)
        rat.writeColumn(ratDataset, "ClassName", ClassName)
        # Dropping the reference closes the GDAL dataset.
        ratDataset = None
Exemple #25
0
def balanceSampleTrainingRandom(clumpsImg, trainCol, outTrainCol, minNoSamples,
                                maxNoSamples):
    """
Randomly sub-sample the training samples held in a RAT column so every retained class
ends up with the same number of samples. Classes with fewer than minNoSamples samples are
dropped (left as zero in the output). The number of samples drawn per retained class is the
size of the smallest retained class, capped at maxNoSamples.

:param clumpsImg: is a string with the file path to the input image with RAT
:param trainCol: is a string for the name of the input column specifying the training samples (zero is no data)
:param outTrainCol: is a string with the name of the column the balanced training samples are written to.
:param minNoSamples: is an int specifying the minimum number of training samples for a class (if below threshold class is removed).
:param maxNoSamples: is an int specifying the maximum number of training samples per class.

"""
    # Verify the optional dependencies were imported.
    if not haveGDALPy:
        raise Exception(
            "The GDAL python bindings required for this function could not be imported\n\t"
            + gdalErr)
    if not haveNumpy:
        raise Exception(
            "The numpy module is required for this function could not be imported\n\t"
            + numErr)
    if not haveRIOSRat:
        raise Exception(
            "The RIOS rat tools are required for this function could not be imported\n\t"
            + riosRatErr)

    ratDataset = gdal.Open(clumpsImg, gdal.GA_Update)
    trainColVals = rat.readColumn(ratDataset, trainCol)
    # Everything starts as 0 (no data); only the sampled rows get a class ID.
    trainColOutVals = numpy.zeros_like(trainColVals)

    # Zero marks rows without training data, so it is not a class.
    uniqueVals = numpy.unique(trainColVals)
    classIDs = uniqueVals[uniqueVals != 0]

    print("Number of input samples:")
    numSampPerClass = []
    for classID in classIDs:
        nSamples = trainColVals[trainColVals == classID].shape[0]
        print("\tClass {} has {} samples.".format(classID, nSamples))
        numSampPerClass.append(nSamples)

    # Size of the smallest class which meets the minimum threshold
    # (0 when no class does), limited to the maximum allowed.
    retainedSizes = [nSamples for nSamples in numSampPerClass
                     if nSamples >= minNoSamples]
    minNumSamples = min(retainedSizes) if retainedSizes else 0
    if minNumSamples > maxNoSamples:
        minNumSamples = maxNoSamples

    print("Number of output samples:")
    for classID, nSamples in zip(classIDs, numSampPerClass):
        if nSamples >= minNoSamples:
            # Draw, without replacement, from the rows belonging to this class.
            classRows = numpy.where(trainColVals == classID)[0]
            chosenRows = numpy.random.choice(classRows,
                                             minNumSamples,
                                             replace=False)
            trainColOutVals[chosenRows] = classID
        # Classes below the threshold are reported with 0 samples.
        print("\tClass {} has {} samples.".format(
            classID,
            trainColOutVals[trainColOutVals == classID].shape[0]))

    rat.writeColumn(ratDataset, outTrainCol, trainColOutVals)

    # Dropping the reference closes the GDAL dataset.
    ratDataset = None
	print('')
	classesIntCol = 'ClassInt'
	rsgislib.classification.classratutils.balanceSampleTrainingRandom(outputClumps, classesIntCol, 'classesIntColBal', 50, 5000) # rebalance the training data
	classesIntCol='classesIntColBal'
	# run the classifier
	classratutils.classifyWithinRAT(outputClumps, classesIntCol, classesNameCol, variables, classifier=classifier, classColours=classColours,preProcessor=MaxAbsScaler(),outColInt=outColInt)

###########################################################################################
# Read all results from RAT and extract mode, providing final result
# Also, mask out nan values from the classification where vvMax==0

# `outputClumps` must already be defined by earlier code.
inRatFile = outputClumps
ratDataset = gdal.Open(inRatFile, gdal.GA_Update) # Open RAT

# Each column is read into a one element list and then unwrapped into an array.
vvMax_val=[]
vvMax_val.append(rat.readColumn(ratDataset, 'VVMax')) # read in the 'VVMax' column
vvMax_val=numpy.asarray(vvMax_val[0])

guf_val=[]
guf_val.append(rat.readColumn(ratDataset, 'gufMax')) # read in the 'gufMax' column (presumably the urban footprint -- confirm)
guf_val=numpy.asarray(guf_val[0])

# define column names for output classifications
# NOTE(review): the definition of `runs` below is commented out, so `runs`
# must already be defined by earlier code for the loop to work.
#runs=numpy.arange(1,51)
x_col_names = []
for i in runs:
	# define output class column, i.e. 'OutClass_<run>'
	col_name='OutClass_'+str(i)
	x_col_names.append(col_name)

# Empty list; it is not filled within this fragment.
X=[]
Exemple #27
0
def classifyWithinRAT(clumpsImg,
                      classesIntCol,
                      classesNameCol,
                      variables,
                      classifier=RandomForestClassifier(n_estimators=100,
                                                        max_features=3,
                                                        oob_score=True,
                                                        n_jobs=-1),
                      outColInt="OutClass",
                      outColStr="OutClassName",
                      roiCol=None,
                      roiVal=1,
                      classColours=None,
                      preProcessor=None,
                      justFit=False):
    """
A function which will perform a classification within the RAT using a classifier from scikit-learn

Rows with a training class value greater than zero are used to train the classifier. Non-finite variable
values are replaced with 0, so every row (or every row within the region of interest) is classified.

:param clumpsImg: is the clumps image on which the classification is to be performed
:param classesIntCol: is the column with the training data as int values
:param classesNameCol: is the column with the training data as string class names
:param variables: is an array of column names which are to be used for the classification
:param classifier: is an instance of a scikit-learn classifier (e.g., RandomForests which is Default)
:param outColInt: is the output column name for the int class representation (Default: 'OutClass')
:param outColStr: is the output column name for the class names column (Default: 'OutClassName')
:param roiCol: is a column name for a column which specifies the region to be classified. If None ignored (Default: None)
:param roiVal: is a int value used within the roiCol to select a region to be classified (Default: 1)
:param classColours: is a python dict using the class name as the key along with arrays of length 3 specifying the RGB colours for the class.
:param preProcessor: is a scikit-learn processors such as sklearn.preprocessing.MaxAbsScaler() which can rescale the input variables independently as read in (Define: None; i.e., not in use).
:param justFit: is a boolean specifying that the classifier should just be fitted to the data and not applied (Default: False; i.e., apply classification)


Example::

    from sklearn.ensemble import ExtraTreesClassifier
    from rsgislib.classification import classratutils
    
    classifier = ExtraTreesClassifier(n_estimators=100, max_features=3, n_jobs=-1, verbose=0)
    
    classColours = dict()
    classColours['Forest'] = [0,138,0]
    classColours['NonForest'] = [200,200,200]
    
    variables = ['GreenAvg', 'RedAvg', 'NIR1Avg', 'NIR2Avg', 'NDVI']
    classifyWithinRAT(clumpsImg, classesIntCol, classesNameCol, variables, classifier=classifier, classColours=classColours)
    
    from sklearn.preprocessing import MaxAbsScaler
    
    # With pre-processor
    classifyWithinRAT(clumpsImg, classesIntCol, classesNameCol, variables, classifier=classifier, classColours=classColours, preProcessor=MaxAbsScaler())

"""
    # Check gdal is available
    if not haveGDALPy:
        raise Exception(
            "The GDAL python bindings required for this function could not be imported\n\t"
            + gdalErr)
    # Check numpy is available
    if not haveNumpy:
        raise Exception(
            "The numpy module is required for this function could not be imported\n\t"
            + numErr)
    # Check rios rat is available
    if not haveRIOSRat:
        raise Exception(
            "The RIOS rat tools are required for this function could not be imported\n\t"
            + riosRatErr)
    # Check scikit-learn RF is available
    if not haveSKLearnRF:
        raise Exception(
            "The scikit-learn random forests tools are required for this function could not be imported\n\t"
            + sklearnRFErr)
    # Check scikit-learn pre-processing is available
    if not haveSKLearnPreProcess:
        raise Exception(
            "The scikit-learn pre-processing tools are required for this function could not be imported\n\t"
            + sklearnPreProcessErr)

    ratDataset = gdal.Open(clumpsImg, gdal.GA_Update)
    numpyVars = []
    for var in variables:
        print("Reading " + var)
        tmpArr = rat.readColumn(ratDataset, var)
        if preProcessor is not None:
            # Scikit-learn pre-processors expect a 2D (n_samples, 1) array.
            tmpArr = tmpArr.reshape(-1, 1)
            tmpArr = preProcessor.fit_transform(tmpArr)
            tmpArr = tmpArr.reshape(-1)
        numpyVars.append(tmpArr)

    # Read in training classes
    classesInt = rat.readColumn(ratDataset, classesIntCol)
    classesStr = rat.readColumn(ratDataset, classesNameCol)

    roi = None
    if not ((roiCol is None) or (roiCol == "")):
        roi = rat.readColumn(ratDataset, roiCol)

    # Set up output arrays ('S255' is the supported spelling of the
    # deprecated 'a255' dtype code); ID maps samples back to RAT rows.
    outLabels = numpy.zeros_like(classesInt, dtype=numpy.int16)
    outClassNames = numpy.empty_like(classesInt, dtype=numpy.dtype('S255'))
    ID = numpy.arange(outLabels.shape[0])

    # One row per clump, one column per variable.
    xData = numpy.array(numpyVars)
    xData = xData.transpose()
    # Non-finite values are replaced with zero, so every row is retained.
    xData = numpy.where(numpy.isfinite(xData), xData, 0)

    print("Input data size: {} x {}".format(xData.shape[0], xData.shape[1]))

    # Only rows with a class ID greater than zero are training samples.
    isTraining = classesInt > 0
    trainingData = xData[isTraining]
    classesStr = classesStr[isTraining]
    classesInt = classesInt[isTraining]

    print("Training data size: {} x {}".format(trainingData.shape[0],
                                               trainingData.shape[1]))

    print('Training Classifier')
    classifier.fit(trainingData, classesInt)

    print('Calc Classifier Accuracy')
    accVal = classifier.score(trainingData, classesInt)
    print('Classifier Score = {}'.format(round(accVal * 100, 2)))

    if not justFit:
        if roi is not None:
            inROI = roi == roiVal
            xData = xData[inROI]
            ID = ID[inROI]
            print("ROI Subsetted data size: {} x {}".format(
                xData.shape[0], xData.shape[1]))

        predClass = classifier.predict(xData)

        # Rows outside the region of interest keep the label 0.
        outLabels[ID] = predClass

        print("Writing Columns")
        rat.writeColumn(ratDataset, outColInt, outLabels)

        print("Create and Write Output Class Names")
        classNames = numpy.unique(classesStr)

        # Map each (non-empty) class name to the numerical ID of its first training sample.
        classNameIDs = dict()
        for className in classNames:
            # RAT string columns may be returned as bytes or as str.
            if isinstance(className, bytes):
                classNameStr = str(className.decode())
            else:
                classNameStr = str(className)
            # Compare by value: identity ('is') against a string literal is
            # implementation dependent and does not reliably detect ''.
            if classNameStr != '':
                classID = classesInt[classesStr == className][0]
                classNameIDs[classNameStr] = classID
                print("Class \'" + classNameStr + "\' has numerical " +
                      str(classID))

        outClassNames[...] = ''
        for className in classNameIDs:
            classID = classNameIDs[className]
            outClassNames[outLabels == classID] = className

        rat.writeColumn(ratDataset, outColStr, outClassNames)

        if classColours is not None:
            print("Set Colours")
            red = rat.readColumn(ratDataset, "Red")
            green = rat.readColumn(ratDataset, "Green")
            blue = rat.readColumn(ratDataset, "Blue")

            # Set Background to black
            red[...] = 0
            green[...] = 0
            blue[...] = 0

            # Set colours (classColours must have an entry for every class name)
            for className in classNameIDs:
                print("Colouring class " + className)
                classID = classNameIDs[className]
                colours = classColours[className]
                inClass = outLabels == classID

                red = numpy.where(inClass, colours[0], red)
                green = numpy.where(inClass, colours[1], green)
                blue = numpy.where(inClass, colours[2], blue)

            rat.writeColumn(ratDataset, "Red", red)
            rat.writeColumn(ratDataset, "Green", green)
            rat.writeColumn(ratDataset, "Blue", blue)

    # Dropping the reference closes the GDAL dataset.
    ratDataset = None
Exemple #28
0
def _apply_voting_classifier(inParams):
    """
    Internal function which is used by perform_voting_classification to
    produce the classification image for a single vote.

    It is defined at module level (rather than nested inside
    perform_voting_classification) so that it can be pickled and sent to
    worker processes when ``Pool`` is a ``multiprocessing`` pool.

    :param inParams: a dict with the keys 'skClassifier', 'cTmpDIR',
                     'classAreaMask', 'classMaskPxlVal', 'imgFileInfo',
                     'tmpClassImgOut', 'gdalformat', 'trainSamplesInfo'
                     and 'rndSeed'.
    """
    skClassifier = inParams['skClassifier']
    cTmpDIR = inParams['cTmpDIR']
    classAreaMask = inParams['classAreaMask']
    classMaskPxlVal = inParams['classMaskPxlVal']
    imgFileInfo = inParams['imgFileInfo']
    tmpClassImgOut = inParams['tmpClassImgOut']
    gdalformat = inParams['gdalformat']
    trainSamplesInfo = inParams['trainSamplesInfo']
    rndSeed = inParams['rndSeed']

    classTrainInfo = dict()
    for trainSamples in trainSamplesInfo:
        sampImgFile = os.path.join(cTmpDIR, trainSamples.outSampImgFile)
        sampH5File = os.path.join(cTmpDIR, trainSamples.samplesH5File)
        # Draw a fresh random sample of the training mask for this vote.
        rsgislib.imageutils.performRandomPxlSampleInMaskLowPxlCount(
            inputImage=trainSamples.maskImg,
            outputImage=sampImgFile,
            gdalformat=gdalformat,
            maskvals=[trainSamples.maskPxlVal],
            numSamples=trainSamples.numSamps,
            rndSeed=rndSeed)
        # Extract the band values under the sampled pixels to a HDF5 file.
        rsgislib.imageutils.extractZoneImageBandValues2HDF(
            imgFileInfo, sampImgFile, sampH5File, trainSamples.maskPxlVal)
        classTrainInfo[trainSamples.className] = ClassSimpleInfoObj(
            id=trainSamples.classID,
            fileH5=sampH5File,
            red=trainSamples.red,
            green=trainSamples.green,
            blue=trainSamples.blue)

    train_sklearn_classifier(classTrainInfo, skClassifier)
    apply_sklearn_classifer(classTrainInfo, skClassifier, classAreaMask,
                            classMaskPxlVal, imgFileInfo, tmpClassImgOut,
                            gdalformat)


def perform_voting_classification(skClassifiers,
                                  trainSamplesInfo,
                                  imgFileInfo,
                                  classAreaMask,
                                  classMaskPxlVal,
                                  tmpDIR,
                                  tmpImgBase,
                                  outClassImg,
                                  gdalformat='KEA',
                                  numCores=-1):
    """
A function which will perform a number of classifications, creating a combined classification by a simple vote.
The classifier parameters can differ as a list of classifiers is provided (the length of the list is equal
to the number of votes), where the training data is resampled for each classifier. The analysis can be performed
using multiple processing cores.

Where:

:param skClassifiers: a list of classifiers (from scikit-learn), the number of classifiers defined
                      will be equal to the number of votes.
:param trainSamplesInfo: a list of rsgislib.classification.classimgutils.SamplesInfoObj objects used to
                         parameterise the classifier and extract training data.
:param imgFileInfo: a list of rsgislib.imageutils.ImageBandInfo objects (also used within
                    rsgislib.imageutils.extractZoneImageBandValues2HDF) to identify which images and bands are
                    to be used for the classification so it adheres to the training data.
:param classAreaMask: a mask image which is used to specify the areas of the scene which are to be classified.
:param classMaskPxlVal: is the pixel value within the classAreaMask image for the areas of the image
                        which are to be classified.
:param tmpDIR: a temporary file location which will be created (if not already present) and removed during
               processing. If the directory already exists only the per-vote sub-directories are removed.
:param tmpImgBase: the base name of the files written to the tmpDIR
:param outClassImg: the final output image file.
:param gdalformat: the output file format for outClassImg
:param numCores: is the number of processing cores to be used for the analysis (if -1 then all cores on the machine will be used).

:raises rsgislib.RSGISPyException: if skClassifiers is not a list or is empty.

Example::

    classVoteTemp = os.path.join(imgTmp, 'ClassVoteTemp')

    imgFileInfo = [rsgislib.imageutils.ImageBandInfo(img2010dB, 'sardb', [1,2]), rsgislib.imageutils.ImageBandInfo(imgSRTM, 'srtm', [1])]
    trainSamplesInfo = []
    trainSamplesInfo.append(SamplesInfoObj(className='Water', classID=1, maskImg=classTrainRegionsMask, maskPxlVal=1, outSampImgFile='WaterSamples.kea', numSamps=500, samplesH5File='WaterSamples_pxlvals.h5', red=0, green=0, blue=255))
    trainSamplesInfo.append(SamplesInfoObj(className='Land', classID=2, maskImg=classTrainRegionsMask, maskPxlVal=2, outSampImgFile='LandSamples.kea', numSamps=500, samplesH5File='LandSamples_pxlvals.h5', red=150, green=150, blue=150))
    trainSamplesInfo.append(SamplesInfoObj(className='Mangroves', classID=3, maskImg=classTrainRegionsMask, maskPxlVal=3, outSampImgFile='MangroveSamples.kea', numSamps=500, samplesH5File='MangroveSamples_pxlvals.h5', red=0, green=153, blue=0))

    skClassifiers = []
    for i in range(5):
        skClassifiers.append(ExtraTreesClassifier(n_estimators=50))

    for i in range(5):
        skClassifiers.append(ExtraTreesClassifier(n_estimators=100))

    for i in range(5):
        skClassifiers.append(ExtraTreesClassifier(n_estimators=50, max_depth=2))

    for i in range(5):
        skClassifiers.append(ExtraTreesClassifier(n_estimators=100, max_depth=2))

    mangroveRegionClassImg = 'MangroveRegionClass.kea'
    classsklearn.perform_voting_classification(skClassifiers, trainSamplesInfo, imgFileInfo, classWithinMask, 1, classVoteTemp, 'ClassImgSample', mangroveRegionClassImg, gdalformat='KEA', numCores=-1)

    """
    if not isinstance(skClassifiers, list):
        raise rsgislib.RSGISPyException(
            "A list of classifiers must be provided")

    numOfVotes = len(skClassifiers)
    if numOfVotes == 0:
        # Without any votes there are no images to combine.
        raise rsgislib.RSGISPyException(
            "At least one classifier must be provided")

    if numCores <= 0:
        rsgisUtils = rsgislib.RSGISPyUtils()
        numCores = rsgisUtils.numProcessCores()

    # Remember whether tmpDIR pre-existed so that only what was created
    # by this function is removed at the end.
    tmpPresent = True
    if not os.path.exists(tmpDIR):
        os.makedirs(tmpDIR)
        tmpPresent = False

    outClassImgs = []
    mCoreParams = []
    dirs2DEL = []
    # Re-seed the module level generator (random.seed() returns None, so
    # there is nothing useful to keep hold of).
    random.seed()
    for i in range(numOfVotes):
        # Each vote gets its own (clean) working directory.
        cTmpDIR = os.path.join(tmpDIR, str(i))
        if os.path.exists(cTmpDIR):
            shutil.rmtree(cTmpDIR, ignore_errors=True)
        os.makedirs(cTmpDIR)
        dirs2DEL.append(cTmpDIR)

        tmpClassImgOut = os.path.join(tmpDIR,
                                      tmpImgBase + '_' + str(i) + '.kea')
        outClassImgs.append(tmpClassImgOut)
        mCoreParams.append({
            'skClassifier': skClassifiers[i],
            'cTmpDIR': cTmpDIR,
            'classAreaMask': classAreaMask,
            'classMaskPxlVal': classMaskPxlVal,
            'imgFileInfo': imgFileInfo,
            'tmpClassImgOut': tmpClassImgOut,
            # The per-vote images are always KEA (matching the '.kea'
            # extension above); gdalformat only applies to outClassImg.
            'gdalformat': 'KEA',
            'trainSamplesInfo': trainSamplesInfo,
            'rndSeed': random.randrange(1000),
        })

    # Run processing on multiple cores, always releasing the workers.
    mProccesPool = Pool(numCores)
    try:
        mProccesPool.map(_apply_voting_classifier, mCoreParams)
    finally:
        mProccesPool.close()
        mProccesPool.join()

    # Combine results using MODE.
    rsgislib.imagecalc.calcMultiImgBandStats(outClassImgs, outClassImg,
                                             rsgislib.SUMTYPE_MODE, gdalformat,
                                             rsgislib.TYPE_8UINT, 0, True)
    rsgislib.rastergis.populateStats(clumps=outClassImg,
                                     addclrtab=True,
                                     calcpyramids=True,
                                     ignorezero=True)

    # Colour output classification image.
    ratDataset = gdal.Open(outClassImg, gdal.GA_Update)
    red = rat.readColumn(ratDataset, 'Red')
    green = rat.readColumn(ratDataset, 'Green')
    blue = rat.readColumn(ratDataset, 'Blue')
    # Zero initialised so rows without a class get an empty name rather
    # than whatever bytes happened to be in uninitialised memory.
    ClassName = numpy.zeros_like(red, dtype=numpy.dtype('S255'))
    numRows = red.shape[0]

    for trainSample in trainSamplesInfo:
        if trainSample.classID >= numRows:
            # The attribute table has no row for this class ID, so there
            # is nothing to colour.
            print("Class \'" + trainSample.className +
                  "\' is not present in the output image attribute table")
            continue
        print("Apply Colour to class \'" + trainSample.className + "\'")
        red[trainSample.classID] = trainSample.red
        green[trainSample.classID] = trainSample.green
        blue[trainSample.classID] = trainSample.blue
        ClassName[trainSample.classID] = trainSample.className

    rat.writeColumn(ratDataset, "Red", red)
    rat.writeColumn(ratDataset, "Green", green)
    rat.writeColumn(ratDataset, "Blue", blue)
    rat.writeColumn(ratDataset, "ClassName", ClassName)
    # Dropping the reference closes the GDAL dataset.
    ratDataset = None

    if not tmpPresent:
        shutil.rmtree(tmpDIR, ignore_errors=True)
    else:
        for cDIR in dirs2DEL:
            shutil.rmtree(cDIR, ignore_errors=True)
Exemple #29
0
                        outSelectCol='NDVISamplingMang',
                        propOfSample=0.1,
                        binWidth=0.01,
                        classColumn='Class',
                        classVal='2')
# Histogram based sampling of the NDVI column for class '3', flagging the
# selected clumps in the 'NDVISamplingOther' column.
rastergis.histoSampling(
    clumps=clumpsImg,
    varCol='NDVI',
    outSelectCol='NDVISamplingOther',
    propOfSample=0.05,
    binWidth=0.01,
    classColumn='Class',
    classVal='3',
)

print("Open GDAL Dataset")
ratDataset = gdal.Open(clumpsImg, gdal.GA_Update)

# Read the six sampling selection columns from the attribute table.
HHSamplingWater = rat.readColumn(ratDataset, "HHSamplingWater")
HHSamplingMang = rat.readColumn(ratDataset, "HHSamplingMang")
HHSamplingOther = rat.readColumn(ratDataset, "HHSamplingOther")
NDVISamplingWater = rat.readColumn(ratDataset, "NDVISamplingWater")
NDVISamplingMang = rat.readColumn(ratDataset, "NDVISamplingMang")
NDVISamplingOther = rat.readColumn(ratDataset, "NDVISamplingOther")

# A clump is a training clump if any of the sampling columns selected it.
selectedBySampling = HHSamplingWater == 1
for samplingCol in (HHSamplingMang, HHSamplingOther, NDVISamplingWater,
                    NDVISamplingMang, NDVISamplingOther):
    selectedBySampling = selectedBySampling | (samplingCol == 1)

Training = numpy.zeros_like(HHSamplingWater, dtype=int)
Training[selectedBySampling] = 1

# Export column to RAT
rat.writeColumn(ratDataset, "Training", Training)
# Release the dataset reference so GDAL closes the file.
ratDataset = None
Exemple #30
0
def calcClearSkyRegions(cloudsImg, validAreaImg, outputClearSkyMask, outFormat, tmpPath='./tmpClearSky', deleteTmpFiles=True, initClearSkyRegionDist=5000, initClearSkyRegionMinSize=3000, finalClearSkyRegionDist=1000, morphSize=21):
    """
Given a cloud mask, identify the larger extent regions of useful clear-sky regions.

:param cloudsImg: An image with the input mask of the cloud (pixel == 1) and shadow (pixel == 2)
:param validAreaImg: A mask of the image data area (1 = valid and 0 = not-valid; i.e., outside of the data area)
:param outputClearSkyMask: The output mask of the clear sky areas
:param outFormat: The output image format (only used for outputClearSkyMask).
:param tmpPath: The path for temporary images produced during the processing to be stored; it is created if it does not exist (Default: './tmpClearSky'; Note. all temp files are generated as KEA files).
:param deleteTmpFiles: Boolean as to whether the intermediate files should be deleted following processing (Default: True - delete files).
:param initClearSkyRegionDist: The distance in metres from a cloud/shadow object for the initial identification of clear sky regions (Default: 5000)
:param initClearSkyRegionMinSize: The minimum size (in pixels) of the initial clear sky regions (Default: 3000 pixels)
:param finalClearSkyRegionDist: The distance in metres from a cloud/shadow object for the final boundaries of the clear sky regions (Default: 1000)
:param morphSize: the size of the circular morphological operator used to tidy up the result; must be odd (Default: 21)

:raises rsgislib.RSGISPyException: if morphSize is an even number.

Example::

    import rsgislib.imagecalibration
    cloudsImg = "./Outputs/LS8_20160605_lat52lon261_r24p203_clouds.kea"
    validAreaImg = "./Outputs/LS8_20160605_lat52lon261_r24p203_valid.kea"
    outputMask = "./Outputs/LS8_20160605_lat52lon261_r24p203_openskyvalid.kea"
    tmpPath = "./temp"
    rsgislib.imagecalibration.calcClearSkyRegions(cloudsImg, validAreaImg, outputMask, 'KEA', tmpPath)

"""
    
    import rsgislib
    import rsgislib.imagecalc
    import rsgislib.imageutils
    import rsgislib.segmentation
    import rsgislib.rastergis
    import rsgislib.vectorutils
    import rsgislib.imagemorphology
    import os
    import os.path
    import osgeo.gdal as gdal
    from rios import rat
    import numpy
    
    if morphSize % 2 == 0:
        raise rsgislib.RSGISPyException("The size of the morphology operator must be odd.")
    
    # Make sure there is somewhere to write the intermediate files and
    # remember whether the directory was created here.
    createdTmpDIR = False
    if not os.path.exists(tmpPath):
        os.makedirs(tmpPath)
        createdTmpDIR = True
    
    baseDataName = os.path.splitext(os.path.basename(cloudsImg))[0]  
    tmpCloudsImgDist2Clouds = os.path.join(tmpPath, baseDataName+"_dist2clouds.kea")
    tmpCloudsImgDist2CloudsNoData = os.path.join(tmpPath, baseDataName+"_dist2clouds_masked.kea")
    tmpInitClearSkyRegions = os.path.join(tmpPath, baseDataName+"initclearsky.kea")
    tmpInitClearSkyRegionsClumps = os.path.join(tmpPath, baseDataName+"initclearskyClumps.kea")
    tmpInitClearSkyRegionsRmSmall = os.path.join(tmpPath, baseDataName+"initclearskyClumpsRMSmall.kea")
    tmpInitClearSkyRegionsFinal = os.path.join(tmpPath, baseDataName+"initclearskyClumpsFinal.kea")
    tmpClearSkyRegionsFullExtent = os.path.join(tmpPath, baseDataName+"clearskyClumpsFullExtent.kea")
    tmpClearSkyRegionsFullExtentClumps = os.path.join(tmpPath, baseDataName+"clearskyClumpsFullExtentClumps.kea")
    tmpClearSkyRegionsFullExtentSelectClumps = os.path.join(tmpPath, baseDataName+"clearskyClumpsFullExtentSelectClumps.kea")
    tmpClearSkyRegionsFullExtentSelectClumpsOpen = os.path.join(tmpPath, baseDataName+"clearskyClumpsFullExtentSelectClumpsOpen.kea")
    tmpClearSkyRegionsFullExtentSelectClumpsOpenClump = os.path.join(tmpPath, baseDataName+"clearskyClumpsFullExtentSelectClumpsOpenClump.kea")
    tmpClearSkyRegionsFullExtentSelectClumpsOpenClumpRMSmall = os.path.join(tmpPath, baseDataName+"clearskyClumpsFullExtentSelectClumpsOpenClumpRMSmall.kea")
    tmpMorphOperator = os.path.join(tmpPath, 'CircularMorphOp.gmtxt')
    
    # Distance of every pixel to the nearest cloud (1) or shadow (2) pixel.
    rsgislib.imagecalc.calcDist2ImgVals(cloudsImg, tmpCloudsImgDist2Clouds, pxlVals=[1,2])
        
    # Pixels where validAreaImg is 0 are set to -1 in the distance image.
    rsgislib.imageutils.maskImage(tmpCloudsImgDist2Clouds, validAreaImg, tmpCloudsImgDist2CloudsNoData, 'KEA', rsgislib.TYPE_32INT, -1, 0)    
            
    # Initial clear sky regions: further than initClearSkyRegionDist from
    # cloud/shadow. Written as KEA as it is a temporary '.kea' file.
    rsgislib.imagecalc.imageMath(tmpCloudsImgDist2CloudsNoData, tmpInitClearSkyRegions, 'b1 > '+str(initClearSkyRegionDist), 'KEA', rsgislib.TYPE_32UINT)
    
    rsgislib.segmentation.clump(tmpInitClearSkyRegions, tmpInitClearSkyRegionsClumps, 'KEA', False, 0.0, False)
    
    rsgislib.rastergis.populateStats(tmpInitClearSkyRegionsClumps, True, True)
    
    rsgislib.segmentation.rmSmallClumps(tmpInitClearSkyRegionsClumps, tmpInitClearSkyRegionsRmSmall, initClearSkyRegionMinSize, 'KEA')
    
    rsgislib.segmentation.relabelClumps(tmpInitClearSkyRegionsRmSmall, tmpInitClearSkyRegionsFinal, 'KEA', False)
    
    rsgislib.rastergis.populateStats(tmpInitClearSkyRegionsFinal, True, True)
    
    # Full extent clear sky regions: further than finalClearSkyRegionDist
    # from cloud/shadow. Also a temporary '.kea' file, so written as KEA.
    rsgislib.imagecalc.imageMath(tmpCloudsImgDist2CloudsNoData, tmpClearSkyRegionsFullExtent, 'b1 > '+str(finalClearSkyRegionDist), 'KEA', rsgislib.TYPE_32UINT)
    
    rsgislib.segmentation.clump(tmpClearSkyRegionsFullExtent, tmpClearSkyRegionsFullExtentClumps, 'KEA', False, 0.0, False)
    
    rsgislib.rastergis.populateStats(tmpClearSkyRegionsFullExtentClumps, True, True)
    
    # For each full extent clump record the maximum value of the initial
    # regions image it overlaps (0 means no initial region intersects it).
    rsgislib.rastergis.populateRATWithStats(tmpInitClearSkyRegionsFinal, tmpClearSkyRegionsFullExtentClumps, [rsgislib.rastergis.BandAttStats(band=1, maxField='InitRegionInter')])
    
    ratDataset = gdal.Open( tmpClearSkyRegionsFullExtentClumps, gdal.GA_Update )
    InitRegionInter = rat.readColumn(ratDataset, "InitRegionInter")
    ValidClumps = numpy.zeros_like(InitRegionInter, dtype=numpy.dtype('int'))
    ValidClumps[InitRegionInter>0] = 1
    rat.writeColumn(ratDataset, "ValidClumps", ValidClumps)
    # Dropping the reference closes the GDAL dataset.
    ratDataset = None
    
    rsgislib.rastergis.collapseRAT(tmpClearSkyRegionsFullExtentClumps, 'ValidClumps', tmpClearSkyRegionsFullExtentSelectClumps, 'KEA', 1)
    
    rsgislib.rastergis.populateStats(tmpClearSkyRegionsFullExtentSelectClumps, True, True)
    
    rsgislib.imagemorphology.createCircularOp(outputFile=tmpMorphOperator, opSize=morphSize)
    
    # opSize follows morphSize so it always agrees with the operator file.
    rsgislib.imagemorphology.imageOpening(inputImage=tmpClearSkyRegionsFullExtentSelectClumps, outputImage=tmpClearSkyRegionsFullExtentSelectClumpsOpen, tempImage="", morphOperator=tmpMorphOperator, useOpFile=True, opSize=morphSize, gdalformat='KEA', datatype=rsgislib.TYPE_32UINT)
    
    rsgislib.segmentation.clump(tmpClearSkyRegionsFullExtentSelectClumpsOpen, tmpClearSkyRegionsFullExtentSelectClumpsOpenClump, 'KEA', False, 0.0, False)
    
    rsgislib.rastergis.populateStats(tmpClearSkyRegionsFullExtentSelectClumpsOpenClump, True, True)

    rsgislib.segmentation.rmSmallClumps(tmpClearSkyRegionsFullExtentSelectClumpsOpenClump, tmpClearSkyRegionsFullExtentSelectClumpsOpenClumpRMSmall, initClearSkyRegionMinSize, 'KEA')
    
    # Final binary mask, written in the user requested format.
    rsgislib.imagecalc.imageMath(tmpClearSkyRegionsFullExtentSelectClumpsOpenClumpRMSmall, outputClearSkyMask, "b1>0?1:0", outFormat, rsgislib.TYPE_8UINT)
        
    if deleteTmpFiles:
        rsgisUtils = rsgislib.RSGISPyUtils()
        tmpFiles = [tmpCloudsImgDist2Clouds,
                    tmpCloudsImgDist2CloudsNoData,
                    tmpInitClearSkyRegions,
                    tmpInitClearSkyRegionsClumps,
                    tmpInitClearSkyRegionsRmSmall,
                    tmpInitClearSkyRegionsFinal,
                    tmpClearSkyRegionsFullExtent,
                    tmpClearSkyRegionsFullExtentClumps,
                    tmpClearSkyRegionsFullExtentSelectClumps,
                    tmpClearSkyRegionsFullExtentSelectClumpsOpen,
                    tmpClearSkyRegionsFullExtentSelectClumpsOpenClump,
                    tmpClearSkyRegionsFullExtentSelectClumpsOpenClumpRMSmall,
                    tmpMorphOperator]
        for tmpFile in tmpFiles:
            rsgisUtils.deleteFileWithBasename(tmpFile)
        if createdTmpDIR:
            try:
                # Only succeeds if the directory is now empty, so files
                # that do not belong to this function are never removed.
                os.rmdir(tmpPath)
            except OSError:
                # Best effort: leave a non-empty directory in place.
                pass
def applyClassifer(classTrainInfo,
                   skClassifier,
                   imgMask,
                   imgMaskVal,
                   imgFileInfo,
                   outputImg,
                   gdalformat,
                   classClrNames=True):
    """
This function uses a trained classifier and applies it to the provided input image.

:param classTrainInfo: dict (where the key is the class name) of ClassInfoObj objects which will be used to train the classifier (i.e., trainClassifier()), provide pixel value id and RGB class values.
:param skClassifier: a trained instance of a scikit-learn classifier (e.g., use trainClassifier or findClassifierParametersAndTrain)
:param imgMask: is an image file providing a mask to specify where should be classified. Simplest mask is all the valid data regions (rsgislib.imageutils.genValidMask)
:param imgMaskVal: the pixel value within the imgMask to limit the region to which the classification is applied. Can be used to create a hierarchical classification.
:param imgFileInfo: a list of rsgislib.imageutils.ImageBandInfo objects (also used within rsgislib.imageutils.extractZoneImageBandValues2HDF) to identify which images and bands are to be used for the classification so it adheres to the training data.
:param outputImg: output image file with the classification. Note. by default a colour table and class names column is added to the image. If an error is produced use HFA or KEA formats.
:param gdalformat: is the output image format - all GDAL supported formats are supported.
:param classClrNames: default is True and therefore a colour table with the colours specified in classTrainInfo and a ClassName column (from the keys of classTrainInfo) will be added to the output file.

:raises Exception: if the rios module could not be imported.

    """
    if not haveRIOS:
        raise Exception(
            "The rios module is required for this function could not be imported\n\t"
            + riosErr)

    infiles = applier.FilenameAssociations()
    infiles.imageMask = imgMask
    numClassVars = 0
    for imgFile in imgFileInfo:
        # Each input image is registered under its own (dynamic) name and
        # contributes one classification variable per selected band.
        infiles.__dict__[imgFile.name] = imgFile.fileName
        numClassVars = numClassVars + len(imgFile.bands)

    outfiles = applier.FilenameAssociations()
    outfiles.outimage = outputImg
    otherargs = applier.OtherInputs()
    otherargs.classifier = skClassifier
    otherargs.mskVal = imgMaskVal
    otherargs.numClassVars = numClassVars
    otherargs.imgFileInfo = imgFileInfo

    aControls = applier.ApplierControls()
    aControls.progress = cuiprogress.CUIProgressBar()
    aControls.drivername = gdalformat
    # Statistics and pyramids are produced by populateStats below instead.
    aControls.omitPyramids = True
    aControls.calcStats = False
    print("Applying the Classifier")
    applier.apply(_applySKClassifier,
                  infiles,
                  outfiles,
                  otherargs,
                  controls=aControls)
    print("Completed")
    rsgislib.rastergis.populateStats(clumps=outputImg,
                                     addclrtab=True,
                                     calcpyramids=True,
                                     ignorezero=True)

    if classClrNames:
        ratDataset = gdal.Open(outputImg, gdal.GA_Update)
        red = rat.readColumn(ratDataset, 'Red')
        green = rat.readColumn(ratDataset, 'Green')
        blue = rat.readColumn(ratDataset, 'Blue')
        # Zero initialised so rows without a class get an empty name
        # rather than the contents of uninitialised memory.
        ClassName = numpy.zeros_like(red, dtype=numpy.dtype('S255'))
        numRows = red.shape[0]

        for classKey in classTrainInfo:
            classInfo = classTrainInfo[classKey]
            if classInfo.id >= numRows:
                # The attribute table has no row for this class ID, so
                # there is nothing to colour.
                print("Class \'" + classKey +
                      "\' is not present in the output image attribute table")
                continue
            print("Apply Colour to class \'" + classKey + "\'")
            red[classInfo.id] = classInfo.red
            green[classInfo.id] = classInfo.green
            blue[classInfo.id] = classInfo.blue
            ClassName[classInfo.id] = classKey

        rat.writeColumn(ratDataset, "Red", red)
        rat.writeColumn(ratDataset, "Green", green)
        rat.writeColumn(ratDataset, "Blue", blue)
        rat.writeColumn(ratDataset, "ClassName", ClassName)

        # Dropping the reference closes the GDAL dataset.
        ratDataset = None