コード例 #1
0
def extractImgPxlSample(inputImg, pxlNSample, noData=None):
    """
    Extract a regular sub-sample of the pixel values of an image.

    The image is read block by block (200 x 200 pixel windows) with the RIOS
    ImageReader. Each block is flattened to one row of band values per pixel,
    rows containing the no data value are removed (when ``noData`` is given)
    and every ``pxlNSample``-th remaining row of the block is kept.

    :param inputImg: the input image, passed straight to the RIOS ImageReader.
    :param pxlNSample: the sampling step; every pxlNSample-th valid pixel of
                       each block is kept. Must be an integer >= 1.
    :param noData: optional no data value. A pixel is dropped when any of
                   its bands equals this value.
    :return: 2D numpy array with one row per sampled pixel and one column per
             band. If no pixel was sampled an array with zero rows is returned.
    :raises ValueError: if pxlNSample is smaller than 1.

    """
    if pxlNSample < 1:
        raise ValueError('pxlNSample must be an integer >= 1.')

    # Import the RIOS image reader
    from rios.imagereader import ImageReader

    # Per-block samples are collected in a list and joined once at the end;
    # concatenating inside the loop would re-copy the whole result per block.
    blkSamples = []
    nBands = 0
    blkDType = None

    reader = ImageReader(inputImg, windowxsize=200, windowysize=200)
    print('Started .0.', end='', flush=True)
    outCount = 10
    for (info, block) in reader:
        # Progress feedback in 10 % steps.
        if info.getPercent() > outCount:
            print('.'+str(int(outCount))+'.', end='', flush=True)
            outCount = outCount + 10

        blkShape = block.shape
        nBands = blkShape[0]
        blkDType = block.dtype

        # (bands, rows, cols) -> (rows * cols, bands): one row per pixel.
        blkBandsTrans = numpy.transpose(
            block.reshape((blkShape[0], (blkShape[1]*blkShape[2]))))

        if noData is not None:
            # Keep only the pixels where every band differs from noData.
            blkBandsTrans = blkBandsTrans[(blkBandsTrans != noData).all(axis=1)]

        if blkBandsTrans.shape[0] > 0:
            nSamp = int((blkBandsTrans.shape[0])/pxlNSample)
            nSampRange = numpy.arange(0, nSamp, 1)*pxlNSample
            blkSamples.append(blkBandsTrans[nSampRange])
    print('. Completed')

    if not blkSamples:
        # Nothing was sampled (no blocks, or only no data pixels): return an
        # empty result instead of failing on an unassigned variable.
        return numpy.empty((0, nBands), dtype=blkDType)
    return numpy.concatenate(blkSamples, axis=0)
コード例 #2
0
ファイル: gdaldriver.py プロジェクト: rcjetpilot/LIDAR-1
 def getData(self):
     """
     Return the raster data for the current extent as a 3d numpy array.

     The read itself is delegated to RIOS (ImageReader.readBlockWithMargin),
     which is given the dataset, the block origin and size, the numpy type
     matching the GDAL type, the overlap from the controls and the list of
     null values.

     Raises GDALException if the driver was opened in CREATE mode.
     """
     inCreateMode = (self.mode == basedriver.CREATE)
     if inCreateMode:
         raise GDALException('Can only read raster data in READ or UPDATE modes')

     # numpy equivalent of the GDAL data type of this dataset
     blockDType = imageio.GDALTypeToNumpyType(self.gdalType)

     # let RIOS do the hard work of reading the block
     readArgs = (self.ds, self.blockxcoord, self.blockycoord,
                 self.blockxsize, self.blockysize, blockDType,
                 self.controls.overlap, self.nullValList)
     return ImageReader.readBlockWithMargin(*readArgs)
コード例 #3
0
ファイル: __init__.py プロジェクト: ashutoshkumarjha/rsgislib
def countPxlsOfVal(inputImg, vals=(0,)):
    """
    Count the number of pixels in an image equal to each of a set of values.

    The image is read block by block with the RIOS ImageReader and, for each
    value, the matching pixels of every block (all bands) are added up.

    :param inputImg: the input image
    :param vals: a sequence of pixel values to be counted (default: only 0).
    :return: numpy int64 array of counts, in the same order as ``vals``.
    :raises ValueError: if ``vals`` is empty.

    """
    numVals = len(vals)
    if numVals == 0:
        raise ValueError(
            'At least 1 value should be provided within the vals input varable.'
        )
    outVals = numpy.zeros(numVals, dtype=numpy.int64)

    from rios.imagereader import ImageReader

    reader = ImageReader(inputImg)
    for (info, block) in reader:
        for idx, val in enumerate(vals):
            outVals[idx] += (block == val).sum()

    return outVals
コード例 #4
0
#!/usr/bin/env python

import sys
from scipy import ndimage
from rios.imagereader import ImageReader
from rios.imagewriter import ImageWriter

# Input and output image paths are the first two command line arguments.
inImage = sys.argv[1]
outImage = sys.argv[2]

reader = ImageReader(inImage)
writer = None
for (info, block) in reader:
    # Double every pixel value of the block.
    out = block * 2
    if writer is None:
        # The writer is created on the first block, which is handed to the
        # constructor together with the reader info; later blocks go
        # through write().
        writer = ImageWriter(outImage,
                             info=info,
                             firstblock=out,
                             drivername='HFA',
                             creationoptions=['COMPRESSED=TRUE'])
    else:
        writer.write(out)

# writer is still None if the reader produced no blocks; calling close()
# on it would fail with an AttributeError.
if writer is not None:
    writer.close(calcStats=True)
コード例 #5
0
import optparse

from rios.imagereader import ImageReader
from rios.imagewriter import ImageWriter

import sys

import numpy as np
from scipy import ndimage

# Input images: the first provides the watershed seeds (markers), the
# second the gradient image the watershed is run on.
inputs = [
    "/Users/pete/Temp/Hyperforest/Kersselaerspleyn_LiDAR_05m_pmfgrd_chmNN_median5_morphgrad_minima.env",
    "/Users/pete/Temp/Hyperforest/Kersselaerspleyn_LiDAR_05m_pmfgrd_chmNN_median5_morphgrad.env",
]

outfile = "/Users/pete/Temp/Hyperforest/Kersselaerspleyn_LiDAR_05m_pmfgrd_chmNN_median5_watershed.img"

reader = ImageReader(inputs, windowxsize=1000, windowysize=1000, overlap=100)
writer = None
# read through each block, run the watershed on its first band
# and write the result into the output file
for (info, blocks) in reader:
    block1, block2 = blocks
    seeds = np.int32(block1)
    grad = np.uint16(block2)

    # watershed_ift works on a 2D array; the band axis is added back after.
    out = np.expand_dims(ndimage.watershed_ift(grad[0], seeds[0]), 0)

    if writer is None:
        writer = ImageWriter(outfile, info=info, firstblock=out)
    else:
        writer.write(out)
    # Progress on a single, continuously overwritten line. Written through
    # sys.stdout so that it is valid on both Python 2 and Python 3.
    sys.stdout.write('{} %\r'.format(info.getPercent()))
    sys.stdout.flush()

# Close the output file explicitly (writer is None if nothing was read).
if writer is not None:
    writer.close()
コード例 #6
0
def label_pxl_sample_chips(sample_pxls_img, cls_msk_img, output_image, gdalformat, chip_size, cls_lut,
                           sample_pxl_img_band=1, cls_msk_img_band=1):
    """
    A function which labels image pixels based on the proportions of a class within a chip around the
    pixel (can be used in combination with rsgislib.imageutils.assign_random_pxls). It is expected that
    this function will be used when trying to use existing maps to create deep learning chip classification
    training data.

    Pixels are labelled if the proportion of pixels is >= the threshold provided in the LUT. If more than
    one class meets the threshold then the one with the highest proportion is assigned (for equal
    proportions the smallest class value wins). Proportions are relative to chip_size * chip_size pixels.

    :param sample_pxls_img: The input binary image with the pixel locations (value == 1)
    :param cls_msk_img: The classification image used to assign the output pixel values.
    :param output_image: The output image. Single pixels with the class value will be outputted.
    :param gdalformat: The output image file format.
    :param chip_size: The size of the chip used to identify the class - would probably correspond
                      to the chip size being used for the deep learning classification. Areas used
                      is half the chip size around the pixel (i.e., the pixel from the samples image
                      will be at the centre of the chip).
    :param cls_lut: A dict look up table (LUT) with the thresholds per class for the pixel to be
                    classified as that class.
    :param sample_pxl_img_band: Default 1. The image band in the sample image.
    :param cls_msk_img_band: Default 1. The image band in the class mask image.
    :raises Exception: if no image blocks could be read from the inputs.

    Example::

        sample_pxls_img = 'LS5TM_20000108_latn531lonw37_r23p204_osgb_samples.kea'
        cls_msk_img = 'LS5TM_20000108_latn531lonw37_r23p204_osgb_clouds_up.kea'
        output_image = 'LS5TM_20000108_latn531lonw37_r23p204_osgb_samples_lbld.kea'

        cls_lut = dict()
        cls_lut[1] = 0.2
        cls_lut[2] = 0.2
        cls_lut[3] = 0.99

        label_pxl_sample_chips(sample_pxls_img, cls_msk_img, output_image, 'KEA', 21, cls_lut)

    """
    import rsgislib.rastergis
    from rios.imagereader import ImageReader
    from rios.imagewriter import ImageWriter
    import tqdm
    import numpy
    import math

    # Number of pixels either side of the sample pixel; also the overlap
    # requested from the reader so a full chip is available for every pixel
    # in the core of a block.
    img_win_h_size = math.floor(chip_size / 2)
    # Odd chip sizes get one extra pixel on the upper side of the window so
    # that the window is chip_size wide with the sample pixel at the centre.
    win_extra = 1 if (chip_size % 2) != 0 else 0
    n_pxls = chip_size * chip_size

    inImgs = [sample_pxls_img, cls_msk_img]
    inImgBands = [[sample_pxl_img_band], [cls_msk_img_band]]

    writer = None
    reader = ImageReader(inImgs, windowxsize=200, windowysize=200, overlap=img_win_h_size, layerselection=inImgBands)
    for (info, block) in tqdm.tqdm(reader):
        samples_msk_arr = block[0]
        cls_arr = block[1][0]
        blk_shp = samples_msk_arr.shape

        # Size of the block core, i.e. without the overlap margin.
        xSize = blk_shp[2] - (img_win_h_size * 2)
        ySize = blk_shp[1] - (img_win_h_size * 2)
        out_samp_arr = numpy.zeros_like(samples_msk_arr, dtype=numpy.uint8)

        # Only the sample pixels (value 1) in the core need processing;
        # locate them in one vectorised step rather than testing every
        # pixel in a Python loop. argwhere returns them in row-major order.
        core_msk = samples_msk_arr[0,
                                   img_win_h_size:img_win_h_size + ySize,
                                   img_win_h_size:img_win_h_size + xSize]
        for y_off, x_off in numpy.argwhere(core_msk == 1):
            y = y_off + img_win_h_size
            x = x_off + img_win_h_size
            img_blk = cls_arr[y - img_win_h_size:y + img_win_h_size + win_extra,
                              x - img_win_h_size:x + img_win_h_size + win_extra]
            uniq_vals, uniq_counts = numpy.unique(img_blk, return_counts=True)

            # Find the class with the largest proportion among the classes
            # which reach their LUT threshold.
            max_val = None
            max_val_prop = 0
            for val, val_count in zip(uniq_vals, uniq_counts):
                if val in cls_lut:
                    val_prop = val_count / n_pxls
                    if val_prop >= cls_lut[val]:
                        if (max_val is None) or (val_prop > max_val_prop):
                            max_val = val
                            max_val_prop = val_prop
            if max_val is not None:
                out_samp_arr[0, y, x] = max_val

        if writer is None:
            writer = ImageWriter(output_image, info=info, firstblock=out_samp_arr, drivername=gdalformat)
        else:
            writer.write(out_samp_arr)

    if writer is None:
        # The reader yielded no blocks, so there is no output to close.
        raise Exception("No image blocks were read from the input images; no output image was written.")
    writer.close(calcStats=False)

    rsgislib.rastergis.populateStats(output_image, True, True, True)
コード例 #7
0
def apply_keras_chips_pixel_classifier(classTrainInfo,
                                       keras_cls_mdl,
                                       imgMask,
                                       imgMaskVal,
                                       imgFileInfo,
                                       chip_h_size,
                                       outClassImg,
                                       gdalformat,
                                       pred_batch_size=128,
                                       pred_max_queue_size=10,
                                       pred_workers=1,
                                       pred_use_multiprocessing=False,
                                       classClrNames=True):
    """
    This function applies a trained keras model to the image chip around each masked pixel of an image.
    For every pixel where imgMask equals imgMaskVal a chip of ((chip_h_size * 2) + 1) squared pixels is
    extracted from all the input bands and passed to the model as an array of shape
    (n pixels, n bands, chip size, chip size). The output image will contain the hard membership of the
    predicted class.

    For pred_batch_size, pred_max_queue_size, pred_workers and pred_use_multiprocessing options see the keras
    documentation https://keras.io/models/model/

    :param classTrainInfo: dict (where the key is the class name) of rsgislib.classification.ClassInfoObj
                           objects which were used to train the classifier, providing the class id
                           (index of the model output), the output pixel value (out_id) and the RGB
                           class colours.
    :param keras_cls_mdl: a trained keras model object, with an output layer which provides an output array
                          of the length of the number of classes.
    :param imgMask: is an image file providing a mask to specify where should be classified. Simplest mask is all the
                    valid data regions (rsgislib.imageutils.genValidMask)
    :param imgMaskVal: the pixel value within the imgMask to limit the region to which the classification is applied.
                       Can be used to create a hierarchical classification.
    :param imgFileInfo: a list of rsgislib.imageutils.ImageBandInfo objects (also used within
                        rsgislib.imageutils.extractZoneImageBandValues2HDF) to identify which images and bands are to
                        be used for the classification so it adheres to the training data.
    :param chip_h_size: is half the chip size to be extracted (i.e., 10 with output image chips 21x21,
                        10 pixels either side of the one of interest).
    :param outClassImg: Output image which will contain the hard classification.
    :param gdalformat: is the output image format - all GDAL supported formats are supported.
    :param pred_batch_size: the batch size used for the classification prediction.
    :param pred_max_queue_size: the max queue size used for the classification prediction
    :param pred_workers: the number of workers used for the classification prediction
    :param pred_use_multiprocessing: whether to use a multiprocessing option for the classification prediction
    :param classClrNames: default is True and therefore a colour table with the colours specified in ClassInfoObj
                          and a ClassName (from classTrainInfo) column will be added to the output file.
    :raises Exception: if a class id is not within 0..(number of classes - 1), or if no image blocks
                       could be read from the inputs.

    """
    n_classes = len(classTrainInfo)
    # Look up table from the index of the model output (ClassInfoObj.id) to
    # the pixel value written to the output image (ClassInfoObj.out_id).
    cls_id_lut = numpy.zeros(n_classes)
    for clsname, cls_info in classTrainInfo.items():
        if cls_info.id >= n_classes:
            # A real exception object is required here; raising a plain
            # string fails with a TypeError which hides this message.
            raise Exception(
                "ClassInfoObj '{}' id ({}) is not consecutive starting from 0."
                .format(clsname, cls_info.id))
        cls_id_lut[cls_info.id] = cls_info.out_id

    # The mask is always the first input image, followed by the data images.
    inImgs = [imgMask]
    inImgBands = [[1]]
    n_img_bands = 0
    for inImgInfo in imgFileInfo:
        inImgs.append(inImgInfo.fileName)
        inImgBands.append(inImgInfo.bands)
        n_img_bands = n_img_bands + len(inImgInfo.bands)
    nImgs = len(imgFileInfo)

    scn_overlap = chip_h_size
    chip_size = (chip_h_size * 2) + 1

    writer = None
    reader = ImageReader(inImgs,
                         windowxsize=200,
                         windowysize=200,
                         overlap=scn_overlap,
                         layerselection=inImgBands)
    for (info, block) in tqdm.tqdm(reader):
        classMskArr = block[0]
        blkShape = classMskArr.shape

        # Only the core of the block (without the overlap margin) is
        # classified; the margin just provides the chip context.
        xSize = blkShape[2] - (scn_overlap * 2)
        ySize = blkShape[1] - (scn_overlap * 2)
        ySlice = slice(scn_overlap, scn_overlap + ySize)
        xSlice = slice(scn_overlap, scn_overlap + xSize)

        vld_core = classMskArr[0, ySlice, xSlice] == imgMaskVal
        vld_cls_arr = numpy.zeros(blkShape, dtype=bool)
        vld_cls_arr[0, ySlice, xSlice] = vld_core

        # (y, x) block coordinates of the pixels to classify, in row-major
        # order, i.e. the same order in which the boolean mask assignment
        # below fills the output array.
        vld_pxls = numpy.argwhere(vld_core) + scn_overlap
        n_vld_pxls = vld_pxls.shape[0]

        out_cls_arr = numpy.zeros(blkShape, dtype=numpy.uint16)

        # Skip the prediction if there is nothing to classify in this block.
        if n_vld_pxls > 0:
            feat2cls = numpy.zeros(
                [n_vld_pxls, n_img_bands, chip_size, chip_size],
                dtype=numpy.float32)
            for iFeat, (y, x) in enumerate(vld_pxls):
                yMin = y - scn_overlap
                yMax = y + scn_overlap + 1
                xMin = x - scn_overlap
                xMax = x + scn_overlap + 1
                # The bands of all the images are stacked one after the
                # other, so the band offset carries on across the images
                # and the feature index advances once per pixel.
                iBand = 0
                for nImg in range(nImgs):
                    imgBlk = block[nImg + 1][..., yMin:yMax, xMin:xMax]
                    nBlkBands = imgBlk.shape[0]
                    numpy.copyto(feat2cls[iFeat, iBand:iBand + nBlkBands],
                                 imgBlk,
                                 casting='safe')
                    iBand = iBand + nBlkBands

            preds_idxs = numpy.argmax(keras_cls_mdl.predict(
                feat2cls,
                batch_size=pred_batch_size,
                max_queue_size=pred_max_queue_size,
                workers=pred_workers,
                use_multiprocessing=pred_use_multiprocessing),
                                      axis=1)
            # Release the (potentially large) feature array.
            feat2cls = None

            # Translate the model output indexes to output pixel values.
            preds_cls_ids = numpy.zeros_like(preds_idxs, dtype=numpy.uint16)
            for idx, cls_out_id in enumerate(cls_id_lut):
                preds_cls_ids[preds_idxs == idx] = cls_out_id

            out_cls_arr[vld_cls_arr] = preds_cls_ids

        if writer is None:
            writer = ImageWriter(outClassImg,
                                 info=info,
                                 firstblock=out_cls_arr,
                                 drivername=gdalformat)
        else:
            writer.write(out_cls_arr)

    if writer is None:
        # The reader yielded no blocks, so there is no output to close.
        raise Exception("No image blocks were read from the input images; no output image was written.")
    writer.close(calcStats=False)

    if classClrNames:
        rsgislib.rastergis.populateStats(outClassImg,
                                         addclrtab=True,
                                         calcpyramids=True,
                                         ignorezero=True)
        max_val = rsgislib.imagecalc.getImageBandMinMax(
            outClassImg, 1, False, 0)[1]
        ratDataset = gdal.Open(outClassImg, gdal.GA_Update)

        max_cls_val = 0
        for cls_info in classTrainInfo.values():
            if cls_info.out_id > max_cls_val:
                max_cls_val = cls_info.out_id

        if max_cls_val > max_val:
            # The image does not contain the largest class value, so new
            # colour columns long enough to hold every class are created.
            red = numpy.random.randint(0, 255, max_cls_val + 1)
            green = numpy.random.randint(0, 255, max_cls_val + 1)
            blue = numpy.random.randint(0, 255, max_cls_val + 1)
        else:
            red = rat.readColumn(ratDataset, 'Red')
            green = rat.readColumn(ratDataset, 'Green')
            blue = rat.readColumn(ratDataset, 'Blue')

        # 'S255' is the fixed-width byte string type; the equivalent 'a255'
        # alias is no longer accepted by NumPy 2.
        ClassName = numpy.empty_like(red, dtype=numpy.dtype('S255'))
        ClassName[...] = ""

        for classKey, cls_info in classTrainInfo.items():
            print("Apply Colour to class \'" + classKey + "\'")
            red[cls_info.out_id] = cls_info.red
            green[cls_info.out_id] = cls_info.green
            blue[cls_info.out_id] = cls_info.blue
            ClassName[cls_info.out_id] = classKey

        rat.writeColumn(ratDataset, "Red", red)
        rat.writeColumn(ratDataset, "Green", green)
        rat.writeColumn(ratDataset, "Blue", blue)
        rat.writeColumn(ratDataset, "ClassName", ClassName)
        ratDataset = None
        ratDataset = None
コード例 #8
0
ファイル: clustersklearn.py プロジェクト: timebridge/rsgislib
def img_pixel_sample_cluster(inputImg,
                             outputImg,
                             gdalformat='KEA',
                             noDataVal=0,
                             imgSamp=100,
                             clusterer=None,
                             calcStats=True,
                             useMeanShiftEstBandWidth=False):
    """
    A function which allows a clustering to be performed using the algorithms available
    within the scikit-learn library. The clusterer is trained on a sample of the input
    image and then applied using the predict function (therefore this function is only
    compatible with clusterers which have the predict function implemented) to the whole
    image. Output pixel values are the predicted cluster index + 1; pixels which are no data
    in all bands, or not finite in any band, are left as 0.

    :param inputImg: input image file.
    :param outputImg: output image file.
    :param gdalformat: output image file format.
    :param noDataVal: no data value associated with the input image.
    :param imgSamp: the input image sampling. (e.g., 100 is every 100th pixel)
    :param clusterer: clusterer from scikit-learn which must have a predict function. If None
                      (the default) a new MiniBatchKMeans(n_clusters=60, init='k-means++',
                      max_iter=100, batch_size=100) is created for the call, so that no fitted
                      estimator is shared between calls.
    :param calcStats: calculate image pixel statistics, histogram and image pyramids - note if you are not using a
                      KEA file then the format needs to support RATs for this option as histogram and colour table
                      are written to RAT.
    :param useMeanShiftEstBandWidth: use the mean-shift algorithm as the clusterer (pass None as the clusterer) where
                                     the bandwidth is calculated from the data itself.
    :raises Exception: if no image blocks could be read from the input image.

    """
    print('Sample input image:')
    dataSamp = rsgislib.imageutils.extractImgPxlSample(inputImg, imgSamp,
                                                       noDataVal)

    if useMeanShiftEstBandWidth:
        print('Using Mean-Shift predict bandwidth')
        from sklearn.cluster import MeanShift, estimate_bandwidth
        bandwidth = estimate_bandwidth(dataSamp, quantile=0.2, n_samples=500)
        clusterer = MeanShift(bandwidth=bandwidth, bin_seeding=True)
    elif clusterer is None:
        clusterer = MiniBatchKMeans(n_clusters=60,
                                    init='k-means++',
                                    max_iter=100,
                                    batch_size=100)

    print('Fit Clusterer')
    clusterer.fit(dataSamp)
    print('Fitted Clusterer')

    print('Apply to whole image:')
    reader = ImageReader(inputImg, windowxsize=200, windowysize=200)
    writer = None
    for (info, block) in tqdm.tqdm(reader):
        blkShape = block.shape
        # (bands, rows, cols) -> one row of band values per pixel.
        blkBands = block.reshape((blkShape[0], (blkShape[1] * blkShape[2]))).T
        # ID holds, for every row kept below, its position in the block.
        ID = numpy.arange(blkBands.shape[0])
        outClusterVals = numpy.zeros((blkBands.shape[0]))

        # Drop the pixels with a non-finite value in any band.
        finiteMskArr = numpy.isfinite(blkBands).all(axis=1)
        ID = ID[finiteMskArr]
        blkBands = blkBands[finiteMskArr]

        # Drop the pixels which are no data in every band.
        vldMskArr = numpy.logical_not((blkBands == noDataVal).all(axis=1))
        blkBandsNoData = blkBands[vldMskArr]
        ID = ID[vldMskArr]

        if ID.shape[0] > 0:
            # + 1 so that 0 is kept for the pixels which were not clustered.
            outPred = clusterer.predict(blkBandsNoData) + 1
            outClusterVals[ID] = outPred

        outClusterValsOutArr = outClusterVals.reshape(
            [1, blkShape[1], blkShape[2]])

        if writer is None:
            writer = ImageWriter(outputImg,
                                 info=info,
                                 firstblock=outClusterValsOutArr,
                                 drivername=gdalformat,
                                 creationoptions=[])
        else:
            writer.write(outClusterValsOutArr)

    if writer is None:
        # The reader yielded no blocks, so there is no output to close.
        raise Exception("No image blocks were read from the input image; no output image was written.")
    writer.close(calcStats=False)

    if calcStats:
        rsgislib.rastergis.populateStats(clumps=outputImg,
                                         addclrtab=True,
                                         calcpyramids=True,
                                         ignorezero=True)
コード例 #9
0
ファイル: clustersklearn.py プロジェクト: timebridge/rsgislib
def img_pixel_tiled_cluster(inputImg,
                            outputImg,
                            gdalformat='KEA',
                            noDataVal=0,
                            clusterer=None,
                            calcStats=True,
                            useMeanShiftEstBandWidth=False,
                            tileXSize=200,
                            tileYSize=200):
    """
    A function which allows a clustering to be performed using the algorithms available
    within the scikit-learn library. The clusterer is applied to a single tile at a time
    and therefore produces tile boundaries in the result. However, memory is controlled
    such that usage isn't excessive which it could be when processing a whole image.
    Output pixel values are the cluster label + 1; pixels which are no data in all bands,
    or not finite in any band, are left as 0.

    :param inputImg: input image file.
    :param outputImg: output image file.
    :param gdalformat: output image file format.
    :param noDataVal: no data value associated with the input image.
    :param clusterer: clusterer from scikit-learn; it is fitted on each tile and its labels_
                      attribute is used. If None (the default) a new
                      MiniBatchKMeans(n_clusters=60, init='k-means++', max_iter=100,
                      batch_size=100) is created for the call, so that no fitted estimator is
                      shared between calls.
    :param calcStats: calculate image pixel statistics, histogram and image pyramids - note if you are not using a
                      KEA file then the format needs to support RATs for this option as histogram and colour table
                      are written to RAT.
    :param useMeanShiftEstBandWidth: use the mean-shift algorithm as the clusterer (pass None as the clusterer) where
                                     the bandwidth is calculated from the data itself (per tile).
    :param tileXSize: tile size in the x-axis in pixels.
    :param tileYSize: tile size in the y-axis in pixels.
    :raises Exception: if no image blocks could be read from the input image.

    """
    if useMeanShiftEstBandWidth:
        from sklearn.cluster import MeanShift, estimate_bandwidth
    elif clusterer is None:
        clusterer = MiniBatchKMeans(n_clusters=60,
                                    init='k-means++',
                                    max_iter=100,
                                    batch_size=100)

    reader = ImageReader(inputImg,
                         windowxsize=tileXSize,
                         windowysize=tileYSize)
    writer = None
    for (info, block) in tqdm.tqdm(reader):
        blkShape = block.shape
        # (bands, rows, cols) -> one row of band values per pixel.
        blkBands = block.reshape((blkShape[0], (blkShape[1] * blkShape[2]))).T
        # ID holds, for every row kept below, its position in the tile.
        ID = numpy.arange(blkBands.shape[0])
        outClusterVals = numpy.zeros((blkBands.shape[0]))

        # Drop the pixels with a non-finite value in any band.
        finiteMskArr = numpy.isfinite(blkBands).all(axis=1)
        ID = ID[finiteMskArr]
        blkBands = blkBands[finiteMskArr]

        # Drop the pixels which are no data in every band.
        vldMskArr = numpy.logical_not((blkBands == noDataVal).all(axis=1))
        blkBandsNoData = blkBands[vldMskArr]
        ID = ID[vldMskArr]

        if ID.shape[0] > 0:
            if useMeanShiftEstBandWidth:
                # The bandwidth is re-estimated from the data of each tile.
                bandwidth = estimate_bandwidth(blkBandsNoData,
                                               quantile=0.2,
                                               n_samples=1000)
                clusterer = MeanShift(bandwidth=bandwidth, bin_seeding=True)

            clusterer.fit(blkBandsNoData)
            # + 1 so that 0 is kept for the pixels which were not clustered.
            outPred = clusterer.labels_ + 1
            outClusterVals[ID] = outPred

        outClusterValsOutArr = outClusterVals.reshape(
            [1, blkShape[1], blkShape[2]])

        if writer is None:
            writer = ImageWriter(outputImg,
                                 info=info,
                                 firstblock=outClusterValsOutArr,
                                 drivername=gdalformat,
                                 creationoptions=[])
        else:
            writer.write(outClusterValsOutArr)

    if writer is None:
        # The reader yielded no blocks, so there is no output to close.
        raise Exception("No image blocks were read from the input image; no output image was written.")
    writer.close(calcStats=False)

    if calcStats:
        rsgislib.rastergis.populateStats(clumps=outputImg,
                                         addclrtab=True,
                                         calcpyramids=True,
                                         ignorezero=True)
コード例 #10
0
def zoneMeans(clumpFile, dataFile, clumpBand=1, dataBands=None,
                    ignoreDataVals=None):
    """
    Given a file of clumps and a file of data, calculates
    the mean and standard deviation for the area of each
    clump value in the data.
    If dataBands is None does all bands in the dataFile, otherwise
    pass list of 1-based band indices or a single integer
    If dataBands is None or a list, returns list of tuples (one per
    requested band, in the order requested).
    Each tuple contains two arrays, one with the mean values, one
    with the standard deviation values. The indices of these
    arrays go from zero to the maximum clump value and have values
    for each clump id, zero for other indices.
    If dataBands is a single integer, returns a tuple with mean and
    standard deviation arrays as above.

    Ignore values(s) may be passed in with the ignoreDataVals parameter.
    This may be a single value in which case the same is used for all
    dataBands, or a sequence the same length as dataBands.

    The standard deviation is the population standard deviation,
    calculated from the accumulated sum, sum of squares and count.
    """

    fileDict = {'clumps':clumpFile, 'data':dataFile}

    origdataBands = dataBands # so we know whether to return list or tuple
    if isinstance(dataBands, int):
        dataBands = [dataBands] # treat as list for now

    # a single ignore value applies to every band; it is looked up per band
    # below so it also works when the band list is only known later
    singleIgnoreVal = (ignoreDataVals is not None) and numpy.isscalar(ignoreDataVals)

    # use dictionaries for accumulated values
    # index is the clump id
    # we have a list of these dictionaries, one per entry of dataBands
    # (created once we know how many bands there are)
    sumDictList = None
    sumsqDictList = None
    countDictList = None

    # read thru the images
    reader = ImageReader(fileDict)
    for (info, blocks) in reader:
        # get the data for the specified bands and flatten it
        clumps = blocks['clumps'][clumpBand-1].flatten()

        if dataBands is None:
            # now we know how many bands there are for the default list
            dataBands = list(range(1, blocks['data'].shape[0]+1))

        if sumDictList is None:
            # create the dictionaries for each band
            sumDictList = [{} for dataBand in dataBands]
            sumsqDictList = [{} for dataBand in dataBands]
            countDictList = [{} for dataBand in dataBands]

        for idx, dataBand in enumerate(dataBands):

            data = blocks['data'][dataBand-1].flatten()
            # the accumulators are indexed by position in dataBands, not by
            # the band number itself (dataBands may be e.g. [3])
            sumDict = sumDictList[idx]
            sumsqDict = sumsqDictList[idx]
            countDict = countDictList[idx]

            bandClumps = clumps
            if ignoreDataVals is not None:
                if singleIgnoreVal:
                    ignoreVal = ignoreDataVals
                else:
                    ignoreVal = ignoreDataVals[idx]
                keep = (data != ignoreVal)
                data = data[keep]
                bandClumps = clumps[keep]

            # check we have data
            if data.size == 0:
                continue

            # work in float64 so that summing and squaring cannot overflow
            # the (possibly small integer) data type of the image
            values = data.astype(numpy.float64)

            # per clump totals for this block in one pass: 'inverse' maps
            # every pixel to the position of its clump id in 'ids'
            ids, inverse = numpy.unique(bandClumps, return_inverse=True)
            inverse = inverse.ravel()
            blkSums = numpy.bincount(inverse, weights=values)
            blkSumsqs = numpy.bincount(inverse, weights=values * values)
            blkCounts = numpy.bincount(inverse)

            # load into our dictionaries
            for clumpId, blkSum, blkSumsq, blkCount in zip(
                    ids.tolist(), blkSums.tolist(), blkSumsqs.tolist(),
                    blkCounts.tolist()):
                clumpId = int(clumpId)
                sumDict[clumpId] = sumDict.get(clumpId, 0.0) + blkSum
                sumsqDict[clumpId] = sumsqDict.get(clumpId, 0.0) + blkSumsq
                countDict[clumpId] = countDict.get(clumpId, 0) + blkCount

    # nothing was read: behave as if every band had no clumps
    if dataBands is None:
        dataBands = []
    if sumDictList is None:
        sumDictList = [{} for dataBand in dataBands]
        sumsqDictList = [{} for dataBand in dataBands]
        countDictList = [{} for dataBand in dataBands]

    # work out the length of the arrays from the clump ids seen in any band
    # so that the arrays of all the bands have the same length
    allIds = [clumpId for countDict in countDictList for clumpId in countDict]
    maxidx = (max(allIds) + 1) if allIds else 0

    resultList = []
    # go through each band
    for idx in range(len(dataBands)):
        sumDict = sumDictList[idx]
        sumsqDict = sumsqDictList[idx]
        countDict = countDictList[idx]

        # turn into arrays so we don't have to iterate
        clumpIds = list(sumDict.keys())
        idxs = numpy.array(clumpIds, dtype=numpy.intp)
        sums = numpy.zeros((maxidx,), numpy.float64)
        sums[idxs] = [sumDict[clumpId] for clumpId in clumpIds]
        sumsqs = numpy.zeros((maxidx,), numpy.float64)
        sumsqs[idxs] = [sumsqDict[clumpId] for clumpId in clumpIds]
        counts = numpy.zeros((maxidx,), numpy.int64)
        counts[idxs] = [countDict[clumpId] for clumpId in clumpIds]

        # mask out invalid divides
        outInvalid = counts == 0
        counts[outInvalid] = 1

        means = sums / counts
        # rounding can make the variance marginally negative; clamp at zero
        # so that the square root does not produce NaN
        variances = numpy.maximum((sumsqs / counts) - (means * means), 0)
        stds = numpy.sqrt(variances)

        means[outInvalid] = 0
        stds[outInvalid] = 0

        resultList.append((means, stds))

    if isinstance(origdataBands, int):
        return resultList[0] # only one item
    else:
        return resultList
コード例 #11
0
def zoneMajority(clumpFile, dataFile, clumpBand=1, dataBands=None):
    """
    Given a file of clumps and a file of data, calculates
    the most common data values for each clump and the histogram
    If dataBands is None does all bands in the dataFile, otherwise
    pass list of 1-based band indices or a single integer
    If dataBands is None or a list, returns list of tuples (one per
    requested band, in the order requested).
    Each tuple contains an array of the most common values and a histogram.
    The indices of this array go from zero to the maximum clump value
    and have values for each clump id, zero for other indices.
    The histogram is a dictionary, keyed on the clump id. Each value
    in the dictionary is itself a dictionary keyed on the data value,
    with the count of that value.
    If dataBands is a single integer, returns a tuple with the mode array and
    histogram dictionary as above.

    The data values are counted with numpy.bincount and so must be
    non-negative integers.
    """

    origdataBands = dataBands # so we know whether to return list or tuple
    if isinstance(dataBands, int):
        dataBands = [dataBands] # treat as list for now

    fileDict = {'clumps':clumpFile, 'data':dataFile}

    # list of dictionaries, one per entry of dataBands (created once we
    # know how many bands there are); index is the clump id
    clumpDictList = None

    # read thru the images
    reader = ImageReader(fileDict)
    for (info, blocks) in reader:
        # get the data for the specified bands and flatten it
        clumps = blocks['clumps'][clumpBand-1].flatten()
        # the clump ids in this block are the same for every band
        clumpIds = numpy.unique(clumps)

        if dataBands is None:
            # now we know how many bands there are for the default list
            dataBands = list(range(1, blocks['data'].shape[0]+1))

        if clumpDictList is None:
            # create the dictionaries for each band
            clumpDictList = [{} for dataBand in dataBands]

        for idx, dataBand in enumerate(dataBands):

            data = blocks['data'][dataBand-1].flatten()
            # the dictionaries are indexed by position in dataBands, not by
            # the band number itself (dataBands may be e.g. [3])
            clumpDict = clumpDictList[idx]

            # for each clump id
            for value in clumpIds:
                # get the data for that clump
                dataSubset = data.compress(clumps == value)
                # check we have data
                if dataSubset.size != 0:
                    # retrieve the histogram of this clump, creating
                    # it if this is the first time we see the clump
                    histDict = clumpDict.setdefault(value, {})

                    # do the bincount
                    bincount = numpy.bincount(dataSubset)
                    # only interested in values where count != 0
                    bins = numpy.flatnonzero(bincount)

                    for binvalue, bincnt in zip(bins, bincount[bins]):
                        histDict[binvalue] = histDict.get(binvalue, 0) + bincnt

    # nothing was read: behave as if every band had no clumps
    if dataBands is None:
        dataBands = []
    if clumpDictList is None:
        clumpDictList = [{} for dataBand in dataBands]

    resultList = []
    # each band gets the result of its own dictionary
    for clumpDict in clumpDictList:
        # work out the length of the array and create a blank array;
        # int() avoids overflowing narrow unsigned clump id types on + 1
        maxidx = (int(max(clumpDict.keys())) + 1) if clumpDict else 0
        modeArray = numpy.zeros((maxidx,), numpy.uint32)

        # go thru each value
        for value, histDict in clumpDict.items():
            # find the mode
            maxValue, maxCount = max(histDict.items(), key=lambda x:x[1])

            modeArray[value] = maxValue

        resultList.append((modeArray, clumpDict))

    if isinstance(origdataBands, int):
        return resultList[0] # only one item
    else:
        return resultList