Ejemplo n.º 1
0
 def test_input2(self):
     """
     Check that the ``input`` keyword accumulates onto the supplied array.

     The histogram of ``array2`` computed with ``input=array6`` is compared
     with the plain histogram of ``array2`` to which ``array6`` is added
     afterwards. The summed element-wise difference must be zero.
     """
     # The default binsize is used for both histogram calls.
     data = self.array2
     seed = self.array6
     with_input = histogram(data, input=seed)
     expected = histogram(data)['histogram'] + seed
     residual = with_input['histogram'] - expected
     self.assertEqual(residual.sum(), 0)
Ejemplo n.º 2
0
 def test_binsize(self):
     """
     Check that the ``binsize`` keyword controls the number of bins.
     """
     result = histogram(self.array3, binsize=0.5)
     bin_count = result['histogram'].shape[0]
     # Per the original note, array3 covers the values 10 -> 19.5, so a bin
     # width of 0.5 is expected to produce 20 bins.
     self.assertEqual(bin_count, 20)
Ejemplo n.º 3
0
def idl_test():
    """
    Time a histogram-based search for specific values within a large image.

    A random 8000x8000 integer image is generated together with a random set
    of values to look for.  Every pixel whose value belongs to that set is
    flagged with 1 in a flat uint8 array of the same size as the image.  The
    lookup is done through the reverse indices returned by the histogram
    module, and the elapsed wall-clock time is printed.  Nothing is returned.

    To Note:
        This only illustrates the finding mechanism; in practice statistics
        could be generated per segment/object.  The histogram can be used for
        much more, such as chunk indexing and incrementing vectors.
        See http://www.idlcoyote.com/tips/histogram_tutorial.html for more info.
    """
    print('The IDL method!')
    start = datetime.datetime.now()

    img = numpy.random.randint(0, 30001, (8000, 8000))
    b = numpy.random.randint(0, 30001, (3000))
    targets = numpy.unique(b)
    flags = numpy.zeros((8000, 8000), dtype='uint8').flatten()

    # With Min=0 a bin index equals the pixel value, so the values being
    # searched for can be used directly as bin indices.
    h = histogram(img.flatten(), Min=0, Max=numpy.max(targets), reverse_indices='ri')
    counts = h['histogram']
    ri = h['ri']

    for value in targets:
        # Only populated bins have pixels to flag.
        if counts[value] != 0:
            flags[ri[ri[value]:ri[value + 1]]] = 1

    finish = datetime.datetime.now()
    print(finish - start)
Ejemplo n.º 4
0
 def test_nbins(self):
     """
     Check that the ``nbins`` keyword fixes the number of bins returned.
     """
     requested = 256
     result = histogram(self.array4, nbins=requested)
     # The histogram length must equal the requested number of bins.
     self.assertEqual(result['histogram'].shape[0], requested)
Ejemplo n.º 5
0
 def test_omin(self):
     """
     Check that the ``omin`` keyword returns the minimum used by the histogram.
     """
     # Per the original note array2 spans 0 -> 255, so the reported minimum
     # is expected to be 0.
     result = histogram(self.array2, omin='omin')
     reported_min = result['omin']
     self.assertEqual(reported_min, 0)
Ejemplo n.º 6
0
def summary_cleanup(array, min_value=1, max_value=4, min_population=10, all_neighbors=True):
    """
    Remove small pixel 'islands' from the water summary output.

    Pixels whose value lies in the closed range (min_value <= x <= max_value)
    are grouped into connected regions.  Every region holding fewer than
    min_population pixels is set to 0 in a copy of the input.

    :param array:
        A 2-Dimensional numpy array.

    :param min_value:
        Default value of 1. The minimum pixel value to be included within the analysis.

    :param max_value:
        Default value of 4. The maximum pixel value to be included within the analysis.

    :param min_population:
        Default value of 10. The minimum number of pixels a group must contain in order to be retained.

    :param all_neighbors:
        Default is True. If True, the 8 surrounding neighbors of the centre pixel are used for connectivity.
        If False, only the 4 immediate neighbors of the centre pixel are used.

    :return:
        A copy of array in which the pixels of every group smaller than min_population are set to 0.
        None is returned (after printing a message) when array is not 2-Dimensional.

    :author:
        Josh Sixsmith

    :history:
        *  2013/09/11: Created
    """

    shape = array.shape
    if len(shape) != 2:
        print('Array is not 2-Dimensional!!!')
        return None

    # Candidate pixels are those inside the requested value range.
    candidates = (array >= min_value) & (array <= max_value)

    # Structuring element: full 3x3 for 8-connectivity, a cross for 4-connectivity.
    if all_neighbors:
        connectivity = [[1, 1, 1], [1, 1, 1], [1, 1, 1]]
    else:
        connectivity = [[0, 1, 0], [1, 1, 1], [0, 1, 0]]

    labelled, _ = ndimage.label(candidates, structure=connectivity)

    # Min=1 starts the bins at label 1, skipping the unlabelled value 0.
    h = histogram(labelled.flatten(), Min=1, reverse_indices='ri')
    sizes = h['histogram']
    ri = h['ri']

    # flatten() returns a copy, so the caller's array is left untouched.
    cleaned = array.flatten()
    for small in numpy.where(sizes < min_population)[0]:
        cleaned[ri[ri[small]:ri[small + 1]]] = 0

    return cleaned.reshape(shape)
Ejemplo n.º 7
0
 def test_hist_min(self):
     """
     Check that the ``min`` keyword drops values below the given minimum.
     """
     # Per the original note array2 spans 0 -> 255; with min=1 the value 0
     # is omitted, leaving 255 bins that each hold a single element.
     result = histogram(self.array2, min=1)
     counts = result['histogram']
     self.assertEqual(counts.shape[0], 255)
     self.assertEqual((counts == 1).sum(), 255)
Ejemplo n.º 8
0
 def test_default_binsize(self):
     """
     Check that the default binsize is 1 and behaves accordingly.
     """
     # Per the original note array4 holds values in the range 0 -> 1, so a
     # binsize of 1 should give exactly one bin.
     result = histogram(self.array4)
     counts = result['histogram']
     self.assertEqual(counts.shape[0], 1)
     # Every element of the input must have landed in that first bin.
     self.assertEqual(self.array4.shape[0], counts[0])
Ejemplo n.º 9
0
def obj_area(array):
    """
    Calculates area per object. Area is referred to as number of pixels.

    :param array:
        A numpy array of object labels.  NOTE(review): presumably an integer
        label image in which 0 marks "no object" -- confirm against callers.

    :return:
        The 'histogram' entry returned by histogram() for the flattened
        array with Min=1, i.e. one pixel count per bin starting at value 1.
    """

    # flatten() takes no positional index; the original passed an undefined
    # name `i`, which raised NameError on every call.
    h = histogram(array.flatten(), Min=1)
    hist = h['histogram']

    return hist
Ejemplo n.º 10
0
 def test_hist(self):
     """
     Check the basic histogram output using the default binsize of 1.
     """
     result = histogram(self.array2)
     counts = result['histogram']
     # 256 bins are expected, each of them holding exactly one element.
     self.assertEqual(counts.shape[0], 256)
     self.assertEqual((counts == 1).sum(), 256)
Ejemplo n.º 11
0
 def test_omax(self):
     """
     Check that the ``omax`` keyword returns the maximum used by the histogram.
     """
     # Per the original note array2 spans 0 -> 255.  The reported maximum
     # should equal the derived maximum; only when nbins is set is the
     # maximum rescaled to nbins*binsize+min to keep the bin widths equal.
     result = histogram(self.array2, omax='omax')
     reported_max = result['omax']
     self.assertEqual(reported_max, 255)
Ejemplo n.º 12
0
 def test_nan(self):
     """
     Test that the NaN keyword works.

     One element of a float copy of ``array2`` is replaced by NaN and the
     histogram is computed with ``NaN=True``.  The resulting histogram is
     expected to have one bin fewer than without the NaN.
     """
     a = self.array2.astype('float64')
     # Use the lowercase constant: the `numpy.NaN` alias was removed in
     # NumPy 2.0, whereas `numpy.nan` exists in every NumPy release.
     a[0] = numpy.nan
     h = histogram(a, NaN=True)
     # The histogram will fail if array contains NaN's and NaN isn't set.
     # One element is excluded (the NaN), so test the length.
     self.assertEqual(h['histogram'].shape[0], 255)
Ejemplo n.º 13
0
 def test_input1(self):
     """
     Check that, with the ``input`` keyword set, the histogram length still
     equals the expected number of bins.
     """
     # Default binsize; per the original note the values lie in [0, 255],
     # so 256 bins are expected.
     data = self.array2
     seed = self.array6
     result = histogram(data, input=seed)
     bin_count = result['histogram'].shape[0]
     self.assertEqual(bin_count, 256)
Ejemplo n.º 14
0
 def test_input3(self):
     """
     Check that the histogram takes on the length of the ``input`` array
     when that array is longer than the natural number of bins.
     """
     # Default binsize; per the original note array1 holds values in
     # [0, 10), which alone would give 10 bins, whereas the input array has
     # 256 elements.  The output must follow the input array's length.
     data = self.array1
     seed = self.array6
     result = histogram(data, input=seed)
     self.assertEqual(result['histogram'].shape[0], seed.shape[0])
Ejemplo n.º 15
0
 def test_input4(self):
     """
     Check that the ``input`` array is correctly added to the histogram
     when it is longer than the natural number of bins.

     Per the original description the histogram of ``array1`` alone has a
     length of 10, while the input array has 256 elements, so the output
     histogram has 256 elements.
     """
     # Default binsize; values of array1 are described as lying in [0, 10).
     data = self.array1
     seed = self.array6
     combined = histogram(data, input=seed)['histogram']
     # Subtracting the input leaves the bare counts: a total of 10 is
     # expected (one per value of array1, zero in the remaining bins).
     leftover = combined - seed
     self.assertEqual(leftover.sum(), 10)
Ejemplo n.º 16
0
def obj_mean(array, base_array):
    """
    Calculates mean value per object.

    :param array:
        Array whose values identify the objects; it is flattened and
        histogrammed with Min=1.

    :param base_array:
        Array supplying the values to be averaged.  It is flattened and
        indexed with the reverse indices of `array`, so it needs to hold
        the same number of elements.

    :return:
        A list with the mean of base_array for each populated bin, in bin
        order.  Empty bins are skipped.
    """
    values = base_array.flatten()
    h = histogram(array.flatten(), Min=1, reverse_indices='ri')
    counts = h['histogram']
    ri = h['ri']

    # ri[ri[k]:ri[k + 1]] lists the flat positions of the members of bin k.
    return [numpy.mean(values[ri[ri[k]:ri[k + 1]]])
            for k in range(counts.shape[0])
            if counts[k] != 0]
Ejemplo n.º 17
0
 def test_reverse_indices1(self):
     """
     Check that the reverse indices lead back to the correct element.
     """
     # Work on a shuffled copy so that the elements are in random order.
     shuffled = self.array2.copy()
     numpy.random.shuffle(shuffled)
     result = histogram(shuffled, reverse_indices='ri')
     # The data are described as integers and the binsize is one, so the
     # value of a randomly picked element is also the index of its bin.
     # When the reverse indices work, looking that bin up returns the same
     # value again.
     # NOTE(review): if array2 has a narrow integer dtype, `bin_id + 1` may
     # overflow under newer NumPy promotion rules -- confirm.
     position = numpy.random.randint(0, 256, (1))[0]
     bin_id = shuffled[position]
     ri = result['ri']
     members = shuffled[ri[ri[bin_id]:ri[bin_id + 1]]]
     self.assertEqual(bin_id, members)
Ejemplo n.º 18
0
 def test_reverse_indices2(self):
     """
     Check that multiple values falling into a single bin are all returned
     by the reverse indices.
     """
     # Work on a shuffled copy so that the elements are in random order.
     shuffled = self.array2.copy()
     numpy.random.shuffle(shuffled)
     result = histogram(shuffled, reverse_indices='ri', binsize=5)
     ri = result['ri']
     # With the data described as spanning 0 -> 255 and a binsize of 5, the
     # values >= 100 and < 105 fall into the 21st bin (index 20).  The
     # original note states that no bin is empty, so there is no need to
     # check that ri[21] > ri[20] before slicing.
     picked = shuffled[ri[ri[20]:ri[21]]]
     # The ordering is expected to match the boolean selection as well.
     expected = shuffled[(shuffled >= 100) & (shuffled < 105)]
     self.assertEqual((expected - picked).sum(), 0)
Ejemplo n.º 19
0
def obj_centroid(array):
    """
    Calculates centroids per object.

    :param array:
        Array whose values identify the objects; it is flattened and
        histogrammed with Min=1.

    :return:
        A list with one entry per populated bin, in bin order.  Each entry
        is the mean (taken along axis 1) of the index array returned by
        array_indices() for the members of that bin.
    """

    shape = array.shape
    h = histogram(array.flatten(), Min=1, reverse_indices='ri')
    counts = h['histogram']
    ri = h['ri']

    centroids = []
    for k in range(counts.shape[0]):
        if counts[k] != 0:
            # NOTE(review): array_indices presumably converts the flat
            # positions into one row of indices per dimension -- confirm.
            members = ri[ri[k]:ri[k + 1]]
            coords = numpy.array(array_indices(shape, members, dimensions=True))
            centroids.append(numpy.mean(coords, axis=1))

    return centroids
Ejemplo n.º 20
0
 def test_reverse_indices3(self):
     """
     Check that the reverse indices work across several bins and values.
     """
     # Per the original note array4 is a random floating point array, so
     # scaling by 20 gives values in the range 0-20.
     scaled = (self.array4) * 20
     # With min=0 and binsize=2.5 the bins start at
     # [0.0, 2.5, 5.0, 7.5, 10.0, 12.5, 15.0, 17.5].
     result = histogram(scaled, reverse_indices='ri', min=0, binsize=2.5)
     # Reference selection: the values >= 7.5 and < 17.5.
     in_range = (scaled >= 7.5) & (scaled < 17.5)
     expected = numpy.sort(scaled[in_range])
     ri = result['ri']
     # Those values occupy bins 3, 4, 5 and 6 (zero based).  Both sides are
     # sorted so they can be differenced element by element.
     picked = numpy.sort(scaled[ri[ri[3]:ri[7]]])
     self.assertEqual((expected - picked).sum(), 0)
Ejemplo n.º 21
0
def obj_rectangularity(array):
    """
    Calculates rectangularity per object.

    Rectangularity is computed as the object's pixel count divided by the
    area of its bounding box.

    :param array:
        Array whose values identify the objects; it is flattened and
        histogrammed with Min=1.

    :return:
        A list with one floating point ratio per populated bin, in bin
        order.  Empty bins are skipped.
    """

    dims = array.shape
    h    = histogram(array.flatten(), Min=1, reverse_indices='ri')
    hist = h['histogram']
    ri   = h['ri']
    rect = []
    for i in numpy.arange(hist.shape[0]):
        if (hist[i] == 0):
            continue
        # NOTE(review): array_indices presumably converts the flat positions
        # into one row of indices per dimension -- confirm.
        idx = numpy.array(array_indices(dims, ri[ri[i]:ri[i+1]], dimensions=True))
        min_yx = numpy.min(idx, axis=1)
        max_yx = numpy.max(idx, axis=1)
        diff = max_yx - min_yx + 1 # Add one to account for zero based index
        bbox_area = numpy.prod(diff)
        # Force true division: both operands are integers, and under
        # Python 2 `/` would floor the ratio to 0 (or 1 for a full box).
        rect.append(float(hist[i]) / bbox_area)

    return rect
Ejemplo n.º 22
0
def binary_recursive_histogram(image, outfile, all_neighbours=False):
    """
    Recursively applies a histogram to an image with multiple bands.
    Designed for analysing binary results by finding counts of 1 within each band.

    For every band the number of pixels with value 1, the number of connected
    objects and the smallest, largest and average object size are written to
    a text report.

    :param image:
        A string containing the full file path name of a multi-band binary image.

    :param outfile:
        The output filename of the textfile that will contain the report.
        A bare filename is written to the current working directory; a
        missing output directory is created.

    :param all_neighbours:
        If set then pixel connectivity will be 8 neighbours rather than 4. Default is 4.

    :raises IOError:
        If GDAL cannot open the image (gdal.Open returned None).

    :author:
        Josh Sixsmith

    :history:
        * 07/12/2013: Created
        * 11/12/2013: Added more stats to the output

    :copyright:
        Copyright (c) 2013, Josh Sixsmith
        All rights reserved.

        Redistribution and use in source and binary forms, with or without
        modification, are permitted provided that the following conditions are met:

        1. Redistributions of source code must retain the above copyright notice, this
           list of conditions and the following disclaimer.
        2. Redistributions in binary form must reproduce the above copyright notice,
           this list of conditions and the following disclaimer in the documentation
           and/or other materials provided with the distribution.

        THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
        ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
        WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
        DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
        ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
        (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
        LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
        ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
        (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
        SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

        The views and conclusions contained in the software and documentation are those
        of the authors and should not be interpreted as representing official policies,
        either expressed or implied, of the FreeBSD Project.

    """

    # Resolve the report path up front.  The original only assigned the
    # path when `outfile` had no directory component, so any other input
    # failed with a NameError when the report was opened.
    out_dir = os.path.dirname(outfile)
    if out_dir == '':
        # Output to the current directory
        outfname = os.path.join(os.getcwd(), outfile)
    else:
        outfname = outfile
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)

    ds = gdal.Open(image)
    if ds is None:
        # Fail with a clear message instead of an AttributeError below.
        raise IOError('Unable to open image file: %s' % image)
    nb = ds.RasterCount

    # Connectivity used to segment the binary mask: 4 neighbours by default.
    if all_neighbours:
        kernel = [[1,1,1],[1,1,1],[1,1,1]]
    else:
        kernel = [[0,1,0],[1,1,1],[0,1,0]]

    # Per band: [flagged pixel count, object count].  Only integers are
    # dealt with at this point in time.
    result      = numpy.zeros((nb,2), dtype='int')
    result_list = []

    # Loop over the bands
    for i in range(nb):
        band = ds.GetRasterBand(i+1) # GDAL uses start index of 1
        img  = band.ReadAsArray()
        band.FlushCache()

        # Generate the histogram
        hist = histogram(img.flatten(), Min=0)['histogram']

        # If hist.shape[0] == 1 then only the value of zero was found.
        if (hist.shape[0] >= 2):
            # Segment the mask and measure the objects only when there is
            # something to segment; the statistics were never used for
            # empty bands.
            label_arr, nlabels = ndimage.label(img, structure=kernel)

            # Get min, max and mean areas of the segmented regions
            hist2    = histogram(label_arr.flatten(), Min=1)['histogram']
            mn_area  = numpy.min(hist2)
            mx_area  = numpy.max(hist2)
            avg_area = numpy.mean(hist2)

            result[i,0] = hist[1]
            result[i,1] = nlabels
            result_list.append('%i, %i, %i, %i, %i, %f\n' %(i+1, hist[1], nlabels, mn_area, mx_area, avg_area))
        else:
            result_list.append('%i, %i, %i, %i, %i, %f\n' %(i+1, 0, 0, 0, 0, 0.0))

    mx_loc     = numpy.argmax(result[:,0]) + 1 # Refer back to a 1 based band index
    mx_seg_loc = numpy.argmax(result[:,1]) + 1 # Refer back to a 1 based band index

    # The context manager closes the report even if a write fails.
    with open(outfname, 'w') as report:
        report.write('Results from %s image file\n' %image)
        report.write('Band with most flagged pixels: %i\n' %mx_loc)
        report.write('Band with most objects: %i\n' %mx_seg_loc)
        report.write('\n')
        report.write('Band, Flagged Pixels, Number of Objects, Smallest Object, Largest Object, Average Object Size\n')

        for res in result_list:
            report.write(res)
Ejemplo n.º 23
0
def otsu_threshold(image, Binsize=None, Max=None, Min=None, Nbins=None, Fast=True, Apply=False):
    """
    Calculates the Otsu threshold.

    Separates the input array into background and foreground components
    by finding the maximum between class variance.

    :param image:
        A numpy array of maximum three dimensions. For a 3-Dimensional array
        the first axis is treated as the band axis and every band is
        thresholded independently.

    :param Fast:
        Default is True. Will find the optimal threshold using the fast method which approximates the mean value per class.
        If False the class means are computed from the data values themselves.

    :param Apply:
        Default is False. If True then a mask/masks of the same dimensions as image will be returned (image > threshold). Otherwise only the threshold/thresholds will be returned.

    :param Binsize:
        (Optional) The binsize (Default is 1) to be used for creating the histogram.

    :param Max:
        (Optional) The maximum value to be used in creating the histogram. If not specified the array will be searched for max.

    :param Min:
        (Optional) The minimum value to be used in creating the histogram. If not specified the array will be searched for min.

    :param Nbins:
        (Optional) The number of bins to be used for creating the histogram. If set binsize is calculated as (max - min) / (nbins - 1), and the max value will be adjusted to (nbins*binsize + min).

    :return:
        For 1 or 2 dimensional input a single threshold, or a boolean mask when Apply is True.
        For 3 dimensional input a list of thresholds (one per band), or a boolean array of masks when Apply is True.
        None (after printing a message) when image is None or has more than three dimensions; None for 0-dimensional input.

    :author:
        Josh Sixsmith

    :history:
        * 06/02/2013--Created
        * 04/06/2013--Keywords Binsize, Nbins, Min, Max, Fast and Apply added.

    :sources:
        http://www.labbookpages.co.uk/software/imgProc/otsuThreshold.html
        http://www.codeproject.com/Articles/38319/Famous-Otsu-Thresholding-in-C
        http://en.wikipedia.org/wiki/Cumulative_frequency_analysis
        http://en.wikipedia.org/wiki/Otsu's_method

    """

    # Identity test: `image == None` is evaluated element-wise for numpy
    # arrays and raises ValueError when used as a condition.
    if image is None:
        print('No input image!!!')
        return None

    dims = image.shape
    ndims = len(dims)
    if (ndims > 3):
        print('Incorrect shape!; More than 3 dimensions is not a standard image.')
        return None

    find_threshold = _otsu_fast_threshold if Fast else _otsu_exact_threshold
    hist_kwargs = dict(binsize=Binsize, Max=Max, Min=Min, nbins=Nbins)

    if (ndims == 3):
        # For multi-band images, return a list of thresholds
        bands = dims[0]
        thresholds = [find_threshold(image[b].flatten(), hist_kwargs) for b in range(bands)]

        if not Apply:
            return thresholds

        masks = numpy.zeros(dims, dtype='bool')
        for b in range(bands):
            masks[b] = image[b] > thresholds[b]
        return masks

    if ndims in (1, 2):
        # 1-Dimensional input is used as is; 2-Dimensional input is flattened.
        values = image.flatten() if ndims == 2 else image
        threshold = find_threshold(values, hist_kwargs)

        if Apply:
            return image > threshold
        return threshold

    # 0-dimensional input: nothing to threshold.
    return None


def _otsu_fast_threshold(values, hist_kwargs):
    """Return the Otsu threshold of 1-D `values`, approximating class means from the bin locations."""
    h = histogram(values, locations='loc', omin='omin', **hist_kwargs)
    hist = h['histogram']
    omin = h['omin']
    loc  = h['loc']

    nbins = hist.shape[0]
    # A single bin has no neighbouring location to difference against; the
    # only possible answer is then the minimum itself.
    binsz = numpy.abs(loc[1] - loc[0]) if nbins > 1 else 0

    cumu_hist  = numpy.cumsum(hist, dtype=float)
    rcumu_hist = numpy.cumsum(hist[::-1], dtype=float) # reverse

    total = cumu_hist[-1]

    # probabilities per threshold class
    bground_weights = cumu_hist / total
    fground_weights = 1 - bground_weights # reverse probability

    # Calculate the mean of background and foreground classes.
    mean_bground = numpy.zeros(nbins)
    mean_fground = numpy.zeros(nbins)
    # Empty leading/trailing bins give 0/0 here; the resulting NaN's are
    # dealt with below, so the warnings are silenced.
    with numpy.errstate(divide='ignore', invalid='ignore'):
        mean_bground[0:-1] = (numpy.cumsum(hist * loc) / cumu_hist)[0:-1]
        mean_fground[0:-1] = ((numpy.cumsum(hist[::-1] * loc[::-1]) / rcumu_hist)[::-1])[1:]
    sigma_between = bground_weights * fground_weights *(mean_bground - mean_fground)**2

    # argmax treats NaN as the maximum, which would select an empty bin as
    # the threshold. Rank NaN entries below every valid variance instead.
    sigma_between = numpy.where(numpy.isnan(sigma_between), -numpy.inf, sigma_between)
    thresh = numpy.argmax(sigma_between)

    # Convert the bin index back into data units.
    return (thresh * binsz) + omin


def _otsu_exact_threshold(values, hist_kwargs):
    """Return the Otsu threshold of 1-D `values`, computing class means from the data via reverse indices."""
    h = histogram(values, reverse_indices='ri', omin='omin', locations='loc', **hist_kwargs)
    hist = h['histogram']
    ri   = h['ri']
    loc  = h['loc']

    nbins = hist.shape[0]

    nB = numpy.cumsum(hist, dtype='int64')
    total = nB[-1]
    nF = total - nB

    # Starting at zero is fine; the between class variance is never negative.
    best_sigma = 0
    # set to loc[0], thresholds can be negative
    optimal_t = loc[0]

    for i in range(nbins):
        # Background is bins 0..i, foreground is bins i+1..nbins-1; both
        # classes must contain data for the means to be defined.
        if ((ri[i+1] > ri[0]) and (ri[nbins] > ri[i+1])):
            mean_b = numpy.mean(values[ri[ri[0]:ri[i+1]]], dtype='float64')
            mean_f = numpy.mean(values[ri[ri[i+1]:ri[nbins]]], dtype='float64')
            sigma_btwn = nB[i]*nF[i]*((mean_b - mean_f)**2)
            if (sigma_btwn > best_sigma):
                best_sigma = sigma_btwn
                optimal_t = loc[i]

    return optimal_t
Ejemplo n.º 24
0
def hist_equal(array, BinSIZE=None, MaxV=None, MinV=None, Omax=None, Omin=None, Percent=None, Top=None, Histogram_Only=False):
    """
    Image contrast enhancement.
    Replicates the hist_equal function available within IDL (Interactive Data Language, EXELISvis).
    Converts an array to a histogram equalised byte array.

    :param array:
        A numpy array of any type.

    :param BinSIZE:
        The binsize to be used in constructing the histogram. The default is 1 for arrays with a datatype of byte (uint8). For arrays of other datatypes the binsize is computed as (MaxV - MinV) / 5000. (floating point).

    :param MaxV:
        The maximum data value to be considered in the contrast stretch. The default is 255 for arrays with a datatype of byte (uint8). Otherwise the maximum data value of array is used. An explicitly supplied value is always honoured.

    :param MinV:
        The minimum data value to be considered in the contrast stretch. The default is 0 for arrays with a datatype of byte (uint8). Otherwise the minimum data value of array is used. An explicitly supplied value is always honoured.

    :param Omax:
        (Optional) A string name used to refer to the dictionary key that will contain the maximum value used in generating the histogram.

    :param Omin:
        (Optional) A string name used to refer to the dictionary key that will contain the minimum value used in generating the histogram.

    :param Percent:
        A scalar strictly between 0 and 100 that will be used to stretch the array histogram.

    :param Top:
        The maximum value of the scaled result. Default is 255. The minimum value of the scaled result is always 0.

    :param Histogram_Only:
        Type Bool. Default is False. If set to True, then a numpy array of type int32 will be returned that contains the cumulative sum of the histogram.

    :return:
        Varies. If Histogram_Only is set to True, then the cumulative sum of the histogram is returned. Otherwise a byte scaled version of array is returned. If Omax and/or Omin is given as a string, the return value is a tuple (result, d) where d is a dictionary holding the requested values under the supplied key names.

    :raises Exception:
        If Percent is supplied and is not strictly between 0 and 100.

    Example:

        >>> # 100x100 array of samples from N(3, 6.25)
        >>> a = 2.5 * numpy.random.randn(100,100) + 3
        >>> scl_a = hist_equal(a)
        >>> scl_pct_a = hist_equal(a, Percent=2)

    :author:
        Josh Sixsmith; [email protected]; [email protected]

    :history:
       *  2013/10/24: Created

    :copyright:
        Copyright (c) 2013, Josh Sixsmith
        All rights reserved.

        Redistribution and use in source and binary forms, with or without
        modification, are permitted provided that the following conditions are met:

        1. Redistributions of source code must retain the above copyright notice, this
           list of conditions and the following disclaimer.
        2. Redistributions in binary form must reproduce the above copyright notice,
           this list of conditions and the following disclaimer in the documentation
           and/or other materials provided with the distribution.

        THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
        ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
        WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
        DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
        ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
        (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
        LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
        ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
        (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
        SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

        The views and conclusions contained in the software and documentation are those
        of the authors and should not be interpreted as representing official policies,
        either expressed or implied, of the FreeBSD Project.

    """

    def linear_percent(cumulative_histogram, percent, min_, Binsize):
        """
        Image contrast enhancement.

        Given a cumulative histogram, upper and lower DN values are computed and returned.

        :param cumulative_histogram:
            A 1D numpy array. Must be the cumulative sum of a histogram.

        :param percent:
            A value in the range of 0-100.

        :param min_:
            The minimum value to be used in determining the stretch.

        :param Binsize:
            The binsize used in constructing the histogram from which the cumulative histogram was derived.

        :return:
            Two scalars, maxDN and minDN (in that order), corresponding to the maximum and minimum values of the original array to be used in the contrast stretch.

        :raises Exception:
            If cumulative_histogram is not one dimensional.

        :author:
            Josh Sixsmith; [email protected]; [email protected]

        :history:
           *  2013/10/24: Created

        """

        ch = cumulative_histogram
        if len(ch.shape) != 1:
            raise Exception('Only 1D arrays are supported.')

        # Fractions of the total count to cut from each tail
        low = percent / 100.
        high = 1 - low

        # number of elements
        n = ch[-1]
        last = ch.shape[0] - 1

        # Lower cut: skip forward over a flat run (empty bins).  The bound
        # check stops the walk from reading past the end of the array.
        x1 = min(numpy.searchsorted(ch, n * low), last)
        while (x1 < last) and (ch[x1] == ch[x1 + 1]):
            x1 = x1 + 1

        # Upper cut: skip backward over a flat run.  The bound check stops
        # the walk from wrapping around to index -1.
        x2 = min(numpy.searchsorted(ch, n * high), last)
        while (x2 > 0) and (ch[x2] == ch[x2 - 1]):
            x2 = x2 - 1

        # Convert bin positions back to data values
        minDN = x1 * Binsize + min_
        maxDN = x2 * Binsize + min_

        return maxDN, minDN

    is_byte = (array.dtype == 'uint8')

    # Byte data gets the fixed 0-255 range only as a default; a range
    # supplied by the caller is never overwritten.
    if MaxV is None:
        MaxV = 255 if is_byte else numpy.amax(array)

    if MinV is None:
        MinV = 0 if is_byte else numpy.amin(array)

    if Top is None:
        Top = 255

    if BinSIZE is None:
        BinSIZE = 1 if is_byte else (MaxV - MinV) / 5000.

    # Retrieve the dimensions of the array
    dims = array.shape

    h = histogram(array.flatten(), binsize=BinSIZE, max=MaxV, min=MinV, omax='omax', omin='omin')

    # Collect omin/omax when the caller asked for them by key name
    want_omin = isinstance(Omin, str)
    want_omax = isinstance(Omax, str)
    return_extra = want_omin or want_omax
    d = {}
    if want_omin:
        d[Omin] = h['omin']
    if want_omax:
        d[Omax] = h['omax']

    # Zeroing the first element of the histogram
    hist = h['histogram']
    hist[0] = 0

    cumu_hist = numpy.cumsum(hist, dtype='float')

    if Histogram_Only:
        cumu_hist = cumu_hist.astype('int32')
        if return_extra:
            return cumu_hist, d
        return cumu_hist

    # Evaluate a linear percent stretch
    if Percent is not None:
        if (Percent <= 0) or (Percent >= 100):
            raise Exception('Percent must be between 0 and 100')

        maxDN, minDN = linear_percent(cumu_hist, percent=Percent, min_=MinV, Binsize=BinSIZE)
        scl = bytscl(array, Max=maxDN, Min=minDN, Top=Top)
        if return_extra:
            return scl, d
        return scl

    # The byte scaled cumulative histogram acts as a lookup table: one
    # output value per histogram bin.
    scl_lookup = bytscl(cumu_hist, Top=Top)

    # Convert every value to its bin position.  Dividing by the binsize is
    # correct for byte data as well, so a single code path is used.
    # Clip the lower bounds
    arr = array.clip(min=MinV)
    bins = numpy.floor((arr - MinV) / BinSIZE).astype('int')
    # Values above MaxV would index past the end of the table; saturate
    # them into the last bin instead.
    bins = numpy.minimum(bins, scl_lookup.shape[0] - 1)
    scl = (scl_lookup[bins.flatten()]).reshape(dims)

    if return_extra:
        return scl, d
    return scl
Ejemplo n.º 25
0
    # Create the mask via the thresholds
    mask = (array >= lower) & (array <= upper)

    # The label function segments the image into contiguous blobs
    label_array, num_labels = ndimage.label(mask, structure=s)

    # Find the labels associated with the ROI
    labels = label_array[ROIPixels]
    mx_lab = numpy.max(labels)
    # Find unique labels, excluding zero (background)
    ulabels = (numpy.unique(labels[labels > 0])
               ).tolist()  # Convert to list; Makes for neater indexing

    # Generate a histogram to find the label locations
    h = histogram(label_array.flatten(),
                  min=0,
                  max=mx_lab,
                  reverse_indices='ri')
    hist = h['histogram']
    ri = h['ri']

    for lab in ulabels:
        if hist[lab] == 0:
            continue
        idx.extend(ri[ri[lab]:ri[lab + 1]])

    idx = numpy.array(idx)
    idx = array_indices(dims, idx, dimensions=True)

    return idx
Ejemplo n.º 26
0
def plotHistogram(fname, out_dir):
    """
    Plot per-band histograms of a scene and report the maximum DN per band.

    A sub-directory named after the basename of fname is created inside
    out_dir. Two files are written there:

        * histogram_plots.pdf: one page per band, followed by a single page
          containing the histograms of all bands except band 9.
        * histogram_results.txt: one line per band giving the highest DN
          with a non-zero count, and that count.

    Bin zero of every histogram is zeroed before plotting and reporting
    (treated as the no-data value).

    :param fname:
        The scene path/name passed to SceneDataset. Its basename is also
        used as the plot title and the output sub-directory name.

    :param out_dir:
        The base output directory.

    :return:
        Nothing.
    """

    ds = SceneDataset(fname)

    # Retrieve and create the output base directory
    base_dir = os.path.basename(fname)
    out_dir = os.path.join(out_dir, base_dir)

    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    bands = []
    for group in ('REFLECTIVE', 'THERMAL', 'ATMOSPHERE'):
        bands.extend(ds._bands[group])

    pdf_name = os.path.join(out_dir, 'histogram_plots.pdf')
    plot_file = PdfPages(pdf_name)

    mx_DN_list = []
    # (histogram, label) pairs for the combined plot.  Keeping them paired
    # guarantees a histogram is never drawn with another band's label.
    combined = []

    try:
        for band in bands:
            b = ds.GetRasterBand(band)
            img = b.ReadAsArray()
            h = histogram(img.ravel(), max=65535)
            hist = h['histogram']
            hist[0] = 0  # Ignore the no-data value

            lab = 'Band %i' % band
            if band != 9:
                combined.append((hist.copy(), lab))

            # Highest DN with a non-zero count.  A band holding nothing but
            # no-data has no such bin; report DN 0 with a count of 0.
            nonzero = numpy.flatnonzero(hist)
            mx = nonzero[-1] if nonzero.size else 0
            mx_DN_list.append('Band: %i, Max DN: %i, Count: %i\n' % (band, mx, hist[mx]))

            # One page per band
            plt.plot(hist, label=lab)
            plt.legend()
            plt.suptitle(base_dir)
            plot_file.savefig()
            plt.close()

        # Now to output a single plot containing all (except band 9) histograms
        for band_hist, lab in combined:
            plt.plot(band_hist, label=lab)

        plt.legend()
        plt.suptitle(base_dir)
        plot_file.savefig()
        plt.close()
    finally:
        # Always finalise the PDF, even if reading or plotting a band failed
        plot_file.close()

    with open(os.path.join(out_dir, 'histogram_results.txt'), 'w') as out_file:
        out_file.writelines(mx_DN_list)
Ejemplo n.º 27
0
def binary_recursive_histogram(image, outfile, all_neighbours=False):
    """
    Recursively applies a histogram to an image with multiple bands.
    Designed for analysing binary results by finding counts of 1 within each band.

    For every band the number of flagged pixels (value 1), the number of
    connected objects, and the smallest, largest and average object size
    are written to a text report.

    :param image:
        A string containing the full file path name of a multi-band binary image.

    :param outfile:
        The output filename of the textfile that will contain the report.
        A name without a directory part is written to the current working
        directory. A missing output directory is created.

    :param all_neighbours:
        If set then pixel connectivity will be 8 neighbours rather than 4. Default is 4.

    :return:
        Nothing. The report is written to outfile.

    :author:
        Josh Sixsmith; [email protected], [email protected]

    :history:
        * 07/12/2013: Created
        * 11/12/2013: Added more stats to the output

    :copyright:
        Copyright (c) 2013, Josh Sixsmith
        All rights reserved.

        Redistribution and use in source and binary forms, with or without
        modification, are permitted provided that the following conditions are met:

        1. Redistributions of source code must retain the above copyright notice, this
           list of conditions and the following disclaimer.
        2. Redistributions in binary form must reproduce the above copyright notice,
           this list of conditions and the following disclaimer in the documentation
           and/or other materials provided with the distribution.

        THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
        ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
        WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
        DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
        ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
        (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
        LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
        ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
        (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
        SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

        The views and conclusions contained in the software and documentation are those
        of the authors and should not be interpreted as representing official policies,
        either expressed or implied, of the FreeBSD Project.

    """

    # Resolve the report path.  The path must be defined on every branch:
    # previously it was only set when outfile had no directory component.
    out_dirname = os.path.dirname(outfile)
    if out_dirname == '':  # Output to the current directory
        outfname = os.path.join(os.getcwd(), outfile)
    else:
        outfname = outfile
        if not os.path.exists(out_dirname):
            os.makedirs(out_dirname)

    ds = gdal.Open(image)
    nb = ds.RasterCount

    # Per band: [flagged pixel count, object count].
    # Only dealing with integers at this point in time. This could change in future.
    result = numpy.zeros((nb, 2), dtype='int')
    result_list = []

    # Pixel connectivity used for segmenting the binary mask
    if all_neighbours:
        kernel = [[1, 1, 1], [1, 1, 1], [1, 1, 1]]
    else:
        kernel = [[0, 1, 0], [1, 1, 1], [0, 1, 0]]

    # Loop over the bands
    for i in range(nb):
        band = ds.GetRasterBand(i + 1)  # GDAL uses start index of 1
        img = band.ReadAsArray()
        band.FlushCache()

        # Generate the histogram
        h = histogram(img.flatten(), Min=0)
        hist = h['histogram']

        # If hist.shape[0] == 1 then only the value of zero was found.
        if hist.shape[0] >= 2:
            # Segment the binary mask.  Done only when flagged pixels exist,
            # so the label histogram below is never built from an empty set
            # of labels.
            label_arr, nlabels = ndimage.label(img, structure=kernel)

            # Get min, max and mean areas of the segmented regions
            # (Min=1 excludes the background label 0)
            h2 = histogram(label_arr.flatten(), Min=1)
            hist2 = h2['histogram']
            mn_area = numpy.min(hist2)
            mx_area = numpy.max(hist2)
            avg_area = numpy.mean(hist2)

            result[i, 0] = hist[1]
            result[i, 1] = nlabels
            result_list.append(
                '%i, %i, %i, %i, %i, %f\n' %
                (i + 1, hist[1], nlabels, mn_area, mx_area, avg_area))
        else:
            result_list.append('%i, %i, %i, %i, %i, %f\n' %
                               (i + 1, 0, 0, 0, 0, 0.0))

    # Refer back to a 1 based band index
    mx_loc = numpy.argmax(result[:, 0]) + 1
    mx_seg_loc = numpy.argmax(result[:, 1]) + 1

    with open(outfname, 'w') as report:
        report.write('Results from %s image file\n' % image)
        report.write('Band with most flagged pixels: %i\n' % mx_loc)
        report.write('Band with most objects: %i\n' % mx_seg_loc)
        report.write('\n')
        report.write(
            'Band, Flagged Pixels, Number of Objects, Smallest Object, Largest Object, Average Object Size\n'
        )
        report.writelines(result_list)
Ejemplo n.º 28
0
def _otsu_fast_threshold(values, Binsize, Max, Min, Nbins):
    """Return the Otsu threshold of a 1D array, approximating class means from the bin locations."""
    h = histogram(values,
                  locations='loc',
                  omin='omin',
                  binsize=Binsize,
                  Max=Max,
                  Min=Min,
                  nbins=Nbins)
    hist = h['histogram']
    omin = h['omin']
    loc = h['loc']
    nbins = hist.shape[0]
    # A single-bin histogram has no second location to measure the bin
    # width from; the threshold then collapses to omin.
    binsz = numpy.abs(loc[1] - loc[0]) if nbins > 1 else 0

    cumu_hist = numpy.cumsum(hist, dtype=float)
    rcumu_hist = numpy.cumsum(hist[::-1], dtype=float)  # reverse

    total = cumu_hist[-1]

    # probabilities per threshold class
    bground_weights = cumu_hist / total
    fground_weights = 1 - bground_weights  # reverse probability

    # Calculate the mean of background and foreground classes.
    mean_bground = numpy.zeros(nbins)
    mean_fground = numpy.zeros(nbins)
    mean_bground[0:-1] = (numpy.cumsum(hist * loc) / cumu_hist)[0:-1]
    mean_fground[0:-1] = ((numpy.cumsum(hist[::-1] * loc[::-1]) /
                           rcumu_hist)[::-1])[1:]

    sigma_between = bground_weights * fground_weights * (
        mean_bground - mean_fground)**2
    # Empty leading/trailing bins give 0/0 class means.  Such a split has
    # an empty class, so it can never be the optimum; zero it so that the
    # NaN does not hijack argmax.
    sigma_between[numpy.isnan(sigma_between)] = 0

    thresh = numpy.argmax(sigma_between)
    return (thresh * binsz) + omin


def _otsu_exact_threshold(values, Binsize, Max, Min, Nbins):
    """Return the Otsu threshold of a 1D array, computing class means from the actual data values."""
    h = histogram(values,
                  reverse_indices='ri',
                  omin='omin',
                  locations='loc',
                  binsize=Binsize,
                  Max=Max,
                  Min=Min,
                  nbins=Nbins)
    hist = h['histogram']
    ri = h['ri']
    loc = h['loc']

    nbins = hist.shape[0]

    # Element counts of the background (bins 0..i) and foreground classes
    nB = numpy.cumsum(hist, dtype='int64')
    total = nB[-1]
    nF = total - nB

    # should't be a problem to start at zero. best_sigma should (by design) always be positive
    best_sigma = 0
    # set to loc[0], thresholds can be negative
    optimal_t = loc[0]

    # ri[0]..ri[nbins] are used as offsets into ri itself; the entries
    # between two offsets index the elements of values in those bins.
    first = ri[0]
    last = ri[nbins]

    for i in range(nbins):
        split = ri[i + 1]
        # get bin zero to the threshold 'i', then 'i' to nbins;
        # both classes must contain at least one element
        if (split > first) and (last > split):
            mean_b = numpy.mean(values[ri[first:split]], dtype='float64')
            mean_f = numpy.mean(values[ri[split:last]], dtype='float64')
            sigma_btwn = nB[i] * nF[i] * ((mean_b - mean_f)**2)
            if sigma_btwn > best_sigma:
                best_sigma = sigma_btwn
                optimal_t = loc[i]

    return optimal_t


def otsu_threshold(image,
                   Binsize=None,
                   Max=None,
                   Min=None,
                   Nbins=None,
                   Fast=True,
                   Apply=False):
    """
    Calculates the Otsu threshold.

    Separates the input array into background and foreground components
    by finding the maximum between class variance.

    :param image:
        A numpy array of maximum three dimensions. A three dimensional
        array is treated as a stack of bands along the first axis, with a
        threshold computed for each band.

    :param Fast:
        Default is True. Will find the optimal threshold using the fast method which approximates the mean value per class.

    :param Apply:
        Default is False. If True then a mask/masks of the same dimensions as image will be returned. Otherwise only the threshold/thresholds will be returned.

    :param Binsize:
        (Optional) The binsize (Default is 1) to be used for creating the histogram.

    :param Max:
        (Optional) The maximum value to be used in creating the histogram. If not specified the array will be searched for max.

    :param Min:
        (Optional) The minimum value to be used in creating the histogram. If not specified the array will be searched for min.

    :param Nbins:
        (Optional) The number of bins to be used for creating the histogram. If set binsize is calculated as (max - min) / (nbins - 1), and the max value will be adjusted to (nbins*binsize + min).

    :return:
        For a 1D or 2D image, the threshold, or the boolean mask
        (image > threshold) if Apply is True. For a 3D image, a list of
        thresholds (one per band), or a boolean array of masks if Apply is
        True. None is returned when image is None, has more than three
        dimensions, or has no dimensions.

    :author:
        Josh Sixsmith, [email protected]

    :history:
        * 06/02/2013--Created
        * 04/06/2013--Keywords Binsize, Nbins, Min, Max, Fast and Apply added.

    :sources:
        http://www.labbookpages.co.uk/software/imgProc/otsuThreshold.html
        http://www.codeproject.com/Articles/38319/Famous-Otsu-Thresholding-in-C
        http://en.wikipedia.org/wiki/Cumulative_frequency_analysis
        http://en.wikipedia.org/wiki/Otsu's_method

    """

    # Identity test: '== None' on a numpy array is an elementwise comparison
    if image is None:
        print('No input image!!!')
        return None

    dims = image.shape
    ndim = len(dims)
    if ndim > 3:
        print('Incorrect shape!; More than 3 dimensions is not a standard image.')
        return None

    find_threshold = _otsu_fast_threshold if Fast else _otsu_exact_threshold

    if ndim == 3:
        # For multi-band images, return a list of thresholds
        bands = dims[0]
        thresholds = [
            find_threshold(image[b].flatten(), Binsize, Max, Min, Nbins)
            for b in range(bands)
        ]

        if Apply:
            masks = numpy.zeros(dims, dtype='bool')
            for b in range(bands):
                masks[b] = image[b] > thresholds[b]
            return masks
        return thresholds

    if ndim == 2:
        values = image.flatten()
    elif ndim == 1:
        values = image
    else:
        # Zero-dimensional input: nothing to threshold
        return None

    threshold = find_threshold(values, Binsize, Max, Min, Nbins)
    if Apply:
        return image > threshold
    return threshold
Ejemplo n.º 29
0
def main(indir, outdir, logpath, pattern, vector_file, outfname):
    """
    The main processing routine.

    The vector features are rasterised using the first water extent image
    as the base.  Then, for every water extent image and every feature
    found in the rasterised extent, one row of per-class pixel counts is
    written to a csv file inside a ``<lon>_<lat>`` sub-directory of
    ``outdir``.

    :param indir:
        A string containing the file system pathname to a directory
        containing the water extent image files.

    :param outdir:
        A string containing the file system pathname to a directory
        that will contain the result output.  It must already exist,
        otherwise an error is logged and the process exits with status 1.

    :param logpath:
        A string containing the file system pathname to a directory
        that will contain the operation system logging information.

    :param pattern:
        A string containing the image extents file extension pattern,
        eg '*.tif'.

    :param vector_file:
        A string containing the file system pathname to an OGR
        compatible vector file.

    :param outfname:
        A string containing the system file pathname for the output
        csv file.

    :return:
        Nothing, main() acts as a procedure.
    """

    # setup logging file ... log to <logpath>/waterExtentVectorSummary_<hostname>_<pid>.log
    logPath = os.path.join(logpath,"waterExtentVectorSummary_%s_%d.log" % (os.uname()[1], os.getpid()))
    logging.basicConfig(filename=logPath,format='%(asctime)s %(levelname)s: %(message)s', datefmt='%d/%m/%Y %H:%M:%S', level=logging.INFO)

    baseOutputDir = Directory(outdir)
    if not baseOutputDir.exists():
        logging.error("%s does not exist" % baseOutputDir.getPath())
        sys.exit(1)

    # Get a list of water_extent files
    files = getFiles(indir, pattern)

    # Get the water_extent objects and sort them by date
    sortedWaterExtents, cellId = getWaterExtents(files)

    # lat and lon will be helpful
    lon = cellId[0]
    lat = cellId[1]

    # we output to a lon_lat subdirectory in the base output directory
    # create it
    outputPath = "%s/%03d_%04d" % (baseOutputDir.getPath(), lon, lat)
    outputDir = Directory(outputPath)
    outputDir.makedirs()
    logging.info("output directory is %s" %outputDir.getPath())

    # Rasterise the features
    # We can use the first image file as the base
    segments_ds = Rasterise(RasterFilename=files[0], VectorFilename=vector_file)
    logging.info("Rasterising features.")
    segments_ds.rasterise()

    # Extract the array
    veg2rast = segments_ds.segemented_array

    # Initialise the segment visitor
    seg_vis = SegmentVisitor(veg2rast)

    # Get specific attribute records
    logging.info("Opening vector file %s" %vector_file)
    vec_ds  = ogr.Open(vector_file)
    layer   = vec_ds.GetLayer()

    # Dicts to hold feature names, hydro_id and the fid -> segment id mapping
    feature_names = {}
    hydro_id      = {}
    fid2seg       = {}

    logging.info("Gathering attribute information for each feature.")
    # These Field Id's are unique to NGIG's vector datasets
    for feature in layer:
        fid  = feature.GetFID()
        name = feature.GetField("NAME")
        # Replace any occurences of None with UNKNOWN
        feature_names[fid] = 'UNKNOWN' if name is None else name
        hydro_id[fid]      = feature.GetField("AUSHYDRO_I")
        # Segment id's are the fid's offset by 1
        fid2seg[fid]       = fid + 1

    # Go back to the start of the vector file
    layer.ResetReading()

    # TODO Define dict lookup for potential segments up to max segment

    # Define the headings for the output file
    headings = ("Time Slice, Time Stamp, Feature Name, AUSHYDRO_ID, "
                "FID, Total Pixel Count, WATER_NOT_PRESENT, "
                "NO_DATA, MASKED_NO_CONTIGUITY, "
                "MASKED_SEA_WATER, MASKED_TERRAIN_SHADOW, "
                "MASKED_HIGH_SLOPE, MASKED_CLOUD_SHADOW, "
                "MASKED_CLOUD, WATER_PRESENT\n")

    # One output row per (image, feature); same column order as the headings
    row_template = ('{fname}, {timestamp}, {feature_name}, {hydro_id}, {FID}, '
                    '{area}, {not_water}, {no_data}, {contiguity}, {sea}, '
                    '{terrain}, {slope}, {cloud_shadow}, {cloud}, {water}\n')

    # A WaterTile stores 1 data layer encoded as unsigned BYTE values as
    # described in the WaterConstants.py file.
    #
    # Note - legal (decimal) values are:
    #
    #        0:  no water in pixel
    #        1:  no data (one or more bands) in source NBAR image
    #    2-127:  pixel masked for some reason (refer to MASKED bits)
    #      128:  water in pixel
    #
    # Values 129-255 are illegal (i.e. if bit 7 set, all others must be unset)
    #
    # WATER_PRESENT          (dec 128) bit 7
    # MASKED_CLOUD           (dec 64)  bit 6
    # MASKED_CLOUD_SHADOW    (dec 32)  bit 5
    # MASKED_HIGH_SLOPE      (dec 16)  bit 4
    # MASKED_TERRAIN_SHADOW  (dec 8)   bit 3
    # MASKED_SEA_WATER       (dec 4)   bit 2
    # MASKED_NO_CONTIGUITY   (dec 2)   bit 1
    # NO_DATA                (dec 1)   bit 0
    # WATER_NOT_PRESENT      (dec 0)   all bits zero
    #
    # (row field, histogram bin) pairs used to pull the counts out of the
    # [0..128] histogram, i.e 129 bins
    count_bins = (('not_water', 0),
                  ('no_data', 1),
                  ('contiguity', 2),
                  ('sea', 4),
                  ('terrain', 8),
                  ('slope', 16),
                  ('cloud_shadow', 32),
                  ('cloud', 64),
                  ('water', 128))

    # Initialise the output file
    full_fname = os.path.join(outputDir.getPath(), outfname)
    logging.info("Creating output summary file %s"%full_fname)

    # The with block guarantees the file is closed even if processing fails
    with open(full_fname, 'w') as outcsv:
        # Write the headings to disk
        outcsv.write(headings)

        # Loop over each WaterExtent file
        for waterExtent in sortedWaterExtents:
            logging.info("Processing %s" % waterExtent.filename)

            # Read the waterLayer from the extent file
            waterLayer = waterExtent.getArray()

            # timestamp
            # NOTE(review): assumes the object returned by getArray() carries
            # a datetime 'timestamp' attribute -- confirm against WaterExtent
            timestamp = waterLayer.timestamp
            str_time = timestamp.strftime('%Y-%m-%d %H:%M:%S.%f')

            # Loop over each feature Id
            for fid, seg_id in fid2seg.items():
                # Skip any FID's that don't exist in the current spatial extent
                if seg_id > seg_vis.max_segID:
                    continue
                data = seg_vis.getSegmentData(waterLayer, segmentID=seg_id)

                # Returns are 1D arrays, so check if we have an empty array
                total_area = data.shape[0]
                if total_area == 0:
                    continue # Empty bin, (no data), skipping

                hist = histogram(data, Min=0, Max=128)['histogram']

                # Now to output counts per feature
                row = {'fname': waterExtent.filename,
                       'timestamp': str_time,
                       'feature_name': feature_names[fid],
                       'hydro_id': hydro_id[fid],
                       # The loop key already is the FID; the segment id is
                       # only needed to query the segment visitor
                       'FID': fid,
                       'area': total_area}
                for field, bin_index in count_bins:
                    row[field] = hist[bin_index]

                # format() returns a new string, so write its result rather
                # than the template itself
                outcsv.write(row_template.format(**row))
Ejemplo n.º 30
0
def tiled_main(vector_file, cell_list, indir, outdir, pattern, logpath):
    """
    Run the tiled water extent summary over a list of cells.

    Each cell is processed by ``tiled_processing`` and the returned per-FID
    data frames are appended to the HDF5 file ``Test_Results.h5`` located in
    ``outdir`` (one group per FID, named ``FID_<fid>``).  Once every cell is
    done, the records of each group are combined by time stamp, hydro id and
    FID and the pixel counts are summed.

    :param vector_file:
        A string containing the file system pathname to an OGR
        compatible vector file.

    :param cell_list:
        An iterable of cell directory names; each one is joined onto
        ``indir`` to locate the water extent images of that cell.

    :param indir:
        A string containing the file system pathname to the directory
        holding the cell sub-directories.

    :param outdir:
        A string containing the file system pathname to an existing
        directory that will contain the result output.

    :param pattern:
        A string containing the image extents file extension pattern,
        eg '*.tif'.

    :param logpath:
        A string containing the file system pathname to a directory
        that will contain the operation system logging information.

    :return:
        Nothing, tiled_main() acts as a procedure.
    """
    # setup logging file ... log to <logpath>/waterExtentVectorSummary_<hostname>_<pid>.log
    log_file = "waterExtentVectorSummary_{}_{}.log".format(os.uname()[1],
                                                           os.getpid())
    logPath = os.path.join(logpath, log_file)
    logging.basicConfig(filename=logPath,
                        format='%(asctime)s %(levelname)s: %(message)s',
                        datefmt='%d/%m/%Y %H:%M:%S', level=logging.INFO)

    baseOutputDir = Directory(outdir)
    if not baseOutputDir.exists():
        logging.error("%s does not exist" % baseOutputDir.getPath())
        sys.exit(1)

    logging.info("Opening vector file %s" %vector_file)
    vec_ds  = ogr.Open(vector_file)
    layer   = vec_ds.GetLayer()

    logging.info("Gathering attribute information for each feature.")
    # Only the range of fid's is needed here; names and hydro id's are
    # gathered per cell inside tiled_processing
    fid_list = [feature.GetFID() for feature in layer]

    if not fid_list:
        # Without features there is no fid range to build the histogram from
        logging.error("%s contains no features" % vector_file)
        sys.exit(1)

    min_fid = min(fid_list)
    max_fid = max(fid_list)

    # We offset the min and max fid's by 1 as the rasterisation will be
    # created that way
    h = histogram(numpy.zeros((10), dtype='int32'), Max=max_fid+1, Min=min_fid+1)
    # This will be used as the input keyword and changes will be made in place
    t_area = h['histogram']

    # Create an output file that we can continually append data
    store = pandas.HDFStore(os.path.join(outdir, 'Test_Results.h5'))

    # try/finally so the HDF5 file is closed even when a cell fails
    try:
        for cell in cell_list:
            logging.info("Processing Cell ID: {}".format(cell))
            celldir = os.path.join(indir, cell)

            st = datetime.datetime.now()
            result_df = tiled_processing(vector_file, t_area, min_fid, max_fid, celldir, pattern)
            et = datetime.datetime.now()
            print("Tiled process time taken: {}".format(et - st))

            # TODO how do we combine records of the same fid & date but
            # different cell? For now they are appended and summed below.
            st = datetime.datetime.now()
            for key in result_df:
                # Group names shouldn't start with a number
                group_name = "FID_{}".format(key)
                store.append(group_name, result_df[key])
            et = datetime.datetime.now()
            print("Append to h5 time taken: {}".format(et - st))

        # Combine FIDs with identical timestamps and sum the pixel counts
        # Including the hydro_id and fid as groupby's should exclude them from
        # the summation.
        # The filename and Feature Name fields will be removed as a result of
        # the summation. Feature Name could potentially be kept
        group_items = ['Time Stamp', 'AUSHYDRO_ID', 'FID']
        st = datetime.datetime.now()
        for key in store.keys():
            # Combine results and overwrite the stored group
            store[key] = store[key].groupby(group_items).sum()
        et = datetime.datetime.now()
        print("Group by time taken: {}".format(et - st))
    finally:
        # Save and close the file
        store.close()
Ejemplo n.º 31
0
def perimeter(array, labelled=False, all_neighbors=False):
    """
    Calculates the perimeter per object.

    The border pixels of the objects are found by subtracting a binary
    erosion from the object mask.  The border image is then convolved with
    a kernel that encodes the neighbourhood configuration of each border
    pixel, and each configuration contributes a fixed weight to the
    perimeter of the object the pixel belongs to.

    :param array:
        A 2D numpy array. Either a binary/mask array (labelled=False), in
        which case the objects are labelled internally, or an array of
        integer object labels where 0 is the background (labelled=True).

    :param labelled:
        If True then array is treated as already labelled.
        Default is False.

    :param all_neighbors:
        If True a full 3x3 kernel (8 neighbours) is used for the labelling
        and the erosion, otherwise a cross shaped kernel (4 neighbours).
        Default is False.

    :return:
        A 1D float64 numpy array with one perimeter per histogram bin of
        the labels starting at label 1, i.e. element i belongs to label
        i + 1. Labels without any pixels get a perimeter of 0.
    """

    # Construct the kernel to be used for the erosion process
    if all_neighbors:
        k = [[1,1,1],[1,1,1],[1,1,1]]
    else:
        k = [[0,1,0],[1,1,1],[0,1,0]]

    if labelled:
        labels = array
        foreground = array > 0
    else:
        # Label the array to assign id's to segments/regions
        labels, num = ndimage.label(array, k)
        # Same definition of foreground as used by ndimage (non-zero)
        foreground = array != 0

    # Calculate the histogram of the labelled array and retrive the indices
    h    = histogram(labels.flatten(), Min=1, reverse_indices='ri')
    hist = h['histogram']
    ri   = h['ri']

    # Erode the image
    erode = ndimage.binary_erosion(foreground, k)

    # Get the borders of each object/region/segment.
    # Boolean arrays can't be subtracted in NumPy, so express
    # "foreground but not eroded" with logical operators instead.
    obj_borders = numpy.logical_and(foreground,
                                    numpy.logical_not(erode)).astype('uint8')

    # TODO There is potential here for the kernel to miss object borders
    # containing diagonal features.

    # Construct the perimeter kernel; the largest possible response is
    # 4*10 + 4*2 + 1 = 49, which fits the uint8 border image
    k2 = [[10,2,10],[2,1,2],[10,2,10]]
    # pixels on array border only use values within the array extents
    convolved = ndimage.convolve(obj_borders, k2, mode='constant', cval=0.0)

    # The reverse indices refer to the flattened array, so the convolved
    # image has to be indexed in its flattened form as well
    convolved = convolved.ravel()

    # Calculate the weights to be used for each edge pixel's contribution.
    # The weights are indexed by the convolved value itself.
    sqrt2   = numpy.sqrt(2.)
    weights = numpy.zeros(50)
    weights[[5,7,15,17,25,27]] = 1 # case (a)
    weights[[21,33]] = sqrt2 # case (b)
    weights[[13,23]] = (1. + sqrt2) / 2. # case (c)

    nlabels = hist.shape[0]
    perim   = numpy.zeros(nlabels, dtype='float64')

    for i in range(nlabels):
        # A supplied label array is not guaranteed to be consecutive
        if hist[i] == 0:
            continue
        values = convolved[ri[ri[i]:ri[i+1]]]
        # Bins 0..49 so that bin j counts the value j and lines up with
        # weights[j]; starting the bins at 1 shifts every count by one
        hist_i = histogram(values, Min=0, Max=49)['histogram']
        perim[i] = numpy.dot(hist_i, weights)

    return perim
Ejemplo n.º 32
0
def tiled_processing(vector_file, input_hist, Min_id, Max_id, indir, pattern):
    """
    Summarise the water extent pixel counts per vector feature for a
    single cell directory.

    The vector features are rasterised using the first water extent image
    as the base. For every water extent image and every feature present
    in the rasterised extent, a record of per-class pixel counts is
    collected.

    :param vector_file:
        A string containing the file system pathname to an OGR
        compatible vector file.

    :param input_hist:
        A histogram array that is passed to histogram() through the
        input keyword along with the rasterised segment id's. According
        to the original author's note it is updated in place (running
        total of the area per segment across cells).

    :param Min_id:
        The minimum value passed to that histogram() call.

    :param Max_id:
        The maximum value passed to that histogram() call.

    :param indir:
        A string containing the file system pathname to a directory
        containing the water extent image files.

    :param pattern:
        A string containing the image extents file extension pattern,
        eg '*.tif'.

    :return:
        A dictionary keyed by FID. Every value is a pandas DataFrame
        containing one row per processed water extent image in which the
        feature had pixels; features without any record get an empty
        DataFrame with the same columns.
    """
    # Get a list of water_extent files
    files = getFiles(indir, pattern)

    # Get the water_extent objects and sort them by date
    # (the cell id is not needed here)
    sortedWaterExtents, _ = getWaterExtents(files)

    # Rasterise the features
    # We can use the first image file as the base
    st = datetime.datetime.now()
    segments_ds = Rasterise(RasterFilename=files[0], VectorFilename=vector_file)
    logging.info("Rasterising features.")
    segments_ds.rasterise()
    et = datetime.datetime.now()
    print("Rasterisation time taken: {}".format(et - st))

    # Extract the array
    veg2rast = segments_ds.segemented_array

    # Initialise the segment visitor
    seg_vis = SegmentVisitor(veg2rast)

    # Update the total area (recursive histogram technique)
    # input keyword modifies in-place, so the return value is not needed
    histogram(veg2rast.ravel(), input=input_hist, Min=Min_id, Max=Max_id)

    # Get specific attribute records
    logging.info("Opening vector file %s" %vector_file)
    vec_ds  = ogr.Open(vector_file)
    layer   = vec_ds.GetLayer()

    # Define the headings for the data frame
    headings = ["Filename", "Time Stamp", "Feature Name", "AUSHYDRO_ID",
                "FID", "Total Pixel Count", "WATER_NOT_PRESENT",
                "NO_DATA", "MASKED_NO_CONTIGUITY",
                "MASKED_SEA_WATER", "MASKED_TERRAIN_SHADOW",
                "MASKED_HIGH_SLOPE", "MASKED_CLOUD_SHADOW",
                "MASKED_CLOUD", "WATER_PRESENT"]

    # A WaterTile stores 1 data layer encoded as unsigned BYTE values as
    # described in the WaterConstants.py file.
    #
    # Note - legal (decimal) values are:
    #
    #        0:  no water in pixel
    #        1:  no data (one or more bands) in source NBAR image
    #    2-127:  pixel masked for some reason (refer to MASKED bits)
    #      128:  water in pixel
    #
    # Values 129-255 are illegal (i.e. if bit 7 set, all others must be unset)
    #
    # (column, histogram bin) pairs used to pull the counts out of the
    # [0..128] histogram, i.e 129 bins
    count_bins = (("WATER_NOT_PRESENT", 0),
                  ("NO_DATA", 1),
                  ("MASKED_NO_CONTIGUITY", 2),
                  ("MASKED_SEA_WATER", 4),
                  ("MASKED_TERRAIN_SHADOW", 8),
                  ("MASKED_HIGH_SLOPE", 16),
                  ("MASKED_CLOUD_SHADOW", 32),
                  ("MASKED_CLOUD", 64),
                  ("WATER_PRESENT", 128))

    # Dicts to hold feature names, hydro_id, fid -> segment id mapping and
    # the records collected per fid
    feature_names = {}
    hydro_id      = {}
    fid2seg       = {}
    fid_rows      = {}

    logging.info("Gathering attribute information for each feature.")
    # These Field Id's are unique to NGIG's vector datasets
    for feature in layer:
        fid  = feature.GetFID()
        name = feature.GetField("NAME")
        # Replace any occurences of None with UNKNOWN
        feature_names[fid] = 'UNKNOWN' if name is None else name
        hydro_id[fid]      = feature.GetField("AUSHYDRO_I")
        # Segment id's are the fid's offset by 1
        fid2seg[fid]       = fid + 1
        fid_rows[fid]      = []

    # Go back to the start of the vector file
    layer.ResetReading()

    # TODO Define dict lookup for potential segments up to max segment

    # Loop over each WaterExtent file
    for waterExtent in sortedWaterExtents:
        logging.info("Processing %s" % waterExtent.filename)

        # Read the waterLayer from the extent file
        waterLayer = waterExtent.getArray()

        # timestamp
        timestamp = waterExtent.getDatetime()

        # Loop over each feature Id
        for fid, seg_id in fid2seg.items():
            # Skip any FID's that don't exist in the current spatial extent
            if seg_id > seg_vis.max_segID:
                continue
            data = seg_vis.getSegmentData(waterLayer, segmentID=seg_id)

            # Returns are 1D arrays, so check if we have an empty array
            total_area = data.shape[0]
            if total_area == 0:
                continue # Empty bin, (no data), skipping

            hist = histogram(data, Min=0, Max=128)['histogram']

            row = {'Filename': waterExtent.filename,
                   'Time Stamp': timestamp,
                   'Feature Name': feature_names[fid],
                   'AUSHYDRO_ID': hydro_id[fid],
                   'FID': fid,
                   'Total Pixel Count': total_area}
            for column, bin_index in count_bins:
                row[column] = hist[bin_index]

            fid_rows[fid].append(row)

    # Build every data frame once from its collected records. Appending
    # row by row copies the whole frame each time and DataFrame.append is
    # no longer available in recent pandas releases.
    fid_df = {}
    for fid, rows in fid_rows.items():
        fid_df[fid] = pandas.DataFrame(rows, columns=headings)

    return fid_df
Ejemplo n.º 33
0
        assert array.ndim == 2, "Dimensions of array must be 2D!\n Supplied array is %i"%array.ndim

        self.array   = array
        self.array1D = array.ravel()

        self.dims = array.shape

        self.histogram = None
        self.ri        = None

    def _findSegements(self):
        """
        Compute and store the histogram and reverse indices of the
        flattened segment array, along with the smallest and largest
        segment id.

        Sets self.histogram, self.ri, self.min_segID and self.max_segID.
        Values <= 0 are not considered to be segments when searching for
        the minimum id; if the array holds no positive values min_segID
        is set to 0.
        """

        h = histogram(self.array1D, min=0, reverse_indices='ri')

        self.histogram = h['histogram']
        self.ri        = h['ri']

        # Select the positive values first; taking the min of the boolean
        # mask itself would only ever give True/False rather than an id
        segment_pixels = self.array[self.array > 0]
        if segment_pixels.size:
            self.min_segID = numpy.min(segment_pixels)
        else:
            self.min_segID = 0
        self.max_segID = numpy.max(self.array)

    def getSegementData(self, array, segmentID=1):
        """
        Retrieve the data from an array corresponding to a segmentID.

        :param array:
            The array to pull the data from. It is flattened with ravel()
            before use.
            NOTE(review): presumably it must have the same dimensions as
            the segment array given to the constructor -- confirm.

        :param segmentID:
            The id of the segment of interest. Default is 1.
        """

        # Reverse indices stored on the instance (None until they are set)
        ri       = self.ri
        i        = segmentID
        arr_flat = array.ravel()
        # NOTE(review): the visible body stops here; nothing is extracted or
        # returned yet, so the remainder of the method appears to be missing.
Ejemplo n.º 34
0
def plotHistogram(fname, out_dir):
    """
    Plot the histogram of each band of a scene and report the max DN.

    A sub-directory named after the basename of fname is created inside
    out_dir. It receives 'histogram_plots.pdf', containing one page per
    band plus a final page combining all bands except band 9, and
    'histogram_results.txt', listing the highest DN with a non-zero count
    (and that count) for every band.

    :param fname:
        A string containing the file system pathname of the scene, as
        accepted by SceneDataset.

    :param out_dir:
        A string containing the file system pathname to the base output
        directory.

    :return:
        Nothing, plotHistogram() acts as a procedure.
    """

    ds = SceneDataset(fname)

    # Retrieve and create the output base directory
    base_dir = os.path.basename(fname)
    out_dir = os.path.join(out_dir, base_dir)

    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    bands = (list(ds._bands['REFLECTIVE']) +
             list(ds._bands['THERMAL']) +
             list(ds._bands['ATMOSPHERE']))

    pdf_name = os.path.join(out_dir, 'histogram_plots.pdf')
    plot_file = PdfPages(pdf_name)

    mx_DN_list = []
    # (histogram, label) pairs for the combined plot. Keeping them together
    # stops the labels drifting out of step once a band is left out.
    combined = []

    # try/finally so the pdf is closed even if a band fails to process
    try:
        for band in bands:
            b = ds.GetRasterBand(band)
            img = b.ReadAsArray()
            h = histogram(img.ravel(), max=65535)
            hist = h['histogram']
            hist[0] = 0  # Ignore the no-data value
            lab = 'Band %i' % band
            if band != 9:
                combined.append((hist.copy(), lab))

            # Highest DN with a non-zero count; falls back to 0 when the
            # band holds nothing but the no-data value
            nonzero = numpy.flatnonzero(hist)
            mx = nonzero[-1] if nonzero.size else 0
            mx_DN_list.append('Band: %i, Max DN: %i, Count: %i\n' %
                              (band, mx, hist[mx]))

            # for plotting
            plt.plot(hist, label=lab)
            plt.legend()
            plt.suptitle(base_dir)
            plot_file.savefig()
            plt.close()

        # Now to output a single plot containing all (except band 9) histograms
        for hist, lab in combined:
            plt.plot(hist, label=lab)

        plt.legend()
        plt.suptitle(base_dir)
        plot_file.savefig()
    finally:
        plot_file.close()

    plt.close()

    with open(os.path.join(out_dir, 'histogram_results.txt'), 'w') as out_file:
        out_file.writelines(mx_DN_list)
Ejemplo n.º 35
0
def _track_hole_boundaries(object_mask, orig_binary, structure):
    """
    Track the boundary of every multi-pixel hole of a single filled object.

    :param object_mask:
        A 2D uint8 array set to 1 for the pixels of the filled object.

    :param orig_binary:
        A 2D uint8 array set to 1 for the pixels that were labelled in the
        original (unfilled) array.

    :param structure:
        The structuring element used for labelling the holes.

    :return:
        A list containing the result of track_object_boundary for each
        hole that is larger than a single pixel.
    """
    dims = object_mask.shape

    # A hole is part of the filled object but not of the original objects
    holes = numexpr.evaluate("(object_mask - orig_binary) == 1")
    labs, nlabs = ndimage.label(holes, structure)
    if nlabs == 0:
        return []

    h_holes    = histogram(labs.flatten(), Min=1, reverse_indices='ri')
    hist_holes = h_holes['histogram']
    ri_h       = h_holes['ri']
    nbins      = hist_holes.shape[0]

    # The first index of each hole is used to start the boundary tracking
    start_idxs = numpy.zeros(nbins, dtype='int')
    for j in range(nbins):
        if hist_holes[j] == 0:
            continue
        start_idxs[j] = ri_h[ri_h[j]]

    # Convert the 1D indices to 2D indices used by NumPy
    start_idxs = array_indices(dims, start_idxs, dimensions=True)

    results = []
    for k in range(nbins):
        # Empty bins and single pixel holes are skipped, same as for objects
        if hist_holes[k] < 2:
            continue
        idx = (start_idxs[0][k], start_idxs[1][k])
        results.append(track_object_boundary(labs, start_index=idx,
                                             label_id=k + 1))

    return results


def obj_get_boundary_method1(labelled_array, fill_holes=True):
    """
    Get the pixels that mark the object boundary/perimeter.
    Method 1.

    8 neighbourhood chain code

         5 6 7
         4 . 0
         3 2 1

    4 neighbourhood chain code

         . 3 .
         2 . 0
         . 1 .

    The memory of the order of the array should be 'C-Style': column,
    column, column, then next row.

    eg a 2x5 array

        0, 1, 2, 3, 4
        5, 6, 7, 8, 9

    Therefore the first index of an object will only have labels to the
    right and below (below left, below right).

    eg an object with a label ID of 6

       0, 0, 6, 6, 6
       6, 6, 6, 6, 6

    The first index of each object is used as the start (and end) of its
    boundary. The tracking itself, including the handling of the travel
    directions, is left to track_object_boundary.

    :param labelled_array:
        A 2D numpy array of object labels, where 0 is the background.

    :param fill_holes:
        If True (Default) the holes of the objects are filled and the
        filled array is re-labelled before tracking. The boundary of every
        multi-pixel hole is then tracked as well; its vertices are appended
        to the 'Holes' list of the object and its length is added to the
        'Perimeter_Length' of the object.

    :return:
        A dictionary keyed by the zero based object index (label - 1)
        containing the result of track_object_boundary for every object
        larger than a single pixel.
    """

    dims = labelled_array.shape
    rows = dims[0]
    cols = dims[1]

    # 8 neighbourhood connectivity
    s = [[1,1,1],[1,1,1],[1,1,1]]

    orig_binary = None
    if fill_holes:
        orig_binary = (labelled_array > 0).astype('uint8')
        fill = obj_fill_holes(labelled_array)
        labelled_array, nlabels = ndimage.label(fill, structure=s)

    # Determine the co-ordinates (indices) of each segement
    # The first index of each segment will be used to define the start and end of a boundary/perimeter
    h = histogram(labelled_array.flatten(), Min=1, reverse_indices='ri')
    hist = h['histogram']
    ri = h['ri']
    nlabels = hist.shape[0]
    seg_start_idxs = numpy.zeros(nlabels, dtype='int')

    # Obtain the start indices of each segment/object
    for i in range(nlabels):
        # A supplied label array is not guaranteed to be consecutive
        if hist[i] == 0:
            continue
        seg_start_idxs[i] = ri[ri[i]] # Return the first index

    # Convert the 1D indices to 2D indices used by NumPy
    seg_start_idxs = array_indices(dims, seg_start_idxs, dimensions=True)

    perimeter_info = {}

    for i in range(nlabels):
        # Skip empty bins as well as single pixel objects
        # (what is the perimeter of a single pixel, 0.0, 4.0???)
        if hist[i] < 2:
            continue
        idx   = (seg_start_idxs[0][i], seg_start_idxs[1][i])
        label = i + 1
        perimeter_info[i] = track_object_boundary(labelled_array, start_index=idx, label_id=label)

        if not fill_holes:
            continue

        # TODO Can we trust that the labelling of the filled and unfilled
        # arrays will give the same object index? If so the area differences
        # could be used to only track holes where needed.
        single_object = numpy.zeros((rows*cols), dtype='uint8')
        single_object[ri[ri[i]:ri[i+1]]] = 1
        # The reverse indices are 1D; back to 2D for the comparison against
        # the original mask and for the labelling of the holes
        single_object = single_object.reshape(dims)

        for holes_result in _track_hole_boundaries(single_object, orig_binary, s):
            # An object can have several holes, so collect them all rather
            # than only keeping the last one
            perimeter_info[i].setdefault('Holes', []).append(holes_result['Vertices'])
            perimeter_info[i]['Perimeter_Length'] += holes_result['Perimeter_Length']

    return perimeter_info
Ejemplo n.º 36
0
def triangle_threshold(array, Binsize=None, Max=None, Min=None, Nbins=None, Apply=True, Invert=False):
    """
    Calculates a threshold and optionally creates a binary mask from an array
    using the Triangle threshold method.

    The threshold is calculated as the point of maximum perpendicular distance
    of a line between the histogram peak and the farthest non-zero histogram edge
    to the histogram.

    :param array:
        A numpy array.

    :param Binsize:
        (Optional) The binsize (Default is 1) to be used for creating the histogram.

    :param Max:
        (Optional) The maximum value to be used in creating the histogram. If not specified the array will be searched for max.

    :param Min:
        (Optional) The minimum value to be used in creating the histogram. If not specified the array will be searched for min.

    :param Nbins:
        (Optional) The number of bins to be used for creating the histogram.
        If set binsize is calculated as (max - min) / (nbins - 1), and the max value will be adjusted to (nbins*binsize + min).

    :param Apply:
        If True (Default), then the threshold will be applied and a boolean mask with the same
        dimensions as array will be returned. Otherwise just the threshold will be returned.

    :param Invert:
        If True (Default is False), then the returned mask will be inverted. Only valid if Apply=True.
        The inverted mask is applied as (array < threshold) & (array >= min).
        The non-inverted mask is applied as (array >= threshold) & (array <= max)

    :return:
        If Apply is True, a boolean mask with the same shape as array.
        Otherwise the threshold exactly as returned by calculate_triangle_threshold
        (it is NOT rescaled by the bin size and histogram minimum).

    :raises ValueError:
        If array is None.

    :author:
        Josh Sixsmith, [email protected]

    :history:
        * 12/07/2014--Translated from IDL

    :sources:
        G.W. Zack, W.E. Rogers, and S.A. Latt. Automatic measurement of sister
            chromatid exchange frequency. Journal of Histochemistry & Cytochemistry,
            25(7):741, 1977. 1, 2.1
    """

    # Use an identity test: `array == None` is evaluated element-wise by NumPy
    # and the result cannot be used as a single truth value.
    if array is None:
        raise ValueError("No input array!")

    # Remember the input shape so the mask can be handed back in that shape.
    dims = array.shape

    arr = array.flatten()
    h = histogram(arr, locations='loc', omax='omax', omin='omin', binsize=Binsize, Max=Max, Min=Min, nbins=Nbins)

    hist = h['histogram']
    omin = h['omin']
    omax = h['omax']
    loc = h['loc']

    # Width of one bin, taken from the spacing of the first two bin locations.
    binsz = numpy.abs(loc[1] - loc[0])

    # Calculate the threshold
    threshold = calculate_triangle_threshold(histogram=hist)

    if not Apply:
        return threshold

    # Scale the threshold by the bin width and offset it by the histogram
    # minimum so it can be compared directly against the array values.
    thresh_convert = threshold * binsz + omin

    if Invert:
        mask = (arr < thresh_convert) & (arr >= omin)
    else:
        mask = (arr >= thresh_convert) & (arr <= omax)

    # The comparisons ran on the flattened copy; restore the input shape.
    return mask.reshape(dims)
Ejemplo n.º 37
0
        burn = i + 1
        gdal.RasterizeLayer(outds, [1], layer, burn_values=[burn])
        layer.SetAttributeFilter(None)

    # Retrieve the rasterised vector and delete the GDAL MEM dataset
    vec2rast = outds.ReadAsArray().flatten()
    outds = None
    vec_ds = None
    layer = None

    # TODO
    # Need to sort out feature names with features that get rasterised
    # Hmmm, it might be ok

    # Calculate the histogram and the reverse indices of the rasterised vector
    h = histogram(vec2rast, Min=1, reverse_indices='ri', omax='omax')
    hist = h['histogram']
    ri = h['ri']
    omax = h['omax']

    # Get the indices for each bin
    idxs = []
    for i in range(hist.shape[0]):
        if hist[i] == 0:
            idxs.append(None)  # An empty item
            continue
        idx = ri[ri[i]:ri[i + 1]]
        idxs.append(idx)

    logging.info("Creating output summary file")