def test_input2(self):
    """
    Check that an array passed through the input keyword is added to the
    computed histogram.

    The histogram is expected to have the same length as the input array,
    so the result should equal the plain histogram plus the input array.
    """
    # Default binsize is used, for values in the range [0, 255].
    data = self.array2
    seed = self.array6
    with_input = histogram(data, input=seed)['histogram']
    expected = histogram(data)['histogram'] + seed
    residual = with_input - expected
    self.assertEqual(residual.sum(), 0)
def test_binsize(self):
    """
    Check that the binsize keyword is honoured.
    """
    result = histogram(self.array3, binsize=0.5)
    # Half-unit bins are expected to need 20 bins to hold the
    # values 10 -> 19.5.
    bin_count = result['histogram'].shape[0]
    self.assertEqual(bin_count, 20)
def idl_test():
    """
    A small timing test that investigates the speed of finding specific
    objects/segments within an image.

    The identified objects are set/flagged in a flat array holding one
    element per pixel of the original image. This only illustrates the
    finding mechanism; in reality statistics could be generated per
    segment/object. This is the method for solving such a problem using the
    histogram module.

    To Note:
    This is just one simple example of using the histogram to solve such
    an abstract problem. It can be used for so much more, such as chunk
    indexing and incrementing vectors.
    See http://www.idlcoyote.com/tips/histogram_tutorial.html for more info.

    The elapsed time (which includes generating the random test data) is
    printed; nothing is returned.
    """
    # Parenthesised print is valid on both Python 2 and Python 3.
    print('The IDL method!')
    st = datetime.datetime.now()

    img = numpy.random.randint(0, 30001, (8000, 8000))
    b = numpy.random.randint(0, 30001, 3000)
    # The distinct values to search for within the image.
    c = numpy.unique(b)

    # One flag per pixel, allocated flat so no reshaping copy is needed.
    img2 = numpy.zeros(img.size, dtype='uint8')

    # Set min=0 then we can simply index directly using the array of values
    # we wish to find.
    h = histogram(img.flatten(), Min=0, Max=numpy.max(c),
                  reverse_indices='ri')
    hist = h['histogram']
    ri = h['ri']

    for value in c:
        # Nothing to flag when the value never occurs in the image.
        if hist[value] == 0:
            continue
        # ri[value]:ri[value + 1] delimits the positions of the pixels
        # that fell into this bin.
        img2[ri[ri[value]:ri[value + 1]]] = 1

    et = datetime.datetime.now()
    print(et - st)
def test_nbins(self):
    """
    Check that the nbins keyword sets the number of bins produced.
    """
    result = histogram(self.array4, nbins=256)
    counts = result['histogram']
    # Exactly 256 bins were requested.
    self.assertEqual(counts.shape[0], 256)
def test_omin(self):
    """
    Check that the omin keyword reports the minimum used.
    """
    # The array covers 0 -> 255, so the reported minimum should be 0.
    result = histogram(self.array2, omin='omin')
    reported_min = result['omin']
    self.assertEqual(reported_min, 0)
def summary_cleanup(array, min_value=1, max_value=4, min_population=10, all_neighbors=True):
    """
    A function for removing pixel 'islands' from the water summary output.

    Using the default parameters, pixel groups with less than 10 members
    within the data range of (1 <= x <= 4) are removed (set to 0) in a copy
    of the original array.

    :param array:
        A 2-Dimensional numpy array.

    :param min_value:
        Default value of 1. The minimum pixel value to be included within
        the analysis.

    :param max_value:
        Default value of 4. The maximum pixel value to be included within
        the analysis.

    :param min_population:
        Default value of 10. The minimum population size a group of pixels
        must be in order to be retained.

    :param all_neighbors:
        Default is True. If True, the 8 surrounding neighbors of the centre
        pixel will be used for connectivity. If False, then only the 4
        immediate neighbors of the centre pixel will be used for
        connectivity.

    :return:
        A copy of array with the pixel groups selected by the
        min_value/max_value/min_population parameters set to 0.
        None is returned (after printing a message) if array is not
        2-Dimensional.

    :author:
        Josh Sixsmith; [email protected]

    :history:
        * 2013/09/11: Created
    """
    dims = array.shape
    if len(dims) != 2:
        # Parenthesised print is valid on both Python 2 and Python 3.
        print('Array is not 2-Dimensional!!!')
        return None

    # flatten() returns a copy, so the caller's array is left untouched.
    flat_array = array.flatten()

    # Pixels whose value lies inside the range under analysis.
    low_obs = (array >= min_value) & (array <= max_value)

    if all_neighbors:
        kernel = [[1, 1, 1], [1, 1, 1], [1, 1, 1]]
    else:
        kernel = [[0, 1, 0], [1, 1, 1], [0, 1, 0]]

    label_array, num_labels = ndimage.label(low_obs, structure=kernel)

    # Min=1 leaves the background label (0) out of the histogram.
    h = histogram(label_array.flatten(), Min=1, reverse_indices='ri')
    hist = h['histogram']
    ri = h['ri']

    # Bins (labelled groups) that are too small to keep.
    small_groups = numpy.where(hist < min_population)[0]
    for i in small_groups:
        # ri[i]:ri[i + 1] delimits the flat positions of the group's pixels.
        flat_array[ri[ri[i]:ri[i + 1]]] = 0

    cleaned_array = flat_array.reshape(dims)
    return cleaned_array
def test_hist_min(self):
    """
    Check that the min keyword restricts the histogram range.
    """
    # With an array of 0 -> 255 and min=1, the value 0 should be omitted.
    counts = histogram(self.array2, min=1)['histogram']
    self.assertEqual(counts.shape[0], 255)
    single_hits = (counts == 1).sum()
    self.assertEqual(single_hits, 255)
def test_default_binsize(self):
    """
    Check that the default binsize is 1 and behaves accordingly.
    """
    # The array holds values in the range 0 -> 1.
    counts = histogram(self.array4)['histogram']
    self.assertEqual(counts.shape[0], 1)

    # Every value should have landed in the first (only) bin.
    n_values = self.array4.shape[0]
    self.assertEqual(n_values, counts[0])
def obj_area(array):
    """
    Calculates area per object. Area is referred to as number of pixels.

    :param array:
        A numpy array; presumably a labelled array in which 0 marks the
        background, since values below 1 are left out of the histogram.

    :return:
        The 'histogram' entry returned by histogram(); one count per bin,
        starting at the value 1.
    """
    # flatten() takes no positional index; the original passed an undefined
    # name `i`, which raised a NameError on every call.
    h = histogram(array.flatten(), Min=1)
    hist = h['histogram']

    return hist
def test_hist(self):
    """
    Check the basic histogram output.

    The default binsize is 1, so there should be 256 bins.
    """
    counts = histogram(self.array2)['histogram']
    # Expect 256 elements, each of them holding the value 1.
    self.assertEqual(counts.shape[0], 256)
    single_hits = (counts == 1).sum()
    self.assertEqual(single_hits, 256)
def test_omax(self):
    """
    Check that the omax keyword reports the maximum used.
    """
    # The array covers 0 -> 255.
    # The reported value should match the derived max, unless the nbins
    # keyword is set, in which case the max gets rescaled by
    # nbins*binsize+min in order to maintain equal bin widths.
    result = histogram(self.array2, omax='omax')
    reported_max = result['omax']
    self.assertEqual(reported_max, 255)
def test_nan(self):
    """
    Test that the NaN keyword works.
    """
    a = self.array2.astype('float64')
    # numpy.nan is the canonical spelling; the numpy.NaN alias was removed
    # in NumPy 2.0.
    a[0] = numpy.nan
    h = histogram(a, NaN=True)
    # The histogram will fail if array contains NaN's and NaN isn't set.
    # One element is excluded (the NaN), so test the length.
    self.assertEqual(h['histogram'].shape[0], 255)
def test_input1(self):
    """
    Check that using the input keyword yields a histogram whose length
    matches the number of expected bins.
    """
    # Default binsize is used, for values in the range [0, 255], so the
    # output should have one element per bin.
    data = self.array2
    seed = self.array6
    result = histogram(data, input=seed)
    bin_count = result['histogram'].shape[0]
    self.assertEqual(bin_count, 256)
def test_input3(self):
    """
    Check that with the input keyword the histogram takes the length of
    the input array.
    """
    # Default binsize is used, for values in the range [0, 10).
    # Without the input keyword the histogram size should be 10.
    # The input array has 256 elements, so the output histogram should
    # have 256 as well.
    data = self.array1
    seed = self.array6
    result = histogram(data, input=seed)
    self.assertEqual(result['histogram'].shape[0], seed.shape[0])
def test_input4(self):
    """
    Check that the input array is correctly added to the histogram when
    the input is longer than the histogram would otherwise be.

    Without the input keyword the histogram length is 10. An input array
    of length 256 is used, so the histogram length becomes 256.
    """
    # Default binsize is used, for values in the range [0, 10).
    data = self.array1
    seed = self.array6
    counts = histogram(data, input=seed)['histogram']
    # After removing the input, the first ten elements should each be 1
    # and the remainder 0.
    remainder = counts - seed
    self.assertEqual(remainder.sum(), 10)
def obj_mean(array, base_array):
    """
    Computes the mean value of base_array for each object in array.

    Objects are taken to be the non-empty histogram bins of array, starting
    at the value 1. Empty bins are skipped, so the returned list holds one
    mean per non-empty bin, in bin order.
    """
    values = base_array.flatten()

    result = histogram(array.flatten(), Min=1, reverse_indices='ri')
    counts = result['histogram']
    ri = result['ri']

    means = []
    for idx, count in enumerate(counts):
        if count == 0:
            continue
        # Flat positions of the pixels that fell into this bin.
        members = ri[ri[idx]:ri[idx + 1]]
        means.append(numpy.mean(values[members]))

    return means
def test_reverse_indices1(self):
    """
    Check that the reverse indices keyword works.
    """
    # Work on a shuffled copy so the elements are in a random order.
    shuffled = self.array2.copy()
    numpy.random.shuffle(shuffled)

    result = histogram(shuffled, reverse_indices='ri')

    # The data are ints and the binsize is one, so the value of a randomly
    # picked element also identifies its bin.
    # If reverse indices work, looking that bin up should hand back the
    # same value.
    position = numpy.random.randint(0, 256, (1))[0]
    bin_no = shuffled[position]

    ri = result['ri']
    start, stop = ri[bin_no], ri[bin_no + 1]
    found = shuffled[ri[start:stop]]
    self.assertEqual(bin_no, found)
def test_reverse_indices2(self):
    """
    Check that multiple values held in a single bin are correctly
    returned by the reverse indices.
    """
    # Work on a shuffled copy so the elements are in a random order.
    shuffled = self.array2.copy()
    numpy.random.shuffle(shuffled)

    result = histogram(shuffled, reverse_indices='ri', binsize=5)
    ri = result['ri']

    # For an array in the range 0 -> 255 with a binsize of 5, the data
    # satisfying 100 <= x < 105 sit in the bin at index 20 (the 21st bin).
    # Every bin is populated, so there is no need to check that
    # ri[21] > ri[20].
    start, stop = ri[20], ri[21]
    data = shuffled[ri[start:stop]]

    # The order should match as well. If not then numpy has changed.
    in_range = (shuffled >= 100) & (shuffled < 105)
    control = shuffled[in_range]
    self.assertEqual((control - data).sum(), 0)
def obj_centroid(array):
    """
    Computes the centroid of each object in array.

    Objects are taken to be the non-empty histogram bins of array, starting
    at the value 1. For each one the member positions are converted to
    per-dimension indices and averaged. Empty bins are skipped.
    """
    shape = array.shape

    result = histogram(array.flatten(), Min=1, reverse_indices='ri')
    counts = result['histogram']
    ri = result['ri']

    centroids = []
    for idx in range(counts.shape[0]):
        if counts[idx] != 0:
            # Flat positions of the pixels that fell into this bin.
            members = ri[ri[idx]:ri[idx + 1]]
            coords = numpy.array(array_indices(shape, members,
                                               dimensions=True))
            # Average along axis 1, i.e. over the member pixels.
            centroids.append(numpy.mean(coords, axis=1))

    return centroids
def test_reverse_indices3(self):
    """
    Check that the reverse indices keyword works across several bins and
    values at once.
    """
    # A random floating point array scaled to the range 0-20.
    scaled = (self.array4) * 20

    # With min=0 the bins should start at 0, 2.5, 5, 7.5 etc.
    result = histogram(scaled, reverse_indices='ri', min=0, binsize=2.5)

    # Reference: the values satisfying 7.5 <= x < 17.5.
    wanted = (scaled >= 7.5) & (scaled < 17.5)
    control = numpy.sort(scaled[wanted])

    ri = result['ri']
    # If the locations keyword was set then the starting locations of each
    # bin would be:
    # [ 0. , 2.5, 5. , 7.5, 10. , 12.5, 15. , 17.5]
    # So bins 3, 4, 5 and 6 are wanted (bins start at 0).
    start, stop = ri[3], ri[7]
    # Sort so that an element by element difference can be taken.
    data = numpy.sort(scaled[ri[start:stop]])
    self.assertEqual((control - data).sum(), 0)
def obj_rectangularity(array):
    """
    Calculates rectangularity per object.

    Rectangularity is computed as the number of pixels in an object divided
    by the area of its bounding box. Objects are taken to be the non-empty
    histogram bins of array, starting at the value 1; empty bins are
    skipped.

    :param array:
        A numpy array; presumably a labelled array in which 0 marks the
        background.

    :return:
        A list of floating point ratios, one per non-empty bin, in bin
        order.
    """
    dims = array.shape

    h = histogram(array.flatten(), Min=1, reverse_indices='ri')
    hist = h['histogram']
    ri = h['ri']

    rect = []
    for i in range(hist.shape[0]):
        if hist[i] == 0:
            continue
        # Per-dimension indices of the pixels that fell into this bin.
        idx = numpy.array(array_indices(dims, ri[ri[i]:ri[i + 1]],
                                        dimensions=True))
        min_yx = numpy.min(idx, axis=1)
        max_yx = numpy.max(idx, axis=1)
        diff = max_yx - min_yx + 1  # Add one to account for zero based index
        bbox_area = numpy.prod(diff)
        # Force true division: with integer operands the ratio would be
        # truncated to 0 or 1 under Python 2 division rules.
        rect.append(float(hist[i]) / bbox_area)

    return rect
def binary_recursive_histogram(image, outfile, all_neighbours=False):
    """
    Recursively applies a histogram to an image with multiple bands.
    Designed for analysing binary results by finding counts of 1
    within each band, and writes a text report of the findings.

    :param image:
        A string containing the full file path name of a multi-band binary
        image.

    :param outfile:
        The output filename of the textfile that will contain the report.
        A name without a directory part is written to the current working
        directory; a missing output directory is created.

    :param all_neighbours:
        If set then pixel connectivity will be 8 neighbours rather than 4.
        Default is 4.

    :return:
        None. The report is written to disk.

    :author:
        Josh Sixsmith; [email protected], [email protected]

    :history:
        * 07/12/2013: Created
        * 11/12/2013: Added more stats to the output

    :copyright:
        Copyright (c) 2013, Josh Sixsmith
        All rights reserved.

        Redistribution and use in source and binary forms, with or without
        modification, are permitted provided that the following conditions are met:

        1. Redistributions of source code must retain the above copyright notice, this
           list of conditions and the following disclaimer.
        2. Redistributions in binary form must reproduce the above copyright notice,
           this list of conditions and the following disclaimer in the documentation
           and/or other materials provided with the distribution.

        THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
        ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
        WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
        DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
        ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
        (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
        LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
        ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
        (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
        SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

        The views and conclusions contained in the software and documentation are those
        of the authors and should not be interpreted as representing official policies,
        either expressed or implied, of the FreeBSD Project.
    """
    # Resolve the report path up front. Previously the path variable was
    # only bound when outfile had no directory part, so every other call
    # failed with a NameError when the report was opened.
    out_dir = os.path.dirname(outfile)
    if out_dir == '':
        # Output to the current directory
        outfname = os.path.join(os.getcwd(), outfile)
    else:
        outfname = outfile
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)

    ds = gdal.Open(image)
    nb = ds.RasterCount

    # Initialise the result array. Columns are: flagged pixels, number of
    # objects, smallest object, largest object.
    # Only dealing with integers at this point in time. This could change
    # in future.
    result = numpy.zeros((nb, 4), dtype='int')
    result_list = []

    # Pixel connectivity used when segmenting each band.
    if all_neighbours:
        kernel = [[1, 1, 1], [1, 1, 1], [1, 1, 1]]
    else:
        kernel = [[0, 1, 0], [1, 1, 1], [0, 1, 0]]

    # Loop over the bands
    for i in range(nb):
        band = ds.GetRasterBand(i + 1)  # GDAL uses start index of 1
        img = band.ReadAsArray()
        band.FlushCache()

        # Generate the histogram
        h = histogram(img.flatten(), Min=0)
        hist = h['histogram']

        # If hist.shape[0] == 1 then only the value of zero was found, so
        # there is nothing to segment and an all-zero row is reported.
        if hist.shape[0] >= 2:
            # Segment the binary mask
            label_arr, nlabels = ndimage.label(img, structure=kernel)

            # Get min, max and mean areas of the segmented regions.
            # Min=1 leaves the background label (0) out.
            h2 = histogram(label_arr.flatten(), Min=1)
            hist2 = h2['histogram']
            mn_area = numpy.min(hist2)
            mx_area = numpy.max(hist2)
            avg_area = numpy.mean(hist2)

            result[i, 0] = hist[1]
            result[i, 1] = nlabels
            result[i, 2] = mn_area
            result[i, 3] = mx_area
            result_list.append('%i, %i, %i, %i, %i, %f\n' % (i + 1, hist[1], nlabels, mn_area, mx_area, avg_area))
        else:
            result_list.append('%i, %i, %i, %i, %i, %f\n' % (i + 1, 0, 0, 0, 0, 0.0))

    # Refer back to a 1 based band index
    mx_loc = numpy.argmax(result[:, 0]) + 1
    mx_seg_loc = numpy.argmax(result[:, 1]) + 1

    # The context manager closes the report even if a write fails.
    with open(outfname, 'w') as report:
        report.write('Results from %s image file\n' % image)
        report.write('Band with most flagged pixels: %i\n' % mx_loc)
        report.write('Band with most objects: %i\n' % mx_seg_loc)
        report.write('\n')
        report.write('Band, Flagged Pixels, Number of Objects, Smallest Object, Largest Object, Average Object Size\n')
        for res in result_list:
            report.write(res)
def otsu_threshold(image, Binsize=None, Max=None, Min=None, Nbins=None, Fast=True, Apply=False):
    """
    Calculates the Otsu threshold.

    Separates the input array into background and foreground components
    by finding the maximum between class variance.

    :param image:
        A numpy array of maximum three dimensions. For a 3D array the first
        axis is treated as the band axis and each band is thresholded
        separately.

    :param Fast:
        Default is True. Will find the optimal threshold using the fast
        method which approximates the mean value per class.

    :param Apply:
        Default is False. If True then a mask/masks of the same dimensions
        as image will be returned. Otherwise only the threshold/thresholds
        will be returned.

    :param Binsize:
        (Optional) The binsize (Default is 1) to be used for creating the
        histogram.

    :param Max:
        (Optional) The maximum value to be used in creating the histogram.
        If not specified the array will be searched for max.

    :param Min:
        (Optional) The minimum value to be used in creating the histogram.
        If not specified the array will be searched for min.

    :param Nbins:
        (Optional) The number of bins to be used for creating the
        histogram. If set binsize is calculated as
        (max - min) / (nbins - 1), and the max value will be adjusted to
        (nbins*binsize + min).

    :return:
        For 1D and 2D input a single threshold, or a boolean mask if Apply
        is True. For 3D input a list with one threshold per band, or a
        boolean array of masks if Apply is True. None is returned (after
        printing a message) when image is None or has more than three
        dimensions; a 0-dimensional array also yields None.

    :author:
        Josh Sixsmith, [email protected]

    :history:
        * 06/02/2013--Created
        * 04/06/2013--Keywords Binsize, Nbins, Min, Max, Fast and Apply added.

    :sources:
        http://www.labbookpages.co.uk/software/imgProc/otsuThreshold.html
        http://www.codeproject.com/Articles/38319/Famous-Otsu-Thresholding-in-C
        http://en.wikipedia.org/wiki/Cumulative_frequency_analysis
        http://en.wikipedia.org/wiki/Otsu's_method
    """
    # Identity test: `image == None` is evaluated elementwise for numpy
    # arrays and cannot be used as a condition.
    if image is None:
        print('No input image!!!')
        return None

    dims = image.shape
    ndim = len(dims)
    if ndim > 3:
        print('Incorrect shape!; More than 3 dimensions is not a standard image.')
        return None

    solver = _otsu_fast if Fast else _otsu_exact

    if ndim == 3:
        # For multi-band images, return a list of thresholds
        bands = dims[0]
        thresholds = [solver(image[b].flatten(), Binsize, Max, Min, Nbins)
                      for b in range(bands)]
        if Apply:
            masks = numpy.zeros(dims, dtype='bool')
            for b in range(bands):
                masks[b] = image[b] > thresholds[b]
            return masks
        return thresholds

    if ndim == 2:
        values = image.flatten()
    elif ndim == 1:
        values = image
    else:
        # 0-dimensional input has never been handled; keep returning None.
        return None

    threshold = solver(values, Binsize, Max, Min, Nbins)
    if Apply:
        return image > threshold
    return threshold


def _otsu_fast(values, Binsize, Max, Min, Nbins):
    """
    Return the Otsu threshold of a 1D array using the vectorised method,
    in which the class means are approximated from the bin locations.
    """
    h = histogram(values, locations='loc', omin='omin', binsize=Binsize,
                  Max=Max, Min=Min, nbins=Nbins)
    hist = h['histogram']
    omin = h['omin']
    loc = h['loc']
    binsz = numpy.abs(loc[1] - loc[0])

    cumu_hist = numpy.cumsum(hist, dtype=float)
    rcumu_hist = numpy.cumsum(hist[::-1], dtype=float)  # reverse
    total = cumu_hist[-1]

    # probabilities per threshold class
    bground_weights = cumu_hist / total
    fground_weights = 1 - bground_weights  # reverse probability

    # Mean of the background and foreground classes per candidate
    # threshold; the final element of each is left at zero.
    mean_bground = numpy.zeros(hist.shape[0])
    mean_fground = numpy.zeros(hist.shape[0])
    mean_bground[0:-1] = (numpy.cumsum(hist * loc) / cumu_hist)[0:-1]
    mean_fground[0:-1] = ((numpy.cumsum(hist[::-1] * loc[::-1]) /
                           rcumu_hist)[::-1])[1:]

    sigma_between = (bground_weights * fground_weights *
                     (mean_bground - mean_fground) ** 2)

    # Convert the winning bin index back into data units.
    thresh = numpy.argmax(sigma_between)
    return (thresh * binsz) + omin


def _otsu_exact(values, Binsize, Max, Min, Nbins):
    """
    Return the Otsu threshold of a 1D array using the exact method, in
    which the class means are computed from the data at every candidate bin.
    """
    h = histogram(values, reverse_indices='ri', omin='omin', locations='loc',
                  binsize=Binsize, Max=Max, Min=Min, nbins=Nbins)
    hist = h['histogram']
    ri = h['ri']
    loc = h['loc']
    nbins = hist.shape[0]

    nB = numpy.cumsum(hist, dtype='int64')
    total = nB[-1]
    nF = total - nB

    # Shouldn't be a problem to start at zero; the between class variance
    # should (by design) always be positive.
    best_sigma = 0
    # Set to loc[0], thresholds can be negative
    optimal_t = loc[0]

    first = ri[0]
    last = ri[nbins]
    for i in range(nbins):
        split = ri[i + 1]
        # Both classes must be non-empty: bin zero up to threshold 'i',
        # then the bins after 'i' up to nbins.
        if (split > first) and (last > split):
            mean_b = numpy.mean(values[ri[first:split]], dtype='float64')
            mean_f = numpy.mean(values[ri[split:last]], dtype='float64')
            sigma_btwn = nB[i] * nF[i] * ((mean_b - mean_f) ** 2)
            if sigma_btwn > best_sigma:
                best_sigma = sigma_btwn
                optimal_t = loc[i]

    return optimal_t
def hist_equal(array, BinSIZE=None, MaxV=None, MinV=None, Omax=None, Omin=None, Percent=None, Top=None, Histogram_Only=False):
    """
    Image contrast enhancement.
    Replicates the hist_equal function available within IDL
    (Interactive Data Language, EXELISvis).
    Converts an array to a histogram equalised byte array.

    :param array:
        A numpy array of any type.

    :param BinSIZE:
        The binsize to be used in constructing the histogram. The default
        is 1 for arrays with a datatype of byte (uint8). For arrays of
        other datatypes the binsize is computed as (MaxV - MinV) / 5000.
        (floating point).

    :param MaxV:
        The maximum data value to be considered in the contrast stretch.
        The default is 255 for arrays with a datatype of byte (uint8).
        Otherwise the maximum data value of array is used.

    :param MinV:
        The minimum data value to be considered in the contrast stretch.
        The default is 0 for arrays with a datatype of byte (uint8).
        Otherwise the minimum data value of array is used.

    :param Omax:
        (Optional) A string name used to refer to the dictionary key that
        will contain the maximum value used in generating the histogram.

    :param Omin:
        (Optional) A string name used to refer to the dictionary key that
        will contain the minimum value used in generating the histogram.

    :param Percent:
        A scalar strictly between 0 and 100 that will be used to stretch
        the array histogram.

    :param Top:
        The maximum value of the scaled result. Default is 255. The minimum
        value of the scaled result is always 0.

    :param Histogram_Only:
        Type Bool. Default is False. If set to True, then a numpy array of
        type int32 will be returned that contains the cumulative sum of the
        histogram.

    :return:
        Varies. If Histogram_Only is set to True, then the cumulative sum
        of the histogram will be returned. Otherwise a byte scaled version
        of array is returned. In both cases, when Omin and/or Omax is given
        as a string, a tuple of (result, dictionary) is returned in which
        the dictionary holds the requested values.

    :raises Exception:
        If Percent is given and is not strictly between 0 and 100.

    Example:

        >>> # 100x100 array of samples from N(3, 6.25)
        >>> a = 2.5 * numpy.random.randn(100,100) + 3
        >>> scl_a = hist_equal(a)
        >>> scl_pct_a = hist_equal(a, Percent=2)

    :author:
        Josh Sixsmith; [email protected]; [email protected]

    :history:
       * 2013/10/24: Created

    :copyright:
        Copyright (c) 2013, Josh Sixsmith
        All rights reserved.

        Redistribution and use in source and binary forms, with or without
        modification, are permitted provided that the following conditions are met:

        1. Redistributions of source code must retain the above copyright notice, this
           list of conditions and the following disclaimer.
        2. Redistributions in binary form must reproduce the above copyright notice,
           this list of conditions and the following disclaimer in the documentation
           and/or other materials provided with the distribution.

        THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
        ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
        WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
        DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
        ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
        (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
        LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
        ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
        (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
        SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

        The views and conclusions contained in the software and documentation are those
        of the authors and should not be interpreted as representing official policies,
        either expressed or implied, of the FreeBSD Project.
    """

    def linear_percent(cumulative_histogram, percent, min_, Binsize):
        """
        Image contrast enhancement.
        Given a cumulative histogram, upper and lower DN values are
        computed and returned.

        :param cumulative_histogram:
            A 1D numpy array. Must be the cumulative sum of a histogram.

        :param percent:
            A value in the range of 0-100.

        :param min_:
            The minimum value to be used in determining the stretch.

        :param Binsize:
            The binsize used in constructing the histogram from which the
            cumulative histogram was derived.

        :return:
            Two scalars, MaxDN and MinDN, corresponding to the maximum and
            minimum values of the original array to be used in the contrast
            stretch.

        :author:
            Josh Sixsmith; [email protected]; [email protected]

        :history:
            * 2013/10/24: Created
        """
        ch = cumulative_histogram
        if len(ch.shape) != 1:
            raise Exception('Only 1D arrays are supported.')

        # Calculate upper and lower values
        low = (percent / 100.)
        high = (1 - (percent / 100.))

        # number of elements
        n = ch[-1]
        last = ch.shape[0] - 1

        # Step to the far end of any flat run of the cumulative histogram.
        # The bounds checks stop the walk at the ends of the array; without
        # them a run reaching the last bin raised an IndexError and a run
        # reaching the first bin wrapped around to the end.
        x1 = numpy.searchsorted(ch, n * low)
        while x1 < last and ch[x1] == ch[x1 + 1]:
            x1 = x1 + 1

        x2 = numpy.searchsorted(ch, n * high)
        while x2 > 0 and ch[x2] == ch[x2 - 1]:
            x2 = x2 - 1

        # Convert bin positions back into data units.
        minDN = x1 * Binsize + min_
        maxDN = x2 * Binsize + min_

        return maxDN, minDN

    is_byte = (array.dtype == 'uint8')

    # Apply the documented defaults only where the caller gave no value.
    # Previously byte arrays had MaxV/MinV overwritten unconditionally.
    if MaxV is None:
        MaxV = 255 if is_byte else numpy.amax(array)

    if MinV is None:
        MinV = 0 if is_byte else numpy.amin(array)

    if Top is None:
        Top = 255

    if BinSIZE is None:
        BinSIZE = 1 if is_byte else (MaxV - MinV) / 5000.

    # Retrieve the dimensions of the array
    dims = array.shape

    h = histogram(array.flatten(), binsize=BinSIZE, max=MaxV, min=MinV,
                  omax='omax', omin='omin')

    # Collect omin and omax if the caller asked for them
    want_omin = isinstance(Omin, str)
    want_omax = isinstance(Omax, str)
    return_extra = want_omin or want_omax
    d = {}
    if want_omin:
        d[Omin] = h['omin']
    if want_omax:
        d[Omax] = h['omax']

    # Zeroing the first element of the histogram
    hist = h['histogram']
    hist[0] = 0

    cumu_hist = numpy.cumsum(hist, dtype='float')

    if Histogram_Only:
        cumu_hist = cumu_hist.astype('int32')
        if return_extra:
            return cumu_hist, d
        return cumu_hist

    # Evaluate a linear percent stretch
    if Percent is not None:
        if (Percent <= 0) or (Percent >= 100):
            raise Exception('Percent must be between 0 and 100')

        maxDN, MinDN = linear_percent(cumu_hist, percent=Percent, min_=MinV,
                                      Binsize=BinSIZE)
        scl = bytscl(array, Max=maxDN, Min=MinDN, Top=Top)
        if return_extra:
            return scl, d
        return scl

    scl_lookup = bytscl(cumu_hist, Top=Top)

    # Apply the scl_lookup in order to retrieve the new scaled value.
    # Dividing by the binsize gives the bin position; this also covers byte
    # data, where the default binsize is 1. (A separate byte-only branch
    # used to exist but could never run, because it compared type(array)
    # with a string.)
    # Clip the lower bounds
    arr = array.clip(min=MinV)
    bin_idx = numpy.floor((arr - MinV) / BinSIZE).astype('int')
    # Values above MaxV fall past the last bin; send them to the top bin
    # instead of indexing beyond the lookup table.
    bin_idx = numpy.minimum(bin_idx, scl_lookup.shape[0] - 1)
    scl = (scl_lookup[bin_idx.flatten()]).reshape(dims)

    if return_extra:
        return scl, d
    return scl
# Create the mask via the thresholds mask = (array >= lower) & (array <= upper) # The label function segments the image into contiguous blobs label_array, num_labels = ndimage.label(mask, structure=s) # Find the labels associated with the ROI labels = label_array[ROIPixels] mx_lab = numpy.max(labels) # Find unique labels, excluding zero (background) ulabels = (numpy.unique(labels[labels > 0]) ).tolist() # Convert to list; Makes for neater indexing # Generate a histogram to find the label locations h = histogram(label_array.flatten(), min=0, max=mx_lab, reverse_indices='ri') hist = h['histogram'] ri = h['ri'] for lab in ulabels: if hist[lab] == 0: continue idx.extend(ri[ri[lab]:ri[lab + 1]]) idx = numpy.array(idx) idx = array_indices(dims, idx, dimensions=True) return idx
def plotHistogram(fname, out_dir):
    """
    Plots the histogram of every band of a scene and reports the largest
    non-zero DN found in each band.

    Two files are written to a sub-directory of out_dir named after the
    base name of fname (created if needed):

    * histogram_plots.pdf: one page per band, followed by a single page
      combining all bands except band 9.
    * histogram_results.txt: one line per band giving the band number,
      the maximum DN with a non-zero count, and that count.

    :param fname:
        The path used to open the scene via SceneDataset.

    :param out_dir:
        The directory under which the output sub-directory is created.

    :return:
        None.
    """
    ds = SceneDataset(fname)

    # Retrieve and create the output base directory
    base_dir = os.path.basename(fname)
    out_dir = os.path.join(out_dir, base_dir)
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    # Gather the band numbers, group by group.
    bands = []
    for group in ('REFLECTIVE', 'THERMAL', 'ATMOSPHERE'):
        bands.extend(ds._bands[group])

    pdf_name = os.path.join(out_dir, 'histogram_plots.pdf')
    plot_file = PdfPages(pdf_name)

    mx_DN_list = []
    hist_list = []
    lab_list = []

    # try/finally makes sure the PDF is closed even if plotting fails.
    try:
        for band in bands:
            b = ds.GetRasterBand(band)
            img = b.ReadAsArray()
            h = histogram(img.ravel(), max=65535)
            hist = h['histogram']
            hist[0] = 0  # Ignore the no-data value

            wh = numpy.where(hist != 0)
            mx = numpy.max(wh)
            mx_DN_list.append('Band: %i, Max DN: %i, Count: %i\n' % (band, mx, hist[mx]))

            # for plotting
            lab = 'Band %i' % band
            if band != 9:
                # Store the label together with its histogram so the two
                # lists stay aligned for the combined plot whatever the
                # band order.
                hist_list.append(hist.copy())
                lab_list.append(lab)

            plt.plot(hist, label=lab)
            plt.legend()
            plt.suptitle(base_dir)
            plot_file.savefig()
            plt.close()

        # Now to output a single plot containing all (except band 9)
        # histograms
        for band_hist, lab in zip(hist_list, lab_list):
            plt.plot(band_hist, label=lab)

        plt.legend()
        plt.suptitle(base_dir)
        plot_file.savefig()
    finally:
        plot_file.close()
    plt.close()

    # The context manager closes the report even if a write fails.
    with open(os.path.join(out_dir, 'histogram_results.txt'), 'w') as out_file:
        for line in mx_DN_list:
            out_file.write(line)
def binary_recursive_histogram(image, outfile, all_neighbours=False):
    """
    Recursively applies a histogram to an image with multiple bands.
    Designed for the analysing the binary results by finding counts of 1
    within each band.

    For every band the number of pixels equal to 1, the number of
    connected objects, and the smallest, largest and average object size
    are written to a text report.

    :param image:
        A string containing the full file path name of a multi-band binary
        image.

    :param outfile:
        The output filename of the textfile that will contain the report.
        A name without a directory part is written to the current working
        directory; otherwise the directory is created when missing.

    :param all_neighbours:
        If set then pixel connectivity will be 8 neighbours rather than 4.
        Default is 4.

    :return:
        None. The report is written to disk.

    :author:
        Josh Sixsmith; [email protected], [email protected]

    :history:
       * 07/12/2013: Created
       * 11/12/2013: Added more stats to the output

    :copyright:
        Copyright (c) 2013, Josh Sixsmith
        All rights reserved.

        Redistribution and use in source and binary forms, with or without
        modification, are permitted provided that the following conditions are met:

        1. Redistributions of source code must retain the above copyright notice, this
           list of conditions and the following disclaimer.
        2. Redistributions in binary form must reproduce the above copyright notice,
           this list of conditions and the following disclaimer in the documentation
           and/or other materials provided with the distribution.

        THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
        ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
        WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
        DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
        ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
        (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
        LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
        ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
        (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
        SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

        The views and conclusions contained in the software and documentation are those
        of the authors and should not be interpreted as representing official policies,
        either expressed or implied, of the FreeBSD Project.
    """
    # Resolve the report location. `outfname` is bound on every path;
    # previously it was only bound when `outfile` had no directory part,
    # so any other input failed with a NameError when opening the report.
    out_dir = os.path.dirname(outfile)
    if out_dir == '':
        # Output to the current directory
        outfname = os.path.join(os.getcwd(), outfile)
    else:
        outfname = outfile
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)

    ds = gdal.Open(image)
    nb = ds.RasterCount

    # Pixel connectivity used to segment the binary mask
    if all_neighbours:
        kernel = [[1, 1, 1], [1, 1, 1], [1, 1, 1]]
    else:
        kernel = [[0, 1, 0], [1, 1, 1], [0, 1, 0]]

    # Per band: [flagged pixel count, object count].
    # Only dealing with integers at this point in time.
    result = numpy.zeros((nb, 2), dtype='int')
    result_list = []

    row_fmt = '%i, %i, %i, %i, %i, %f\n'

    # Loop over the bands
    for i in range(nb):
        band = ds.GetRasterBand(i + 1)  # GDAL uses start index of 1
        img = band.ReadAsArray()
        band.FlushCache()

        # Generate the histogram
        hist = histogram(img.flatten(), Min=0)['histogram']

        # If hist.shape[0] == 1 then only the value of zero was found.
        if hist.shape[0] < 2:
            result_list.append(row_fmt % (i + 1, 0, 0, 0, 0, 0.0))
            continue

        # The object statistics are only reported for bands that contain
        # flagged pixels, so only those bands are segmented.
        label_arr, nlabels = ndimage.label(img, structure=kernel)

        # Get min, max and mean areas of the segmented regions
        hist2 = histogram(label_arr.flatten(), Min=1)['histogram']
        mn_area = numpy.min(hist2)
        mx_area = numpy.max(hist2)
        avg_area = numpy.mean(hist2)

        result[i, 0] = hist[1]
        result[i, 1] = nlabels
        result_list.append(row_fmt % (i + 1, hist[1], nlabels, mn_area,
                                      mx_area, avg_area))

    # Refer back to a 1 based band index
    mx_loc = numpy.argmax(result[:, 0]) + 1
    mx_seg_loc = numpy.argmax(result[:, 1]) + 1

    with open(outfname, 'w') as report:
        report.write('Results from %s image file\n' % image)
        report.write('Band with most flagged pixels: %i\n' % mx_loc)
        report.write('Band with most objects: %i\n' % mx_seg_loc)
        report.write('\n')
        report.write(
            'Band, Flagged Pixels, Number of Objects, Smallest Object, Largest Object, Average Object Size\n'
        )
        report.writelines(result_list)
def _otsu_fast(values, Binsize, Max, Min, Nbins):
    """Return the Otsu threshold of 1D `values` using cumulative per-bin class means."""
    h = histogram(values, locations='loc', omin='omin', binsize=Binsize,
                  Max=Max, Min=Min, nbins=Nbins)
    hist = h['histogram']
    omin = h['omin']
    loc = h['loc']

    nbins = hist.shape[0]
    # A single-bin histogram has no second location to difference against;
    # the bin size is irrelevant then because the only candidate is bin 0.
    binsz = numpy.abs(loc[1] - loc[0]) if len(loc) > 1 else 0

    cumu_hist = numpy.cumsum(hist, dtype=float)
    rcumu_hist = numpy.cumsum(hist[::-1], dtype=float)  # reverse
    total = cumu_hist[-1]

    # Work in floating point so count * location cannot overflow should
    # the histogram be returned in a narrow integer dtype.
    weighted = hist.astype(float) * loc

    mean_bground = numpy.zeros(nbins)
    mean_fground = numpy.zeros(nbins)

    # Empty bins at either end of the histogram give 0/0 in the class
    # means. Silence the warnings here and neutralise the results below.
    with numpy.errstate(divide='ignore', invalid='ignore'):
        # probabilities per threshold class
        bground_weights = cumu_hist / total
        fground_weights = 1 - bground_weights  # reverse probability

        # Mean of bins [0..i] and of bins [i+1..nbins-1] for threshold i
        mean_bground[0:-1] = (numpy.cumsum(weighted) / cumu_hist)[0:-1]
        mean_fground[0:-1] = ((numpy.cumsum(weighted[::-1]) /
                               rcumu_hist)[::-1])[1:]

        sigma_between = (bground_weights * fground_weights *
                         (mean_bground - mean_fground)**2)

    # A class with no members has no between class variance. Without this
    # the NaN entries would be selected by argmax.
    sigma_between[~numpy.isfinite(sigma_between)] = 0

    thresh = numpy.argmax(sigma_between)
    return (thresh * binsz) + omin


def _otsu_exact(values, Binsize, Max, Min, Nbins):
    """Return the Otsu threshold of 1D `values` using class means computed from the data."""
    h = histogram(values, reverse_indices='ri', omin='omin', locations='loc',
                  binsize=Binsize, Max=Max, Min=Min, nbins=Nbins)
    hist = h['histogram']
    ri = h['ri']
    loc = h['loc']

    nbins = hist.shape[0]

    nB = numpy.cumsum(hist, dtype='int64')
    total = nB[-1]
    nF = total - nB

    # should't be a problem to start at zero. best_sigma should (by design)
    # always be positive
    best_sigma = 0
    # set to loc[0], thresholds can be negative
    optimal_t = loc[0]

    first = ri[0]
    last = ri[nbins]
    for i in range(nbins):
        split = ri[i + 1]
        # Only evaluate thresholds that leave members in both classes:
        # bin zero to the threshold 'i', then 'i' to nbins
        if split > first and last > split:
            mean_b = numpy.mean(values[ri[first:split]], dtype='float64')
            mean_f = numpy.mean(values[ri[split:last]], dtype='float64')
            sigma_btwn = nB[i] * nF[i] * ((mean_b - mean_f)**2)
            if sigma_btwn > best_sigma:
                best_sigma = sigma_btwn
                optimal_t = loc[i]

    return optimal_t


def otsu_threshold(image, Binsize=None, Max=None, Min=None, Nbins=None,
                   Fast=True, Apply=False):
    """
    Calculates the Otsu threshold.

    Separates the input array into background and foreground components
    by finding the maximum between class variance.

    :param image:
        A numpy array of maximum three dimensions. A 3D array is treated
        as a stack of bands along the first axis, and each band is
        thresholded independently.

    :param Fast:
        Default is True. Will find the optimal threshold using the fast
        method which approximates the mean value per class.

    :param Apply:
        Default is False. If True then a mask/masks of the same dimensions
        as image will be returned (True where image > threshold).
        Otherwise only the threshold/thresholds will be returned.

    :param Binsize:
        (Optional) The binsize (Default is 1) to be used for creating the
        histogram.

    :param Max:
        (Optional) The maximum value to be used in creating the histogram.
        If not specified the array will be searched for max.

    :param Min:
        (Optional) The minimum value to be used in creating the histogram.
        If not specified the array will be searched for min.

    :param Nbins:
        (Optional) The number of bins to be used for creating the
        histogram. If set binsize is calculated as
        (max - min) / (nbins - 1), and the max value will be adjusted to
        (nbins*binsize + min).

    :return:
        For 1D and 2D input a single threshold, or a boolean mask when
        Apply is True. For 3D input a list with one threshold per band, or
        a boolean array of the same shape when Apply is True. None is
        returned when image is None, has more than three dimensions or is
        zero dimensional.

    :author:
        Josh Sixsmith, [email protected]

    :history:
        * 06/02/2013--Created
        * 04/06/2013--Keywords Binsize, Nbins, Min, Max, Fast and Apply
          added.

    :sources:
        http://www.labbookpages.co.uk/software/imgProc/otsuThreshold.html
        http://www.codeproject.com/Articles/38319/Famous-Otsu-Thresholding-in-C
        http://en.wikipedia.org/wiki/Cumulative_frequency_analysis
        http://en.wikipedia.org/wiki/Otsu's_method
    """
    # Identity test: `image == None` is evaluated element-wise on an array
    # and its truth value is ambiguous.
    if image is None:
        print('No input image!!!')
        return None

    dims = image.shape
    ndims = len(dims)

    if ndims > 3:
        print('Incorrect shape!; More than 3 dimensions is not a standard image.')
        return None

    if ndims == 0:
        # Nothing to threshold
        return None

    find_threshold = _otsu_fast if Fast else _otsu_exact

    if ndims == 3:
        # For multi-band images, return a list of thresholds
        thresholds = [find_threshold(image[b].flatten(), Binsize, Max, Min,
                                     Nbins)
                      for b in range(dims[0])]
        if not Apply:
            return thresholds

        masks = numpy.zeros(dims, dtype='bool')
        for b, thresh in enumerate(thresholds):
            masks[b] = image[b] > thresh
        return masks

    # 1D data is used as is, 2D data is flattened first
    values = image if ndims == 1 else image.flatten()
    threshold = find_threshold(values, Binsize, Max, Min, Nbins)

    if Apply:
        return image > threshold
    return threshold
def main(indir, outdir, logpath, pattern, vector_file, outfname):
    """
    The main processing routine.

    Rasterises the vector features onto the grid of the first water
    extent image, then for every water extent image and every feature
    writes one csv row holding the pixel count of each water/mask value
    found within that feature.

    :param indir:
        A string containing the file system pathname to a directory
        containing the water extent image files.

    :param outdir:
        A string containing the file system pathname to a directory
        that will contain the result output. The process exits with
        status 1 if it does not exist.

    :param logpath:
        A string containing the file system pathname to a directory
        that will contain the operation system logging information.

    :param pattern:
        A string containing the image extents file extension pattern,
        eg '*.tif'.

    :param vector_file:
        A string containing the file system pathname to an OGR
        compatible vector file.

    :param outfname:
        A string containing the file name of the output csv file. It is
        created inside a lon_lat sub-directory of `outdir`.

    :return:
        Nothing, main() acts as a procedure.
    """
    # setup logging file ... the log is named
    # waterExtentVectorSummary_hostname_pid.log and lives in logpath
    log_name = "waterExtentVectorSummary_%s_%d.log" % (os.uname()[1],
                                                       os.getpid())
    logPath = os.path.join(logpath, log_name)
    logging.basicConfig(filename=logPath,
                        format='%(asctime)s %(levelname)s: %(message)s',
                        datefmt='%d/%m/%Y %H:%M:%S', level=logging.INFO)

    baseOutputDir = Directory(outdir)
    if not baseOutputDir.exists():
        logging.error("%s does not exist" % baseOutputDir.getPath())
        sys.exit(1)

    # Get a list of water_extent files
    files = getFiles(indir, pattern)

    # Get the water_extent objects and sort them by date
    sortedWaterExtents, cellId = getWaterExtents(files)

    # lat and lon will be helpful
    lon = cellId[0]
    lat = cellId[1]

    # we output to a lon_lat subdirectory in the base output directory
    # create it
    outputPath = "%s/%03d_%04d" % (baseOutputDir.getPath(), lon, lat)
    outputDir = Directory(outputPath)
    outputDir.makedirs()
    logging.info("output directory is %s" % outputDir.getPath())

    # Rasterise the features
    # We can use the first image file as the base
    segments_ds = Rasterise(RasterFilename=files[0],
                            VectorFilename=vector_file)
    logging.info("Rasterising features.")
    segments_ds.rasterise()

    # Extract the array
    veg2rast = segments_ds.segemented_array

    # Initialise the segment visitor
    seg_vis = SegmentVisitor(veg2rast)

    # Get specific attribute records
    logging.info("Opening vector file %s" % vector_file)
    vec_ds = ogr.Open(vector_file)
    layer = vec_ds.GetLayer()

    # Dicts to hold feature names and hydro_id, keyed by FID
    feature_names = {}
    hydro_id = {}

    # Mapping of a FID to its segment id (segment id = FID + 1)
    fid2seg = {}

    logging.info("Gathering attribute information for each feature.")
    # These Field Id's are unique to NGIG's vector datasets
    for feature in layer:
        fid = feature.GetFID()
        name = feature.GetField("NAME")
        # Replace any occurences of None with UNKNOWN
        feature_names[fid] = 'UNKNOWN' if name is None else name
        hydro_id[fid] = feature.GetField("AUSHYDRO_I")
        fid2seg[fid] = fid + 1

    # Go back to the start of the vector file
    layer.ResetReading()

    # TODO Define dict lookup for potential segments up to max segment

    # Initialise the output file
    full_fname = os.path.join(outputDir.getPath(), outfname)
    logging.info("Creating output summary file %s" % full_fname)

    # Define the headings for the output file
    headings = ("Time Slice, Time Stamp, Feature Name, AUSHYDRO_ID, "
                "FID, Total Pixel Count, WATER_NOT_PRESENT, "
                "NO_DATA, MASKED_NO_CONTIGUITY, "
                "MASKED_SEA_WATER, MASKED_TERRAIN_SHADOW, "
                "MASKED_HIGH_SLOPE, MASKED_CLOUD_SHADOW, "
                "MASKED_CLOUD, WATER_PRESENT\n")

    row_template = ('{fname}, {timestamp}, {feature_name}, {hydro_id}, {FID}, '
                    '{area}, {not_water}, {no_data}, {contiguity}, {sea}, '
                    '{terrain}, {slope}, {cloud_shadow}, {cloud}, {water}\n')

    # A WaterTile stores 1 data layer encoded as unsigned BYTE values as
    # described in the WaterConstants.py file.
    #
    # Note - legal (decimal) values are:
    #
    #        0:  no water in pixel
    #        1:  no data (one or more bands) in source NBAR image
    #    2-127:  pixel masked for some reason (refer to MASKED bits)
    #      128:  water in pixel
    #
    # Values 129-255 are illegal (i.e. if bit 7 set, all others must be
    # unset)
    #
    # WATER_PRESENT         (dec 128) bit 7: 1=water present, 0=no water
    #                                        if all other bits zero
    # MASKED_CLOUD          (dec 64)  bit 6: 1=pixel masked out due to
    #                                        cloud, 0=unmasked
    # MASKED_CLOUD_SHADOW   (dec 32)  bit 5: 1=pixel masked out due to
    #                                        cloud shadow, 0=unmasked
    # MASKED_HIGH_SLOPE     (dec 16)  bit 4: 1=pixel masked out due to
    #                                        high slope, 0=unmasked
    # MASKED_TERRAIN_SHADOW (dec 8)   bit 3: 1=pixel masked out due to
    #                                        terrain shadow, 0=unmasked
    # MASKED_SEA_WATER      (dec 4)   bit 2: 1=pixel masked out due to
    #                                        being over sea, 0=unmasked
    # MASKED_NO_CONTIGUITY  (dec 2)   bit 1: 1=pixel masked out due to
    #                                        lack of data contiguity,
    #                                        0=unmasked
    # NO_DATA               (dec 1)   bit 0: 1=pixel masked out due to
    #                                        NO_DATA in NBAR source,
    #                                        0=valid data in NBAR
    # WATER_NOT_PRESENT     (dec 0)          All bits zero indicated valid
    #                                        observation, no water present

    with open(full_fname, 'w') as outcsv:
        # Write the headings to disk
        outcsv.write(headings)

        # Loop over each WaterExtent file
        for waterExtent in sortedWaterExtents:
            logging.info("Processing %s" % waterExtent.filename)

            # Read the waterLayer from the extent file
            waterLayer = waterExtent.getArray()

            # timestamp
            timestamp = waterLayer.timestamp
            str_time = timestamp.strftime('%Y-%m-%d %H:%M:%S.%f')

            # Loop over each feature Id
            for key in fid2seg:
                # Skip any FID's that don't exist in the current spatial
                # extent
                if fid2seg[key] > seg_vis.max_segID:
                    continue

                data = seg_vis.getSegmentData(waterLayer,
                                              segmentID=fid2seg[key])

                # Returns are 1D arrays, so check if we have an empty array
                total_area = data.shape[0]
                if total_area == 0:
                    continue  # Empty bin, (no data), skipping

                # [0..128] bins were generated, i.e 129 bins
                hist = histogram(data, Min=0, Max=128)['histogram']

                # Now to output counts per feature.
                # `key` already is the FID. The previous lookup through
                # the segment-id keyed mapping returned the wrong feature
                # (or raised a KeyError), and the formatted row was
                # discarded so the raw template was written instead.
                row = row_template.format(fname=waterExtent.filename,
                                          timestamp=str_time,
                                          feature_name=feature_names[key],
                                          hydro_id=hydro_id[key],
                                          FID=key,
                                          area=total_area,
                                          not_water=hist[0],
                                          no_data=hist[1],
                                          contiguity=hist[2],
                                          sea=hist[4],
                                          terrain=hist[8],
                                          slope=hist[16],
                                          cloud_shadow=hist[32],
                                          cloud=hist[64],
                                          water=hist[128])
                outcsv.write(row)
def tiled_main(vector_file, cell_list, indir, outdir, pattern, logpath):
    """
    Run the water extent vector summary over several cells.

    Every cell is processed with tiled_processing() and the per-FID data
    frames it returns are appended to the HDF5 store ``Test_Results.h5``
    in `outdir`, one group per FID. Once all cells are processed, the
    records of each group are grouped by time stamp, hydro id and FID and
    summed, which combines a feature that spans several cells.

    :param vector_file:
        A string containing the file system pathname to an OGR
        compatible vector file.

    :param cell_list:
        An iterable of cell names. Each name is joined to `indir` to get
        the directory holding that cell's images.

    :param indir:
        A string containing the file system pathname to the directory
        that contains the cell directories.

    :param outdir:
        A string containing the file system pathname to an existing
        directory that will contain the result output. The process exits
        with status 1 if it does not exist.

    :param pattern:
        The image file pattern handed to tiled_processing().

    :param logpath:
        A string containing the file system pathname to a directory
        that will contain the log file.

    :return:
        Nothing, tiled_main() acts as a procedure.
    """
    # setup logging file ... the log is named
    # waterExtentVectorSummary_hostname_pid.log and lives in logpath
    log_file = "waterExtentVectorSummary_{}_{}.log".format(os.uname()[1],
                                                           os.getpid())
    logPath = os.path.join(logpath, log_file)
    logging.basicConfig(filename=logPath,
                        format='%(asctime)s %(levelname)s: %(message)s',
                        datefmt='%d/%m/%Y %H:%M:%S', level=logging.INFO)

    baseOutputDir = Directory(outdir)
    if not baseOutputDir.exists():
        logging.error("%s does not exist" % baseOutputDir.getPath())
        sys.exit(1)

    logging.info("Opening vector file %s" % vector_file)
    # Keep a reference to the datasource for as long as the layer is used
    vec_ds = ogr.Open(vector_file)
    layer = vec_ds.GetLayer()

    logging.info("Gathering attribute information for each feature.")
    # Only the FIDs are needed here; names and hydro ids are gathered by
    # tiled_processing() for every cell.
    fid_list = [feature.GetFID() for feature in layer]

    df = pandas.DataFrame()
    df['FID'] = fid_list
    min_fid = df['FID'].min()
    max_fid = df['FID'].max()

    # We offset the min and max fid's by 1 as the rasterisation will be
    # created that way
    h = histogram(numpy.zeros((10), dtype='int32'), Max=max_fid+1,
                  Min=min_fid+1)

    # This will be used as the input keyword and changes will be made in
    # place
    t_area = h['histogram']

    # Create an output file that we can continually append data
    store = pandas.HDFStore(os.path.join(outdir, 'Test_Results.h5'))

    # Make sure the store is flushed and closed even if a cell fails
    try:
        for cell in cell_list:
            logging.info("Processing Cell ID: {}".format(cell))
            celldir = os.path.join(indir, cell)

            # processing here
            st = datetime.datetime.now()
            result_df = tiled_processing(vector_file, t_area, min_fid,
                                         max_fid, celldir, pattern)
            et = datetime.datetime.now()
            print("Tiled process time taken: {}".format(et - st))

            st = datetime.datetime.now()
            for key in result_df:
                # Group names shouldn't start with a number
                group_name = "FID_{}".format(key)
                store.append(group_name, result_df[key])
            et = datetime.datetime.now()
            print("Append to h5 time taken: {}".format(et - st))

        # Combine FIDs with identical timestamps and sum the pixel counts
        # Including the hydro_id and fid as groupby's should exclude them
        # from the summation.
        # The filename and Feature Name fields will be removed as a result
        # of the summation. Feature Name could potentially be kept
        group_items = ['Time Stamp', 'AUSHYDRO_ID', 'FID']

        st = datetime.datetime.now()
        for key in store.keys():
            # Combine results and overwrite the stored group
            store[key] = store[key].groupby(group_items).sum()
        et = datetime.datetime.now()
        print("Group by time taken: {}".format(et - st))

        # TODO output the results as excel files; one combined sheet and
        # one workbook with a sheet per FID
    finally:
        # Save and close the files
        store.close()
def perimeter(array, labelled=False, all_neighbors=False):
    """
    Calculates the perimeter per object.

    The border pixels of every object are found by subtracting the eroded
    objects from the objects. The borders are then convolved with a
    kernel that encodes each border pixel's neighbour configuration as a
    value in the range [0, 49], and each value contributes a fixed length
    (1, sqrt(2) or (1 + sqrt(2)) / 2) to the perimeter of its object.

    :param array:
        A 2D numpy array. Either a mask where non-zero pixels belong to an
        object, or (see `labelled`) an array of integer object labels.

    :param labelled:
        If True then `array` already contains the object labels (labels
        >= 1, background 0). Default is False, in which case the array is
        labelled first.

    :param all_neighbors:
        If True then 8 neighbour connectivity is used for the labelling
        and the erosion, otherwise 4 neighbour connectivity is used.
        Default is False.

    :return:
        A 1D numpy array containing the perimeter of each object; element
        i belongs to label i + 1.
    """
    # Construct the kernel to be used for the labelling/erosion process
    if all_neighbors:
        k = [[1,1,1],[1,1,1],[1,1,1]]
    else:
        k = [[0,1,0],[1,1,1],[0,1,0]]

    if labelled:
        lab = array
    else:
        # Label the array to assign id's to segments/regions
        lab, _ = ndimage.label(array, k)

    # Calculate the histogram of the labelled array and retrieve the
    # indices. With Min=1, bin i holds the pixels of label i + 1.
    h = histogram(lab.flatten(), Min=1, reverse_indices='ri')
    hist = h['histogram']
    ri = h['ri']

    # Binary mask of all objects
    arr = lab > 0

    # Erode the image
    erode = ndimage.binary_erosion(arr, k)

    # Get the borders of each object/region/segment. The mask difference
    # is expressed with logical operators as NumPy does not support
    # subtracting boolean arrays, and is stored as integers so that the
    # convolution below can hold values up to 49.
    obj_borders = (arr & ~erode).astype('int32')

    # Construct the perimeter kernel
    k2 = [[10,2,10],[2,1,2],[10,2,10]]
    # pixels on array border only use values within the array extents
    convolved = ndimage.convolve(obj_borders, k2, mode='constant', cval=0.0)

    # The reverse indices address the flattened array
    convolved = convolved.ravel()

    # Calculate the weights to be used for each edge pixel's contribution.
    # The weights are indexed by the convolved value itself.
    sqrt2 = numpy.sqrt(2.)
    weights = numpy.zeros(50)
    weights[[5,7,15,17,25,27]] = 1 # case (a)
    weights[[21,33]] = sqrt2 # case (b)
    weights[[13,23]] = (1. + sqrt2) / 2. # case (c)

    # Initialise the perimeter list
    perim = []

    for i in range(hist.shape[0]):
        if hist[i] == 0:
            # Labels supplied by the caller need not be consecutive
            perim.append(0.0)
            continue

        # Min=0 and Max=49 gives 50 bins where bin j counts the value j,
        # which lines the counts up with the value indexed weights.
        h_i = histogram(convolved[ri[ri[i]:ri[i+1]]], Min=0, Max=49)
        hist_i = h_i['histogram']
        perim.append(numpy.dot(hist_i, weights))

    perim = numpy.array(perim)

    return perim
def tiled_processing(vector_file, input_hist, Min_id, Max_id, indir, pattern):
    """
    Summarise the water extents of a single cell per vector feature.

    The vector features are rasterised onto the grid of the first water
    extent image found in `indir`. For every water extent image and every
    feature present in the cell, one record holding the pixel count of
    each water/mask value is appended to that feature's data frame.

    :param vector_file:
        A string containing the file system pathname to an OGR
        compatible vector file.

    :param input_hist:
        The histogram array handed to histogram() through its `input`
        keyword together with the rasterised segment ids.
        NOTE(review): the calling code expects this to be updated in
        place with the per-segment pixel counts; confirm against the
        histogram implementation.

    :param Min_id:
        The minimum segment id, used as the histogram minimum.

    :param Max_id:
        The maximum segment id, used as the histogram maximum.

    :param indir:
        A string containing the file system pathname to a directory
        containing the water extent image files.

    :param pattern:
        A string containing the image extents file extension pattern,
        eg '*.tif'.

    :return:
        A dictionary keyed by FID. Each value is a pandas DataFrame with
        one row per water extent image in which the feature was found.
        Features that were not found keep an empty DataFrame.
    """
    # Get a list of water_extent files
    files = getFiles(indir, pattern)

    # Get the water_extent objects and sort them by date
    sortedWaterExtents, cellId = getWaterExtents(files)

    # Rasterise the features
    # We can use the first image file as the base
    st = datetime.datetime.now()
    segments_ds = Rasterise(RasterFilename=files[0],
                            VectorFilename=vector_file)
    logging.info("Rasterising features.")
    segments_ds.rasterise()
    et = datetime.datetime.now()
    print("Rasterisation time taken: {}".format(et - st))

    # Extract the array
    veg2rast = segments_ds.segemented_array

    # Initialise the segment visitor
    seg_vis = SegmentVisitor(veg2rast)

    # Update the total area (recursive histogram technique)
    # The call is made for its effect on input_hist; the result is unused
    histogram(veg2rast.ravel(), input=input_hist, Min=Min_id, Max=Max_id)

    # Get specific attribute records
    logging.info("Opening vector file %s" % vector_file)
    # Keep a reference to the datasource for as long as the layer is used
    vec_ds = ogr.Open(vector_file)
    layer = vec_ds.GetLayer()

    # Define the headings for the data frame
    headings = ["Filename", "Time Stamp", "Feature Name", "AUSHYDRO_ID",
                "FID", "Total Pixel Count", "WATER_NOT_PRESENT",
                "NO_DATA", "MASKED_NO_CONTIGUITY",
                "MASKED_SEA_WATER", "MASKED_TERRAIN_SHADOW",
                "MASKED_HIGH_SLOPE", "MASKED_CLOUD_SHADOW",
                "MASKED_CLOUD", "WATER_PRESENT"]

    # Dicts to hold feature names, hydro_id and data frame, keyed by FID
    feature_names = {}
    hydro_id = {}
    fid_df = {}

    # Mapping of a FID to its segment id (segment id = FID + 1)
    fid2seg = {}

    logging.info("Gathering attribute information for each feature.")
    # These Field Id's are unique to NGIG's vector datasets
    for feature in layer:
        fid = feature.GetFID()
        name = feature.GetField("NAME")
        # Replace any occurences of None with UNKNOWN
        feature_names[fid] = 'UNKNOWN' if name is None else name
        hydro_id[fid] = feature.GetField("AUSHYDRO_I")
        fid2seg[fid] = fid + 1
        fid_df[fid] = pandas.DataFrame(columns=headings)

    # Go back to the start of the vector file
    layer.ResetReading()

    # TODO Define dict lookup for potential segments up to max segment

    # A WaterTile stores 1 data layer encoded as unsigned BYTE values as
    # described in the WaterConstants.py file.
    #
    # Note - legal (decimal) values are:
    #
    #        0:  no water in pixel
    #        1:  no data (one or more bands) in source NBAR image
    #    2-127:  pixel masked for some reason (refer to MASKED bits)
    #      128:  water in pixel
    #
    # Values 129-255 are illegal (i.e. if bit 7 set, all others must be
    # unset)
    #
    # WATER_PRESENT         (dec 128) bit 7: 1=water present, 0=no water
    #                                        if all other bits zero
    # MASKED_CLOUD          (dec 64)  bit 6: 1=pixel masked out due to
    #                                        cloud, 0=unmasked
    # MASKED_CLOUD_SHADOW   (dec 32)  bit 5: 1=pixel masked out due to
    #                                        cloud shadow, 0=unmasked
    # MASKED_HIGH_SLOPE     (dec 16)  bit 4: 1=pixel masked out due to
    #                                        high slope, 0=unmasked
    # MASKED_TERRAIN_SHADOW (dec 8)   bit 3: 1=pixel masked out due to
    #                                        terrain shadow, 0=unmasked
    # MASKED_SEA_WATER      (dec 4)   bit 2: 1=pixel masked out due to
    #                                        being over sea, 0=unmasked
    # MASKED_NO_CONTIGUITY  (dec 2)   bit 1: 1=pixel masked out due to
    #                                        lack of data contiguity,
    #                                        0=unmasked
    # NO_DATA               (dec 1)   bit 0: 1=pixel masked out due to
    #                                        NO_DATA in NBAR source,
    #                                        0=valid data in NBAR
    # WATER_NOT_PRESENT     (dec 0)          All bits zero indicated valid
    #                                        observation, no water present

    # Loop over each WaterExtent file
    for waterExtent in sortedWaterExtents:
        logging.info("Processing %s" % waterExtent.filename)

        # Read the waterLayer from the extent file
        waterLayer = waterExtent.getArray()

        # timestamp
        timestamp = waterExtent.getDatetime()

        # Loop over each feature Id
        for FID in fid2seg:
            # Skip any FID's that don't exist in the current spatial extent
            if fid2seg[FID] > seg_vis.max_segID:
                continue

            data = seg_vis.getSegmentData(waterLayer, segmentID=fid2seg[FID])

            # Returns are 1D arrays, so check if we have an empty array
            total_area = data.shape[0]
            if total_area == 0:
                continue  # Empty bin, (no data), skipping

            # [0..128] bins were generated, i.e 129 bins
            hist = histogram(data, Min=0, Max=128)['histogram']

            format_dict = {'Filename': waterExtent.filename,
                           'Time Stamp': timestamp,
                           'Feature Name': feature_names[FID],
                           'AUSHYDRO_ID': hydro_id[FID],
                           'FID': FID,
                           'Total Pixel Count': total_area,
                           'WATER_NOT_PRESENT': hist[0],
                           'NO_DATA': hist[1],
                           'MASKED_NO_CONTIGUITY': hist[2],
                           'MASKED_SEA_WATER': hist[4],
                           'MASKED_TERRAIN_SHADOW': hist[8],
                           'MASKED_HIGH_SLOPE': hist[16],
                           'MASKED_CLOUD_SHADOW': hist[32],
                           'MASKED_CLOUD': hist[64],
                           'WATER_PRESENT': hist[128]}

            # Append the new data to the FID data frame
            # NOTE(review): DataFrame.append was removed in pandas 2.0;
            # it is kept here so the resulting frames are unchanged for
            # the pandas version this module targets.
            fid_df[FID] = fid_df[FID].append(format_dict, ignore_index=True)

    return fid_df
assert array.ndim == 2, "Dimensions of array must be 2D!\n Supplied array is %i"%array.ndim self.array = array self.array1D = array.ravel() self.dims = array.shape self.histogram = None self.ri = None def _findSegements(self) """ """ h = histogram(self.array1D, min=0, reverse_indices='ri') self.histogram = h['histogram'] self.ri = h['ri'] self.min_segID = numpy.min(self.array > 0) self.max_segID = numpy.max(self.array) def getSegementData(self, array, segmentID=1): """ Retrieve the data from an array corresponding to a segmentID. """ ri = self.ri i = segmentID arr_flat = array.ravel()
def plotHistogram(fname, out_dir):
    """
    Plot the histogram of every band of a scene and report each band's
    maximum DN.

    A sub-directory named after the basename of `fname` is created inside
    `out_dir` (when it does not already exist) and two files are written
    to it:

    * histogram_plots.pdf -- one page per band, followed by a single page
      overlaying the histograms of every band except band 9.
    * histogram_results.txt -- one line per band giving the highest DN
      with a non-zero count, and that count.

    Bin 0 of every histogram is zeroed before plotting and reporting, as
    it is treated as the no-data value.

    :param fname:
        Path of the scene; handed to SceneDataset.

    :param out_dir:
        The base output directory.

    :return:
        None.
    """
    ds = SceneDataset(fname)

    # Retrieve and create the output base directory
    base_dir = os.path.basename(fname)
    out_dir = os.path.join(out_dir, base_dir)
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    bands = []
    for group in ('REFLECTIVE', 'THERMAL', 'ATMOSPHERE'):
        bands.extend(ds._bands[group])

    mx_DN_list = []
    # (label, histogram) pairs for the combined page. Keeping them paired
    # guarantees a curve can never pick up another band's label.
    overlay = []

    plot_file = PdfPages(os.path.join(out_dir, 'histogram_plots.pdf'))
    try:
        for band in bands:
            img = ds.GetRasterBand(band).ReadAsArray()
            hist = histogram(img.ravel(), max=65535)['histogram']
            hist[0] = 0  # Ignore the no-data value

            lab = 'Band %i' % band
            if band != 9:
                overlay.append((lab, hist.copy()))

            # Highest DN with a non-zero count. A band holding nothing but
            # no-data has no such DN; report DN 0 (whose count is 0)
            # rather than failing on the max of an empty array.
            nonzero = numpy.where(hist != 0)[0]
            mx = nonzero.max() if nonzero.size else 0
            mx_DN_list.append('Band: %i, Max DN: %i, Count: %i\n'
                              % (band, mx, hist[mx]))

            # One page per band
            plt.plot(hist, label=lab)
            plt.legend()
            plt.suptitle(base_dir)
            plot_file.savefig()
            plt.close()

        # Now to output a single plot containing all (except band 9)
        # histograms
        for lab, hist in overlay:
            plt.plot(hist, label=lab)
        plt.legend()
        plt.suptitle(base_dir)
        plot_file.savefig()
        plt.close()
    finally:
        # Always finalise the PDF, even if a band failed part way through
        plot_file.close()

    with open(os.path.join(out_dir, 'histogram_results.txt'), 'w') as out_file:
        out_file.writelines(mx_DN_list)
def obj_get_boundary_method1(labelled_array, fill_holes=True):
    """
    Get the pixels that mark the boundary/perimeter of every labelled
    object. Method 1.

    The boundary of each object is traced by track_object_boundary,
    starting at the object's first pixel in C (row-major) order. With the
    memory laid out this way, e.g. a 2x5 array

        0, 1, 2, 3, 4
        5, 6, 7, 8, 9

    the first pixel of an object only has same-labelled neighbours to its
    right and below it, so it serves as both the start and the closing
    co-ordinate of the boundary.

    Chain codes, for reference.

    8 neighbourhood chain code

        5 6 7
        4 . 0
        3 2 1

    4 neighbourhood chain code

        . 3 .
        2 . 0
        . 1 .

    :param labelled_array:
        A 2D array of object labels. Values below 1 are not treated as
        objects.

    :param fill_holes:
        If True (Default), the holes of the objects are filled using
        obj_fill_holes and the filled result is re-labelled (8 connected)
        before tracing. The boundary of every hole is then traced as well:
        the vertices of each hole are appended to the list stored under
        the object's 'Holes' key and the hole's perimeter length is added
        to the object's 'Perimeter_Length'.
        If False, the labels are traced as supplied.

    :return:
        A dictionary keyed by the zero-based histogram bin of each object
        (label - 1), holding the result of track_object_boundary for that
        object. Empty bins and single pixel objects get no entry.

    :notes:
        * What the perimeter of a single pixel object should be (0.0,
          4.0?) is still undecided, so such objects are skipped.
        * NOTE(review): whether the labels of the filled and unfilled
          arrays can be trusted to match is an open question inherited
          from the original implementation.
    """
    dims = labelled_array.shape
    structure = [[1, 1, 1], [1, 1, 1], [1, 1, 1]]

    if fill_holes:
        # Remember where the unfilled objects were; the holes are the
        # pixels that only exist in the filled version.
        orig_binary = labelled_array > 0
        fill = obj_fill_holes(labelled_array)
        labelled_array, nlabels = ndimage.label(fill, structure=structure)

    # Nothing to trace
    if not numpy.any(labelled_array >= 1):
        return {}

    # Determine the co-ordinates (indices) of each segment.
    # The first index of each segment will be used to define the start and
    # end of a boundary/perimeter.
    h = histogram(labelled_array.flatten(), Min=1, reverse_indices='ri')
    hist = h['histogram']
    ri = h['ri']
    seg_start_idxs = _first_pixel_indices(hist, ri, dims)

    perimeter_info = {}
    for i in range(hist.shape[0]):
        # Skip empty bins and single pixel objects
        if hist[i] < 2:
            continue

        idx = (seg_start_idxs[0][i], seg_start_idxs[1][i])
        label = i + 1  # Min=1, so bin i holds label i + 1
        info = track_object_boundary(labelled_array, start_index=idx,
                                     label_id=label)
        perimeter_info[i] = info

        if fill_holes:
            object_idxs = ri[ri[i]:ri[i + 1]]
            for hole in _track_hole_boundaries(object_idxs, orig_binary,
                                               structure):
                # Keep every hole, not just the last one traced
                info.setdefault('Holes', []).append(hole['Vertices'])
                info['Perimeter_Length'] += hole['Perimeter_Length']

    return perimeter_info


def _first_pixel_indices(hist, ri, dims):
    """Return the 2D indices of the first pixel of every histogram bin."""
    starts = numpy.zeros(hist.shape[0], dtype='int')
    for b in range(hist.shape[0]):
        if hist[b] == 0:
            # An empty bin has no pixels; its placeholder stays at 0 and
            # is never used because callers skip empty bins.
            continue
        starts[b] = ri[ri[b]:ri[b + 1]][0]

    # Convert the 1D indices to 2D indices used by NumPy
    return array_indices(dims, starts, dimensions=True)


def _track_hole_boundaries(object_idxs, orig_binary, structure):
    """Trace the boundary of every hole within a single filled object."""
    dims = orig_binary.shape

    # Rebuild the filled object as a 2D mask so that it lines up with
    # orig_binary and can be labelled with the 2D structuring element.
    single_object = numpy.zeros(orig_binary.size, dtype='bool')
    single_object[object_idxs] = True
    single_object = single_object.reshape(dims)

    # A hole is part of the filled object but absent from the original
    holes = single_object & ~orig_binary
    labs, nlabs = ndimage.label(holes, structure)
    if nlabs == 0:
        return []

    h_holes = histogram(labs.flatten(), Min=1, reverse_indices='ri')
    hist_holes = h_holes['histogram']
    starts = _first_pixel_indices(hist_holes, h_holes['ri'], dims)

    results = []
    for k in range(hist_holes.shape[0]):
        # NOTE(review): the original tested the object's pixel count here,
        # which can never be 1 at this point; the hole's own count is
        # assumed to be what was meant -- confirm.
        if hist_holes[k] < 2:
            continue
        idx = (starts[0][k], starts[1][k])
        results.append(track_object_boundary(labs, start_index=idx,
                                             label_id=k + 1))

    return results
def triangle_threshold(array, Binsize=None, Max=None, Min=None, Nbins=None, Apply=True, Invert=False):
    """
    Calculates a threshold and optionally creates a binary mask from an
    array using the Triangle threshold method.

    The threshold is calculated as the point of maximum perpendicular
    distance of a line between the histogram peak and the farthest
    non-zero histogram edge to the histogram.

    :param array:
        A numpy array.

    :param Binsize:
        (Optional) The binsize (Default is 1) to be used for creating the
        histogram.

    :param Max:
        (Optional) The maximum value to be used in creating the histogram.
        If not specified the array will be searched for max.

    :param Min:
        (Optional) The minimum value to be used in creating the histogram.
        If not specified the array will be searched for min.

    :param Nbins:
        (Optional) The number of bins to be used for creating the
        histogram. If set binsize is calculated as
        (max - min) / (nbins - 1), and the max value will be adjusted to
        (nbins*binsize + min).

    :param Apply:
        If True (Default), then the threshold will be applied and a mask
        of type bool with the same dimensions as array will be returned.
        Otherwise just the threshold will be returned.

    :param Invert:
        If True (Default is False), then the returned mask will be
        inverted. Only valid if Apply=True.
        The inverted mask is applied as
        (array < threshold) & (array >= min).
        The non-inverted mask is applied as
        (array >= threshold) & (array <= max).

    :return:
        The boolean mask when Apply is True. Otherwise the value returned
        by calculate_triangle_threshold, i.e. before it is scaled by the
        binsize and offset by the histogram minimum.

    :raises ValueError:
        If array is None.

    :author:
        Josh Sixsmith, [email protected]

    :history:
        * 12/07/2014--Translated from IDL

    :sources:
        G.W. Zack, W.E. Rogers, and S.A. Latt. Automatic measurement of
        sister chromatid exchange frequency. Journal of Histochemistry &
        Cytochemistry, 25(7):741, 1977.
    """
    # An identity test; `array == None` is evaluated element by element on
    # an ndarray and so has no single truth value.
    if array is None:
        raise ValueError("No input array!")

    dims = array.shape
    arr = array.flatten()

    h = histogram(arr, locations='loc', omax='omax', omin='omin',
                  binsize=Binsize, Max=Max, Min=Min, nbins=Nbins)
    hist = h['histogram']
    omin = h['omin']
    omax = h['omax']
    loc = h['loc']

    # The spacing of the bin locations is the binsize actually used
    binsz = numpy.abs(loc[1] - loc[0])

    # Calculate the threshold
    threshold = calculate_triangle_threshold(histogram=hist)

    if Apply:
        # Scale by the binsize and offset by the histogram minimum so the
        # threshold can be compared against the data
        thresh_convert = threshold * binsz + omin
        if Invert:
            mask = (arr < thresh_convert) & (arr >= omin)
        else:
            mask = (arr >= thresh_convert) & (arr <= omax)
        # The comparison ran on the flattened copy; restore the input's
        # dimensions
        return mask.reshape(dims)

    # NOTE(review): this is the unconverted value, not thresh_convert --
    # confirm which one callers expect.
    return threshold
burn = i + 1 gdal.RasterizeLayer(outds, [1], layer, burn_values=[burn]) layer.SetAttributeFilter(None) # Retrieve the rasterised vector and delete the GDAL MEM dataset vec2rast = outds.ReadAsArray().flatten() outds = None vec_ds = None layer = None # TODO # Need to sort out feature names with features that get rasterised # Hmmm, it might be ok # Calculate the histogram and the reverse indices of the rasterised vector h = histogram(vec2rast, Min=1, reverse_indices='ri', omax='omax') hist = h['histogram'] ri = h['ri'] omax = h['omax'] # Get the indices for each bin idxs = [] for i in range(hist.shape[0]): if hist[i] == 0: idxs.append(None) # An empty item continue idx = ri[ri[i]:ri[i + 1]] idxs.append(idx) logging.info("Creating output summary file")