def getCloudPercentage(image, region):
    '''Estimates the cloud cover percentage in a Landsat image'''
    # The function will attempt the calculation in these ranges
    # - Native Landsat resolution is 30
    MIN_RESOLUTION = 60
    MAX_RESOLUTION = 1000

    resolution = MIN_RESOLUTION
    while True:
        try:
            oneMask    = ee.Image(1.0)
            cloudScore = detect_clouds(image)
            areaCount  = oneMask.reduceRegion(ee.Reducer.sum(), region, resolution)
            cloudCount = cloudScore.reduceRegion(ee.Reducer.sum(), region, resolution)
            percentage = (safe_get_info(cloudCount)['constant'] /
                          safe_get_info(areaCount)['constant'])
            return percentage
        except Exception as e:
            # Keep trying with a coarser resolution until we succeed
            resolution = 2 * resolution
            if resolution > MAX_RESOLUTION:
                raise e
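# A minimal sketch of the retry pattern used above, factored into a reusable
# helper: keep doubling the reduceRegion scale until Earth Engine accepts the
# request. The helper name is illustrative and not part of this module; it
# assumes the same safe_get_info() wrapper used throughout this file.
def _reduce_region_with_fallback(image, region, reducer,
                                 min_resolution=60, max_resolution=1000):
    '''Apply reduceRegion, coarsening the scale until the request succeeds.'''
    resolution = min_resolution
    while True:
        try:
            return safe_get_info(image.reduceRegion(reducer, region, resolution))
        except Exception as e:
            resolution *= 2  # Too much data at this scale; try a coarser one
            if resolution > max_resolution:
                raise e

# Hypothetical usage mirroring getCloudPercentage() above:
#   counts = _reduce_region_with_fallback(ee.Image(1.0), region, ee.Reducer.sum())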
def getCloudPercentage(lowResModis, region):
    '''Returns the percentage of a region flagged as clouds by the MODIS metadata'''
    MODIS_CLOUD_RESOLUTION = 1000  # Clouds are flagged at this resolution

    # Divide the number of cloud pixels by the total number of pixels
    oneMask    = ee.Image(1.0)
    cloudMask  = getModisBadPixelMask(lowResModis)
    areaCount  = oneMask.reduceRegion(ee.Reducer.sum(), region, MODIS_CLOUD_RESOLUTION)
    cloudCount = cloudMask.reduceRegion(ee.Reducer.sum(), region, MODIS_CLOUD_RESOLUTION)
    percentage = (safe_get_info(cloudCount)['cloud_state'] /
                  safe_get_info(areaCount)['constant'])
    print('Detected cloud percentage: ' + str(percentage))
    return percentage
def __compute_threshold_ranges(training_domains, training_images, water_masks, bands):
    '''For each band, find lowest and highest fixed percentiles among the training domains.'''
    LOW_PERCENTILE  = 20
    HIGH_PERCENTILE = 100
    EVAL_RESOLUTION = 250

    band_splits = dict()
    for band_name in bands:  # Loop through each band (weak classifier input)
        split = None
        print('Computing threshold ranges for: ' + band_name)

        mean = 0
        for i in range(len(training_domains)):  # Loop through all input domains
            # Compute the low and high percentiles for the data in the training image
            masked_input_band = training_images[i].select(band_name).mask(water_masks[i])
            ret = safe_get_info(masked_input_band.reduceRegion(
                ee.Reducer.percentile([LOW_PERCENTILE, HIGH_PERCENTILE], ['s', 'b']),
                training_domains[i].bounds, EVAL_RESOLUTION))
            s = [ret[band_name + '_s'], ret[band_name + '_b']]  # Extract the two output values
            mean += modis_utilities.compute_binary_threshold(
                training_images[i].select([band_name], ['b1']), water_masks[i],
                training_domains[i].bounds)

            if split is None:  # True for the first training domain
                split = s
            else:  # Track the minimum and maximum percentiles for this band
                split[0] = min(split[0], s[0])
                split[1] = max(split[1], s[1])
        mean = mean / len(training_domains)

        # For this band: bound by lowest percentile and maximum percentile, start by evaluating mean
        band_splits[band_name] = [split[0], split[0] + (mean - split[0]) / 2,
                                  mean + (split[1] - mean) / 2, split[1]]
    return band_splits
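# Plain-number illustration of the four split points computed per band above:
# the low percentile, the midpoint between it and the mean binary threshold,
# the midpoint between that mean and the high percentile, and the high
# percentile itself. The function below is illustrative only and is not
# called anywhere in the module.
def _example_band_splits(low, mean, high):
    return [low, low + (mean - low) / 2, mean + (high - mean) / 2, high]

# _example_band_splits(0.0, 4.0, 10.0) -> [0.0, 2.0, 7.0, 10.0]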
def _find_adaboost_optimal_threshold(domains, images, truths, band_name, weights, splits):
    '''Search the midpoints between split values for the best threshold for this band'''
    EVAL_RESOLUTION = 250
    choices = []
    for i in range(len(splits) - 1):
        choices.append((splits[i] + splits[i + 1]) / 2)

    domain_range = range(len(domains))
    best       = None
    best_value = None
    for k in range(len(choices)):
        # Pick a threshold and count how many pixels fall under it across all the input images
        c = choices[k]
        errors = [safe_get_info(weights[i].multiply(
                      images[i].select(band_name).lte(c).neq(truths[i])).reduceRegion(
                      ee.Reducer.sum(), domains[i].bounds, EVAL_RESOLUTION, 'EPSG:4326'))['constant']
                  for i in range(len(images))]
        error = sum(errors)

        # An earlier, entropy-gain based scoring approach is kept below for reference.
        #threshold_sums = [safe_get_info(weights[i].mask(images[i].select(band_name).lte(c)).reduceRegion(ee.Reducer.sum(), domains[i].bounds, EVAL_RESOLUTION))['constant'] for i in domain_range]
        #flood_and_threshold_sum = sum(threshold_sums)
        #
        ##ts         = [truths[i].multiply(weights[i]).divide(flood_and_threshold_sum).mask(images[i].select(band_name).lte(c)) for i in domain_range]
        ##entropies1 = [-safe_get_info(ts[i].multiply(ts[i].log()).reduceRegion(ee.Reducer.sum(), domains[i].bounds, EVAL_RESOLUTION))['b1'] for i in domain_range] # H(Y | X <= c)
        ##ts         = [truths[i].multiply(weights[i]).divide(1 - flood_and_threshold_sum).mask(images[i].select(band_name).gt(c)) for i in domain_range]
        ##entropies2 = [-safe_get_info(ts[i].multiply(ts[i].log()).reduceRegion(ee.Reducer.sum(), domains[i].bounds, EVAL_RESOLUTION))['b1'] for i in domain_range] # H(Y | X > c)
        #
        ## Compute the sums of two entropy measures across all images
        #entropies1 = entropies2 = []
        #for i in domain_range:
        #    band_image     = images[i].select(band_name)
        #    weighted_truth = truths[i].multiply(weights[i])
        #    ts1 = weighted_truth.divide(    flood_and_threshold_sum).mask(band_image.lte(c)) # <= threshold
        #    ts2 = weighted_truth.divide(1 - flood_and_threshold_sum).mask(band_image.gt( c)) # >  threshold
        #    entropies1.append(-safe_get_info(ts1.multiply(ts1.log()).reduceRegion(ee.Reducer.sum(), domains[i].bounds, EVAL_RESOLUTION))['b1']) # H(Y | X <= c)
        #    entropies2.append(-safe_get_info(ts2.multiply(ts2.log()).reduceRegion(ee.Reducer.sum(), domains[i].bounds, EVAL_RESOLUTION))['b1']) # H(Y | X > c)
        #entropy1 = sum(entropies1)
        #entropy2 = sum(entropies2)
        #
        ## Compute the gain for this threshold choice
        #gain = (entropy1 * (    flood_and_threshold_sum) +
        #        entropy2 * (1 - flood_and_threshold_sum))
        #print('c = %f, error = %f' % (c, error))

        if (best is None) or abs(0.5 - error) > abs(0.5 - best_value):
            # Record the error furthest from 0.5: such classifiers are the most
            # informative, even when reliably wrong (those get a negative alpha).
            best       = k
            best_value = error

    # TODO: What is causing this inaccuracy?
    if best_value > 0.99:
        best_value = 0.99  # ??

    return (choices[best], best + 1, best_value)
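# For reference, how the weighted error returned above becomes a weak
# classifier's vote weight (alpha) in adaboost_learn() below.  Errors near
# 0.5 (chance) earn almost no vote, which is why the search above keeps the
# error furthest from 0.5.  This helper is purely illustrative and is not
# called anywhere in the module.
import math

def _example_error_to_alpha(error):
    '''e.g. error 0.10 -> +1.099, error 0.50 -> 0.000, error 0.99 -> approx -2.30'''
    return 0.5 * math.log((1 - error) / error)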
def _create_learning_image(domain, b):
    '''Set up features for the classifier to be trained on'''
    outputBands = _get_modis_learning_bands(domain, b)  # Get the standard set of MODIS learning bands
    #outputBands = _get_extensive_modis_learning_bands(domain, b) # Get the extensive set of MODIS learning bands

    # Try to add a DEM
    try:
        dem = domain.get_dem().image
        outputBands = outputBands.addBands(dem)  # addBands() returns a new image, so keep the result
        #outputBands = dem
    except AttributeError:
        pass  # Suppress error if there is no DEM data

    # Try to add Skybox RGB info (NIR is handled separately because not all Skybox images have it)
    # - Use all the base bands plus a grayscale texture measure
    try:
        try:  # The Skybox data can be in one of two names
            skyboxSensor = domain.skybox
        except:
            skyboxSensor = domain.skybox_nir

        rgbBands = skyboxSensor.Red.addBands(skyboxSensor.Green).addBands(skyboxSensor.Blue)
        grayBand = rgbBands.select('Red').add(rgbBands.select('Green')).add(
            rgbBands.select('Blue')).divide(ee.Image(3.0)).uint16()
        edges   = grayBand.convolve(ee.Kernel.laplacian8(normalize=True)).abs()
        texture = edges.convolve(ee.Kernel.square(3, 'pixels')).select(['Red'], ['Texture'])
        texture2Raw = grayBand.glcmTexture()
        bandList    = safe_get_info(texture2Raw)['bands']
        bandName    = [x['id'] for x in bandList if 'idm' in x['id']]
        texture2    = texture2Raw.select(bandName).convolve(ee.Kernel.square(5, 'pixels'))
        #skyboxBands = rgbBands.addBands(texture).addBands(texture2)
        skyboxBands = rgbBands.addBands(texture2)
        outputBands = outputBands.addBands(skyboxBands)
        #outputBands = skyboxBands

        #addToMap(grayBand, {'min': 0, 'max': 1200}, 'grayBand')
        #addToMap(edges,    {'min': 0, 'max':  250}, 'edges')
        #addToMap(texture,  {'min': 0, 'max':  250}, 'texture')
        #addToMap(texture2, {'min': 0, 'max':    1}, 'texture2')
    except AttributeError:
        pass  # Suppress error if there is no Skybox data

    # Try to add Skybox Near IR band
    try:
        outputBands = outputBands.addBands(domain.skybox_nir.NIR)
        #addToMap(domain.skybox.NIR, {'min': 0, 'max': 1200}, 'Near IR')
    except AttributeError:
        pass  # Suppress error if there is no Skybox NIR data

    return outputBands
def compute_binary_threshold(valueImage, classification, bounds, mixed_thresholds=False):
    '''Computes a threshold for a value given examples in a classified binary image'''
    # Build histograms of the true and false labeled values
    valueInFalse   = valueImage.mask(classification.Not())
    valueInTrue    = valueImage.mask(classification)
    NUM_BINS       = 128
    SCALE          = 250  # In meters
    histogramFalse = safe_get_info(valueInFalse.reduceRegion(
        ee.Reducer.histogram(NUM_BINS, None, None), bounds, SCALE))['b1']
    histogramTrue  = safe_get_info(valueInTrue.reduceRegion(
        ee.Reducer.histogram(NUM_BINS, None, None), bounds, SCALE))['b1']

    # Get total number of pixels in each histogram
    false_total = sum(histogramFalse['histogram'])
    true_total  = sum(histogramTrue['histogram'])

    # WARNING: This method assumes that the false histogram is composed of greater numbers than the true histogram!!
    #        : This happens to be the case for the three algorithms we are currently using this for.

    false_index       = 0
    false_sum         = false_total
    true_sum          = 0.0
    threshold_index   = None
    lower_mixed_index = None
    upper_mixed_index = None
    for i in range(len(histogramTrue['histogram'])):  # Iterate through the bins of the true histogram
        # Add the number of pixels in the current true bin
        true_sum += histogramTrue['histogram'][i]

        # Set x equal to the max end of the current bin
        x = histogramTrue['bucketMin'] + (i + 1) * histogramTrue['bucketWidth']

        # Determine the bin of the false histogram that x falls in
        # - Also update the running count of false pixels at or above x
        while ((false_index < len(histogramFalse['histogram'])) and
               (histogramFalse['bucketMin'] + false_index * histogramFalse['bucketWidth'] < x)):
            false_sum   -= histogramFalse['histogram'][false_index]  # Remove the pixels from the current false bin
            false_index += 1  # Move to the next bin of the false histogram

        percent_true_under_thresh = true_sum / true_total
        percent_false_over_thresh = false_sum / false_total

        if mixed_thresholds:
            if (false_total - false_sum) / float(true_sum) <= 0.05:
                lower_mixed_index = i
            if upper_mixed_index is None and (true_total - true_sum) / float(false_sum) <= 0.05:
                upper_mixed_index = i
        else:
            if (threshold_index is None and
                    (percent_false_over_thresh < percent_true_under_thresh) and
                    (percent_true_under_thresh > 0.5)):
                break

    if mixed_thresholds:
        if (not lower_mixed_index) or (not upper_mixed_index):
            raise Exception('Failed to compute mixed threshold values!')
        lower = (histogramTrue['bucketMin'] + lower_mixed_index * histogramTrue['bucketWidth'] +
                 histogramTrue['bucketWidth'] / 2)
        upper = (histogramTrue['bucketMin'] + upper_mixed_index * histogramTrue['bucketWidth'] +
                 histogramTrue['bucketWidth'] / 2)
        if lower > upper:
            (lower, upper) = (upper, lower)
        print('Thresholds (%g, %g) found.' % (lower, upper))
        return (lower, upper)
    else:
        # Put threshold in the center of the current true histogram bin/bucket
        threshold = (histogramTrue['bucketMin'] + i * histogramTrue['bucketWidth'] +
                     histogramTrue['bucketWidth'] / 2)
        print('Threshold %g Found. %g%% of water pixels and %g%% of land pixels separated.' %
              (threshold, true_sum / true_total * 100.0, false_sum / false_total * 100.0))
        return threshold
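# Hypothetical usage sketch for compute_binary_threshold(), based on how it is
# called from the DNNS code later in this section: threshold the MODIS b2-b1
# difference (get_diff) against the permanent water mask.  The helper and
# variable names are assumptions drawn from elsewhere in this file.
def _example_binary_threshold(domain, b):
    diff       = get_diff(b)  # b2 - b1, in a band named 'b1'
    water_mask = get_permanent_water_mask()
    # A single threshold separating water from land:
    threshold = compute_binary_threshold(diff, water_mask, domain.bounds)
    # Or a (pure water, pure land) threshold pair for mixed-pixel handling:
    (lower, upper) = compute_binary_threshold(diff, water_mask, domain.bounds,
                                              mixed_thresholds=True)
    return (threshold, lower, upper)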
def history_diff_core(high_res_modis, date, dev_thresh, change_thresh, bounds):
    '''Leverage historical data and the permanent water mask to improve the threshold method.

       This method computes statistics from the permanent water mask to set a good b2-b1
       water detection threshold.  It also adds pixels which have a b2-b1 level significantly
       lower than the historical seasonal average.
    '''
    # Retrieve all the MODIS images for the region in the last several years
    NUM_YEARS_BACK         = 5
    NUM_DAYS_COMPARE_RANGE = 40.0  # Compare this many days before/after the target day in previous years
    YEAR_RANGE_PERCENTAGE  = NUM_DAYS_COMPARE_RANGE / 365.0
    #print('YEAR_RANGE_PERCENTAGE = ' + str(YEAR_RANGE_PERCENTAGE))
    #print('Start: ' + str(domain.date.advance(-1 - YEAR_RANGE_PERCENTAGE, 'year')))
    #print('End:   ' + str(domain.date.advance(-1 + YEAR_RANGE_PERCENTAGE, 'year')))

    # Get both the high and low res MODIS data
    historyHigh = ee.ImageCollection('MOD09GQ').filterDate(
        date.advance(-1 - YEAR_RANGE_PERCENTAGE, 'year'),
        date.advance(-1 + YEAR_RANGE_PERCENTAGE, 'year')).filterBounds(bounds)
    historyLow = ee.ImageCollection('MOD09GA').filterDate(
        date.advance(-1 - YEAR_RANGE_PERCENTAGE, 'year'),
        date.advance(-1 + YEAR_RANGE_PERCENTAGE, 'year')).filterBounds(bounds)
    for i in range(1, NUM_YEARS_BACK - 1):
        yearMin = -(i + 1) - YEAR_RANGE_PERCENTAGE
        yearMax = -(i + 1) + YEAR_RANGE_PERCENTAGE
        # merge() returns a new collection, so the result must be stored
        historyHigh = historyHigh.merge(ee.ImageCollection('MOD09GQ').filterDate(
            date.advance(yearMin, 'year'), date.advance(yearMax, 'year')).filterBounds(bounds))
        historyLow = historyLow.merge(ee.ImageCollection('MOD09GA').filterDate(
            date.advance(yearMin, 'year'), date.advance(yearMax, 'year')).filterBounds(bounds))
        # TODO: Add a filter here to remove cloud-filled images

    # Simple function implements the b2 - b1 difference method
    # - This needs to work with domains and with the input from production_gui
    def flood_diff_function(image):
        try:
            return image.select(['sur_refl_b02']).subtract(image.select(['sur_refl_b01']))
        except:
            return image.sur_refl_b02.subtract(image.sur_refl_b01)

    # Apply difference function to all images in history, then compute mean and standard deviation of difference scores.
    historyDiff   = historyHigh.map(flood_diff_function)
    historyMean   = historyDiff.mean()
    historyStdDev = historyDiff.reduce(ee.Reducer.stdDev())

    # Display the mean image for bands 1/2/6
    history3 = historyHigh.mean().select(['sur_refl_b01', 'sur_refl_b02']).addBands(
        historyLow.mean().select(['sur_refl_b06']))
    #addToMap(history3, {'bands': ['sur_refl_b01', 'sur_refl_b02', 'sur_refl_b06'], 'min' : 0, 'max': 3000, 'opacity' : 1.0}, 'MODIS_HIST', False)
    #addToMap(historyMean,   {'min' : 0, 'max' : 4000}, 'History mean',   False)
    #addToMap(historyStdDev, {'min' : 0, 'max' : 2000}, 'History stdDev', False)

    # Compute flood diff on current image and compare to historical mean/STD.
    floodDiff   = flood_diff_function(high_res_modis)
    diffOfDiffs = floodDiff.subtract(historyMean)
    ddDivDev    = diffOfDiffs.divide(historyStdDev)
    changeFlood = ddDivDev.lt(change_thresh)  # Mark all pixels which are enough STD's away from the mean.
    #addToMap(floodDiff,   {'min' : 0,     'max' : 4000}, 'floodDiff',   False)
    #addToMap(diffOfDiffs, {'min' : -2000, 'max' : 2000}, 'diffOfDiffs', False)
    #addToMap(ddDivDev,    {'min' : -10,   'max' : 10},   'ddDivDev',    False)
    #addToMap(changeFlood, {'min' : 0,     'max' : 1},    'changeFlood', False)
    #addToMap(domain.water_mask, {'min' : 0, 'max' : 1},  'Permanent water mask', False)

    # Compute the difference statistics inside permanent water mask pixels
    MODIS_RESOLUTION = 250  # Meters
    water_mask       = ee.Image("MODIS/MOD44W/MOD44W_005_2000_02_24").select(['water_mask'])
    diffInWaterMask  = floodDiff.multiply(water_mask)
    maskedMean       = diffInWaterMask.reduceRegion(ee.Reducer.mean(),   bounds, MODIS_RESOLUTION)
    maskedStdDev     = diffInWaterMask.reduceRegion(ee.Reducer.stdDev(), bounds, MODIS_RESOLUTION)

    # Use the water mask statistics to compute a difference threshold, then find all pixels below the threshold.
    waterThreshold = (safe_get_info(maskedMean)['sur_refl_b02'] +
                      dev_thresh * safe_get_info(maskedStdDev)['sur_refl_b02'])
    #print('Water threshold == ' + str(waterThreshold))

    waterPixels = flood_diff_function(high_res_modis).lte(waterThreshold)
    #waterPixels = modis_diff(domain, b, waterThreshold)
    #addToMap(waterPixels, {'min' : 0, 'max' : 1}, 'waterPixels', False)
    #
    ## Is it worth it to use band 6?
    #B6_DIFF_AMT = 500
    #bHigh1 = (b['b6'].subtract(B6_DIFF_AMT).gt(b['b1']))
    #bHigh2 = (b['b6'].subtract(B6_DIFF_AMT).gt(b['b2']))
    #bHigh  = bHigh1.And(bHigh2).reproject('EPSG:4326', scale=MODIS_RESOLUTION)
    #addToMap(bHigh.mask(bHigh), {'min' : 0, 'max' : 1}, 'B High', False)

    # Combine water pixels from the historical and water mask methods.
    return (waterPixels.Or(changeFlood)).select(['sur_refl_b02'], ['b1'])  #.And(bHigh.eq(0));
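# Scalar sketch of the two detection rules combined above, for one pixel:
# a pixel is flagged either when its b2-b1 difference falls below the
# permanent-water-mask threshold, or when it sits change_thresh standard
# deviations below its own historical seasonal mean.  Illustrative only;
# change_thresh is expected to be negative for the second rule to fire.
def _example_history_diff_pixel(diff, water_threshold,
                                history_mean, history_std, change_thresh):
    below_water_thresh = diff <= water_threshold
    z_score            = (diff - history_mean) / history_std
    anomalously_low    = z_score < change_thresh
    return below_water_thresh or anomalously_low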
def dnns(domain, b, use_modis_diff=False):
    '''Dynamic Nearest Neighbor Search adapted from the paper:
        "Li, Sun, Yu, et. al. "A new short-wave infrared (SWIR) method for quantitative water fraction
         derivation and evaluation with EOS/MODIS and Landsat/TM data." IEEE Transactions on Geoscience
         and Remote Sensing, 2013."

        The core idea of this algorithm is to compute local estimates of a "pure water" and "pure land"
        pixel and compute each pixel's water percentage as a mixed composition of those two pure
        spectral types.
    '''
    # This algorithm has some differences from the original paper implementation.
    # The most significant of these is that it does not make use of land/water/partial
    # preclassifications like the original paper does.  The search range is also
    # much smaller in order to make the algorithm run faster in Earth Engine.
    # - Running this with a tiny kernel (effectively treating the entire region
    #   as part of the kernel) might get the best results!

    # Parameters
    KERNEL_SIZE          = 40   # The original paper used a 100x100 pixel box = 25,000 meters!
    AVERAGE_SCALE_METERS = 250  # This scale is used to compute averages over the entire region

    # Set up two square kernels of the same size
    # - These kernels define the search range for nearby pure water and land pixels
    kernel            = ee.Kernel.square(KERNEL_SIZE, 'pixels', False)
    kernel_normalized = ee.Kernel.square(KERNEL_SIZE, 'pixels', True)

    # Build a composite image from bands 1, 2, and 6 (the b1/b6 and b2/b6 ratios are computed below)
    composite_image = b['b1'].addBands(b['b2']).addBands(b['b6'])

    # Use CART classifier to divide pixels up into water, land, and mixed.
    # - Mixed pixels are just low probability water/land pixels.
    if use_modis_diff:
        unflooded_b = modis_utilities.compute_modis_indices(domain.unflooded_domain)
        water_mask  = get_permanent_water_mask()
        thresholds  = modis_utilities.compute_binary_threshold(
            simple_modis_algorithms.get_diff(unflooded_b), water_mask, domain.bounds, True)

        pureWater = simple_modis_algorithms.modis_diff(domain, b, thresholds[0])
        pureLand  = simple_modis_algorithms.modis_diff(domain, b, thresholds[1]).Not()
        mixed     = pureWater.Or(pureLand).Not()
    else:
        classes   = ee_classifiers.earth_engine_classifier(domain, b, 'Pegasos',
                                                           {'classifier_mode' : 'probability'})
        pureWater = classes.gte(0.95)
        pureLand  = classes.lte(0.05)
        #addToMap(classes, {'min': -1, 'max': 1}, 'CLASSES')
        #raise Exception('DEBUG')
        mixed = pureWater.Not().And(pureLand.Not())

    averageWater = safe_get_info(pureWater.mask(pureWater).multiply(composite_image).reduceRegion(
        ee.Reducer.mean(), domain.bounds, AVERAGE_SCALE_METERS))
    averageWaterImage = ee.Image([averageWater['sur_refl_b01'], averageWater['sur_refl_b02'],
                                  averageWater['sur_refl_b06']])

    # For each pixel, compute the number of nearby pure water pixels
    pureWaterCount = pureWater.convolve(kernel)
    # Get mean of nearby pure water (b1,b2,b6) values for each pixel with enough pure water nearby
    MIN_PUREWATER_NEARBY = 1
    pureWaterRef = pureWater.multiply(composite_image).convolve(kernel).multiply(
        pureWaterCount.gte(MIN_PUREWATER_NEARBY)).divide(pureWaterCount)
    # For pixels that did not have enough pure water nearby, just use the global average water value
    pureWaterRef = pureWaterRef.add(averageWaterImage.multiply(pureWaterRef.Not()))

    # Compute a backup, global pure land value to use when pixels have none nearby.
    averagePureLand = safe_get_info(pureLand.mask(pureLand).multiply(composite_image).reduceRegion(
        ee.Reducer.mean(), domain.bounds, AVERAGE_SCALE_METERS))
    #averagePureLand = composite_image.mask(pureLand).reduceRegion(ee.Reducer.mean(), domain.bounds, AVERAGE_SCALE_METERS)
    averagePureLandImage = ee.Image([averagePureLand['sur_refl_b01'], averagePureLand['sur_refl_b02'],
                                     averagePureLand['sur_refl_b06']])

    # Implement equations 10 and 11 from the paper --> It takes many lines of code to compute the local land pixels!
    oneOverSix   = b['b1'].divide(b['b6'])
    twoOverSix   = b['b2'].divide(b['b6'])
    eqTenLeft    = oneOverSix.subtract(pureWaterRef.select('sur_refl_b01').divide(b['b6']))
    eqElevenLeft = twoOverSix.subtract(pureWaterRef.select('sur_refl_b02').divide(b['b6']))

    # For each pixel, grab all the ratios from nearby pixels
    nearbyPixelsOneOverSix = oneOverSix.neighborhoodToBands(kernel)  # Each of these images has one band per nearby pixel
    nearbyPixelsTwoOverSix = twoOverSix.neighborhoodToBands(kernel)
    nearbyPixelsOne        = b['b1'].neighborhoodToBands(kernel)
    nearbyPixelsTwo        = b['b2'].neighborhoodToBands(kernel)
    nearbyPixelsSix        = b['b6'].neighborhoodToBands(kernel)

    # Find which nearby pixels meet the EQ 10 and 11 criteria
    eqTenMatches     = (nearbyPixelsOneOverSix.gt(eqTenLeft)).And(nearbyPixelsOneOverSix.lt(oneOverSix))
    eqElevenMatches  = (nearbyPixelsTwoOverSix.gt(eqElevenLeft)).And(nearbyPixelsTwoOverSix.lt(twoOverSix))
    nearbyLandPixels = eqTenMatches.And(eqElevenMatches)

    # Find the average of the nearby matching pixels
    numNearbyLandPixels = nearbyLandPixels.reduce(ee.Reducer.sum())
    meanNearbyBandOne = nearbyPixelsOne.multiply(nearbyLandPixels).reduce(ee.Reducer.sum()).divide(numNearbyLandPixels)
    meanNearbyBandTwo = nearbyPixelsTwo.multiply(nearbyLandPixels).reduce(ee.Reducer.sum()).divide(numNearbyLandPixels)
    meanNearbyBandSix = nearbyPixelsSix.multiply(nearbyLandPixels).reduce(ee.Reducer.sum()).divide(numNearbyLandPixels)

    # Pack the results into a three channel image for the whole region
    # - Use the global pure land calculation to fill in if there are no nearby equation matching pixels
    MIN_PURE_NEARBY = 1
    meanPureLand = meanNearbyBandOne.addBands(meanNearbyBandTwo).addBands(meanNearbyBandSix)
    meanPureLand = meanPureLand.multiply(numNearbyLandPixels.gte(MIN_PURE_NEARBY)).add(
        averagePureLandImage.multiply(numNearbyLandPixels.lt(MIN_PURE_NEARBY)))

    # Compute the water fraction: (land[b6] - b6) / (land[b6] - water[b6])
    # - Ultimately, relying solely on band 6 for the final classification may not be a good idea!
    meanPureLandSix = meanPureLand.select('sum_2')
    water_fraction = (meanPureLandSix.subtract(b['b6'])).divide(
        meanPureLandSix.subtract(pureWaterRef.select('sur_refl_b06'))).clamp(0, 1)

    # Set pure water to 1, pure land to 0
    water_fraction = water_fraction.add(pureWater).subtract(pureLand).clamp(0, 1)

    #addToMap(fraction,       {'min': 0, 'max':   1}, 'fraction',           False)
    #addToMap(pureWater,      {'min': 0, 'max':   1}, 'pure water',         False)
    #addToMap(pureLand,       {'min': 0, 'max':   1}, 'pure land',          False)
    #addToMap(mixed,          {'min': 0, 'max':   1}, 'mixed',              False)
    #addToMap(pureWaterCount, {'min': 0, 'max': 300}, 'pure water count',   False)
    #addToMap(water_fraction, {'min': 0, 'max':   5}, 'water_fractionDNNS', False)
    #addToMap(pureWaterRef,   {'min': 0, 'max': 3000, 'bands': ['sur_refl_b01', 'sur_refl_b02', 'sur_refl_b06']}, 'pureWaterRef', False)

    return water_fraction.select(['sum_2'], ['b1'])  # Rename sum_2 to b1
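# The mixing model above in scalar form for a single pixel: the water fraction
# is the observed band 6 value's position between the local pure land and pure
# water band 6 estimates.  A minimal sketch for illustration, not Earth Engine
# code, and not called anywhere in the module.
def _example_water_fraction(b6, land_b6, water_b6):
    '''Linear unmixing of one pixel: 0 = pure land, 1 = pure water.'''
    fraction = (land_b6 - b6) / (land_b6 - water_b6)
    return min(max(fraction, 0.0), 1.0)  # Clamp to [0, 1] as above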
def adaboost_learn(ignored=None, ignored2=None):
    '''Train Adaboost classifier'''
    EVAL_RESOLUTION = 250

    # Learn this many weak classifiers
    NUM_CLASSIFIERS_TO_TRAIN = 50

    # Load inputs for this domain and preprocess
    # - Kashmore does not have a good unflooded comparison location so it is left out of the training.
    #all_problems     = ['kashmore_2010_8.xml', 'mississippi_2011_5.xml', 'mississippi_2011_6.xml', 'new_orleans_2005_9.xml', 'sf_bay_area_2011_4.xml']
    #all_domains      = [Domain('config/domains/modis/' + d) for d in all_problems]
    #training_domains = [domain.unflooded_domain for domain in all_domains[:-1]] + [all_domains[-1]] # SF is unflooded

    # This is a cleaned up set where all the permanent water masks are known to be decent.
    all_problems = ['unflooded_mississippi_2010.xml', 'unflooded_new_orleans_2004.xml',
                    'sf_bay_area_2011_4.xml', 'unflooded_bosnia_2013.xml']
    #all_problems.extend(['arkansas_city_2011_5.xml', 'baghlan_south_2014_6.xml',
    #                     'bosnia_west_2014_5.xml', 'kashmore_north_2010_8.xml', 'slidell_2005_9.xml'])
    #all_problems = ['unflooded_mississippi_2010_5.xml']
    all_domains = [Domain('config/domains/modis/' + d) for d in all_problems]

    # Try testing on radar domains
    #all_problems = ['rome.xml',]
    #               #'malawi_2015_1.xml',]
    #               #'mississippi.xml']
    #all_domains  = [Domain('config/domains/sentinel1/rome.xml'),]
    #               #Domain('config/domains/sentinel1/malawi_2015_1.xml'),
    #               #Domain('config/domains/uavsar/mississippi.xml')]
    #training_domains = [domain.training_domain for domain in all_domains]

    ## Try testing on Skybox images
    #all_problems = [#'gloucester_2014_10.xml',] # TODO: Need dense training data for the other images!!!
    #                #'new_bedford_2014_10.xml',
    #                #'sumatra_2014_10.xml',]
    #                'malawi_2015.xml',]
    #all_domains  = [Domain('config/domains/skybox/' + d) for d in all_problems]
    #training_domains = [domain.training_domain for domain in all_domains]

    ## Add a bunch of lakes to the training data
    #lake_problems = ['Amistad_Reservoir/Amistad_Reservoir_2014-07-01_train.xml',
    #                 'Cascade_Reservoir/Cascade_Reservoir_2014-09-01_train.xml',
    #                 'Edmund/Edmund_2014-07-01_train.xml',
    #                 'Hulun/Hulun_2014-07-01_train.xml',
    #                 'Keeley/Keeley_2014-06-01_train.xml',
    #                 'Lake_Mead/Lake_Mead_2014-09-01_train.xml',
    #                 'Miguel_Aleman/Miguel_Aleman_2014-08-01_train.xml',
    #                 'Oneida_Lake/Oneida_Lake_2014-06-01_train.xml',
    #                 'Quesnel/Quesnel_2014-08-01_train.xml',
    #                 'Shuswap/Shuswap_2014-08-01_train.xml',
    #                 'Trikhonis/Trikhonis_2014-07-01_train.xml',
    #                 'Pickwick_Lake/Pickwick_Lake_2014-07-01_train.xml',
    #                 'Rogoaguado/Rogoaguado_2014-08-01_train.xml',
    #                 'Zapatosa/Zapatosa_2014-09-01_train.xml']
    #lake_domains  = [Domain('/home/smcmich1/data/Floods/lakeStudy/' + d) for d in lake_problems]
    #all_problems += lake_problems
    #all_domains  += lake_domains

    #all_problems = ['unflooded_mississippi_2010.xml']
    #all_domains  = [Domain('config/domains/modis/' + d) for d in all_problems]
    #all_problems = ['sf_bay_area_2011_4.xml']
    #all_domains  = [Domain('config/domains/modis/' + d) for d in all_problems]
    #
    #all_problems = ['unflooded_bosnia_2013.xml']
    #all_domains  = [Domain('config/domains/modis/' + d) for d in all_problems]
    #
    #all_problems = ['unflooded_new_orleans_2004.xml']
    #all_domains  = [Domain('config/domains/modis/' + d) for d in all_problems]

    training_domains = all_domains

    water_masks = [modis_utilities.get_permanent_water_mask() for d in training_domains]
    for i in range(len(all_domains)):
        if all_domains[i].ground_truth is not None:
            water_masks[i] = all_domains[i].ground_truth
    #water_masks = [d.ground_truth for d in training_domains] # Manual mask
    training_images = [_create_adaboost_learning_image(d, modis_utilities.compute_modis_indices(d))
                       for d in training_domains]

    # add pixels in flood permanent water masks to training
    #training_domains.extend(all_domains)
    #water_masks.extend([get_permanent_water_mask() for d in all_domains])
    #training_images.append([_create_adaboost_learning_image(domain, compute_modis_indices(domain)).mask(get_permanent_water_mask()) for domain in all_domains])

    transformed_masks = [water_mask.multiply(2).subtract(1) for water_mask in water_masks]

    bands = safe_get_info(training_images[0].bandNames())
    print('Computing threshold ranges.')
    band_splits = __compute_threshold_ranges(training_domains, training_images, water_masks, bands)

    counts = [safe_get_info(training_images[i].select('b1').reduceRegion(
                  ee.Reducer.count(), training_domains[i].bounds, 250))['b1']
              for i in range(len(training_images))]
    count   = sum(counts)
    weights = [ee.Image(1.0 / count) for i in training_images]  # Each input pixel in the training images has an equal weight

    # Initialize for pre-existing partially trained classifier
    full_classifier = []
    for (c, t, alpha) in full_classifier:
        band_splits[c].append(t)
        band_splits[c] = sorted(band_splits[c])
        total = 0
        for i in range(len(training_images)):
            weights[i] = weights[i].multiply(apply_classifier(training_images[i], c, t).multiply(
                transformed_masks[i]).multiply(-alpha).exp())
            total += safe_get_info(weights[i].reduceRegion(
                ee.Reducer.sum(), training_domains[i].bounds, EVAL_RESOLUTION))['constant']
        for i in range(len(training_images)):
            weights[i] = weights[i].divide(total)

    ## Apply weak classifiers to the input test image
    #test_image = _create_adaboost_learning_image(domain, b)

    while len(full_classifier) < NUM_CLASSIFIERS_TO_TRAIN:
        best = None
        for band_name in bands:  # For each weak classifier
            # Find the best threshold that we can choose
            (threshold, ind, error) = _find_adaboost_optimal_threshold(
                training_domains, training_images, water_masks, band_name,
                weights, band_splits[band_name])

            # Compute the sum of weighted classification errors across all of the training domains using this threshold
            #errors = [safe_get_info(weights[i].multiply(training_images[i].select(band_name).lte(threshold).neq(water_masks[i])).reduceRegion(ee.Reducer.sum(), training_domains[i].bounds, EVAL_RESOLUTION))['constant'] for i in range(len(training_images))]
            #error  = sum(errors)
            print('%s found threshold %g with error %g' % (band_name, threshold, error))

            # Record the band/threshold combination with the highest abs(error)
            if (best is None) or (abs(0.5 - error) > abs(0.5 - best[0])):  # Classifiers that are always wrong are also good with negative alpha
                best = (error, band_name, threshold, ind)

        # add an additional split point to search between for thresholds
        band_splits[best[1]].insert(best[3], best[2])

        print('---> Using %s < %g. Error %g.' % (best[1], best[2], best[0]))
        alpha      = 0.5 * math.log((1 - best[0]) / best[0])
        classifier = (best[1], best[2], alpha)
        full_classifier.append(classifier)
        print('---> Now have %d out of %d classifiers.' %
              (len(full_classifier), NUM_CLASSIFIERS_TO_TRAIN))

        # update the weights
        weights = [weights[i].multiply(apply_classifier(training_images[i], classifier[0], classifier[1]).multiply(
                       transformed_masks[i]).multiply(-alpha).exp())
                   for i in range(len(training_images))]
        totals  = [safe_get_info(weights[i].reduceRegion(
                       ee.Reducer.sum(), training_domains[i].bounds, EVAL_RESOLUTION))['constant']
                   for i in range(len(training_images))]
        total   = sum(totals)
        weights = [w.divide(total) for w in weights]

    print(full_classifier)
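# Scalar sketch of the AdaBoost weight update performed in the training loop
# above: weights on correctly classified pixels shrink, weights on
# misclassified pixels grow, and all weights are then renormalized to sum to
# one.  predictions and labels are in {-1, +1}; illustrative only.
import math

def _example_weight_update(weights, predictions, labels, alpha):
    updated = [w * math.exp(-alpha * p * y)
               for (w, p, y) in zip(weights, predictions, labels)]
    total = sum(updated)
    return [w / total for w in updated]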
def _create_adaboost_learning_image(domain, b):
    '''Like _create_learning_image but using a lot of simple classifiers to feed into Adaboost'''
    # A large set of MODIS band configurations, each is assigned a unique band name for reference.
    a = b['b1'].select(['sur_refl_b01'], ['b1'])
    a = a.addBands(b['b2'].select(['sur_refl_b02'], ['b2']))
    #a = a.addBands(b['b3'].select(['sur_refl_b03'], ['b3']))
    #a = a.addBands(b['b4'].select(['sur_refl_b04'], ['b4']))
    #a = a.addBands(b['b5'].select(['sur_refl_b05'], ['b5']))
    #a = a.addBands(b['b6'].select(['sur_refl_b06'], ['b6']))
    a = a.addBands(b['b2'].divide(b['b1']).select(['sur_refl_b02'], ['ratio']))
    a = a.addBands(b['LSWI'].subtract(b['NDVI']).subtract(0.05).select(['sur_refl_b02'], ['LSWIminusNDVI']))
    a = a.addBands(b['LSWI'].subtract(b['EVI']).subtract(0.05).select(['sur_refl_b02'], ['LSWIminusEVI']))
    a = a.addBands(b['EVI'].subtract(0.3).select(['sur_refl_b02'], ['EVI']))
    a = a.addBands(b['LSWI'].select(['sur_refl_b02'], ['LSWI']))
    a = a.addBands(b['NDVI'].select(['sur_refl_b02'], ['NDVI']))
    a = a.addBands(b['NDWI'].select(['sur_refl_b01'], ['NDWI']))
    a = a.addBands(get_diff(b).select(['b1'], ['diff']))
    a = a.addBands(get_fai(b).select(['b1'], ['fai']))
    a = a.addBands(get_dartmouth(b).select(['b1'], ['dartmouth']))
    a = a.addBands(get_mod_ndwi(b).select(['b1'], ['MNDWI']))

    # If available, try adding Landsat data
    try:
        landsat_sensor = domain.get_landsat()
        added = ['blue', 'green', 'red', 'nir', 'swir1', 'temp', 'swir2']
        a = a.addBands(landsat_sensor.image.select(added))
        print('Added Landsat to Adaboost!')
    except:  # No Landsat data is present
        pass

    # If available, try adding radar data
    try:
        # Add all of the bands from the radar sensor
        # - All of the input training images need to have the same bands available!
        radar_sensor = domain.get_radar()
        a = a.addBands(radar_sensor.image)
        print('Added Radar to Adaboost!')
    except:  # No radar data is present
        pass

    # If available, try adding Skybox data
    try:
        try:  # The Skybox data can be in one of two names
            skybox_sensor = domain.skybox
        except:
            skybox_sensor = domain.skybox_nir

        # Add all Skybox bands
        a = a.addBands(skybox_sensor.image)

        # Add an additional texture band
        rgbBands   = skybox_sensor.Red.addBands(skybox_sensor.Green).addBands(skybox_sensor.Blue)
        grayBand   = rgbBands.select('Red').add(rgbBands.select('Green')).add(
                         rgbBands.select('Blue')).divide(ee.Image(3.0)).uint16()
        textureRaw = grayBand.glcmTexture()
        bandList   = safe_get_info(textureRaw)['bands']
        bandName   = [x['id'] for x in bandList if 'idm' in x['id']]
        texture    = textureRaw.select(bandName).convolve(ee.Kernel.square(5, 'pixels'))
        a = a.addBands(texture)
    except:  # No Skybox data is present
        pass

    return a
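# Hedged sketch of how the (band, threshold, alpha) triples produced by
# adaboost_learn() could be combined at prediction time.  apply_classifier()
# is referenced above but not defined in this section, so this scalar version
# is an assumption about its behavior: each weak classifier votes +1/-1 on
# whether the pixel's band value is below its threshold ('band < threshold'
# marks water, matching the training printout above).  Illustrative only.
def _example_adaboost_predict(pixel_values, full_classifier):
    '''pixel_values: dict of band name -> value.  Returns +1 (water) or -1 (land).'''
    total = 0.0
    for (band, threshold, alpha) in full_classifier:
        vote = 1.0 if pixel_values[band] <= threshold else -1.0
        total += alpha * vote
    return 1 if total > 0 else -1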
def dnns_revised(domain, b):
    '''Dynamic Nearest Neighbor Search with revisions to improve performance on our test data'''

    # One issue with this algorithm is that its large search range slows down even Earth Engine!
    # - With a tiny kernel size, everything is relative to the region average, which seems to work pretty well.
    # Another problem is that we don't have a good way of identifying 'definitely land' pixels like we do for water.

    # Parameters
    KERNEL_SIZE = 1  # The original paper used a 100x100 pixel box = 25,000 meters!
    PURELAND_THRESHOLD = 3500  # TODO: Vary by domain?
    PURE_WATER_THRESHOLD_RATIO = 0.1

    # Set up two square kernels of the same size
    # - These kernels define the search range for nearby pure water and land pixels
    kernel            = ee.Kernel.square(KERNEL_SIZE, 'pixels', False)
    kernel_normalized = ee.Kernel.square(KERNEL_SIZE, 'pixels', True)  # Currently unused

    composite_image = b['b1'].addBands(b['b2']).addBands(b['b6'])

    # Compute (b2 - b1) < threshold, a simple water detection algorithm. Treat the result as "pure water" pixels.
    pureWaterThreshold = float(domain.algorithm_params['modis_diff_threshold']) * PURE_WATER_THRESHOLD_RATIO
    pureWater = modis_diff(domain, b, pureWaterThreshold)

    # Compute the mean value of pure water pixels across the entire region, then store in a constant value image.
    AVERAGE_SCALE_METERS = 30  # This value seems to have no effect on the results
    averageWater      = safe_get_info(pureWater.mask(pureWater).multiply(composite_image).reduceRegion(ee.Reducer.mean(), domain.bounds, AVERAGE_SCALE_METERS))
    averageWaterImage = ee.Image([averageWater['sur_refl_b01'], averageWater['sur_refl_b02'], averageWater['sur_refl_b06']])

    # For each pixel, compute the number of nearby pure water pixels
    pureWaterCount = pureWater.convolve(kernel)
    # Get mean of nearby pure water (b1,b2,b6) values for each pixel with enough pure water nearby
    MIN_PURE_NEARBY = 10
    averageWaterLocal = pureWater.multiply(composite_image).convolve(kernel).multiply(pureWaterCount.gte(MIN_PURE_NEARBY)).divide(pureWaterCount)
    # For pixels that did not have enough pure water nearby, just use the global average water value
    averageWaterLocal = averageWaterLocal.add(averageWaterImage.multiply(averageWaterLocal.Not()))

    # Use the simple diff method to select pure land pixels
    #LAND_THRESHOLD = 2000  # TODO: Move to domain selector
    pureLand             = b['b2'].subtract(b['b1']).gte(PURELAND_THRESHOLD).select(['sur_refl_b02'], ['b1'])  # Rename sur_refl_b02 to b1
    averagePureLand      = safe_get_info(pureLand.mask(pureLand).multiply(composite_image).reduceRegion(ee.Reducer.mean(), domain.bounds, AVERAGE_SCALE_METERS))
    averagePureLandImage = ee.Image([averagePureLand['sur_refl_b01'], averagePureLand['sur_refl_b02'], averagePureLand['sur_refl_b06']])
    pureLandCount        = pureLand.convolve(kernel)  # Get nearby pure land count for each pixel
    averagePureLandLocal = pureLand.multiply(composite_image).convolve(kernel).multiply(pureLandCount.gte(MIN_PURE_NEARBY)).divide(pureLandCount)
    averagePureLandLocal = averagePureLandLocal.add(averagePureLandImage.multiply(averagePureLandLocal.Not()))  # For pixels without any pure land nearby, use the mean

    # Implement equations 10 and 11 from the paper
    oneOverSix   = b['b1'].divide(b['b6'])
    twoOverSix   = b['b2'].divide(b['b6'])
    eqTenLeft    = oneOverSix.subtract(averageWaterLocal.select('sur_refl_b01').divide(b['b6']))
    eqElevenLeft = twoOverSix.subtract(averageWaterLocal.select('sur_refl_b02').divide(b['b6']))

    # For each pixel, grab all the ratios from nearby pixels
    # - Each of these images has one band per nearby pixel
    nearbyPixelsOneOverSix = oneOverSix.neighborhoodToBands(kernel)
    nearbyPixelsTwoOverSix = twoOverSix.neighborhoodToBands(kernel)
    nearbyPixelsOne        = b['b1'].neighborhoodToBands(kernel)
    nearbyPixelsTwo        = b['b2'].neighborhoodToBands(kernel)
    nearbyPixelsSix        = b['b6'].neighborhoodToBands(kernel)

    # Find which nearby pixels meet the EQ 10 and 11 criteria
    eqTenMatches     = (nearbyPixelsOneOverSix.gt(eqTenLeft)).And(nearbyPixelsOneOverSix.lt(oneOverSix))
    eqElevenMatches  = (nearbyPixelsTwoOverSix.gt(eqElevenLeft)).And(nearbyPixelsTwoOverSix.lt(twoOverSix))
    nearbyLandPixels = eqTenMatches.And(eqElevenMatches)

    # Find the average of the nearby matching pixels
    numNearbyLandPixels = nearbyLandPixels.reduce(ee.Reducer.sum())
    meanNearbyBandOne   = nearbyPixelsOne.multiply(nearbyLandPixels).reduce(ee.Reducer.sum()).divide(numNearbyLandPixels)
    meanNearbyBandTwo   = nearbyPixelsTwo.multiply(nearbyLandPixels).reduce(ee.Reducer.sum()).divide(numNearbyLandPixels)
    meanNearbyBandSix   = nearbyPixelsSix.multiply(nearbyLandPixels).reduce(ee.Reducer.sum()).divide(numNearbyLandPixels)

    # Pack the results into a three channel image for the whole region
    meanNearbyLand = meanNearbyBandOne.addBands(meanNearbyBandTwo).addBands(meanNearbyBandSix)
    meanNearbyLand = meanNearbyLand.multiply(numNearbyLandPixels.gte(MIN_PURE_NEARBY)).add(averagePureLandImage.multiply(numNearbyLandPixels.lt(MIN_PURE_NEARBY)))

    addToMap(numNearbyLandPixels, {'min': 0, 'max': 400}, 'numNearbyLandPixels', False)
    addToMap(meanNearbyLand, {'min': 0, 'max': 3000, 'bands': ['sum', 'sum_1', 'sum_2']}, 'meanNearbyLand', False)

    # Compute the water fraction: (land - b) / (land - water)
    landDiff  = averagePureLandLocal.subtract(composite_image)
    waterDiff = averageWaterLocal.subtract(composite_image)
    typeDiff  = averagePureLandLocal.subtract(averageWaterLocal)
    #water_vector = (averageLandLocal.subtract(b)).divide(averageLandLocal.subtract(averageWaterLocal))
    landDist  = landDiff.expression("b('sur_refl_b01')*b('sur_refl_b01') + b('sur_refl_b02')*b('sur_refl_b02') + b('sur_refl_b06')*b('sur_refl_b06')").sqrt()
    waterDist = waterDiff.expression("b('sur_refl_b01')*b('sur_refl_b01') + b('sur_refl_b02')*b('sur_refl_b02') + b('sur_refl_b06')*b('sur_refl_b06')").sqrt()
    typeDist  = typeDiff.expression("b('sur_refl_b01')*b('sur_refl_b01') + b('sur_refl_b02')*b('sur_refl_b02') + b('sur_refl_b06')*b('sur_refl_b06')").sqrt()

    #waterOff = landDist.divide(waterDist.add(landDist))
    waterOff = landDist.divide(typeDist)  # TODO: Improve this math, maybe full matrix treatment?

    # Set pure water to 1, pure land to 0
    waterOff = waterOff.subtract(pureLand.multiply(waterOff))
    waterOff = waterOff.add(pureWater.multiply(ee.Image(1.0).subtract(waterOff)))

    # TODO: Better way of filtering out low fraction pixels.
    waterOff = waterOff.multiply(waterOff)
    waterOff = waterOff.gt(0.6)

    #addToMap(fraction, {'min': 0, 'max': 1}, 'fraction', False)
    addToMap(pureWater, {'min': 0, 'max': 1}, 'pure water', False)
    addToMap(pureLand, {'min': 0, 'max': 1}, 'pure land', False)
    addToMap(pureWaterCount, {'min': 0, 'max': 100}, 'pure water count', False)
    addToMap(pureLandCount, {'min': 0, 'max': 100}, 'pure land count', False)
    #addToMap(numNearbyLandPixels, {'min': 0, 'max': 400}, 'numNearbyLandPixels', False)
    #addToMap(meanNearbyLand, {'min': 0, 'max': 3000, 'bands': ['sum', 'sum_1', 'sum_2']}, 'meanNearbyLand', False)
    addToMap(averageWaterImage, {'min': 0, 'max': 3000, 'bands': ['constant', 'constant_1', 'constant_2']}, 'average water', False)
    addToMap(averagePureLandImage, {'min': 0, 'max': 3000, 'bands': ['constant', 'constant_1', 'constant_2']}, 'average pure land', False)
    addToMap(averageWaterLocal, {'min': 0, 'max': 3000, 'bands': ['sur_refl_b01', 'sur_refl_b02', 'sur_refl_b06']}, 'local water ref', False)
    addToMap(averagePureLandLocal, {'min': 0, 'max': 3000, 'bands': ['sur_refl_b01', 'sur_refl_b02', 'sur_refl_b06']}, 'local pure land ref', False)

    return waterOff.select(['sur_refl_b01'], ['b1'])  # Rename sur_refl_b01 to b1
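
# The fraction computed above is a distance ratio in (b1, b2, b6) space:
# waterOff = |land - pixel| / |land - water|, so a pixel at the local land value
# gets 0 and a pixel at the local water value gets roughly 1. The sketch below
# reproduces that math on plain tuples; it is illustrative only and the helper
# name is hypothetical, not part of this module.
def _water_fraction_sketch(pixel, land, water):
    '''Illustrative distance-ratio water fraction for one (b1, b2, b6) triple'''
    dist = lambda u, v: sum((x - y) ** 2 for (x, y) in zip(u, v)) ** 0.5
    return dist(land, pixel) / dist(land, water)  # 0 near land, ~1 near water
# For example, a pixel exactly halfway between the local land and water values
# yields 0.5, since |land - pixel| is then half of |land - water|.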