Example #1
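# Note: these snippets are excerpts; they assume the surrounding module's
# imports and helpers are in scope (e.g. ee, math, modis_utilities,
# learned_adaboost, Domain, safe_get_info, get_adaboost_sum).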
def adaboost_radar(domain):
    '''Run an AdaBoost classifier trained for a radar data set'''

    classifier = learned_adaboost.radar_classifiers['malawi']
    
    b = modis_utilities.compute_modis_indices(domain)
    total = get_adaboost_sum(domain, b, classifier)
    return total.gte(-1.0) # Threshold the summed scores at -1.0; 0.0 would be the neutral (equal chance of flood / not flood) cutoff
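get_adaboost_sum is defined elsewhere in the module. The following is a minimal sketch of the weighted vote it plausibly computes, assuming each classifier entry is a (band_name, threshold, alpha) tuple like those produced by adaboost_learn in the next example; the simplified signatures and the _sketch names are illustrative, not the original API.

import ee

def apply_classifier_sketch(image, band_name, threshold):
    # Decision stump mapped to {-1, +1}: +1 where the band value is <= threshold.
    return image.select(band_name).lte(threshold).multiply(2).subtract(1)

def get_adaboost_sum_sketch(image, classifier):
    # Weighted vote over all weak classifiers: sum of alpha_i * h_i(x).
    # Positive sums lean toward water; 0.0 is the neutral decision boundary.
    total = ee.Image(0.0)
    for (band_name, threshold, alpha) in classifier:
        total = total.add(apply_classifier_sketch(image, band_name, threshold).multiply(alpha))
    return total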
Example #2
def adaboost_learn(ignored=None, ignored2=None):
    '''Train an AdaBoost classifier (both arguments are unused)'''
    
    EVAL_RESOLUTION = 250 # Evaluation scale in meters for the reduceRegion calls below

    # Learn this many weak classifiers
    NUM_CLASSIFIERS_TO_TRAIN = 50

    # Load inputs for the training domains and preprocess.
    # - Kashmore does not have a good unflooded comparison location, so it is left out of the training.
    #all_problems      = ['kashmore_2010_8.xml', 'mississippi_2011_5.xml', 'mississippi_2011_6.xml', 'new_orleans_2005_9.xml', 'sf_bay_area_2011_4.xml']
    #all_domains       = [Domain('config/domains/modis/' + d) for d in all_problems]
    #training_domains  = [domain.unflooded_domain for domain in all_domains[:-1]] + [all_domains[-1]] # SF is unflooded
    
    # This is a cleaned-up set where all the permanent water masks are known to be decent.
    all_problems      = ['unflooded_mississippi_2010.xml', 'unflooded_new_orleans_2004.xml', 'sf_bay_area_2011_4.xml', 'unflooded_bosnia_2013.xml']
    #all_problems.extend(['arkansas_city_2011_5.xml', 'baghlan_south_2014_6.xml', 
    #                     'bosnia_west_2014_5.xml', 'kashmore_north_2010_8.xml', 'slidell_2005_9.xml'])
    #all_problems      = ['unflooded_mississippi_2010_5.xml']
    all_domains       = [Domain('config/domains/modis/' + d) for d in all_problems]
    
    # Try testing on radar domains
    #all_problems   = ['rome.xml',]
    #                  #'malawi_2015_1.xml',]
    #                  #'mississippi.xml']
    #all_domains    = [Domain('config/domains/sentinel1/rome.xml'),]
    #                  #Domain('config/domains/sentinel1/malawi_2015_1.xml'),
    #                  #Domain('config/domains/uavsar/mississippi.xml')]
    #training_domains  = [domain.training_domain for domain in all_domains]
    
    ## Try testing on Skybox images
    #all_problems     = [#'gloucester_2014_10.xml',] # TODO: Need dense training data for the other images!!!
    #                    #'new_bedford_2014_10.xml',
    #                    #'sumatra_2014_10.xml',]
    #                    'malawi_2015.xml',] 
    #all_domains      = [Domain('config/domains/skybox/' + d) for d in all_problems]
    #training_domains = [domain.training_domain for domain in all_domains]
    
    ## Add a bunch of lakes to the training data
    #lake_problems = ['Amistad_Reservoir/Amistad_Reservoir_2014-07-01_train.xml',
    #                 'Cascade_Reservoir/Cascade_Reservoir_2014-09-01_train.xml',
    #                 'Edmund/Edmund_2014-07-01_train.xml',
    #                 'Hulun/Hulun_2014-07-01_train.xml',
    #                 'Keeley/Keeley_2014-06-01_train.xml',
    #                 'Lake_Mead/Lake_Mead_2014-09-01_train.xml',
    #                 'Miguel_Aleman/Miguel_Aleman_2014-08-01_train.xml',
    #                 'Oneida_Lake/Oneida_Lake_2014-06-01_train.xml',
    #                 'Quesnel/Quesnel_2014-08-01_train.xml',
    #                 'Shuswap/Shuswap_2014-08-01_train.xml',
    #                 'Trikhonis/Trikhonis_2014-07-01_train.xml',
    #                 'Pickwick_Lake/Pickwick_Lake_2014-07-01_train.xml',
    #                 'Rogoaguado/Rogoaguado_2014-08-01_train.xml',
    #                 'Zapatosa/Zapatosa_2014-09-01_train.xml']
    #lake_domains  = [Domain('/home/smcmich1/data/Floods/lakeStudy/' + d) for d in lake_problems]
    #all_problems += lake_problems
    #all_domains  += lake_domains
    
    #all_problems      = ['unflooded_mississippi_2010.xml']
    #all_domains       = [Domain('config/domains/modis/' + d) for d in all_problems]

    #all_problems      = ['sf_bay_area_2011_4.xml']
    #all_domains       = [Domain('config/domains/modis/' + d) for d in all_problems]
    #
    #all_problems      = ['unflooded_bosnia_2013.xml']
    #all_domains       = [Domain('config/domains/modis/' + d) for d in all_problems]
    #
    #all_problems      = ['unflooded_new_orleans_2004.xml']
    #all_domains       = [Domain('config/domains/modis/' + d) for d in all_problems]
    
    training_domains  = all_domains
    
    # Default to the permanent water mask; use the ground truth instead wherever it is available.
    water_masks = [modis_utilities.get_permanent_water_mask() for _ in training_domains]
    for i in range(len(all_domains)):
        if all_domains[i].ground_truth is not None:
            water_masks[i] = all_domains[i].ground_truth
    #water_masks = [d.ground_truth for d in training_domains] # Manual mask
    
    # Build one multi-band feature image per training domain from its MODIS indices.
    training_images = [_create_adaboost_learning_image(d, modis_utilities.compute_modis_indices(d)) for d in training_domains]
    
    # Optionally add pixels from the flood domains' permanent water masks to the training set
    #training_domains.extend(all_domains)
    #water_masks.extend([get_permanent_water_mask() for d in all_domains])
    #training_images.append([_create_adaboost_learning_image(domain, compute_modis_indices(domain)).mask(get_permanent_water_mask()) for domain in all_domains])
    
    # Remap the {0, 1} water masks to {-1, +1} labels for the exponential weight updates below.
    transformed_masks = [water_mask.multiply(2).subtract(1) for water_mask in water_masks]

    bands             = safe_get_info(training_images[0].bandNames())
    print('Computing threshold ranges.')
    band_splits = __compute_threshold_ranges(training_domains, training_images, water_masks, bands)
    counts = [safe_get_info(training_images[i].select('b1').reduceRegion(ee.Reducer.count(), training_domains[i].bounds, EVAL_RESOLUTION))['b1'] for i in range(len(training_images))]
    count = sum(counts)
    weights = [ee.Image(1.0 / count) for _ in training_images] # Start every training pixel with an equal weight of 1/N
    
    # Resume support: to continue training a partially trained classifier, replace this
    # empty list with the (band, threshold, alpha) tuples printed by a previous run;
    # the loop below replays them to restore the pixel weights.
    full_classifier = []
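    # A hypothetical resume state (band names and values invented for
    # illustration) would replace the empty list above, e.g.:
    #   full_classifier = [('b2', 1324.5, 0.62), ('NDWI', -0.13, 0.41)]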
    for (c, t, alpha) in full_classifier:
        band_splits[c].append(t)
        band_splits[c] = sorted(band_splits[c])
        total = 0
        for i in range(len(training_images)):
            weights[i] = weights[i].multiply(apply_classifier(training_images[i], c, t).multiply(transformed_masks[i]).multiply(-alpha).exp())
            total += safe_get_info(weights[i].reduceRegion(ee.Reducer.sum(), training_domains[i].bounds, EVAL_RESOLUTION))['constant']
        for i in range(len(training_images)):
            weights[i] = weights[i].divide(total)
    
    while len(full_classifier) < NUM_CLASSIFIERS_TO_TRAIN:
        best = None
        for band_name in bands: # Evaluate one candidate weak classifier per band
            # Find the best threshold that we can choose
            (threshold, ind, error) = _find_adaboost_optimal_threshold(training_domains, training_images, water_masks, band_name, weights, band_splits[band_name])
            
            # Compute the sum of weighted classification errors across all of the training domains using this threshold
            #errors = [safe_get_info(weights[i].multiply(training_images[i].select(band_name).lte(threshold).neq(water_masks[i])).reduceRegion(ee.Reducer.sum(), training_domains[i].bounds, EVAL_RESOLUTION))['constant'] for i in range(len(training_images))]
            #error  = sum(errors)
            print('%s found threshold %g with error %g' % (band_name, threshold, error))
            
            # Record the band/threshold combination whose error is farthest from 0.5
            if (best is None) or (abs(0.5 - error) > abs(0.5 - best[0])): # Reliably wrong classifiers are also useful, via a negative alpha
                best = (error, band_name, threshold, ind)
        
        # Insert the chosen threshold as an additional split point so later rounds search finer intervals
        band_splits[best[1]].insert(best[3], best[2])
      
        print('---> Using %s < %g. Error %g.' % (best[1], best[2], best[0]))
        # Standard AdaBoost classifier weight; negative when the error exceeds 0.5.
        alpha      = 0.5 * math.log((1 - best[0]) / best[0])
        classifier = (best[1], best[2], alpha)
        full_classifier.append(classifier)
        print('---> Now have %d out of %d classifiers.' % (len(full_classifier), NUM_CLASSIFIERS_TO_TRAIN))
        
        # Update the weights: w *= exp(-alpha * y * h(x)), so misclassified pixels gain weight, then renormalize
        weights = [weights[i].multiply(apply_classifier(training_images[i], classifier[0], classifier[1]).multiply(transformed_masks[i]).multiply(-alpha).exp()) for i in range(len(training_images))]
        totals  = [safe_get_info(weights[i].reduceRegion(ee.Reducer.sum(), training_domains[i].bounds, EVAL_RESOLUTION))['constant'] for i in range(len(training_images))]
        total   = sum(totals)
        weights = [w.divide(total) for w in weights]
        print(full_classifier)
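To make the arithmetic hidden behind the Earth Engine image operations explicit, here is a self-contained pure-Python sketch of a single boosting round on plain lists. All names and values are invented for illustration; _find_adaboost_optimal_threshold performs the equivalent threshold search per pixel with ee.Image operations.

import math

def boost_round(features, labels, weights, candidate_thresholds):
    '''One AdaBoost round with decision stumps h(x) = +1 if x <= t else -1.'''
    best_t, best_err = None, None
    for t in candidate_thresholds:
        # Weighted error of the stump at threshold t.
        err = sum(w for x, y, w in zip(features, labels, weights)
                  if (1 if x <= t else -1) != y)
        # Keep the threshold whose error is farthest from 0.5; err > 0.5 is
        # still useful because it just yields a negative alpha (a flipped stump).
        if best_err is None or abs(0.5 - err) > abs(0.5 - best_err):
            best_t, best_err = t, err
    # A real implementation would clamp err away from 0 and 1 before the log.
    alpha = 0.5 * math.log((1 - best_err) / best_err)
    # Reweight: w_i *= exp(-alpha * y_i * h(x_i)), then renormalize to sum to 1,
    # so misclassified pixels gain weight for the next round.
    new_weights = [w * math.exp(-alpha * y * (1 if x <= best_t else -1))
                   for x, y, w in zip(features, labels, weights)]
    total = sum(new_weights)
    return best_t, alpha, [w / total for w in new_weights]

# Tiny worked example with made-up values: four pixels of one feature band.
features = [0.2, 0.4, 0.6, 0.8]
labels   = [1, -1, 1, -1]   # +1 = water, -1 = land
weights  = [0.25] * 4
t, alpha, weights = boost_round(features, labels, weights, [0.3, 0.5, 0.7])
# t = 0.3 (weighted error 0.25), alpha = 0.5 * ln(3) ~= 0.55; the one
# misclassified pixel (x = 0.6) now carries half of the total weight.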