Example 1
def main():
    """Use local data to train the neural net, probably made by bin/create_training_data.py."""
    parser = create_parser()
    args = parser.parse_args()
    with open(CACHE_PATH + 'raster_data_paths.pickle', 'rb') as infile:
        raster_data_paths = pickle.load(infile)
    test_images, model = train_on_cached_data(raster_data_paths, args.neural_net, args.bands,
                                              args.tile_size, args.number_of_epochs)
    if not args.omit_findings:
        for path in raster_data_paths:
            print(path)
            labels, images = load_training_tiles(path)
            if len(labels) == 0 or len(images) == 0:
                print("WARNING, there is a borked naip image file")
                continue
            false_positives, false_negatives, fp_images, fn_images = list_findings(labels, images,
                                                                                   model)
            filename = path.split('/')[-1]
            print("FINDINGS: {} false pos and {} false neg, of {} tiles, from {}".format(
                len(false_positives), len(false_negatives), len(images), filename))
            render_results_for_analysis([path], false_positives, fp_images, args.bands,
                                        args.tile_size)

    if args.render_results:
        predictions = predictions_for_tiles(test_images, model)
        render_results_for_analysis(raster_data_paths, predictions, test_images, args.bands,
                                    args.tile_size)
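Note: main() above depends on create_parser(), which is not part of this listing. The sketch below is a guess at an equivalent argparse setup, covering only the flags main() actually reads; the option names mirror the attributes used above, and the defaults are placeholders rather than the project's real values.

import argparse


def create_parser():
    """Hypothetical argparse setup for the flags read by main(); defaults are illustrative."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--neural-net", default="one_layer_relu",
                        help="neural net architecture to train (placeholder default)")
    parser.add_argument("--bands", nargs=4, type=int, default=[0, 0, 0, 1],
                        help="which of the 4 NAIP bands to use (placeholder default)")
    parser.add_argument("--tile-size", type=int, default=64,
                        help="tile edge length in pixels (placeholder default)")
    parser.add_argument("--number-of-epochs", type=int, default=5,
                        help="epochs to run per training pass (placeholder default)")
    parser.add_argument("--omit-findings", action="store_true",
                        help="skip listing false positives/negatives per NAIP")
    parser.add_argument("--render-results", action="store_true",
                        help="render JPEGs of predictions on the test tiles")
    return parser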
Example 2
def train_on_cached_data(raster_data_paths, neural_net_type, bands, tile_size,
                         number_of_epochs):
    """Load tiled/cached data, which was prepared for the NAIPs listed in raster_data_paths.

    Read in each NAIP's images/labels, add to train/test data, run some epochs as each is added.
    Keep the train and test sets to a max of 10K images by throwing out random data sometimes.
    """
    training_images = []
    onehot_training_labels = []
    test_images = []
    onehot_test_labels = []
    model = None

    for path in raster_data_paths:
        # read in another NAIP worth of data
        labels, images = load_training_tiles(path)
        if len(labels) == 0 or len(images) == 0:
            continue
        equal_count_way_list, equal_count_tile_list = equalize_data(
            labels, images, False)
        new_test_labels, training_labels, new_test_images, new_training_images = \
            split_train_test(equal_count_tile_list, equal_count_way_list, .9)

        if len(training_labels) == 0:
            print("WARNING: a naip image didn't have any road labels?")
            continue
        if len(new_test_labels) == 0:
            print("WARNING: a naip image didn't have any road images?")
            continue

        # add it to the training and test lists
        training_images.extend(new_training_images)
        test_images.extend(new_test_images)
        onehot_training_labels.extend(format_as_onehot_arrays(training_labels))
        onehot_test_labels.extend(format_as_onehot_arrays(new_test_labels))

        # once we have 100 training_images, maybe from more than one NAIP, train on a mini batch
        if len(training_images) >= 100:
            # continue training the model with the new data set
            model = train_with_data(onehot_training_labels, onehot_test_labels,
                                    test_images, training_images,
                                    neural_net_type, bands, tile_size,
                                    number_of_epochs, model)
            training_images = []
            onehot_training_labels = []

        # keep test list to 10000 images, in case the machine doesn't have much memory
        if len(test_images) > 10000:
            # shuffle so when we chop off data, it's from many NAIPs, not just the last one
            shuffle_in_unison(test_images, onehot_test_labels)
            test_images = test_images[:9000]
            onehot_test_labels = onehot_test_labels[:9000]

    return test_images, model
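The trimming step above relies on shuffle_in_unison to apply the same random permutation to test_images and onehot_test_labels before the lists are cut; that helper is not shown in this listing. A minimal sketch, assuming plain Python lists:

import random


def shuffle_in_unison(images, labels):
    """Shuffle two parallel lists in place, applying the same permutation to both."""
    combined = list(zip(images, labels))
    random.shuffle(combined)
    if combined:
        images[:], labels[:] = zip(*combined)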
Example 3
def post_findings_to_s3(raster_data_paths, model, training_info, render_results):
    """Aggregate findings from all NAIPs into a pickled list, post to S3."""
    findings = []
    for path in raster_data_paths:
        labels, images = load_training_tiles(path)
        if len(labels) == 0 or len(images) == 0:
            print("WARNING, there is a borked naip image file")
            continue
        false_positives, fp_images = list_findings(labels, images, model)
        filename = path.split('/')[-1]
        print("FINDINGS: {} false pos of {} tiles, from {}".format(
            len(false_positives), len(images), filename))
        if render_results:
            # render JPEGs showing findings
            render_results_for_analysis([path], false_positives, fp_images, training_info['bands'],
                                        training_info['tile_size'])

        # combine findings for all NAIP images analyzed for the region
        findings.extend(tag_with_locations(
            fp_images, false_positives, training_info['tile_size']))

    # dump combined findings to disk as a pickle
    try:
        os.mkdir(CACHE_PATH + training_info['naip_state'])
    except OSError:
        pass
    naip_path_in_cache_dir = training_info['naip_state'] + '/' + 'findings.pickle'
    local_path = CACHE_PATH + naip_path_in_cache_dir
    with open(local_path, 'wb') as outfile:
        pickle.dump(findings, outfile)

    # push pickle to S3
    s3_client = boto3.client('s3')
    s3_client.upload_file(local_path, FINDINGS_S3_BUCKET, naip_path_in_cache_dir)
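Reading a region's findings back is the mirror of this upload. Nothing in the listing shows it, but a plausible helper (name, signature, and local path invented for illustration) would be:

import pickle

import boto3


def load_findings_from_s3(bucket, naip_state, local_path='/tmp/findings.pickle'):
    """Download a region's findings pickle from S3 and unpickle it (illustrative only)."""
    key = naip_state + '/' + 'findings.pickle'
    boto3.client('s3').download_file(bucket, key, local_path)
    with open(local_path, 'rb') as infile:
        return pickle.load(infile)

Called as load_findings_from_s3(FINDINGS_S3_BUCKET, training_info['naip_state']) to match the key layout used above.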
Example 4
def train_on_cached_data(raster_data_paths, neural_net_type, bands, tile_size):
    """Load tiled/cached data, which was prepared for the NAIPs listed in raster_data_paths.

    Read in each NAIP's images/labels, add to train/test data, run some epochs as each is added.
    Keep the train and test sets to a max of 10K images by throwing out random data sometimes.
    """
    training_images = []
    onehot_training_labels = []
    test_images = []
    onehot_test_labels = []
    model = None
    epoch = 0

    for path in raster_data_paths:
        # keep test list to at most 10000 images
        if len(test_images) > 10000:
            test_images = test_images[:9000]
            onehot_test_labels = onehot_test_labels[:9000]

        # keep train list to 10000 images
        if len(training_images) > 10000:
            training_images = training_images[:9000]
            onehot_training_labels = onehot_training_labels[:9000]

        # read in another NAIP worth of data
        labels, images = load_training_tiles(path)
        if len(labels) == 0 or len(images) == 0:
            continue
        equal_count_way_list, equal_count_tile_list = equalize_data(labels, images, False)
        new_test_labels, training_labels, new_test_images, new_training_images = \
            split_train_test(equal_count_tile_list, equal_count_way_list, .9)
        if len(training_labels) == 0:
            print("WARNING: a naip image didn't have any road labels?")
            continue
        if len(new_test_labels) == 0:
            print("WARNING: a naip image didn't have any road images?")
            continue

        # add it to the training and test lists
        training_images.extend(new_training_images)
        test_images.extend(new_test_images)
        onehot_training_labels.extend(format_as_onehot_arrays(training_labels))
        onehot_test_labels.extend(format_as_onehot_arrays(new_test_labels))

        # shuffle it so when we chop off data it's from many NAIPs, not just the last one
        shuffle_in_unison(training_images, onehot_training_labels)
        shuffle_in_unison(test_images, onehot_test_labels)

        # continue training the model with the new data set
        model = train_with_data(onehot_training_labels, onehot_test_labels, test_images,
                                training_images, neural_net_type, bands, tile_size,
                                epoch, model)
        epoch += 1
    return test_images, model
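Both variants of train_on_cached_data lean on split_train_test, which is not included in this listing. Judging from how its return values are unpacked above, with .9 as the training fraction, a plausible sketch is:

import random


def split_train_test(tiles, labels, train_fraction):
    """Randomly route each (tile, label) pair into the training or test split (sketch only).

    The return order matches the unpacking in the examples above:
    (test_labels, training_labels, test_images, training_images).
    """
    test_labels, training_labels = [], []
    test_images, training_images = [], []
    for tile, label in zip(tiles, labels):
        if random.random() < train_fraction:
            training_images.append(tile)
            training_labels.append(label)
        else:
            test_images.append(tile)
            test_labels.append(label)
    return test_labels, training_labels, test_images, training_images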
Example 5
def render_errors(raster_data_paths, model, training_info, render_results):
    """Render JPEGs showing findings."""
    for path in raster_data_paths:
        labels, images = load_training_tiles(path)
        if len(labels) == 0 or len(images) == 0:
            print("WARNING, there is a borked naip image file")
            continue
        false_positives, fp_images = list_findings(labels, images, model)
        filename = path.split('/')[-1]
        print("FINDINGS: {} false pos of {} tiles, from {}".format(
            len(false_positives), len(images), filename))
        render_results_for_analysis([path], false_positives, fp_images, training_info['bands'],
                                    training_info['tile_size'])
Example 6
def render_errors(raster_data_paths, model, training_info, render_results):
    """Render JPEGs showing findings."""
    for path in raster_data_paths:
        labels, images = load_training_tiles(path)
        if len(labels) == 0 or len(images) == 0:
            print("WARNING, there is a borked naip image file")
            continue
        false_positives, fp_images = list_findings(labels, images, model)
        filename = path.split('/')[-1]
        print("FINDINGS: {} false pos of {} tiles, from {}".format(
            len(false_positives), len(images), filename))
        render_results_for_analysis([path], false_positives, fp_images, training_info['bands'],
                                    training_info['tile_size'])
Example 7
def main():
    """Use local data to train the neural net, probably made by bin/create_training_data.py."""
    parser = create_parser()
    args = parser.parse_args()
    with open(CACHE_PATH + 'raster_data_paths.pickle', 'rb') as infile:
        raster_data_paths = pickle.load(infile)
    test_images, model = train_on_cached_data(raster_data_paths,
                                              args.neural_net, args.bands,
                                              args.tile_size,
                                              args.number_of_epochs)
    if not args.omit_findings:
        findings = []
        for path in raster_data_paths:
            print(path)
            labels, images = load_training_tiles(path)
            if len(labels) == 0 or len(images) == 0:
                print("WARNING, there is a borked naip image file")
                continue
            false_positives, false_negatives, fp_images, fn_images = list_findings(
                labels, images, model)
            filename = path.split('/')[-1]
            print(
                "FINDINGS: {} false pos and {} false neg, of {} tiles, from {}"
                .format(len(false_positives), len(false_negatives),
                        len(images), filename))
            # render JPEGs showing findings
            render_results_for_analysis([path], false_positives, fp_images,
                                        args.bands, args.tile_size)

            # combine findings for all NAIP images analyzed
            findings.extend(tag_with_locations(
                fp_images, false_positives, args.tile_size))

        # dump combined findings to disk as a pickle
        with open(CACHE_PATH + 'findings.pickle', 'wb') as outfile:
            pickle.dump(findings, outfile)

        # push pickle to S3
        s3_client = boto3.client('s3')
        s3_client.upload_file(CACHE_PATH + 'findings.pickle', 'deeposm',
                              'findings.pickle')

    if args.render_results:
        predictions = predictions_for_tiles(test_images, model)
        render_results_for_analysis(raster_data_paths, predictions,
                                    test_images, args.bands, args.tile_size)
Example 8
def main():
    """Use local data to train the neural net, probably made by bin/create_training_data.py."""
    parser = create_parser()
    args = parser.parse_args()
    with open(CACHE_PATH + 'raster_data_paths.pickle', 'rb') as infile:
        raster_data_paths = pickle.load(infile)
    test_images, model = train_on_cached_data(raster_data_paths, args.neural_net, args.bands,
                                              args.tile_size, args.number_of_epochs)
    if not args.omit_findings:
        findings = []
        for path in raster_data_paths:
            print(path)
            labels, images = load_training_tiles(path)
            if len(labels) == 0 or len(images) == 0:
                print("WARNING, there is a borked naip image file")
                continue
            false_positives, false_negatives, fp_images, fn_images = list_findings(labels, images,
                                                                                   model)
            filename = path.split('/')[-1]
            print("FINDINGS: {} false pos and {} false neg, of {} tiles, from {}".format(
                len(false_positives), len(false_negatives), len(images), filename))
            # render JPEGs showing findings
            render_results_for_analysis([path], false_positives, fp_images, args.bands,
                                        args.tile_size)

            # combine findings for all NAIP images analyzed
            findings.extend(tag_with_locations(
                fp_images, false_positives, args.tile_size))

        # dump combined findings to disk as a pickle
        with open(CACHE_PATH + 'findings.pickle', 'wb') as outfile:
            pickle.dump(findings, outfile)

        # push pickle to S3
        s3_client = boto3.client('s3')
        s3_client.upload_file(CACHE_PATH + 'findings.pickle', 'deeposm', 'findings.pickle')

    if args.render_results:
        predictions = predictions_for_tiles(test_images, model)
        render_results_for_analysis(raster_data_paths, predictions, test_images, args.bands,
                                    args.tile_size)
Example 9
def train_on_cached_data(neural_net_type, number_of_epochs):
    """Load tiled/cached training data in batches, and train the neural net."""

    with open(CACHE_PATH + METADATA_PATH, 'rb') as infile:
        training_info = pickle.load(infile)
    bands = training_info['bands']
    tile_size = training_info['tile_size']

    training_images = []
    onehot_training_labels = []
    model = None

    # there are usually 100+ images with road through the middle, out of every 10,000
    # because we want half on, half off, and discard most images
    EQUALIZATION_BATCH_SIZE = 10000

    # the number of times to pull EQUALIZATION_BATCH_SIZE images from disk
    NUMBER_OF_BATCHES = 50

    for x in range(0, NUMBER_OF_BATCHES):
        print("BATCH: {} of {}".format(str(x + 1), str(NUMBER_OF_BATCHES)))
        new_label_paths = load_training_tiles(EQUALIZATION_BATCH_SIZE)
        print("Got batch of {} labels".format(len(new_label_paths)))
        new_training_images, new_onehot_training_labels = format_as_onehot_arrays(
            new_label_paths)
        equal_count_way_list, equal_count_tile_list = equalize_data(
            new_onehot_training_labels, new_training_images, False)
        training_images.extend(equal_count_tile_list)
        onehot_training_labels.extend(equal_count_way_list)

        # once we have 100 training_images, train on a mini batch
        if len(training_images) >= 100:
            # continue training the model with the new data set
            model = train_with_data(onehot_training_labels, training_images,
                                    neural_net_type, bands, tile_size,
                                    number_of_epochs, model)
            training_images = []
            onehot_training_labels = []

    save_model(model, neural_net_type, bands, tile_size)

    return model
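equalize_data implements the balancing the comments above describe: keep roughly as many off-road tiles as on-road tiles and discard the rest. Its body is not in this listing; the sketch below assumes a one-hot label whose first element marks an on-road tile and ignores the third argument, both of which may differ in the real code:

import random


def equalize_data(labels, images, save_clippings):
    """Keep equal counts of on-road and off-road tiles (illustrative sketch).

    Assumes label[0] == 1 means the tile has a road through it; save_clippings is ignored here.
    """
    pairs = list(zip(labels, images))
    on_road = [p for p in pairs if p[0][0] == 1]
    off_road = [p for p in pairs if p[0][0] != 1]
    random.shuffle(off_road)
    kept = on_road + off_road[:len(on_road)]
    random.shuffle(kept)
    equal_count_way_list = [label for label, image in kept]
    equal_count_tile_list = [image for label, image in kept]
    return equal_count_way_list, equal_count_tile_list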
Example 10
def post_findings_to_s3(raster_data_paths, model, training_info,
                        render_results):
    """Aggregate findings from all NAIPs into a pickled list, post to S3."""
    findings = []
    for path in raster_data_paths:
        labels, images = load_training_tiles(path)
        if len(labels) == 0 or len(images) == 0:
            print("WARNING, there is a borked naip image file")
            continue
        false_positives, fp_images = list_findings(labels, images, model)
        filename = path.split('/')[-1]
        print("FINDINGS: {} false pos of {} tiles, from {}".format(
            len(false_positives), len(images), filename))
        if render_results:
            # render JPEGs showing findings
            render_results_for_analysis([path], false_positives, fp_images,
                                        training_info['bands'],
                                        training_info['tile_size'])

        # combine findings for all NAIP images analyzed for the region
        findings.extend(tag_with_locations(
            fp_images, false_positives, training_info['tile_size']))

    # dump combined findings to disk as a pickle
    try:
        os.mkdir(CACHE_PATH + training_info['naip_state'])
    except OSError:
        pass
    naip_path_in_cache_dir = training_info['naip_state'] + '/' + 'findings.pickle'
    local_path = CACHE_PATH + naip_path_in_cache_dir
    with open(local_path, 'wb') as outfile:
        pickle.dump(findings, outfile)

    # push pickle to S3
    s3_client = boto3.client('s3')
    s3_client.upload_file(local_path, FINDINGS_S3_BUCKET,
                          naip_path_in_cache_dir)
Example 11
def train_on_cached_data(neural_net_type, number_of_epochs):
    """Load tiled/cached training data in batches, and train the neural net."""

    with open(CACHE_PATH + METADATA_PATH, "rb") as infile:
        training_info = pickle.load(infile)
    bands = training_info["bands"]
    tile_size = training_info["tile_size"]

    training_images = []
    onehot_training_labels = []
    model = None

    # there are usually 100+ images with road through the middle, out of every 10,000
    # because we want half on, half off, and discard most images
    EQUALIZATION_BATCH_SIZE = 10000

    # the number of times to pull EQUALIZATION_BATCH_SIZE images from disk
    NUMBER_OF_BATCHES = 10

    for x in range(0, NUMBER_OF_BATCHES):
        new_label_paths = load_training_tiles(EQUALIZATION_BATCH_SIZE)
        print("Got batch of {} labels".format(len(new_label_paths)))
        new_training_images, new_onehot_training_labels = format_as_onehot_arrays(new_label_paths)
        equal_count_way_list, equal_count_tile_list = equalize_data(
            new_onehot_training_labels, new_training_images, False
        )
        training_images.extend(equal_count_tile_list)
        onehot_training_labels.extend(equal_count_way_list)

        # once we have 100 training_images, train on a mini batch
        if len(training_images) >= 100:
            # continue training the model with the new data set
            model = train_with_data(
                onehot_training_labels, training_images, neural_net_type,
                bands, tile_size, number_of_epochs, model
            )
            training_images = []
            onehot_training_labels = []

    save_model(model, neural_net_type, bands, tile_size)

    return model
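For completeness, a hypothetical driver for this batch-based variant, reusing the create_parser() flags from the main() examples earlier (only neural_net and number_of_epochs are needed here):

def main():
    """Train on cached batches using the command-line flags (illustrative driver)."""
    parser = create_parser()
    args = parser.parse_args()
    train_on_cached_data(args.neural_net, args.number_of_epochs)


if __name__ == "__main__":
    main()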