예제 #1
0
def main():
    """Build a road-labelling training set from NAIP imagery and write it to disk.

    Pipeline, in order: parse the command-line arguments, run the NAIP
    downloader, extract random labelled tiles, pass them through
    equalize_data, split them into train/test sets, one-hot encode the
    labels, and dump everything to disk.
    """
    args = create_parser().parse_args()

    # naip_path must unpack into exactly two values: the state and the year
    state, year = args.naip_path
    downloader = NAIPDownloader(
        args.number_of_naips,
        args.randomize_naips,
        state,
        year,
    )
    raster_data_paths = downloader.download_naips()

    road_labels, naip_tiles, waymap = random_training_data(
        raster_data_paths,
        args.extract_type,
        args.bands,
        args.tile_size,
        args.pixels_to_fatten_roads,
        args.label_data_files,
        args.tile_overlap,
    )

    # equalize_data returns the labels first, then the tiles
    balanced_labels, balanced_tiles = equalize_data(
        road_labels,
        naip_tiles,
        args.save_clippings,
    )

    # split_train_test takes the tiles first, then the labels
    split = split_train_test(
        balanced_tiles,
        balanced_labels,
        args.percent_for_training_data,
    )
    test_labels, training_labels, test_images, training_images = split

    onehot_training_labels, onehot_test_labels = format_as_onehot_arrays(
        waymap.extracter.types,
        training_labels,
        test_labels,
    )

    dump_data_to_disk(
        raster_data_paths,
        training_images,
        training_labels,
        test_images,
        test_labels,
        waymap.extracter.types,
        onehot_training_labels,
        onehot_test_labels,
    )
예제 #2
0
def train_on_cached_data(raster_data_paths, neural_net_type, bands, tile_size,
                         number_of_epochs):
    """Load tiled/cached data, which was prepared for the NAIPs listed in raster_data_paths.

    Read in each NAIP's images/labels and add them to the train/test data. Whenever at
    least 100 training images have accumulated, train on them as a mini batch and then
    clear the training lists. The test set keeps growing across NAIPs and is cut back to
    9,000 entries whenever it exceeds 10,000.

    Args:
        raster_data_paths: iterable of NAIP paths whose cached tiles should be loaded.
        neural_net_type: passed through to train_with_data.
        bands: passed through to train_with_data.
        tile_size: passed through to train_with_data.
        number_of_epochs: passed through to train_with_data.

    Returns:
        A (test_images, model) tuple. model is None if no mini batch was ever trained,
        e.g. when raster_data_paths is empty.
    """
    training_images = []
    onehot_training_labels = []
    test_images = []
    onehot_test_labels = []
    model = None

    for path in raster_data_paths:
        # read in another NAIP worth of data
        labels, images = load_training_tiles(path)
        if len(labels) == 0 or len(images) == 0:
            continue
        equal_count_way_list, equal_count_tile_list = equalize_data(
            labels, images, False)
        new_test_labels, training_labels, new_test_images, new_training_images = \
            split_train_test(equal_count_tile_list, equal_count_way_list, .9)

        if len(training_labels) == 0:
            print("WARNING: a naip image didn't have any road labels?")
            continue
        if len(new_test_labels) == 0:
            print("WARNING: a naip image didn't have any road images?")
            continue

        # add it to the training and test lists (extend, rather than a list
        # comprehension that is evaluated only for its side effects)
        training_images.extend(new_training_images)
        test_images.extend(new_test_images)
        onehot_training_labels.extend(format_as_onehot_arrays(training_labels))
        onehot_test_labels.extend(format_as_onehot_arrays(new_test_labels))

        # once we have 100 training images, maybe from more than one NAIP, train on a
        # mini batch
        if len(training_images) >= 100:
            # continue training the model with the new data set
            model = train_with_data(onehot_training_labels, onehot_test_labels,
                                    test_images, training_images,
                                    neural_net_type, bands, tile_size,
                                    number_of_epochs, model)
            # the mini batch has been consumed; start accumulating the next one
            training_images = []
            onehot_training_labels = []

        # keep test list to 10000 images, in case the machine doesn't have much memory
        if len(test_images) > 10000:
            # shuffle so when we chop off data, it's from many NAIPs, not just the last one
            # NOTE(review): the return value is discarded, so this only has an effect if
            # shuffle_in_unison shuffles both lists in place - confirm.
            shuffle_in_unison(test_images, onehot_test_labels)
            test_images = test_images[:9000]
            onehot_test_labels = onehot_test_labels[:9000]

    return test_images, model
예제 #3
0
def train_on_cached_data(raster_data_paths, neural_net_type, bands, tile_size):
    """Load tiled/cached data, which was prepared for the NAIPs listed in raster_data_paths.

    Read in each NAIP's images/labels, add them to the train/test data, and continue
    training the model after each NAIP is added. Both the train and the test lists are
    cut back to 9,000 entries whenever they exceed 10,000.

    Args:
        raster_data_paths: iterable of NAIP paths whose cached tiles should be loaded.
        neural_net_type: passed through to train_with_data.
        bands: passed through to train_with_data.
        tile_size: passed through to train_with_data.

    Returns:
        A (test_images, model) tuple. model is None if train_with_data was never called,
        e.g. when raster_data_paths is empty.
    """
    training_images = []
    onehot_training_labels = []
    test_images = []
    onehot_test_labels = []
    model = None
    # counts completed train_with_data calls; handed to train_with_data each time
    epoch = 0

    for path in raster_data_paths:
        # keep test list to 10000 images (trimmed back to 9000 when it grows past that)
        if len(test_images) > 10000:
            test_images = test_images[:9000]
            onehot_test_labels = onehot_test_labels[:9000]

        # keep train list to 10000 images
        if len(training_images) > 10000:
            training_images = training_images[:9000]
            onehot_training_labels = onehot_training_labels[:9000]

        # read in another NAIP worth of data
        labels, images = load_training_tiles(path)
        if len(labels) == 0 or len(images) == 0:
            continue
        equal_count_way_list, equal_count_tile_list = equalize_data(labels, images, False)
        new_test_labels, training_labels, new_test_images, new_training_images = \
            split_train_test(equal_count_tile_list, equal_count_way_list, .9)
        if len(training_labels) == 0:
            print("WARNING: a naip image didn't have any road labels?")
            continue
        if len(new_test_labels) == 0:
            print("WARNING: a naip image didn't have any road images?")
            continue

        # add it to the training and test lists (extend, rather than a list
        # comprehension that is evaluated only for its side effects)
        training_images.extend(new_training_images)
        test_images.extend(new_test_images)
        onehot_training_labels.extend(format_as_onehot_arrays(training_labels))
        onehot_test_labels.extend(format_as_onehot_arrays(new_test_labels))

        # shuffle it so when we chop off data it's from many NAIPs, not just the last one
        # NOTE(review): the return values are discarded, so these calls only have an
        # effect if shuffle_in_unison shuffles both lists in place - confirm.
        shuffle_in_unison(training_images, onehot_training_labels)
        shuffle_in_unison(test_images, onehot_test_labels)

        # continue training the model with the new data set
        model = train_with_data(onehot_training_labels, onehot_test_labels, test_images,
                                training_images, neural_net_type, bands, tile_size,
                                epoch, model)
        epoch += 1
    return test_images, model
예제 #4
0
def train_on_cached_data(neural_net_type, number_of_epochs):
    """Load tiled/cached training data in batches, and train the neural net.

    Reads 'bands' and 'tile_size' from the pickled metadata file at
    CACHE_PATH + METADATA_PATH, then repeatedly loads a batch of tiles, passes it
    through equalize_data, and trains on a mini batch whenever at least 100 training
    images have accumulated. The model is handed to save_model at the end.

    Args:
        neural_net_type: passed through to train_with_data and save_model.
        number_of_epochs: passed through to train_with_data.

    Returns:
        The model returned by the last train_with_data call, or None if no mini batch
        ever reached 100 images.
    """
    # pickle data is bytes, so the file must be opened in binary mode
    # NOTE(review): pickle.load can execute arbitrary code from the file; this assumes
    # the cache directory holds only trusted, locally generated data - confirm.
    with open(CACHE_PATH + METADATA_PATH, 'rb') as infile:
        training_info = pickle.load(infile)
    bands = training_info['bands']
    tile_size = training_info['tile_size']

    training_images = []
    onehot_training_labels = []
    model = None

    # there are usually 100+ images with road through the middle, out of every 10,000
    # because we want half on, half off, and discard most images
    EQUALIZATION_BATCH_SIZE = 10000

    # the number of times to pull EQUALIZATION_BATCH_SIZE images from disk
    NUMBER_OF_BATCHES = 50

    for x in range(NUMBER_OF_BATCHES):
        print("BATCH: {} of {}".format(x + 1, NUMBER_OF_BATCHES))
        new_label_paths = load_training_tiles(EQUALIZATION_BATCH_SIZE)
        print("Got batch of {} labels".format(len(new_label_paths)))
        new_training_images, new_onehot_training_labels = format_as_onehot_arrays(
            new_label_paths)
        equal_count_way_list, equal_count_tile_list = equalize_data(
            new_onehot_training_labels, new_training_images, False)
        # extend, rather than a list comprehension evaluated only for its side effects
        training_images.extend(equal_count_tile_list)
        onehot_training_labels.extend(equal_count_way_list)

        # once we have 100 training images, train on a mini batch
        if len(training_images) >= 100:
            # continue training the model with the new data set
            model = train_with_data(onehot_training_labels, training_images,
                                    neural_net_type, bands, tile_size,
                                    number_of_epochs, model)
            # the mini batch has been consumed; start accumulating the next one
            training_images = []
            onehot_training_labels = []

    # NOTE(review): model is still None here if no mini batch was ever trained, and any
    # images accumulated after the last training call are not used - confirm intended.
    save_model(model, neural_net_type, bands, tile_size)

    return model
예제 #5
0
def train_on_cached_data(neural_net_type, number_of_epochs):
    """Load tiled/cached training data in batches, and train the neural net.

    Reads "bands" and "tile_size" from the pickled metadata file at
    CACHE_PATH + METADATA_PATH, then repeatedly loads a batch of tiles, passes it
    through equalize_data, and trains on a mini batch whenever at least 100 training
    images have accumulated. The model is handed to save_model at the end.

    Args:
        neural_net_type: passed through to train_with_data and save_model.
        number_of_epochs: passed through to train_with_data.

    Returns:
        The model returned by the last train_with_data call, or None if no mini batch
        ever reached 100 images.
    """
    # pickle data is bytes, so the file must be opened in binary mode
    # NOTE(review): pickle.load can execute arbitrary code from the file; this assumes
    # the cache directory holds only trusted, locally generated data - confirm.
    with open(CACHE_PATH + METADATA_PATH, "rb") as infile:
        training_info = pickle.load(infile)
    bands = training_info["bands"]
    tile_size = training_info["tile_size"]

    training_images = []
    onehot_training_labels = []
    model = None

    # there are usually 100+ images with road through the middle, out of every 10,000
    # because we want half on, half off, and discard most images
    EQUALIZATION_BATCH_SIZE = 10000

    # the number of times to pull EQUALIZATION_BATCH_SIZE images from disk
    NUMBER_OF_BATCHES = 10

    for _ in range(NUMBER_OF_BATCHES):
        new_label_paths = load_training_tiles(EQUALIZATION_BATCH_SIZE)
        print("Got batch of {} labels".format(len(new_label_paths)))
        new_training_images, new_onehot_training_labels = format_as_onehot_arrays(new_label_paths)
        equal_count_way_list, equal_count_tile_list = equalize_data(
            new_onehot_training_labels, new_training_images, False
        )
        # extend, rather than a list comprehension evaluated only for its side effects
        training_images.extend(equal_count_tile_list)
        onehot_training_labels.extend(equal_count_way_list)

        # once we have 100 training images, train on a mini batch
        if len(training_images) >= 100:
            # continue training the model with the new data set
            model = train_with_data(
                onehot_training_labels,
                training_images,
                neural_net_type,
                bands,
                tile_size,
                number_of_epochs,
                model,
            )
            # the mini batch has been consumed; start accumulating the next one
            training_images = []
            onehot_training_labels = []

    # NOTE(review): model is still None here if no mini batch was ever trained, and any
    # images accumulated after the last training call are not used - confirm intended.
    save_model(model, neural_net_type, bands, tile_size)

    return model
예제 #6
0
def main():
    """Build a road-labelling training set from NAIP imagery and write it to disk.

    Pipeline, in order: parse the command-line arguments, run the NAIP downloader,
    extract random labelled tiles, pass them through equalize_data, split them into
    train/test sets, one-hot encode the labels, and dump everything to disk.
    """
    args = create_parser().parse_args()

    # naip_path must unpack into exactly two values: the state and the year
    state, year = args.naip_path
    downloader = NAIPDownloader(
        args.number_of_naips,
        args.randomize_naips,
        state,
        year,
    )
    raster_data_paths = downloader.download_naips()

    road_labels, naip_tiles, waymap = random_training_data(
        raster_data_paths,
        args.extract_type,
        args.bands,
        args.tile_size,
        args.pixels_to_fatten_roads,
        args.label_data_files,
        args.tile_overlap,
    )

    # equalize_data returns the labels first, then the tiles
    balanced_labels, balanced_tiles = equalize_data(
        road_labels,
        naip_tiles,
        args.save_clippings,
    )

    # split_train_test takes the tiles first, then the labels
    split = split_train_test(
        balanced_tiles,
        balanced_labels,
        args.percent_for_training_data,
    )
    test_labels, training_labels, test_images, training_images = split

    onehot_training_labels, onehot_test_labels = format_as_onehot_arrays(
        waymap.extracter.types,
        training_labels,
        test_labels,
    )

    dump_data_to_disk(
        raster_data_paths,
        training_images,
        training_labels,
        test_images,
        test_labels,
        waymap.extracter.types,
        onehot_training_labels,
        onehot_test_labels,
    )