def main():
    """Download NAIPs, create training/test data from them, and dump the data to disk."""
    parser = create_parser()
    args = parser.parse_args()
    NAIP_STATE, NAIP_YEAR = args.naip_path
    naiper = NAIPDownloader(args.number_of_naips, args.randomize_naips, NAIP_STATE, NAIP_YEAR)
    raster_data_paths = naiper.download_naips()
    road_labels, naip_tiles, waymap = random_training_data(
        raster_data_paths, args.extract_type, args.bands, args.tile_size,
        args.pixels_to_fatten_roads, args.label_data_files, args.tile_overlap)
    equal_count_way_list, equal_count_tile_list = equalize_data(road_labels, naip_tiles,
                                                                args.save_clippings)
    test_labels, training_labels, test_images, training_images = split_train_test(
        equal_count_tile_list, equal_count_way_list, args.percent_for_training_data)
    onehot_training_labels, onehot_test_labels = format_as_onehot_arrays(
        waymap.extracter.types, training_labels, test_labels)
    dump_data_to_disk(raster_data_paths, training_images, training_labels, test_images,
                      test_labels, waymap.extracter.types, onehot_training_labels,
                      onehot_test_labels)
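# split_train_test is defined elsewhere in the project; the sketch below is a
# minimal, hypothetical version consistent with how it is called in this module
# (tiles and labels in, a training fraction, test sets first in the return order):
def split_train_test(tiles, ways, percent_for_training_data):
    """Split image tiles and their labels into training and test sets."""
    cutoff = int(len(tiles) * percent_for_training_data)
    training_images, test_images = tiles[:cutoff], tiles[cutoff:]
    training_labels, test_labels = ways[:cutoff], ways[cutoff:]
    return test_labels, training_labels, test_images, training_images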
def train_on_cached_data(raster_data_paths, neural_net_type, bands, tile_size,
                         number_of_epochs):
    """Load tiled/cached data, which was prepared for the NAIPs listed in raster_data_paths.

    Read in each NAIP's images/labels, add to train/test data, run some epochs as each is added.
    Keep the train and test sets to a max of 10K images by throwing out random data sometimes.
    """
    training_images = []
    onehot_training_labels = []
    test_images = []
    onehot_test_labels = []
    model = None
    for path in raster_data_paths:
        # read in another NAIP worth of data
        labels, images = load_training_tiles(path)
        if len(labels) == 0 or len(images) == 0:
            continue
        equal_count_way_list, equal_count_tile_list = equalize_data(labels, images, False)
        new_test_labels, training_labels, new_test_images, new_training_images = \
            split_train_test(equal_count_tile_list, equal_count_way_list, .9)
        if len(training_labels) == 0:
            print("WARNING: a NAIP image didn't have any training labels")
            continue
        if len(new_test_labels) == 0:
            print("WARNING: a NAIP image didn't have any test labels")
            continue

        # add the new NAIP's data to the training and test lists
        training_images.extend(new_training_images)
        test_images.extend(new_test_images)
        onehot_training_labels.extend(format_as_onehot_arrays(training_labels))
        onehot_test_labels.extend(format_as_onehot_arrays(new_test_labels))

        # once we have 100 training_images, maybe from more than one NAIP, train on a mini batch
        if len(training_images) >= 100:
            # continue training the model with the new data set
            model = train_with_data(onehot_training_labels, onehot_test_labels, test_images,
                                    training_images, neural_net_type, bands, tile_size,
                                    number_of_epochs, model)
            training_images = []
            onehot_training_labels = []

        # keep the test list to 10,000 images, in case the machine doesn't have much memory
        if len(test_images) > 10000:
            # shuffle so when we chop off data, it's from many NAIPs, not just the last one
            shuffle_in_unison(test_images, onehot_test_labels)
            test_images = test_images[:9000]
            onehot_test_labels = onehot_test_labels[:9000]

    return test_images, model
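# shuffle_in_unison is not defined in this excerpt; a minimal sketch of such a
# helper, assuming it shuffles two equal-length lists in place with the same
# permutation so images stay paired with their labels:
import random

def shuffle_in_unison(list_a, list_b):
    """Apply one random permutation to both lists, in place."""
    combined = list(zip(list_a, list_b))
    random.shuffle(combined)
    if combined:
        list_a[:], list_b[:] = zip(*combined)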
def train_on_cached_data(raster_data_paths, neural_net_type, bands, tile_size):
    """Load tiled/cached data, which was prepared for the NAIPs listed in raster_data_paths.

    Read in each NAIP's images/labels, add to train/test data, run some epochs as each is added.
    Keep the train and test sets to a max of 10K images by throwing out random data sometimes.
    """
    training_images = []
    onehot_training_labels = []
    test_images = []
    onehot_test_labels = []
    model = None
    epoch = 0
    for path in raster_data_paths:
        # keep the test list to 10,000 images
        if len(test_images) > 10000:
            test_images = test_images[:9000]
            onehot_test_labels = onehot_test_labels[:9000]

        # keep the train list to 10,000 images
        if len(training_images) > 10000:
            training_images = training_images[:9000]
            onehot_training_labels = onehot_training_labels[:9000]

        # read in another NAIP worth of data
        labels, images = load_training_tiles(path)
        if len(labels) == 0 or len(images) == 0:
            continue
        equal_count_way_list, equal_count_tile_list = equalize_data(labels, images, False)
        new_test_labels, training_labels, new_test_images, new_training_images = \
            split_train_test(equal_count_tile_list, equal_count_way_list, .9)
        if len(training_labels) == 0:
            print("WARNING: a NAIP image didn't have any training labels")
            continue
        if len(new_test_labels) == 0:
            print("WARNING: a NAIP image didn't have any test labels")
            continue

        # add the new NAIP's data to the training and test lists
        training_images.extend(new_training_images)
        test_images.extend(new_test_images)
        onehot_training_labels.extend(format_as_onehot_arrays(training_labels))
        onehot_test_labels.extend(format_as_onehot_arrays(new_test_labels))

        # shuffle so when we chop off data, it's from many NAIPs, not just the last one
        shuffle_in_unison(training_images, onehot_training_labels)
        shuffle_in_unison(test_images, onehot_test_labels)

        # continue training the model with the new data set
        model = train_with_data(onehot_training_labels, onehot_test_labels, test_images,
                                training_images, neural_net_type, bands, tile_size, epoch,
                                model)
        epoch += 1

    return test_images, model
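# Usage sketch for the two raster_data_paths variants above (argument values
# are hypothetical; raster_data_paths comes from NAIPDownloader.download_naips()
# as in main()):
#
#   test_images, model = train_on_cached_data(raster_data_paths,
#                                             neural_net_type='mlp',
#                                             bands=[1, 1, 1, 1],
#                                             tile_size=64)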
def train_on_cached_data(neural_net_type, number_of_epochs):
    """Load tiled/cached training data in batches, and train the neural net."""
    with open(CACHE_PATH + METADATA_PATH, 'rb') as infile:
        training_info = pickle.load(infile)
    bands = training_info['bands']
    tile_size = training_info['tile_size']

    training_images = []
    onehot_training_labels = []
    model = None

    # out of every 10,000 images there are usually only 100+ with a road through
    # the middle; because we want half on-road and half off-road, most images get discarded
    EQUALIZATION_BATCH_SIZE = 10000
    # the number of times to pull EQUALIZATION_BATCH_SIZE images from disk
    NUMBER_OF_BATCHES = 50

    for x in range(0, NUMBER_OF_BATCHES):
        print("BATCH: {} of {}".format(str(x + 1), str(NUMBER_OF_BATCHES)))
        new_label_paths = load_training_tiles(EQUALIZATION_BATCH_SIZE)
        print("Got batch of {} labels".format(len(new_label_paths)))
        new_training_images, new_onehot_training_labels = format_as_onehot_arrays(new_label_paths)
        equal_count_way_list, equal_count_tile_list = equalize_data(new_onehot_training_labels,
                                                                    new_training_images, False)
        training_images.extend(equal_count_tile_list)
        onehot_training_labels.extend(equal_count_way_list)

        # once we have 100 training_images, train on a mini batch
        if len(training_images) >= 100:
            # continue training the model with the new data set
            model = train_with_data(onehot_training_labels, training_images, neural_net_type,
                                    bands, tile_size, number_of_epochs, model)
            training_images = []
            onehot_training_labels = []

    save_model(model, neural_net_type, bands, tile_size)

    return model
def train_on_cached_data(neural_net_type, number_of_epochs):
    """Load tiled/cached training data in batches, and train the neural net."""
    with open(CACHE_PATH + METADATA_PATH, "rb") as infile:
        training_info = pickle.load(infile)
    bands = training_info["bands"]
    tile_size = training_info["tile_size"]

    training_images = []
    onehot_training_labels = []
    model = None

    # out of every 10,000 images there are usually only 100+ with a road through
    # the middle; because we want half on-road and half off-road, most images get discarded
    EQUALIZATION_BATCH_SIZE = 10000
    # the number of times to pull EQUALIZATION_BATCH_SIZE images from disk
    NUMBER_OF_BATCHES = 10

    for x in range(0, NUMBER_OF_BATCHES):
        new_label_paths = load_training_tiles(EQUALIZATION_BATCH_SIZE)
        print("Got batch of {} labels".format(len(new_label_paths)))
        new_training_images, new_onehot_training_labels = format_as_onehot_arrays(new_label_paths)
        equal_count_way_list, equal_count_tile_list = equalize_data(
            new_onehot_training_labels, new_training_images, False
        )
        training_images.extend(equal_count_tile_list)
        onehot_training_labels.extend(equal_count_way_list)

        # once we have 100 training_images, train on a mini batch
        if len(training_images) >= 100:
            # continue training the model with the new data set
            model = train_with_data(
                onehot_training_labels, training_images, neural_net_type, bands,
                tile_size, number_of_epochs, model
            )
            training_images = []
            onehot_training_labels = []

    save_model(model, neural_net_type, bands, tile_size)

    return model
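# A minimal sketch of how the CACHE_PATH + METADATA_PATH pickle read above could
# be produced. The dict format is assumed from the two keys this module reads
# ('bands' and 'tile_size'), and save_training_metadata is a hypothetical name:
import pickle

def save_training_metadata(bands, tile_size):
    """Write the tile-cache metadata that train_on_cached_data reads back."""
    with open(CACHE_PATH + METADATA_PATH, 'wb') as outfile:
        pickle.dump({'bands': bands, 'tile_size': tile_size}, outfile)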
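# Standard script entry point (assumed; the original excerpt does not show one):
if __name__ == '__main__':
    main()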