def test_class_match_segmentation(self):
    """Test class match function for segmentation problems"""
    ml_type = 'segmentation'
    class_index = 2
    # `np.int` was only an alias for the builtin `int`; it was deprecated in
    # NumPy 1.20 and removed in 1.24, so use the builtin directly.
    # A 256x256 mask filled with the target class index should match ...
    passing = np.ones((256, 256), dtype=int) * 2
    # ... while one filled with a different class index (1) should not.
    failing = np.ones((256, 256), dtype=int)
    self.assertTrue(class_match(ml_type, passing, class_index))
    self.assertFalse(class_match(ml_type, failing, class_index))
def test_class_match_object(self):
    """Check class_match against object-detection style labels"""
    kind = 'object-detection'
    wanted_class = 2
    # each row is one box; the fifth value carries the class number
    boxes_with_class = np.array([[0, 0, 0, 0, 2]])
    boxes_without_class = np.array([[0, 0, 0, 0, 1]])

    matched = class_match(kind, boxes_with_class, wanted_class)
    self.assertTrue(matched)

    unmatched = class_match(kind, boxes_without_class, wanted_class)
    self.assertFalse(unmatched)
def test_class_match_classification(self):
    """Check class_match against classification style labels"""
    kind = 'classification'
    wanted_class = 2
    # label vectors: only the first one is set at index `wanted_class`
    vector_with_class = np.array([0, 0, 1])
    vector_without_class = np.array([0, 1, 0])

    matched = class_match(kind, vector_with_class, wanted_class)
    self.assertTrue(matched)

    unmatched = class_match(kind, vector_without_class, wanted_class)
    self.assertFalse(unmatched)
def _tile_results_summary(ml_type, classes):
    """Print per-class label counts and the total number of tiles

    Reads the module-level `tile_results` mapping (tile key -> label) and
    prints one line per entry of `classes`, in a format that depends on
    `ml_type`, followed by the overall tile count.

    Parameters
    ------------
    ml_type: str
        One of "classification", "object-detection", or "segmentation";
        any other value prints only the separator and the total
    classes: list
        Class definitions (dicts); the `name` entry is used for display.
        Classes are numbered from 1 in list order
    """
    print('---')
    tile_labels = list(tile_results.values())
    tile_keys = list(tile_results.keys())

    if ml_type == 'object-detection':
        # per class: how many boxes carry it, and how many tiles contain one
        for class_id, class_def in enumerate(classes, start=1):
            n_features = sum(
                1 for boxes in tile_labels for box in boxes
                if box[4] == class_id)
            n_tiles = sum(
                1 for boxes in tile_labels
                if len(list(filter(_bbox_class(class_id), boxes))))
            print('{}: {} features in {} tiles'.format(
                class_def.get('name'), n_features, n_tiles))
    elif ml_type == 'classification':
        # column-wise sum over all label vectors gives tiles per class
        per_class_totals = list(np.sum(tile_labels, axis=0))
        for class_id, class_def in enumerate(classes, start=1):
            print('{}: {} tiles'.format(
                class_def.get('name'), int(per_class_totals[class_id])))
    elif ml_type == 'segmentation':
        for class_id, class_def in enumerate(classes, start=1):
            n_tiles = sum(
                1 for label in tile_labels
                if class_match(ml_type, label, class_id))
            print('{}: {} tiles'.format(class_def.get('name'), n_tiles))

    print('Total tiles: {}'.format(len(tile_keys)))
def make_labels(dest_folder, zoom, country, classes, ml_type, bounding_box,
                sparse, **kwargs):
    """Create label data from OSM QA tiles for specified classes

    Perform the following operations:
    - If necessary, re-tile OSM QA Tiles to the specified zoom level
    - Iterate over all tiles within the bounding box and produce a label for each
    - Save the label file as labels.npz
    - Create an output for previewing the labels (GeoJSON or PNG depending upon ml_type)

    Parameters
    ------------
    dest_folder: str
        Folder to save labels and example tiles into
    zoom: int
        The zoom level to create tiles at
    country: str
        The OSM QA Tile extract to download. The value should be a country string matching a value found in
        `label_maker/countries.txt`
    classes: list
        A list of classes for machine learning training. Each class is defined as a dict
        with two required properties:
          - name: class name
          - filter: A Mapbox GL Filter.
        See the README for more details
    ml_type: str
        Defines the type of machine learning. One of "classification", "object-detection", or "segmentation"
    bounding_box: list
        The bounding box to create images from. This should be given in the form: `[xmin, ymin, xmax, ymax]`
        as longitude and latitude values between `[-180, 180]` and `[-90, 90]` respectively
    sparse: boolean
        Limit the total background tiles to write based on `background_ratio` kwarg.
    geojson: str
        Filepath to optional geojson label input (read from `kwargs`)
    **kwargs: dict
        Other properties from CLI config passed as keywords to other utility functions.
        `background_ratio` is required when `sparse` is truthy.
    """
    # Declared once at function scope: the results dict is filled in below and,
    # in sparse mode, rebound to a filtered copy.
    global tile_results

    mbtiles_file = op.join(dest_folder, '{}.mbtiles'.format(country))
    mbtiles_file_zoomed = op.join(dest_folder,
                                  '{}-z{!s}.mbtiles'.format(country, zoom))

    if not op.exists(mbtiles_file_zoomed):
        filtered_geo = kwargs.get('geojson') or op.join(
            dest_folder, '{}.geojson'.format(country))
        fast_parse = []
        if not op.exists(filtered_geo):
            fast_parse = ['-P']
            print('Retiling QA Tiles to zoom level {} (takes a bit)'.format(
                zoom))
            stream_filter_fpath = op.join(op.dirname(label_maker.__file__),
                                          'stream_filter.py')
            # Pipe the decoded tiles through the stream filter into the
            # filtered GeoJSON file. The context managers close the output
            # file and the decoder's pipe, then wait for the decoder; closing
            # the pipe before waiting keeps the decoder from blocking forever
            # on a full pipe if the filter exits early.
            with Popen(['tippecanoe-decode', '-c', '-f', mbtiles_file],
                       stdout=PIPE) as ps:
                with open(filtered_geo, 'w') as filtered_out:
                    run([
                        sys.executable, stream_filter_fpath,
                        json.dumps(bounding_box)
                    ], stdin=ps.stdout, stdout=filtered_out)

        run(['tippecanoe', '--no-feature-limit', '--no-tile-size-limit'] +
            fast_parse + [
                '-l', 'osm', '-f', '-z', str(zoom), '-Z', str(zoom), '-o',
                mbtiles_file_zoomed, filtered_geo
            ])

    # Call tilereduce
    print('Determining labels for each tile')
    tilereduce(
        dict(zoom=zoom, source=mbtiles_file_zoomed, bbox=bounding_box,
             args=dict(ml_type=ml_type, classes=classes)),
        _mapper, _callback, _done)

    # Add empty labels to any tiles which didn't have data
    empty_label = _create_empty_label(ml_type, classes)
    for tile in tiles(*bounding_box, [zoom]):
        index = '-'.join([str(i) for i in tile])
        if tile_results.get(index) is None:
            tile_results[index] = empty_label

    # Print a summary of the labels
    _tile_results_summary(ml_type, classes)

    # If the --sparse flag is provided, limit the total background tiles to write
    if sparse:
        pos_examples, neg_examples = [], []
        for k in tile_results:
            # if we don't match any class, this is a negative example
            matches_any_class = any(
                class_match(ml_type, tile_results[k], i + 1)
                for i, _ in enumerate(classes))
            if matches_any_class:
                pos_examples.append(k)
            else:
                neg_examples.append(k)

        # Choose random subset of negative examples. Sampling without
        # replacement cannot return more items than exist, so cap the request
        # at the number of available background tiles instead of raising.
        n_neg_ex = min(int(kwargs['background_ratio'] * len(pos_examples)),
                       len(neg_examples))
        neg_examples = np.random.choice(neg_examples, n_neg_ex,
                                        replace=False).tolist()

        tile_results = {k: tile_results[k] for k in pos_examples + neg_examples}
        print('Using sparse mode; subselected {} background tiles'.format(
            n_neg_ex))

    # write out labels as numpy arrays
    labels_file = op.join(dest_folder, 'labels.npz')
    print('Writing out labels to {}'.format(labels_file))
    np.savez(labels_file, **tile_results)

    # write out labels as GeoJSON or PNG
    if ml_type == 'classification':
        features = []
        for tile, label in tile_results.items():
            feat = feature(Tile(*[int(t) for t in tile.split('-')]))
            features.append(
                Feature(geometry=feat['geometry'],
                        properties=dict(label=label.tolist())))
        collection = fc(features)
        # context manager guarantees the GeoJSON is flushed and closed
        with open(op.join(dest_folder, 'classification.geojson'),
                  'w') as geojson_file:
            json.dump(collection, geojson_file)
    elif ml_type == 'object-detection':
        label_folder = op.join(dest_folder, 'labels')
        if not op.isdir(label_folder):
            makedirs(label_folder)
        for tile, label in tile_results.items():
            # if we have at least one bounding box label
            if label.shape[0]:
                label_file = '{}.png'.format(tile)
                img = Image.new('RGB', (256, 256))
                draw = ImageDraw.Draw(img)
                for box in label:
                    draw.rectangle(((box[0], box[1]), (box[2], box[3])),
                                   outline=class_color(box[4]))
                print('Writing {}'.format(label_file))
                img.save(op.join(label_folder, label_file))
    elif ml_type == 'segmentation':
        label_folder = op.join(dest_folder, 'labels')
        if not op.isdir(label_folder):
            makedirs(label_folder)
        for tile, label in tile_results.items():
            # if we have any class pixels
            if np.sum(label):
                label_file = '{}.png'.format(tile)
                # map every pixel's class value to a colour triple
                visible_label = np.array([
                    class_color(l) for l in np.nditer(label)
                ]).reshape(256, 256, 3)
                img = Image.fromarray(visible_label.astype(np.uint8))
                print('Writing {}'.format(label_file))
                img.save(op.join(label_folder, label_file))
def preview(dest_folder, number, classes, imagery, ml_type, imagery_offset,
            **kwargs):
    """Produce imagery examples for specified classes

    Parameters
    ------------
    dest_folder: str
        Folder to save labels and example tiles into
    number: int
        Number of preview images to download per class (upper bound; fewer
        are written when fewer tiles match the class)
    classes: list
        A list of classes for machine learning training. Each class is defined as a dict
        with two required properties:
          - name: class name
          - filter: A Mapbox GL Filter.
        See the README for more details
    imagery: str
        Imagery template to download satellite images from.
        Ex: http://a.tiles.mapbox.com/v4/mapbox.satellite/{z}/{x}/{y}.jpg?access_token=ACCESS_TOKEN
    ml_type: str
        Defines the type of machine learning. One of "classification", "object-detection", or "segmentation"
    imagery_offset: list
        An optional list of integers representing the number of pixels to offset imagery. Ex. [15, -5] will
        move the images 15 pixels right and 5 pixels up relative to the requested tile bounds
    **kwargs: dict
        Other properties from CLI config passed as keywords to other utility functions
    """
    # open labels file
    labels_file = op.join(dest_folder, 'labels.npz')
    tiles = np.load(labels_file)

    # create example tiles directory
    examples_dir = op.join(dest_folder, 'examples')
    if not op.isdir(examples_dir):
        makedirs(examples_dir)

    # find examples tiles for each class and download
    print('Writing example images to {}'.format(examples_dir))

    # get image acquisition function based on imagery string
    image_function = get_tile_tif if is_tif(imagery) else download_tile_tms

    for i, cl in enumerate(classes):
        # create class directory
        class_dir = op.join(dest_folder, 'examples', cl.get('name'))
        if not op.isdir(class_dir):
            makedirs(class_dir)

        # lazily pick the tiles whose label matches this class (1-based index)
        class_tiles = (t for t in tiles.files
                       if class_match(ml_type, tiles[t], i + 1))
        print('Downloading at most {} tiles for class {}'.format(
            number, cl.get('name')))
        for n, tile in enumerate(class_tiles):
            # `n` is zero-based, so stop once `number` tiles have been handled
            # (a `>` comparison here would fetch one tile too many)
            if n >= number:
                break

            tile_img = image_function(tile, imagery, class_dir,
                                      imagery_offset)

            if ml_type == 'object-detection':
                # outline every labelled box on top of the downloaded image
                img = Image.open(tile_img)
                draw = ImageDraw.Draw(img)
                for box in tiles[tile]:
                    draw.rectangle(((box[0], box[1]), (box[2], box[3])),
                                   outline='red')
                img.save(tile_img)
            elif ml_type == 'segmentation':
                # paste the imagery onto a blank canvas through the label mask
                final = Image.new('RGB', (256, 256))
                img = Image.open(tile_img)
                mask = Image.fromarray(tiles[tile] * 255)
                final.paste(img, mask)
                final.save(tile_img)
def preview(dest_folder, number, classes, imagery, ml_type, **kwargs):
    """Produce imagery examples for specified classes

    Parameters
    ------------
    dest_folder: str
        Folder to save labels and example tiles into
    number: int
        Number of preview images to download per class
    classes: list
        A list of classes for machine learning training. Each class is defined as a dict
        with two required properties:
          - name: class name
          - filter: A Mapbox GL Filter.
        See the README for more details
    imagery: str
        Imagery template to download satellite images from.
        Ex: http://a.tiles.mapbox.com/v4/mapbox.satellite/{z}/{x}/{y}.jpg?access_token=ACCESS_TOKEN
    ml_type: str
        Defines the type of machine learning. One of "classification", "object-detection", or "segmentation"
    **kwargs: dict
        Other properties from CLI config passed as keywords to other utility functions
    """
    # open labels file
    labels_file = op.join(dest_folder, 'labels.npz')
    tiles = np.load(labels_file)

    # create example tiles directory
    examples_dir = op.join(dest_folder, 'examples')
    if not op.isdir(examples_dir):
        makedirs(examples_dir)

    # find examples tiles for each class and download
    print('Writing example images to {}'.format(examples_dir))

    # the saved files reuse the extension found in the imagery URL path
    o = urlparse(imagery)
    _, image_format = op.splitext(o.path)

    for i, cl in enumerate(classes):
        # create class directory
        class_dir = op.join(dest_folder, 'examples', cl.get('name'))
        if not op.isdir(class_dir):
            makedirs(class_dir)

        # tiles whose label matches this class (1-based index), capped at `number`
        class_tiles = [t for t in tiles.files
                       if class_match(ml_type, tiles[t], i + 1)]
        class_tiles = class_tiles[:number]
        print('Downloading {} tiles for class {}'.format(len(class_tiles),
                                                         cl.get('name')))
        for tile in class_tiles:
            r = requests.get(url(tile.split('-'), imagery))
            tile_img = op.join(class_dir, '{}{}'.format(tile, image_format))
            # close (and therefore flush) the file before it is re-opened
            # below; a bare open(...).write(...) leaves that to the garbage
            # collector
            with open(tile_img, 'wb') as img_file:
                img_file.write(r.content)

            if ml_type == 'object-detection':
                # outline every labelled box on top of the downloaded image
                img = Image.open(tile_img)
                draw = ImageDraw.Draw(img)
                for box in tiles[tile]:
                    draw.rectangle(((box[0], box[1]), (box[2], box[3])),
                                   outline='red')
                img.save(tile_img)
            elif ml_type == 'segmentation':
                # paste the imagery onto a blank canvas through the label mask
                final = Image.new('RGB', (256, 256))
                img = Image.open(tile_img)
                mask = Image.fromarray(tiles[tile] * 255)
                final.paste(img, mask)
                final.save(tile_img)