def make_labels(dest_folder, zoom, country, classes, ml_type, bounding_box, sparse, **kwargs):
    """Create label data from OSM QA tiles for specified classes

    Perform the following operations:
    - If necessary, re-tile OSM QA Tiles to the specified zoom level
    - Iterate over all tiles within the bounding box and produce a label for each
    - Save the label file as labels.npz
    - Create an output for previewing the labels (GeoJSON or PNG depending upon ml_type)

    Parameters
    ------------
    dest_folder: str
        Folder to save labels and example tiles into
    zoom: int
        The zoom level to create tiles at
    country: str
        The OSM QA Tile extract to download. The value should be a country string
        matching a value found in `label_maker/countries.txt`
    classes: list
        A list of classes for machine learning training. Each class is defined as a dict
        with two required properties:
          - name: class name
          - filter: A Mapbox GL Filter.
        See the README for more details
    ml_type: str
        Defines the type of machine learning. One of "classification", "object-detection",
        or "segmentation"
    bounding_box: list
        The bounding box to create images from. This should be given in the form:
        `[xmin, ymin, xmax, ymax]` as longitude and latitude values between
        `[-180, 180]` and `[-90, 90]` respectively
    sparse: boolean
        Limit the total background tiles to write based on `background_ratio` kwarg.
        The number of background tiles kept never exceeds the number available.
    geojson: str
        Filepath to optional geojson label input
    **kwargs: dict
        Other properties from CLI config passed as keywords to other utility functions

    Raises
    ------
    KeyError
        If `sparse` is truthy and `background_ratio` is missing from `kwargs`.
    """
    # The sparse branch below rebinds the module-level results dict, so the
    # name has to be declared global for the whole function.
    global tile_results

    mbtiles_file = op.join(dest_folder, '{}.mbtiles'.format(country))
    mbtiles_file_zoomed = op.join(dest_folder, '{}-z{!s}.mbtiles'.format(country, zoom))

    if not op.exists(mbtiles_file_zoomed):
        filtered_geo = kwargs.get('geojson') or op.join(
            dest_folder, '{}.geojson'.format(country))
        fast_parse = []
        if not op.exists(filtered_geo):
            # The GeoJSON is produced here by the stream filter, and only in
            # this case is tippecanoe given the '-P' flag.
            fast_parse = ['-P']
            print('Retiling QA Tiles to zoom level {} (takes a bit)'.format(zoom))
            ps = Popen(['tippecanoe-decode', '-c', '-f', mbtiles_file], stdout=PIPE)
            stream_filter_fpath = op.join(op.dirname(label_maker.__file__),
                                          'stream_filter.py')
            # Context manager guarantees the output file is flushed and closed
            # before tippecanoe reads it.
            with open(filtered_geo, 'w') as filtered_out:
                run([sys.executable, stream_filter_fpath, json.dumps(bounding_box)],
                    stdin=ps.stdout, stdout=filtered_out)
            # Drop our copy of the pipe's read end so the decoder cannot block
            # forever on a full pipe if the filter exited early.
            ps.stdout.close()
            ps.wait()
        run(['tippecanoe', '--no-feature-limit', '--no-tile-size-limit'] + fast_parse + [
            '-l', 'osm', '-f', '-z', str(zoom), '-Z', str(zoom),
            '-o', mbtiles_file_zoomed, filtered_geo
        ])

    # Call tilereduce
    print('Determining labels for each tile')
    mbtiles_to_reduce = mbtiles_file_zoomed
    tilereduce(
        dict(zoom=zoom, source=mbtiles_to_reduce, bbox=bounding_box,
             args=dict(ml_type=ml_type, classes=classes)),
        _mapper, _callback, _done)

    # Add empty labels to any tiles which didn't have data
    empty_label = _create_empty_label(ml_type, classes)
    for tile in tiles(*bounding_box, [zoom]):
        index = '-'.join([str(i) for i in tile])
        if tile_results.get(index) is None:
            tile_results[index] = empty_label

    # Print a summary of the labels
    _tile_results_summary(ml_type, classes)

    # If the --sparse flag is provided, limit the total background tiles to write
    if sparse:
        pos_examples, neg_examples = [], []
        for k in tile_results.keys():
            # if we don't match any class, this is a negative example
            matches_any_class = any(
                class_match(ml_type, tile_results[k], class_index)
                for class_index in range(1, len(classes) + 1))
            if matches_any_class:
                pos_examples.append(k)
            else:
                neg_examples.append(k)

        # Choose random subset of negative examples. Sampling without
        # replacement cannot return more items than exist, so clamp the
        # requested count to the number of background tiles available.
        n_neg_ex = min(int(kwargs['background_ratio'] * len(pos_examples)),
                       len(neg_examples))
        if n_neg_ex > 0:
            neg_examples = np.random.choice(neg_examples, n_neg_ex,
                                            replace=False).tolist()
        else:
            n_neg_ex = 0
            neg_examples = []

        tile_results = {k: tile_results.get(k) for k in pos_examples + neg_examples}
        print('Using sparse mode; subselected {} background tiles'.format(n_neg_ex))

    # write out labels as numpy arrays
    labels_file = op.join(dest_folder, 'labels.npz')
    print('Writing out labels to {}'.format(labels_file))
    np.savez(labels_file, **tile_results)

    # write out labels as GeoJSON or PNG
    if ml_type == 'classification':
        features = []
        for tile, label in tile_results.items():
            # Keys are 'x-y-z' style strings; rebuild the Tile to get its geometry
            feat = feature(Tile(*[int(t) for t in tile.split('-')]))
            features.append(
                Feature(geometry=feat['geometry'],
                        properties=dict(label=label.tolist())))
        with open(op.join(dest_folder, 'classification.geojson'), 'w') as geojson_out:
            json.dump(fc(features), geojson_out)
    elif ml_type == 'object-detection':
        label_folder = op.join(dest_folder, 'labels')
        if not op.isdir(label_folder):
            makedirs(label_folder)
        for tile, label in tile_results.items():
            # if we have at least one bounding box label
            if bool(label.shape[0]):
                label_file = '{}.png'.format(tile)
                img = Image.new('RGB', (256, 256))
                draw = ImageDraw.Draw(img)
                for box in label:
                    # box[0:4] are the rectangle corners, box[4] selects the colour
                    draw.rectangle(((box[0], box[1]), (box[2], box[3])),
                                   outline=class_color(box[4]))
                print('Writing {}'.format(label_file))
                img.save(op.join(label_folder, label_file))
    elif ml_type == 'segmentation':
        label_folder = op.join(dest_folder, 'labels')
        if not op.isdir(label_folder):
            makedirs(label_folder)
        for tile, label in tile_results.items():
            # if we have any class pixels
            if np.sum(label):
                label_file = '{}.png'.format(tile)
                # Map every pixel value to a colour, giving a 256x256x3 image
                visible_label = np.array([
                    class_color(l) for l in np.nditer(label)
                ]).reshape(256, 256, 3)
                img = Image.fromarray(visible_label.astype(np.uint8))
                print('Writing {}'.format(label_file))
                img.save(op.join(label_folder, label_file))
def make_labels(dest_folder, zoom, country, classes, ml_type, bounding_box, **kwargs):
    """Create label data from OSM QA tiles for specified classes

    Perform the following operations:
    - If necessary, re-tile OSM QA Tiles to the specified zoom level
    - Iterate over all tiles within the bounding box and produce a label for each
    - Save the label file as labels.npz
    - Create an output for previewing the labels (GeoJSON or PNG depending upon ml_type)

    Parameters
    ------------
    dest_folder: str
        Folder to save labels and example tiles into
    zoom: int
        The zoom level to create tiles at
    country: str
        Name of the OSM QA Tile extract; used to build the `<country>.mbtiles`,
        `<country>-z<zoom>.mbtiles` and `<country>.geojson` paths inside `dest_folder`
    classes: list
        A list of classes for machine learning training. Each class is defined as a dict
        with two required properties:
          - name: class name
          - filter: A Mapbox GL Filter.
        See the README for more details
    ml_type: str
        Defines the type of machine learning. One of "classification", "object-detection",
        or "segmentation"
    bounding_box: list
        The bounding box to create images from. This should be given in the form:
        `[xmin, ymin, xmax, ymax]` as longitude and latitude values between
        `[-180, 180]` and `[-90, 90]` respectively
    **kwargs: dict
        Other properties from CLI config passed as keywords to other utility functions
    """
    mbtiles_file = op.join(dest_folder, '{}.mbtiles'.format(country))
    mbtiles_file_zoomed = op.join(dest_folder, '{}-z{!s}.mbtiles'.format(country, zoom))

    if not op.exists(mbtiles_file_zoomed):
        print('Retiling QA Tiles to zoom level {} (takes a bit)'.format(zoom))
        filtered_geo = op.join(dest_folder, '{}.geojson'.format(country))
        ps = Popen(['tippecanoe-decode', '-c', '-f', mbtiles_file], stdout=PIPE)
        # Resolve the filter script from the installed package and run it with
        # the current interpreter, so the call works from any working directory.
        stream_filter_fpath = op.join(op.dirname(label_maker.__file__),
                                      'stream_filter.py')
        # Context manager guarantees the output file is flushed and closed
        # before tippecanoe reads it.
        with open(filtered_geo, 'w') as filtered_out:
            run([sys.executable, stream_filter_fpath, json.dumps(bounding_box)],
                stdin=ps.stdout, stdout=filtered_out)
        # Drop our copy of the pipe's read end so the decoder cannot block
        # forever on a full pipe if the filter exited early.
        ps.stdout.close()
        ps.wait()
        run([
            'tippecanoe', '--no-feature-limit', '--no-tile-size-limit', '-P',
            '-l', 'osm', '-f', '-z', str(zoom), '-Z', str(zoom),
            '-o', mbtiles_file_zoomed, filtered_geo
        ])

    # Call tilereduce
    print('Determining labels for each tile')
    mbtiles_to_reduce = mbtiles_file_zoomed
    tilereduce(
        dict(zoom=zoom, source=mbtiles_to_reduce, bbox=bounding_box,
             args=dict(ml_type=ml_type, classes=classes)),
        _mapper, _callback, _done)

    # Add empty labels to any tiles which didn't have data
    empty_label = _create_empty_label(ml_type, classes)
    for tile in tiles(*bounding_box, [zoom]):
        index = '-'.join([str(i) for i in tile])
        if tile_results.get(index) is None:
            tile_results[index] = empty_label

    # Print a summary of the labels
    _tile_results_summary(ml_type, classes)

    # write out labels as numpy arrays
    labels_file = op.join(dest_folder, 'labels.npz')
    print('Write out labels to {}'.format(labels_file))
    np.savez(labels_file, **tile_results)

    # write out labels as GeoJSON or PNG
    if ml_type == 'classification':
        features = []
        for tile, label in tile_results.items():
            # Keys are 'x-y-z' style strings; rebuild the Tile to get its geometry
            feat = feature(Tile(*[int(t) for t in tile.split('-')]))
            features.append(
                Feature(geometry=feat['geometry'],
                        properties=dict(label=label.tolist())))
        with open(op.join(dest_folder, 'classification.geojson'), 'w') as geojson_out:
            json.dump(fc(features), geojson_out)
    elif ml_type == 'object-detection':
        label_folder = op.join(dest_folder, 'labels')
        if not op.isdir(label_folder):
            makedirs(label_folder)
        for tile, label in tile_results.items():
            # if we have at least one bounding box label
            if bool(label.shape[0]):
                label_file = '{}.png'.format(tile)
                img = Image.new('RGB', (256, 256))
                draw = ImageDraw.Draw(img)
                for box in label:
                    draw.rectangle(((box[0], box[1]), (box[2], box[3])),
                                   outline='red')
                print('Writing {}'.format(label_file))
                img.save(op.join(label_folder, label_file))
    elif ml_type == 'segmentation':
        label_folder = op.join(dest_folder, 'labels')
        if not op.isdir(label_folder):
            makedirs(label_folder)
        for tile, label in tile_results.items():
            # if we have any class pixels
            if np.sum(label):
                label_file = '{}.png'.format(tile)
                # Scale the label values by 255 before handing them to PIL
                img = Image.fromarray(label * 255)
                print('Writing {}'.format(label_file))
                img.save(op.join(label_folder, label_file))