예제 #1
0
 def test_class_match_segmentation(self):
     """Test class match function for segmentation problems

     A 256x256 label filled with the target class index is expected to
     match, while one filled with a different class index is not.
     """
     ml_type = 'segmentation'
     class_index = 2
     # Use the builtin ``int`` dtype: the ``np.int`` alias was deprecated in
     # NumPy 1.20 and removed in 1.24, where it raises AttributeError.
     passing = np.full((256, 256), class_index, dtype=int)
     failing = np.ones((256, 256), dtype=int)
     self.assertTrue(class_match(ml_type, passing, class_index))
     self.assertFalse(class_match(ml_type, failing, class_index))
예제 #2
0
 def test_class_match_object(self):
     """Check class_match on object-detection labels.

     Each label is a single row of five integers; only the last value
     differs between the label expected to match and the one expected not to.
     """
     problem = 'object-detection'
     wanted_class = 2
     # Last column carries the value compared against ``wanted_class``.
     matching_label = np.array([[0, 0, 0, 0, 2]])
     other_label = np.array([[0, 0, 0, 0, 1]])
     matched = class_match(problem, matching_label, wanted_class)
     self.assertTrue(matched)
     mismatched = class_match(problem, other_label, wanted_class)
     self.assertFalse(mismatched)
예제 #3
0
 def test_class_match_classification(self):
     """Check class_match on classification labels.

     The label with a 1 at position ``wanted_class`` is expected to match;
     the label with a 0 at that position is expected not to.
     """
     problem = 'classification'
     wanted_class = 2
     matching_label = np.array([0, 0, 1])
     other_label = np.array([0, 1, 0])
     matched = class_match(problem, matching_label, wanted_class)
     self.assertTrue(matched)
     mismatched = class_match(problem, other_label, wanted_class)
     self.assertFalse(mismatched)
예제 #4
0
def _tile_results_summary(ml_type, classes):
    """Print per-class label counts followed by the total tile count.

    Reads the module-level ``tile_results`` mapping (tile index -> label).

    Parameters
    ------------
    ml_type: str
        One of "classification", "object-detection", or "segmentation";
        any other value prints only the separator and the total.
    classes: list
        Class definitions; only the ``name`` entry of each dict is read here.
    """
    print('---')
    tile_ids = list(tile_results.keys())
    tile_labels = list(tile_results.values())

    if ml_type == 'object-detection':
        # Class numbers are 1-based; bounding boxes keep theirs at index 4.
        for class_number, cl in enumerate(classes, start=1):
            feature_count = sum(
                1 for tile_label in tile_labels for bbox in tile_label
                if bbox[4] == class_number)
            # A tile counts once if at least one of its boxes passes the filter
            tile_count = sum(
                1 for tile_label in tile_labels
                if len(list(filter(_bbox_class(class_number), tile_label))))
            summary = '{}: {} features in {} tiles'.format(
                cl.get('name'), feature_count, tile_count)
            print(summary)
    elif ml_type == 'classification':
        # Column-wise sum over all labels gives the tile count per column;
        # class columns start at index 1.
        per_column_totals = list(np.sum(tile_labels, axis=0))
        for class_number, cl in enumerate(classes, start=1):
            summary = '{}: {} tiles'.format(
                cl.get('name'), int(per_column_totals[class_number]))
            print(summary)
    elif ml_type == 'segmentation':
        for class_number, cl in enumerate(classes, start=1):
            tile_count = sum(
                1 for tile_label in tile_labels
                if class_match(ml_type, tile_label, class_number))
            print('{}: {} tiles'.format(cl.get('name'), tile_count))

    print('Total tiles: {}'.format(len(tile_ids)))
예제 #5
0
def make_labels(dest_folder, zoom, country, classes, ml_type, bounding_box,
                sparse, **kwargs):
    """Create label data from OSM QA tiles for specified classes

    Perform the following operations:
    - If necessary, re-tile OSM QA Tiles to the specified zoom level
    - Iterate over all tiles within the bounding box and produce a label for each
    - Save the label file as labels.npz
    - Create an output for previewing the labels (GeoJSON or PNG depending upon ml_type)

    Parameters
    ------------
    dest_folder: str
        Folder to save labels and example tiles into
    zoom: int
        The zoom level to create tiles at
    country: str
        The OSM QA Tile extract to download. The value should be a country string matching a value found in
        `label_maker/countries.txt`
    classes: list
        A list of classes for machine learning training. Each class is defined as a dict
        with two required properties:
          - name: class name
          - filter: A Mapbox GL Filter.
        See the README for more details
    ml_type: str
        Defines the type of machine learning. One of "classification", "object-detection", or "segmentation"
    bounding_box: list
        The bounding box to create images from. This should be given in the form: `[xmin, ymin, xmax, ymax]`
        as longitude and latitude values between `[-180, 180]` and `[-90, 90]` respectively
    sparse: boolean
        Limit the total background tiles to write based on `background_ratio` kwarg.
    **kwargs: dict
        Other properties from CLI config passed as keywords to other utility functions.
        Keys read directly by this function:
          - geojson: str, filepath to optional geojson label input
          - background_ratio: number, required when `sparse` is truthy

    Raises
    ------------
    KeyError
        If `sparse` is truthy and `background_ratio` is not supplied in kwargs.
    """
    # tile_results is rebound below in sparse mode, so it must be declared
    # global before its first use in this function.
    global tile_results

    mbtiles_file = op.join(dest_folder, '{}.mbtiles'.format(country))
    mbtiles_file_zoomed = op.join(dest_folder,
                                  '{}-z{!s}.mbtiles'.format(country, zoom))

    if not op.exists(mbtiles_file_zoomed):
        filtered_geo = kwargs.get('geojson') or op.join(
            dest_folder, '{}.geojson'.format(country))
        fast_parse = []
        if not op.exists(filtered_geo):
            fast_parse = ['-P']
            print('Retiling QA Tiles to zoom level {} (takes a bit)'.format(
                zoom))
            ps = Popen(['tippecanoe-decode', '-c', '-f', mbtiles_file],
                       stdout=PIPE)
            stream_filter_fpath = op.join(op.dirname(label_maker.__file__),
                                          'stream_filter.py')
            # Close the output file deterministically so the filtered GeoJSON
            # is fully flushed before tippecanoe reads it below.
            with open(filtered_geo, 'w') as filtered_geo_file:
                run([
                    sys.executable, stream_filter_fpath,
                    json.dumps(bounding_box)
                ],
                    stdin=ps.stdout,
                    stdout=filtered_geo_file)
            ps.wait()
        run(['tippecanoe', '--no-feature-limit', '--no-tile-size-limit'] +
            fast_parse + [
                '-l', 'osm', '-f', '-z',
                str(zoom), '-Z',
                str(zoom), '-o', mbtiles_file_zoomed, filtered_geo
            ])

    # Call tilereduce
    print('Determining labels for each tile')
    mbtiles_to_reduce = mbtiles_file_zoomed
    tilereduce(
        dict(zoom=zoom,
             source=mbtiles_to_reduce,
             bbox=bounding_box,
             args=dict(ml_type=ml_type, classes=classes)), _mapper, _callback,
        _done)

    # Add empty labels to any tiles which didn't have data
    empty_label = _create_empty_label(ml_type, classes)
    for tile in tiles(*bounding_box, [zoom]):
        index = '-'.join([str(i) for i in tile])
        if tile_results.get(index) is None:
            tile_results[index] = empty_label

    # Print a summary of the labels
    _tile_results_summary(ml_type, classes)

    # If the --sparse flag is provided, limit the total background tiles to write
    if sparse:
        pos_examples, neg_examples = [], []
        for k, label in tile_results.items():
            # if we don't match any class, this is a negative example
            # (class numbers passed to class_match are 1-based)
            matches_any_class = any(
                class_match(ml_type, label, class_number)
                for class_number in range(1, len(classes) + 1))
            if matches_any_class:
                pos_examples.append(k)
            else:
                neg_examples.append(k)

        # Choose random subset of negative examples. Sampling without
        # replacement raises ValueError when asked for more items than exist,
        # so never request more background tiles than are available.
        n_neg_ex = min(int(kwargs['background_ratio'] * len(pos_examples)),
                       len(neg_examples))
        if n_neg_ex:
            neg_examples = np.random.choice(neg_examples, n_neg_ex,
                                            replace=False).tolist()
        else:
            neg_examples = []

        tile_results = {
            k: tile_results.get(k)
            for k in pos_examples + neg_examples
        }
        print('Using sparse mode; subselected {} background tiles'.format(
            n_neg_ex))

    # write out labels as numpy arrays
    labels_file = op.join(dest_folder, 'labels.npz')
    print('Writing out labels to {}'.format(labels_file))
    np.savez(labels_file, **tile_results)

    # write out labels as GeoJSON or PNG
    if ml_type == 'classification':
        features = []
        for tile, label in tile_results.items():
            feat = feature(Tile(*[int(t) for t in tile.split('-')]))
            features.append(
                Feature(geometry=feat['geometry'],
                        properties=dict(label=label.tolist())))
        geojson_path = op.join(dest_folder, 'classification.geojson')
        with open(geojson_path, 'w') as geojson_file:
            json.dump(fc(features), geojson_file)
    elif ml_type == 'object-detection':
        label_folder = op.join(dest_folder, 'labels')
        if not op.isdir(label_folder):
            makedirs(label_folder)
        for tile, label in tile_results.items():
            # if we have at least one bounding box label
            if bool(label.shape[0]):
                label_file = '{}.png'.format(tile)
                img = Image.new('RGB', (256, 256))
                draw = ImageDraw.Draw(img)
                for box in label:
                    draw.rectangle(((box[0], box[1]), (box[2], box[3])),
                                   outline=class_color(box[4]))
                print('Writing {}'.format(label_file))
                img.save(op.join(label_folder, label_file))
    elif ml_type == 'segmentation':
        label_folder = op.join(dest_folder, 'labels')
        if not op.isdir(label_folder):
            makedirs(label_folder)
        for tile, label in tile_results.items():
            # if we have any class pixels
            if np.sum(label):
                label_file = '{}.png'.format(tile)
                # Map every pixel's class value to a colour triple
                visible_label = np.array([
                    class_color(l) for l in np.nditer(label)
                ]).reshape(256, 256, 3)
                img = Image.fromarray(visible_label.astype(np.uint8))
                print('Writing {}'.format(label_file))
                img.save(op.join(label_folder, label_file))
예제 #6
0
def preview(dest_folder, number, classes, imagery, ml_type, imagery_offset,
            **kwargs):
    """Produce imagery examples for specified classes

    Loads `labels.npz` from `dest_folder`, then for every class fetches up to
    `number` matching tiles into `<dest_folder>/examples/<class name>/`. For
    object detection the bounding boxes are drawn onto each image; for
    segmentation the image is pasted through the label mask.

    Parameters
    ------------
    dest_folder: str
        Folder to save labels and example tiles into
    number: int
        Number of preview images to download per class
    classes: list
        A list of classes for machine learning training. Each class is defined as a dict
        with two required properties:
          - name: class name
          - filter: A Mapbox GL Filter.
        See the README for more details
    imagery: str
        Imagery template to download satellite images from.
        Ex: http://a.tiles.mapbox.com/v4/mapbox.satellite/{z}/{x}/{y}.jpg?access_token=ACCESS_TOKEN
    ml_type: str
        Defines the type of machine learning. One of "classification", "object-detection", or "segmentation"
    imagery_offset: list
        An optional list of integers representing the number of pixels to offset imagery. Ex. [15, -5] will
        move the images 15 pixels right and 5 pixels up relative to the requested tile bounds
    **kwargs: dict
        Other properties from CLI config passed as keywords to other utility functions
    """
    # open labels file
    labels_file = op.join(dest_folder, 'labels.npz')
    tiles = np.load(labels_file)

    # create example tiles directory
    examples_dir = op.join(dest_folder, 'examples')
    if not op.isdir(examples_dir):
        makedirs(examples_dir)

    # find examples tiles for each class and download
    print('Writing example images to {}'.format(examples_dir))

    # get image acquisition function based on imagery string
    image_function = download_tile_tms
    if is_tif(imagery):
        image_function = get_tile_tif

    for i, cl in enumerate(classes):
        # create class directory
        class_dir = op.join(dest_folder, 'examples', cl.get('name'))
        if not op.isdir(class_dir):
            makedirs(class_dir)

        # Lazily yield matching tiles so labels are only loaded until enough
        # examples are found; class numbers are 1-based.
        class_tiles = (t for t in tiles.files
                       if class_match(ml_type, tiles[t], i + 1))
        print('Downloading at most {} tiles for class {}'.format(
            number, cl.get('name')))
        for n, tile in enumerate(class_tiles):
            # n is zero-based, so stop once `number` tiles have been handled
            # (the previous `n > number` check fetched one tile too many).
            if n >= number:
                break

            tile_img = image_function(tile, imagery, class_dir, imagery_offset)

            if ml_type == 'object-detection':
                # Overlay every bounding box of this tile in red
                img = Image.open(tile_img)
                draw = ImageDraw.Draw(img)
                for box in tiles[tile]:
                    draw.rectangle(((box[0], box[1]), (box[2], box[3])),
                                   outline='red')
                img.save(tile_img)
            elif ml_type == 'segmentation':
                # Paste the imagery onto a black canvas through the label mask
                final = Image.new('RGB', (256, 256))
                img = Image.open(tile_img)
                mask = Image.fromarray(tiles[tile] * 255)
                final.paste(img, mask)
                final.save(tile_img)
예제 #7
0
def preview(dest_folder, number, classes, imagery, ml_type, **kwargs):
    """Produce imagery examples for specified classes

    Loads `labels.npz` from `dest_folder`, then for every class downloads up
    to `number` matching tiles into `<dest_folder>/examples/<class name>/`.
    For object detection the bounding boxes are drawn onto each image; for
    segmentation the image is pasted through the label mask.

    Parameters
    ------------
    dest_folder: str
        Folder to save labels and example tiles into
    number: int
        Number of preview images to download per class
    classes: list
        A list of classes for machine learning training. Each class is defined as a dict
        with two required properties:
          - name: class name
          - filter: A Mapbox GL Filter.
        See the README for more details
    imagery: str
        Imagery template to download satellite images from.
        Ex: http://a.tiles.mapbox.com/v4/mapbox.satellite/{z}/{x}/{y}.jpg?access_token=ACCESS_TOKEN
    ml_type: str
        Defines the type of machine learning. One of "classification", "object-detection", or "segmentation"
    **kwargs: dict
        Other properties from CLI config passed as keywords to other utility functions
    """
    # open labels file
    labels_file = op.join(dest_folder, 'labels.npz')
    tiles = np.load(labels_file)

    # create example tiles directory
    examples_dir = op.join(dest_folder, 'examples')
    if not op.isdir(examples_dir):
        makedirs(examples_dir)

    # find examples tiles for each class and download
    print('Writing example images to {}'.format(examples_dir))
    # The saved file extension is taken from the path part of the imagery URL
    o = urlparse(imagery)
    _, image_format = op.splitext(o.path)
    for i, cl in enumerate(classes):
        # create class directory
        class_dir = op.join(dest_folder, 'examples', cl.get('name'))
        if not op.isdir(class_dir):
            makedirs(class_dir)

        # class numbers passed to class_match are 1-based
        class_tiles = [t for t in tiles.files if class_match(ml_type, tiles[t], i + 1)]
        class_tiles = class_tiles[:number]
        print('Downloading {} tiles for class {}'.format(len(class_tiles), cl.get('name')))
        for tile in class_tiles:
            # NOTE(review): the HTTP status is not checked, so an error
            # response body would be written out as if it were an image.
            r = requests.get(url(tile.split('-'), imagery))
            tile_img = op.join(class_dir, '{}{}'.format(tile, image_format))
            # Close the file before it is re-opened below so the image bytes
            # are guaranteed to be flushed to disk.
            with open(tile_img, 'wb') as tile_file:
                tile_file.write(r.content)
            if ml_type == 'object-detection':
                # Overlay every bounding box of this tile in red
                img = Image.open(tile_img)
                draw = ImageDraw.Draw(img)
                for box in tiles[tile]:
                    draw.rectangle(((box[0], box[1]), (box[2], box[3])), outline='red')
                img.save(tile_img)
            elif ml_type == 'segmentation':
                # Paste the imagery onto a black canvas through the label mask
                final = Image.new('RGB', (256, 256))
                img = Image.open(tile_img)
                mask = Image.fromarray(tiles[tile] * 255)
                final.paste(img, mask)
                final.save(tile_img)