Beispiel #1
0
def _upload_to_zoo(exp_cfg: dict, collect_dir: str, upload_dir: str) -> None:
    """Upload the collected eval, model bundle and sample predictions.

    Args:
        exp_cfg: Experiment configuration. Not used by this function.
        collect_dir: Directory holding the collected artifacts
            (``eval/eval.json``, ``bundle/model-bundle.zip`` and
            ``sample-predictions``).
        upload_dir: Destination under which ``eval.json``,
            ``model-bundle.zip`` and ``sample-predictions`` are written.

    Raises:
        ValueError: If a source path is reported as neither a file nor a
            directory.
    """
    src_uris = {
        'eval': join(collect_dir, 'eval', 'eval.json'),
        'bundle': join(collect_dir, 'bundle', 'model-bundle.zip'),
        'sample_predictions': join(collect_dir, 'sample-predictions'),
    }
    dst_uris = {
        'eval': join(upload_dir, 'eval.json'),
        'bundle': join(upload_dir, 'model-bundle.zip'),
        'sample_predictions': join(upload_dir, 'sample-predictions'),
    }

    for k, src in src_uris.items():
        dst = dst_uris[k]
        if not exists(src):
            # Report the missing artifact; the checks below decide how to
            # proceed for this path.
            console_failure(f'{k}: {src} not found.')
        if isfile(src):
            console_info(f'Uploading {k} file: {src} to {dst}.')
            upload_or_copy(src, dst)
        elif isdir(src):
            console_info(f'Syncing {k} dir: {src} to {dst}.')
            sync_to_dir(src, dst)
        else:
            raise ValueError(
                f'{k}: {src} is neither a file nor a directory.')
    def test_download_if_needed_local(self):
        """``download_if_needed`` returns a local path unchanged."""
        target = self.local_path

        # Nothing has been written yet, so reading must fail.
        with self.assertRaises(NotReadableError):
            file_to_str(target)

        str_to_file(self.content_str, target)
        # Source and destination are the same local path.
        upload_or_copy(target, target)

        resolved = download_if_needed(target, self.tmp_dir.name)
        self.assertEqual(resolved, target)
    def test_file_exists_s3_true(self):
        """A file uploaded to S3 is reported as existing by ``file_exists``."""
        local_file = os.path.join(self.tmp_dir.name, 'lorem', 'ipsum.txt')
        make_dir(os.path.dirname(local_file), check_empty=False)
        str_to_file(self.lorem, local_file)

        remote_file = 's3://{}/lorem.txt'.format(self.bucket_name)
        upload_or_copy(local_file, remote_file)

        found = file_exists(remote_file)
        self.assertTrue(found)
    def test_copy_to_local(self):
        """Copying between two local paths leaves one path in the target dir."""
        root = self.tmp_dir.name
        src_file = os.path.join(root, 'lorem', 'ipsum.txt')
        dst_file = os.path.join(root, 'yyy', 'ipsum.txt')
        src_dir = os.path.dirname(src_file)
        dst_dir = os.path.dirname(dst_file)

        # Both parent directories are created up front.
        for folder in (src_dir, dst_dir):
            make_dir(folder, check_empty=False)

        str_to_file(self.lorem, src_file)
        upload_or_copy(src_file, dst_file)

        copied = list_paths(dst_dir)
        self.assertEqual(len(copied), 1)
    def test_list_paths_s3(self):
        """Listing an S3 directory returns the single uploaded object."""
        local_file = os.path.join(self.tmp_dir.name, 'lorem', 'ipsum.txt')
        remote_file = 's3://{}/xxx/lorem.txt'.format(self.bucket_name)
        remote_dir = 's3://{}/xxx/'.format(self.bucket_name)

        make_dir(os.path.dirname(local_file), check_empty=False)
        str_to_file(self.lorem, local_file)
        upload_or_copy(local_file, remote_file)

        # The result of this first listing is discarded; the directory is
        # listed again for the assertion.
        list_paths(remote_dir)
        listed = list_paths(remote_dir)
        self.assertEqual(len(listed), 1)
    def test_last_modified_s3(self):
        """``last_modified`` on an uploaded S3 object returns a datetime."""
        path = os.path.join(self.tmp_dir.name, 'lorem', 'ipsum1.txt')
        s3_path = 's3://{}/lorem1.txt'.format(self.bucket_name)
        directory = os.path.dirname(path)
        make_dir(directory, check_empty=False)

        # The file system is resolved from the URI before the object exists.
        fs = FileSystem.get_file_system(s3_path, 'r')

        str_to_file(self.lorem, path)
        upload_or_copy(path, s3_path)
        stamp = fs.last_modified(s3_path)

        # assertIsInstance reports the actual type on failure, unlike
        # assertTrue(isinstance(...)) which only says "False is not true".
        self.assertIsInstance(stamp, datetime.datetime)
    def test_download_if_needed_s3(self):
        """Round-trip a file through S3 and reject an unwritable S3 URI."""
        remote = self.s3_path

        # Nothing has been uploaded yet, so reading must fail.
        with self.assertRaises(NotReadableError):
            file_to_str(remote)

        str_to_file(self.content_str, self.local_path)
        upload_or_copy(self.local_path, remote)

        downloaded = download_if_needed(remote, self.tmp_dir.name)
        round_tripped = file_to_str(downloaded)
        self.assertEqual(self.content_str, round_tripped)

        bad_remote = 's3://wrongpath/x.txt'
        with self.assertRaises(NotWritableError):
            upload_or_copy(downloaded, bad_remote)
Beispiel #8
0
    def __exit__(self, type, value, traceback):
        """
        This writes a zip file for a group of scenes at {output_uri}/{uuid}.zip.

        This method is called once per instance of the chip command.
        A number of instances of the chip command can run simultaneously to
        process chips in parallel. The uuid in the zip path above is what allows
        separate instances to avoid overwriting each others' output.

        The contents of self.sample_dir are zipped into the temporary
        directory and the archive is uploaded to self.output_uri. The
        temporary directory is cleaned up even if zipping or uploading fails.

        Args:
            type: exception type passed by the context manager protocol
                (unused)
            value: exception instance passed by the context manager protocol
                (unused)
            traceback: traceback passed by the context manager protocol
                (unused)
        """
        try:
            output_path = join(self.tmp_dir_obj.name, 'output.zip')
            zipdir(self.sample_dir, output_path)
            upload_or_copy(output_path, self.output_uri)
        finally:
            # Always release the temporary directory, including on failure.
            self.tmp_dir_obj.cleanup()
    def test_file_exists(self):
        """Check ``file_exists`` for an S3 object, its prefix and directory."""
        bucket = self.bucket_name
        local_file = os.path.join(self.tmp_dir.name, 'lorem', 'ipsum.txt')
        remote_file = 's3://{}/xxx/lorem.txt'.format(bucket)
        remote_prefix = 's3://{}/xxx/lorem'.format(bucket)
        remote_dir = 's3://{}/xxx/'.format(bucket)

        make_dir(local_file, check_empty=False, use_dirname=True)
        str_to_file(self.lorem, local_file)
        upload_or_copy(local_file, remote_file)

        # Expected to exist: the directory (when directories count) and the
        # uploaded object itself.
        self.assertTrue(file_exists(remote_dir, include_dir=True))
        self.assertTrue(file_exists(remote_file, include_dir=False))

        # Expected not to exist: a bare key prefix, the directory when
        # directories do not count, and a key that was never uploaded.
        self.assertFalse(file_exists(remote_prefix, include_dir=True))
        self.assertFalse(file_exists(remote_dir, include_dir=False))
        missing_key = remote_dir + 'NOTPOSSIBLE'
        self.assertFalse(file_exists(missing_key, include_dir=False))
Beispiel #10
0
def crop_image(image_uri, window, crop_uri):
    """Crop a window out of a raster and save the crop to ``crop_uri``.

    The crop is written with the source dataset's metadata, with width,
    height and transform replaced by those of the window, and with the
    source's colour interpretation copied over.

    Args:
        image_uri: URI of the source raster, opened with ``rasterio.open``.
        window: Window to crop; must provide ``rasterio_format()``,
            ``get_width()`` and ``get_height()``.
        crop_uri: Destination URI for the cropped raster.
    """
    rasterio_window = window.rasterio_format()

    # Open the source in a context manager so the dataset is closed even if
    # reading fails. Everything needed later is captured before it closes.
    with rasterio.open(image_uri) as im_dataset:
        im = im_dataset.read(window=rasterio_window)

        meta = im_dataset.meta
        meta['width'], meta['height'] = window.get_width(), window.get_height()
        meta['transform'] = rasterio.windows.transform(rasterio_window,
                                                       im_dataset.transform)
        colorinterp = im_dataset.colorinterp

    with TemporaryDirectory() as tmp_dir:
        crop_path = get_local_path(crop_uri, tmp_dir)
        make_dir(crop_path, use_dirname=True)

        with rasterio.open(crop_path, 'w', **meta) as dst:
            dst.colorinterp = colorinterp
            dst.write(im)

        # Upload while the temporary directory still exists.
        upload_or_copy(crop_path, crop_uri)
    def test_copy_to_http(self):
        """Copying a local file to an http URI raises ``NotWritableError``."""
        local_file = os.path.join(self.tmp_dir.name, 'lorem', 'ipsum.txt')
        make_dir(os.path.dirname(local_file), check_empty=False)
        str_to_file(self.lorem, local_file)

        http_dst = 'http://localhost/'
        with self.assertRaises(NotWritableError):
            upload_or_copy(local_file, http_dst)

        os.remove(local_file)
Beispiel #12
0
def create_cog(source_uri,
               dest_uri,
               local_dir,
               block_size=DEFAULT_BLOCK_SIZE,
               resample_method=DEFAULT_RESAMPLE_METHOD,
               compression=DEFAULT_COMPRESSION,
               overviews=None):
    """Run the GDAL COG commands on a source image and upload the result.

    Args:
        source_uri: URI of the input image; fetched into ``local_dir``.
        dest_uri: URI the resulting output file is uploaded or copied to.
        local_dir: Local working directory passed to the download and to
            ``gdal_cog_commands``.
        block_size: Forwarded to ``gdal_cog_commands``.
        resample_method: Forwarded to ``gdal_cog_commands``.
        compression: Forwarded to ``gdal_cog_commands``.
        overviews: Forwarded to ``gdal_cog_commands``.
    """
    source_path = download_or_copy(source_uri, local_dir)

    cog_options = dict(
        block_size=block_size,
        resample_method=resample_method,
        compression=compression,
        overviews=overviews)
    gdal_commands, cog_path = gdal_cog_commands(source_path, local_dir,
                                                **cog_options)

    # Run the commands one by one, in the order returned.
    for gdal_command in gdal_commands:
        run_cmd(gdal_command)

    upload_or_copy(cog_path, dest_uri)
Beispiel #13
0
    def write_vector_outputs(self, labels: SemanticSegmentationLabels) -> None:
        """Write vectorized outputs for all configs in self.vector_outputs.

        For each config, a binary mask of the config's class is built from
        the full label array, optionally denoised, converted to GeoJSON and
        written to a local path before being uploaded to the config's uri.
        Configs without a uri are skipped.

        Args:
            labels: labels to vectorize

        Raises:
            ValueError: if a config's mode is neither 'buildings' nor
                'polygons'
        """
        import mask_to_polygons.vectorification as vectorification
        import mask_to_polygons.processing.denoise as denoise

        log.info('Writing vector output to disk.')

        # Loop-invariant: maps pixel coordinates to map coordinates.
        def transform(x, y):
            return self.crs_transformer.pixel_to_map((x, y))

        label_arr = self._labels_to_full_label_arr(labels)
        with click.progressbar(self.vector_outputs) as bar:
            for i, vo in enumerate(bar):
                if vo.uri is None:
                    log.info(f'Skipping VectorOutputConfig at index {i} '
                             'due to missing uri.')
                    continue
                uri = get_local_path(vo.uri, self.tmp_dir)
                denoise_radius = vo.denoise
                mode = vo.get_mode()
                class_mask = (label_arr == vo.class_id).astype(np.uint8)

                if denoise_radius > 0:
                    class_mask = denoise.denoise(class_mask, denoise_radius)

                if mode == 'buildings':
                    geojson = vectorification.geojson_from_mask(
                        mask=class_mask,
                        transform=transform,
                        mode=mode,
                        min_aspect_ratio=vo.min_aspect_ratio,
                        min_area=vo.min_area,
                        width_factor=vo.element_width_factor,
                        thickness=vo.element_thickness)
                elif mode == 'polygons':
                    geojson = vectorification.geojson_from_mask(
                        mask=class_mask, transform=transform, mode=mode)
                else:
                    # Without this guard, geojson would be unbound on the
                    # first iteration or stale from the previous config.
                    raise ValueError(
                        f'Unsupported vector output mode "{mode}" for '
                        f'VectorOutputConfig at index {i}.')

                str_to_file(geojson, uri)
                upload_or_copy(uri, vo.uri)
    def save(self, labels):
        """Save labels as a GeoTIFF at self.uri and write any vector outputs.

        The raster is written window by window to a local path and then
        uploaded to self.uri. It has three RGB bands when self.class_trans is
        set, otherwise a single uint8 band. If self.vector_output is set, a
        full-extent mask is accumulated while writing and, for each vector
        output that has a uri, a GeoJSON is derived from it and written to
        that uri. Vector outputs without a uri are skipped.

        Args:
            labels - (SemanticSegmentationLabels) labels to be saved

        Raises:
            ValueError: if a vector output with a uri has a mode that is
                neither 'buildings' nor 'polygons'
        """
        local_path = get_local_path(self.uri, self.tmp_dir)
        make_dir(local_path, use_dirname=True)

        transform = self.crs_transformer.get_affine_transform()
        crs = self.crs_transformer.get_image_crs()

        dtype = np.uint8
        band_count = 3 if self.class_trans else 1

        # Full-extent label mask, only needed when vector outputs are wanted.
        mask = (np.zeros((self.extent.ymax, self.extent.xmax), dtype=np.uint8)
                if self.vector_output else None)

        # https://github.com/mapbox/rasterio/blob/master/docs/quickstart.rst
        # https://rasterio.readthedocs.io/en/latest/topics/windowed-rw.html
        with rasterio.open(local_path,
                           'w',
                           driver='GTiff',
                           height=self.extent.ymax,
                           width=self.extent.xmax,
                           count=band_count,
                           dtype=dtype,
                           transform=transform,
                           crs=crs) as dataset:
            for window in labels.get_windows():
                label_arr = labels.get_label_arr(window)
                # Clip the window to the extent and trim the label array to
                # the clipped size.
                window = window.intersection(self.extent)
                label_arr = label_arr[0:window.get_height(),
                                      0:window.get_width()]

                if mask is not None:
                    mask[window.ymin:window.ymax,
                         window.xmin:window.xmax] = label_arr

                window = window.rasterio_format()
                if self.class_trans:
                    rgb_labels = self.class_trans.class_to_rgb(label_arr)
                    for chan in range(3):
                        dataset.write_band(chan + 1,
                                           rgb_labels[:, :, chan],
                                           window=window)
                else:
                    img = label_arr.astype(dtype)
                    dataset.write_band(1, img, window=window)

        upload_or_copy(local_path, self.uri)

        if self.vector_output:
            import mask_to_polygons.vectorification as vectorification
            import mask_to_polygons.processing.denoise as denoise

            # Loop-invariant: maps pixel coordinates to map coordinates.
            def pixel_to_map(x, y):
                return self.crs_transformer.pixel_to_map((x, y))

            for vo in self.vector_output:
                uri = vo.uri
                if not uri:
                    # Nowhere to write this output. Skipping it avoids
                    # calling str_to_file with an unbound or stale geojson
                    # and an empty uri.
                    continue

                denoise_radius = vo.denoise
                mode = vo.get_mode()
                class_id = vo.class_id
                class_mask = np.array(mask == class_id, dtype=np.uint8)

                if denoise_radius > 0:
                    class_mask = denoise.denoise(class_mask, denoise_radius)

                if mode == 'buildings':
                    geojson = vectorification.geojson_from_mask(
                        mask=class_mask,
                        transform=pixel_to_map,
                        mode=mode,
                        min_aspect_ratio=vo.min_aspect_ratio,
                        min_area=vo.min_area,
                        width_factor=vo.element_width_factor,
                        thickness=vo.element_thickness)
                elif mode == 'polygons':
                    geojson = vectorification.geojson_from_mask(
                        mask=class_mask, transform=pixel_to_map, mode=mode)
                else:
                    # Otherwise a previous iteration's geojson would be
                    # written to this uri.
                    raise ValueError(
                        f'Unsupported vector output mode "{mode}" for '
                        f'uri {uri}.')
                str_to_file(geojson, uri)
Beispiel #15
0
def _zxy2geotiff(tile_schema, zoom, bounds, output_uri, make_cog=False):
    """Generates a GeoTIFF of a bounded region from a ZXY tile server.

    Args:
        tile_schema: (str) the URI schema for zxy tiles (ie. a slippy map tile server)
            of the form /tileserver-uri/{z}/{x}/{y}.png. If {-y} is used, the tiles
            are assumed to be indexed using TMS coordinates, where the y axis starts
            at the southernmost point. The URI can be for http, S3, or the local
            file system.
        zoom: (int) the zoom level to use when retrieving tiles
        bounds: (list) a list of length 4 containing min_lat, min_lng,
            max_lat, max_lng
        output_uri: (str) where to save the GeoTIFF. The URI can be for http, S3, or the
            local file system
    """
    min_lat, min_lng, max_lat, max_lng = bounds
    if min_lat >= max_lat:
        raise ValueError('min_lat must be < max_lat')
    if min_lng >= max_lng:
        raise ValueError('min_lng must be < max_lng')

    is_tms = False
    if '{-y}' in tile_schema:
        tile_schema = tile_schema.replace('{-y}', '{y}')
        is_tms = True

    tmp_dir_obj = tempfile.TemporaryDirectory()
    tmp_dir = tmp_dir_obj.name

    # Get range of tiles that cover bounds.
    output_path = get_local_path(output_uri, tmp_dir)
    tile_sz = 256
    t = mercantile.tile(min_lng, max_lat, zoom)
    xmin, ymin = t.x, t.y
    t = mercantile.tile(max_lng, min_lat, zoom)
    xmax, ymax = t.x, t.y

    # The supplied bounds are contained within the "tile bounds" -- ie. the
    # bounds of the set of tiles that covers the supplied bounds. Therefore,
    # we need to crop out the imagery that lies within the supplied bounds.
    # We do this by computing a top, bottom, left, and right offset in pixel
    # units of the supplied bounds against the tile bounds. Getting the offsets
    # in pixel units involves converting lng/lat to web mercator units since we
    # assume that is the CRS of the tiles. These offsets are then used to crop
    # individual tiles and place them correctly into the output raster.
    nw_merc_x, nw_merc_y = lnglat2merc(min_lng, max_lat)
    left_pix_offset, top_pix_offset = merc2pixel(xmin, ymin, zoom, nw_merc_x,
                                                 nw_merc_y)

    se_merc_x, se_merc_y = lnglat2merc(max_lng, min_lat)
    se_left_pix_offset, se_top_pix_offset = merc2pixel(xmax, ymax, zoom,
                                                       se_merc_x, se_merc_y)
    right_pix_offset = tile_sz - se_left_pix_offset
    bottom_pix_offset = tile_sz - se_top_pix_offset

    uncropped_height = tile_sz * (ymax - ymin + 1)
    uncropped_width = tile_sz * (xmax - xmin + 1)
    height = uncropped_height - top_pix_offset - bottom_pix_offset
    width = uncropped_width - left_pix_offset - right_pix_offset

    transform = rasterio.transform.from_bounds(nw_merc_x, se_merc_y, se_merc_x,
                                               nw_merc_y, width, height)
    with rasterio.open(output_path,
                       'w',
                       driver='GTiff',
                       height=height,
                       width=width,
                       count=3,
                       crs='epsg:3857',
                       transform=transform,
                       dtype=rasterio.uint8) as dataset:
        out_x = 0
        for xi, x in enumerate(range(xmin, xmax + 1)):
            tile_xmin, tile_xmax = 0, tile_sz - 1
            if x == xmin:
                tile_xmin += left_pix_offset
            if x == xmax:
                tile_xmax -= right_pix_offset
            window_width = tile_xmax - tile_xmin + 1

            out_y = 0
            for yi, y in enumerate(range(ymin, ymax + 1)):
                tile_ymin, tile_ymax = 0, tile_sz - 1
                if y == ymin:
                    tile_ymin += top_pix_offset
                if y == ymax:
                    tile_ymax -= bottom_pix_offset
                window_height = tile_ymax - tile_ymin + 1

                # Convert from xyz to tms if needed.
                # https://gist.github.com/tmcw/4954720
                if is_tms:
                    y = (2**zoom) - y - 1
                tile_uri = tile_schema.format(x=x, y=y, z=zoom)
                tile_path = download_if_needed(tile_uri, tmp_dir)
                img = np.array(Image.open(tile_path))
                img = img[tile_ymin:tile_ymax + 1, tile_xmin:tile_xmax + 1, :]

                window = Window(out_x, out_y, window_width, window_height)
                dataset.write(np.transpose(img[:, :, 0:3], (2, 0, 1)),
                              window=window)
                out_y += window_height
            out_x += window_width

    if make_cog:
        create_cog(output_path, output_uri, tmp_dir)
    else:
        upload_or_copy(output_path, output_uri)