Ejemplo n.º 1
0
    def save_model_bundle(self):
        """Save a model bundle.

        This is a zip file with the model weights in .pth format and a serialized
        copy of the LearningConfig, which allows for making predictions in the future.
        """
        from rastervision.pytorch_learner.learner_pipeline_config import (
            LearnerPipelineConfig)

        log.info('Creating bundle.')
        model_bundle_dir = join(self.tmp_dir, 'model-bundle')
        make_dir(model_bundle_dir)

        shutil.copyfile(self.last_model_path,
                        join(model_bundle_dir, 'model.pth'))

        # copy modules into bundle
        if isdir(self.modules_dir):
            log.info('Copying modules into bundle.')
            bundle_modules_dir = join(model_bundle_dir, MODULES_DIRNAME)
            if isdir(bundle_modules_dir):
                shutil.rmtree(bundle_modules_dir)
            shutil.copytree(self.modules_dir, bundle_modules_dir)

        pipeline_cfg = LearnerPipelineConfig(learner=self.cfg)
        save_pipeline_config(pipeline_cfg,
                             join(model_bundle_dir, 'pipeline-config.json'))
        zipdir(model_bundle_dir, self.model_bundle_path)
Ejemplo n.º 2
0
 def setup_tensorboard(self):
     """Setup for logging stats to TB."""
     self.tb_writer = None
     if self.cfg.log_tensorboard:
         self.tb_log_dir = join(self.output_dir, 'tb-logs')
         make_dir(self.tb_log_dir)
         self.tb_writer = SummaryWriter(log_dir=self.tb_log_dir)
Ejemplo n.º 3
0
    def plot_batch(self, x: Tensor, y, output_path: str, z=None):
        """Plot a whole batch in a grid using plot_xyz.

        Args:
            x: batch of images
            y: ground truth labels
            output_path: local path where to save plot image
            z: optional predicted labels
        """
        batch_sz = x.shape[0]
        ncols = nrows = math.ceil(math.sqrt(batch_sz))
        fig = plt.figure(constrained_layout=True,
                         figsize=(3 * ncols, 3 * nrows))
        grid = gridspec.GridSpec(ncols=ncols, nrows=nrows, figure=fig)

        # (N, c, h, w) --> (N, h, w, c)
        x = x.permute(0, 2, 3, 1)

        # apply transform, if given
        if self.cfg.data.plot_options.transform is not None:
            tf = A.from_dict(self.cfg.data.plot_options.transform)
            x = tf(image=x.numpy())['image']
            x = torch.from_numpy(x)

        for i in range(batch_sz):
            ax = fig.add_subplot(grid[i])
            if z is None:
                self.plot_xyz(ax, x[i], y[i])
            else:
                self.plot_xyz(ax, x[i], y[i], z=z[i])

        make_dir(output_path, use_dirname=True)
        plt.savefig(output_path)
        plt.close()
Ejemplo n.º 4
0
    def plot_batch(self, x: Tensor, y, output_path: str, z=None):
        """Plot a whole batch in a grid using plot_xyz.

        Args:
            x: batch of images
            y: ground truth labels
            output_path: local path where to save plot image
            z: optional predicted labels
        """
        batch_sz = x.shape[0]
        ncols = nrows = math.ceil(math.sqrt(batch_sz))
        fig = plt.figure(constrained_layout=True,
                         figsize=(3 * ncols, 3 * nrows))
        grid = gridspec.GridSpec(ncols=ncols, nrows=nrows, figure=fig)

        for i in range(batch_sz):
            ax = fig.add_subplot(grid[i])
            if z is None:
                self.plot_xyz(ax, x[i], y[i])
            else:
                self.plot_xyz(ax, x[i], y[i], z=z[i])

        make_dir(output_path, use_dirname=True)
        plt.savefig(output_path)
        plt.close()
Ejemplo n.º 5
0
    def __enter__(self):
        self.tmp_dir_obj = tempfile.TemporaryDirectory(dir=self.tmp_dir)
        self.sample_dir = join(self.tmp_dir_obj.name, 'samples')
        make_dir(self.sample_dir)
        self.sample_ind = 0

        return self
Ejemplo n.º 6
0
def copy_split(split, image_paths, output_dir):
    split_dir = join(output_dir, split)
    make_dir(split_dir)
    for image_path in image_paths:
        dst_path = join(split_dir, *image_path.split('/')[-2:])
        make_dir(dst_path, use_dirname=True)
        shutil.copyfile(image_path, dst_path)
Ejemplo n.º 7
0
def collect_experiment(key, root_uri, output_dir, get_pred_package=False):
    print('\nCollecting experiment {}...\n'.format(key))

    if root_uri.startswith('s3://'):
        predict_package_uris = list_paths(join(root_uri, key, 'bundle'),
                                          ext='predict_package.zip')
        eval_json_uris = list_paths(join(root_uri, key, 'eval'),
                                    ext='eval.json')
    else:
        predict_package_uris = glob.glob(
            join(root_uri, key, 'bundle', '*', 'predict_package.zip'))
        eval_json_uris = glob.glob(
            join(root_uri, key, 'eval', '*', 'eval.json'))

    if len(predict_package_uris) > 1 or len(eval_json_uris) > 1:
        print('Cannot collect from key with multiple experiments!!!')
        return

    if len(predict_package_uris) == 0 or len(eval_json_uris) == 0:
        print('Missing output!!!')
        return

    predict_package_uri = predict_package_uris[0]
    eval_json_uri = eval_json_uris[0]
    make_dir(join(output_dir, key))
    if get_pred_package:
        download_or_copy(predict_package_uri, join(output_dir, key))

    download_or_copy(eval_json_uri, join(output_dir, key))

    eval_json = file_to_json(join(output_dir, key, 'eval.json'))
    pprint.pprint(eval_json['overall'], indent=4)
    def test_file_exists_local_true(self):
        path = os.path.join(self.tmp_dir.name, 'lorem', 'ipsum.txt')
        directory = os.path.dirname(path)
        make_dir(directory, check_empty=False)

        str_to_file(self.lorem, path)

        self.assertTrue(file_exists(path))
    def test_check_empty(self):
        path = os.path.join(self.tmp_dir.name, 'hello', 'hello.txt')
        dir = os.path.dirname(path)
        str_to_file('hello', path)

        make_dir(dir, check_empty=False)
        with self.assertRaises(Exception):
            make_dir(dir, check_empty=True)
Ejemplo n.º 10
0
    def plot_batch(self,
                   x: torch.Tensor,
                   y: Union[torch.Tensor, np.ndarray],
                   output_path: str,
                   z: Optional[torch.Tensor] = None,
                   batch_limit: Optional[int] = None) -> None:
        """Plot a whole batch in a grid using plot_xyz.

        Args:
            x: batch of images
            y: ground truth labels
            output_path: local path where to save plot image
            z: optional predicted labels
            batch_limit: optional limit on (rendered) batch size
        """
        batch_sz, c, h, w = x.shape
        batch_sz = min(batch_sz,
                       batch_limit) if batch_limit is not None else batch_sz
        if batch_sz == 0:
            return

        channel_groups = self.cfg.data.channel_display_groups

        nrows = batch_sz
        # one col for each group + 1 for labels + 1 for predictions
        ncols = len(channel_groups) + 1
        if z is not None:
            ncols += 1

        fig, axes = plt.subplots(
            nrows=nrows,
            ncols=ncols,
            squeeze=False,
            constrained_layout=True,
            figsize=(3 * ncols, 3 * nrows))

        assert axes.shape == (nrows, ncols)

        # (N, c, h, w) --> (N, h, w, c)
        x = x.permute(0, 2, 3, 1)

        # apply transform, if given
        if self.cfg.data.plot_options.transform is not None:
            tf = A.from_dict(self.cfg.data.plot_options.transform)
            imgs = [tf(image=img)['image'] for img in x.numpy()]
            x = torch.from_numpy(np.stack(imgs))

        for i in range(batch_sz):
            ax = (fig, axes[i])
            if z is None:
                self.plot_xyz(ax, x[i], y[i])
            else:
                self.plot_xyz(ax, x[i], y[i], z=z[i])

        make_dir(output_path, use_dirname=True)
        plt.savefig(output_path, bbox_inches='tight')
        plt.close()
    def test_copy_to_http(self):
        path = os.path.join(self.tmp_dir.name, 'lorem', 'ipsum.txt')
        dst = 'http://localhost/'
        directory = os.path.dirname(path)
        make_dir(directory, check_empty=False)

        str_to_file(self.lorem, path)

        self.assertRaises(NotWritableError, lambda: upload_or_copy(path, dst))
        os.remove(path)
    def test_force_empty(self):
        path = os.path.join(self.tmp_dir.name, 'hello', 'hello.txt')
        dir = os.path.dirname(path)
        str_to_file('hello', path)

        make_dir(dir, force_empty=False)
        self.assertTrue(os.path.isfile(path))
        make_dir(dir, force_empty=True)
        is_empty = len(os.listdir(dir)) == 0
        self.assertTrue(is_empty)
    def test_sync_from_dir_noop_local(self):
        path = os.path.join(self.tmp_dir.name, 'lorem', 'ipsum.txt')
        src = os.path.join(self.tmp_dir.name, 'lorem')
        make_dir(src, check_empty=False)

        fs = FileSystem.get_file_system(src, 'r')
        fs.write_bytes(path, bytes([0x00, 0x01]))
        sync_from_dir(src, src, delete=True)

        self.assertEqual(len(list_paths(src)), 1)
    def test_last_modified(self):
        path = os.path.join(self.tmp_dir.name, 'lorem', 'ipsum1.txt')
        directory = os.path.dirname(path)
        make_dir(directory, check_empty=False)

        fs = FileSystem.get_file_system(path, 'r')

        str_to_file(self.lorem, path)
        stamp = fs.last_modified(path)

        self.assertTrue(isinstance(stamp, datetime.datetime))
    def test_file_exists_s3_true(self):
        path = os.path.join(self.tmp_dir.name, 'lorem', 'ipsum.txt')
        directory = os.path.dirname(path)
        make_dir(directory, check_empty=False)

        str_to_file(self.lorem, path)

        s3_path = 's3://{}/lorem.txt'.format(self.bucket_name)
        upload_or_copy(path, s3_path)

        self.assertTrue(file_exists(s3_path))
    def test_list_paths_s3(self):
        path = os.path.join(self.tmp_dir.name, 'lorem', 'ipsum.txt')
        s3_path = 's3://{}/xxx/lorem.txt'.format(self.bucket_name)
        s3_directory = 's3://{}/xxx/'.format(self.bucket_name)
        directory = os.path.dirname(path)
        make_dir(directory, check_empty=False)

        str_to_file(self.lorem, path)
        upload_or_copy(path, s3_path)

        list_paths(s3_directory)
        self.assertEqual(len(list_paths(s3_directory)), 1)
    def test_copy_to_local(self):
        path1 = os.path.join(self.tmp_dir.name, 'lorem', 'ipsum.txt')
        path2 = os.path.join(self.tmp_dir.name, 'yyy', 'ipsum.txt')
        dir1 = os.path.dirname(path1)
        dir2 = os.path.dirname(path2)
        make_dir(dir1, check_empty=False)
        make_dir(dir2, check_empty=False)

        str_to_file(self.lorem, path1)

        upload_or_copy(path1, path2)
        self.assertEqual(len(list_paths(dir2)), 1)
    def test_sync_to_dir_local(self):
        path = os.path.join(self.tmp_dir.name, 'lorem', 'ipsum.txt')
        src = os.path.dirname(path)
        dst = os.path.join(self.tmp_dir.name, 'xxx')
        make_dir(src, check_empty=False)
        make_dir(dst, check_empty=False)

        fs = FileSystem.get_file_system(path, 'r')
        fs.write_bytes(path, bytes([0x00, 0x01]))
        sync_to_dir(src, dst, delete=True)

        self.assertEqual(len(list_paths(dst)), 1)
    def test_bytes_local(self):
        path = os.path.join(self.tmp_dir.name, 'lorem', 'ipsum.txt')
        directory = os.path.dirname(path)
        make_dir(directory, check_empty=False)

        expected = bytes([0x00, 0x01, 0x02])
        fs = FileSystem.get_file_system(path, 'r')

        fs.write_bytes(path, expected)
        actual = fs.read_bytes(path)

        self.assertEqual(actual, expected)
    def test_last_modified_s3(self):
        path = os.path.join(self.tmp_dir.name, 'lorem', 'ipsum1.txt')
        s3_path = 's3://{}/lorem1.txt'.format(self.bucket_name)
        directory = os.path.dirname(path)
        make_dir(directory, check_empty=False)

        fs = FileSystem.get_file_system(s3_path, 'r')

        str_to_file(self.lorem, path)
        upload_or_copy(path, s3_path)
        stamp = fs.last_modified(s3_path)

        self.assertTrue(isinstance(stamp, datetime.datetime))
    def test_file_exists(self):
        fs = FileSystem.get_file_system(self.tmp_dir.name, 'r')

        path1 = os.path.join(self.tmp_dir.name, 'lorem', 'ipsum.txt')
        dir1 = os.path.dirname(path1)
        make_dir(dir1, check_empty=False)

        str_to_file(self.lorem, path1)

        self.assertTrue(fs.file_exists(dir1, include_dir=True))
        self.assertTrue(fs.file_exists(path1, include_dir=False))
        self.assertFalse(fs.file_exists(dir1, include_dir=False))
        self.assertFalse(
            fs.file_exists(dir1 + 'NOTPOSSIBLE', include_dir=False))
Ejemplo n.º 22
0
    def save_model_bundle(self):
        """Save a model bundle.

        This is a zip file with the model weights in .pth format and a serialized
        copy of the LearningConfig, which allows for making predictions in the future.
        """
        from rastervision.pytorch_learner.learner_pipeline_config import (
            LearnerPipelineConfig)
        model_bundle_dir = join(self.tmp_dir, 'model-bundle')
        make_dir(model_bundle_dir)
        shutil.copyfile(self.last_model_path,
                        join(model_bundle_dir, 'model.pth'))
        pipeline_cfg = LearnerPipelineConfig(learner=self.cfg)
        save_pipeline_config(pipeline_cfg,
                             join(model_bundle_dir, 'pipeline-config.json'))
        zipdir(model_bundle_dir, self.model_bundle_path)
    def test_file_exists(self):
        path = os.path.join(self.tmp_dir.name, 'lorem', 'ipsum.txt')
        s3_path = 's3://{}/xxx/lorem.txt'.format(self.bucket_name)
        s3_path_prefix = 's3://{}/xxx/lorem'.format(self.bucket_name)
        s3_directory = 's3://{}/xxx/'.format(self.bucket_name)
        make_dir(path, check_empty=False, use_dirname=True)

        str_to_file(self.lorem, path)
        upload_or_copy(path, s3_path)

        self.assertTrue(file_exists(s3_directory, include_dir=True))
        self.assertTrue(file_exists(s3_path, include_dir=False))
        self.assertFalse(file_exists(s3_path_prefix, include_dir=True))
        self.assertFalse(file_exists(s3_directory, include_dir=False))
        self.assertFalse(
            file_exists(s3_directory + 'NOTPOSSIBLE', include_dir=False))
Ejemplo n.º 24
0
    def save(self, labels: SemanticSegmentationLabels) -> None:
        """Save labels to disk.

        More info on rasterio IO:
        - https://github.com/mapbox/rasterio/blob/master/docs/quickstart.rst
        - https://rasterio.readthedocs.io/en/latest/topics/windowed-rw.html

        Args:
            labels - (SemanticSegmentationLabels) labels to be saved
        """
        local_root = get_local_path(self.root_uri, self.tmp_dir)
        make_dir(local_root)

        out_profile = {
            'driver': 'GTiff',
            'height': self.extent.ymax,
            'width': self.extent.xmax,
            'transform': self.crs_transformer.get_affine_transform(),
            'crs': self.crs_transformer.get_image_crs(),
            'blockxsize': self.rasterio_block_size,
            'blockysize': self.rasterio_block_size
        }

        # if old scores exist, combine them with the new ones
        if self.score_raster_source:
            log.info('Old scores found. Merging with current scores.')
            old_labels = self.get_scores()
            labels += old_labels

        self.write_discrete_raster_output(
            out_profile, get_local_path(self.label_uri, self.tmp_dir), labels)

        if self.smooth_output:
            self.write_smooth_raster_output(
                out_profile,
                get_local_path(self.score_uri, self.tmp_dir),
                get_local_path(self.hits_uri, self.tmp_dir),
                labels,
                chip_sz=self.rasterio_block_size)

        if self.vector_outputs:
            self.write_vector_outputs(labels)

        sync_to_dir(local_root, self.root_uri)
Ejemplo n.º 25
0
def crop_image(image_uri, window, crop_uri):
    im_dataset = rasterio.open(image_uri)
    rasterio_window = window.rasterio_format()
    im = im_dataset.read(window=rasterio_window)

    with TemporaryDirectory() as tmp_dir:
        crop_path = get_local_path(crop_uri, tmp_dir)
        make_dir(crop_path, use_dirname=True)

        meta = im_dataset.meta
        meta['width'], meta['height'] = window.get_width(), window.get_height()
        meta['transform'] = rasterio.windows.transform(rasterio_window,
                                                       im_dataset.transform)

        with rasterio.open(crop_path, 'w', **meta) as dst:
            dst.colorinterp = im_dataset.colorinterp
            dst.write(im)

        upload_or_copy(crop_path, crop_uri)
Ejemplo n.º 26
0
    def write_sample(self, sample: DataSample):
        """
        This writes a training or validation sample to
        (train|valid)/{class_name}/{scene_id}-{ind}.png
        """
        class_id = sample.labels.get_cell_class_id(sample.window)
        # If a chip is not associated with a class, don't
        # use it in training data.
        if class_id is None:
            return

        split_name = 'train' if sample.is_train else 'valid'
        class_name = self.class_config.names[class_id]
        class_dir = join(self.sample_dir, split_name, class_name)
        make_dir(class_dir)
        chip_path = join(class_dir, '{}-{}.png'.format(sample.scene_id,
                                                       self.sample_ind))
        save_img(sample.chip, chip_path)
        self.sample_ind += 1
    def write_sample(self, sample: DataSample):
        """
        This writes a training or validation sample to
        (train|valid)/img/{scene_id}-{ind}.png and
        (train|valid)/labels/{scene_id}-{ind}.png
        """
        split_name = 'train' if sample.is_train else 'valid'
        label_arr = sample.labels.get_label_arr(sample.window).astype(np.uint8)

        img_dir = join(self.sample_dir, split_name, 'img')
        labels_dir = join(self.sample_dir, split_name, 'labels')
        make_dir(img_dir)
        make_dir(labels_dir)

        img_path = join(img_dir, '{}-{}.png'.format(sample.scene_id,
                                                    self.sample_ind))
        labels_path = join(
            labels_dir, '{}-{}.png'.format(sample.scene_id, self.sample_ind))
        save_img(sample.chip, img_path)
        save_img(label_arr, labels_path)

        self.sample_ind += 1
Ejemplo n.º 28
0
def _collect(key, root_uri, output_dir, collect_dir, get_model_bundle=False):
    print('\nCollecting experiment {}...\n'.format(key))

    model_bundle_uri = join(root_uri, output_dir, 'bundle', 'model-bundle.zip')
    eval_uri = join(root_uri, output_dir, 'eval', 'eval.json')

    if not file_exists(eval_uri):
        print('Missing eval!')
        return

    if not file_exists(model_bundle_uri):
        print('Missing model bundle!')
        return

    make_dir(join(collect_dir, key))
    if get_model_bundle:
        download_or_copy(model_bundle_uri, join(collect_dir, key))

    download_or_copy(eval_uri, join(collect_dir, key))

    eval_json = file_to_json(join(collect_dir, key, 'eval.json'))
    pprint.pprint(eval_json['overall'], indent=4)
Ejemplo n.º 29
0
    def write_sample(self, sample: DataSample):
        """
        This writes a training or validation sample to
        (train|valid)/img/{scene_id}-{ind}.png and
        (train|valid)/labels/{scene_id}-{ind}.png
        """
        split_name = 'train' if sample.is_train else 'valid'

        sample_dir = Path(self.sample_dir)
        img_dir = sample_dir / split_name / 'img'
        label_dir = sample_dir / split_name / 'labels'

        make_dir(img_dir)
        make_dir(label_dir)

        img = sample.chip
        label_arr = sample.labels.get_label_arr(sample.window).astype(np.uint8)

        img_fmt, label_fmt = self.img_format, self.label_format
        sample_name = f'{sample.scene_id}-{self.sample_ind}'

        # write image
        img_filename = f'{sample_name}.{img_fmt}'
        img_path = img_dir / img_filename
        if img_fmt == 'npy':
            np.save(img_path, img)
        else:
            save_img(img, img_path)

        # write labels
        label_filename = f'{sample_name}.{label_fmt}'
        label_path = label_dir / label_filename
        if label_fmt == 'npy':
            np.save(label_path, label_arr)
        else:
            save_img(label_arr, label_path)

        self.sample_ind += 1
Ejemplo n.º 30
0
    def write_sample(self, sample: DataSample):
        """
        This writes a training or validation sample to
        (train|valid)/img/{scene_id}-{ind}.png and updates
        some COCO data structures.
        """
        split = 'train' if sample.is_train else 'valid'
        split_dir = join(self.sample_dir, split)
        img_dir = join(split_dir, 'img')
        make_dir(img_dir)
        img_fn = '{}-{}.png'.format(sample.scene_id, self.sample_ind)
        img_path = join(img_dir, img_fn)
        save_img(sample.chip, img_path)

        images = self.splits[split]['images']
        annotations = self.splits[split]['annotations']

        images.append({
            'file_name': img_fn,
            'id': self.sample_ind,
            'height': sample.chip.shape[0],
            'width': sample.chip.shape[1]
        })

        npboxes = sample.labels.get_npboxes()
        npboxes = ObjectDetectionLabels.global_to_local(npboxes, sample.window)
        for box_ind, (box, class_id) in enumerate(
                zip(npboxes, sample.labels.get_class_ids())):
            bbox = [box[1], box[0], box[3] - box[1], box[2] - box[0]]
            bbox = [int(i) for i in bbox]
            annotations.append({
                'id': '{}-{}'.format(self.sample_ind, box_ind),
                'image_id': self.sample_ind,
                'bbox': bbox,
                'category_id': int(class_id)
            })

        self.sample_ind += 1