コード例 #1
0
def collect_experiment(key, root_uri, output_dir, get_pred_package=False):
    """Collect the evaluation output of one experiment into a local directory.

    Searches ``<root_uri>/<key>/bundle`` for a ``predict_package.zip`` and
    ``<root_uri>/<key>/eval`` for an ``eval.json``. Exactly one of each must
    be found; otherwise a message is printed and nothing is collected. On
    success the ``overall`` entry of the collected eval JSON is
    pretty-printed.

    Args:
        key: experiment key; used as a sub-directory name under both
            root_uri and output_dir
        root_uri: root of the experiment outputs. URIs starting with
            's3://' are searched with list_paths, anything else with glob.
        output_dir: directory under which the files are placed, in a
            sub-directory named after key
        get_pred_package: if True, the predict package is fetched as well
    """
    print('\nCollecting experiment {}...\n'.format(key))

    bundle_root = join(root_uri, key, 'bundle')
    eval_root = join(root_uri, key, 'eval')
    if root_uri.startswith('s3://'):
        package_uris = list_paths(bundle_root, ext='predict_package.zip')
        eval_uris = list_paths(eval_root, ext='eval.json')
    else:
        package_uris = glob.glob(join(bundle_root, '*', 'predict_package.zip'))
        eval_uris = glob.glob(join(eval_root, '*', 'eval.json'))

    counts = (len(package_uris), len(eval_uris))
    if max(counts) > 1:
        print('Cannot collect from key with multiple experiments!!!')
        return
    if min(counts) == 0:
        print('Missing output!!!')
        return

    experiment_dir = join(output_dir, key)
    make_dir(experiment_dir)
    if get_pred_package:
        download_or_copy(package_uris[0], experiment_dir)
    download_or_copy(eval_uris[0], experiment_dir)

    eval_json = file_to_json(join(experiment_dir, 'eval.json'))
    pprint.pprint(eval_json['overall'], indent=4)
コード例 #2
0
    def __enter__(self):
        """Enter the context, preparing a temp dir with a 'samples' sub-dir.

        Returns:
            self, with tmp_dir_obj, sample_dir and sample_ind set.
        """
        tmp_dir_obj = tempfile.TemporaryDirectory(dir=self.tmp_dir)
        self.tmp_dir_obj = tmp_dir_obj
        self.sample_dir = join(tmp_dir_obj.name, 'samples')
        make_dir(self.sample_dir)
        # The sample counter starts from zero each time the context is entered.
        self.sample_ind = 0
        return self
コード例 #3
0
 def setup_tensorboard(self):
     """Create a TensorBoard writer when cfg.log_tensorboard is set.

     tb_writer is always assigned; it stays None when logging is disabled.
     When enabled, logs go to a 'tb-logs' directory under output_dir.
     """
     self.tb_writer = None
     if not self.cfg.log_tensorboard:
         return

     self.tb_log_dir = join(self.output_dir, 'tb-logs')
     make_dir(self.tb_log_dir)
     self.tb_writer = SummaryWriter(log_dir=self.tb_log_dir)
コード例 #4
0
    def plot_batch(self, x: Tensor, y, output_path: str, z=None):
        """Plot a whole batch in a grid using plot_xyz.

        The samples are laid out on the smallest square grid that can hold
        the batch, one subplot per sample.

        Args:
            x: batch of images
            y: ground truth labels
            output_path: local path where to save plot image
            z: optional predicted labels
        """
        batch_sz = x.shape[0]
        ncols = nrows = math.ceil(math.sqrt(batch_sz))
        fig = plt.figure(constrained_layout=True,
                         figsize=(3 * ncols, 3 * nrows))
        try:
            grid = gridspec.GridSpec(ncols=ncols, nrows=nrows, figure=fig)

            for i in range(batch_sz):
                ax = fig.add_subplot(grid[i])
                if z is None:
                    self.plot_xyz(ax, x[i], y[i])
                else:
                    self.plot_xyz(ax, x[i], y[i], z=z[i])

            make_dir(output_path, use_dirname=True)
            # Save this figure explicitly instead of relying on pyplot's
            # notion of the "current" figure.
            fig.savefig(output_path)
        finally:
            # Release the figure even when plotting or saving fails, so
            # figures do not pile up in pyplot's registry.
            plt.close(fig)
コード例 #5
0
 def save_model_bundle(self):
     """Zip the last model weights and the config into the model bundle.

     The files are first copied into a 'model-bundle' directory under
     tmp_dir as 'model.pth' and 'config.json'; that directory is then
     passed to zipdir with model_bundle_path as the destination.
     """
     bundle_dir = join(self.tmp_dir, 'model-bundle')
     make_dir(bundle_dir)

     weights_dst = join(bundle_dir, 'model.pth')
     shutil.copyfile(self.last_model_path, weights_dst)

     config_dst = join(bundle_dir, 'config.json')
     shutil.copyfile(self.config_path, config_dst)

     zipdir(bundle_dir, self.model_bundle_path)
コード例 #6
0
    def test_file_exists_local_true(self):
        """file_exists is True for a local file that has just been written."""
        parent_dir = os.path.join(self.tmp_dir.name, 'lorem')
        make_dir(parent_dir, check_empty=False)

        file_path = os.path.join(parent_dir, 'ipsum.txt')
        str_to_file(self.lorem, file_path)

        self.assertTrue(file_exists(file_path))
コード例 #7
0
    def test_check_empty(self):
        """make_dir with check_empty=True raises for a non-empty directory."""
        hello_dir = os.path.join(self.tmp_dir.name, 'hello')
        str_to_file('hello', os.path.join(hello_dir, 'hello.txt'))

        # With the emptiness check off, the populated directory is accepted.
        make_dir(hello_dir, check_empty=False)
        with self.assertRaises(Exception):
            make_dir(hello_dir, check_empty=True)
コード例 #8
0
    def test_sync_from_dir_noop_local(self):
        """Syncing a local directory onto itself keeps its single file."""
        lorem_dir = os.path.join(self.tmp_dir.name, 'lorem')
        file_path = os.path.join(lorem_dir, 'ipsum.txt')
        make_dir(lorem_dir, check_empty=False)

        local_fs = FileSystem.get_file_system(lorem_dir, 'r')
        local_fs.write_bytes(file_path, b'\x00\x01')
        sync_from_dir(lorem_dir, lorem_dir, delete=True)

        remaining = list_paths(lorem_dir)
        self.assertEqual(len(remaining), 1)
コード例 #9
0
    def test_copy_to_http(self):
        """upload_or_copy to an http destination raises NotWritableError."""
        lorem_dir = os.path.join(self.tmp_dir.name, 'lorem')
        src_path = os.path.join(lorem_dir, 'ipsum.txt')
        http_dst = 'http://localhost/'
        make_dir(lorem_dir, check_empty=False)

        str_to_file(self.lorem, src_path)

        with self.assertRaises(NotWritableError):
            upload_or_copy(src_path, http_dst)
        os.remove(src_path)
コード例 #10
0
    def test_force_empty(self):
        """make_dir clears an existing directory only when force_empty=True."""
        path = os.path.join(self.tmp_dir.name, 'hello', 'hello.txt')
        # Named dir_path so the builtin dir() is not shadowed.
        dir_path = os.path.dirname(path)
        str_to_file('hello', path)

        make_dir(dir_path, force_empty=False)
        self.assertTrue(os.path.isfile(path))
        make_dir(dir_path, force_empty=True)
        # Comparing the listing itself makes a failure show what was left over.
        self.assertEqual(os.listdir(dir_path), [])
コード例 #11
0
    def test_last_modified(self):
        """last_modified on a local file returns a datetime.datetime."""
        path = os.path.join(self.tmp_dir.name, 'lorem', 'ipsum1.txt')
        directory = os.path.dirname(path)
        make_dir(directory, check_empty=False)

        fs = FileSystem.get_file_system(path, 'r')

        str_to_file(self.lorem, path)
        stamp = fs.last_modified(path)

        # assertIsInstance reports the actual type of stamp on failure.
        self.assertIsInstance(stamp, datetime.datetime)
コード例 #12
0
    def test_file_exists_s3_true(self):
        """file_exists is True for an S3 URI a local file was uploaded to."""
        local_dir = os.path.join(self.tmp_dir.name, 'lorem')
        make_dir(local_dir, check_empty=False)

        local_path = os.path.join(local_dir, 'ipsum.txt')
        str_to_file(self.lorem, local_path)

        s3_uri = 's3://{}/lorem.txt'.format(self.bucket_name)
        upload_or_copy(local_path, s3_uri)

        self.assertTrue(file_exists(s3_uri))
コード例 #13
0
    def test_copy_to_local(self):
        """upload_or_copy between local paths leaves one file in the target."""
        root = self.tmp_dir.name
        src_dir = os.path.join(root, 'lorem')
        dst_dir = os.path.join(root, 'yyy')
        src_path = os.path.join(src_dir, 'ipsum.txt')
        dst_path = os.path.join(dst_dir, 'ipsum.txt')
        for directory in (src_dir, dst_dir):
            make_dir(directory, check_empty=False)

        str_to_file(self.lorem, src_path)

        upload_or_copy(src_path, dst_path)
        copied = list_paths(dst_dir)
        self.assertEqual(len(copied), 1)
コード例 #14
0
    def test_sync_to_dir_local(self):
        """sync_to_dir leaves exactly one file in the local destination."""
        root = self.tmp_dir.name
        src_dir = os.path.join(root, 'lorem')
        dst_dir = os.path.join(root, 'xxx')
        file_path = os.path.join(src_dir, 'ipsum.txt')
        make_dir(src_dir, check_empty=False)
        make_dir(dst_dir, check_empty=False)

        local_fs = FileSystem.get_file_system(file_path, 'r')
        local_fs.write_bytes(file_path, b'\x00\x01')
        sync_to_dir(src_dir, dst_dir, delete=True)

        synced = list_paths(dst_dir)
        self.assertEqual(len(synced), 1)
コード例 #15
0
    def test_list_paths_s3(self):
        """list_paths on an S3 directory returns the one uploaded object."""
        path = os.path.join(self.tmp_dir.name, 'lorem', 'ipsum.txt')
        s3_path = 's3://{}/xxx/lorem.txt'.format(self.bucket_name)
        s3_directory = 's3://{}/xxx/'.format(self.bucket_name)
        directory = os.path.dirname(path)
        make_dir(directory, check_empty=False)

        str_to_file(self.lorem, path)
        upload_or_copy(path, s3_path)

        # List the directory once and assert on that single result.
        listed = list_paths(s3_directory)
        self.assertEqual(len(listed), 1)
コード例 #16
0
    def test_bytes_local(self):
        """Bytes written via the local FileSystem are read back unchanged."""
        lorem_dir = os.path.join(self.tmp_dir.name, 'lorem')
        file_path = os.path.join(lorem_dir, 'ipsum.txt')
        make_dir(lorem_dir, check_empty=False)

        payload = b'\x00\x01\x02'
        local_fs = FileSystem.get_file_system(file_path, 'r')

        local_fs.write_bytes(file_path, payload)
        round_tripped = local_fs.read_bytes(file_path)

        self.assertEqual(round_tripped, payload)
コード例 #17
0
    def test_last_modified_s3(self):
        """last_modified on an uploaded S3 object returns a datetime.datetime."""
        path = os.path.join(self.tmp_dir.name, 'lorem', 'ipsum1.txt')
        s3_path = 's3://{}/lorem1.txt'.format(self.bucket_name)
        directory = os.path.dirname(path)
        make_dir(directory, check_empty=False)

        fs = FileSystem.get_file_system(s3_path, 'r')

        str_to_file(self.lorem, path)
        upload_or_copy(path, s3_path)
        stamp = fs.last_modified(s3_path)

        # assertIsInstance reports the actual type of stamp on failure.
        self.assertIsInstance(stamp, datetime.datetime)
コード例 #18
0
    def save_model_bundle(self):
        """Save a model bundle.

        The bundle is a zip file containing the model weights in .pth format
        and a serialized copy of the LearningConfig, which allows for making
        predictions in the future. Both files are staged in a 'model-bundle'
        directory under tmp_dir before being zipped to model_bundle_path.
        """
        bundle_dir = join(self.tmp_dir, 'model-bundle')
        make_dir(bundle_dir)

        weights_dst = join(bundle_dir, 'model.pth')
        shutil.copyfile(self.last_model_path, weights_dst)

        config_dst = join(bundle_dir, 'learner-config.json')
        shutil.copyfile(self.config_path, config_dst)

        zipdir(bundle_dir, self.model_bundle_path)
コード例 #19
0
    def test_file_exists(self):
        """Local file_exists honours include_dir for files, dirs and misses."""
        root = self.tmp_dir.name
        local_fs = FileSystem.get_file_system(root, 'r')

        lorem_dir = os.path.join(root, 'lorem')
        lorem_file = os.path.join(lorem_dir, 'ipsum.txt')
        make_dir(lorem_dir, check_empty=False)

        str_to_file(self.lorem, lorem_file)

        # A directory only counts as existing when include_dir is True.
        self.assertTrue(local_fs.file_exists(lorem_dir, include_dir=True))
        self.assertTrue(local_fs.file_exists(lorem_file, include_dir=False))
        self.assertFalse(local_fs.file_exists(lorem_dir, include_dir=False))
        missing_path = lorem_dir + 'NOTPOSSIBLE'
        self.assertFalse(local_fs.file_exists(missing_path, include_dir=False))
コード例 #20
0
    def test_file_exists(self):
        """S3 file_exists: objects and dirs are found, bare key prefixes not."""
        bucket = self.bucket_name
        local_path = os.path.join(self.tmp_dir.name, 'lorem', 'ipsum.txt')
        s3_file = 's3://{}/xxx/lorem.txt'.format(bucket)
        s3_prefix = 's3://{}/xxx/lorem'.format(bucket)
        s3_dir = 's3://{}/xxx/'.format(bucket)
        make_dir(local_path, check_empty=False, use_dirname=True)

        str_to_file(self.lorem, local_path)
        upload_or_copy(local_path, s3_file)

        self.assertTrue(file_exists(s3_dir, include_dir=True))
        self.assertTrue(file_exists(s3_file, include_dir=False))
        # A partial key such as '.../lorem' is neither an object nor a dir.
        self.assertFalse(file_exists(s3_prefix, include_dir=True))
        self.assertFalse(file_exists(s3_dir, include_dir=False))
        missing_uri = s3_dir + 'NOTPOSSIBLE'
        self.assertFalse(file_exists(missing_uri, include_dir=False))
コード例 #21
0
    def save_model_bundle(self):
        """Save a model bundle.

        The bundle is a zip file containing the model weights in .pth format
        and a serialized pipeline config wrapping this learner's config,
        which allows for making predictions in the future.
        """
        # Function-scope import, kept as in the original layout
        # (presumably to avoid a circular import - TODO confirm).
        from rastervision2.pytorch_learner.learner_pipeline_config import (
            LearnerPipelineConfig)

        bundle_dir = join(self.tmp_dir, 'model-bundle')
        make_dir(bundle_dir)

        weights_dst = join(bundle_dir, 'model.pth')
        shutil.copyfile(self.last_model_path, weights_dst)

        config_dst = join(bundle_dir, 'pipeline-config.json')
        save_pipeline_config(
            LearnerPipelineConfig(learner=self.cfg), config_dst)

        zipdir(bundle_dir, self.model_bundle_path)
コード例 #22
0
    def plot_batch(self, x, y, output_path, z=None):
        """Plot a whole batch in a square grid using plot_xyz.

        Args:
            x: batch of inputs; x.shape[0] is taken as the batch size
            y: per-sample ground truth, indexed like x
            output_path: local path where the plot image is saved
            z: optional per-sample predictions, indexed like x
        """
        batch_sz = x.shape[0]
        ncols = nrows = math.ceil(math.sqrt(batch_sz))
        fig = plt.figure(
            constrained_layout=True, figsize=(3 * ncols, 3 * nrows))
        try:
            grid = gridspec.GridSpec(ncols=ncols, nrows=nrows, figure=fig)

            for i in range(batch_sz):
                ax = fig.add_subplot(grid[i])
                if z is None:
                    self.plot_xyz(ax, x[i], y[i])
                else:
                    self.plot_xyz(ax, x[i], y[i], z=z[i])

            make_dir(output_path, use_dirname=True)
            # Save this figure explicitly instead of relying on pyplot's
            # notion of the "current" figure.
            fig.savefig(output_path)
        finally:
            # Release the figure even when plotting or saving fails, so
            # figures do not pile up in pyplot's registry.
            plt.close(fig)
コード例 #23
0
def crop_image(image_uri, window, crop_uri):
    """Crop a window out of a raster image and store the crop at crop_uri.

    The crop is written to a local path inside a temporary directory and
    then handed to upload_or_copy.

    Args:
        image_uri: URI of the source image, opened with rasterio
        window: object providing rasterio_format(), get_width() and
            get_height() for the region to crop
        crop_uri: destination URI for the cropped image
    """
    rasterio_window = window.rasterio_format()

    # Open the source in a context manager so the dataset is closed once
    # everything needed from it has been read.
    with rasterio.open(image_uri) as im_dataset:
        im = im_dataset.read(window=rasterio_window)

        # Reuse the source metadata, adjusted to the size and position of
        # the crop.
        meta = im_dataset.meta
        meta['width'], meta['height'] = window.get_width(), window.get_height()
        meta['transform'] = rasterio.windows.transform(rasterio_window,
                                                       im_dataset.transform)
        colorinterp = im_dataset.colorinterp

    with tempfile.TemporaryDirectory() as tmp_dir:
        crop_path = get_local_path(crop_uri, tmp_dir)
        make_dir(crop_path, use_dirname=True)

        with rasterio.open(crop_path, 'w', **meta) as dst:
            dst.colorinterp = colorinterp
            dst.write(im)

        upload_or_copy(crop_path, crop_uri)
コード例 #24
0
    def write_sample(self, sample: DataSample):
        """Write a sample chip to (train|valid)/{class_name}/{scene_id}-{ind}.png.

        A sample whose window has no class id is skipped, and the sample
        index is not advanced for it.
        """
        class_id = sample.labels.get_cell_class_id(sample.window)
        if class_id is None:
            # A chip without a class must not end up in the training data.
            return

        if sample.is_train:
            split_name = 'train'
        else:
            split_name = 'valid'
        class_dir = join(self.sample_dir, split_name,
                         self.class_config.names[class_id])
        make_dir(class_dir)

        chip_fn = '{}-{}.png'.format(sample.scene_id, self.sample_ind)
        save_img(sample.chip, join(class_dir, chip_fn))
        self.sample_ind += 1
コード例 #25
0
    def write_sample(self, sample: DataSample):
        """Write a sample's chip and its label array as a pair of PNG files.

        The chip goes to (train|valid)/img/{scene_id}-{ind}.png and the
        label array, cast to uint8, to
        (train|valid)/labels/{scene_id}-{ind}.png.
        """
        if sample.is_train:
            split_name = 'train'
        else:
            split_name = 'valid'
        label_arr = sample.labels.get_label_arr(sample.window).astype(np.uint8)

        split_dir = join(self.sample_dir, split_name)
        img_dir = join(split_dir, 'img')
        labels_dir = join(split_dir, 'labels')
        for directory in (img_dir, labels_dir):
            make_dir(directory)

        # The chip and its labels share one file name in sibling directories.
        sample_fn = '{}-{}.png'.format(sample.scene_id, self.sample_ind)
        save_img(sample.chip, join(img_dir, sample_fn))
        save_img(label_arr, join(labels_dir, sample_fn))

        self.sample_ind += 1
コード例 #26
0
    def write_sample(self, sample: DataSample):
        """Write a sample chip and record it in the per-split COCO structures.

        The chip is saved to (train|valid)/img/{scene_id}-{ind}.png. One
        entry is appended to the split's 'images' list, and one entry per
        box to its 'annotations' list.
        """
        split = 'train' if sample.is_train else 'valid'
        img_dir = join(self.sample_dir, split, 'img')
        make_dir(img_dir)
        img_fn = '{}-{}.png'.format(sample.scene_id, self.sample_ind)
        save_img(sample.chip, join(img_dir, img_fn))

        split_data = self.splits[split]
        images = split_data['images']
        annotations = split_data['annotations']

        image_entry = {
            'file_name': img_fn,
            'id': self.sample_ind,
            'height': sample.chip.shape[0],
            'width': sample.chip.shape[1]
        }
        images.append(image_entry)

        local_boxes = ObjectDetectionLabels.global_to_local(
            sample.labels.get_npboxes(), sample.window)
        class_ids = sample.labels.get_class_ids()
        for box_ind, (box, class_id) in enumerate(zip(local_boxes, class_ids)):
            # Reorder the box as [box[1], box[0], extent of axis 1,
            # extent of axis 0] and cast each value to int.
            bbox = [
                int(v) for v in (box[1], box[0], box[3] - box[1],
                                 box[2] - box[0])
            ]
            annotations.append({
                'id': '{}-{}'.format(self.sample_ind, box_ind),
                'image_id': self.sample_ind,
                'bbox': bbox,
                'category_id': int(class_id)
            })

        self.sample_ind += 1
コード例 #27
0
    def __init__(self,
                 cfg: LearnerConfig,
                 tmp_dir: str,
                 model_path: Optional[str] = None):
        """Constructor.

        Builds the model and moves it to the selected device. If model_path
        is given, only the weights are loaded and the model is put in eval
        mode; otherwise the output directory, optimizer, data, schedulers
        and TensorBoard logging are set up for training.

        Args:
            cfg: configuration
            tmp_dir: root of temp dirs
            model_path: a local path to model weights. If provided, the model is loaded
                and it is assumed that this Learner will be used for prediction only.

        Raises:
            Exception: if model_path is provided but is not an existing file.
        """
        self.cfg = cfg
        self.tmp_dir = tmp_dir

        # TODO make cache dirs configurable
        torch_cache_dir = '/opt/data/torch-cache'
        # TORCH_HOME is set process-wide through the environment.
        os.environ['TORCH_HOME'] = torch_cache_dir
        # Use the GPU when one is available, else fall back to the CPU.
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.data_cache_dir = '/opt/data/data-cache'
        make_dir(self.data_cache_dir)

        self.model = self.build_model()
        self.model.to(self.device)

        if model_path is not None:
            # Prediction-only branch: load the weights and switch to eval mode.
            if isfile(model_path):
                self.model.load_state_dict(
                    torch.load(model_path, map_location=self.device))
            else:
                raise Exception(
                    'Model could not be found at {}'.format(model_path))
            self.model.eval()
        else:
            # Training branch: everything below prepares a training run.
            log.info(self.cfg)

            # ds = dataset, dl = dataloader
            self.train_ds = None
            self.train_dl = None
            self.valid_ds = None
            self.valid_dl = None
            self.test_ds = None
            self.test_dl = None

            if cfg.output_uri.startswith('s3://'):
                # S3 output: work in a local directory derived from the URI.
                # It is emptied first, then sync_from_cloud is called unless
                # overfit_mode is on.
                self.output_dir = get_local_path(cfg.output_uri, tmp_dir)
                make_dir(self.output_dir, force_empty=True)
                if not cfg.overfit_mode:
                    self.sync_from_cloud()
            else:
                # Non-S3 output: use the output URI directly as the directory.
                self.output_dir = cfg.output_uri
                make_dir(self.output_dir)

            # Paths of the files kept in the output directory.
            self.last_model_path = join(self.output_dir, 'last-model.pth')
            self.config_path = join(self.output_dir, 'config.json')
            self.train_state_path = join(self.output_dir, 'train-state.json')
            self.log_path = join(self.output_dir, 'log.csv')
            model_bundle_fn = basename(cfg.get_model_bundle_uri())
            self.model_bundle_path = join(self.output_dir, model_bundle_fn)
            self.metric_names = self.build_metric_names()

            # Write the config to config_path before the rest of the setup.
            json_to_file(self.cfg.dict(), self.config_path)
            self.load_init_weights()
            self.load_checkpoint()
            self.opt = self.build_optimizer()
            self.setup_data()
            self.start_epoch = self.get_start_epoch()
            # Floor division: a trailing partial batch is not counted as a step.
            self.steps_per_epoch = len(
                self.train_ds) // self.cfg.solver.batch_sz
            self.step_scheduler = self.build_step_scheduler()
            self.epoch_scheduler = self.build_epoch_scheduler()
            self.setup_tensorboard()
コード例 #28
0
 def test_use_dirname(self):
     """make_dir with use_dirname=True creates the parent dir of a file path."""
     hello_dir = os.path.join(self.tmp_dir.name, 'hello')
     file_path = os.path.join(hello_dir, 'hello.txt')
     make_dir(file_path, use_dirname=True)
     self.assertTrue(os.path.isdir(hello_dir))
コード例 #29
0
 def test_default_args(self):
     """make_dir called with default arguments creates the directory."""
     target_dir = os.path.join(self.tmp_dir.name, 'hello')
     make_dir(target_dir)
     self.assertTrue(os.path.isdir(target_dir))
コード例 #30
0
    def test_file_exists_local_false(self):
        """file_exists is False for a local path that was never written."""
        parent_dir = os.path.join(self.tmp_dir.name, 'hello')
        make_dir(parent_dir, check_empty=False)

        missing_path = os.path.join(parent_dir, 'hello.txt')
        self.assertFalse(file_exists(missing_path))