Example #1
    def run(self,
            cfg_json_uri,
            pipeline,
            commands,
            num_splits=1,
            pipeline_run_name: str = 'raster-vision'):
        num_commands = 0
        for command in commands:
            if command in pipeline.split_commands and num_splits > 1:
                num_commands += num_splits
            else:
                num_commands += 1

        makefile = '.PHONY: '
        makefile += ' '.join([str(ci) for ci in range(num_commands)])
        makefile += '\n\n'

        makefile += 'all: '
        makefile += ' '.join([str(ci) for ci in range(num_commands)])
        makefile += '\n\n'

        prev_command_inds = []
        curr_command_ind = 0
        for command in commands:

            curr_command_inds = []
            if command in pipeline.split_commands and num_splits > 1:
                for split_ind in range(num_splits):
                    makefile += '{}: '.format(curr_command_ind)
                    makefile += ' '.join([str(ci) for ci in prev_command_inds])
                    makefile += '\n'
                    invocation = (
                        'python -m rastervision.pipeline.cli run_command '
                        '{} {} --split-ind {} --num-splits {}').format(
                            cfg_json_uri, command, split_ind, num_splits)
                    makefile += '\t{}\n\n'.format(invocation)
                    curr_command_inds.append(curr_command_ind)
                    curr_command_ind += 1
            else:
                makefile += '{}: '.format(curr_command_ind)
                makefile += ' '.join([str(ci) for ci in prev_command_inds])
                makefile += '\n'
                invocation = (
                    'python -m rastervision.pipeline.cli run_command '
                    '{} {}'.format(cfg_json_uri, command))
                makefile += '\t{}\n\n'.format(invocation)
                curr_command_inds.append(curr_command_ind)
                curr_command_ind += 1

            prev_command_inds = curr_command_inds

        makefile_path = join(dirname(cfg_json_uri), 'Makefile')
        str_to_file(makefile, makefile_path)
        process = Popen(['make', '-j', '-f', makefile_path])
        terminate_at_exit(process)
        exitcode = process.wait()
        if exitcode != 0:
            sys.exit(exitcode)
        else:
            return 0
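For reference, here is a sketch of the Makefile this method would emit for a hypothetical pipeline where commands is ['chip', 'train'], 'chip' is in pipeline.split_commands, and num_splits is 2 (<cfg_json_uri> stands in for the real config URI). Each numbered phony target depends on every target of the previous command, so make -j runs the two chip splits in parallel and starts train only after both finish:

    .PHONY: 0 1 2

    all: 0 1 2

    0:
    	python -m rastervision.pipeline.cli run_command <cfg_json_uri> chip --split-ind 0 --num-splits 2

    1:
    	python -m rastervision.pipeline.cli run_command <cfg_json_uri> chip --split-ind 1 --num-splits 2

    2: 0 1
    	python -m rastervision.pipeline.cli run_command <cfg_json_uri> train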
Example #2
    def setup_training(self, loss_def_path=None):
        log.info(self.cfg)
        log.info(f'Using device: {self.device}')

        # ds = dataset, dl = dataloader
        self.train_ds = None
        self.train_dl = None
        self.valid_ds = None
        self.valid_dl = None
        self.test_ds = None
        self.test_dl = None

        self.config_path = join(self.output_dir, 'learner-config.json')
        str_to_file(self.cfg.json(), self.config_path)

        self.log_path = join(self.output_dir, 'log.csv')
        self.train_state_path = join(self.output_dir, 'train-state.json')
        model_bundle_fname = basename(self.cfg.get_model_bundle_uri())
        self.model_bundle_path = join(self.output_dir, model_bundle_fname)
        self.metric_names = self.build_metric_names()

        self.last_model_path = join(self.output_dir, 'last-model.pth')
        self.load_checkpoint()

        self.setup_loss(loss_def_path=loss_def_path)
        self.opt = self.build_optimizer()
        self.setup_data()
        self.start_epoch = self.get_start_epoch()
        self.steps_per_epoch = len(self.train_ds) // self.cfg.solver.batch_sz
        self.step_scheduler = self.build_step_scheduler()
        self.epoch_scheduler = self.build_epoch_scheduler()
        self.setup_tensorboard()
    def test_file_exists_local_true(self):
        path = os.path.join(self.tmp_dir.name, 'lorem', 'ipsum.txt')
        directory = os.path.dirname(path)
        make_dir(directory, check_empty=False)

        str_to_file(self.lorem, path)

        self.assertTrue(file_exists(path))
    def test_check_empty(self):
        path = os.path.join(self.tmp_dir.name, 'hello', 'hello.txt')
        dir = os.path.dirname(path)
        str_to_file('hello', path)

        make_dir(dir, check_empty=False)
        with self.assertRaises(Exception):
            make_dir(dir, check_empty=True)
    def test_download_if_needed_local(self):
        with self.assertRaises(NotReadableError):
            file_to_str(self.local_path)

        str_to_file(self.content_str, self.local_path)
        upload_or_copy(self.local_path, self.local_path)
        local_path = download_if_needed(self.local_path, self.tmp_dir.name)
        self.assertEqual(local_path, self.local_path)
    def save(self, output_uri):
        """Save this Evaluation to a file.

        Args:
            output_uri: string URI for the file to write.
        """
        json_str = json.dumps(self.to_json(), indent=4)
        str_to_file(json_str, output_uri)
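A brief usage sketch (the evaluation object and output URI are hypothetical); since str_to_file resolves the file system from the URI, the same call writes to local disk or remote storage:

    evaluation.save('s3://my-bucket/eval/eval.json')  # hypothetical URI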
    def test_file_to_str_local(self):
        str_to_file(self.content_str, self.local_path)
        content_str = file_to_str(self.local_path)
        self.assertEqual(self.content_str, content_str)

        wrong_path = '/wrongpath/x.txt'
        with self.assertRaises(NotReadableError):
            file_to_str(wrong_path)
def save_pipeline_config(cfg: 'PipelineConfig', output_uri: str):
    """Save a PipelineConfig to JSON file.

    Inject rv_config and plugin_versions before saving.
    """
    cfg.rv_config = rv_config.get_config_dict(registry.rv_config_schema)
    cfg.plugin_versions = registry.plugin_versions
    cfg_json = cfg.json()
    str_to_file(cfg_json, output_uri)
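A minimal usage sketch, assuming cfg is a PipelineConfig built elsewhere; the function injects rv_config and plugin_versions itself, so the caller only supplies the destination:

    # Hypothetical config object and output URI.
    save_pipeline_config(cfg, 's3://my-bucket/pipeline-config.json')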
    def test_force_empty(self):
        path = os.path.join(self.tmp_dir.name, 'hello', 'hello.txt')
        dir = os.path.dirname(path)
        str_to_file('hello', path)

        make_dir(dir, force_empty=False)
        self.assertTrue(os.path.isfile(path))
        make_dir(dir, force_empty=True)
        is_empty = len(os.listdir(dir)) == 0
        self.assertTrue(is_empty)
    def test_copy_to_http(self):
        path = os.path.join(self.tmp_dir.name, 'lorem', 'ipsum.txt')
        dst = 'http://localhost/'
        directory = os.path.dirname(path)
        make_dir(directory, check_empty=False)

        str_to_file(self.lorem, path)

        self.assertRaises(NotWritableError, lambda: upload_or_copy(path, dst))
        os.remove(path)
    def test_last_modified(self):
        path = os.path.join(self.tmp_dir.name, 'lorem', 'ipsum1.txt')
        directory = os.path.dirname(path)
        make_dir(directory, check_empty=False)

        fs = FileSystem.get_file_system(path, 'r')

        str_to_file(self.lorem, path)
        stamp = fs.last_modified(path)

        self.assertTrue(isinstance(stamp, datetime.datetime))
    def test_file_exists_s3_true(self):
        path = os.path.join(self.tmp_dir.name, 'lorem', 'ipsum.txt')
        directory = os.path.dirname(path)
        make_dir(directory, check_empty=False)

        str_to_file(self.lorem, path)

        s3_path = 's3://{}/lorem.txt'.format(self.bucket_name)
        upload_or_copy(path, s3_path)

        self.assertTrue(file_exists(s3_path))
    def test_remote(self):
        with patch(
                'rastervision.pipeline.file_system.utils.download_if_needed',
                side_effect=download_if_needed) as patched_download:
            s3_path = 's3://{}/{}'.format(self.bucket_name, self.file_name)
            str_to_file(self.content_str, s3_path)
            path = get_cached_file(self.cache_dir, s3_path)
            self.assertTrue(os.path.isfile(path))

            # Check that calling it again doesn't invoke the download method again.
            path = get_cached_file(self.cache_dir, s3_path)
            self.assertTrue(os.path.isfile(path))
            self.assertEqual(patched_download.call_count, 1)
    def test_copy_to_local(self):
        path1 = os.path.join(self.tmp_dir.name, 'lorem', 'ipsum.txt')
        path2 = os.path.join(self.tmp_dir.name, 'yyy', 'ipsum.txt')
        dir1 = os.path.dirname(path1)
        dir2 = os.path.dirname(path2)
        make_dir(dir1, check_empty=False)
        make_dir(dir2, check_empty=False)

        str_to_file(self.lorem, path1)

        upload_or_copy(path1, path2)
        self.assertEqual(len(list_paths(dir2)), 1)
    def test_list_paths_s3(self):
        path = os.path.join(self.tmp_dir.name, 'lorem', 'ipsum.txt')
        s3_path = 's3://{}/xxx/lorem.txt'.format(self.bucket_name)
        s3_directory = 's3://{}/xxx/'.format(self.bucket_name)
        directory = os.path.dirname(path)
        make_dir(directory, check_empty=False)

        str_to_file(self.lorem, path)
        upload_or_copy(path, s3_path)

        list_paths(s3_directory)
        self.assertEqual(len(list_paths(s3_directory)), 1)
Example #16
    def save_messages(self, split_ind=0, num_splits=1):
        message_maker = self.config.message_maker.build()

        split_groups = split_into_groups(
            list(zip(self.config.names, self.config.message_uris)), num_splits)
        split_group = split_groups[split_ind]

        for name, message_uri in split_group:
            # Unlike before, we use the message_maker to make the message.
            message = message_maker.make_message(name)
            str_to_file(message, message_uri)
            print('Saved message to {}'.format(message_uri))
    def test_file_to_str_s3(self):
        wrong_path = 's3://wrongpath/x.txt'

        with self.assertRaises(NotWritableError):
            str_to_file(self.content_str, wrong_path)

        str_to_file(self.content_str, self.s3_path)
        content_str = file_to_str(self.s3_path)
        self.assertEqual(self.content_str, content_str)

        with self.assertRaises(NotReadableError):
            file_to_str(wrong_path)
    def test_last_modified_s3(self):
        path = os.path.join(self.tmp_dir.name, 'lorem', 'ipsum1.txt')
        s3_path = 's3://{}/lorem1.txt'.format(self.bucket_name)
        directory = os.path.dirname(path)
        make_dir(directory, check_empty=False)

        fs = FileSystem.get_file_system(s3_path, 'r')

        str_to_file(self.lorem, path)
        upload_or_copy(path, s3_path)
        stamp = fs.last_modified(s3_path)

        self.assertTrue(isinstance(stamp, datetime.datetime))
    def test_download_if_needed_s3(self):
        with self.assertRaises(NotReadableError):
            file_to_str(self.s3_path)

        str_to_file(self.content_str, self.local_path)
        upload_or_copy(self.local_path, self.s3_path)
        local_path = download_if_needed(self.s3_path, self.tmp_dir.name)
        content_str = file_to_str(local_path)
        self.assertEqual(self.content_str, content_str)

        wrong_path = 's3://wrongpath/x.txt'
        with self.assertRaises(NotWritableError):
            upload_or_copy(local_path, wrong_path)
    def test_file_exists(self):
        fs = FileSystem.get_file_system(self.tmp_dir.name, 'r')

        path1 = os.path.join(self.tmp_dir.name, 'lorem', 'ipsum.txt')
        dir1 = os.path.dirname(path1)
        make_dir(dir1, check_empty=False)

        str_to_file(self.lorem, path1)

        self.assertTrue(fs.file_exists(dir1, include_dir=True))
        self.assertTrue(fs.file_exists(path1, include_dir=False))
        self.assertFalse(fs.file_exists(dir1, include_dir=False))
        self.assertFalse(
            fs.file_exists(dir1 + 'NOTPOSSIBLE', include_dir=False))
    def test_file_exists(self):
        path = os.path.join(self.tmp_dir.name, 'lorem', 'ipsum.txt')
        s3_path = 's3://{}/xxx/lorem.txt'.format(self.bucket_name)
        s3_path_prefix = 's3://{}/xxx/lorem'.format(self.bucket_name)
        s3_directory = 's3://{}/xxx/'.format(self.bucket_name)
        make_dir(path, check_empty=False, use_dirname=True)

        str_to_file(self.lorem, path)
        upload_or_copy(path, s3_path)

        self.assertTrue(file_exists(s3_directory, include_dir=True))
        self.assertTrue(file_exists(s3_path, include_dir=False))
        self.assertFalse(file_exists(s3_path_prefix, include_dir=True))
        self.assertFalse(file_exists(s3_directory, include_dir=False))
        self.assertFalse(
            file_exists(s3_directory + 'NOTPOSSIBLE', include_dir=False))
Example #22
    def save_messages(self, split_ind=0, num_splits=1):
        # Save a file for each name with a message.

        # num_splits is the number of parallel jobs to use, and split_ind
        # is the index of this job. In this case we are splitting on the
        # names/message_uris.
        split_groups = split_into_groups(
            list(zip(self.config.names, self.config.message_uris)), num_splits)
        split_group = split_groups[split_ind]

        for name, message_uri in split_group:
            message = 'hello {}!'.format(name)
            # str_to_file and most functions in the file_system package can
            # read and write transparently to different file systems based on
            # the URI pattern.
            str_to_file(message, message_uri)
            print('Saved message to {}'.format(message_uri))
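To make the splitting concrete, here is a minimal sketch, assuming split_into_groups divides a list into at most num_splits roughly equal groups (the names and URIs are illustrative):

    names = ['alice', 'bob', 'carol', 'dave']
    message_uris = ['s3://my-bucket/{}.txt'.format(n) for n in names]
    pairs = list(zip(names, message_uris))
    # With num_splits=2, the job running save_messages(split_ind=0, num_splits=2)
    # would write messages for the first group and the split_ind=1 job for the
    # second; each str_to_file call picks the backing file system (local, S3,
    # HTTP, ...) from the URI scheme.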
Example #23
def filter_geojson(labels_uri, output_uri, class_names):
    """Remove features that aren't in class_names and remove class_ids."""
    labels_str = file_to_str(labels_uri)
    labels = json.loads(labels_str)
    filtered_features = []

    for feature in labels['features']:
        feature = copy.deepcopy(feature)
        properties = feature.get('properties')
        if properties:
            class_name = (properties.get('class_name')
                          or properties.get('label'))
            if class_name in class_names:
                del properties['class_id']
                filtered_features.append(feature)

    new_labels = {'features': filtered_features}
    str_to_file(json.dumps(new_labels), output_uri)
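A short usage sketch with hypothetical URIs; file_to_str and str_to_file both resolve the file system from the URI, so local paths work the same way:

    filter_geojson(
        labels_uri='s3://my-bucket/labels.geojson',
        output_uri='s3://my-bucket/labels-filtered.geojson',
        class_names=['building', 'road'])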
Example #24
    def write_vector_outputs(self, labels: SemanticSegmentationLabels) -> None:
        """Write vectorized outputs for all configs in self.vector_outputs."""
        import mask_to_polygons.vectorification as vectorification
        import mask_to_polygons.processing.denoise as denoise

        log.info('Writing vector output to disk.')

        label_arr = self._labels_to_full_label_arr(labels)
        with click.progressbar(self.vector_outputs) as bar:
            for i, vo in enumerate(bar):
                if vo.uri is None:
                    log.info(f'Skipping VectorOutputConfig at index {i} '
                             'due to missing uri.')
                    continue
                uri = get_local_path(vo.uri, self.tmp_dir)
                denoise_radius = vo.denoise
                mode = vo.get_mode()
                class_mask = (label_arr == vo.class_id).astype(np.uint8)

                def transform(x, y):
                    return self.crs_transformer.pixel_to_map((x, y))

                if denoise_radius > 0:
                    class_mask = denoise.denoise(class_mask, denoise_radius)

                if mode == 'buildings':
                    geojson = vectorification.geojson_from_mask(
                        mask=class_mask,
                        transform=transform,
                        mode=mode,
                        min_aspect_ratio=vo.min_aspect_ratio,
                        min_area=vo.min_area,
                        width_factor=vo.element_width_factor,
                        thickness=vo.element_thickness)
                elif mode == 'polygons':
                    geojson = vectorification.geojson_from_mask(
                        mask=class_mask, transform=transform, mode=mode)

                str_to_file(geojson, uri)
                upload_or_copy(uri, vo.uri)
Example #25
    def run(self,
            cfg_json_uri,
            pipeline,
            commands,
            num_splits=1,
            pipeline_run_name: str = 'raster-vision'):
        num_commands = 0
        for command in commands:
            if command in pipeline.split_commands and num_splits > 1:
                num_commands += num_splits
            else:
                num_commands += 1

        makefile = '.PHONY: '
        makefile += ' '.join([str(ci) for ci in range(num_commands)])
        makefile += '\n\n'

        makefile += 'all: '
        makefile += ' '.join([str(ci) for ci in range(num_commands)])
        makefile += '\n\n'

        prev_command_inds = []
        curr_command_ind = 0
        for command in commands:

            # detect external command
            if hasattr(pipeline, command):
                fn = getattr(pipeline, command)
                params = signature(fn).parameters
                external = hasattr(fn, 'external') and len(params) in {0, 1}
            else:
                external = False

            curr_command_inds = []
            if not external:
                if command in pipeline.split_commands and num_splits > 1:
                    for split_ind in range(num_splits):
                        makefile += '{}: '.format(curr_command_ind)
                        makefile += ' '.join(
                            [str(ci) for ci in prev_command_inds])
                        makefile += '\n'
                        invocation = (
                            'python -m rastervision.pipeline.cli run_command '
                            '{} {} --split-ind {} --num-splits {}').format(
                                cfg_json_uri, command, split_ind, num_splits)
                        makefile += '\t{}\n\n'.format(invocation)
                        curr_command_inds.append(curr_command_ind)
                        curr_command_ind += 1
                else:
                    makefile += '{}: '.format(curr_command_ind)
                    makefile += ' '.join([str(ci) for ci in prev_command_inds])
                    makefile += '\n'
                    invocation = (
                        'python -m rastervision.pipeline.cli run_command '
                        '{} {}'.format(cfg_json_uri, command))
                    makefile += '\t{}\n\n'.format(invocation)
                    curr_command_inds.append(curr_command_ind)
                    curr_command_ind += 1
            else:
                if len(params) == 0:
                    # No-parameter external command
                    cmds = [fn()]
                elif len(params) == 1 and command in pipeline.split_commands:
                    # One-parameter split external command
                    cmds = fn(num_splits)
                elif (len(params) == 1
                      and command not in pipeline.split_commands):
                    # One-parameter unsplit external command
                    cmds = fn(1)
                else:
                    # No command
                    cmds = []
                for cmd in cmds:
                    makefile += '{}: '.format(curr_command_ind)
                    makefile += ' '.join([str(ci) for ci in prev_command_inds])
                    makefile += '\n'
                    invocation = ' '.join(cmd)
                    makefile += '\t{}\n\n'.format(invocation)
                    curr_command_inds.append(curr_command_ind)
                    curr_command_ind += 1

            prev_command_inds = curr_command_inds

        makefile_path = join(dirname(cfg_json_uri), 'Makefile')
        str_to_file(makefile, makefile_path)
        process = Popen(['make', '-j', '-f', makefile_path])
        terminate_at_exit(process)
        exitcode = process.wait()
        if exitcode != 0:
            sys.exit(exitcode)
        else:
            return 0
    def save(self, labels):
        """Save labels to disk.

        Args:
            labels: (SemanticSegmentationLabels) labels to be saved
        """
        local_path = get_local_path(self.uri, self.tmp_dir)
        make_dir(local_path, use_dirname=True)

        transform = self.crs_transformer.get_affine_transform()
        crs = self.crs_transformer.get_image_crs()

        band_count = 1
        dtype = np.uint8
        if self.class_trans:
            band_count = 3

        mask = (np.zeros((self.extent.ymax, self.extent.xmax), dtype=np.uint8)
                if self.vector_output else None)

        # https://github.com/mapbox/rasterio/blob/master/docs/quickstart.rst
        # https://rasterio.readthedocs.io/en/latest/topics/windowed-rw.html
        with rasterio.open(local_path,
                           'w',
                           driver='GTiff',
                           height=self.extent.ymax,
                           width=self.extent.xmax,
                           count=band_count,
                           dtype=dtype,
                           transform=transform,
                           crs=crs) as dataset:
            for window in labels.get_windows():
                label_arr = labels.get_label_arr(window)
                window = window.intersection(self.extent)
                label_arr = label_arr[0:window.get_height(),
                                      0:window.get_width()]

                if mask is not None:
                    mask[window.ymin:window.ymax,
                         window.xmin:window.xmax] = label_arr

                window = window.rasterio_format()
                if self.class_trans:
                    rgb_labels = self.class_trans.class_to_rgb(label_arr)
                    for chan in range(3):
                        dataset.write_band(chan + 1,
                                           rgb_labels[:, :, chan],
                                           window=window)
                else:
                    img = label_arr.astype(dtype)
                    dataset.write_band(1, img, window=window)

        upload_or_copy(local_path, self.uri)

        if self.vector_output:
            import mask_to_polygons.vectorification as vectorification
            import mask_to_polygons.processing.denoise as denoise

            for vo in self.vector_output:
                denoise_radius = vo.denoise
                uri = vo.uri
                mode = vo.get_mode()
                class_id = vo.class_id
                class_mask = np.array(mask == class_id, dtype=np.uint8)

                def transform(x, y):
                    return self.crs_transformer.pixel_to_map((x, y))

                if denoise_radius > 0:
                    class_mask = denoise.denoise(class_mask, denoise_radius)

                if uri and mode == 'buildings':
                    geojson = vectorification.geojson_from_mask(
                        mask=class_mask,
                        transform=transform,
                        mode=mode,
                        min_aspect_ratio=vo.min_aspect_ratio,
                        min_area=vo.min_area,
                        width_factor=vo.element_width_factor,
                        thickness=vo.element_thickness)
                elif uri and mode == 'polygons':
                    geojson = vectorification.geojson_from_mask(
                        mask=class_mask, transform=transform, mode=mode)
                if uri:
                    str_to_file(geojson, uri)
Example #27
    def save(self, stats_uri):
        # Ensure lists
        means = list(self.means)
        stds = list(self.stds)
        stats = {'means': means, 'stds': stds}
        str_to_file(json.dumps(stats), stats_uri)
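Because the stats are written as plain JSON, reading them back is symmetric. A minimal round-trip sketch with a hypothetical URI:

    stats.save('s3://my-bucket/stats.json')
    loaded = json.loads(file_to_str('s3://my-bucket/stats.json'))
    assert loaded['means'] == list(stats.means)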
Example #28
    def __init__(self,
                 cfg: LearnerConfig,
                 tmp_dir: str,
                 model_path: Optional[str] = None,
                 model_def_path: Optional[str] = None,
                 loss_def_path: Optional[str] = None):
        """Constructor.

        Args:
            cfg: configuration
            tmp_dir: root of temp dirs
            model_path: a local path to model weights. If provided, the model is loaded
                and it is assumed that this Learner will be used for prediction only.
            model_def_path: a local path to a directory with a hubconf.py. If
                provided, the model definition is imported from here.
            loss_def_path: a local path to a directory with a hubconf.py. If
                provided, the loss function definition is imported from here.
        """
        self.cfg = cfg
        self.tmp_dir = tmp_dir

        # TODO make cache dirs configurable
        torch_cache_dir = '/opt/data/torch-cache'
        os.environ['TORCH_HOME'] = torch_cache_dir
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.data_cache_dir = '/opt/data/data-cache'
        make_dir(self.data_cache_dir)

        if FileSystem.get_file_system(cfg.output_uri) == LocalFileSystem:
            self.output_dir = cfg.output_uri
            make_dir(self.output_dir)
        else:
            self.output_dir = get_local_path(cfg.output_uri, tmp_dir)
            make_dir(self.output_dir, force_empty=True)
            if not cfg.overfit_mode:
                self.sync_from_cloud()

        self.modules_dir = join(self.output_dir, MODULES_DIRNAME)

        self.setup_model(model_def_path=model_def_path)

        if model_path is not None:
            if isfile(model_path):
                self.model.load_state_dict(
                    torch.load(model_path, map_location=self.device))
            else:
                raise Exception(
                    'Model could not be found at {}'.format(model_path))
            self.model.eval()
        else:
            log.info(self.cfg)

            # ds = dataset, dl = dataloader
            self.train_ds = None
            self.train_dl = None
            self.valid_ds = None
            self.valid_dl = None
            self.test_ds = None
            self.test_dl = None

            self.config_path = join(self.output_dir, 'learner-config.json')
            str_to_file(self.cfg.json(), self.config_path)

            self.log_path = join(self.output_dir, 'log.csv')
            self.train_state_path = join(self.output_dir, 'train-state.json')
            model_bundle_fname = basename(cfg.get_model_bundle_uri())
            self.model_bundle_path = join(self.output_dir, model_bundle_fname)
            self.metric_names = self.build_metric_names()

            self.last_model_path = join(self.output_dir, 'last-model.pth')
            self.load_checkpoint()

            self.setup_loss(loss_def_path=loss_def_path)
            self.opt = self.build_optimizer()
            self.setup_data()
            self.start_epoch = self.get_start_epoch()
            self.steps_per_epoch = len(
                self.train_ds) // self.cfg.solver.batch_sz
            self.step_scheduler = self.build_step_scheduler()
            self.epoch_scheduler = self.build_epoch_scheduler()
            self.setup_tensorboard()
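A hedged construction sketch: passing model_path takes the prediction-only branch above, which loads the weights and calls eval() on the model instead of running the training setup. The concrete Learner subclass, config, and paths here are hypothetical:

    learner = SomeLearnerSubclass(  # hypothetical concrete subclass
        cfg, tmp_dir='/tmp/rv', model_path='/opt/data/last-model.pth')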
    def test_write_str_http(self):
        self.assertRaises(NotWritableError,
                          lambda: str_to_file('xxx', 'http://localhost/'))
    def test_local(self):
        local_path = os.path.join(self.tmp_dir.name, self.file_name)
        str_to_file(self.content_str, local_path)

        path = get_cached_file(self.cache_dir, local_path)
        self.assertTrue(os.path.isfile(path))