    def save(self, output_uri):
        """Save this Evaluation to a file.

        Args:
            output_uri: string URI for the file to write.
        """
        json_str = json.dumps(self.to_json(), indent=4)
        str_to_file(json_str, output_uri)
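
All of the examples on this page use str_to_file and its companions (file_to_str, upload_or_copy, download_if_needed) from the Raster Vision file_system utilities. A minimal round trip looks like the sketch below; the import path is an assumption based on the rastervision2 modules referenced in the other examples.

# Sketch of a str_to_file/file_to_str round trip. The import path is an
# assumption based on the rastervision2 modules used elsewhere on this page.
from rastervision2.pipeline.file_system import file_to_str, str_to_file

str_to_file('{"status": "ok"}', '/tmp/example.json')  # write a local file
assert file_to_str('/tmp/example.json') == '{"status": "ok"}'
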
    def test_download_if_needed_local(self):
        with self.assertRaises(NotReadableError):
            file_to_str(self.local_path)

        str_to_file(self.content_str, self.local_path)
        # Copying a file onto itself should be a no-op.
        upload_or_copy(self.local_path, self.local_path)
        local_path = download_if_needed(self.local_path, self.tmp_dir.name)
        self.assertEqual(local_path, self.local_path)
    def test_file_to_str_local(self):
        str_to_file(self.content_str, self.local_path)
        content_str = file_to_str(self.local_path)
        self.assertEqual(self.content_str, content_str)

        wrong_path = '/wrongpath/x.txt'
        with self.assertRaises(NotReadableError):
            file_to_str(wrong_path)
    def test_check_empty(self):
        path = os.path.join(self.tmp_dir.name, 'hello', 'hello.txt')
        dir = os.path.dirname(path)
        str_to_file('hello', path)

        make_dir(dir, check_empty=False)
        with self.assertRaises(Exception):
            make_dir(dir, check_empty=True)
    def test_file_exists_local_true(self):
        path = os.path.join(self.tmp_dir.name, 'lorem', 'ipsum.txt')
        directory = os.path.dirname(path)
        make_dir(directory, check_empty=False)

        str_to_file(self.lorem, path)

        self.assertTrue(file_exists(path))
Example #6
def save_pipeline_config(cfg: 'PipelineConfig', output_uri: str):
    """Save a PipelineConfig to JSON file.

    Inject rv_config and plugin_versions before saving.
    """
    cfg.rv_config = rv_config.get_config_dict(registry.rv_config_schema)
    cfg.plugin_versions = registry.plugin_versions
    cfg_json = cfg.json()
    str_to_file(cfg_json, output_uri)
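
Reading a saved config back is the reverse operation. A sketch, assuming build_config (used in _run_pipeline below) accepts the dict form of the serialized config:

# Sketch of loading a PipelineConfig saved by save_pipeline_config; assumes
# build_config accepts the config's dict form, as in _run_pipeline below.
import json

def load_pipeline_config(config_uri: str) -> 'PipelineConfig':
    cfg_dict = json.loads(file_to_str(config_uri))
    return build_config(cfg_dict)
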
    def test_copy_to_http(self):
        path = os.path.join(self.tmp_dir.name, 'lorem', 'ipsum.txt')
        dst = 'http://localhost/'
        directory = os.path.dirname(path)
        make_dir(directory, check_empty=False)

        str_to_file(self.lorem, path)

        self.assertRaises(NotWritableError, lambda: upload_or_copy(path, dst))
        os.remove(path)
    def test_force_empty(self):
        path = os.path.join(self.tmp_dir.name, 'hello', 'hello.txt')
        dir = os.path.dirname(path)
        str_to_file('hello', path)

        make_dir(dir, force_empty=False)
        self.assertTrue(os.path.isfile(path))
        make_dir(dir, force_empty=True)
        is_empty = len(os.listdir(dir)) == 0
        self.assertTrue(is_empty)
    def test_last_modified(self):
        path = os.path.join(self.tmp_dir.name, 'lorem', 'ipsum1.txt')
        directory = os.path.dirname(path)
        make_dir(directory, check_empty=False)

        fs = FileSystem.get_file_system(path, 'r')

        str_to_file(self.lorem, path)
        stamp = fs.last_modified(path)

        self.assertTrue(isinstance(stamp, datetime.datetime))
    def test_file_exists_s3_true(self):
        path = os.path.join(self.tmp_dir.name, 'lorem', 'ipsum.txt')
        directory = os.path.dirname(path)
        make_dir(directory, check_empty=False)

        str_to_file(self.lorem, path)

        s3_path = 's3://{}/lorem.txt'.format(self.bucket_name)
        upload_or_copy(path, s3_path)

        self.assertTrue(file_exists(s3_path))
Example #11
    def run(self, cfg_json_uri, pipeline, commands, num_splits=1):
        num_commands = 0
        for command in commands:
            if command in pipeline.split_commands and num_splits > 1:
                num_commands += num_splits
            else:
                num_commands += 1

        makefile = '.PHONY: '
        makefile += ' '.join([str(ci) for ci in range(num_commands)])
        makefile += '\n\n'

        makefile += 'all: '
        makefile += ' '.join([str(ci) for ci in range(num_commands)])
        makefile += '\n\n'

        prev_command_inds = []
        curr_command_ind = 0
        for command in commands:
            curr_command_inds = []
            if command in pipeline.split_commands and num_splits > 1:
                for split_ind in range(num_splits):
                    makefile += '{}: '.format(curr_command_ind)
                    makefile += ' '.join([str(ci) for ci in prev_command_inds])
                    makefile += '\n'
                    invocation = (
                        'python -m rastervision2.pipeline.cli run_command '
                        '{} {} --split-ind {} --num-splits {}').format(
                            cfg_json_uri, command, split_ind, num_splits)
                    makefile += '\t{}\n\n'.format(invocation)
                    curr_command_inds.append(curr_command_ind)
                    curr_command_ind += 1
            else:
                makefile += '{}: '.format(curr_command_ind)
                makefile += ' '.join([str(ci) for ci in prev_command_inds])
                makefile += '\n'
                invocation = (
                    'python -m rastervision2.pipeline.cli run_command '
                    '{} {}'.format(cfg_json_uri, command))
                makefile += '\t{}\n\n'.format(invocation)
                curr_command_inds.append(curr_command_ind)
                curr_command_ind += 1

            prev_command_inds = curr_command_inds

        makefile_path = join(dirname(cfg_json_uri), 'Makefile')
        str_to_file(makefile, makefile_path)
        process = Popen(['make', '-j', '-f', makefile_path])
        terminate_at_exit(process)
        exitcode = process.wait()
        if exitcode != 0:
            sys.exit(exitcode)
        else:
            return 0
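
To make the target numbering concrete: with hypothetical commands ['chip', 'train'] where chip is a split command and num_splits=2, the code above emits a Makefile along these lines, so make -j runs the two chip splits in parallel and train only after both finish.

# Illustrative output of the Makefile generation above (hypothetical
# command names; <cfg_json_uri> stands in for the real URI):
.PHONY: 0 1 2

all: 0 1 2

0:
	python -m rastervision2.pipeline.cli run_command <cfg_json_uri> chip --split-ind 0 --num-splits 2

1:
	python -m rastervision2.pipeline.cli run_command <cfg_json_uri> chip --split-ind 1 --num-splits 2

2: 0 1
	python -m rastervision2.pipeline.cli run_command <cfg_json_uri> train
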
    def test_remote(self):
        with patch(
                'rastervision2.pipeline.file_system.utils.download_if_needed',
                side_effect=download_if_needed) as patched_download:
            s3_path = 's3://{}/{}'.format(self.bucket_name, self.file_name)
            str_to_file(self.content_str, s3_path)
            path = get_cached_file(self.cache_dir, s3_path)
            self.assertTrue(os.path.isfile(path))

            # Check that calling it again doesn't invoke the download
            # method again.
            path = get_cached_file(self.cache_dir, s3_path)
            self.assertTrue(os.path.isfile(path))
            self.assertEqual(patched_download.call_count, 1)
    def test_list_paths_s3(self):
        path = os.path.join(self.tmp_dir.name, 'lorem', 'ipsum.txt')
        s3_path = 's3://{}/xxx/lorem.txt'.format(self.bucket_name)
        s3_directory = 's3://{}/xxx/'.format(self.bucket_name)
        directory = os.path.dirname(path)
        make_dir(directory, check_empty=False)

        str_to_file(self.lorem, path)
        upload_or_copy(path, s3_path)

        list_paths(s3_directory)
        self.assertEqual(len(list_paths(s3_directory)), 1)
    def test_file_to_str_s3(self):
        wrong_path = 's3://wrongpath/x.txt'

        with self.assertRaises(NotWritableError):
            str_to_file(self.content_str, wrong_path)

        str_to_file(self.content_str, self.s3_path)
        content_str = file_to_str(self.s3_path)
        self.assertEqual(self.content_str, content_str)

        with self.assertRaises(NotReadableError):
            file_to_str(wrong_path)
    def save_messages(self, split_ind=0, num_splits=1):
        message_maker = self.config.message_maker.build()

        split_groups = split_into_groups(
            list(zip(self.config.names, self.config.message_uris)), num_splits)
        split_group = split_groups[split_ind]

        for name, message_uri in split_group:
            # Unlike before, we use the message_maker to make the message.
            message = message_maker.make_message(name)
            str_to_file(message, message_uri)
            print('Saved message to {}'.format(message_uri))
    def test_copy_to_local(self):
        path1 = os.path.join(self.tmp_dir.name, 'lorem', 'ipsum.txt')
        path2 = os.path.join(self.tmp_dir.name, 'yyy', 'ipsum.txt')
        dir1 = os.path.dirname(path1)
        dir2 = os.path.dirname(path2)
        make_dir(dir1, check_empty=False)
        make_dir(dir2, check_empty=False)

        str_to_file(self.lorem, path1)

        upload_or_copy(path1, path2)
        self.assertEqual(len(list_paths(dir2)), 1)
    def test_download_if_needed_s3(self):
        with self.assertRaises(NotReadableError):
            file_to_str(self.s3_path)

        str_to_file(self.content_str, self.local_path)
        upload_or_copy(self.local_path, self.s3_path)
        local_path = download_if_needed(self.s3_path, self.tmp_dir.name)
        content_str = file_to_str(local_path)
        self.assertEqual(self.content_str, content_str)

        wrong_path = 's3://wrongpath/x.txt'
        with self.assertRaises(NotWritableError):
            upload_or_copy(local_path, wrong_path)
    def test_last_modified_s3(self):
        path = os.path.join(self.tmp_dir.name, 'lorem', 'ipsum1.txt')
        s3_path = 's3://{}/lorem1.txt'.format(self.bucket_name)
        directory = os.path.dirname(path)
        make_dir(directory, check_empty=False)

        fs = FileSystem.get_file_system(s3_path, 'r')

        str_to_file(self.lorem, path)
        upload_or_copy(path, s3_path)
        stamp = fs.last_modified(s3_path)

        self.assertTrue(isinstance(stamp, datetime.datetime))
    def test_file_exists(self):
        fs = FileSystem.get_file_system(self.tmp_dir.name, 'r')

        path1 = os.path.join(self.tmp_dir.name, 'lorem', 'ipsum.txt')
        dir1 = os.path.dirname(path1)
        make_dir(dir1, check_empty=False)

        str_to_file(self.lorem, path1)

        self.assertTrue(fs.file_exists(dir1, include_dir=True))
        self.assertTrue(fs.file_exists(path1, include_dir=False))
        self.assertFalse(fs.file_exists(dir1, include_dir=False))
        self.assertFalse(
            fs.file_exists(dir1 + 'NOTPOSSIBLE', include_dir=False))
    def test_file_exists(self):
        path = os.path.join(self.tmp_dir.name, 'lorem', 'ipsum.txt')
        s3_path = 's3://{}/xxx/lorem.txt'.format(self.bucket_name)
        s3_path_prefix = 's3://{}/xxx/lorem'.format(self.bucket_name)
        s3_directory = 's3://{}/xxx/'.format(self.bucket_name)
        make_dir(path, check_empty=False, use_dirname=True)

        str_to_file(self.lorem, path)
        upload_or_copy(path, s3_path)

        self.assertTrue(file_exists(s3_directory, include_dir=True))
        self.assertTrue(file_exists(s3_path, include_dir=False))
        self.assertFalse(file_exists(s3_path_prefix, include_dir=True))
        self.assertFalse(file_exists(s3_directory, include_dir=False))
        self.assertFalse(
            file_exists(s3_directory + 'NOTPOSSIBLE', include_dir=False))
Example #21
    def save_messages(self, split_ind=0, num_splits=1):
        # Save a file for each name with a message.

        # The num_splits is the number of parallel jobs to use and
        # split_ind tracks the index of the parallel job. In this case
        # we are splitting on the names/message_uris.
        split_groups = split_into_groups(
            list(zip(self.config.names, self.config.message_uris)), num_splits)
        split_group = split_groups[split_ind]

        for name, message_uri in split_group:
            message = 'hello {}!'.format(name)
            # str_to_file and most functions in the file_system package can
            # read and write transparently to different file systems based on
            # the URI pattern.
            str_to_file(message, message_uri)
            print('Saved message to {}'.format(message_uri))
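
A hedged illustration of the splitting logic, assuming split_into_groups partitions a list into num_splits roughly equal, order-preserving chunks:

# Hypothetical names/URIs to show how the work is divided across jobs.
names = ['alice', 'bob', 'carol', 'dave']
message_uris = ['s3://bucket/{}.txt'.format(n) for n in names]
groups = split_into_groups(list(zip(names, message_uris)), 2)
# groups[0] -> [('alice', ...), ('bob', ...)]    handled by split_ind=0
# groups[1] -> [('carol', ...), ('dave', ...)]   handled by split_ind=1
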
def filter_geojson(labels_uri, output_uri, class_names):
    """Remove features that aren't in class_names and remove class_ids."""
    labels_str = file_to_str(labels_uri)
    labels = json.loads(labels_str)
    filtered_features = []

    for feature in labels['features']:
        feature = copy.deepcopy(feature)
        properties = feature.get('properties')
        if properties:
            class_name = (properties.get('class_name')
                          or properties.get('label'))
            if class_name in class_names:
                del properties['class_id']
                filtered_features.append(feature)

    new_labels = {'features': filtered_features}
    str_to_file(json.dumps(new_labels), output_uri)
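
A usage sketch (the URIs and class names are hypothetical):

filter_geojson(
    labels_uri='s3://my-bucket/labels.geojson',
    output_uri='s3://my-bucket/filtered.geojson',
    class_names=['building', 'road'])
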
Example #23
def _run_pipeline(cfg, runner, tmp_dir, splits=1, commands=None):
    cfg.update()
    cfg.rv_config = rv_config.get_config_dict(registry.rv_config_schema)
    cfg.recursive_validate_config()
    # Run validation again to catch any fields that may have changed after
    # the Config was constructed, e.g. by the update method.
    build_config(cfg.dict())

    cfg_json = cfg.json()
    cfg_json_uri = cfg.get_config_uri()
    str_to_file(cfg_json, cfg_json_uri)

    pipeline = cfg.build(tmp_dir)
    if not commands:
        commands = pipeline.commands

    runner.run(cfg_json_uri, pipeline, commands, num_splits=splits)
Example #24
def run(runner: str, cfg_module: str, commands: List[str],
        arg: List[Tuple[str, str]], splits: int):
    """Run COMMANDS within pipelines in CFG_MODULE using RUNNER.

    RUNNER: name of the Runner to use

    CFG_MODULE: the module with `get_configs` function that returns PipelineConfigs.
    This can either be a Python module path or a local path to a .py file.

    COMMANDS: space separated sequence of commands to run within pipeline. The order in
    which to run them is based on the Pipeline.commands attribute. If this is omitted,
    all commands will be run.
    """
    tmp_dir_obj = rv_config.get_tmp_dir()
    tmp_dir = tmp_dir_obj.name

    args = dict(arg)
    args = convert_bool_args(args)
    cfgs = get_configs(cfg_module, runner, args)
    runner = registry.get_runner(runner)()

    for cfg in cfgs:
        cfg.update()
        cfg.rv_config = rv_config.get_config_dict(registry.rv_config_schema)
        cfg.recursive_validate_config()
        # Run validation again to catch any fields that may have changed
        # after the Config was constructed, e.g. by the update method.
        build_config(cfg.dict())

        cfg_json = cfg.json()
        cfg_json_uri = cfg.get_config_uri()
        str_to_file(cfg_json, cfg_json_uri)

        pipeline = cfg.build(tmp_dir)
        if not commands:
            commands = pipeline.commands

        runner.run(cfg_json_uri, pipeline, commands, num_splits=splits)
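
From a shell this corresponds to something like the invocation below; the option spellings for splits and args are assumptions inferred from the function signature, not confirmed CLI flags.

python -m rastervision2.pipeline.cli run local my_configs.py chip train \
    --splits 2 -a key value
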
Example #25
    def run(self, cfg_json_uri, pipeline, commands, num_splits=1):
        num_commands = 0
        for command in commands:
            if command in pipeline.split_commands and num_splits > 1:
                num_commands += num_splits
            else:
                num_commands += 1

        makefile = '.PHONY: '
        makefile += ' '.join([str(ci) for ci in range(num_commands)])
        makefile += '\n\n'

        makefile += 'all: '
        makefile += ' '.join([str(ci) for ci in range(num_commands)])
        makefile += '\n\n'

        prev_command_inds = []
        curr_command_ind = 0
        for command in commands:

            # detect external command
            if hasattr(pipeline, command):
                fn = getattr(pipeline, command)
                params = signature(fn).parameters
                external = hasattr(fn, 'external') and len(params) in {0, 1}
            else:
                external = False

            curr_command_inds = []
            if not external:
                if command in pipeline.split_commands and num_splits > 1:
                    for split_ind in range(num_splits):
                        makefile += '{}: '.format(curr_command_ind)
                        makefile += ' '.join(
                            [str(ci) for ci in prev_command_inds])
                        makefile += '\n'
                        invocation = (
                            'python -m rastervision2.pipeline.cli run_command '
                            '{} {} --split-ind {} --num-splits {}').format(
                                cfg_json_uri, command, split_ind, num_splits)
                        makefile += '\t{}\n\n'.format(invocation)
                        curr_command_inds.append(curr_command_ind)
                        curr_command_ind += 1
                else:
                    makefile += '{}: '.format(curr_command_ind)
                    makefile += ' '.join([str(ci) for ci in prev_command_inds])
                    makefile += '\n'
                    invocation = (
                        'python -m rastervision2.pipeline.cli run_command '
                        '{} {}'.format(cfg_json_uri, command))
                    makefile += '\t{}\n\n'.format(invocation)
                    curr_command_inds.append(curr_command_ind)
                    curr_command_ind += 1
            else:
                if len(params) == 0:
                    # No-parameter external command
                    cmds = [fn()]
                elif len(params) == 1 and command in pipeline.split_commands:
                    # One-parameter split external command
                    cmds = fn(num_splits)
                elif (len(params) == 1
                      and command not in pipeline.split_commands):
                    # One-parameter unsplit external command
                    cmds = fn(1)
                else:
                    # Signature not recognized; emit no commands
                    cmds = []
                for cmd in cmds:
                    makefile += '{}: '.format(curr_command_ind)
                    makefile += ' '.join([str(ci) for ci in prev_command_inds])
                    makefile += '\n'
                    invocation = (' '.join(cmd))
                    makefile += '\t{}\n\n'.format(invocation)
                    curr_command_inds.append(curr_command_ind)
                    curr_command_ind += 1

            prev_command_inds = curr_command_inds

        makefile_path = join(dirname(cfg_json_uri), 'Makefile')
        str_to_file(makefile, makefile_path)
        process = Popen(['make', '-j', '-f', makefile_path])
        terminate_at_exit(process)
        exitcode = process.wait()
        if exitcode != 0:
            sys.exit(exitcode)
        else:
            return 0
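
For reference, the shape of an "external" command that the loop above detects is a pipeline method marked with an external attribute that returns shell-command token lists (each later joined with ' '.join into a Make recipe). A sketch; the marking mechanism shown here is an assumption, and the real API may use a decorator.

class MyPipeline(Pipeline):  # hypothetical pipeline subclass
    def my_external_cmd(self, num_splits):
        # One token list per split; each becomes one Makefile recipe line.
        return [['echo', 'processing', 'split', str(i)]
                for i in range(num_splits)]

    # Marker checked by hasattr(fn, 'external') in the runner above.
    my_external_cmd.external = True
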
Example #26
    def save(self, stats_uri):
        # Ensure lists
        means = list(self.means)
        stds = list(self.stds)
        stats = {'means': means, 'stds': stds}
        str_to_file(json.dumps(stats), stats_uri)
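
The matching load is just the reverse; a sketch mirroring the attribute names used in save:

    def load(self, stats_uri):
        # Sketch of the inverse of save above; reads the JSON written there.
        stats = json.loads(file_to_str(stats_uri))
        self.means = stats['means']
        self.stds = stats['stds']
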
Example #27
    def __init__(self,
                 cfg: LearnerConfig,
                 tmp_dir: str,
                 model_path: Optional[str] = None):
        """Constructor.

        Args:
            cfg: configuration
            tmp_dir: root of temp dirs
            model_path: a local path to model weights. If provided, the model is loaded
                and it is assumed that this Learner will be used for prediction only.
        """
        self.cfg = cfg
        self.tmp_dir = tmp_dir

        # TODO make cache dirs configurable
        torch_cache_dir = '/opt/data/torch-cache'
        os.environ['TORCH_HOME'] = torch_cache_dir
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.data_cache_dir = '/opt/data/data-cache'
        make_dir(self.data_cache_dir)

        self.model = self.build_model()
        self.model.to(self.device)

        if model_path is not None:
            if isfile(model_path):
                self.model.load_state_dict(
                    torch.load(model_path, map_location=self.device))
            else:
                raise Exception(
                    'Model could not be found at {}'.format(model_path))
            self.model.eval()
        else:
            log.info(self.cfg)

            # ds = dataset, dl = dataloader
            self.train_ds = None
            self.train_dl = None
            self.valid_ds = None
            self.valid_dl = None
            self.test_ds = None
            self.test_dl = None

            if cfg.output_uri.startswith('s3://'):
                self.output_dir = get_local_path(cfg.output_uri, tmp_dir)
                make_dir(self.output_dir, force_empty=True)
                if not cfg.overfit_mode:
                    self.sync_from_cloud()
            else:
                self.output_dir = cfg.output_uri
                make_dir(self.output_dir)

            self.last_model_path = join(self.output_dir, 'last-model.pth')
            self.config_path = join(self.output_dir, 'learner-config.json')
            self.train_state_path = join(self.output_dir, 'train-state.json')
            self.log_path = join(self.output_dir, 'log.csv')
            model_bundle_fn = basename(cfg.get_model_bundle_uri())
            self.model_bundle_path = join(self.output_dir, model_bundle_fn)
            self.metric_names = self.build_metric_names()

            str_to_file(self.cfg.json(), self.config_path)
            self.load_init_weights()
            self.load_checkpoint()
            self.opt = self.build_optimizer()
            self.setup_data()
            self.start_epoch = self.get_start_epoch()
            self.steps_per_epoch = len(
                self.train_ds) // self.cfg.solver.batch_sz
            self.step_scheduler = self.build_step_scheduler()
            self.epoch_scheduler = self.build_epoch_scheduler()
            self.setup_tensorboard()
    def test_write_str_http(self):
        self.assertRaises(NotWritableError,
                          lambda: str_to_file('xxx', 'http://localhost/'))
    def test_local(self):
        local_path = os.path.join(self.tmp_dir.name, self.file_name)
        str_to_file(self.content_str, local_path)

        path = get_cached_file(self.cache_dir, local_path)
        self.assertTrue(os.path.isfile(path))