def test_evaluator(self):
    """Evaluate scenes 0 and 1 and compare the written JSON to the fixture.

    The evaluator is built with ``None`` as its third constructor argument
    and writes its result to ``out.json`` inside the test's temp dir.
    """
    work_dir = self.tmp_dir.name
    eval_path = join(work_dir, 'out.json')
    scene_pair = [self.get_scene(idx) for idx in (0, 1)]

    evaluator = SemanticSegmentationEvaluator(
        self.class_config, eval_path, None)
    evaluator.process(scene_pair, work_dir)

    actual = file_to_json(eval_path)
    expected = file_to_json(data_file_path('expected-eval.json'))
    self.assertDictEqual(actual, expected)
Ejemplo n.º 2
0
def _compare_evals(
        root_uri_old: str,
        root_uri_new: str,
        float_tol: float = 1e-3,
        exclude_keys: list = None) -> None:
    """Compare outputs of the eval command for two runs of an example.

    Loads ``eval.json`` from each root and hands both dicts to
    ``_compare_dicts``.

    Args:
        root_uri_old: Directory containing the old run's ``eval.json``.
        root_uri_new: Directory containing the new run's ``eval.json``.
        float_tol: Tolerance forwarded to ``_compare_dicts``.
        exclude_keys: Keys forwarded to ``_compare_dicts`` as
            ``exclude_keys``. When ``None`` (the default), a fresh
            ``['conf_mat', 'count_error', 'per_scene']`` list is used.
    """
    # Build the default per call: a list literal in the signature would be
    # one shared object that any downstream mutation would leak across calls.
    if exclude_keys is None:
        exclude_keys = ['conf_mat', 'count_error', 'per_scene']

    console_heading('Comparing keys and values in eval.json files...')
    eval_json_old = join(root_uri_old, 'eval.json')
    eval_json_new = join(root_uri_new, 'eval.json')
    eval_old = file_to_json(eval_json_old)
    eval_new = file_to_json(eval_json_new)
    _compare_dicts(eval_old,
                   eval_new,
                   float_tol=float_tol,
                   exclude_keys=exclude_keys)
Ejemplo n.º 3
0
    def from_model_bundle(model_bundle_uri: str, tmp_dir: str):
        """Build a Learner from the contents of a zipped model bundle.

        The bundle is fetched into ``tmp_dir``, extracted to
        ``<tmp_dir>/model-bundle``, and its ``pipeline-config.json`` is
        upgraded and turned into a config object. If the config declares an
        external model and/or loss definition, the corresponding directory
        inside the bundle is resolved and passed on to the learner.

        Args:
            model_bundle_uri: URI of the model bundle archive.
            tmp_dir: Directory used for the download and the extraction.

        Returns:
            The result of ``cfg.learner.build(...)``.
        """
        bundle_zip = download_if_needed(model_bundle_uri, tmp_dir)
        bundle_dir = join(tmp_dir, 'model-bundle')
        unzip(bundle_zip, bundle_dir)

        weights_path = join(bundle_dir, 'model.pth')
        raw_cfg = file_to_json(join(bundle_dir, 'pipeline-config.json'))
        cfg = build_config(upgrade_config(raw_cfg))

        modules_root = join(bundle_dir, MODULES_DIRNAME)

        # Use the model definition shipped in the bundle, when one is declared.
        model_def_path = None
        external_model = cfg.learner.model.external_def
        if external_model is not None:
            model_def_path = get_hubconf_dir_from_cfg(
                external_model, parent=modules_root)
            log.info(
                f'Using model definition found in bundle: {model_def_path}')

        # Same for the loss function definition.
        loss_def_path = None
        external_loss = cfg.learner.solver.external_loss_def
        if external_loss is not None:
            loss_def_path = get_hubconf_dir_from_cfg(
                external_loss, parent=modules_root)
            log.info(f'Using loss definition found in bundle: {loss_def_path}')

        return cfg.learner.build(
            tmp_dir=tmp_dir,
            model_path=weights_path,
            model_def_path=model_def_path,
            loss_def_path=loss_def_path)
    def test_accounts_for_aoi(self):
        """Evaluate a scene configured with an AOI and expect a perfect f1.

        Builds a chip classification scene from fixture files (label source,
        label store, blank raster, AOI), runs the evaluator on it, and
        asserts that every entry under ``'overall'`` in the written eval
        JSON has ``f1 == 1.0``.
        """
        class_config = ClassConfig(names=['car', 'building', 'background'])

        labels_cfg = ChipClassificationLabelSourceConfig(
            vector_source=GeoJSONVectorSourceConfig(
                uri=data_file_path('evaluator/cc-label-filtered.json'),
                default_class_id=None))
        store_cfg = ChipClassificationGeoJSONStoreConfig(
            uri=data_file_path('evaluator/cc-label-full.json'))
        raster_cfg = RasterioSourceConfig(
            uris=[data_file_path('evaluator/cc-label-img-blank.tif')])

        scene_cfg = SceneConfig(
            id='test',
            raster_source=raster_cfg,
            label_source=labels_cfg,
            label_store=store_cfg,
            aoi_uris=[data_file_path('evaluator/cc-label-aoi.json')])

        with rv_config.get_tmp_dir() as tmp_dir:
            scene = scene_cfg.build(class_config, tmp_dir)
            eval_path = os.path.join(tmp_dir, 'eval.json')

            evaluator_cfg = ChipClassificationEvaluatorConfig(
                output_uri=eval_path)
            evaluator_cfg.build(class_config).process([scene], tmp_dir)

            for entry in file_to_json(eval_path)['overall']:
                self.assertEqual(entry['f1'], 1.0)
Ejemplo n.º 5
0
def collect_eval_dir(root_uri):
    """Print a short f1 summary for each ``eval.json`` under ``<root_uri>/eval``.

    For every path returned by ``list_paths`` this prints the name of the
    directory holding the file, then the ``f1`` of the last entry in the
    file's ``'overall'`` list, then a blank line.

    Args:
        root_uri: Root whose ``eval`` subdirectory is searched.
    """
    for uri in list_paths(join(root_uri, 'eval'), ext='eval.json'):
        evaluation = file_to_json(uri)
        parent_name = basename(dirname(uri))
        print(parent_name)
        print(evaluation['overall'][-1]['f1'])
        print()
Ejemplo n.º 6
0
def collect_experiment(key, root_uri, output_dir, get_pred_package=False):
    """Copy one experiment's eval.json (and optionally its predict package).

    Looks under ``<root_uri>/<key>/bundle`` and ``<root_uri>/<key>/eval`` for
    exactly one ``predict_package.zip`` and one ``eval.json``. S3 roots are
    listed with ``list_paths``; any other root is globbed one directory
    level down. If either file is missing, or more than one of either is
    found, a message is printed and nothing is copied.

    Args:
        key: Experiment key; also the name of the output subdirectory.
        root_uri: Root location of the experiment outputs.
        output_dir: Directory under which ``<key>/`` is created.
        get_pred_package: Also copy the predict package when true.
    """
    print(f'\nCollecting experiment {key}...\n')

    on_s3 = root_uri.startswith('s3://')
    bundle_root = join(root_uri, key, 'bundle')
    eval_root = join(root_uri, key, 'eval')
    if on_s3:
        package_uris = list_paths(bundle_root, ext='predict_package.zip')
        eval_uris = list_paths(eval_root, ext='eval.json')
    else:
        package_uris = glob.glob(
            join(bundle_root, '*', 'predict_package.zip'))
        eval_uris = glob.glob(join(eval_root, '*', 'eval.json'))

    n_packages, n_evals = len(package_uris), len(eval_uris)

    if n_packages > 1 or n_evals > 1:
        print('Cannot collect from key with multiple experiments!!!')
        return

    if n_packages == 0 or n_evals == 0:
        print('Missing output!!!')
        return

    dest_dir = join(output_dir, key)
    make_dir(dest_dir)
    if get_pred_package:
        download_or_copy(package_uris[0], dest_dir)
    download_or_copy(eval_uris[0], dest_dir)

    collected = file_to_json(join(dest_dir, 'eval.json'))
    pprint.pprint(collected['overall'], indent=4)
    def test_vector_evaluator_with_aoi(self):
        """Run vector evaluation on an AOI scene and compare to the fixture.

        Only the vector output is checked; the raster output is written to
        ``raster-out.json`` but not inspected.
        """
        work_dir = self.tmp_dir.name
        raster_eval_path = join(work_dir, 'raster-out.json')
        vector_eval_path = join(work_dir, 'vector-out.json')
        aoi_scenes = [self.get_vector_scene(0, use_aoi=True)]

        evaluator = SemanticSegmentationEvaluator(
            self.class_config, raster_eval_path, vector_eval_path)
        evaluator.process(aoi_scenes, work_dir)

        actual = file_to_json(vector_eval_path)
        expected = file_to_json(
            data_file_path('expected-vector-eval-with-aoi.json'))

        # NOTE: The precision and recall values found in the file
        # `expected-vector-eval.json` are equal to fractions of the form
        # (n-1)/n for n <= 7, which can be seen to be (and have been
        # manually verified to be) correct.
        # NOTE(review): the note above names `expected-vector-eval.json`,
        # but this test loads `expected-vector-eval-with-aoi.json` -- confirm
        # whether the same verification applies to the AOI fixture.
        self.assertDictEqual(actual, expected)
Ejemplo n.º 8
0
def is_label_item(item: Item) -> bool:
    """Tell whether any of the item's extension schemas is the label extension.

    Each URI in ``item.stac_extensions`` is loaded as JSON in turn; the item
    counts as a label item as soon as one schema's ``title`` equals
    "label extension", ignoring case. Schemas after the first match are not
    loaded.
    """
    return any(
        file_to_json(schema_uri)['title'].lower() == 'label extension'
        for schema_uri in item.stac_extensions)
Ejemplo n.º 9
0
def check_eval(test, tmp_dir):
    """Compare a test's actual eval output against its expected eval.

    Each item in the expected eval's ``'overall'`` list is matched, by
    ``class_name``, with the first such item in the actual eval's
    ``'overall'`` list, and the pair is checked with ``check_eval_item``.

    Args:
        test: The test being checked; passed through to the path helpers,
            ``check_eval_item`` and ``TestError``.
        tmp_dir: Directory used to locate the actual eval file.

    Returns:
        A list of errors; empty when nothing was flagged. A missing actual
        eval file, or an expected class with no counterpart in the actual
        eval, is reported as a ``TestError`` rather than raised.
    """
    actual_eval_path = get_actual_eval_path(test, tmp_dir)
    expected_eval_path = get_expected_eval_path(test)

    if not isfile(actual_eval_path):
        return [
            TestError(test, 'actual eval file does not exist',
                      actual_eval_path)
        ]

    expected_eval = file_to_json(expected_eval_path)['overall']
    actual_eval = file_to_json(actual_eval_path)['overall']

    errors = []
    for expected_item in expected_eval:
        class_name = expected_item['class_name']
        # A default is supplied so that a class absent from the actual eval
        # yields None instead of leaking a bare StopIteration to the caller.
        actual_item = next(
            (item for item in actual_eval
             if item['class_name'] == class_name), None)
        if actual_item is None:
            errors.append(
                TestError(
                    test,
                    'actual eval has no item for class {}'.format(class_name),
                    actual_eval_path))
            continue
        errors.extend(check_eval_item(test, expected_item, actual_item))

    return errors
Ejemplo n.º 10
0
    def from_model_bundle(model_bundle_uri: str, tmp_dir: str):
        """Build a Learner from the contents of a zipped model bundle.

        Args:
            model_bundle_uri: URI of the model bundle archive.
            tmp_dir: Directory used for the download and the extraction
                (the bundle is unpacked into ``<tmp_dir>/model-bundle``).

        Returns:
            The result of ``cfg.learner.build(tmp_dir, model_path=...)``,
            where ``model_path`` points at ``model.pth`` inside the bundle.
        """
        bundle_zip = download_if_needed(model_bundle_uri, tmp_dir)
        bundle_dir = join(tmp_dir, 'model-bundle')
        unzip(bundle_zip, bundle_dir)

        weights_path = join(bundle_dir, 'model.pth')
        raw_cfg = file_to_json(join(bundle_dir, 'pipeline-config.json'))

        # Configs are upgraded before being built into a config object.
        cfg = build_config(upgrade_config(raw_cfg))
        return cfg.learner.build(tmp_dir, model_path=weights_path)
Ejemplo n.º 11
0
def _get_run_df(run_dirs):
    """Combine options/hyperparams and metrics of several runs into one df.

    For each run directory, the options returned by ``get_pipeline_options``
    become the leading columns (in the dict's order), followed by a
    ``building_f1`` column read from ``train/test_metrics.json``.

    Args:
        run_dirs: Iterable of '/'-separated run directory paths. The last
            two path components, joined by '-', form the run's key.

    Returns:
        A DataFrame with the per-run frames concatenated and re-indexed.

    Raises:
        ValueError: From ``pd.concat`` when ``run_dirs`` yields nothing.
    """
    dfs = []
    for run_dir in run_dirs:
        run_key = '-'.join(run_dir.split('/')[-2:])
        pipeline_cfg_uri = join(run_dir, 'pipeline-config.json')
        options = get_pipeline_options(run_key, pipeline_cfg_uri)
        metrics_uri = join(run_dir, 'train/test_metrics.json')
        metrics_dict = file_to_json(metrics_uri)

        df = pd.DataFrame()
        # Always append at the current end. This keeps the column order of
        # `options` without depending on a loop index that would be unbound
        # after the loop if `options` were empty.
        for opt_name, opt_val in options.items():
            df.insert(len(df.columns), opt_name, [opt_val])
        df.insert(len(df.columns), 'building_f1', metrics_dict['building_f1'])
        dfs.append(df)
    return pd.concat(dfs, ignore_index=True)
Ejemplo n.º 12
0
def get_pipeline_options(key, pipeline_cfg_uri):
    """Return the options/hyperparameters recorded for one pipeline run.

    Args:
        key: Identifier stored under ``'key'`` in the result.
        pipeline_cfg_uri: URI of the run's pipeline config JSON.

    Returns:
        A dict with ``'key'``, ``'num_epochs'`` (from
        ``backend.solver.num_epochs``) and ``'train_sz'`` (from
        ``backend.data.train_sz_rel``).
    """
    pipeline_cfg = file_to_json(pipeline_cfg_uri)
    solver_cfg = pipeline_cfg['backend']['solver']
    data_cfg = pipeline_cfg['backend']['data']
    return {
        'key': key,
        'num_epochs': solver_cfg['num_epochs'],
        'train_sz': data_cfg['train_sz_rel'],
    }
Ejemplo n.º 13
0
def _run_command(cfg_json_uri: str,
                 command: str,
                 split_ind: Optional[int] = None,
                 num_splits: Optional[int] = None,
                 runner: Optional[str] = None):
    """Run a single command using a serialized PipelineConfig.

    The pipeline is built inside a temporary directory that is removed when
    the command finishes, whether it returns or raises.

    Args:
        cfg_json_uri: URI of a JSON file with a serialized PipelineConfig
        command: name of command to run
        split_ind: the index that a split command should assume
        num_splits: the total number of splits to use
        runner: the name of the runner to use
    """
    pipeline_cfg_dict = file_to_json(cfg_json_uri)
    rv_config_dict = pipeline_cfg_dict.get('rv_config')
    rv_config.set_everett_config(profile=rv_config.profile,
                                 config_overrides=rv_config_dict)

    tmp_dir_obj = rv_config.get_tmp_dir()
    # Enter the temp dir object so cleanup is deterministic instead of
    # depending on when the object happens to be garbage collected.
    with tmp_dir_obj:
        tmp_dir = tmp_dir_obj.name

        cfg = build_config(pipeline_cfg_dict)
        pipeline = cfg.build(tmp_dir)

        # When no split index was given, ask the runner for it.
        if (num_splits is not None and split_ind is None
                and runner is not None):
            runner = registry.get_runner(runner)()
            split_ind = runner.get_split_ind()

        command_fn = getattr(pipeline, command)

        if num_splits is not None and num_splits > 1:
            # NOTE(review): split_ind is still None here if neither
            # split_ind nor runner was passed; `split_ind + 1` then fails.
            msg = 'Running {} command split {}/{}...'.format(
                command, split_ind + 1, num_splits)
            click.secho(msg, fg='green', bold=True)
            command_fn(split_ind=split_ind, num_splits=num_splits)
        else:
            msg = 'Running {} command...'.format(command)
            click.secho(msg, fg='green', bold=True)
            command_fn()
Ejemplo n.º 14
0
def _collect(key, root_uri, output_dir, collect_dir, get_model_bundle=False):
    """Copy one experiment's eval.json (and optionally its model bundle).

    Both ``<root_uri>/<output_dir>/eval/eval.json`` and
    ``<root_uri>/<output_dir>/bundle/model-bundle.zip`` must exist;
    otherwise a message is printed and nothing is copied. The copied
    eval's ``'overall'`` section is pretty-printed at the end.

    Args:
        key: Experiment key; also the subdirectory name in ``collect_dir``.
        root_uri: Root location of the experiment outputs.
        output_dir: The experiment's output directory under ``root_uri``.
        collect_dir: Directory under which ``<key>/`` is created.
        get_model_bundle: Also copy the model bundle when true.
    """
    print(f'\nCollecting experiment {key}...\n')

    run_root = join(root_uri, output_dir)
    bundle_uri = join(run_root, 'bundle', 'model-bundle.zip')
    eval_uri = join(run_root, 'eval', 'eval.json')

    if not file_exists(eval_uri):
        print('Missing eval!')
        return

    if not file_exists(bundle_uri):
        print('Missing model bundle!')
        return

    dest_dir = join(collect_dir, key)
    make_dir(dest_dir)
    if get_model_bundle:
        download_or_copy(bundle_uri, dest_dir)
    download_or_copy(eval_uri, dest_dir)

    collected = file_to_json(join(dest_dir, 'eval.json'))
    pprint.pprint(collected['overall'], indent=4)
Ejemplo n.º 15
0
    def __init__(self, img_dir, annotation_uri, transform=None):
        """Index the annotation JSON by image id.

        After construction, ``self.img_ids`` lists the ids from the JSON's
        ``'images'`` section in file order, and ``self.id2ann`` maps each id
        to a dict with the image's file name (``'image'``) and parallel
        ``'bboxes'`` / ``'category_id'`` lists filled from the
        ``'annotations'`` section.

        Args:
            img_dir: Stored as ``self.img_dir``; not otherwise used here.
            annotation_uri: URI of the annotation JSON file.
            transform: Stored as ``self.transform``; not otherwise used here.
        """
        self.img_dir = img_dir
        self.annotation_uri = annotation_uri
        self.transform = transform

        self.img_ids = []
        self.id2ann = {}
        annotations_doc = file_to_json(annotation_uri)

        # First pass: one empty record per image.
        for image_entry in annotations_doc['images']:
            image_id = image_entry['id']
            self.img_ids.append(image_id)
            self.id2ann[image_id] = {
                'image': image_entry['file_name'],
                'bboxes': [],
                'category_id': []
            }

        # Second pass: attach each annotation to its image's record.
        for annotation in annotations_doc['annotations']:
            record = self.id2ann[annotation['image_id']]
            record['bboxes'].append(annotation['bbox'])
            record['category_id'].append(annotation['category_id'])
    def get_vector_scene(self, class_id, use_aoi=False):
        """Build a test Scene from the GeoJSON polygon fixtures of a class.

        Ground truth and predictions are both rasterized from
        ``<class_id>-gt-polygons.geojson`` and
        ``<class_id>-pred-polygons.geojson`` over a zero-filled mock raster.
        The prediction label source also gets one polygon vector output
        config pointing at the prediction file.

        Args:
            class_id: Class id; selects the fixture files and is used (as a
                string) for the scene id.
            use_aoi: When true, the first feature of
                ``<class_id>-aoi.geojson`` is passed to the Scene as its AOI.

        Returns:
            The constructed ``Scene``.
        """
        gt_uri = data_file_path(f'{class_id}-gt-polygons.geojson')
        pred_uri = data_file_path(f'{class_id}-pred-polygons.geojson')

        scene_id = str(class_id)
        raster_source = MockRasterSource(
            channel_order=[0, 1, 3], num_channels=3)
        raster_source.set_raster(np.zeros((10, 10, 3)))

        crs_transformer = IdentityCRSTransformer()
        extent = Box.make_square(0, 0, 360)

        def rasterized_label_source(polygons_uri):
            # Ground truth and predictions share the same rasterization
            # settings; only the polygon file differs.
            source_cfg = RasterizedSourceConfig(
                vector_source=GeoJSONVectorSourceConfig(
                    uri=polygons_uri, default_class_id=0),
                rasterizer_config=RasterizerConfig(background_class_id=1))
            rasterized = source_cfg.build(self.class_config, crs_transformer,
                                          extent)
            return SemanticSegmentationLabelSource(rasterized,
                                                   self.null_class_id)

        gt_ls = rasterized_label_source(gt_uri)
        pred_ls = rasterized_label_source(pred_uri)
        pred_ls.vector_output = [
            PolygonVectorOutputConfig(
                uri=pred_uri, denoise=0, class_id=class_id)
        ]

        if not use_aoi:
            return Scene(scene_id, raster_source, gt_ls, pred_ls)

        aoi_geojson = file_to_json(
            data_file_path(f'{class_id}-aoi.geojson'))
        aoi_polygons = [shape(aoi_geojson['features'][0]['geometry'])]
        return Scene(scene_id, raster_source, gt_ls, pred_ls, aoi_polygons)