Example #1
0
def load_experiment_from_directory(dir_experiment: str) -> Experiment:
    """Load an Experiment from a directory holding a previously saved config.

    Args:
        dir_experiment: Directory containing the experiment's config file.

    Returns:
        Experiment built from the on-disk config, with its model output
        directory pointed back at dir_experiment.
    """
    config_path = os.path.join(dir_experiment, configs.DEFAULT_FILENAME_CONFIG)
    assert os.path.exists(config_path), \
        "Experiment directory must contain a config file."
    loaded_config = configs.create_config_from_file(config_path)
    # Re-anchor the model output directory at the directory we loaded from.
    loaded_config.model_training.dir_out = dir_experiment
    return Experiment(loaded_config)
Example #2
0
def config_existing(tmp_path) -> configs.Config:
    """Fixture: config pointing at tmp_path, with a model already built there.

    Builds (or loads) the model as a side effect so that later tests see an
    "existing" experiment on disk.
    """
    filepath = os.path.join(os.path.dirname(__file__), "test_config.yaml")
    cfg = configs.create_config_from_file(filepath)
    out_dir = str(tmp_path)
    cfg.data_build.dir_out = out_dir
    cfg.model_training.dir_out = out_dir
    experiments.Experiment(cfg).build_or_load_model(
        num_features=DEFAULT_NUM_FEATURES)
    return cfg
Example #3
0
def _create_unet_configs(print_size_estimate: bool = False) -> None:
    """Create and save dense U-Net configs for each block-structure/filter combo.

    Args:
        print_size_estimate: If True, additionally build a throwaway model for
            each config whose first block has 8 layers and print its estimated
            memory footprint.

    Returns:
        None. Configs are written to the filepaths given by the paths module.

    Bug fixed: the original mutated a single shared ``config_template`` inside
    the loops, so the size-estimate branch's temporary values (``dir_out='.'``,
    ``n_classes=5``, tuple ``block_structure``) leaked into the config saved in
    the same iteration and into every subsequent saved config. Each iteration
    now works on a deep copy, and the size estimate uses its own scratch copy.
    """
    import copy

    config_template = configs.create_config_from_file(_FILEPATH_TEMPLATE_UNET)

    # Please see previous versions of this file for other configs that have been tested
    window_radius = 128
    loss_window_radius = 64
    architecture_name = 'dense_unet'
    all_filters = (8, 16, 24, 32)
    use_growth = False
    all_block_structures = (
        [4, 4],
        [4, 4, 4],
        [4, 4, 4, 4],
        [6, 6],
        [6, 6, 6],
        [6, 6, 6, 6],
        [8, 8],
        [8, 8, 8],
        [8, 8, 8, 8],
    )
    created_build_only = False
    for block_structure in all_block_structures:
        for filters in all_filters:
            # Create a fresh config per combination; a deep copy prevents the
            # size-estimate branch below from contaminating saved configs.
            config = copy.deepcopy(config_template)
            config.data_build.window_radius = window_radius
            config.data_build.loss_window_radius = loss_window_radius
            config.model_training.architecture_name = architecture_name
            config.architecture.block_structure = block_structure
            config.architecture.filters = filters
            config.architecture.use_growth = use_growth

            # Test that models aren't too large -- hacky!
            if print_size_estimate and block_structure[0] == 8:
                from bfgn.experiments import experiments
                # Scratch copy so the estimate's temporary values never reach
                # the config that gets saved.
                config_estimate = copy.deepcopy(config)
                config_estimate.data_build.dir_out = '.'
                config_estimate.model_training.dir_out = '.'
                config_estimate.architecture.n_classes = 5
                config_estimate.architecture.block_structure = tuple(block_structure)
                experiment = experiments.Experiment(config_estimate)
                experiment._build_new_model((2*window_radius, 2*window_radius, 5))
                print('unet', window_radius, loss_window_radius, architecture_name, block_structure, filters)
                print(experiment.calculate_model_memory_footprint(config_estimate.data_samples.batch_size))
                print()
                # Clean up the scratch files the Experiment wrote into '.'
                os.remove('config.yaml')
                os.remove('model.h5')
                os.remove('log.out')

            # Save config to file
            configs.save_config_to_file(config, paths.get_filepath_config_from_config(config))

            # Save config for build only
            if not created_build_only:
                configs.save_config_to_file(
                    config, paths.get_filepath_build_only_config_from_config(config))
                created_build_only = True
Example #4
0
 def _save_new_config_or_assert_existing_config_matches(self) -> None:
     """Persist the config, or verify it matches the one already on disk.

     If no config file exists at self.filepath_config, write self.config
     there; otherwise load the existing file and assert there are no
     differences from the provided configuration.
     """
     if os.path.exists(self.filepath_config):
         existing = configs.create_config_from_file(self.filepath_config)
         differences = configs.get_config_differences(self.config, existing)
         assert not differences, \
             "Provided configuration differs from existing configuration at {}; differing values: {}".format(
                 self.filepath_config, differences)
     else:
         configs.save_config_to_file(self.config, self.filepath_config)
Example #5
0
def config_new(tmp_path) -> configs.Config:
    """Fixture: fresh config with build/model output redirected to tmp_path."""
    filepath = os.path.join(os.path.dirname(__file__), "test_config.yaml")
    cfg = configs.create_config_from_file(filepath)
    out_dir = str(tmp_path)
    cfg.data_build.dir_out = out_dir
    cfg.model_training.dir_out = out_dir
    return cfg
def _build_dynamic_config_for_uq_experiments(
        config_name: str, label_experiment: str,
        response_mapping: str) -> configs.Config:
    """Build a Config for the UQ experiments with dynamically resolved paths.

    NOTE(review): this function is currently disabled -- it raises immediately
    until the UQ experiment paths are updated to match the MP experiment
    layout. Everything below the raise is intentionally unreachable.

    Args:
        config_name: Name of the base config file to load.
        config_name/label_experiment/response_mapping: Used to resolve data,
            build, and model output directories via the paths module.

    Returns:
        configs.Config with raw file lists, output dirs, and n_classes set.

    Fixes: regex patterns are now raw strings with escaped dots; the original
    '_\\w*.tif' / '.tif' patterns let '.' match any character and used
    unrecognized string escapes (a warning on modern Python).
    """
    raise AssertionError(
        'You need to modify the paths for the UQ experiments, like the MP experiment paths'
    )
    response_mapping_classes = {
        'lwr': 10,
        'lwrn': 8,
    }
    assert response_mapping in response_mapping_classes, \
        'response_mapping is {} but must be one of:  {}'.format(response_mapping, response_mapping_classes.keys())

    filepaths_features = list()
    filepaths_responses = list()
    filepaths_boundaries = list()

    # Note that we need to use feature files multiple times in some cases. Feature files will always be associated with
    # reef training data, but may also be associated with land and/or water training data. Thus, feature files may be
    # used 1-3 times in the training data.
    # Note that we need to grab land and water from the "clean" training data directory, and we need to grab general
    # response data from the appropriate folder

    # Get supplemental land/water response data
    filenames = [
        f for f in os.listdir(paths.DIR_DATA_TRAIN_CLEAN)
        if f.endswith('land.tif') or f.endswith('water.tif')
    ]
    for filename in filenames:
        filepath_responses = os.path.join(paths.DIR_DATA_TRAIN_CLEAN, filename)
        filepath_features = re.sub(r'_\w*\.tif', '_features.tif',
                                   filepath_responses)
        filepath_boundaries = re.sub(r'\.tif', '.shp', filepath_responses)
        assert os.path.exists(
            filepath_features), 'Features file not found:  {}'.format(
                filepath_features)
        assert os.path.exists(
            filepath_boundaries), 'Boundaries file not found:  {}'.format(
                filepath_boundaries)
        filepaths_features.append([filepath_features])
        filepaths_responses.append([filepath_responses])
        filepaths_boundaries.append(filepath_boundaries)

    # Get supplemental additional class response data
    filenames = [
        f for f in os.listdir(paths.DIR_DATA_TRAIN_CLEAN)
        if f.endswith('_model_class.tif')
    ]
    for filename in filenames:
        filepath_responses = os.path.join(paths.DIR_DATA_TRAIN_CLEAN, filename)
        filepath_features = re.sub(r'_\w*\.tif', '_features.tif',
                                   filepath_responses)
        # NOTE(review): this replacement yields '...boundaries.shp' without an
        # underscore, unlike the '_boundaries.shp' suffix used below -- confirm
        # which naming is intended once this function is re-enabled.
        filepath_boundaries = re.sub(r'\.tif', 'boundaries.shp',
                                     filepath_responses)
        assert os.path.exists(
            filepath_features), 'Features file not found:  {}'.format(
                filepath_features)
        assert os.path.exists(
            filepath_boundaries), 'Boundaries file not found:  {}'.format(
                filepath_boundaries)
        filepaths_features.append([filepath_features])
        filepaths_responses.append([filepath_responses])
        filepaths_boundaries.append(filepath_boundaries)

    # Get UQ response data
    response_suffix = '_responses_{}b.tif'.format(response_mapping)
    dir_data = paths.get_dir_training_data_experiment(
        label_experiment=label_experiment)
    assert os.path.exists(
        dir_data
    ), 'Training data directory not found for label_experiment {}:  {}'.format(
        label_experiment, dir_data)
    # Escape the suffix so its '.' is treated literally when used as a pattern.
    pattern_suffix = re.escape(response_suffix)
    for filename in os.listdir(dir_data):
        if not filename.endswith(response_suffix):
            continue
        filepath_responses = os.path.join(dir_data, filename)
        filepath_features = re.sub(pattern_suffix, '_features.tif',
                                   filepath_responses)
        filename_boundaries = re.sub(pattern_suffix, '_boundaries.shp',
                                     filename)
        filepath_boundaries = os.path.join(paths.DIR_DATA_TRAIN_CLEAN,
                                           filename_boundaries)
        assert os.path.exists(
            filepath_features), 'Features file not found:  {}'.format(
                filepath_features)
        assert os.path.exists(
            filepath_boundaries), 'Boundaries file not found:  {}'.format(
                filepath_boundaries)

        filepaths_features.append([filepath_features])
        filepaths_responses.append([filepath_responses])
        filepaths_boundaries.append(filepath_boundaries)

    # Parse config and update dynamic values
    config = configs.create_config_from_file(
        paths.get_filepath_config(config_name))
    config.raw_files.feature_files = filepaths_features
    config.raw_files.response_files = filepaths_responses
    config.raw_files.boundary_files = filepaths_boundaries
    config.data_build.dir_out = paths.get_dir_built_data_experiment(
        label_experiment=label_experiment,
        response_mapping=response_mapping,
        config=config)
    config.model_training.dir_out = paths.get_dir_model_experiment_config(
        config_name=config_name,
        label_experiment=label_experiment,
        response_mapping=response_mapping)
    config.architecture.n_classes = response_mapping_classes[response_mapping]
    return config
def _build_dynamic_config_for_mp_experiments(
        config_name: str, label_experiment: str,
        response_mapping: str) -> configs.Config:
    """Build a Config for the MP experiments with dynamically resolved paths.

    Args:
        config_name: Name of the base config file to load.
        label_experiment: Experiment label; '25'/'50' selects downsampled data.
        response_mapping: Must be a key of the class-count mapping ('custom').

    Returns:
        configs.Config with raw file lists, output dirs, and n_classes set.

    Raises:
        AssertionError: For an unknown response_mapping, for missing feature or
            boundary files, or for a response filename lacking a quad name.

    Fixes: the quad-name regex is now a raw string (the original non-raw
    '\\d' escapes warn on modern Python), compiled once outside the loops, and
    a missing match now fails with a clear assertion instead of an opaque
    AttributeError from calling .group() on None.
    """
    response_mapping_classes = {'custom': 14}
    assert response_mapping in response_mapping_classes, \
        'response_mapping is {} but must be one of:  {}'.format(response_mapping, response_mapping_classes)

    # Planet quad names look like 'L15-0123E-4567N'; compile once for reuse.
    pattern_quad = re.compile(r'L15-\d{4}E-\d{4}N')

    config_features = list()
    config_responses = list()
    config_boundaries = list()

    # Set source directories
    if '25' in label_experiment:
        dir_features = os.path.join(
            paths.DIR_DATA_TRAIN_FEATURES,
            paths.SUBDIR_DATA_TRAIN_DOWNSAMPLE.format('25'))
        dir_responses = os.path.join(
            paths.DIR_DATA_TRAIN_MP,
            paths.SUBDIR_DATA_TRAIN_DOWNSAMPLE.format('25'))
        dir_supps = os.path.join(
            paths.DIR_DATA_TRAIN_MP_SUPP,
            paths.SUBDIR_DATA_TRAIN_DOWNSAMPLE.format('25'))
        suffix = '_25'
    elif '50' in label_experiment:
        dir_features = os.path.join(
            paths.DIR_DATA_TRAIN_FEATURES,
            paths.SUBDIR_DATA_TRAIN_DOWNSAMPLE.format('50'))
        dir_responses = os.path.join(
            paths.DIR_DATA_TRAIN_MP,
            paths.SUBDIR_DATA_TRAIN_DOWNSAMPLE.format('50'))
        dir_supps = os.path.join(
            paths.DIR_DATA_TRAIN_MP_SUPP,
            paths.SUBDIR_DATA_TRAIN_DOWNSAMPLE.format('50'))
        suffix = '_50'
    else:
        dir_features = paths.DIR_DATA_TRAIN_FEATURES_CLEAN
        dir_responses = paths.DIR_DATA_TRAIN_MP_CLEAN
        dir_supps = paths.DIR_DATA_TRAIN_MP_SUPP_CLEAN
        suffix = ''
    dir_boundaries = paths.DIR_DATA_TRAIN_MP_BOUNDS

    # Get feature/response/boundary sets; collect all missing files so the
    # assertion below can report everything at once.
    missing_features = list()
    missing_boundaries = list()
    filepaths_responses = sorted([
        os.path.join(dir_responses, filename)
        for filename in os.listdir(dir_responses)
        if filename.endswith('responses_custom{}.tif'.format(suffix))
    ])
    for filepath_response in filepaths_responses:
        match = pattern_quad.search(filepath_response)
        assert match is not None, \
            'Quad name not found in response filepath:  {}'.format(filepath_response)
        quad_name = match.group()
        filepath_feature = os.path.join(
            dir_features, quad_name + '_features{}.tif'.format(suffix))
        filepath_boundary = os.path.join(dir_boundaries,
                                         quad_name + '_boundaries.shp')
        if not os.path.exists(filepath_feature):
            missing_features.append(filepath_feature)
        if not os.path.exists(filepath_boundary):
            missing_boundaries.append(filepath_boundary)

        config_features.append([filepath_feature])
        config_responses.append([filepath_response])
        config_boundaries.append(filepath_boundary)

    # Get supplemental sets (no boundary files for these)
    filepaths_responses = sorted([
        os.path.join(dir_supps, filename) for filename in os.listdir(dir_supps)
    ])
    for filepath_response in filepaths_responses:
        match = pattern_quad.search(filepath_response)
        assert match is not None, \
            'Quad name not found in response filepath:  {}'.format(filepath_response)
        quad_name = match.group()
        filepath_feature = os.path.join(
            dir_features, quad_name + '_features{}.tif'.format(suffix))
        if not os.path.exists(filepath_feature):
            missing_features.append(filepath_feature)

        config_features.append([filepath_feature])
        config_responses.append([filepath_response])
        config_boundaries.append(None)

    assert not missing_features and not missing_boundaries, \
        'Missing feature and boundary files:  \n\n{} \n\n{}'.format(missing_features, missing_boundaries)

    # Parse config and update dynamic values
    config = configs.create_config_from_file(
        paths.get_filepath_config(config_name))
    config.raw_files.feature_files = config_features
    config.raw_files.response_files = config_responses
    config.raw_files.boundary_files = config_boundaries
    config.data_build.dir_out = paths.get_dir_built_data_experiment(
        label_experiment=label_experiment,
        response_mapping=response_mapping,
        config=config)
    config.model_training.dir_out = paths.get_dir_model_experiment_config(
        config_name=config_name,
        label_experiment=label_experiment,
        response_mapping=response_mapping)
    config.architecture.n_classes = response_mapping_classes[response_mapping]
    return config