def load_experiment_from_directory(dir_experiment: str) -> Experiment:
    """Load an Experiment from a directory containing a saved config file.

    Args:
        dir_experiment: Directory of a previously-created experiment; must
            contain a config file named ``configs.DEFAULT_FILENAME_CONFIG``.

    Returns:
        Experiment built from the on-disk config, with its model-training
        output directory re-pointed at ``dir_experiment`` so artifacts are
        read from / written to the directory being loaded.

    Raises:
        AssertionError: If the directory does not contain a config file.
    """
    filepath_config = os.path.join(dir_experiment, configs.DEFAULT_FILENAME_CONFIG)
    # Include the missing path in the failure message, matching the style of
    # the other asserts in this codebase ('... not found: {}').
    assert os.path.exists(filepath_config), \
        "Experiment directory must contain a config file; not found: {}".format(filepath_config)
    config = configs.create_config_from_file(filepath_config)
    # The config may have been created elsewhere; re-point outputs at the
    # directory we are loading from.
    config.model_training.dir_out = dir_experiment
    return Experiment(config)
def config_existing(tmp_path) -> configs.Config:
    """Return a test config whose experiment/model has already been built in tmp_path."""
    filepath_yaml = os.path.join(os.path.dirname(__file__), "test_config.yaml")
    config = configs.create_config_from_file(filepath_yaml)
    dir_out = str(tmp_path)
    config.data_build.dir_out = dir_out
    config.model_training.dir_out = dir_out
    # Building (or loading) the model materializes experiment artifacts on
    # disk, so callers receive a config pointing at an *existing* experiment.
    experiment = experiments.Experiment(config)
    experiment.build_or_load_model(num_features=DEFAULT_NUM_FEATURES)
    return config
def _create_unet_configs(print_size_estimate: bool = False) -> None: config_template = configs.create_config_from_file(_FILEPATH_TEMPLATE_UNET) # Please see previous versions of this file for other configs that have been tested window_radius = 128 loss_window_radius = 64 architecture_name = 'dense_unet' all_filters = (8, 16, 24, 32) use_growth = False all_block_structures = ( [4, 4], [4, 4, 4], [4, 4, 4, 4], [6, 6], [6, 6, 6], [6, 6, 6, 6], [8, 8], [8, 8, 8], [8, 8, 8, 8], ) created_build_only = False for block_structure in all_block_structures: for filters in all_filters: # Create new config config_template.data_build.window_radius = window_radius config_template.data_build.loss_window_radius = loss_window_radius config_template.model_training.architecture_name = architecture_name config_template.architecture.block_structure = block_structure config_template.architecture.filters = filters config_template.architecture.use_growth = use_growth # Test that models aren't too large -- hacky! if print_size_estimate and block_structure[0] == 8: from bfgn.experiments import experiments config_template.data_build.dir_out = '.' config_template.model_training.dir_out = '.' config_template.architecture.n_classes = 5 config_template.architecture.block_structure = tuple(block_structure) experiment = experiments.Experiment(config_template) experiment._build_new_model((2*window_radius, 2*window_radius, 5)) print('unet', window_radius, loss_window_radius, architecture_name, block_structure, filters) print(experiment.calculate_model_memory_footprint(config_template.data_samples.batch_size)) print() os.remove('config.yaml') os.remove('model.h5') os.remove('log.out') # Save config to file configs.save_config_to_file(config_template, paths.get_filepath_config_from_config(config_template)) # Save config for build only if not created_build_only: configs.save_config_to_file( config_template, paths.get_filepath_build_only_config_from_config(config_template)) created_build_only = True
def _save_new_config_or_assert_existing_config_matches(self) -> None:
    """Persist the config if none exists yet; otherwise require it to match the one on disk."""
    if os.path.exists(self.filepath_config):
        # A config was saved previously: the provided config must agree with it.
        existing = configs.create_config_from_file(self.filepath_config)
        differences = configs.get_config_differences(self.config, existing)
        assert not differences, \
            "Provided configuration differs from existing configuration at {}; differing values: {}".format(
                self.filepath_config, differences)
    else:
        # First run for this experiment directory: record the config.
        configs.save_config_to_file(self.config, self.filepath_config)
def config_new(tmp_path) -> configs.Config:
    """Return a fresh test config with all output directories pointed at tmp_path."""
    filepath_yaml = os.path.join(os.path.dirname(__file__), "test_config.yaml")
    config = configs.create_config_from_file(filepath_yaml)
    dir_out = str(tmp_path)
    config.data_build.dir_out = dir_out
    config.model_training.dir_out = dir_out
    return config
def _build_dynamic_config_for_uq_experiments(
        config_name: str, label_experiment: str, response_mapping: str) -> configs.Config:
    """Build a fully-resolved config for a UQ experiment (currently disabled).

    NOTE(review): the unconditional raise below makes everything after it dead
    code; the body is kept for reference until the paths are updated to match
    the MP experiment layout.
    """
    raise AssertionError(
        'You need to modify the paths for the UQ experiments, like the MP experiment paths'
    )
    # Number of output classes per response mapping.
    response_mapping_classes = {
        'lwr': 10,
        'lwrn': 8,
    }
    assert response_mapping in response_mapping_classes, \
        'response_mapping is {} but must be one of: {}'.format(response_mapping, response_mapping_classes.keys())
    filepaths_features = list()
    filepaths_responses = list()
    filepaths_boundaries = list()
    # Note that we need to use feature files multiple times in some cases. Feature files will always be associated with
    # reef training data, but may also be associated with land and/or water training data. Thus, feature files may be
    # used 1-3 times in the training data.
    # Note that we need to grab land and water from the "clean" training data directory, and we need to grab general
    # response data from the appropriate folder
    # Get supplemental land/water response data
    filenames = [
        f for f in os.listdir(paths.DIR_DATA_TRAIN_CLEAN)
        if f.endswith('land.tif') or f.endswith('water.tif')
    ]
    for filename in filenames:
        filepath_responses = os.path.join(paths.DIR_DATA_TRAIN_CLEAN, filename)
        # NOTE(review): patterns are not raw strings and '.' is unescaped, so
        # '.tif' matches any char + 'tif' -- harmless here only because the
        # suffix is at the end; fix if this code is revived.
        filepath_features = re.sub('_\w*.tif', '_features.tif', filepath_responses)
        filepath_boundaries = re.sub('.tif', '.shp', filepath_responses)
        assert os.path.exists(filepath_features), 'Features file not found: {}'.format(filepath_features)
        assert os.path.exists(filepath_boundaries), 'Boundaries file not found: {}'.format(filepath_boundaries)
        # Feature/response paths are wrapped in single-element lists (one file set per site).
        filepaths_features.append([filepath_features])
        filepaths_responses.append([filepath_responses])
        filepaths_boundaries.append(filepath_boundaries)
    # Get supplemental additional class response data
    filenames = [
        f for f in os.listdir(paths.DIR_DATA_TRAIN_CLEAN)
        if f.endswith('_model_class.tif')
    ]
    for filename in filenames:
        filepath_responses = os.path.join(paths.DIR_DATA_TRAIN_CLEAN, filename)
        filepath_features = re.sub('_\w*.tif', '_features.tif', filepath_responses)
        # NOTE(review): 'boundaries.shp' lacks the leading underscore used by
        # the UQ loop below ('_boundaries.shp') -- confirm which is correct.
        filepath_boundaries = re.sub('.tif', 'boundaries.shp', filepath_responses)
        assert os.path.exists(filepath_features), 'Features file not found: {}'.format(filepath_features)
        assert os.path.exists(filepath_boundaries), 'Boundaries file not found: {}'.format(filepath_boundaries)
        filepaths_features.append([filepath_features])
        filepaths_responses.append([filepath_responses])
        filepaths_boundaries.append(filepath_boundaries)
    # Get UQ response data
    response_suffix = '_responses_{}b.tif'.format(response_mapping)
    dir_data = paths.get_dir_training_data_experiment(label_experiment=label_experiment)
    assert os.path.exists(dir_data), \
        'Training data directory not found for label_experiment {}: {}'.format(label_experiment, dir_data)
    for filename in os.listdir(dir_data):
        if not filename.endswith(response_suffix):
            continue
        filepath_responses = os.path.join(dir_data, filename)
        filepath_features = re.sub(response_suffix, '_features.tif', filepath_responses)
        # Boundaries always live in the clean training dir, not the experiment dir.
        filename_boundaries = re.sub(response_suffix, '_boundaries.shp', filename)
        filepath_boundaries = os.path.join(paths.DIR_DATA_TRAIN_CLEAN, filename_boundaries)
        assert os.path.exists(filepath_features), 'Features file not found: {}'.format(filepath_features)
        assert os.path.exists(filepath_boundaries), 'Boundaries file not found: {}'.format(filepath_boundaries)
        filepaths_features.append([filepath_features])
        filepaths_responses.append([filepath_responses])
        filepaths_boundaries.append(filepath_boundaries)
    # Parse config and update dynamic values
    config = configs.create_config_from_file(paths.get_filepath_config(config_name))
    config.raw_files.feature_files = filepaths_features
    config.raw_files.response_files = filepaths_responses
    config.raw_files.boundary_files = filepaths_boundaries
    config.data_build.dir_out = paths.get_dir_built_data_experiment(
        label_experiment=label_experiment, response_mapping=response_mapping, config=config)
    config.model_training.dir_out = paths.get_dir_model_experiment_config(
        config_name=config_name, label_experiment=label_experiment, response_mapping=response_mapping)
    config.architecture.n_classes = response_mapping_classes[response_mapping]
    return config
def _build_dynamic_config_for_mp_experiments(
        config_name: str, label_experiment: str, response_mapping: str) -> configs.Config:
    """Build a fully-resolved config for an MP experiment.

    Resolves feature/response/boundary file triplets from the training-data
    directories (downsampled variants when the experiment label contains
    '25' or '50'), then loads the named config template and fills in the
    dynamic values (file lists, output directories, class count).

    Args:
        config_name: Name of the config template to load via paths.get_filepath_config.
        label_experiment: Experiment label; '25'/'50' substrings select downsampled data.
        response_mapping: Response mapping key; must be present in response_mapping_classes.

    Returns:
        Fully populated configs.Config.

    Raises:
        AssertionError: On an unknown response mapping, an unparseable quad
            name, or missing feature/boundary files.
    """
    response_mapping_classes = {'custom': 14}
    assert response_mapping in response_mapping_classes, \
        'response_mapping is {} but must be one of: {}'.format(response_mapping, response_mapping_classes)

    config_features = list()
    config_responses = list()
    config_boundaries = list()

    # Set source directories. The '25' and '50' branches were previously
    # duplicated verbatim; factor the downsample percentage out instead.
    if '25' in label_experiment:
        downsample = '25'
    elif '50' in label_experiment:
        downsample = '50'
    else:
        downsample = None
    if downsample is not None:
        subdir = paths.SUBDIR_DATA_TRAIN_DOWNSAMPLE.format(downsample)
        dir_features = os.path.join(paths.DIR_DATA_TRAIN_FEATURES, subdir)
        dir_responses = os.path.join(paths.DIR_DATA_TRAIN_MP, subdir)
        dir_supps = os.path.join(paths.DIR_DATA_TRAIN_MP_SUPP, subdir)
        suffix = '_' + downsample
    else:
        dir_features = paths.DIR_DATA_TRAIN_FEATURES_CLEAN
        dir_responses = paths.DIR_DATA_TRAIN_MP_CLEAN
        dir_supps = paths.DIR_DATA_TRAIN_MP_SUPP_CLEAN
        suffix = ''
    dir_boundaries = paths.DIR_DATA_TRAIN_MP_BOUNDS

    # Quad names look like "L15-0123E-4567N". Raw string avoids the invalid
    # escape sequence '\d' (SyntaxWarning on Python >= 3.12); compile once
    # instead of re-searching with a string pattern on every iteration.
    pattern_quad = re.compile(r'L15-\d{4}E-\d{4}N')

    # Get feature/response/boundary sets; collect all missing files so the
    # final assert can report everything at once.
    missing_features = list()
    missing_boundaries = list()
    filepaths_responses = sorted(
        os.path.join(dir_responses, filename)
        for filename in os.listdir(dir_responses)
        if filename.endswith('responses_custom{}.tif'.format(suffix))
    )
    for filepath_response in filepaths_responses:
        match = pattern_quad.search(filepath_response)
        # Fail with a clear message instead of an AttributeError on .group().
        assert match is not None, \
            'Could not parse quad name from response filepath: {}'.format(filepath_response)
        quad_name = match.group()
        filepath_feature = os.path.join(dir_features, quad_name + '_features{}.tif'.format(suffix))
        filepath_boundary = os.path.join(dir_boundaries, quad_name + '_boundaries.shp')
        if not os.path.exists(filepath_feature):
            missing_features.append(filepath_feature)
        if not os.path.exists(filepath_boundary):
            missing_boundaries.append(filepath_boundary)
        # Feature/response paths are wrapped in single-element lists (one file set per quad).
        config_features.append([filepath_feature])
        config_responses.append([filepath_response])
        config_boundaries.append(filepath_boundary)

    # Get supplemental sets; these share feature files but have no boundaries.
    filepaths_responses = sorted(
        os.path.join(dir_supps, filename) for filename in os.listdir(dir_supps))
    for filepath_response in filepaths_responses:
        match = pattern_quad.search(filepath_response)
        assert match is not None, \
            'Could not parse quad name from response filepath: {}'.format(filepath_response)
        quad_name = match.group()
        filepath_feature = os.path.join(dir_features, quad_name + '_features{}.tif'.format(suffix))
        if not os.path.exists(filepath_feature):
            missing_features.append(filepath_feature)
        config_features.append([filepath_feature])
        config_responses.append([filepath_response])
        config_boundaries.append(None)

    assert not missing_features and not missing_boundaries, \
        'Missing feature and boundary files: \n\n{} \n\n{}'.format(missing_features, missing_boundaries)

    # Parse config and update dynamic values
    config = configs.create_config_from_file(paths.get_filepath_config(config_name))
    config.raw_files.feature_files = config_features
    config.raw_files.response_files = config_responses
    config.raw_files.boundary_files = config_boundaries
    config.data_build.dir_out = paths.get_dir_built_data_experiment(
        label_experiment=label_experiment, response_mapping=response_mapping, config=config)
    config.model_training.dir_out = paths.get_dir_model_experiment_config(
        config_name=config_name, label_experiment=label_experiment, response_mapping=response_mapping)
    config.architecture.n_classes = response_mapping_classes[response_mapping]
    return config