def submit_validation_slurm(
        labels_experiments: str,
        response_mappings: str,
        config_names: str = None,
        config_regex: str = None,
        run_all: bool = False
) -> None:
    """Submit SLURM validation jobs for each config/experiment/mapping combination.

    Jobs are skipped when classification has not completed yet (no trained model
    to validate) or when validation is already complete.

    Args:
        labels_experiments: Comma-separated experiment labels.
        response_mappings: Comma-separated response mappings.
        config_names: Optional comma-separated config names; when None, all
            relevant configs (optionally filtered by config_regex) are used.
        config_regex: Optional regex used to filter config filenames.
        run_all: Whether to pass --run_all through to the validation script.

    Returns:
        None. Side effect: submits jobs via `subprocess.call(..., shell=True)`.
    """
    # Get config filenames
    if config_names:
        config_names = config_names.split(',')
    filename_configs = shared_submit_slurm.get_relevant_config_filenames(
        config_names=config_names, build_only=False, config_regex=config_regex)

    # Validate labels/mappings once up front instead of on every config iteration
    labels = labels_experiments.split(',')
    mappings = response_mappings.split(',')
    for label_experiment in labels:
        shared_submit_slurm.validate_label_experiment(label_experiment)
    for response_mapping in mappings:
        shared_submit_slurm.validate_response_mapping(response_mapping)

    # Loop-invariant: the directory containing this script
    dir_working = os.path.dirname(os.path.abspath(__file__))

    # Loop through configs and submit jobs
    for filename_config in filename_configs:
        for label_experiment in labels:
            for response_mapping in mappings:
                config_name = os.path.splitext(filename_config)[0]
                job_name = shared_submit_slurm.get_validation_job_name(
                    config_name=config_name, label_experiment=label_experiment,
                    response_mapping=response_mapping)

                # Do not submit jobs that do not have trained models or are already complete
                filepath_classify = paths.get_filepath_classify_complete(
                    config_name=config_name, label_experiment=label_experiment,
                    response_mapping=response_mapping)
                filepath_validate = paths.get_filepath_validation_complete(
                    config_name=config_name, label_experiment=label_experiment,
                    response_mapping=response_mapping)
                if not os.path.exists(filepath_classify):
                    # Include label_experiment for consistency with the
                    # classification submitter's status messages
                    print('Classification not complete: {} {} {}'.format(
                        config_name, label_experiment, response_mapping))
                    continue
                if os.path.exists(filepath_validate):
                    print('Validation complete: {} {} {}'.format(
                        config_name, label_experiment, response_mapping))
                    continue

                # Set dynamic SLURM arguments
                dir_model = paths.get_dir_model_experiment_config(
                    config_name=config_name, label_experiment=label_experiment,
                    response_mapping=response_mapping)
                slurm_args_dynamic = ' '.join([
                    shared_submit_slurm.SLURM_GPUS,
                    '--job-name={}'.format(job_name),
                    '--output={}/slurm.validate.%j.%t.OUT'.format(dir_model),
                    '--error={}/slurm.validate.%j.%t.ERROR'.format(dir_model),
                ])

                # Set dynamic python arguments
                slurm_python_wrap = SLURM_COMMAND_VALIDATE.format(
                    dir_working=dir_working,
                    config_name=config_name,
                    label_experiment=label_experiment,
                    response_mapping=response_mapping,
                    run_all='--run_all' if run_all else ''
                )

                print('Submitting job {}'.format(job_name))
                command = ' '.join([shared_submit_slurm.SLURM_COMMAND, slurm_args_dynamic, slurm_python_wrap])
                subprocess.call(command, shell=True)
def get_model_logger(logger_name: str, config_name: str, label_experiment: str,
                     response_mapping: str) -> logging.Logger:
    """Attach a DEBUG-level file handler under the model directory and return the logger.

    Args:
        logger_name: Basename for the log file; '.log' is appended if missing.
        config_name: Config name used to locate the model directory.
        label_experiment: Experiment label used to locate the model directory.
        response_mapping: Response mapping used to locate the model directory.

    Returns:
        The root logger with a FileHandler writing to the model directory.
    """
    # NOTE(review): logger_name is only used for the log *file* name; this
    # function configures the ROOT logger (logging.getLogger() with no name).
    # Presumably intentional so all module logs land in the file — confirm.
    log_out = os.path.join(
        paths.get_dir_model_experiment_config(config_name, label_experiment, response_mapping),
        logger_name)
    if not log_out.endswith('.log'):
        log_out += '.log'
    # exist_ok avoids the race between an exists() check and makedirs()
    os.makedirs(os.path.dirname(log_out), exist_ok=True)
    logger = logging.getLogger()
    logger.setLevel(logging.DEBUG)
    # Guard against attaching a second handler for the same file: without this,
    # repeated calls duplicate every log line.
    log_out_abs = os.path.abspath(log_out)
    already_attached = any(
        isinstance(existing, logging.FileHandler) and existing.baseFilename == log_out_abs
        for existing in logger.handlers
    )
    if not already_attached:
        formatter = logging.Formatter(
            fmt='%(asctime)s - %(processName)s - %(name)s - %(levelname)s - %(message)s'
        )
        handler = logging.FileHandler(log_out)
        handler.setFormatter(formatter)
        logger.addHandler(handler)
    return logger
def submit_classification_slurm(labels_experiments: str,
                                response_mappings: str,
                                config_names: str = None,
                                config_regex: str = None,
                                build_only: bool = False,
                                run_all: bool = False) -> None:
    """Submit SLURM classification (or data-build) jobs for each config/experiment/mapping.

    Jobs are skipped when already queued, already complete, or — with build_only —
    when the data build is complete. Stale lock files for jobs that are no longer
    queued are removed before submitting.

    Args:
        labels_experiments: Comma-separated experiment labels.
        response_mappings: Comma-separated response mappings.
        config_names: Optional comma-separated config names; ignored when
            build_only is True (all relevant configs are built).
        config_regex: Optional regex used to filter config filenames.
        build_only: Whether to only build data, without training/classifying.
        run_all: Whether to pass --run_all through to the classification script.

    Returns:
        None. Side effect: submits jobs via `subprocess.call(..., shell=True)`.
    """
    # Local import keeps the module-level import block untouched
    import getpass

    # Warning about usage and error checks
    if build_only and config_names:
        print('WARNING: build_only takes precedence over config_names, which is ignored')
        config_names = None
    if config_names:
        config_names = config_names.split(',')
    filename_configs = shared_submit_slurm.get_relevant_config_filenames(
        config_names, build_only, config_regex)

    # Validate labels/mappings once up front instead of on every config iteration
    labels = labels_experiments.split(',')
    mappings = response_mappings.split(',')
    for label_experiment in labels:
        shared_submit_slurm.validate_label_experiment(label_experiment)
    for response_mapping in mappings:
        shared_submit_slurm.validate_response_mapping(response_mapping)

    # Snapshot the job queue once: each job name is checked exactly once per run,
    # so a single squeue call replaces one subprocess per combination.
    # getuser() replaces the previously hardcoded username so the script works
    # for any submitter.
    squeue_command = 'squeue -u {} -o %j'.format(getpass.getuser())
    squeue_result = subprocess.run(shlex.split(squeue_command), capture_output=True)
    queued_job_names = squeue_result.stdout.decode('utf-8')

    # Loop-invariant: the directory containing this script
    dir_working = os.path.dirname(os.path.abspath(__file__))

    # Loop through configs and submit jobs
    for filename_config in filename_configs:
        for label_experiment in labels:
            for response_mapping in mappings:
                config_name = os.path.splitext(filename_config)[0]
                job_name = shared_submit_slurm.get_classify_job_name(
                    config_name, label_experiment, response_mapping)

                # Create model directory
                dir_model = paths.get_dir_model_experiment_config(
                    config_name=config_name, label_experiment=label_experiment,
                    response_mapping=response_mapping)
                os.makedirs(dir_model, exist_ok=True)

                # Do not submit if classification is locked or complete, or if data
                # is built and build_only is True
                filepath_built = paths.get_filepath_build_complete(
                    config_name=config_name, label_experiment=label_experiment,
                    response_mapping=response_mapping)
                filepath_complete = paths.get_filepath_classify_complete(
                    config_name=config_name, label_experiment=label_experiment,
                    response_mapping=response_mapping)
                filepath_lock = paths.get_filepath_classify_lock(
                    config_name=config_name, label_experiment=label_experiment,
                    response_mapping=response_mapping)

                is_in_job_queue = job_name in queued_job_names
                # A lock file without a queued job is stale — remove it
                if not is_in_job_queue and os.path.exists(filepath_lock):
                    os.remove(filepath_lock)

                if is_in_job_queue:
                    print('Classification in progress: {} {} {}'.format(
                        config_name, label_experiment, response_mapping))
                    continue
                elif os.path.exists(filepath_complete):
                    print('Classification complete: {} {} {}'.format(
                        config_name, label_experiment, response_mapping))
                    continue
                elif os.path.exists(filepath_built) and build_only:
                    print('Data build complete: {} {} {}'.format(
                        config_name, label_experiment, response_mapping))
                    continue

                # Set dynamic SLURM arguments; data builds do not need large GPUs
                slurm_args_dynamic = ' '.join([
                    '' if build_only else shared_submit_slurm.SLURM_GPUS_LARGE,
                    '--job-name={}'.format(job_name),
                    '--output={}/slurm.classify.%j.%t.OUT'.format(dir_model),
                    '--error={}/slurm.classify.%j.%t.ERROR'.format(dir_model),
                ])

                # Set dynamic python arguments
                slurm_python_wrap = SLURM_COMMAND_CLASSIFY.format(
                    mail_end='END,' if build_only else '',
                    config_name=config_name,
                    dir_working=dir_working,
                    label_experiment=label_experiment,
                    response_mapping=response_mapping,
                    build_only='--build_only' if build_only else '',
                    run_all='--run_all' if run_all else '')

                print('Submitting job {}'.format(job_name))
                command = ' '.join([
                    shared_submit_slurm.SLURM_COMMAND, slurm_args_dynamic, slurm_python_wrap
                ])
                subprocess.call(command, shell=True)
'--label_experiment={label_experiment} --response_mapping={response_mapping} --model_version={model_version}"' if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('--config_name', required=True) parser.add_argument('--label_experiment', required=True) parser.add_argument('--response_mapping', required=True) parser.add_argument('--model_version', required=True) parser.add_argument('--num_jobs', type=int, required=True) args = parser.parse_args() # Prep for calling jobs shared_submit_slurm.validate_label_experiment(args.label_experiment) shared_submit_slurm.validate_response_mapping(args.response_mapping) dir_model = paths.get_dir_model_experiment_config( config_name=args.config_name, label_experiment=args.label_experiment, response_mapping=args.response_mapping) slurm_python_wrap = SLURM_COMMAND_APPLY.format( config_name=args.config_name, label_experiment=args.label_experiment, response_mapping=args.response_mapping, model_version=args.model_version) for idx_job in range(args.num_jobs): job_name = shared_submit_slurm.get_global_apply_job_name( config_name=args.config_name, label_experiment=args.label_experiment, response_mapping=args.response_mapping) slurm_args_dynamic = ' '.join([ shared_submit_slurm.SLURM_GPUS, '--job-name={}'.format(job_name),
def _build_dynamic_config_for_uq_experiments(
        config_name: str, label_experiment: str, response_mapping: str) -> configs.Config:
    """Build a dynamic config for the UQ experiments.

    Currently disabled: the function raises immediately because its paths must be
    updated to match the MP experiment path layout. The body below is dead code
    until the raise is removed.

    Args:
        config_name: Name of the base config file (without extension).
        label_experiment: Experiment label identifying the training data directory.
        response_mapping: One of 'lwr' (10 classes) or 'lwrn' (8 classes).

    Returns:
        A configs.Config with feature/response/boundary files and output dirs set.
    """
    raise AssertionError(
        'You need to modify the paths for the UQ experiments, like the MP experiment paths'
    )
    response_mapping_classes = {
        'lwr': 10,
        'lwrn': 8,
    }
    assert response_mapping in response_mapping_classes, \
        'response_mapping is {} but must be one of: {}'.format(response_mapping,
                                                               response_mapping_classes.keys())
    filepaths_features = list()
    filepaths_responses = list()
    filepaths_boundaries = list()
    # Note that we need to use feature files multiple times in some cases. Feature files will always be associated
    # with reef training data, but may also be associated with land and/or water training data. Thus, feature files
    # may be used 1-3 times in the training data.
    # Note that we need to grab land and water from the "clean" training data directory, and we need to grab general
    # response data from the appropriate folder

    # Get supplemental land/water response data
    filenames = [
        f for f in os.listdir(paths.DIR_DATA_TRAIN_CLEAN)
        if f.endswith('land.tif') or f.endswith('water.tif')
    ]
    for filename in filenames:
        filepath_responses = os.path.join(paths.DIR_DATA_TRAIN_CLEAN, filename)
        # Raw, anchored patterns: the previous non-raw '_\w*.tif' / '.tif' left
        # '.' matching any character and triggered invalid-escape warnings
        filepath_features = re.sub(r'_\w*\.tif$', '_features.tif', filepath_responses)
        filepath_boundaries = re.sub(r'\.tif$', '.shp', filepath_responses)
        assert os.path.exists(filepath_features), \
            'Features file not found: {}'.format(filepath_features)
        assert os.path.exists(filepath_boundaries), \
            'Boundaries file not found: {}'.format(filepath_boundaries)
        filepaths_features.append([filepath_features])
        filepaths_responses.append([filepath_responses])
        filepaths_boundaries.append(filepath_boundaries)

    # Get supplemental additional class response data
    filenames = [
        f for f in os.listdir(paths.DIR_DATA_TRAIN_CLEAN)
        if f.endswith('_model_class.tif')
    ]
    for filename in filenames:
        filepath_responses = os.path.join(paths.DIR_DATA_TRAIN_CLEAN, filename)
        filepath_features = re.sub(r'_\w*\.tif$', '_features.tif', filepath_responses)
        # NOTE(review): replacement has no separator before 'boundaries.shp',
        # unlike the '.shp' used above — verify expected boundary filenames
        filepath_boundaries = re.sub(r'\.tif$', 'boundaries.shp', filepath_responses)
        assert os.path.exists(filepath_features), \
            'Features file not found: {}'.format(filepath_features)
        assert os.path.exists(filepath_boundaries), \
            'Boundaries file not found: {}'.format(filepath_boundaries)
        filepaths_features.append([filepath_features])
        filepaths_responses.append([filepath_responses])
        filepaths_boundaries.append(filepath_boundaries)

    # Get UQ response data
    response_suffix = '_responses_{}b.tif'.format(response_mapping)
    dir_data = paths.get_dir_training_data_experiment(label_experiment=label_experiment)
    assert os.path.exists(dir_data), \
        'Training data directory not found for label_experiment {}: {}'.format(label_experiment, dir_data)
    for filename in os.listdir(dir_data):
        if not filename.endswith(response_suffix):
            continue
        filepath_responses = os.path.join(dir_data, filename)
        filepath_features = re.sub(response_suffix, '_features.tif', filepath_responses)
        filename_boundaries = re.sub(response_suffix, '_boundaries.shp', filename)
        filepath_boundaries = os.path.join(paths.DIR_DATA_TRAIN_CLEAN, filename_boundaries)
        assert os.path.exists(filepath_features), \
            'Features file not found: {}'.format(filepath_features)
        assert os.path.exists(filepath_boundaries), \
            'Boundaries file not found: {}'.format(filepath_boundaries)
        filepaths_features.append([filepath_features])
        filepaths_responses.append([filepath_responses])
        filepaths_boundaries.append(filepath_boundaries)

    # Parse config and update dynamic values
    config = configs.create_config_from_file(paths.get_filepath_config(config_name))
    config.raw_files.feature_files = filepaths_features
    config.raw_files.response_files = filepaths_responses
    config.raw_files.boundary_files = filepaths_boundaries
    config.data_build.dir_out = paths.get_dir_built_data_experiment(
        label_experiment=label_experiment, response_mapping=response_mapping, config=config)
    config.model_training.dir_out = paths.get_dir_model_experiment_config(
        config_name=config_name, label_experiment=label_experiment,
        response_mapping=response_mapping)
    config.architecture.n_classes = response_mapping_classes[response_mapping]
    return config
def _build_dynamic_config_for_mp_experiments(
        config_name: str, label_experiment: str, response_mapping: str) -> configs.Config:
    """Build a dynamic config for the MP experiments.

    Collects matching feature/response/boundary file sets (plus supplemental
    responses without boundaries), asserting that every referenced feature and
    boundary file exists before returning the populated config.

    Args:
        config_name: Name of the base config file (without extension).
        label_experiment: Experiment label; containing '25' or '50' selects the
            corresponding downsampled training data directories.
        response_mapping: Must be 'custom' (14 classes).

    Returns:
        A configs.Config with feature/response/boundary files and output dirs set.
    """
    response_mapping_classes = {'custom': 14}
    assert response_mapping in response_mapping_classes, \
        'response_mapping is {} but must be one of: {}'.format(response_mapping, response_mapping_classes)

    config_features = list()
    config_responses = list()
    config_boundaries = list()

    # Set source directories: the '25' and '50' cases only differ in the
    # downsample factor, so compute it once instead of duplicating the branches
    if '25' in label_experiment:
        downsample = '25'
    elif '50' in label_experiment:
        downsample = '50'
    else:
        downsample = None
    if downsample:
        subdir_downsample = paths.SUBDIR_DATA_TRAIN_DOWNSAMPLE.format(downsample)
        dir_features = os.path.join(paths.DIR_DATA_TRAIN_FEATURES, subdir_downsample)
        dir_responses = os.path.join(paths.DIR_DATA_TRAIN_MP, subdir_downsample)
        dir_supps = os.path.join(paths.DIR_DATA_TRAIN_MP_SUPP, subdir_downsample)
        suffix = '_' + downsample
    else:
        dir_features = paths.DIR_DATA_TRAIN_FEATURES_CLEAN
        dir_responses = paths.DIR_DATA_TRAIN_MP_CLEAN
        dir_supps = paths.DIR_DATA_TRAIN_MP_SUPP_CLEAN
        suffix = ''
    dir_boundaries = paths.DIR_DATA_TRAIN_MP_BOUNDS

    # Raw string and compiled once: the quad-name pattern is reused in two loops
    pattern_quad = re.compile(r'L15-\d{4}E-\d{4}N')

    # Get feature/response/boundary sets
    missing_features = list()
    missing_boundaries = list()
    filepaths_responses = sorted([
        os.path.join(dir_responses, filename) for filename in os.listdir(dir_responses)
        if filename.endswith('responses_custom{}.tif'.format(suffix))
    ])
    for filepath_response in filepaths_responses:
        quad_name = pattern_quad.search(filepath_response).group()
        filepath_feature = os.path.join(dir_features, quad_name + '_features{}.tif'.format(suffix))
        filepath_boundary = os.path.join(dir_boundaries, quad_name + '_boundaries.shp')
        if not os.path.exists(filepath_feature):
            missing_features.append(filepath_feature)
        if not os.path.exists(filepath_boundary):
            missing_boundaries.append(filepath_boundary)
        config_features.append([filepath_feature])
        config_responses.append([filepath_response])
        config_boundaries.append(filepath_boundary)

    # Get supplemental sets; these have responses but no boundary files
    filepaths_responses = sorted([
        os.path.join(dir_supps, filename) for filename in os.listdir(dir_supps)
    ])
    for filepath_response in filepaths_responses:
        quad_name = pattern_quad.search(filepath_response).group()
        filepath_feature = os.path.join(dir_features, quad_name + '_features{}.tif'.format(suffix))
        if not os.path.exists(filepath_feature):
            missing_features.append(filepath_feature)
        config_features.append([filepath_feature])
        config_responses.append([filepath_response])
        config_boundaries.append(None)

    assert not missing_features and not missing_boundaries, \
        'Missing feature and boundary files: \n\n{} \n\n{}'.format(missing_features, missing_boundaries)

    # Parse config and update dynamic values
    config = configs.create_config_from_file(paths.get_filepath_config(config_name))
    config.raw_files.feature_files = config_features
    config.raw_files.response_files = config_responses
    config.raw_files.boundary_files = config_boundaries
    config.data_build.dir_out = paths.get_dir_built_data_experiment(
        label_experiment=label_experiment, response_mapping=response_mapping, config=config)
    config.model_training.dir_out = paths.get_dir_model_experiment_config(
        config_name=config_name, label_experiment=label_experiment,
        response_mapping=response_mapping)
    config.architecture.n_classes = response_mapping_classes[response_mapping]
    return config