예제 #1
0
def submit_validation_slurm(
        labels_experiments: str,
        response_mappings: str,
        config_names: str = None,
        config_regex: str = None,
        run_all: bool = False
) -> None:
    """Submit one SLURM validation job per (config, experiment, mapping) combo.

    A combination is skipped when its classification has not finished yet, or
    when its validation is already complete.

    Args:
        labels_experiments: Comma-delimited experiment labels.
        response_mappings: Comma-delimited response mappings.
        config_names: Optional comma-delimited config names to restrict to.
        config_regex: Optional regex filter for config filenames.
        run_all: Pass --run_all through to the wrapped python command.
    """
    # Resolve the set of config filenames to iterate over
    parsed_names = config_names.split(',') if config_names else config_names
    relevant_configs = shared_submit_slurm.get_relevant_config_filenames(
        config_names=parsed_names, build_only=False, config_regex=config_regex)

    # Submit a job for each config x experiment x mapping combination
    for filename_config in relevant_configs:
        for label_experiment in labels_experiments.split(','):
            for response_mapping in response_mappings.split(','):
                shared_submit_slurm.validate_label_experiment(label_experiment)
                shared_submit_slurm.validate_response_mapping(response_mapping)

                config_name, _ = os.path.splitext(filename_config)
                job_name = shared_submit_slurm.get_validation_job_name(
                    config_name=config_name, label_experiment=label_experiment,
                    response_mapping=response_mapping)

                # Skip combos lacking a trained model or already validated
                filepath_classify = paths.get_filepath_classify_complete(
                    config_name=config_name, label_experiment=label_experiment,
                    response_mapping=response_mapping)
                filepath_validate = paths.get_filepath_validation_complete(
                    config_name=config_name, label_experiment=label_experiment,
                    response_mapping=response_mapping)
                if not os.path.exists(filepath_classify):
                    print('Classification not complete:  {} {}'.format(config_name, response_mapping))
                    continue
                if os.path.exists(filepath_validate):
                    print('Validation complete:  {} {}'.format(config_name, response_mapping))
                    continue

                # Assemble the dynamic portion of the sbatch arguments
                dir_model = paths.get_dir_model_experiment_config(
                    config_name=config_name, label_experiment=label_experiment,
                    response_mapping=response_mapping)
                dynamic_args = ' '.join([
                    shared_submit_slurm.SLURM_GPUS,
                    '--job-name={}'.format(job_name),
                    '--output={}/slurm.validate.%j.%t.OUT'.format(dir_model),
                    '--error={}/slurm.validate.%j.%t.ERROR'.format(dir_model),
                ])

                # Wrap the python invocation for submission
                dir_working = os.path.dirname(os.path.abspath(__file__))
                python_wrap = SLURM_COMMAND_VALIDATE.format(
                    dir_working=dir_working, config_name=config_name,
                    label_experiment=label_experiment,
                    response_mapping=response_mapping,
                    run_all='--run_all' if run_all else ''
                )
                print('Submitting job {}'.format(job_name))
                full_command = ' '.join(
                    [shared_submit_slurm.SLURM_COMMAND, dynamic_args, python_wrap])
                subprocess.call(full_command, shell=True)
예제 #2
0
def get_model_logger(logger_name: str, config_name: str, label_experiment: str,
                     response_mapping: str) -> logging.Logger:
    """Create (or fetch) a DEBUG-level, file-backed logger for an experiment.

    The log file lives in the model directory for the given config/
    experiment/mapping combination and is named after ``logger_name``.

    Args:
        logger_name: Name of the logger and basename of the log file; a
            '.log' suffix is appended when missing.
        config_name: Config identifier used to locate the model directory.
        label_experiment: Experiment label used to locate the model directory.
        response_mapping: Response mapping used to locate the model directory.

    Returns:
        A logger writing to the experiment's log file.
    """
    log_out = os.path.join(
        paths.get_dir_model_experiment_config(config_name, label_experiment,
                                              response_mapping), logger_name)
    if not log_out.endswith('.log'):
        log_out += '.log'
    # exist_ok avoids a race between the existence check and directory creation
    os.makedirs(os.path.dirname(log_out), exist_ok=True)
    # Bug fix: previously this fetched the ROOT logger via logging.getLogger(),
    # so logger_name was ignored for the logger itself and records from every
    # module in the process were routed into this file.
    logger = logging.getLogger(logger_name)
    logger.setLevel(logging.DEBUG)
    # Bug fix: only attach a FileHandler if one for this file is not already
    # present; previously each call added another handler, duplicating every
    # log line on repeated calls.
    target = os.path.abspath(log_out)
    already_attached = any(
        isinstance(handler, logging.FileHandler)
        and getattr(handler, 'baseFilename', None) == target
        for handler in logger.handlers)
    if not already_attached:
        formatter = logging.Formatter(
            fmt=
            '%(asctime)s - %(processName)s - %(name)s - %(levelname)s - %(message)s'
        )
        handler = logging.FileHandler(log_out)
        handler.setFormatter(formatter)
        logger.addHandler(handler)
    return logger
def submit_classification_slurm(labels_experiments: str,
                                response_mappings: str,
                                config_names: str = None,
                                config_regex: str = None,
                                build_only: bool = False,
                                run_all: bool = False) -> None:
    """Submit SLURM classification (or data-build) jobs for each combination.

    One job is submitted per (config, experiment, mapping) combination, unless
    the job is already queued, classification is complete, or -- with
    build_only -- the data build is complete.

    Args:
        labels_experiments: Comma-delimited experiment labels.
        response_mappings: Comma-delimited response mappings.
        config_names: Optional comma-delimited config names to restrict to;
            ignored when build_only is True.
        config_regex: Optional regex filter for config filenames.
        build_only: Only build data (no GPU allocation, no classification).
        run_all: Pass --run_all through to the wrapped python command.
    """
    # Warning about usage and error checks
    if build_only and config_names:
        print(
            'WARNING:  build_only takes precedence over config_names, which is ignored'
        )

    # Bug fix: config_names was previously cleared unconditionally, which made
    # the split below dead code and silently discarded caller-provided names
    # even when build_only was False, contradicting the warning above.
    if build_only:
        config_names = None
    if config_names:
        config_names = config_names.split(',')
    filename_configs = shared_submit_slurm.get_relevant_config_filenames(
        config_names, build_only, config_regex)

    # Loop through configs and submit jobs
    for filename_config in filename_configs:
        for label_experiment in labels_experiments.split(','):
            for response_mapping in response_mappings.split(','):
                shared_submit_slurm.validate_label_experiment(label_experiment)
                shared_submit_slurm.validate_response_mapping(response_mapping)

                config_name = os.path.splitext(filename_config)[0]
                job_name = shared_submit_slurm.get_classify_job_name(
                    config_name, label_experiment, response_mapping)

                # Create model directory
                dir_model = paths.get_dir_model_experiment_config(
                    config_name=config_name,
                    label_experiment=label_experiment,
                    response_mapping=response_mapping)
                if not os.path.exists(dir_model):
                    os.makedirs(dir_model)

                # Do not submit if classification is locked or complete, or if data is built and build_only is True
                filepath_built = paths.get_filepath_build_complete(
                    config_name=config_name,
                    label_experiment=label_experiment,
                    response_mapping=response_mapping)
                filepath_complete = paths.get_filepath_classify_complete(
                    config_name=config_name,
                    label_experiment=label_experiment,
                    response_mapping=response_mapping)
                filepath_lock = paths.get_filepath_classify_lock(
                    config_name=config_name,
                    label_experiment=label_experiment,
                    response_mapping=response_mapping)
                # NOTE(review): hard-coded username in the queue query -- this
                # only works for this one user; consider getpass.getuser()
                command = 'squeue -u nfabina -o %j'
                result = subprocess.run(shlex.split(command),
                                        capture_output=True)
                is_in_job_queue = job_name in result.stdout.decode('utf-8')
                # Remove a stale lock left behind by a job no longer queued
                if not is_in_job_queue and os.path.exists(filepath_lock):
                    os.remove(filepath_lock)
                if is_in_job_queue:
                    print('Classification in progress:  {} {} {}'.format(
                        config_name, label_experiment, response_mapping))
                    continue
                elif os.path.exists(filepath_complete):
                    print('Classification complete:  {} {} {}'.format(
                        config_name, label_experiment, response_mapping))
                    continue
                elif os.path.exists(filepath_built) and build_only:
                    print('Data build complete:  {} {} {}'.format(
                        config_name, label_experiment, response_mapping))
                    continue

                # Set dynamic SLURM arguments; data builds do not need a GPU
                slurm_args_dynamic = ' '.join([
                    '' if build_only else shared_submit_slurm.SLURM_GPUS_LARGE,
                    '--job-name={}'.format(job_name),
                    '--output={}/slurm.classify.%j.%t.OUT'.format(dir_model),
                    '--error={}/slurm.classify.%j.%t.ERROR'.format(dir_model),
                ])
                # Set dynamic python arguments
                dir_working = os.path.dirname(os.path.abspath(__file__))
                slurm_python_wrap = SLURM_COMMAND_CLASSIFY.format(
                    mail_end='END,' if build_only else '',
                    config_name=config_name,
                    dir_working=dir_working,
                    label_experiment=label_experiment,
                    response_mapping=response_mapping,
                    build_only='--build_only' if build_only else '',
                    run_all='--run_all' if run_all else '')

                print('Submitting job {}'.format(job_name))
                command = ' '.join([
                    shared_submit_slurm.SLURM_COMMAND, slurm_args_dynamic,
                    slurm_python_wrap
                ])
                subprocess.call(command, shell=True)
예제 #4
0
    '--label_experiment={label_experiment} --response_mapping={response_mapping} --model_version={model_version}"'

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--config_name', required=True)
    parser.add_argument('--label_experiment', required=True)
    parser.add_argument('--response_mapping', required=True)
    parser.add_argument('--model_version', required=True)
    parser.add_argument('--num_jobs', type=int, required=True)
    args = parser.parse_args()

    # Prep for calling jobs
    shared_submit_slurm.validate_label_experiment(args.label_experiment)
    shared_submit_slurm.validate_response_mapping(args.response_mapping)
    dir_model = paths.get_dir_model_experiment_config(
        config_name=args.config_name,
        label_experiment=args.label_experiment,
        response_mapping=args.response_mapping)
    slurm_python_wrap = SLURM_COMMAND_APPLY.format(
        config_name=args.config_name,
        label_experiment=args.label_experiment,
        response_mapping=args.response_mapping,
        model_version=args.model_version)

    for idx_job in range(args.num_jobs):
        job_name = shared_submit_slurm.get_global_apply_job_name(
            config_name=args.config_name,
            label_experiment=args.label_experiment,
            response_mapping=args.response_mapping)
        slurm_args_dynamic = ' '.join([
            shared_submit_slurm.SLURM_GPUS,
            '--job-name={}'.format(job_name),
def _build_dynamic_config_for_uq_experiments(
        config_name: str, label_experiment: str,
        response_mapping: str) -> configs.Config:
    """Build a runtime Config for the UQ experiments.

    NOTE(review): intentionally disabled -- the guard below always raises
    until the UQ paths are updated to match the MP experiment paths. The rest
    of the body documents the intended assembly of feature/response/boundary
    file triplets and is unreachable for now.
    """
    raise AssertionError(
        'You need to modify the paths for the UQ experiments, like the MP experiment paths'
    )
    # ---- everything below is unreachable until the guard above is removed ----
    response_mapping_classes = {
        'lwr': 10,
        'lwrn': 8,
    }
    assert response_mapping in response_mapping_classes, \
        'response_mapping is {} but must be one of:  {}'.format(response_mapping, response_mapping_classes.keys())

    filepaths_features = list()
    filepaths_responses = list()
    filepaths_boundaries = list()

    # Note that we need to use feature files multiple times in some cases. Feature files will always be associated with
    # reef training data, but may also be associated with land and/or water training data. Thus, feature files may be
    # used 1-3 times in the training data.
    # Note that we need to grab land and water from the "clean" training data directory, and we need to grab general
    # response data from the appropriate folder

    # Get supplemental land/water response data
    filenames = [
        f for f in os.listdir(paths.DIR_DATA_TRAIN_CLEAN)
        if f.endswith('land.tif') or f.endswith('water.tif')
    ]
    for filename in filenames:
        filepath_responses = os.path.join(paths.DIR_DATA_TRAIN_CLEAN, filename)
        # Raw strings and escaped dots: the previous patterns ('_\w*.tif',
        # '.tif') carried an invalid escape (SyntaxWarning on modern Python)
        # and an unescaped '.' matching any character
        filepath_features = re.sub(r'_\w*\.tif', '_features.tif',
                                   filepath_responses)
        filepath_boundaries = re.sub(r'\.tif', '.shp', filepath_responses)
        assert os.path.exists(
            filepath_features), 'Features file not found:  {}'.format(
                filepath_features)
        assert os.path.exists(
            filepath_boundaries), 'Boundaries file not found:  {}'.format(
                filepath_boundaries)
        filepaths_features.append([filepath_features])
        filepaths_responses.append([filepath_responses])
        filepaths_boundaries.append(filepath_boundaries)

    # Get supplemental additional class response data
    filenames = [
        f for f in os.listdir(paths.DIR_DATA_TRAIN_CLEAN)
        if f.endswith('_model_class.tif')
    ]
    for filename in filenames:
        filepath_responses = os.path.join(paths.DIR_DATA_TRAIN_CLEAN, filename)
        filepath_features = re.sub(r'_\w*\.tif', '_features.tif',
                                   filepath_responses)
        # NOTE(review): replacement here is 'boundaries.shp' (no underscore or
        # dot separator) while the land/water branch uses '.shp' -- confirm
        # this asymmetry is intended before re-enabling this function
        filepath_boundaries = re.sub(r'\.tif', 'boundaries.shp',
                                     filepath_responses)
        assert os.path.exists(
            filepath_features), 'Features file not found:  {}'.format(
                filepath_features)
        assert os.path.exists(
            filepath_boundaries), 'Boundaries file not found:  {}'.format(
                filepath_boundaries)
        filepaths_features.append([filepath_features])
        filepaths_responses.append([filepath_responses])
        filepaths_boundaries.append(filepath_boundaries)

    # Get UQ response data
    response_suffix = '_responses_{}b.tif'.format(response_mapping)
    dir_data = paths.get_dir_training_data_experiment(
        label_experiment=label_experiment)
    assert os.path.exists(
        dir_data
    ), 'Training data directory not found for label_experiment {}:  {}'.format(
        label_experiment, dir_data)
    for filename in os.listdir(dir_data):
        if not filename.endswith(response_suffix):
            continue
        filepath_responses = os.path.join(dir_data, filename)
        # response_suffix contains a literal '.', so escape it before using it
        # as a regex pattern
        filepath_features = re.sub(re.escape(response_suffix), '_features.tif',
                                   filepath_responses)
        filename_boundaries = re.sub(re.escape(response_suffix),
                                     '_boundaries.shp', filename)
        filepath_boundaries = os.path.join(paths.DIR_DATA_TRAIN_CLEAN,
                                           filename_boundaries)
        assert os.path.exists(
            filepath_features), 'Features file not found:  {}'.format(
                filepath_features)
        assert os.path.exists(
            filepath_boundaries), 'Boundaries file not found:  {}'.format(
                filepath_boundaries)

        filepaths_features.append([filepath_features])
        filepaths_responses.append([filepath_responses])
        filepaths_boundaries.append(filepath_boundaries)

    # Parse config and update dynamic values
    config = configs.create_config_from_file(
        paths.get_filepath_config(config_name))
    config.raw_files.feature_files = filepaths_features
    config.raw_files.response_files = filepaths_responses
    config.raw_files.boundary_files = filepaths_boundaries
    config.data_build.dir_out = paths.get_dir_built_data_experiment(
        label_experiment=label_experiment,
        response_mapping=response_mapping,
        config=config)
    config.model_training.dir_out = paths.get_dir_model_experiment_config(
        config_name=config_name,
        label_experiment=label_experiment,
        response_mapping=response_mapping)
    config.architecture.n_classes = response_mapping_classes[response_mapping]
    return config
def _build_dynamic_config_for_mp_experiments(
        config_name: str, label_experiment: str,
        response_mapping: str) -> configs.Config:
    """Build a runtime Config for the MP experiments.

    Collects feature/response/boundary file triplets from the training data
    directories (full resolution, or a 25%/50% downsample depending on
    label_experiment), plus supplemental responses without boundaries, then
    injects them into the config parsed from config_name.

    Args:
        config_name: Name of the base config file to parse.
        label_experiment: Experiment label; containing '25' or '50' selects
            the corresponding downsampled training data.
        response_mapping: Must be a key of response_mapping_classes
            (currently only 'custom').

    Returns:
        The parsed config with raw file lists, output dirs, and class count set.

    Raises:
        AssertionError: On an unknown response_mapping or on missing
            feature/boundary files (reported all at once).
    """
    response_mapping_classes = {'custom': 14}
    assert response_mapping in response_mapping_classes, \
        'response_mapping is {} but must be one of:  {}'.format(response_mapping, response_mapping_classes)

    config_features = list()
    config_responses = list()
    config_boundaries = list()

    # Set source directories; '25'/'50' in the experiment label selects the
    # matching downsampled data, otherwise the full-resolution "clean" data.
    # (Consolidates the previously duplicated 25/50 branches.)
    if '25' in label_experiment:
        downsample = '25'
    elif '50' in label_experiment:
        downsample = '50'
    else:
        downsample = None
    if downsample is not None:
        subdir = paths.SUBDIR_DATA_TRAIN_DOWNSAMPLE.format(downsample)
        dir_features = os.path.join(paths.DIR_DATA_TRAIN_FEATURES, subdir)
        dir_responses = os.path.join(paths.DIR_DATA_TRAIN_MP, subdir)
        dir_supps = os.path.join(paths.DIR_DATA_TRAIN_MP_SUPP, subdir)
        suffix = '_' + downsample
    else:
        dir_features = paths.DIR_DATA_TRAIN_FEATURES_CLEAN
        dir_responses = paths.DIR_DATA_TRAIN_MP_CLEAN
        dir_supps = paths.DIR_DATA_TRAIN_MP_SUPP_CLEAN
        suffix = ''
    dir_boundaries = paths.DIR_DATA_TRAIN_MP_BOUNDS

    # Raw string avoids the invalid-escape SyntaxWarning the old literal
    # pattern triggered on modern Python; compiled once and reused below
    pattern_quad = re.compile(r'L15-\d{4}E-\d{4}N')

    # Get feature/response/boundary sets; missing files are collected and
    # reported together so a single pass surfaces every problem
    missing_features = list()
    missing_boundaries = list()
    filepaths_responses = sorted(
        os.path.join(dir_responses, filename)
        for filename in os.listdir(dir_responses)
        if filename.endswith('responses_custom{}.tif'.format(suffix)))
    for filepath_response in filepaths_responses:
        quad_name = pattern_quad.search(filepath_response).group()
        filepath_feature = os.path.join(
            dir_features, quad_name + '_features{}.tif'.format(suffix))
        filepath_boundary = os.path.join(dir_boundaries,
                                         quad_name + '_boundaries.shp')
        if not os.path.exists(filepath_feature):
            missing_features.append(filepath_feature)
        if not os.path.exists(filepath_boundary):
            missing_boundaries.append(filepath_boundary)

        config_features.append([filepath_feature])
        config_responses.append([filepath_response])
        config_boundaries.append(filepath_boundary)

    # Get supplemental sets; these have no boundary files
    filepaths_responses = sorted(
        os.path.join(dir_supps, filename) for filename in os.listdir(dir_supps))
    for filepath_response in filepaths_responses:
        quad_name = pattern_quad.search(filepath_response).group()
        filepath_feature = os.path.join(
            dir_features, quad_name + '_features{}.tif'.format(suffix))
        if not os.path.exists(filepath_feature):
            missing_features.append(filepath_feature)

        config_features.append([filepath_feature])
        config_responses.append([filepath_response])
        config_boundaries.append(None)

    assert not missing_features and not missing_boundaries, \
        'Missing feature and boundary files:  \n\n{} \n\n{}'.format(missing_features, missing_boundaries)

    # Parse config and update dynamic values
    config = configs.create_config_from_file(
        paths.get_filepath_config(config_name))
    config.raw_files.feature_files = config_features
    config.raw_files.response_files = config_responses
    config.raw_files.boundary_files = config_boundaries
    config.data_build.dir_out = paths.get_dir_built_data_experiment(
        label_experiment=label_experiment,
        response_mapping=response_mapping,
        config=config)
    config.model_training.dir_out = paths.get_dir_model_experiment_config(
        config_name=config_name,
        label_experiment=label_experiment,
        response_mapping=response_mapping)
    config.architecture.n_classes = response_mapping_classes[response_mapping]
    return config