Example No. 1
    def test_fails_not_dict(self):
        d1 = {'a': 3}
        d2 = ['not_dict']
        with self.assertRaises(ValueError):
            merged = utils.merge_dicts(d1, d2)
        with self.assertRaises(ValueError):
            merged = utils.merge_dicts(d2, d1)
Example No. 2
    def test_empty(self):
        d1 = {'a': 3}
        d2 = {}
        merged1 = utils.merge_dicts(d1, d2)
        merged2 = utils.merge_dicts(d2, d1)
        expected = {'a': 3}
        self.assertEqual(merged1, expected)
        self.assertEqual(merged2, expected)
Example No. 3
    def test_nested_non_dict_override(self):
        d1 = {'a': 3, 'b': {'c': {'d': 4}, 'e': 11}}
        d2 = {'b': {'c': ['not_dict']}}

        merged1 = utils.merge_dicts(d1, d2)
        expected1 = {'a': 3, 'b': {'c': ['not_dict'], 'e': 11}}
        merged2 = utils.merge_dicts(d2, d1)
        expected2 = {'a': 3, 'b': {'c': {'d': 4}, 'e': 11}}

        self.assertEqual(merged1, expected1)
        self.assertEqual(merged2, expected2)
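
Taken together, these tests pin down the contract of `utils.merge_dicts`: both arguments must be dicts (otherwise a ValueError is raised), nested dicts are merged recursively, and on conflicts the value from the second argument wins, with a non-dict value replacing an entire sub-dict. A minimal sketch of a function satisfying these tests (not the actual seml implementation) could look like this:

import copy


def merge_dicts(d1, d2):
    """Recursively merge d2 into d1; values from d2 take precedence."""
    if not isinstance(d1, dict) or not isinstance(d2, dict):
        raise ValueError("Both arguments must be dicts.")

    merged = copy.deepcopy(d1)
    for key, value in d2.items():
        if key in merged and isinstance(merged[key], dict) and isinstance(value, dict):
            # Both sides are dicts: merge them recursively.
            merged[key] = merge_dicts(merged[key], value)
        else:
            # Otherwise the value from d2 overrides (or adds) the key,
            # including the case where a non-dict replaces a sub-dict.
            merged[key] = copy.deepcopy(value)
    return merged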
Example No. 4
            'project_root_dir'
        ],
        "VALID_SLURM_CONFIG_VALUES": [
            'experiments_per_job', 'max_simultaneous_jobs',
            'sbatch_options_template', 'sbatch_options'
        ],
        "LOGIN_NODE_NAMES": ["fs"],
        "OBSERVERS": {
            "NEPTUNE": {
                "AUTH_TOKEN": "YOUR_AUTH_TOKEN",
            },
            "SLACK": {
                "WEBHOOK": "YOUR_WEBHOOK",
            },
            "MATTERMOST": {
                "WEBHOOK": "YOUR_WEBHOOK",
                "DEFAULT_CHANNEL": "YOUR_DEFAULT_CHANNEL",
            }
        },
    }, )

# Load user settings
if SETTINGS.USER_SETTINGS_PATH.exists():
    user_settings_source = imp.load_source('SETTINGS',
                                           str(SETTINGS.USER_SETTINGS_PATH))
    SETTINGS = munchify(merge_dicts(SETTINGS, user_settings_source.SETTINGS))

SETTINGS.SLURM_STATES.ACTIVE = (SETTINGS.SLURM_STATES.PENDING +
                                SETTINGS.SLURM_STATES.RUNNING +
                                SETTINGS.SLURM_STATES.PAUSED)
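
The snippet above defines the default `SETTINGS` and, if a file exists at `SETTINGS.USER_SETTINGS_PATH`, loads it as a module and merges its `SETTINGS` dict over the defaults with `merge_dicts` before wrapping the result with `munchify`. Because the merge is recursive, a user settings file only needs to list the keys it overrides. A hypothetical user settings file could look like this (all values are placeholders, not real defaults):

# Hypothetical contents of the file located at SETTINGS.USER_SETTINGS_PATH.
# Only the keys given here override the defaults; all other settings are kept.
SETTINGS = {
    "LOGIN_NODE_NAMES": ["login1", "login2"],
    "OBSERVERS": {
        "SLACK": {
            "WEBHOOK": "https://hooks.slack.com/services/XXX/YYY/ZZZ",
        },
    },
}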
Example No. 5
def generate_configs(experiment_config):
    """Generate parameter configurations based on an input configuration.

    Input is a nested configuration where on each level there can be 'fixed', 'grid', and 'random' parameters.

    In essence, we take the cartesian product of all the `grid` parameters and take random samples for the random
    parameters. The nested structure makes it possible to define different parameter spaces e.g. for different datasets.
    Parameter definitions lower in the hierarchy overwrite parameters defined closer to the root.

    For each leaf configuration we take the maximum of all num_samples values on the path since we need to have the same
    number of samples for each random parameter.

    For each configuration of the `grid` parameters we then create `num_samples` configurations of the random
    parameters, i.e. leading to `num_samples * len(grid_configurations)` configurations.

    See Also `examples/example_config.yaml` and the example below.

    Parameters
    ----------
    experiment_config: dict
        Dictionary that specifies the "search space" of parameters that will be enumerated. Should be
        parsed from a YAML file.

    Returns
    -------
    all_configs: list of dicts
        Contains the individual combinations of the parameters.


    """

    reserved, next_level = unpack_config(experiment_config)
    reserved = standardize_config(reserved)
    level_stack = [('', next_level)]
    config_levels = [reserved]
    final_configs = []

    detect_duplicate_parameters(invert_config(reserved), None)

    while len(level_stack) > 0:
        current_sub_name, sub_vals = level_stack.pop(0)
        sub_config, sub_levels = unpack_config(sub_vals)
        sub_config = standardize_config(sub_config)
        config_above = config_levels.pop(0)

        inverted_sub_config = invert_config(sub_config)
        detect_duplicate_parameters(inverted_sub_config, current_sub_name)

        inverted_config_above = invert_config(config_above)
        redefined_parameters = set(inverted_sub_config.keys()).intersection(
            set(inverted_config_above.keys()))

        if len(redefined_parameters) > 0:
            logging.warning(
                f"Found redefined parameters in {current_sub_name}: {redefined_parameters}. "
                f"Redefinitions of parameters override earlier ones.")
            config_above = copy.deepcopy(config_above)
            for p in redefined_parameters:
                sections = inverted_config_above[p]
                for s in sections:
                    del config_above[s][p]

        config = merge_dicts(config_above, sub_config)

        if len(sub_levels) == 0:
            final_configs.append((current_sub_name, config))

        for sub_name, sub_vals in sub_levels.items():
            new_sub_name = f'{current_sub_name}.{sub_name}' if current_sub_name != '' else sub_name
            level_stack.append((new_sub_name, sub_vals))
            config_levels.append(config)

    all_configs = []
    for subconfig_name, conf in final_configs:
        conf = standardize_config(conf)
        random_params = conf['random'] if 'random' in conf else {}
        fixed_params = flatten(conf['fixed']) if 'fixed' in conf else {}
        grid_params = conf['grid'] if 'grid' in conf else {}

        if len(random_params) > 0:
            num_samples = random_params['samples']
            root_seed = random_params.get('seed', None)
            random_sampled = sample_random_configs(flatten(random_params),
                                                   seed=root_seed,
                                                   samples=num_samples)

        grids = [
            generate_grid(v, parent_key=k) for k, v in grid_params.items()
        ]
        grid_configs = dict([sub for item in grids for sub in item])
        grid_product = list(cartesian_product_dict(grid_configs))

        with_fixed = [{**d, **fixed_params} for d in grid_product]
        if len(random_params) > 0:
            with_random = [{
                **grid,
                **random
            } for grid in with_fixed for random in random_sampled]
        else:
            with_random = with_fixed
        all_configs.extend(with_random)

    # Cast NumPy integers to normal integers since PyMongo doesn't like them
    all_configs = [{
        k: int(v) if isinstance(v, np.integer) else v
        for k, v in config.items()
    } for config in all_configs]

    all_configs = [unflatten(conf) for conf in all_configs]
    return all_configs
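
To make the docstring concrete, here is a hypothetical input and the expected expansion; the parameter-definition format (`type`, `options`, `min`, `max`) is assumed for illustration rather than taken from `examples/example_config.yaml`:

# Hypothetical experiment configuration (keys and values are illustrative).
experiment_config = {
    'fixed': {'model': 'mlp'},
    'grid': {
        'lr': {'type': 'choice', 'options': [0.1, 0.01, 0.001]},
    },
    'random': {
        'samples': 2,
        'dropout': {'type': 'uniform', 'min': 0.0, 'max': 0.5},
    },
}

configs = generate_configs(experiment_config)
# Three grid values times two random samples should give
# len(configs) == 6, with every config also carrying the fixed 'model' key.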
Example No. 6
    def test_basic(self):
        d1 = {'a': 3, 'b': 5}
        d2 = {'b': 99, 'c': 7}
        merged = utils.merge_dicts(d1, d2)
        expected = {'a': 3, 'b': 99, 'c': 7}
        self.assertEqual(merged, expected)
Example No. 7
    def test_nested(self):
        d1 = {'a': 3, 'b': {'c': 10, 'd': 9}}
        d2 = {'e': 7, 'b': {'c': 99, 'f': 11}}
        merged = utils.merge_dicts(d1, d2)
        expected = {'a': 3, 'b': {'c': 99, 'd': 9, 'f': 11}, 'e': 7}
        self.assertEqual(merged, expected)
Example No. 8
File: add.py Project: cqql/seml
def add_experiments(db_collection_name,
                    config_file,
                    force_duplicates,
                    no_hash=False,
                    no_sanity_check=False,
                    no_code_checkpoint=False):
    """
    Add configurations from a config file into the database.

    Parameters
    ----------
    db_collection_name: the MongoDB collection name.
    config_file: path to the YAML configuration.
    force_duplicates: if True, disable duplicate detection.
    no_hash: if True, disable hashing of the configurations for duplicate detection. This is much slower, so use only
        if you have a good reason to.
    no_sanity_check: if True, do not check the config for missing/unused arguments.
    no_code_checkpoint: if True, do not upload the experiment source code files to the MongoDB.

    Returns
    -------
    None
    """

    seml_config, slurm_config, experiment_config = read_config(config_file)

    # Use current Anaconda environment if not specified
    if 'conda_environment' not in seml_config:
        if 'CONDA_DEFAULT_ENV' in os.environ:
            seml_config['conda_environment'] = os.environ['CONDA_DEFAULT_ENV']
        else:
            seml_config['conda_environment'] = None

    # Set Slurm config with default parameters as fall-back option
    slurm_config = merge_dicts(SETTINGS.SLURM_DEFAULT, slurm_config)

    # Check for and use sbatch options template
    sbatch_options_template = slurm_config.get('sbatch_options_template', None)
    if sbatch_options_template is not None:
        if sbatch_options_template not in SETTINGS.SBATCH_OPTIONS_TEMPLATES:
            raise ConfigError(
                f"sbatch options template '{sbatch_options_template}' not found in settings.py."
            )
        for k, v in SETTINGS.SBATCH_OPTIONS_TEMPLATES[
                sbatch_options_template].items():
            if k not in slurm_config['sbatch_options']:
                slurm_config['sbatch_options'][k] = v

    slurm_config['sbatch_options'] = remove_prepended_dashes(
        slurm_config['sbatch_options'])
    configs = generate_configs(experiment_config)
    collection = get_collection(db_collection_name)

    batch_id = get_max_in_collection(collection, "batch_id")
    if batch_id is None:
        batch_id = 1
    else:
        batch_id = batch_id + 1

    if seml_config['use_uploaded_sources'] and not no_code_checkpoint:
        uploaded_files = upload_sources(seml_config, collection, batch_id)
    else:
        uploaded_files = None
    del seml_config['use_uploaded_sources']

    if not no_sanity_check:
        check_config(seml_config['executable'],
                     seml_config['conda_environment'], configs)

    path, commit, dirty = get_git_info(seml_config['executable'])
    git_info = None
    if path is not None:
        git_info = {'path': path, 'commit': commit, 'dirty': dirty}

    use_hash = not no_hash
    if use_hash:
        configs = [{**c, **{'config_hash': make_hash(c)}} for c in configs]

    if not force_duplicates:
        len_before = len(configs)

        # First, check for duplicates within the experiment configurations from the file.
        if not use_hash:
            # slow duplicate detection without hashes
            unique_configs = []
            for c in configs:
                if c not in unique_configs:
                    unique_configs.append(c)
            configs = unique_configs
        else:
            # fast duplicate detection using hashing.
            configs_dict = {c['config_hash']: c for c in configs}
            configs = [v for k, v in configs_dict.items()]

        len_after_deduplication = len(configs)
        # Now, check for duplicate configurations in the database.
        configs = filter_experiments(collection, configs)
        len_after = len(configs)
        if len_after_deduplication != len_before:
            logging.info(
                f"{len_before - len_after_deduplication} of {len_before} experiment{s_if(len_before)} were "
                f"duplicates. Adding only the {len_after_deduplication} unique configurations."
            )
        if len_after != len_after_deduplication:
            logging.info(
                f"{len_after_deduplication - len_after} of {len_after_deduplication} "
                f"experiment{s_if(len_after_deduplication)} were already found in the database. They were not added again."
            )

    # Create an index on the config hash. If the index is already present, this simply does nothing.
    collection.create_index("config_hash")
    # Add the configurations to the database with STAGED status.
    if len(configs) > 0:
        add_configs(collection, seml_config, slurm_config, configs,
                    uploaded_files, git_info)
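
A minimal, hypothetical invocation (the collection name and config path are placeholders) would be:

# Stages the configurations from the YAML file into the given MongoDB collection.
add_experiments(db_collection_name='my_experiments',
                config_file='experiments/config.yaml',
                force_duplicates=False)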