Ejemplo n.º 1
0
def makeGrid(batchPath,
             settingName=None,
             settings=None,
             read_only=False,
             interactive=False,
             install_reqs_at=None,
             install_reqs_force=None):
    """Create the directory structure and per-job input files of a grid.

    Parameters
    ----------
    batchPath : str
        Folder of the grid. It is converted to an absolute path with a
        trailing path separator.
    settingName : str, optional
        Name of a YAML file (extension in ``_yaml_extensions``) holding the
        grid settings. Only used when ``settings`` is empty.
    settings : dict, optional
        Grid settings. Must contain a ``"grid"`` entry with ``"models"`` and
        ``"datasets"`` sub-entries; every other key is used as a default for
        all jobs. The given object is deep-copied, not modified.
    read_only : bool
        If true, job items whose chains do not exist are dropped, the batch
        is saved and returned, and no directories or input files are written.
    interactive : bool
        If true, a hint on how to run the grid is printed at the end and
        nothing is returned.
    install_reqs_at : str, optional
        If given, the components used by the grid are installed there, and
        its absolute path is stored in every job's input under
        ``_packages_path``.
    install_reqs_force : bool, optional
        Passed as ``force`` to ``install_reqs``.

    Returns
    -------
    The batch object when ``read_only`` is true or ``interactive`` is false;
    ``None`` otherwise.

    Raises
    ------
    NotImplementedError
        If neither ``settings`` nor ``settingName`` is given, or if
        ``settingName`` is not a YAML file.
    ValueError
        If a model or data set used by a job is not defined in the grid
        settings, if a job has no sampler, or if an automatic covariance
        matrix is requested but no external packages path is known.
    """
    # Function-scope import, hoisted out of the per-job loop below.
    from itertools import chain

    print("Generating grid...")
    batchPath = os.path.abspath(batchPath) + os.sep
    if not settings:
        if not settingName:
            # TODO: re-use a previous batch, reading the setting file name
            # from <batchPath>/config/config.ini and forcing read_only=True.
            raise NotImplementedError(
                "Re-using previous batch is work in progress...")
        elif os.path.splitext(settingName)[-1].lower() in _yaml_extensions:
            settings = yaml_load_file(settingName)
        else:
            # TODO: support python scripts; in that case the info would be
            # passed as a dict.
            raise NotImplementedError(
                "Using a python script is work in progress...")
    batch = batchjob.BatchJob(batchPath)
    # TODO: batch.skip = settings.get("skip", False)
    batch.makeItems(settings, messages=not read_only)
    if read_only:
        # Iterate over a copy: items are removed from the list while looping.
        for jobItem in list(batch.jobItems):
            if not jobItem.chainExists():
                batch.jobItems.remove(jobItem)
        batch.save()
        print('OK, configured grid with %u existing chains' %
              (len(batch.jobItems)))
        return batch
    batch.makeDirectories(setting_file=None)
    batch.save()

    # First pass: build the combined info of every job and collect the
    # components they use. Writing is deferred until after installation,
    # because the automatic covmats are looked up in the installed packages.
    infos = {}
    components_used = {}
    # Everything outside the "grid" block acts as default info for all jobs
    defaults = copy.deepcopy(settings)
    grid_definition = defaults.pop("grid")
    models_definitions = grid_definition["models"]
    datasets_definitions = grid_definition["datasets"]
    for jobItem in batch.items(wantSubItems=False):
        jobItem.makeChainPath()
        # Model info (a model defined as null counts as an empty definition)
        try:
            model_info = copy.deepcopy(
                models_definitions[jobItem.param_set] or {})
        except KeyError:
            raise ValueError("Model '%s' must be defined." % jobItem.param_set)
        model_info = merge_info(defaults, model_info)
        # Dataset info
        try:
            dataset_info = copy.deepcopy(
                datasets_definitions[jobItem.data_set.tag])
        except KeyError:
            raise ValueError("Data set '%s' must be defined." %
                             jobItem.data_set.tag)
        # Combined info
        combined_info = merge_info(defaults, model_info, dataset_info)
        if "preset" in combined_info:
            preset = combined_info.pop("preset")
            combined_info = merge_info(create_input(**preset), combined_info)
        combined_info[_output_prefix] = jobItem.chainRoot
        # Requisites
        components_used = get_used_components(components_used, combined_info)
        if install_reqs_at:
            combined_info[_packages_path] = os.path.abspath(install_reqs_at)
        infos.setdefault(jobItem.param_set, {})[jobItem.data_set.tag] = \
            combined_info

    # Installing requisites
    if install_reqs_at:
        print("Installing required code and data for the grid.")
        from cobaya.log import logger_setup
        logger_setup()
        install_reqs(components_used,
                     path=install_reqs_at,
                     force=install_reqs_force)

    # Second pass: assign covariance matrices and write the input files
    print("Adding covmats (if necessary) and writing input files")
    for jobItem in batch.items(wantSubItems=False):
        info = infos[jobItem.param_set][jobItem.data_set.tag]
        # Covariance matrices
        # We try to find them now, instead of at run time, to check if
        # correctly selected
        try:
            sampler = list(info[kinds.sampler])[0]
        except (KeyError, IndexError, TypeError):
            # Missing, empty or null sampler block
            raise ValueError("No sampler has been chosen")
        if sampler == "mcmc":
            sampler_options = info[kinds.sampler][sampler]
            if sampler_options is None:
                # Sampler declared without options: treat as all-defaults
                sampler_options = info[kinds.sampler][sampler] = {}
            # NOTE(review): any truthy "covmat" value (not only "auto")
            # triggers the automatic search and is overwritten below --
            # confirm that this is intended.
            if sampler_options.get("covmat", "auto"):
                packages_path = (install_reqs_at or
                                 info.get(_packages_path, None))
                if not packages_path:
                    raise ValueError(
                        "Cannot assign automatic covariance matrices because no "
                        "external packages path has been defined.")
                # Need updated info for covmats: includes renames
                updated_info = update_info(info)
                # Ideally, we use slow+sampled parameters to look for the
                # covariance matrix, but since for that we'd need to
                # initialise a model, we approximate that set as
                # theory+sampled
                like_params = set(chain.from_iterable(
                    like[_params]
                    for like in updated_info[kinds.likelihood].values()))
                params_info = {
                    p: v
                    for p, v in updated_info[_params].items()
                    if is_sampled_param(v) and p not in like_params
                }
                best_covmat = _get_best_covmat(
                    os.path.abspath(packages_path), params_info,
                    updated_info[kinds.likelihood])
                sampler_options["covmat"] = os.path.join(
                    best_covmat["folder"], best_covmat["name"])
        # Write the info for this job
        # Allow overwrite since often will want to regenerate grid with tweaks
        yaml_dump_file(jobItem.iniFile(),
                       sort_cosmetic(info),
                       error_if_exists=False)
        # TODO: features of the old grid code not translated yet:
        #   - "_minimize" variant of each input file (setMinimize) and the
        #     start_at_bestfit option;
        #   - settings.defaults / settings.override_defaults include files;
        #   - input files for the importance sampling runs
        #     (jobItem.importanceJobs()).

    if not interactive:
        return batch
    print('Done... to run do: cobaya-grid-run %s' % batchPath)
Ejemplo n.º 2
0
def makeGrid(batchPath,
             settingName=None,
             settings=None,
             read_only=False,
             interactive=False,
             install_reqs_at=None,
             install_reqs_force=None):
    """Create the directory structure and per-job input files of a grid.

    Parameters
    ----------
    batchPath : str
        Folder of the grid. It is converted to an absolute path with a
        trailing path separator.
    settingName : str, optional
        Name of a ``.yml``/``.yaml`` file holding the grid settings. Only
        used when ``settings`` is empty.
    settings : dict, optional
        Grid settings. Must contain a ``"grid"`` entry with ``"models"`` and
        ``"datasets"`` sub-entries; every other key is used as a default for
        all jobs. An optional ``"yaml_dir"`` entry is passed on to the batch
        object. The given object is deep-copied, not modified.
    read_only : bool
        If true, job items whose chains do not exist are dropped, the batch
        is saved and returned, and no directories or input files are written.
    interactive : bool
        If true, a hint on how to run the grid is printed at the end and
        nothing is returned.
    install_reqs_at : str, optional
        If given, the modules used by the grid are installed there, and its
        absolute path is stored in every job's input under ``_path_install``.
    install_reqs_force : bool, optional
        Passed as ``force`` to ``install_reqs``.

    Returns
    -------
    The batch object when ``read_only`` is true or ``interactive`` is false;
    ``None`` otherwise.

    Raises
    ------
    NotImplementedError
        If neither ``settings`` nor ``settingName`` is given, if
        ``settingName`` is not a YAML file, or if the settings contain a
        ``"skip"`` entry.
    ValueError
        If a model or data set used by a job is not defined in the grid
        settings.
    """
    batchPath = os.path.abspath(batchPath) + os.sep
    # TODO: cosmomcAction not translated
    # (0: chains, 1: importance sampling, 2: best-fit, 3: best-fit and Hessian)
    if not settings:
        if not settingName:
            # TODO: re-use a previous batch, reading the setting file name
            # from <batchPath>/config/config.ini and forcing read_only=True.
            raise NotImplementedError(
                "Re-using previous batch is work in progress...")
        elif os.path.splitext(settingName)[-1].lower() in (".yml", ".yaml"):
            settings = yaml_load_file(settingName)
        else:
            # ACTUALLY, in the scripted case a DICT or a YAML FILE NAME
            # should be passed
            raise NotImplementedError(
                "Using a python script is work in progress...")
    from cobaya.grid_tools import batchjob
    batch = batchjob.batchJob(batchPath, settings.get("yaml_dir", None))
    # TODO: batch.skip = settings.get("skip", False)
    if "skip" in settings:
        raise NotImplementedError("Skipping not implemented yet.")
    batch.makeItems(settings, messages=not read_only)
    if read_only:
        # Iterate over a copy: items are removed from the list while looping.
        for jobItem in list(batch.jobItems):
            if not jobItem.chainExists():
                batch.jobItems.remove(jobItem)
        batch.save()
        print('OK, configured grid with %u existing chains' %
              (len(batch.jobItems)))
        return batch
    # NOTE: the old code called batch.makeDirectories(settings.__file__);
    # it is unclear why the folder of the settings was used there instead of
    # the grid folder given.
    batch.makeDirectories(setting_file=None)
    batch.save()

    # TODO: start at best fit (old: settings.start_at_bestfit) not
    # implemented yet.

    # Everything outside the "grid" block acts as default info for all jobs
    defaults = copy.deepcopy(settings)
    modules_used = {}
    grid_definition = defaults.pop("grid")
    models_definitions = grid_definition["models"]
    datasets_definitions = grid_definition["datasets"]
    for jobItem in batch.items(wantSubItems=False):
        jobItem.makeChainPath()
        base_info = copy.deepcopy(defaults)
        # A model defined as null counts as an empty definition
        try:
            model_info = models_definitions[jobItem.param_set] or {}
        except KeyError:
            raise ValueError("Model '%s' must be defined." % jobItem.param_set)
        # TODO: covariance matrices are not implemented yet. The old code
        # looked for <cov_dir>/<job name>.covmat, then tried normalised and
        # renamed variants (covNameMappings, covRenamer, covrenames,
        # covWithoutNameOrder) in cov_dir / cov_dir_fallback, and set
        # 'propose_matrix' and 'MPI_Max_R_ProposeUpdate' accordingly.
        try:
            dataset_info = datasets_definitions[jobItem.data_set.tag]
        except KeyError:
            raise ValueError("Data set '%s' must be defined." %
                             jobItem.data_set.tag)
        combined_info = merge_info(base_info, model_info, dataset_info)
        combined_info[_output_prefix] = jobItem.chainRoot
        # TODO: settings.defaults / settings.override_defaults include files
        # are not translated.

        # Requisites
        modules_used = get_modules(modules_used, combined_info)
        if install_reqs_at:
            combined_info[_path_install] = os.path.abspath(install_reqs_at)
        # Write the info for this job
        yaml_dump_file(combined_info, jobItem.iniFile())
        # TODO: features of the old grid code not translated yet:
        #   - "_minimize" variant of each input file (setMinimize);
        #   - input files for the importance sampling runs
        #     (jobItem.importanceJobs()).

    # Installing requisites (announce it only if something gets installed)
    if install_reqs_at:
        print("Installing required code and data for the grid.")
        install_reqs(modules_used,
                     path=install_reqs_at,
                     force=install_reqs_force)
    if not interactive:
        return batch
    print('Done... to run do: cobaya-grid-run %s' % batchPath)