def query_to_hashes(subset=None, base_dir="Output"):
    """Return all cfg entries in db.json, or only those matching `subset`."""
    db_cfg = utils.get_db_cfg(path=os.path.join(base_dir, "db.json"))
    if subset is None:
        q_result = db_cfg.all()
    else:
        q_result = db_cfg.search(utils.dict_to_query(subset))
    return q_result
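
# A minimal usage sketch for query_to_hashes(). The subset key "version" is
# taken from the query built further below; any key/value pairs that
# utils.dict_to_query() understands and that exist in db.json would work.
def _example_query_to_hashes():
    all_entries = query_to_hashes()  # no subset: every entry in db.json
    subset_entries = query_to_hashes(subset={"version": 2.1})
    print(f"{len(subset_entries)} of {len(all_entries)} entries match the subset")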
def hash_to_cfg(hash_, cfgs_dir="./Output/cfgs"):
    """Look up a cfg by its hash, first in the database, then on disk."""
    db_cfg = utils.get_db_cfg()
    q = Query()
    q_result = db_cfg.search(q.hash == hash_)
    if len(q_result) == 0:
        # Not in the database: fall back to the yaml files on disk
        cfgs = [str(file) for file in Path(cfgs_dir).rglob(f"*{hash_}.yaml")]
        if len(cfgs) == 1:
            cfg = utils.load_yaml(cfgs[0])
            cfg.hash = utils.cfg_to_hash(cfg)
            return cfg
        else:
            return None
    # Several entries can share a hash (one per network ID); return ID == 0
    cfg = [utils.DotDict(cfg) for cfg in q_result if cfg["network"]["ID"] == 0][0]
    return cfg
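
# Hedged usage sketch for hash_to_cfg(); the hash string below is a
# placeholder, not a real entry. Note the disk fallback: if the hash is
# missing from the database, the function scans cfgs_dir for a matching
# "*<hash>.yaml" file before giving up and returning None.
def _example_hash_to_cfg():
    cfg = hash_to_cfg("0123456789abcdef")  # placeholder hash
    if cfg is None:
        print("hash not found in database or on disk")
    else:
        print(cfg.network.N_tot)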
def get_MCMC_data(variable="all", variable_subset=None, sort_by_variable=True, N_max=None, extra_selections=None):
    db_cfg = utils.get_db_cfg()

    if variable == "all":
        return [utils.DotDict(cfg) for cfg in db_cfg]

    if N_max is None:
        N_max = len(db_cfg)

    if extra_selections is None:
        extra_selections = {}

    used_hashes = set()
    cfgs_to_plot = []
    for query_cfg in db_cfg:
        # Remove the scanned variable and the hash before querying, so the
        # query matches every cfg that differs only in `variable`
        query_cfg.pop(variable, None)
        hash_ = query_cfg.pop("hash", None)
        if hash_ in used_hashes:
            continue
        if not passes_extra_selections_cut(query_cfg, extra_selections):
            continue
        cfgs = utils.query_cfg(query_cfg)
        if len(cfgs) != 1:
            accepted_cfgs, used_hashes = get_accepted_cfgs(
                cfgs,
                used_hashes,
                variable,
                variable_subset,
                sort_by_variable,
            )
            cfgs_to_plot.append(accepted_cfgs)
            if len(cfgs_to_plot) >= N_max:
                return cfgs_to_plot
    return cfgs_to_plot
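
# Hedged sketch of how get_MCMC_data() might be driven. Using "beta" as the
# scanned variable is an assumption; it depends on which parameters the cfgs
# in the database actually vary over.
def _example_get_MCMC_data():
    # One group of cfgs per parameter point, each group varying only in "beta"
    cfgs_to_plot = get_MCMC_data(variable="beta", N_max=10)
    print(f"{len(cfgs_to_plot)} parameter groups found")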
def __init__(self, base_dir="Output/ABM", filetype="hdf5", subset=None, verbose=False):
    self.base_dir = utils.path(base_dir)
    self.filetype = filetype
    self.subset = subset
    self.verbose = verbose

    if verbose:
        print("Loading ABM_simulations \n", flush=True)

    if self.subset is None:
        self.all_filenames = get_all_ABM_filenames(base_dir, filetype)
        self.all_folders = get_all_ABM_folders(self.all_filenames)
        self.cfgs = get_cfgs(self.all_folders)
    else:
        # Steps:
        # 1) Connect to the database
        # 2) Get the hashes for the relevant subset
        # 3) Only load the files belonging to those hashes
        db = utils.get_db_cfg()
        q = Query()
        query = q.version == 2.1
        for key, val in subset.items():
            query = query & (q[key] == val)
        cfgs = db.search(query)

        self.all_filenames = []
        for hash_ in [cfg["hash"] for cfg in cfgs]:
            self.all_filenames.extend(utils.hash_to_filenames(hash_))

        self.all_folders = get_all_ABM_folders(self.all_filenames)
        self.cfgs = get_cfgs(self.all_folders)

    self.d = self._convert_all_files_to_dict(filetype)
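
# Hedged construction sketch: the enclosing class is assumed to be named
# ABM_simulations, as the verbose message above suggests. Passing a subset
# dict restricts loading to the files whose hashes match those key/value
# pairs in the config database.
def _example_load_simulations():
    abm_files = ABM_simulations(subset={"version": 2.1}, verbose=True)
    print(len(abm_files.all_filenames), "files loaded")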
def run_simulations(
    simulation_parameters,
    N_runs=2,
    num_cores_max=None,
    N_tot_max=False,
    verbose=False,
    force_rerun=False,
    dry_run=False,
    **kwargs,
):

    if isinstance(simulation_parameters, dict):
        simulation_parameters = utils.format_simulation_paramters(simulation_parameters)
        cfgs_all = utils.generate_cfgs(simulation_parameters, N_runs, N_tot_max, verbose=verbose)
        N_tot_max = utils.d_num_cores_N_tot[utils.extract_N_tot_max(simulation_parameters)]
    elif isinstance(simulation_parameters[0], utils.DotDict):
        cfgs_all = simulation_parameters
        N_tot_max = np.max([cfg.network.N_tot for cfg in cfgs_all])
    else:
        raise ValueError("simulation_parameters not of the correct type")

    if len(cfgs_all) == 0:
        N_files = 0
        return N_files

    db_cfg = utils.get_db_cfg()
    q = Query()

    db_counts = np.array(
        [db_cfg.count((q.hash == cfg.hash) & (q.network.ID == cfg.network.ID)) for cfg in cfgs_all]
    )
    assert np.max(db_counts) <= 1

    # Keep only the cfgs that are not already in the database
    if force_rerun:
        cfgs = cfgs_all
    else:
        cfgs = [cfg for (cfg, count) in zip(cfgs_all, db_counts) if count == 0]

    N_files = len(cfgs)

    num_cores = utils.get_num_cores_N_tot(N_tot_max, num_cores_max)

    if isinstance(simulation_parameters, dict):
        s_simulation_parameters = str(simulation_parameters)
    elif isinstance(simulation_parameters, list):
        s_simulation_parameters = f"{len(simulation_parameters)} runs"
    else:
        raise AssertionError("simulation_parameters neither list nor dict")

    print(
        f"\n\n"
        f"Generating {N_files:3d} network-based simulations",
        f"with {num_cores} cores",
        f"based on {s_simulation_parameters}.",
        "Please wait. \n",
        flush=True,
    )

    if dry_run or N_files == 0:
        return N_files

    if num_cores == 1:
        for cfg in tqdm(cfgs):
            cfg_out = run_single_simulation(cfg, save_initial_network=True, verbose=verbose, **kwargs)
            update_database(db_cfg, q, cfg_out)
    else:
        # First generate the networks
        f_single_network = partial(
            run_single_simulation,
            only_initialize_network=True,
            save_initial_network=True,
            verbose=verbose,
            **kwargs,
        )

        # Get the network hashes
        network_hashes = {utils.cfg_to_hash(cfg.network, exclude_ID=False) for cfg in cfgs}

        # Keep one cfg per unique network hash
        cfgs_network = []
        for cfg in cfgs:
            network_hash = utils.cfg_to_hash(cfg.network, exclude_ID=False)
            if network_hash in network_hashes:
                cfgs_network.append(cfg)
                network_hashes.remove(network_hash)

        # Generate the networks
        print("Generating networks. Please wait")
        p_umap(f_single_network, cfgs_network, num_cpus=num_cores)

        # Then run the simulations on the networks
        print("Running simulations. Please wait")
        f_single_simulation = partial(run_single_simulation, verbose=verbose, **kwargs)
        for cfg in p_uimap(f_single_simulation, cfgs, num_cpus=num_cores):
            update_database(db_cfg, q, cfg)

    return N_files
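
# Hedged sketch of a run_simulations() call. The parameter names inside
# simulation_parameters ("N_tot", "beta") are illustrative; they must match
# whatever utils.format_simulation_paramters() / utils.generate_cfgs()
# expect. With dry_run=True the call only reports how many new simulations
# would be generated, without touching the database or running anything.
def _example_run_simulations():
    simulation_parameters = {"N_tot": [58_000], "beta": [0.01, 0.02]}
    N_files = run_simulations(simulation_parameters, N_runs=2, dry_run=True)
    print(f"{N_files} simulations would be run")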
raise ValueError("Too many sigma, not yet implemented")  # TODO: Implement

hashes = [utils.cfg_to_hash(cfg, exclude_ID=False) for cfg in cfgs_all]

# Keep only the first occurrence of each hash; pop in reverse so the
# remaining indices stay valid. Range over the full length, since a
# duplicate can also sit above the largest first-occurrence index.
_, ind = np.unique(hashes, return_index=True)
for i in reversed(range(len(cfgs_all))):
    if i not in ind:
        cfgs_all.pop(i)

N_tot_max = np.max([cfg.network.N_tot for cfg in cfgs_all])


if __name__ == "__main__":

    with Timer() as t:

        db_cfg = utils.get_db_cfg()
        q = Query()
        db_counts = np.array(
            [db_cfg.count((q.hash == cfg.hash) & (q.network.ID == cfg.network.ID)) for cfg in cfgs_all]
        )
        assert np.max(db_counts) <= 1

        cfgs = [cfg for (cfg, count) in zip(cfgs_all, db_counts) if count == 0]
        N_files = len(cfgs)

        num_cores = utils.get_num_cores_N_tot(N_tot_max, num_cores_max)
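
# Self-contained sketch of the deduplication pattern used above: np.unique
# with return_index=True yields the index of the first occurrence of each
# value, and popping in reverse keeps the remaining indices valid while the
# duplicates are removed. Plain strings stand in for the real cfg hashes.
def _example_deduplicate_by_hash():
    import numpy as np

    items = ["a", "b", "a", "c", "b"]
    _, ind = np.unique(items, return_index=True)  # first occurrences: 0, 1, 3
    for i in reversed(range(len(items))):
        if i not in ind:
            items.pop(i)
    assert items == ["a", "b", "c"]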