def sample_simulate_pool(main_db: Union[str, Path], index_file_pattern: str,
                         fit_type: str, n_sim: int, n_pool: int):
    """
    Fit the samples in a database in parallel by making copies of the database,
    fitting them separately, and then combining them back together in the
    sample table of main_db.

    Parameters
    ----------
    main_db
        Path to the main database that will be spawned.
    index_file_pattern
        File pattern for the new databases that will have index equal to the
        simulation number.
    fit_type
        The type of fit to run, one of "fixed" or "both".
    n_sim
        Number of simulations that will be fit.
    n_pool
        Number of pools for the multiprocessing.

    Raises
    ------
    SampleError
        If fit_type is not "fixed" or "both".
    """
    if fit_type not in ["fixed", "both"]:
        raise SampleError(f"Unrecognized fit type {fit_type}.")

    fit_sample = FitSample(
        main_db=main_db,
        index_file_pattern=index_file_pattern,
        fit_type=fit_type,
    )
    fits = dmdismod_in_parallel(
        dm_thread=fit_sample,
        sims=list(range(n_sim)),
        n_pool=n_pool,
    )

    # Reconstruct the sample table with all n_sim fits.
    # DataFrame.append was removed in pandas 2.0; pd.concat is the supported
    # way to stack the per-simulation frames, and ignore_index=True gives the
    # same fresh 0..N-1 index that reset_index(drop=True) produced.
    samp = pd.concat(fits, ignore_index=True)

    d = DismodIO(path=main_db)
    d.sample = samp[['sample_index', 'var_id', 'var_value']]
def main():
    """
    Takes dismod databases that have already had a fit run on them and
    simulates new datasets, refitting on all of them, then combining the
    results back into one database.

    The command-line arguments are read through get_args(). When n_pool > 1
    the simulations are fit in a multiprocessing pool and the combined result
    is written to the sample table of the main database; otherwise a single
    'sample simulate' dismod command is run on the main database.

    Raises
    ------
    RuntimeError
        If the main database has an empty fit_var table.
    """
    args = get_args()
    logging.basicConfig(level=LEVELS[args.loglevel])

    context = Context(model_version_id=args.model_version_id)
    main_db = context.db_file(location_id=args.parent_location_id, sex_id=args.sex_id)

    d = DismodIO(path=main_db)
    if d.fit_var.empty:
        raise RuntimeError(
            "Cannot run sample / simulate on a database without fit_var!")

    # Create n_sim simulation datasets based on the fitted parameters.
    # Each command must be its own list element: a missing comma here makes
    # Python concatenate adjacent string literals into one invalid command.
    run_dismod_commands(
        dm_file=main_db,
        commands=[
            'set start_var fit_var',
            'set truth_var fit_var',
            'set scale_var fit_var',
            f'simulate {args.n_sim}',
        ]
    )

    if args.n_pool > 1:
        # Make a pool and fit to each of the simulations (uses the __call__ method)
        # NOTE(review): main_db is resolved with args.parent_location_id while
        # FitSample receives args.location_id -- confirm this is intentional.
        fit_sample = FitSample(
            context=context,
            location_id=args.location_id,
            sex_id=args.sex_id,
            fit_type=args.fit_type,
        )
        # The context manager guarantees the workers are cleaned up even if
        # one of the fits raises; Pool.map blocks until all results are in
        # and already returns a list.
        with Pool(args.n_pool) as pool:
            fits = pool.map(fit_sample, range(args.n_sim))

        # Reconstruct the sample table with all n_sim fits.
        # DataFrame.append was removed in pandas 2.0; use pd.concat instead.
        sample = pd.concat(fits, ignore_index=True)
        sample = sample.rename(columns={
            'fit_var_id': 'var_id',
            'fit_var_value': 'var_value',
        })
        d.sample = sample
    else:
        # If we only have one pool that means we aren't going to run in parallel
        run_dismod_commands(
            dm_file=main_db,
            commands=[f'sample simulate {args.n_sim}']
        )
def _process(self, db: str):
    """
    Run 'predict sample' on a database for this object's single sample index.

    The sample table of ``db`` is overwritten with only the rows whose
    sample_index equals ``self.index``, renumbered as sample 0, before the
    dismod predict command is run.

    Parameters
    ----------
    db
        Path to the database to run the prediction on. Its sample table is
        modified in place.

    Returns
    -------
    The predict table read back from ``db`` after the command, with its
    sample_index column set to ``self.index``.
    """
    dbio = DismodIO(path=db)

    # Keep only this index's draws and relabel them as the sole sample (0).
    # With a single sample at index 0, sample_id is set equal to var_id.
    this_sample = dbio.sample.loc[dbio.sample.sample_index == self.index].copy()
    this_sample['sample_index'] = 0
    this_sample['sample_id'] = this_sample['var_id']
    dbio.sample = this_sample

    # Drop the handle before invoking dismod -- presumably so the external
    # command does not contend with an open connection (TODO confirm).
    del dbio
    run_dismod_commands(dm_file=db, commands=['predict sample'])

    # Re-open to read the freshly written predict table and tag it with the
    # original sample index so results from different indices can be combined.
    dbio = DismodIO(path=db)
    predict = dbio.predict
    predict['sample_index'] = self.index
    return predict