def sample_simulate_pool(main_db: Union[str, Path], index_file_pattern: str,
                         fit_type: str, n_sim: int, n_pool: int):
    """
    Fit the samples in a database in parallel by making copies of the database,
    fitting them separately, and then combining them back together in the
    sample table of main_db.

    Parameters
    ----------
    main_db
        Path to the main database that will be spawned.
    index_file_pattern
        File pattern for the new databases that will have index equal to
        the simulation number.
    fit_type
        The type of fit to run, one of "fixed" or "both".
    n_sim
        Number of simulations that will be fit.
    n_pool
        Number of pools for the multiprocessing.

    Raises
    ------
    SampleError
        If ``fit_type`` is not "fixed" or "both".
    """
    if fit_type not in ["fixed", "both"]:
        raise SampleError(f"Unrecognized fit type {fit_type}.")

    fit_sample = FitSample(main_db=main_db,
                           index_file_pattern=index_file_pattern,
                           fit_type=fit_type)
    fits = dmdismod_in_parallel(dm_thread=fit_sample,
                                sims=list(range(n_sim)),
                                n_pool=n_pool)

    # Reconstruct the sample table with all n_sim fits.
    # pd.concat is used because DataFrame.append was removed in pandas 2.0;
    # ignore_index=True is the equivalent of the former reset_index(drop=True).
    samp = pd.concat(fits, ignore_index=True)
    d = DismodIO(path=main_db)
    d.sample = samp[['sample_index', 'var_id', 'var_value']]
def main():
    """
    Takes dismod databases that have already had a fit run on them and
    simulates new datasets, refitting on all of them, then combining the
    results back into one database.

    Raises
    ------
    RuntimeError
        If the main database has an empty fit_var table.
    """
    args = get_args()
    logging.basicConfig(level=LEVELS[args.loglevel])

    context = Context(model_version_id=args.model_version_id)
    main_db = context.db_file(location_id=args.parent_location_id,
                              sex_id=args.sex_id)

    d = DismodIO(path=main_db)
    if d.fit_var.empty:
        raise RuntimeError(
            "Cannot run sample / simulate on a database without fit_var!")

    # Create n_sim simulation datasets based on the fitted parameters.
    # Every command must be its own list element: a missing comma makes
    # Python concatenate adjacent string literals into one bogus command.
    run_dismod_commands(dm_file=main_db,
                        commands=[
                            'set start_var fit_var',
                            'set truth_var fit_var',
                            'set scale_var fit_var',
                            f'simulate {args.n_sim}'
                        ])

    if args.n_pool > 1:
        # Make a pool and fit to each of the simulations
        # (uses the __call__ method).
        # NOTE(review): the database path above uses args.parent_location_id
        # while FitSample receives args.location_id -- confirm both exist
        # on the parsed arguments and are meant to differ.
        fit_sample = FitSample(context=context,
                               location_id=args.location_id,
                               sex_id=args.sex_id,
                               fit_type=args.fit_type)
        # The context manager releases the workers even if a fit raises.
        with Pool(args.n_pool) as p:
            fits = list(p.map(fit_sample, range(args.n_sim)))

        # Reconstruct the sample table with all n_sim fits
        # (pd.concat replaces DataFrame.append, removed in pandas 2.0).
        sample = pd.concat(fits, ignore_index=True)
        sample.rename(columns={
            'fit_var_id': 'var_id',
            'fit_var_value': 'var_value'
        }, inplace=True)
        d.sample = sample
    else:
        # If we only have one pool that means we aren't going to run
        # in parallel.
        run_dismod_commands(dm_file=main_db,
                            commands=[f'sample simulate {args.n_sim}'])
def collect(dbs, location_ids = None):
    """
    Gather the weighted residuals from a collection of fitted databases.

    Parameters
    ----------
    dbs
        Iterable of path objects pointing at dismod databases. The last two
        directory components of each path (``parts[-3:-1]``) are parsed as
        the integer location and sex of the database.
    location_ids
        Optional collection of location IDs; when given (and non-empty) only
        databases whose ``parts[-3]`` is in it are read.

    Returns
    -------
    A data frame with one row per fitted data point, holding the location,
    integrand, age / time bounds, weighted residual and the covariate
    columns (renamed to their covariate names). Empty if nothing was read.

    Notes
    -----
    The most recently opened database is also stored in the module-level
    ``db`` variable. Databases that fail while being read are skipped
    (best effort) and reported on stdout.
    """
    global db
    frames = []
    i = -1
    if location_ids:
        dbs = [p for p in dbs if int(p.parts[-3]) in location_ids]
    for p in dbs:
        db = DismodIO(p)
        try:
            # Touch a table first so unusable databases fail before any work.
            db.option
            loc, sex = map(int, p.parts[-3:-1])
            fit = (db.data_subset.merge(db.data, how='left')
                   .merge(db.fit_data_subset,
                          left_on='data_subset_id',
                          right_on='fit_data_subset_id')
                   .merge(db.node, how='left')
                   .merge(db.integrand, how='left'))
            cov_names = {
                f'x_{row.covariate_id}': row.c_covariate_name
                for _, row in db.covariate[['covariate_id', 'c_covariate_name']].iterrows()
            }
            fit.rename(columns=cov_names, inplace=True)
            fit['c_parent_location_id'] = loc
            cols = (['c_parent_location_id', 'c_location_id', 'integrand_name',
                     'data_name', 'age_lower', 'age_upper', 'time_lower',
                     'time_upper', 'weighted_residual']
                    + list(cov_names.values()))
            frames.append(fit[cols])
            i += 1
            print (i, f'sex: {sex}, location: {loc}')
        except Exception as error:
            # Deliberately best effort, but no longer swallows
            # KeyboardInterrupt / SystemExit and no longer hides the reason.
            print(f'Skipping {p}: {error}')
            continue
    if not frames:
        return pd.DataFrame()
    # One concat instead of repeated DataFrame.append (removed in pandas 2.0);
    # the original row indices are preserved, as before.
    return pd.concat(frames)
def simulate(path: Union[str, Path], n_sim: int):
    """
    Simulate from a database, within a database.

    Parameters
    ----------
    path
        A path to the database object to create simulations in.
    n_sim
        Number of simulations to create.

    Raises
    ------
    SampleError
        If the fit_var table is empty, or if reading it raises a ValueError
        (treated as the table not existing yet).
    """
    d = DismodIO(path=path)
    # Only the table read is guarded, so the "missing table" message is
    # reserved for the case where the read itself fails.
    try:
        no_fit = d.fit_var.empty
    except ValueError as err:
        raise SampleError(
            "Cannot run sample simulate on a database without fit_var! "
            "Does not have the fit_var table yet.") from err
    if no_fit:
        raise SampleError(
            "Cannot run sample simulate on a database without fit_var!")

    # Create n_sim simulation datasets based on the fitted parameters
    run_dismod_commands(dm_file=path,
                        commands=[
                            'set start_var fit_var',
                            'set truth_var fit_var',
                            'set scale_var fit_var',
                            f'simulate {n_sim}'
                        ])
def _process(self, db: str):
    """
    Run ``predict sample`` on one database for the sample ``self.index``.

    The sample table of ``db`` is overwritten with only the rows belonging to
    ``self.index`` (renumbered as sample 0, with ``sample_id`` set to
    ``var_id``), the predict command is run, and the resulting predict table
    is returned tagged with the original sample index.

    Parameters
    ----------
    db
        Path to the database to run the prediction in. Its sample table is
        modified in place.

    Returns
    -------
    The predict table of ``db`` with ``sample_index`` set to ``self.index``.
    """
    dbio = DismodIO(path=db)
    # Read the sample table once instead of once per use.
    all_samples = dbio.sample
    this_sample = all_samples.loc[all_samples.sample_index == self.index].copy()
    this_sample['sample_index'] = 0
    this_sample['sample_id'] = this_sample['var_id']
    dbio.sample = this_sample
    # Drop the handle before the external command runs
    # (presumably to release the database -- confirm).
    del dbio
    run_dismod_commands(dm_file=db, commands=['predict sample'])
    dbio = DismodIO(path=db)
    predict = dbio.predict
    predict['sample_index'] = self.index
    return predict
def test_predict_sample_pools(mi, settings, dismod):
    """Pooled prediction over two samples yields two rows per avgint row."""
    alchemy = Alchemy(settings)
    pooled = predict_sample_pool(
        main_db=NAME,
        index_file_pattern='sample_{index}.db',
        n_pool=2,
        n_sim=2,
    )
    main = DismodIO(NAME)
    expected_rows = 2 * len(main.avgint)
    assert len(pooled) == expected_rows
def test_predict_pool(mi, settings, dismod):
    """A single Predict call returns one row per avgint row for that sample."""
    alchemy = Alchemy(settings)
    predict = Predict(main_db=NAME, index_file_pattern='sample_{index}.db')
    result = predict(1)
    di = DismodIO(NAME)
    assert len(result) == len(di.avgint)
    # Compare element-wise first, then reduce. The former
    # `all(result.sample_index) == 1` only checked that no index was zero.
    assert all(result.sample_index == 1)
    assert list(result.columns) == [
        'predict_id', 'sample_index', 'avgint_id', 'avg_integrand'
    ]
def test_sample_simulate_sequence(filler, dismod):
    """Two sequential simulations fill the sample table with 2 x 250 rows."""
    sample_simulate_sequence(NAME, n_sim=2, fit_type='fixed')
    sample = DismodIO(NAME).sample

    expected_columns = ['sample_id', 'sample_index', 'var_id', 'var_value']
    assert len(sample) == 500
    assert all(sample.columns == expected_columns)
    # Rows are expected in blocks of 250, one block per simulation index.
    for sim in range(2):
        block = sample.iloc[250 * sim:250 * (sim + 1)]
        assert all(block.sample_index == sim)
    assert not np.isnan(sample.var_value).any()
def test_sample_asymptotic(filler, dismod):
    """Three asymptotic samples fill the sample table with 3 x 250 rows."""
    sample_asymptotic(NAME, fit_type='fixed', n_sim=3)
    sample = DismodIO(NAME).sample

    expected_columns = ['sample_id', 'sample_index', 'var_id', 'var_value']
    assert len(sample) == 750
    assert all(sample.columns == expected_columns)
    # Rows are expected in blocks of 250, one block per sample index.
    for sim in range(3):
        block = sample.iloc[250 * sim:250 * (sim + 1)]
        assert all(block.sample_index == sim)
    assert not np.isnan(sample.var_value).any()
def test_predict_sample(mi, settings, dismod):
    """`predict sample` on the filled avgint grid gives two rows per avgint row."""
    alchemy = Alchemy(settings)
    fill_avgint_with_priors_grid(
        inputs=mi,
        alchemy=alchemy,
        settings=settings,
        source_db_path=NAME,
        child_locations=[72],
        child_sexes=[2],
    )
    run_dismod_commands(dm_file=NAME, commands=['predict sample'])

    database = DismodIO(NAME)
    n_predictions = len(database.predict)
    n_avgint = len(database.avgint)
    assert n_predictions == 2 * n_avgint
def fill_avgint_with_priors_grid(inputs: MeasurementInputs, alchemy: Alchemy,
                                 settings: SettingsConfig,
                                 source_db_path: Union[str, Path],
                                 child_locations: List[int],
                                 child_sexes: List[int]):
    """
    Overwrite the avgint table of the source database with the prior grid
    built for the given child locations and sexes.

    Parameters
    ----------
    inputs
        Object used to add covariates to the grid.
    alchemy
        Object from which the integrand grids are built.
    settings
        Settings whose ``rate`` entries name the rates to build grids for.
    source_db_path
        Path of the database whose avgint table is replaced.
    child_locations
        Locations to put on the grid.
    child_sexes
        Sexes to put on the grid.
    """
    source = DismodIO(path=source_db_path)

    rate_names = [entry.rate for entry in settings.rate]
    rate_grids = integrand_grids(alchemy=alchemy, integrands=rate_names)

    grid = get_prior_avgint_grid(
        grids=rate_grids,
        sexes=child_sexes,
        locations=child_locations,
        midpoint=False,
    )
    grid = inputs.add_covariates_to_data(df=grid)
    grid = prep_data_avgint(
        df=grid,
        node_df=source.node,
        covariate_df=source.covariate,
    )
    grid.rename(columns={'sex_id': 'c_sex_id'}, inplace=True)
    source.avgint = grid
def __call__(self, index=None):
    """
    Fit one database and return its fit_var table.

    Parameters
    ----------
    index
        Simulation index to fit. When given, the main database is first
        copied to the index-specific file and the fit is run against that
        simulated data set. When ``None``, no copy is made and a plain fit
        (without a simulation index) is run on whatever file
        ``context.db_file`` returns for ``index=None``.

    Returns
    -------
    The fit_var table of the fitted database with an added ``sample_index``
    column holding ``index``.
    """
    index_db = self.context.db_file(location_id=self.location_id,
                                    sex_id=self.sex_id,
                                    index=index)
    if index is None:
        # Without an index there is no simulation to refer to; formatting
        # None into the command would produce the invalid "fit <type> None".
        fit_command = f'fit {self.fit_type}'
    else:
        copy2(src=str(self.main_db), dst=str(index_db))
        fit_command = f'fit {self.fit_type} {index}'

    run_dismod_commands(dm_file=index_db, commands=[fit_command])

    db = DismodIO(path=index_db)
    fit = db.fit_var
    fit['sample_index'] = index
    return fit
def check_last_command(dm_file: str, command: str):
    """
    Check the log table of a database for the outcome of the last command.

    The log is searched for the last 'begin ' entry, then for a
    'begin <command>' entry from there on, then for the matching
    'end <command>' entry. Any 'error' or 'warning' entry logged after the
    start of the command also counts as a failure.

    Parameters
    ----------
    dm_file
        Path to the database whose log table is inspected.
    command
        Name of the command that is expected to have run last.

    Returns
    -------
    True if the command began, ended and logged no errors or warnings;
    False otherwise. Problems are reported through LOG rather than raised.
    """
    LOG.warning(
        "FIXME -- GMA -- Check_last_command needs to wrap the call to dmdismod, not the ODE preprocessor."
    )
    # Imported locally (presumably to avoid a circular import -- confirm).
    from cascade_at.dismod.api.dismod_io import DismodIO
    db = DismodIO(dm_file)
    log = db.log
    # All 'begin ...' command entries, in log order.
    last_begin = [
        l for i, l in log.iterrows()
        if l.message_type == 'command' and l.message.startswith('begin ')
    ]
    rtn = True
    if not last_begin:
        LOG.error(f"ERROR: Failed to find a 'begin' command.")
        rtn = False
    else:
        last_begin = last_begin[-1]
    if rtn:
        # NOTE(review): log_id is used as a positional slice start, which
        # assumes log_id equals the row position -- confirm.
        start_cmd = [
            l for i, l in log[last_begin.log_id:].iterrows()
            if l.message_type == 'command'
            and l.message.startswith(f'begin {command}')
        ]
        if not start_cmd:
            LOG.error(
                f"ERROR: Expected 'begin {command}' but found '{last_begin.message}'."
            )
            rtn = False
        else:
            start_cmd = start_cmd[-1]
    if rtn:
        end_cmd = [
            l for i, l in log[start_cmd.log_id:].iterrows()
            if l.message_type == 'command'
            and l.message.startswith(f'end {command}')
        ]
        if not end_cmd:
            LOG.error(
                f"ERROR: Did not find end for this '{start_cmd.message}' command"
            )
            rtn = False
        # Report every error / warning logged since the command started;
        # any of them marks the command as failed.
        for i, l in log[start_cmd.log_id:].iterrows():
            if l.message_type in ['error', 'warning']:
                LOG.info(f"DISMOD {l.message_type}: {l.message.rstrip()}")
                rtn = False
    if rtn:
        LOG.info(f"{command} OK")
    else:
        LOG.error(
            f"ERROR: {command} had errors, warnings, or failed to complete."
        )
    return rtn
def _process(self, db: str):
    """
    Fit the database against simulation ``self.index`` and return the result
    in sample-table form.

    Parameters
    ----------
    db
        Path to the database to fit.

    Returns
    -------
    The fit_var table of ``db`` with a ``sample_index`` column set to
    ``self.index`` and the ``fit_var_id`` / ``fit_var_value`` columns renamed
    to ``var_id`` / ``var_value``.
    """
    fit_command = f'fit {self.fit_type} {self.index}'
    run_dismod_commands(dm_file=db, commands=[fit_command])

    fitted = DismodIO(path=db).fit_var
    fitted['sample_index'] = self.index
    sample_names = {'fit_var_id': 'var_id', 'fit_var_value': 'var_value'}
    fitted.rename(columns=sample_names, inplace=True)
    return fitted
def fill_avgint_with_priors_grid(inputs: MeasurementInputs, alchemy: Alchemy,
                                 settings: SettingsConfig,
                                 source_db_path: Union[str, Path],
                                 child_locations: List[int],
                                 child_sexes: List[int]):
    """
    Replace the average integrand table of a fitted database with the grid
    that the priors live on, so that the prior for the next level of the
    cascade can be "predicted" from it.

    Parameters
    ----------
    inputs
        An inputs object; used to attach covariates to the grid
    alchemy
        A grid alchemy object; used to build the integrand grids
    settings
        A settings configuration object; its rates select the integrands
    source_db_path
        The path of the source database that has had a fit on it
    child_locations
        The child locations to predict for
    child_sexes
        The child sexes to predict for
    """
    source = DismodIO(path=source_db_path)

    rate_names = [entry.rate for entry in settings.rate]
    rate_grids = integrand_grids(alchemy=alchemy, integrands=rate_names)

    grid = get_prior_avgint_grid(
        grids=rate_grids,
        sexes=child_sexes,
        locations=child_locations,
        midpoint=False,
    )
    grid = inputs.add_covariates_to_data(df=grid)
    grid = prep_data_avgint(
        df=grid,
        node_df=source.node,
        covariate_df=source.covariate,
    )
    grid.rename(columns={'sex_id': 'c_sex_id'}, inplace=True)
    source.avgint = grid
def mulcov_statistics(model_version_id: int, locations: List[int],
                      sexes: List[int], outfile_name: str,
                      sample: bool = True, mean: bool = True,
                      std: bool = True,
                      quantile: Optional[List[float]] = None) -> None:
    """
    Compute statistics for the covariate multipliers and save them as a CSV
    in the model version's outputs directory.

    Parameters
    ----------
    model_version_id
        The model version ID
    locations
        Locations that, crossed with ``sexes``, identify the databases to
        pull covariate multiplier estimates from
    sexes
        Sexes that, crossed with ``locations``, identify the databases to
        pull covariate multiplier estimates from
    outfile_name
        Base name (without ``.csv``) of the file written to the outputs
        directory
    sample
        If True read the estimates from the sample table, otherwise from
        the fit_var table
    mean
        Whether or not to compute the mean
    std
        Whether or not to compute the standard deviation
    quantile
        An optional list of quantiles to compute
    """
    context = Context(model_version_id=model_version_id)

    # One database per (location, sex) pair, locations varying slowest.
    db_files = []
    for loc in locations:
        for sex in sexes:
            db_path = context.db_file(location_id=loc, sex_id=sex)
            db_files.append(DismodIO(db_path))
    LOG.info(f"There are {len(db_files)} databases that will be aggregated.")

    common_covariates = common_covariate_names(db_files)
    LOG.info(f"The common covariates in the passed databases are {common_covariates}.")

    table_name = 'sample' if sample else 'fit_var'
    LOG.info(f"Will pull from the {table_name} table from each database.")

    mulcov_estimates = get_mulcovs(dbs=db_files,
                                   covs=common_covariates,
                                   table=table_name)
    stats = compute_statistics(df=mulcov_estimates,
                               mean=mean,
                               std=std,
                               quantile=quantile)

    LOG.info('Write to output file.')
    output_path = context.outputs_dir / f'{outfile_name}.csv'
    stats.to_csv(output_path, index=False)
def main():
    """
    Fill the avgint table of the source database with the prior grid for
    the target locations and sexes, then run ``predict sample`` on it.
    """
    args = get_args()
    logging.basicConfig(level=LEVELS[args.loglevel])

    context = Context(model_version_id=args.model_version_id)
    inputs, alchemy, settings = context.read_inputs()

    # Keep the path separately: the dismod command needs the database file,
    # not the DismodIO wrapper around it.
    source_path = context.db_file(location_id=args.source_location,
                                  sex_id=args.source_sex,
                                  make=False)
    sourceDB = DismodIO(path=source_path)

    rates = [r.rate for r in settings.rate]
    posterior_grid = get_prior_avgint_grid(settings=settings,
                                           integrands=rates,
                                           sexes=args.target_sexes,
                                           locations=args.target_locations,
                                           midpoint=False)
    posterior_grid = inputs.add_covariates_to_data(df=posterior_grid)
    posterior_grid = prep_data_avgint(df=posterior_grid,
                                      node_df=sourceDB.node,
                                      covariate_df=sourceDB.covariate)
    posterior_grid.rename(columns={'sex_id': 'c_sex_id'}, inplace=True)
    sourceDB.avgint = posterior_grid

    run_dismod_commands(dm_file=source_path, commands=['predict sample'])
def predict_sample_pool(main_db: Union[str, Path], index_file_pattern: str,
                        n_sim: int, n_pool: int):
    """
    Run predict sample in a pool by making copies of the existing database
    and splitting out the sample table into n_sim databases, running
    predict sample on each of them, and combining the results.

    Parameters
    ----------
    main_db
        Path to the main database that the per-sample copies are made from.
    index_file_pattern
        File pattern for the per-sample database copies.
    n_sim
        Number of samples to predict for (indices ``0 .. n_sim - 1``).
    n_pool
        Number of pools for the multiprocessing.

    Returns
    -------
    A data frame with the columns ``sample_index``, ``avgint_id`` and
    ``avg_integrand`` holding the predictions of all samples. Nothing is
    written back to ``main_db`` by this function.
    """
    predict = Predict(main_db=main_db, index_file_pattern=index_file_pattern)
    predictions = dmdismod_in_parallel(dm_thread=predict,
                                       sims=list(range(n_sim)),
                                       n_pool=n_pool)
    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0;
    # ignore_index=True is the equivalent of the former reset_index(drop=True).
    predictions = pd.concat(predictions, ignore_index=True)
    return predictions[['sample_index', 'avgint_id', 'avg_integrand']]
def main():
    """
    Plot the rates, the fitted integrands and the predictions of the
    database named on the command line.

    The opened database is also stored in the module-level ``db`` variable.
    """
    from cascade_at.dismod.api.dismod_io import DismodIO
    global db

    args = parse_args()
    path = Path(args.filename).expanduser()
    assert path.is_file(), f"The database path {path} does not exist."

    db = DismodIO(path)
    title = case_study_title(db,
                             version=args.model_version_id,
                             disease=args.disease,
                             which_fit=args.fit_type)
    data = get_fitted_data(db)

    # Integrands that actually occur in the data, minus the mortality ones.
    names_in_data = data.integrand_name[data.integrand_name.notna()].unique()
    data_integrands = sorted(set(names_in_data) - {'mtall', 'mtother'})

    # Split them into the fixed "no ODE" set and everything else.
    no_ode_candidates = {'Sincidence', 'mtexcess', 'mtother', 'remission'}
    no_ode_integrands = sorted(no_ode_candidates & set(data_integrands))
    yes_ode_integrands = sorted(set(data_integrands) - set(no_ode_integrands))
    all_integrands = no_ode_integrands + yes_ode_integrands

    covariate_integrand_list = yes_ode_integrands
    predict_integrand_list = ['susceptible', 'withC']

    rate = db.rate
    # Read but not used below; kept so the same tables are touched.
    integrand = db.integrand

    # Only rates that have a parent smoothing are plotted.
    has_parent_smooth = rate.parent_smooth_id.notna()
    for rate_name in rate.loc[has_parent_smooth, 'rate_name'].tolist():
        plot_rate(db, rate_name, title=title)

    for integrand_name in all_integrands:
        plot_integrand(db, data, integrand_name, title=title)

    plot_predict(db,
                 covariate_integrand_list,
                 predict_integrand_list,
                 title=title)
def main():
    """
    Compute statistics for the covariate multipliers of the databases
    selected by the command-line locations and sexes, and write them as a
    CSV named ``<outfile_name>.csv`` in the model version's outputs
    directory.
    """
    args = get_args()
    logging.basicConfig(level=LEVELS[args.loglevel])

    context = Context(model_version_id=args.model_version_id)
    db_files = [
        DismodIO(context.db_file(location_id=loc, sex_id=sex))
        for loc in args.locations for sex in args.sexes
    ]
    LOG.info(f"There are {len(db_files)} databases that will be aggregated.")

    common_covariates = common_covariate_names(db_files)
    LOG.info(
        f"The common covariates in the passed databases are {common_covariates}."
    )

    table_name = 'sample' if args.sample else 'fit_var'
    LOG.info(f"Will pull from the {table_name} table from each database.")

    # Pass the table *name*; args.sample is only the boolean switch that
    # selects it.
    mulcov_estimates = get_mulcovs(dbs=db_files,
                                   covs=common_covariates,
                                   table=table_name)
    mulcov_statistics = compute_statistics(df=mulcov_estimates,
                                           mean=args.mean,
                                           std=args.std,
                                           quantile=args.quantile)

    # Logger methods require a message; a bare LOG.info() raises TypeError.
    LOG.info('Write to output file.')
    mulcov_statistics.to_csv(context.outputs_dir / f'{args.outfile_name}.csv',
                             index=False)
def create_database(file_name, age_list, time_list, integrand_table,
                    node_table, subgroup_table, weight_table, covariate_table,
                    avgint_table, data_table, prior_table, smooth_table,
                    nslist_table, rate_table, mulcov_table, option_table):
    """
    Write the input tables of a dismod database through DismodIO.

    Names used in the argument tables (integrands, nodes, priors, smoothings,
    ...) are translated to the integer ids of the rows they end up in. The
    ``#*#`` comments keep the dismod_at calls that the DismodIO assignments
    stand in for.

    Parameters
    ----------
    file_name
        Path of the database handed to DismodIO.
    age_list, time_list
        Values for the age and time tables; other tables refer to them by
        position.
    integrand_table
        List of dicts with key 'name' and optional 'minimum_meas_cv'
        (default 0.0).
    node_table
        List of dicts with keys 'name' and 'parent' ('' for no parent).
    subgroup_table
        Non-empty list of dicts with keys 'subgroup' and 'group'; a new
        group id starts whenever 'group' changes between consecutive rows.
    weight_table
        List of dicts with keys 'name', 'age_id', 'time_id' and 'fun', where
        ``fun(age, time)`` returns the weight.
    covariate_table
        List of dicts with keys 'name', 'reference' and optional
        'max_difference'.
    avgint_table
        List of dicts with keys 'integrand', 'node', 'weight', 'age_lower',
        'age_upper', 'time_lower', 'time_upper', one key per covariate name,
        any extra columns named in the options, and optional 'subgroup'.
    data_table
        Like ``avgint_table`` plus 'hold_out', 'density', 'meas_value' and
        optional 'meas_std', 'eta', 'nu'.
    prior_table
        List of dicts with keys 'name', 'density', 'mean' and optional
        'lower', 'upper', 'std', 'eta', 'nu'.
    smooth_table
        List of dicts with keys 'name', 'age_id', 'time_id', 'fun' and
        optional 'mulstd_value_prior_name', 'mulstd_dage_prior_name',
        'mulstd_dtime_prior_name'. ``fun(age, time)`` returns
        ``(value, dage, dtime)``: prior names or None, where a float value
        is stored as a constant instead of a prior.
    nslist_table
        Dict mapping an nslist name to a list of (node name, smooth name)
        pairs.
    rate_table
        List of dicts with key 'name' and optional 'parent_smooth',
        'child_smooth', 'child_nslist'.
    mulcov_table
        List of dicts with keys 'covariate', 'type', 'effected', 'smooth'
        and optional 'group', 'subsmooth'.
    option_table
        List of dicts with keys 'name' and 'value'. The options
        'avgint_extra_columns' and 'data_extra_columns' hold
        whitespace-separated extra column names.

    Notes
    -----
    Missing optional keys are filled in on the passed prior, avgint and data
    dicts (the arguments are modified). A warning is printed once per table
    when a 'group' / 'subgroup' key is missing and the first one is used.
    """
    #*# import dismod_at
    from cascade_at.dismod.api.dismod_io import DismodIO
    db = DismodIO(file_name)
    # ----------------------------------------------------------------------
    # avgint_extra_columns, data_extra_columns
    avgint_extra_columns = list()
    data_extra_columns = list()
    for row in option_table:
        if row['name'] == 'avgint_extra_columns':
            avgint_extra_columns = row['value'].split()
        if row['name'] == 'data_extra_columns':
            data_extra_columns = row['value'].split()
    # ----------------------------------------------------------------------
    # create database
    new = True
    #*# connection = dismod_at.create_connection(file_name, new)
    # ----------------------------------------------------------------------
    # create age table
    col_name = ['age']
    col_type = ['real']
    row_list = []
    for age in age_list:
        row_list.append([age])
    tbl_name = 'age'
    #*# dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list)
    db.age = pd.DataFrame(row_list, columns=col_name)
    # ----------------------------------------------------------------------
    # create time table
    col_name = ['time']
    col_type = ['real']
    row_list = []
    for time in time_list:
        row_list.append([time])
    tbl_name = 'time'
    #*# dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list)
    db.time = pd.DataFrame(row_list, columns=col_name)
    # ----------------------------------------------------------------------
    # create integrand table
    col_name = ['integrand_name', 'minimum_meas_cv']
    col_type = ['text', 'real']
    row_list = []
    for i in range(len(integrand_table)):
        minimum_meas_cv = 0.0
        if 'minimum_meas_cv' in integrand_table[i]:
            minimum_meas_cv = integrand_table[i]['minimum_meas_cv']
        row = [integrand_table[i]['name'], minimum_meas_cv]
        row_list.append(row)
    tbl_name = 'integrand'
    #*# dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list)
    db.integrand = pd.DataFrame(row_list, columns=col_name)
    #
    global_integrand_name2id = {}
    for i in range(len(row_list)):
        global_integrand_name2id[row_list[i][0]] = i
    # ----------------------------------------------------------------------
    # create density table
    col_name = ['density_name']
    col_type = ['text']
    row_list = [
        ['uniform'],
        ['gaussian'],
        ['laplace'],
        ['students'],
        ['log_gaussian'],
        ['log_laplace'],
        ['log_students'],
        ['cen_gaussian'],
        ['cen_laplace'],
        ['cen_log_gaussian'],
        ['cen_log_laplace'],
    ]
    tbl_name = 'density'
    #*# dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list)
    db.density = pd.DataFrame(row_list, columns=col_name)
    #
    global_density_name2id = {}
    for i in range(len(row_list)):
        global_density_name2id[row_list[i][0]] = i
    # ----------------------------------------------------------------------
    # create covariate table
    col_name = ['covariate_name', 'reference', 'max_difference']
    col_type = ['text', 'real', 'real']
    row_list = []
    for i in range(len(covariate_table)):
        max_difference = None
        if 'max_difference' in covariate_table[i]:
            max_difference = covariate_table[i]['max_difference']
        row = [
            covariate_table[i]['name'], covariate_table[i]['reference'],
            max_difference
        ]
        row_list.append(row)
    tbl_name = 'covariate'
    #*# dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list)
    db.covariate = pd.DataFrame(row_list, columns=col_name)
    #
    global_covariate_name2id = {}
    for i in range(len(covariate_table)):
        global_covariate_name2id[covariate_table[i]['name']] = i
    # ----------------------------------------------------------------------
    # create node table
    global_node_name2id = {}
    for i in range(len(node_table)):
        global_node_name2id[node_table[i]['name']] = i
    #
    col_name = ['node_name', 'parent']
    col_type = ['text', 'integer']
    row_list = []
    for i in range(len(node_table)):
        node = node_table[i]
        name = node['name']
        parent = node['parent']
        if parent == '':
            parent = None
        else:
            parent = global_node_name2id[parent]
        row_list.append([name, parent])
    tbl_name = 'node'
    #*# dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list)
    db.node = pd.DataFrame(row_list, columns=col_name)
    # ----------------------------------------------------------------------
    # create subgroup table
    global_subgroup_name2id = {}
    global_group_name2id = {}
    group_id = 0
    group_name = subgroup_table[0]['group']
    global_group_name2id[group_name] = group_id
    for i in range(len(subgroup_table)):
        global_subgroup_name2id[subgroup_table[i]['subgroup']] = i
        if subgroup_table[i]['group'] != group_name:
            group_id = group_id + 1
            group_name = subgroup_table[i]['group']
            global_group_name2id[group_name] = group_id
    #
    col_name = ['subgroup_name', 'group_id', 'group_name']
    col_type = ['text', 'integer', 'text']
    row_list = []
    for i in range(len(subgroup_table)):
        if i == 0:
            group_id = 0
            group_name = subgroup_table[0]['group']
        elif subgroup_table[i]['group'] != group_name:
            group_id = group_id + 1
            group_name = subgroup_table[i]['group']
        subgroup_name = subgroup_table[i]['subgroup']
        row_list.append([subgroup_name, group_id, group_name])
    tbl_name = 'subgroup'
    #*# dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list)
    db.subgroup = pd.DataFrame(row_list, columns=col_name)
    # ----------------------------------------------------------------------
    # create prior table
    col_name = [
        'prior_name', 'lower', 'upper', 'mean', 'std', 'density_id', 'eta',
        'nu'
    ]
    col_type = [
        'text', 'real', 'real', 'real', 'real', 'integer', 'real', 'real'
    ]
    row_list = []
    for i in range(len(prior_table)):
        prior = prior_table[i]
        density_id = global_density_name2id[prior['density']]
        #
        # columns that have null for default value
        for key in ['lower', 'upper', 'std', 'eta', 'nu']:
            if key not in prior:
                prior[key] = None
        #
        row = [
            prior['name'],
            prior['lower'],
            prior['upper'],
            prior['mean'],
            prior['std'],
            density_id,
            prior['eta'],
            prior['nu'],
        ]
        row_list.append(row)
    tbl_name = 'prior'
    #*# dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list)
    db.prior = pd.DataFrame(row_list, columns=col_name)
    #
    global_prior_name2id = {}
    for i in range(len(row_list)):
        global_prior_name2id[row_list[i][0]] = i
    # ----------------------------------------------------------------------
    # create weight table
    col_name = ['weight_name', 'n_age', 'n_time']
    col_type = ['text', 'integer', 'integer']
    row_list = []
    for i in range(len(weight_table)):
        weight = weight_table[i]
        name = weight['name']
        n_age = len(weight['age_id'])
        n_time = len(weight['time_id'])
        row_list.append([name, n_age, n_time])
    tbl_name = 'weight'
    #*# dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list)
    db.weight = pd.DataFrame(row_list, columns=col_name)
    #
    global_weight_name2id = {}
    for i in range(len(weight_table)):
        global_weight_name2id[weight_table[i]['name']] = i
    # null is used for constant weighting
    global_weight_name2id[''] = None
    # ----------------------------------------------------------------------
    # create weight_grid table
    col_name = ['weight_id', 'age_id', 'time_id', 'weight']
    col_type = ['integer', 'integer', 'integer', 'real']
    row_list = []
    for i in range(len(weight_table)):
        weight = weight_table[i]
        age_id = weight['age_id']
        time_id = weight['time_id']
        fun = weight['fun']
        for j in age_id:
            for k in time_id:
                w = fun(age_list[j], time_list[k])
                row_list.append([i, j, k, w])
    tbl_name = 'weight_grid'
    #*# dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list)
    db.weight_grid = pd.DataFrame(row_list, columns=col_name)
    # ----------------------------------------------------------------------
    # create smooth table
    col_name = [
        'smooth_name', 'n_age', 'n_time', 'mulstd_value_prior_id',
        'mulstd_dage_prior_id', 'mulstd_dtime_prior_id'
    ]
    col_type = ['text', 'integer', 'integer', 'integer', 'integer', 'integer']
    row_list = []
    for i in range(len(smooth_table)):
        smooth = smooth_table[i]
        name = smooth['name']
        n_age = len(smooth['age_id'])
        n_time = len(smooth['time_id'])
        #
        prior_id = dict()
        for key in ['value', 'dage', 'dtime']:
            prior_id[key] = None
            mulstd_key = 'mulstd_' + key + '_prior_name'
            if mulstd_key in smooth:
                prior_name = smooth[mulstd_key]
                if prior_name is not None:
                    prior_id[key] = global_prior_name2id[prior_name]
        #
        row_list.append([
            name,
            n_age,
            n_time,
            prior_id['value'],
            prior_id['dage'],
            prior_id['dtime'],
        ])
    tbl_name = 'smooth'
    #*# dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list)
    db.smooth = pd.DataFrame(row_list, columns=col_name)
    #
    global_smooth_name2id = {}
    for i in range(len(smooth_table)):
        global_smooth_name2id[smooth_table[i]['name']] = i
    # ----------------------------------------------------------------------
    # create smooth_grid table
    col_name = [
        'smooth_id',
        'age_id',
        'time_id',
        'value_prior_id',
        'dage_prior_id',
        'dtime_prior_id',
        'const_value',
    ]
    col_type = [
        'integer',  # smooth_id
        'integer',  # age_id
        'integer',  # time_id
        'integer',  # value_prior_id
        'integer',  # dage_prior_id
        'integer',  # dtime_prior_id
        'real',  # const_value
    ]
    row_list = []
    for i in range(len(smooth_table)):
        smooth = smooth_table[i]
        age_id = smooth['age_id']
        time_id = smooth['time_id']
        fun = smooth['fun']
        # Grid points at the largest age / time get no difference prior.
        max_j = 0
        for j in age_id:
            if age_list[j] > age_list[max_j]:
                max_j = j
        max_k = 0
        for k in time_id:
            if time_list[k] > time_list[max_k]:
                max_k = k
        for j in age_id:
            for k in time_id:
                (v, da, dt) = fun(age_list[j], time_list[k])
                #
                if j == max_j:
                    da = None
                elif da is not None:
                    da = global_prior_name2id[da]
                #
                if k == max_k:
                    dt = None
                elif dt is not None:
                    dt = global_prior_name2id[dt]
                #
                # A float value is a constant; anything else names a prior.
                const_value = None
                if isinstance(v, float):
                    const_value = v
                    v = None
                elif v is not None:
                    v = global_prior_name2id[v]
                row_list.append([i, j, k, v, da, dt, const_value])
    tbl_name = 'smooth_grid'
    #*# dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list)
    db.smooth_grid = pd.DataFrame(row_list, columns=col_name)
    # ----------------------------------------------------------------------
    # create nslist table
    col_name = ['nslist_name']
    col_type = ['text']
    row_list = list()
    for nslist_name in nslist_table:
        row_list.append([nslist_name])
    tbl_name = 'nslist'
    #*# dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list)
    db.nslist = pd.DataFrame(row_list, columns=col_name)
    #
    global_nslist_name2id = dict()
    for i in range(len(row_list)):
        global_nslist_name2id[row_list[i][0]] = i
    # ----------------------------------------------------------------------
    # create nslist_pair table
    col_name = ['nslist_id', 'node_id', 'smooth_id']
    col_type = ['integer', 'integer', 'integer']
    row_list = list()
    tbl_name = 'nslist_pair'
    for key in nslist_table:
        pair_list = nslist_table[key]
        nslist_id = global_nslist_name2id[key]
        for pair in pair_list:
            node_id = global_node_name2id[pair[0]]
            smooth_id = global_smooth_name2id[pair[1]]
            row_list.append([nslist_id, node_id, smooth_id])
    #*# dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list)
    db.nslist_pair = pd.DataFrame(row_list, columns=col_name)
    # ----------------------------------------------------------------------
    # create rate table
    col_name = [
        'rate_name', 'parent_smooth_id', 'child_smooth_id', 'child_nslist_id'
    ]
    col_type = ['text', 'integer', 'integer', 'integer']
    row_list = list()
    # Every rate gets a row, in this fixed order, whether or not it is
    # present in rate_table.
    for rate_name in ['pini', 'iota', 'rho', 'chi', 'omega']:
        row = [rate_name, None, None, None]
        for i in range(len(rate_table)):
            rate = rate_table[i]
            if rate['name'] == rate_name:
                row = [rate_name]
                for key in ['parent_smooth', 'child_smooth', 'child_nslist']:
                    entry = None
                    if key in rate:
                        entry = rate[key]
                    if entry is not None:
                        if key == 'child_nslist':
                            entry = global_nslist_name2id[entry]
                        else:
                            entry = global_smooth_name2id[entry]
                    row.append(entry)
        row_list.append(row)
    tbl_name = 'rate'
    #*# dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list)
    db.rate = pd.DataFrame(row_list, columns=col_name)
    global_rate_name2id = {}
    for i in range(len(row_list)):
        global_rate_name2id[row_list[i][0]] = i
    # ----------------------------------------------------------------------
    # create mulcov table
    col_name = [
        'mulcov_type',
        'rate_id',
        'integrand_id',
        'covariate_id',
        'group_id',
        'group_smooth_id',
        'subgroup_smooth_id',
    ]
    col_type = [
        'text',  # mulcov_type
        'integer',  # rate_id
        'integer',  # integrand_id
        'integer',  # covariate_id
        'integer',  # group_id
        'integer',  # group_smooth_id
        'integer',  # subgroup_smooth_id
    ]
    row_list = []
    warning_printed = False
    for i in range(len(mulcov_table)):
        mulcov = mulcov_table[i]
        mulcov_type = mulcov['type']
        effected = mulcov['effected']
        covariate_id = global_covariate_name2id[mulcov['covariate']]
        #
        # rate_id and integrand_id
        if mulcov_type == 'rate_value':
            rate_id = global_rate_name2id[effected]
            integrand_id = None
        else:
            integrand_id = global_integrand_name2id[effected]
            rate_id = None
        #
        # group_id
        if 'group' in mulcov:
            group_id = global_group_name2id[mulcov['group']]
        else:
            group_id = 0
            if not warning_printed:
                msg = 'create_database Warning: '
                msg += 'group key missing in mulcov table,\n'
                msg += 'using default value; i.e., first group '
                msg += '(you should fix this).'
                print(msg)
                warning_printed = True
        #
        # group_smooth_id
        if mulcov['smooth'] is None:
            group_smooth_id = None
        else:
            group_smooth_id = global_smooth_name2id[mulcov['smooth']]
        #
        # subgroup_smooth_id
        if 'subsmooth' not in mulcov:
            subgroup_smooth_id = None
        elif mulcov['subsmooth'] is None:
            subgroup_smooth_id = None
        else:
            subgroup_smooth_id = global_smooth_name2id[mulcov['subsmooth']]
        #
        row_list.append([
            mulcov_type,
            rate_id,
            integrand_id,
            covariate_id,
            group_id,
            group_smooth_id,
            subgroup_smooth_id,
        ])
    tbl_name = 'mulcov'
    #*# dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list)
    db.mulcov = pd.DataFrame(row_list, columns=col_name)
    # ----------------------------------------------------------------------
    # avgint table
    #
    # extra_name, extra_type
    extra_name = []
    extra_type = []
    if (len(avgint_table) > 0):
        extra_name = avgint_extra_columns
        row = avgint_table[0]
        for key in extra_name:
            if isinstance(row[key], str):
                extra_type.append('text')
            elif isinstance(row[key], int):
                extra_type.append('integer')
            elif isinstance(row[key], float):
                extra_type.append('real')
            else:
                assert False
    #
    # col_name
    col_name = extra_name + [
        'integrand_id', 'node_id', 'subgroup_id', 'weight_id', 'age_lower',
        'age_upper', 'time_lower', 'time_upper'
    ]
    for j in range(len(covariate_table)):
        col_name.append('x_%s' % j)
    #
    # col_type
    col_type = extra_type + [
        'integer',  # integrand_id
        'integer',  # node_id
        'integer',  # subgroup_id
        'integer',  # weight_id
        'real',  # age_lower
        'real',  # age_upper
        'real',  # time_lower
        'real'  # time_upper
    ]
    for j in range(len(covariate_table)):
        col_type.append('real')
    #
    # row_list
    row_list = []
    warning_printed = False
    for i in range(len(avgint_table)):
        avgint = avgint_table[i]
        #
        # subgroup column has a default value
        if 'subgroup' not in avgint:
            avgint['subgroup'] = subgroup_table[0]['subgroup']
            if not warning_printed:
                msg = 'create_database Warning: '
                msg += 'subgroup key missing in avgint table,\n'
                msg += 'using default value; i.e., first subgroup '
                msg += '(you should fix this).'
                print(msg)
                warning_printed = True
        #
        # extra columns first
        row = list()
        for name in extra_name:
            row.append(avgint[name])
        #
        integrand_id = global_integrand_name2id[avgint['integrand']]
        node_id = global_node_name2id[avgint['node']]
        subgroup_id = global_subgroup_name2id[avgint['subgroup']]
        weight_id = global_weight_name2id[avgint['weight']]
        row = row + [
            integrand_id, node_id, subgroup_id, weight_id, avgint['age_lower'],
            avgint['age_upper'], avgint['time_lower'], avgint['time_upper']
        ]
        for j in range(len(covariate_table)):
            # Look the value up by covariate *name*; float() must not be
            # applied to the name itself. The astype below casts the x_j
            # columns to float.
            row.append(avgint[covariate_table[j]['name']])
        row_list.append(row)
    tbl_name = 'avgint'
    #*# dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list)
    # Cast each column to the pandas dtype matching its declared SQL type.
    db.avgint = (pd.DataFrame(row_list, columns=col_name).astype(
        dict(
            zip(
                col_name,
                pd.Series(col_type).replace({
                    'integer': 'int',
                    'real': 'float'
                })))))
    # ----------------------------------------------------------------------
    # create data table
    #
    #
    # extra_name, extra_type
    extra_name = []
    extra_type = []
    if (len(data_table) > 0):
        extra_name = data_extra_columns
        row = data_table[0]
        for key in extra_name:
            if isinstance(row[key], str):
                extra_type.append('text')
            elif isinstance(row[key], int):
                extra_type.append('integer')
            elif isinstance(row[key], float):
                extra_type.append('real')
            else:
                assert False
    #
    # col_name
    col_name = extra_name + [
        'integrand_id',
        'node_id',
        'subgroup_id',
        'weight_id',
        'age_lower',
        'age_upper',
        'time_lower',
        'time_upper',
        'hold_out',
        'density_id',
        'meas_value',
        'meas_std',
        'eta',
        'nu',
    ]
    for j in range(len(covariate_table)):
        col_name.append('x_%s' % j)
    #
    # col_type
    col_type = extra_type + [
        'integer',  # integrand_id
        'integer',  # node_id
        'integer',  # subgroup_id
        'integer',  # weight_id
        'real',  # age_lower
        'real',  # age_upper
        'real',  # time_lower
        'real',  # time_upper
        'integer',  # hold_out
        'integer',  # density_id
        'real',  # meas_value
        'real',  # meas_std
        'real',  # eta
        'real',  # nu
    ]
    for j in range(len(covariate_table)):
        col_type.append('real')
    row_list = []
    warning_printed = False
    for i in range(len(data_table)):
        data = data_table[i]
        #
        # extra columns first
        row = list()
        for name in extra_name:
            row.append(data[name])
        #
        # columns that have null for default value
        for key in ['meas_std', 'eta', 'nu']:
            if key not in data:
                data[key] = None
        #
        # subgroup column has a default value
        if 'subgroup' not in data:
            data['subgroup'] = subgroup_table[0]['subgroup']
            if not warning_printed:
                msg = 'create_database Warning: '
                msg += 'subgroup key missing in data table,\n'
                msg += 'using default value; i.e., first subgroup '
                msg += '(you should fix this).'
                print(msg)
                warning_printed = True
        #
        integrand_id = global_integrand_name2id[data['integrand']]
        density_id = global_density_name2id[data['density']]
        node_id = global_node_name2id[data['node']]
        subgroup_id = global_subgroup_name2id[data['subgroup']]
        weight_id = global_weight_name2id[data['weight']]
        hold_out = int(data['hold_out'])
        row = row + [
            integrand_id, node_id, subgroup_id, weight_id, data['age_lower'],
            data['age_upper'], data['time_lower'], data['time_upper'],
            hold_out, density_id, data['meas_value'], data['meas_std'],
            data['eta'], data['nu']
        ]
        for j in range(len(covariate_table)):
            row.append(float(data[covariate_table[j]['name']]))
        row_list.append(row)
    tbl_name = 'data'
    #*# dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list)
    data = pd.DataFrame(row_list, columns=col_name)
    data['data_name'] = ''
    db.data = data
    # ----------------------------------------------------------------------
    # create option table
    col_name = ['option_name', 'option_value']
    col_type = ['text unique', 'text']
    row_list = []
    for row in option_table:
        name = row['name']
        value = row['value']
        row_list.append([name, value])
    tbl_name = 'option'
    #*# dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list)
    db.option = pd.DataFrame(row_list, columns=col_name)
    # ----------------------------------------------------------------------
    # close the connection
    #*# connection.close()
    return
def dm(tmp_path):
    """
    Return a DismodIO handle pointing at ``dismod.db`` underneath ``tmp_path``.

    ``tmp_path`` must support the ``/`` operator (e.g. a ``pathlib.Path``).
    """
    db_file = tmp_path / 'dismod.db'
    return DismodIO(path=db_file)
def example_db(
    file_name,
    test_config={
        'node_effects': False,
        'group_effects': False,
        'sex_effect': False,
        'use_group_mulcov': False,
        'include_group_data': False,
        'zero_sum_mulcov': False
    },
    truth={},
    prior=dict(subgroup_effects=None,
               parent_density='uniform',
               parent_std=None,
               child_density='uniform',
               child_std=None,
               subgroup_density='uniform',
               subgroup_std=1),
    node_effects=None,
    subgroup_effects=None,
    tol_fixed=1e-10,
    tol_random=1e-10,
):
    """
    Create a small example database with a single rate (iota) measured
    through the Sincidence integrand, and return a DismodIO handle to it.

    Any existing file at ``file_name`` is removed first.

    Parameters
    ----------
    file_name
        Path of the sqlite database to (re)create.
    test_config
        Dict of boolean switches: 'node_effects', 'group_effects',
        'sex_effect', 'use_group_mulcov', 'include_group_data' and
        'zero_sum_mulcov'. The default dict is only read, never modified.
    truth
        Dict of true values used to simulate the data. 'iota_parent_true' is
        always read; 'iota_sex_true' (indexed by group name) is read when
        the sex effect is on; 'iota_group_true' is read when group effects
        are on. The empty default is a placeholder -- callers must pass it.
    prior
        Dict of optional prior overrides, looked up with the keys
        '{parent,child}_{density,mean,std,eta}' and
        '{group,subgroup}_{density,mean,std}'.
    node_effects
        Mapping node name -> effect added to the log of the measurement.
        Iterated unconditionally, so callers must pass it.
    subgroup_effects
        Mapping subgroup name -> effect. Iterated unconditionally, so callers
        must pass it.
    tol_fixed, tol_random
        Values written to the 'tolerance_fixed' / 'tolerance_random' options.

    Returns
    -------
    DismodIO handle opened on ``file_name`` after the tables were written.
    """
    if os.path.exists(file_name):
        os.remove(file_name)

    # Note that the a, t values are not used for this example
    def fun_iota_parent(a, t):
        return ('prior_iota_parent', None, None)

    if test_config['node_effects']:

        def fun_iota_child(a, t):
            return ('prior_iota_child', None, None)

    if test_config['group_effects']:

        def fun_iota_group(a, t):
            return ('prior_iota_group', None, None)

        def fun_iota_subgroup(a, t):
            return ('prior_iota_subgroup', None, None)

    if test_config['sex_effect']:

        def fun_iota_sex(a, t):
            return ('prior_iota_sex', None, None)

    # TODO: Delete dependency with dismod_at
    # ----------------------------------------------------------------------
    # age table
    age_list = [0.0, 100.0]
    #
    # time table
    time_list = [1990.0, 2020.0]
    #
    # integrand table
    integrand_table = [{'name': 'Sincidence'}]
    #
    # node table: p1 -> (c1, c2) when node effects are modelled, else p1 only
    if test_config['node_effects']:
        node_table = [
            {'name': 'p1', 'parent': ''},
            {'name': 'c1', 'parent': 'p1'},
            {'name': 'c2', 'parent': 'p1'},
        ]
    else:
        node_table = [
            {'name': 'p1', 'parent': ''},
        ]
    #
    # weight table:
    weight_table = list()
    #
    # covariate table
    covariate_table = [{
        'name': 'one',
        'reference': 0.0,
        'max_difference': None
    }]
    if test_config['sex_effect']:
        covariate_table.append({
            'name': 'sex',
            'reference': 0.0,
            'max_difference': None
        })
    #
    # mulcov table
    mulcov_table = []
    if test_config['group_effects']:
        mulcov_table.append({
            'covariate': 'one',
            'type': 'rate_value',
            'effected': 'iota',
            'group': 'g1',
            'smooth': 'smooth_iota_group' if test_config['use_group_mulcov'] else None,
            'subsmooth': 'smooth_iota_subgroup'
        })
    if test_config['sex_effect']:
        mulcov_table.append({
            'covariate': 'sex',
            'type': 'rate_value',
            'effected': 'iota',
            'group': 'g1' if test_config['group_effects'] else 'none',
            'smooth': 'smooth_iota_sex'
        })
    #
    # avgint table:
    avgint_table = list()
    #
    # nslist_table:
    nslist_table = dict()
    # ----------------------------------------------------------------------
    # subgroup_table
    subgroup_table = [
        {'subgroup': 'none', 'group': 'none'},
        {'subgroup': 's1', 'group': 'g1'},
        {'subgroup': 's2', 'group': 'g1'},
    ]
    # Loop-invariant lookup frame (subgroup -> group), built once.
    subgroups = pd.DataFrame(subgroup_table)
    # ----------------------------------------------------------------------
    # data table:
    data_table = list()
    # Template row; the loops below overwrite the varying keys and append a
    # shallow copy for every simulated measurement.
    row = {
        'density': 'gaussian',
        'weight': '',
        'hold_out': False,
        'time_lower': 2000.0,
        'time_upper': 2000.0,
        'age_lower': 50.0,
        'age_upper': 50.0,
        'integrand': 'Sincidence',
        'one': 1,
        'node': 'p1',
    }
    sexs = [0, 1] if test_config['sex_effect'] else [0]
    for node, node_effect in node_effects.items():
        if (not test_config['node_effects'] and node != 'p1'):
            continue
        # Exclude data for the parent node
        if (test_config['node_effects'] and node == 'p1'):
            continue
        for sex in sexs:
            for sg, sge in subgroup_effects.items():
                if (not test_config['group_effects'] and sg != 'none'):
                    continue
                # Exclude data for the group -- if fitting both nodes and
                # groups, omitting sg none creates Hessian errors
                if (test_config['group_effects']
                        and not test_config['include_group_data']
                        and sg == 'none'):
                    continue
                total_effect = 0
                if test_config['sex_effect']:
                    use_sex_covariate = (sg != 'none') or (
                        sg == 'none' and not test_config['group_effects'])
                    row['sex'] = sex if use_sex_covariate else -1
                    group = subgroups.loc[
                        subgroups.subgroup == sg, 'group'].squeeze(
                        ) if test_config['group_effects'] else 'g1'
                    sex_effect = sex * truth['iota_sex_true'][group]
                    total_effect += sex_effect
                if test_config['node_effects']:
                    row['node'] = node
                    total_effect += node_effect
                row['subgroup'] = sg
                sg_effect = 0
                if test_config['group_effects']:
                    if sg in ('s1', 's2'):
                        sg_effect = truth['iota_group_true'] + sge
                total_effect += sg_effect
                row['meas_value'] = truth['iota_parent_true'] * np.exp(
                    total_effect)
                row['meas_std'] = row['meas_value'] * 1e-1
                data_table.append(copy.copy(row))
    # ----------------------------------------------------------------------
    # prior_table
    prior_table = [
        {   # prior_iota_parent
            'name': 'prior_iota_parent',
            # Fallback used to be the misspelled 'iniform', which is not a
            # density name; every other prior here falls back to 'uniform'.
            'density': prior.get('parent_density', 'uniform'),
            'mean': prior.get('parent_mean', truth['iota_parent_true'] * .5),
            'std': prior.get('parent_std', 0),
            'eta': prior.get('parent_eta', None),
            'lower': truth['iota_parent_true'] * 1e-2,
            'upper': truth['iota_parent_true'] * 1e+2,
        },
        {   # prior_iota_child
            'name': 'prior_iota_child',
            'density': prior.get('child_density', 'uniform'),
            'mean': prior.get('child_mean', .001),
            'std': prior.get('child_std', 0),
            'eta': prior.get('child_eta', None),
            'lower': -np.inf,
            'upper': +np.inf,
        },
        {   # prior_iota_group
            'name': 'prior_iota_group',
            'density': prior.get('group_density', 'uniform'),
            'mean': prior.get('group_mean', 0.0),
            'std': prior.get('group_std', 0),
        },
        {   # prior_iota_subgroup
            'name': 'prior_iota_subgroup',
            'density': prior.get('subgroup_density', 'uniform'),
            'mean': prior.get('subgroup_mean', 0.0),
            'std': prior.get('subgroup_std', 0),
        }
    ]
    if test_config['sex_effect']:
        prior_table.append({
            # prior_iota_sex
            'name': 'prior_iota_sex',
            'density': 'uniform',
            'mean': 0.0,
            'lower': -100,
            'upper': 100
        })
    # ----------------------------------------------------------------------
    # smooth table
    smooth_table = [{
        # smooth_iota_parent
        'name': 'smooth_iota_parent',
        'age_id': [0],
        'time_id': [0],
        'fun': fun_iota_parent
    }]
    if test_config['node_effects']:
        smooth_table += [{
            # smooth_iota_child
            'name': 'smooth_iota_child',
            'age_id': [0],
            'time_id': [0],
            'fun': fun_iota_child
        }]
    if test_config['group_effects']:
        if test_config['use_group_mulcov']:
            smooth_table += [{
                # smooth_iota_group
                'name': 'smooth_iota_group',
                'age_id': [0],
                'time_id': [0],
                'fun': fun_iota_group
            }]
        smooth_table += [{
            # smooth_iota_subgroup
            'name': 'smooth_iota_subgroup',
            'age_id': [0],
            'time_id': [0],
            'fun': fun_iota_subgroup
        }]
    if test_config['sex_effect']:
        smooth_table.append({
            # smooth_iota_sex
            'name': 'smooth_iota_sex',
            'age_id': [0],
            'time_id': [0],
            'fun': fun_iota_sex
        })
    # ----------------------------------------------------------------------
    # rate table
    rate_table = [{
        'name': 'iota',
        'parent_smooth': 'smooth_iota_parent',
        'child_smooth': 'smooth_iota_child' if test_config['node_effects'] else None,
    }]
    # ----------------------------------------------------------------------
    # option_table
    option_table = [
        {'name': 'parent_node_name', 'value': 'p1'},
        {'name': 'print_level_fixed', 'value': 5},
        {'name': 'quasi_fixed', 'value': 'false'},
        {'name': 'tolerance_fixed', 'value': tol_fixed},
        {'name': 'bound_frac_fixed', 'value': '1e-10'},
        {'name': 'derivative_test_random', 'value': 'second-order'},
        {'name': 'tolerance_random', 'value': tol_random},
        {
            'name': 'zero_sum_mulcov_group',
            'value': 'g1' if test_config['group_effects']
            and test_config['zero_sum_mulcov'] else None
        },
        {
            'name': 'zero_sum_child_rate',
            'value': 'iota' if test_config['node_effects'] else None
        },
        {'name': 'rate_case', 'value': 'iota_pos_rho_zero'},
        {'name': 'max_num_iter_fixed', 'value': '1000'},
        {'name': 'max_num_iter_random', 'value': '100'}
    ]
    # ----------------------------------------------------------------------
    # TODO: Change to using DismodIO instead of dismod_at.create_database
    from cascade_at.dismod.api.dismod_io import DismodIO
    # NOTE(review): a handle is opened before create_database runs, exactly
    # as before; it looks redundant but is kept in case it has a side effect.
    db = DismodIO(file_name)
    try:
        from .create_database import create_database
    except ImportError:
        # Not running as part of a package; fall back to a top-level import.
        from create_database import create_database
    # create database
    create_database(file_name, age_list, time_list, integrand_table,
                    node_table, subgroup_table, weight_table, covariate_table,
                    avgint_table, data_table, prior_table, smooth_table,
                    nslist_table, rate_table, mulcov_table, option_table)
    # ----------------------------------------------------------------------
    # Re-open so the returned handle reflects the freshly written tables.
    db = DismodIO(file_name)
    return db
def dmdismod(cmd):
    """
    An extended dmdismod command, to handle Brad's strategy of:
        1) fit the non-ODE integrands to initialize an ODE fit,
        2) fit the non-ODE and ODE integrands
        3) fit to log-student data densities.

    Example calling sequence:
        os.system('cp /Users/gma/ihme/epi/at_cascade/data/475588/dbs/100/3/dismod.db /tmp/t1_diabetes.db')
        dmdismod(f'{_dismod_cmd_} /tmp/t1_diabetes.db ODE init')
        dmdismod(f'{_dismod_cmd_} /tmp/t1_diabetes.db ODE fit --ode-hold-out-list mtexcess')
        dmdismod(f'{_dismod_cmd_} /tmp/t1_diabetes.db ODE students --ode-hold-out-list mtexcess')

    Parameters
    ----------
    cmd
        Whitespace-separated command line. The first token is the program
        name and is dropped before parsing; the rest must supply the
        positional ``path``, ``dispatch`` and ``option`` arguments plus any
        of the optional flags defined in ``parse_args``.

    Notes
    -----
    The subsampling seed comes from ``--random-seed`` when it is given and
    non-zero, otherwise from the ``random_seed`` row of the database option
    table, otherwise it is left as None. An ``option`` other than 'init',
    'fit' or 'students' runs nothing. The function returns None.
    """

    def parse_args(argv):
        # argparse is only needed here, so it is imported locally.
        import argparse
        parser = argparse.ArgumentParser()
        parser.add_argument('path',
                            type=str,
                            help='Path to the Dismod_AT sqlite database')
        parser.add_argument(
            'dispatch',
            type=str,
            help=("If dispatch == 'ODE', use ODE fitting strategy."
                  "If missing, use standard dismod_at commands."))
        parser.add_argument(
            'option',
            type=str,
            help="For the ODE fitting strategy, one of ('init', 'fit' or 'students')."
        )
        parser.add_argument(
            "-m",
            "--max-covariate-effect",
            nargs='?',
            type=float,
            default=2,
            help=("Maximum absolute covariate effect = multiplier * (covariate - referece). "
                  "Note that exp(effect) multiplies a model value to get the model value for "
                  "this covariate value. (Noise covariate multipliers are not included.)"))
        parser.add_argument(
            "-c",
            '--mulcov-values',
            nargs='+',
            type=str,
            default=None,
            help="Constrain covariate multipliers to the specified value")
        parser.add_argument("-o",
                            "--ode-hold-out-list",
                            nargs='?',
                            type=str,
                            default=None,
                            const=None,
                            help="Integrands to hold out during the ODE fit")
        parser.add_argument("-s",
                            "--random-seed",
                            nargs='?',
                            type=int,
                            default=None,
                            help="Random seed for the random_subsampling")
        parser.add_argument("-d",
                            "--random-subsample",
                            nargs='?',
                            type=int,
                            default=1000,
                            const=None,
                            help="Number of random subsamples to fit.")
        parser.add_argument(
            "-p",
            "--save-to-path",
            nargs='?',
            type=str,
            default=None,
            const=None,
            help="Path to directory where to store the results")
        parser.add_argument(
            "-t",
            "--reference_db",
            nargs='?',
            type=str,
            default="",
            const="",
            help="Path to the reference databases. Fit results are compared to these databases for testing purposes."
        )

        # argv[0] is the program name; argparse only gets the remainder.
        parsed = parser.parse_args(argv[1:])
        parsed.cmd = sys.argv[0]
        if parsed.mulcov_values is None:
            parsed.mulcov_values = []
        else:
            # The flat token list is consumed three at a time; the third
            # token of each triple is converted to float.
            parsed.mulcov_values = [[
                a, b, float(c)
            ] for a, b, c in np.asarray(parsed.mulcov_values).reshape(-1, 3)]
        return parsed

    args = cmd.split()
    p_args = parse_args(args)

    print('-' * 10)
    LOG.info(cmd)
    print('-' * 10)

    if p_args.random_seed:
        random_seed = p_args.random_seed
        LOG.info(
            f"Setting the subsampling random_seed to the dmdismod argument value = {random_seed}"
        )
    else:
        db = DismodIO(p_args.path)
        option = db.option
        seed_rows = option.loc[option.option_name == 'random_seed',
                               'option_value']
        if not seed_rows.empty:
            # Take the scalar explicitly instead of calling int() on a Series.
            random_seed = int(seed_rows.iloc[0])
            LOG.info(
                f"Setting the subsampling random_seed to the database option table value = {random_seed}"
            )
        else:
            random_seed = None
            LOG.info("The subsampling random_seed not set.")

    # Every ODE sub-command receives the dismod executable followed by the
    # caller's own arguments (program name replaced).
    ode_args = [_dismod_cmd_] + args[1:]
    if p_args.option == "init":
        init_ode_command(
            ode_args,
            max_covariate_effect=p_args.max_covariate_effect,
            mulcov_values=p_args.mulcov_values,
            ode_hold_out_list=p_args.ode_hold_out_list,
            random_seed=random_seed,
            random_subsample=p_args.random_subsample,
            save_to_path=p_args.save_to_path,
            reference_db=p_args.reference_db)
    elif p_args.option == "fit":
        fit_ode_command(
            ode_args,
            ode_hold_out_list=p_args.ode_hold_out_list,
            random_seed=random_seed,
            random_subsample=p_args.random_subsample,
            save_to_path=p_args.save_to_path,
            reference_db=p_args.reference_db)
    elif p_args.option == "students":
        fit_students_command(
            ode_args,
            ode_hold_out_list=p_args.ode_hold_out_list,
            random_seed=random_seed,
            random_subsample=p_args.random_subsample,
            save_to_path=p_args.save_to_path,
            reference_db=p_args.reference_db)
def reference_dbs(case):
    """
    Locate the reference databases of a test case.

    Returns a 2-tuple: the case directory (``_CASCADE_DATA_PATH_ / case``)
    and a dict with the keys 'no_ode', 'yes_ode' and 'students', each mapped
    to a DismodIO handle on ``<key>/<key>.db`` inside that directory.
    """
    case_dir = _CASCADE_DATA_PATH_ / case
    handles = {
        stage: DismodIO(case_dir / f'{stage}/{stage}.db')
        for stage in ('no_ode', 'yes_ode', 'students')
    }
    return case_dir, handles
def _print_fit_diagnostics(db):
    """Print fit summaries for ``db``; never raises, so it is safe in ``finally``."""
    try:
        fit_var = db.fit_var
        print(f'fit_var_value: {fit_var.fit_var_value.tolist()}')
        var_error = fit_var.fit_var_value - db.truth_var.truth_var_value
        print(f'RMS(fit_var_value - truth): {np.sqrt(np.mean(var_error**2))}')
        print(db.var.merge(fit_var, left_on='var_id', right_on='fit_var_id')
              .drop(columns=['integrand_id', 'fit_var_id', 'residual_value',
                             'residual_dage', 'residual_dtime', 'lagrange_value',
                             'lagrange_dage', 'lagrange_dtime']))
        fit_data_subset = db.fit_data_subset
        print(f'RMS(weighted_residual): {np.sqrt(np.mean(fit_data_subset.weighted_residual**2))}')
        print(db.data.merge(fit_data_subset, left_on='data_id', right_on='fit_data_subset_id')
              .drop(columns=['fit_data_subset_id', 'integrand_id', 'weight_id', 'eta',
                             'nu', 'meas_std', 'avg_integrand', 'hold_out']))
    except Exception as ex:
        # Fit tables may be missing or incomplete when the test failed early;
        # reporting must not replace the caller's return value.
        print(f'Could not print fit diagnostics: {ex}')


def run_test(file_name, test_config, truth_in, start_from_truth = False, test_asymptotic = False):
    """
    Run dismod on an example database and check that the fit reproduces the data.

    Steps: ``init``; fill the truth_var table from ``get_truth``; predict from
    the truth and compare with the measured values; ``fit fixed``; then
    ``fit both`` when node or group effects are configured; finally require
    the weighted residuals of fit_data_subset to be (numerically) zero.

    Parameters
    ----------
    file_name
        Path of the dismod database to test.
    test_config
        Dict with at least the 'group_effects' and 'node_effects' switches;
        it is also passed to ``get_truth``.
    truth_in
        Truth specification passed to ``get_truth``.
    start_from_truth
        If true, start_var and scale_var are set to truth_var before fitting.
    test_asymptotic
        If true, ``sample asymptotic`` (10 samples) is run after the fits.

    Returns
    -------
    (ok, db): ``ok`` is True when every step and check passed, False when any
    exception (including a failed check) occurred; ``db`` is the DismodIO
    handle on ``file_name``. Fit diagnostics are printed in both cases.
    """
    from cascade_at.dismod.constants import _dismod_cmd_

    def _dismod(*words):
        # Every dismod invocation shares the executable and database path.
        system([_dismod_cmd_, file_name, *words])

    # Opened outside the try block so that the handlers below can always
    # refer to db; a failure to open the database propagates unchanged.
    db = DismodIO(file_name)
    try:
        truth = get_truth(test_config, truth_in)

        _dismod('init')

        # Initialize the truth_var table to the correct answer.
        # The table has to exist before it can be overwritten, so let dismod
        # create it from the prior means first.
        _dismod('set', 'truth_var', 'prior_mean')
        truth_var = db.truth_var
        truth_var['truth_var_value'] = truth
        db.truth_var = truth_var

        if start_from_truth:
            _dismod('set', 'start_var', 'truth_var')
            _dismod('set', 'scale_var', 'truth_var')

        # Check that prediction matches the measured data
        cols = db.avgint.columns.tolist()
        db.avgint = db.data.rename(columns={'data_id': 'avgint_id'})[cols]
        _dismod('predict', 'truth_var')
        check = np.allclose(db.data.meas_value, db.predict.avg_integrand, atol=1e-10, rtol=1e-10)
        # Raised explicitly so the check survives ``python -O``.
        if not check:
            raise AssertionError('ERROR: Predict from truth does not match the data')

        # Fit fixed effects
        _dismod('fit', 'fixed')
        if test_asymptotic:
            _dismod('sample', 'asymptotic', 'fixed', '10')

        # Fit both fixed and random effects
        _dismod('set', 'start_var', 'fit_var')
        _dismod('set', 'scale_var', 'fit_var')
        if (test_config['group_effects'] or test_config['node_effects']):
            _dismod('fit', 'both')
        else:
            print ('Skipping fit both because there are no random effects.')

        check = np.allclose(db.fit_data_subset.weighted_residual, [0]*len(db.fit_data_subset), atol=1e-8, rtol=1e-8)
        if not check:
            raise AssertionError('ERROR: Measured values do not match the fit result integrand values.')
        print ('Tests OK -- fit both fit_data_subset and measured_data agree.')

        if test_asymptotic:
            _dismod('sample', 'asymptotic', 'both', '10')

        print ('Test OK')
        return True, db
    except Exception as ex:
        print (ex)
        print ('Test FAILED')
        return False, db
    finally:
        _print_fit_diagnostics(db)
def _ode_command(args,
                 type='',
                 random_subsample=None,
                 ode_hold_out_list=[],
                 random_seed=None,
                 save_to_path=None,
                 reference_db=None,
                 max_covariate_effect=2,
                 mulcov_values=[],
                 nu=5):
    """
    Run one stage of the non-ODE / ODE fitting strategy.

    1) Initialize the database for the non-ODE/ODE fitting strategy
    2) Hold out the ODE strategy related integrands
    3) Fit both on a subset of the integrands.
       The init step fits those corresponding directly to the rates
       (e.g. Sincidence, chi and rho). Omega is assumed to be constrained.
       The fit and students steps fit all but the ode_hold_out_list.
    4) Restore the data table to its original state

    Parameters
    ----------
    args
        Sequence whose first four items are the dismod executable, the
        database path, the command and the option.
    type
        Stage to run: 'no_ode', 'yes_ode' or 'students'.
    random_subsample
        Passed as ``max_sample`` when subsampling each integrand.
    ode_hold_out_list
        Integrands to hold out; passed to ``setup_db`` and, for 'no_ode',
        to ``setup_ode_fit``. The default list is never modified here.
    random_seed
        Seed for ``random.seed``; 0 or None means "use the current time".
    save_to_path
        Passed to ``db.save_database`` after the fit.
    reference_db
        Optional reference database (path string or handle); when truthy,
        the input and output tables are checked against it.
    max_covariate_effect, mulcov_values
        Passed to ``setup_ode_fit`` for the 'no_ode' stage.
    nu
        Degrees of freedom passed to ``set_student_likelihoods``.

    Returns
    -------
    The database object returned by ``setup_db``.

    Raises
    ------
    ValueError
        If ``type`` is not one of the three supported stages. This is checked
        before the database is touched.
    """
    valid_types = ('no_ode', 'yes_ode', 'students')
    if type not in valid_types:
        raise ValueError(
            f"Unrecognized ODE fit type {type!r}; expected one of {valid_types}.")

    LOG.info(f"_ode_command: {' '.join(args)}")

    dismod, path, cmd, option = args[:4]
    db = setup_db(path, dismod=dismod, ode_hold_out_list=ode_hold_out_list)

    try:
        if reference_db and isinstance(reference_db, str):
            reference_db = DismodIO(reference_db)

        # Seed used to randomly subsample data
        if random_seed in [0, None]:
            random_seed = int(time.time())
        random.seed(random_seed)
        msg = '\nrandom_seed = ' + str(random_seed)
        LOG.info(msg)

        # Subsample the data
        for integrand in db.integrands:
            db.random_subsample_data(integrand, max_sample=random_subsample)

        db.compress_age_time_intervals()

        if type == 'no_ode':
            db.setup_ode_fit(max_covariate_effect=max_covariate_effect,
                             mulcov_values=mulcov_values,
                             ode_hold_out_list=ode_hold_out_list)
            hold_out_integrands = db.yes_ode_integrands
        else:
            # 'yes_ode' and 'students'
            hold_out_integrands = db.ode_hold_out_list

        # Remove integrands appropriate to fit type
        db.hold_out_data(integrand_names=hold_out_integrands, hold_out=1)

        if type == 'no_ode':
            system(f'{db.dismod} {db.path} init')
        else:
            # Start from the previous fit when one exists, else from the prior.
            try:
                fit_var = db.fit_var
                if fit_var.empty:
                    fit_var = None
            except ValueError:
                fit_var = None
            if fit_var is not None:
                system(f'{db.dismod} {db.path} set start_var fit_var')
            else:
                system(f'{db.dismod} {db.path} set start_var prior_mean')

        if type == 'students':
            db.set_student_likelihoods(factor_eta=1e-2, nu=nu)

        if reference_db:
            db.check_input_tables(reference_db)

        db.fit(msg=f'fit_ode -- {cmd}_{option}')

        db.save_database(save_to_path)

        if reference_db:
            db.check_output_tables(reference_db)

        db2csv_cmd = f"dismodat.py {db.path} db2csv"
        LOG.info(db2csv_cmd)
        os.system(db2csv_cmd)
    finally:
        # Nested so the data table is restored even if the final save fails.
        try:
            db.save_database(db.path.parent /
                             f'{db.path.stem}_ODE_{type}{db.path.suffix}')
        finally:
            LOG.info("Restoring the data table to its original state.")
            db.data = db.input_data
    return db
def dm(dismod):
    """
    Return a DismodIO handle on the relative path ``dismod-init.db``.

    The ``dismod`` argument is accepted but not used in the body.
    """
    init_db = Path('dismod-init.db')
    return DismodIO(path=init_db)