def run_test(file_name, test_config, truth_in, start_from_truth = False, test_asymptotic = False): from cascade_at.dismod.constants import _dismod_cmd_ # gradient_error = False try: db = DismodIO(file_name) # from dismod_db_api import DismodDbAPI as API # db = API(file_name) truth = get_truth(test_config, truth_in) system([ _dismod_cmd_, file_name, 'init' ]) # Initialize the truth_var table to the correct answer if True: # Need to create the truth_var table before setting it. # Can't seem to get db.create_tables to work, so use dismod_at to do it system([ _dismod_cmd_, file_name, 'set', 'truth_var', 'prior_mean']) truth_var = db.truth_var truth_var['truth_var_value'] = truth db.truth_var = truth_var if 0: try: # Check dismod gradients gradient_error = None option = db.option system([ _dismod_cmd_, file_name, 'set', 'option', 'derivative_test_fixed', 'adaptive']) system([ _dismod_cmd_, file_name, 'set', 'option', 'derivative_test_random', 'second-order']) system([ _dismod_cmd_, file_name, 'set', 'option', 'max_num_iter_fixed', '-1']) system([ _dismod_cmd_, file_name, 'set', 'option', 'max_num_iter_random', '100']) # Start from the truth if 0: system([ _dismod_cmd_, file_name, 'set', 'start_var', 'truth_var']) system([ _dismod_cmd_, file_name, 'set', 'scale_var', 'truth_var']) system([ _dismod_cmd_, file_name, 'fit', 'fixed']) system([ _dismod_cmd_, file_name, 'set', 'start_var', 'fit_var']) system([ _dismod_cmd_, file_name, 'fit', 'both']) except Exception as ex: print (ex) gradient_error = ex raise ex finally: db.option = option if start_from_truth: system([ _dismod_cmd_, file_name, 'set', 'start_var', 'truth_var']) system([ _dismod_cmd_, file_name, 'set', 'scale_var', 'truth_var']) # Check that prediction matches the measured data cols = db.avgint.columns.tolist() db.avgint = db.data.rename(columns={'data_id':'avgint_id'})[cols] system([ _dismod_cmd_, file_name, 'predict', 'truth_var']) check = np.allclose(db.data.meas_value, db.predict.avg_integrand, atol=1e-10, rtol=1e-10) assert check, 'ERROR: Predict from truth does not match the data' # # Fit fixed effects system([ _dismod_cmd_, file_name, 'fit', 'fixed']) if test_asymptotic: system([ _dismod_cmd_, file_name, 'sample', 'asymptotic', 'fixed', '10']) # # Fit both fixed and random effects system([ _dismod_cmd_, file_name, 'set', 'start_var', 'fit_var']) system([ _dismod_cmd_, file_name, 'set', 'scale_var', 'fit_var']) if (test_config['group_effects'] or test_config['node_effects']): system([ _dismod_cmd_, file_name, 'fit', 'both']) else: print ('Skipping fit both because there are no random effects.') check = np.allclose(db.fit_data_subset.weighted_residual, [0]*len(db.fit_data_subset), atol=1e-8, rtol=1e-8) assert check, 'ERROR: Measured values do not match the fit result integrand values.' print ('Tests OK -- fit both fit_data_subset and measured_data agree.') if test_asymptotic: system([ _dismod_cmd_, file_name, 'sample', 'asymptotic', 'both', '10']) # ----------------------------------------------------------------------- if gradient_error: print ('ERROR: Gradient check failed.') print (gradient_error) return False, db else: print ('Test OK') return True, db except Exception as ex: print (ex) print ('Test FAILED') return False, db finally: print (f'fit_var_value: {db.fit_var.fit_var_value.tolist()}') print (f'RMS(fit_var_value - truth): {np.sqrt(np.sum((db.fit_var.fit_var_value - db.truth_var.truth_var_value)**2))}') print (db.var.merge(db.fit_var, left_on = 'var_id', right_on = 'fit_var_id') .drop(columns = ['integrand_id', 'fit_var_id', 'residual_value', 'residual_dage', 'residual_dtime', 'lagrange_value', 'lagrange_dage', 'lagrange_dtime'])) print (f'RMS(weighted_residual): {np.sum(np.sqrt((db.fit_data_subset.weighted_residual)**2))}') print (db.data.merge(db.fit_data_subset, left_on = 'data_id', right_on = 'fit_data_subset_id') .drop(columns = ['fit_data_subset_id', 'integrand_id', 'weight_id', 'eta', 'nu', 'meas_std', 'avg_integrand', 'hold_out']))
def create_database(file_name, age_list, time_list, integrand_table, node_table, subgroup_table, weight_table, covariate_table, avgint_table, data_table, prior_table, smooth_table, nslist_table, rate_table, mulcov_table, option_table): import sys #*# import dismod_at from cascade_at.dismod.api.dismod_io import DismodIO db = DismodIO(file_name) # ---------------------------------------------------------------------- # avgint_extra_columns, data_extra_columns avgint_extra_columns = list() data_extra_columns = list() for row in option_table: if row['name'] == 'avgint_extra_columns': avgint_extra_columns = row['value'].split() if row['name'] == 'data_extra_columns': data_extra_columns = row['value'].split() # ---------------------------------------------------------------------- # create database new = True #*# connection = dismod_at.create_connection(file_name, new) # ---------------------------------------------------------------------- # create age table col_name = ['age'] col_type = ['real'] row_list = [] for age in age_list: row_list.append([age]) tbl_name = 'age' #*# dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list) db.age = pd.DataFrame(row_list, columns=col_name) # ---------------------------------------------------------------------- # create time table col_name = ['time'] col_type = ['real'] row_list = [] for time in time_list: row_list.append([time]) tbl_name = 'time' #*# dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list) db.time = pd.DataFrame(row_list, columns=col_name) # ---------------------------------------------------------------------- # create integrand table col_name = ['integrand_name', 'minimum_meas_cv'] col_type = ['text', 'real'] row_list = [] for i in range(len(integrand_table)): minimum_meas_cv = 0.0 if 'minimum_meas_cv' in integrand_table[i]: minimum_meas_cv = integrand_table[i]['minimum_meas_cv'] row = [integrand_table[i]['name'], minimum_meas_cv] row_list.append(row) tbl_name = 'integrand' #*# dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list) db.integrand = pd.DataFrame(row_list, columns=col_name) # global_integrand_name2id = {} for i in range(len(row_list)): global_integrand_name2id[row_list[i][0]] = i # ---------------------------------------------------------------------- # create density table col_name = ['density_name'] col_type = ['text'] row_list = [ ['uniform'], ['gaussian'], ['laplace'], ['students'], ['log_gaussian'], ['log_laplace'], ['log_students'], ['cen_gaussian'], ['cen_laplace'], ['cen_log_gaussian'], ['cen_log_laplace'], ] tbl_name = 'density' #*# dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list) db.density = pd.DataFrame(row_list, columns=col_name) # global_density_name2id = {} for i in range(len(row_list)): global_density_name2id[row_list[i][0]] = i # ---------------------------------------------------------------------- # create covariate table col_name = ['covariate_name', 'reference', 'max_difference'] col_type = ['text', 'real', 'real'] row_list = [] for i in range(len(covariate_table)): max_difference = None if 'max_difference' in covariate_table[i]: max_difference = covariate_table[i]['max_difference'] row = [ covariate_table[i]['name'], covariate_table[i]['reference'], max_difference ] row_list.append(row) tbl_name = 'covariate' #*# dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list) db.covariate = pd.DataFrame(row_list, columns=col_name) # global_covariate_name2id = {} for i in range(len(covariate_table)): global_covariate_name2id[covariate_table[i]['name']] = i # ---------------------------------------------------------------------- # create node table global_node_name2id = {} for i in range(len(node_table)): global_node_name2id[node_table[i]['name']] = i # col_name = ['node_name', 'parent'] col_type = ['text', 'integer'] row_list = [] for i in range(len(node_table)): node = node_table[i] name = node['name'] parent = node['parent'] if parent == '': parent = None else: parent = global_node_name2id[parent] row_list.append([name, parent]) tbl_name = 'node' #*# dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list) db.node = pd.DataFrame(row_list, columns=col_name) # create subgroup table global_subgroup_name2id = {} global_group_name2id = {} group_id = 0 group_name = subgroup_table[0]['group'] global_group_name2id[group_name] = group_id for i in range(len(subgroup_table)): global_subgroup_name2id[subgroup_table[i]['subgroup']] = i if subgroup_table[i]['group'] != group_name: group_id = group_id + 1 group_name = subgroup_table[i]['group'] global_group_name2id[group_name] = group_id # col_name = ['subgroup_name', 'group_id', 'group_name'] col_type = ['text', 'integer', 'text'] row_list = [] for i in range(len(subgroup_table)): if i == 0: group_id = 0 group_name = subgroup_table[0]['group'] elif subgroup_table[i]['group'] != group_name: group_id = group_id + 1 group_name = subgroup_table[i]['group'] subgroup_name = subgroup_table[i]['subgroup'] row_list.append([subgroup_name, group_id, group_name]) tbl_name = 'subgroup' #*# dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list) db.subgroup = pd.DataFrame(row_list, columns=col_name) # ---------------------------------------------------------------------- # create prior table col_name = [ 'prior_name', 'lower', 'upper', 'mean', 'std', 'density_id', 'eta', 'nu' ] col_type = [ 'text', 'real', 'real', 'real', 'real', 'integer', 'real', 'real' ] row_list = [] for i in range(len(prior_table)): prior = prior_table[i] density_id = global_density_name2id[prior['density']] # # columns that have null for default value for key in ['lower', 'upper', 'std', 'eta', 'nu']: if not key in prior: prior[key] = None # row = [ prior['name'], prior['lower'], prior['upper'], prior['mean'], prior['std'], density_id, prior['eta'], prior['nu'], ] row_list.append(row) tbl_name = 'prior' #*# dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list) db.prior = pd.DataFrame(row_list, columns=col_name) # global_prior_name2id = {} for i in range(len(row_list)): global_prior_name2id[row_list[i][0]] = i # ---------------------------------------------------------------------- # create weight table col_name = ['weight_name', 'n_age', 'n_time'] col_type = ['text', 'integer', 'integer'] row_list = [] for i in range(len(weight_table)): weight = weight_table[i] name = weight['name'] n_age = len(weight['age_id']) n_time = len(weight['time_id']) row_list.append([name, n_age, n_time]) tbl_name = 'weight' #*# dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list) db.weight = pd.DataFrame(row_list, columns=col_name) # global_weight_name2id = {} for i in range(len(weight_table)): global_weight_name2id[weight_table[i]['name']] = i # null is used for constant weighting global_weight_name2id[''] = None # ---------------------------------------------------------------------- # create weight_grid table col_name = ['weight_id', 'age_id', 'time_id', 'weight'] col_type = ['integer', 'integer', 'integer', 'real'] row_list = [] for i in range(len(weight_table)): weight = weight_table[i] age_id = weight['age_id'] time_id = weight['time_id'] fun = weight['fun'] for j in age_id: for k in time_id: w = fun(age_list[j], time_list[k]) row_list.append([i, j, k, w]) tbl_name = 'weight_grid' #*# dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list) db.weight_grid = pd.DataFrame(row_list, columns=col_name) # ---------------------------------------------------------------------- # create smooth table col_name = [ 'smooth_name', 'n_age', 'n_time', 'mulstd_value_prior_id', 'mulstd_dage_prior_id', 'mulstd_dtime_prior_id' ] col_type = ['text', 'integer', 'integer', 'integer', 'integer', 'integer'] row_list = [] for i in range(len(smooth_table)): smooth = smooth_table[i] name = smooth['name'] n_age = len(smooth['age_id']) n_time = len(smooth['time_id']) # prior_id = dict() for key in ['value', 'dage', 'dtime']: prior_id[key] = None mulstd_key = 'mulstd_' + key + '_prior_name' if mulstd_key in smooth: prior_name = smooth[mulstd_key] if prior_name != None: prior_id[key] = global_prior_name2id[prior_name] # row_list.append([ name, n_age, n_time, prior_id['value'], prior_id['dage'], prior_id['dtime'], ]) tbl_name = 'smooth' #*# dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list) db.smooth = pd.DataFrame(row_list, columns=col_name) # global_smooth_name2id = {} for i in range(len(smooth_table)): global_smooth_name2id[smooth_table[i]['name']] = i # ---------------------------------------------------------------------- # create smooth_grid table col_name = [ 'smooth_id', 'age_id', 'time_id', 'value_prior_id', 'dage_prior_id', 'dtime_prior_id', 'const_value', ] col_type = [ 'integer', # smooth_id 'integer', # age_id 'integer', # time_id 'integer', # value_prior_id 'integer', # dage_prior_id 'integer', # dtime_prior_id 'real', # const_value ] row_list = [] for i in range(len(smooth_table)): smooth = smooth_table[i] age_id = smooth['age_id'] time_id = smooth['time_id'] fun = smooth['fun'] max_j = 0 for j in age_id: if age_list[j] > age_list[max_j]: max_j = j max_k = 0 for k in time_id: if time_list[k] > time_list[max_k]: max_k = k for j in age_id: for k in time_id: (v, da, dt) = fun(age_list[j], time_list[k]) # if j == max_j: da = None elif da != None: da = global_prior_name2id[da] # if k == max_k: dt = None elif dt != None: dt = global_prior_name2id[dt] # const_value = None if isinstance(v, float): const_value = v v = None elif v != None: v = global_prior_name2id[v] row_list.append([i, j, k, v, da, dt, const_value]) tbl_name = 'smooth_grid' #*# dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list) db.smooth_grid = pd.DataFrame(row_list, columns=col_name) # ---------------------------------------------------------------------- # create nslist table col_name = ['nslist_name'] col_type = ['text'] row_list = list() for nslist_name in nslist_table: row_list.append([nslist_name]) tbl_name = 'nslist' #*# dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list) db.nslist = pd.DataFrame(row_list, columns=col_name) # global_nslist_name2id = dict() for i in range(len(row_list)): global_nslist_name2id[row_list[i][0]] = i # ---------------------------------------------------------------------- # create nslist_pair table col_name = ['nslist_id', 'node_id', 'smooth_id'] col_type = ['integer', 'integer', 'integer'] row_list = list() tbl_name = 'nslist_pair' for key in nslist_table: pair_list = nslist_table[key] nslist_id = global_nslist_name2id[key] for pair in pair_list: node_id = global_node_name2id[pair[0]] smooth_id = global_smooth_name2id[pair[1]] row_list.append([nslist_id, node_id, smooth_id]) #*# dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list) db.nslist_pair = pd.DataFrame(row_list, columns=col_name) # ---------------------------------------------------------------------- # create rate table col_name = [ 'rate_name', 'parent_smooth_id', 'child_smooth_id', 'child_nslist_id' ] col_type = ['text', 'integer', 'integer', 'integer'] row_list = list() for rate_name in ['pini', 'iota', 'rho', 'chi', 'omega']: row = [rate_name, None, None, None] for i in range(len(rate_table)): rate = rate_table[i] if rate['name'] == rate_name: row = [rate_name] for key in ['parent_smooth', 'child_smooth', 'child_nslist']: entry = None if key in rate: entry = rate[key] if entry != None: if key == 'child_nslist': entry = global_nslist_name2id[entry] else: entry = global_smooth_name2id[entry] row.append(entry) row_list.append(row) tbl_name = 'rate' #*# dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list) db.rate = pd.DataFrame(row_list, columns=col_name) global_rate_name2id = {} for i in range(len(row_list)): global_rate_name2id[row_list[i][0]] = i # ---------------------------------------------------------------------- # create mulcov table col_name = [ 'mulcov_type', 'rate_id', 'integrand_id', 'covariate_id', 'group_id', 'group_smooth_id', 'subgroup_smooth_id', ] col_type = [ 'text', # mulcov_type 'integer', # rate_id 'integer', # integrand_id 'integer', # covariate_id 'integer', # group_id 'integer', # group_smooth_id 'integer', # subgroup_smooth_id ] row_list = [] warning_printed = False for i in range(len(mulcov_table)): mulcov = mulcov_table[i] mulcov_type = mulcov['type'] effected = mulcov['effected'] covariate_id = global_covariate_name2id[mulcov['covariate']] # # rate_id and integrand_id if mulcov_type == 'rate_value': rate_id = global_rate_name2id[effected] integrand_id = None else: integrand_id = global_integrand_name2id[effected] rate_id = None # # group_id if 'group' in mulcov: group_id = global_group_name2id[mulcov['group']] else: group_id = 0 if not warning_printed: msg = 'create_database Warning: ' msg += 'group key missing in mulcov table,\n' msg += 'using default value; i.e., first group ' msg += '(you should fix this).' print(msg) warning_printed = True # # group_smooth_id if mulcov['smooth'] == None: group_smooth_id = None else: group_smooth_id = global_smooth_name2id[mulcov['smooth']] # # subgroup_smooth_id if not 'subsmooth' in mulcov: subgroup_smooth_id = None elif mulcov['subsmooth'] == None: subgroup_smooth_id = None else: subgroup_smooth_id = global_smooth_name2id[mulcov['subsmooth']] # row_list.append([ mulcov_type, rate_id, integrand_id, covariate_id, group_id, group_smooth_id, subgroup_smooth_id, ]) tbl_name = 'mulcov' #*# dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list) db.mulcov = pd.DataFrame(row_list, columns=col_name) # ---------------------------------------------------------------------- # avgint table # # extra_name, extra_type extra_name = [] extra_type = [] if (len(avgint_table) > 0): extra_name = avgint_extra_columns row = avgint_table[0] for key in extra_name: if isinstance(row[key], str): extra_type.append('text') elif isinstance(row[key], int): extra_type.append('integer') elif isinstance(row[key], float): extra_type.append('real') else: assert False # # col_name col_name = extra_name + [ 'integrand_id', 'node_id', 'subgroup_id', 'weight_id', 'age_lower', 'age_upper', 'time_lower', 'time_upper' ] for j in range(len(covariate_table)): col_name.append('x_%s' % j) # # col_type col_type = extra_type + [ 'integer', # integrand_id 'integer', # node_id 'integer', # subgroup_id 'integer', # weight_id 'real', # age_lower 'real', # age_upper 'real', # time_lower 'real' # time_upper ] for j in range(len(covariate_table)): col_type.append('real') # # row_list row_list = [] warning_printed = False for i in range(len(avgint_table)): avgint = avgint_table[i] # # subgroup column has a default value if 'subgroup' not in avgint: avgint['subgroup'] = subgroup_table[0]['subgroup'] if not warning_printed: msg = 'create_database Warning: ' msg += 'subgroup key missing in avgint table,\n' msg += 'using default value; i.e., first subgroup ' msg += '(you should fix this).' print(msg) warning_printed = True # # extra columns first row = list() for name in extra_name: row.append(avgint[name]) # avgint_id = i integrand_id = global_integrand_name2id[avgint['integrand']] node_id = global_node_name2id[avgint['node']] subgroup_id = global_subgroup_name2id[avgint['subgroup']] weight_id = global_weight_name2id[avgint['weight']] row = row + [ integrand_id, node_id, subgroup_id, weight_id, avgint['age_lower'], avgint['age_upper'], avgint['time_lower'], avgint['time_upper'] ] for j in range(len(covariate_table)): row.append(avgint[float(covariate_table[j]['name'])]) row_list.append(row) tbl_name = 'avgint' #*# dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list) db.avgint = (pd.DataFrame(row_list, columns=col_name).astype( dict( zip( col_name, pd.Series(col_type).replace({ 'integer': 'int', 'real': 'float' }))))) # ---------------------------------------------------------------------- # create data table # # # extra_name, extra_type extra_name = [] extra_type = [] if (len(data_table) > 0): extra_name = data_extra_columns row = data_table[0] for key in extra_name: if isinstance(row[key], str): extra_type.append('text') elif isinstance(row[key], int): extra_type.append('integer') elif isinstance(row[key], float): extra_type.append('real') else: assert False # # col_name col_name = extra_name + [ 'integrand_id', 'node_id', 'subgroup_id', 'weight_id', 'age_lower', 'age_upper', 'time_lower', 'time_upper', 'hold_out', 'density_id', 'meas_value', 'meas_std', 'eta', 'nu', ] for j in range(len(covariate_table)): col_name.append('x_%s' % j) # # col_type col_type = extra_type + [ 'integer', # integrand_id 'integer', # node_id 'integer', # subgroup_id 'integer', # weight_id 'real', # age_lower 'real', # age_upper 'real', # time_lower 'real', # time_upper 'integer', # hold_out 'integer', # density_id 'real', # meas_value 'real', # meas_std 'real', # eta 'real', # nu ] for j in range(len(covariate_table)): col_type.append('real') row_list = [] warning_printed = False for i in range(len(data_table)): data = data_table[i] # # extra columns first row = list() for name in extra_name: row.append(data[name]) # # columns that have null for default value for key in ['meas_std', 'eta', 'nu']: if not key in data: data[key] = None # # subgroup column has a default value if not 'subgroup' in data: data['subgroup'] = subgroup_table[0]['subgroup'] if not warning_printed: msg = 'create_database Warning: ' msg += 'subgroup key missing in data table,\n' msg += 'using default value; i.e., first subgroup ' msg += '(you should fix this).' print(msg) warning_printed = True # integrand_id = global_integrand_name2id[data['integrand']] density_id = global_density_name2id[data['density']] node_id = global_node_name2id[data['node']] subgroup_id = global_subgroup_name2id[data['subgroup']] weight_id = global_weight_name2id[data['weight']] hold_out = int(data['hold_out']) row = row + [ integrand_id, node_id, subgroup_id, weight_id, data['age_lower'], data['age_upper'], data['time_lower'], data['time_upper'], hold_out, density_id, data['meas_value'], data['meas_std'], data['eta'], data['nu'] ] for j in range(len(covariate_table)): row.append(float(data[covariate_table[j]['name']])) row_list.append(row) tbl_name = 'data' #*# dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list) data = pd.DataFrame(row_list, columns=col_name) data['data_name'] = '' db.data = data # ---------------------------------------------------------------------- # create option table col_name = ['option_name', 'option_value'] col_type = ['text unique', 'text'] row_list = [] for row in option_table: name = row['name'] value = row['value'] row_list.append([name, value]) tbl_name = 'option' #*# dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list) db.option = pd.DataFrame(row_list, columns=col_name) # ---------------------------------------------------------------------- # close the connection #*# connection.close() return