def fit_both(self, tol: float = 1e-4, fit_fixed: bool = True, db2csv: bool = True, max_iter: int = 100,
                 fit_gaussian: bool = False, zero_sum: bool = False, print_level: int = 5):
        """Run the dismod_at ``fit both`` command and predict from the result.

        Args:
            tol: Tolerance forwarded to ``self.fit_fixed`` or ``self.db.set_tol``.
            fit_fixed: When true, ``self.fit_fixed`` is run first and its
                ``fit_var`` is copied into ``start_var``; otherwise the
                options are written here and ``self.initialize`` is called.
            db2csv: When true, ``dismod_at.db2csv_command`` is run at the end.
            max_iter: Iteration limit forwarded like ``tol``.
            fit_gaussian: When true, the measurement density is set to
                ``'gaussian'`` for the first stage, then reset and the
                database re-initialized before the final fit.
            zero_sum: When true, the zero-sum constraint is requested.
            print_level: Print level forwarded like ``tol``.
        """
        if fit_gaussian:
            self.db.set_meas_density('gaussian')

        if not fit_fixed:
            # No preliminary fixed-effects fit: write the optimizer options
            # directly and initialize the database.
            self.db.set_tol(tol)
            self.db.set_max_iteration(max_iter)
            self.db.set_print_level(print_level)
            if zero_sum:
                self.db.set_zero_sum_constraint()
            self.initialize(db2csv=False)
        else:
            # Fixed-effects fit first, then use its result as the start point.
            self.fit_fixed(tol=tol, db2csv=False, max_iter=max_iter, zero_sum=zero_sum, print_level=print_level)
            system_command([program, self.db_path, 'set', 'start_var', 'fit_var'])

        if fit_gaussian:
            # Undo the temporary density change before the final fit.
            self.db.reset_meas_density()
            self.initialize(db2csv=False)

        for first_word, second_word in (('fit', 'both'), ('predict', 'fit_var')):
            system_command([program, self.db_path, first_word, second_word])

        if not db2csv:
            return
        dismod_at.db2csv_command(self.db_path)
Exemple #2
0
    def simulate(self, node, file_name, n_sim=10):
        """Simulate data from the current fit and summarize the predictions.

        Copies ``fit_var`` into ``truth_var``, ``start_var`` and ``scale_var``,
        runs ``simulate``, ``sample simulate`` and ``predict sample`` on
        ``file_name``, then reads ``predict.csv`` from ``self.file_path``.

        Args:
            node: Key into ``self.node_parent_children``; also used as the
                suffix of the renamed predict file.
            file_name: Database path passed to every command.
            n_sim: Number of simulations / samples.

        Returns:
            A pair ``(rate_mean_std, alpha_mean_std)``. The first maps each
            value of the ``node`` column (``Sincidence`` rows) to
            ``(mean, std)`` of ``avgint``; the second maps every other
            integrand name to ``(mean, std)`` of ``avgint``.
        """
        sim_count = str(n_sim)
        for target_table in ('truth_var', 'start_var', 'scale_var'):
            system_command([program, file_name, 'set', target_table, 'fit_var'])
        system_command([program, file_name, 'simulate', sim_count])
        dismod_at.db2csv_command(file_name)
        system_command([program, file_name, 'sample', 'simulate', sim_count])
        system_command([program, file_name, 'predict', 'sample'])
        dismod_at.db2csv_command(file_name)

        predict = pd.read_csv(self.file_path + 'predict.csv')
        predict_rate_dict = collections.defaultdict(list)
        predict_alpha_dict = collections.defaultdict(list)
        for _, record in predict.iterrows():
            integrand = record['integrand']
            if integrand != 'Sincidence':
                predict_alpha_dict[integrand].append(record['avgint'])
            else:
                predict_rate_dict[record['node']].append(record['avgint'])

        # Sanity checks on the number of distinct nodes and covariate multipliers.
        assert len(predict_rate_dict) == len(self.node_parent_children[node])
        assert len(predict_alpha_dict) == self.n_cov

        def mean_and_std(samples_by_key):
            return {key: (np.mean(samples), np.std(samples)) for key, samples in samples_by_key.items()}

        rate_mean_std = mean_and_std(predict_rate_dict)
        alpha_mean_std = mean_and_std(predict_alpha_dict)

        # Keep this node's predictions under a node-specific file name.
        os.rename(self.file_path + 'predict.csv', self.file_path + 'predict_' + node + '.csv')

        return rate_mean_std, alpha_mean_std
 def fit_fixed(self, db2csv=True):
     """Run the dismod_at ``fit fixed`` command on ``self.path``.

     The command line is echoed before it is executed. A non-zero return
     code terminates the process through ``sys.exit``. When ``db2csv`` is
     true the database is exported with ``dismod_at.db2csv_command``.
     """
     fit_cmd = [program, self.path, 'fit', 'fixed']
     print(' '.join(fit_cmd))
     if subprocess.call(fit_cmd) != 0:
         sys.exit('The dismod_at fit fixed command failed')
     if not db2csv:
         return
     dismod_at.db2csv_command(self.path)
 def fit_fixed(self, tol: float = 1e-4, db2csv: bool = True, max_iter: int = 100, zero_sum: bool = False,
               print_level: int = 5):
     """Configure the optimizer options, then run ``fit fixed`` and ``predict fit_var``.

     Args:
         tol: Passed to ``self.db.set_tol``.
         db2csv: When true, export the database with ``db2csv_command``.
         max_iter: Passed to ``self.db.set_max_iteration``.
         zero_sum: When true, ``self.db.set_zero_sum_constraint`` is called.
         print_level: Passed to ``self.db.set_print_level``.
     """
     self.db.set_tol(tol)
     self.db.set_max_iteration(max_iter)
     self.db.set_print_level(print_level)
     if zero_sum:
         self.db.set_zero_sum_constraint()
     self.initialize(db2csv=False)

     for first_word, second_word in (('fit', 'fixed'), ('predict', 'fit_var')):
         system_command([program, self.db_path, first_word, second_word])

     if not db2csv:
         return
     dismod_at.db2csv_command(self.db_path)
Exemple #5
0
 def fit(self, file_name, depth=1, fit_fixed=False, fit_both=True,
         verbose=True, write_to_csv=True):
     """Initialize the database at ``file_name`` and run the requested fits.

     A fixed-effects fit is run when ``depth`` is 0 or ``fit_fixed`` is
     true; its ``fit_var`` is then copied into ``start_var`` and
     ``scale_var``. A ``fit both`` is run when ``depth`` is positive and
     ``fit_both`` is true. ``verbose`` is forwarded to every
     ``system_command`` call. When ``write_to_csv`` is true the database is
     exported with ``dismod_at.db2csv_command``.
     """
     def run(*words):
         # Every command shares the same program, database and verbosity.
         system_command([program, file_name] + list(words), verbose)

     self.db.initialize(file_name)
     run('init')

     if depth == 0 or fit_fixed:
         run('fit', 'fixed')
         for target_table in ('start_var', 'scale_var'):
             run('set', target_table, 'fit_var')

     if depth > 0 and fit_both:
         run('fit', 'both')

     if not write_to_csv:
         return
     dismod_at.db2csv_command(file_name)
Exemple #6
0
    def initialize(self, file_name, to_csv=False):
        """Create the dismod_at database ``file_name`` from the stored tables.

        An empty list is passed in the position between the smooth table and
        the rate table. When ``to_csv`` is true the ``init`` command is run
        (exiting the process on a non-zero return code) and the database is
        exported with ``dismod_at.db2csv_command``.
        """
        table_arguments = (
            self.age_list, self.time_list,
            self.integrand_table, self.node_table,
            self.weight_table, self.covariate_table,
            self.avgint_table, self.data_table,
            self.prior_table, self.smooth_table, list(),
            self.rate_table, self.mulcov_table,
            self.option_table,
        )
        dismod_at.create_database(file_name, *table_arguments)

        if not to_csv:
            return

        return_code = subprocess.call([program, file_name, 'init'])
        if return_code != 0:
            sys.exit('command failed: flag = ' + str(return_code))
        dismod_at.db2csv_command(file_name)
Exemple #7
0
    def __init__(self, path_to_folder, db_file_name):
        """Export the database to csv and load the values used by this object.

        Args:
            path_to_folder: Folder that holds the database file and its
                ``data.csv``. A trailing path separator is optional.
            db_file_name: Name of the database file inside ``path_to_folder``.
        """
        # Join folder and file name as separate components so the result is
        # correct whether or not path_to_folder ends with a separator.
        self.path_to_db = os.path.join(path_to_folder, db_file_name)
        dismod_at.db2csv_command(self.path_to_db)
        self.db_output = DismodOutput(self.path_to_db)

        # data.csv is read from the same folder as the database
        # (presumably written by the db2csv call above -- confirm).
        self.path_to_data = os.path.join(path_to_folder, 'data.csv')
        self.age_list, self.time_list = self.db_output.get_age_time_lists()

        self.integrand_values = self.db_output.get_integrand_values()
        self.data_values = pd.read_csv(self.path_to_data)
        self.cov_name_to_id = self.db_output.get_covarates_names()

        # Integrand name used for each rate name.
        self.rate_to_integrand = {
            'iota': 'Sincidence',
            'rho': 'remission',
            'chi': 'mtexcess',
            'omega': 'mtother'
        }
Exemple #8
0
    def init_database(self, db2csv: bool = True):
        """Create the database at ``self.path`` and run the ``init`` command.

        After ``dismod_at.create_database`` returns, an empty node is created
        at ``self.path`` if nothing exists there. The ``init`` command line is
        echoed and executed; a non-zero return code terminates the process
        through ``sys.exit``. The csv export only runs when ``db2csv`` is
        exactly ``True``.
        """
        table_arguments = (
            self.age_list, self.time_list,
            self.integrand_table, self.node_table,
            self.weight_table, self.covariate_table,
            self.avgint_table, self.data_table,
            self.prior_table, self.smooth_table, list(),
            self.rate_table, self.mulcov_table,
            self.option_table,
        )
        dismod_at.create_database(self.path, *table_arguments)

        if not os.path.exists(self.path):
            os.mknod(self.path)

        init_cmd = [program, self.path, 'init']
        print(' '.join(init_cmd))
        if subprocess.call(init_cmd) != 0:
            sys.exit('The dismod_at init command failed')

        if db2csv is not True:
            return
        dismod_at.db2csv_command(self.path)
Exemple #9
0
# Values for the new log row, listed in the same order as the column names
# in the insert statement below.
log_values = [
    str(log_id),          # log_id
    '"command"',          # message_type
    'null',               # table_name
    'null',               # row_id
    str(begin_time),      # unix_time
    '"' + message + '"',  # message
]
cmd = 'insert into log'
cmd += ' (log_id,message_type,table_name,row_id,unix_time,message) values('
cmd += ','.join(log_values) + ')'
dismod_at.sql_command(connection, cmd)
connection.close()
# ---------------------------------------------------------------------------
# Execute the requested command.
# Each branch first checks that the total number of command-line arguments
# (len(sys.argv)) matches what that command expects and exits with the usage
# message otherwise.
# NOTE(review): arg_list presumably holds the arguments that follow the
# command name on the command line -- confirm where it is built.
if command_arg == 'db2csv':
    # db2csv only needs the database file
    if len(sys.argv) != 3:
        sys.exit(usage)
    dismod_at.db2csv_command(database_file_arg)
elif command_arg == 'perturb':
    # perturb takes two extra arguments: a table name and sigma
    if len(sys.argv) != 5:
        sys.exit(usage)
    tbl_name = arg_list[0]
    # sigma is forwarded exactly as taken from arg_list (no conversion here)
    sigma = arg_list[1]
    dismod_at.perturb_command(database_file_arg, tbl_name, sigma)
elif command_arg == 'plot_rate_fit':
    # plot_rate_fit takes three extra arguments: pdf file, title, rate names
    if len(sys.argv) != 6:
        sys.exit(usage)
    pdf_file = arg_list[0]
    plot_title = arg_list[1]
    # the third argument is split on whitespace into a set of names
    rate_set = set(arg_list[2].split())
    dismod_at.plot_rate_fit(database_file_arg, pdf_file, plot_title, rate_set)
elif command_arg == 'plot_data_fit':
    if len(sys.argv) != 6:
    def initDatabase(self, max_iter=500):
        """Build the dismod_at input tables, create the database and run ``init``.

        If ``self.age_list`` / ``self.time_list`` are empty they are first
        derived from the age and year ranges found in ``self.data`` and stored
        back on the instance. The tables are then assembled from
        ``self.integrand``, ``self.rates``, ``self.covariates``,
        ``self.rate_priors``, ``self.cov_priors``, ``self.meas_noise_density``
        and ``self.options``, the database is written to ``self.path``, the
        ``init`` command is executed and the result is exported to csv.

        Args:
            max_iter: Value stored for the ``max_num_iter_fixed`` option
                (an entry of the same name in ``self.options`` overrides it).

        Raises:
            SystemExit: If the ``init`` command returns a non-zero code.
        """
        # ------------------------------------------------------------------
        # age / time grids (only when the caller did not supply them)
        if len(self.age_list) == 0:
            max_age = -float('inf')
            min_age = float('inf')
            for i in range(self.n):
                max_age = max(max_age, self.data.loc[i, 'age_end'])
                min_age = min(min_age, self.data.loc[i, 'age_start'])
            # evenly spaced points over the observed range, rounded to
            # integers; duplicates created by the rounding are removed
            age_list = [int(round(x)) for x in np.linspace(
                min_age, max_age, round((max_age - min_age) / 5) + 1)]
            self.age_list = sorted(set(age_list))
        if len(self.time_list) == 0:
            max_time = -float('inf')
            min_time = float('inf')
            for i in range(self.n):
                max_time = max(max_time, self.data.loc[i, 'year_end'])
                min_time = min(min_time, self.data.loc[i, 'year_start'])
            time_list = [int(round(x)) for x in np.linspace(
                min_time, max_time, round((max_time - min_time) / 3 + 1))]
            self.time_list = sorted(set(time_list))

        # ------------------------------------------------------------------
        # simple tables
        avgint_table = list()
        nslist_table = dict()
        integrand_table = [{'name': intg} for intg in self.integrand]

        node_table = [{'name': 'world', 'parent': ''}]

        weight_table = [{
            'name': 'constant',
            'age_id': range(len(self.age_list)),
            'time_id': range(len(self.time_list)),
            'fun': lambda a, t: 1.0
        }]

        rate_table = [
            {'name': rate, 'parent_smooth': 'smooth_rate_' + rate}
            for rate in self.rates
        ]

        covariate_table = [
            {'name': cov['name'], 'reference': 0.0}
            for cov in self.covariates
        ]
        mulcov_table = [
            {
                'covariate': cov['name'],
                'type': cov['type'],
                'effected': cov['effected'],
                'smooth': 'smooth_mulcov_' + cov['name'],
            }
            for cov in self.covariates
        ]

        # ------------------------------------------------------------------
        # smoothings: one per rate and one per covariate multiplier.
        # The name is bound as a lambda default so each function keeps its
        # own rate / covariate name instead of the loop's last value.
        smooth_table = list()
        for rate in self.rates:
            smooth_table.append({
                'name': 'smooth_rate_' + rate,
                'age_id': range(len(self.age_list)),
                'time_id': range(len(self.time_list)),
                'fun': lambda a, t, r=rate: (
                    'value_prior_' + r, 'dage_prior_' + r, 'dtime_prior_' + r),
            })
        for cov in self.covariates:
            name = cov['name']
            smooth_table.append({
                'name': 'smooth_mulcov_' + name,
                'age_id': range(len(self.age_list)),
                'time_id': range(len(self.time_list)),
                'fun': lambda a, t, name=name: (
                    'value_prior_' + name, 'dage_prior_' + name,
                    'dtime_prior_' + name),
            })

        # ------------------------------------------------------------------
        # priors: value, dage and dtime entries (in that order) for every
        # rate, followed by the same three for every covariate
        prior_kinds = ('value_prior_', 'dage_prior_', 'dtime_prior_')
        prior_table = []
        for i, rate in enumerate(self.rates):
            for k, kind in enumerate(prior_kinds):
                entry = {'name': kind + rate}
                entry.update(self.rate_priors[i][k])
                prior_table.append(entry)
        for i, cov in enumerate(self.covariates):
            for k, kind in enumerate(prior_kinds):
                entry = {'name': kind + cov['name']}
                entry.update(self.cov_priors[i][k])
                prior_table.append(entry)

        # ------------------------------------------------------------------
        # data: only rows whose measure is one of the fitted integrands.
        # NOTE: one template dict is reused and shallow-copied per row, so a
        # key written for an earlier row stays in later rows unless it is
        # overwritten.
        data_table = list()
        row = {
            'node': 'world',
            'weight': 'constant',
            'hold_out': False,
        }
        row.update(self.meas_noise_density)
        for data_id in range(self.n):
            measure = self.data.loc[data_id, 'measure']
            if measure not in self.integrand:
                continue
            row['integrand'] = measure
            for k, v in self.meas_noise_density[row['integrand']].items():
                row[k] = v
            row['meas_value'] = self.data.loc[data_id, 'meas_value']
            row['meas_std'] = self.data.loc[data_id, 'meas_std']
            row['age_lower'] = self.data.loc[data_id, 'age_start']
            row['age_upper'] = self.data.loc[data_id, 'age_end']
            row['time_lower'] = self.data.loc[data_id, 'year_start']
            row['time_upper'] = self.data.loc[data_id, 'year_end']
            for cov in self.covariates:
                row[cov['name']] = self.data.loc[data_id, cov['name']]
            data_table.append(copy.copy(row))

        # ------------------------------------------------------------------
        # options: defaults first, then overrides from self.options
        option_table = [
            {'name': 'parent_node_name', 'value': 'world'},
            {'name': 'ode_step_size', 'value': '10.0'},
            {'name': 'quasi_fixed', 'value': 'false'},
            {'name': 'max_num_iter_fixed', 'value': max_iter},
            {'name': 'print_level_fixed', 'value': '5'},
            {'name': 'tolerance_fixed', 'value': '1e-8'},
        ]
        # self.integrand is a list, so both single-integrand cases must be
        # compared against a one-element list (comparing against the bare
        # string 'remission' could never match).
        if self.integrand == ['Sincidence']:
            rate_case = 'iota_pos_rho_zero'
        elif self.integrand == ['remission']:
            rate_case = 'iota_zero_rho_pos'
        else:
            rate_case = 'iota_pos_rho_pos'
        option_table.append({'name': 'rate_case', 'value': rate_case})

        option_name_id = {
            option['name']: i for i, option in enumerate(option_table)
        }
        for option in self.options:
            if option['name'] in option_name_id:
                # replace the value of an option that already has a default
                option_table[option_name_id[option['name']]]['value'] = \
                    option['value']
            else:
                option_table.append(option)

        # ------------------------------------------------------------------
        # write the database, run init and export to csv
        dismod_at.create_database(self.path, self.age_list, self.time_list,
                                  integrand_table, node_table, weight_table,
                                  covariate_table, avgint_table, data_table,
                                  prior_table, smooth_table, nslist_table,
                                  rate_table, mulcov_table, option_table)

        command = [program, self.path, 'init']
        print(' '.join(command))
        flag = subprocess.call(command)
        if flag != 0:
            sys.exit('The dismod_at init command failed')
        dismod_at.db2csv_command(self.path)
Exemple #11
0
# Fit fixed and random effects, then draw asymptotic samples for both.
for command_tail in (
        ['fit', 'both'],
        ['sample', 'asymptotic', 'both', str(number_sample)],
):
    dismod_at.system_command_prc([program, file_name] + command_tail)
# -----------------------------------------------------------------------
# Read the tables needed below from the existing database, then export it.
new = False
connection = dismod_at.create_connection(file_name, new)
(
    var_table,
    node_table,
    rate_table,
    fit_var_table,
    sample_table,
    hes_random_table,
) = (
    dismod_at.get_table_dict(connection, table_name)
    for table_name in (
        'var', 'node', 'rate', 'fit_var', 'sample', 'hes_random',
    )
)
connection.close()
dismod_at.db2csv_command(file_name)
# -----------------------------------------------------------------------
# Map each variable id to its node name; every variable must be an iota
# rate. The fitted value for the 'world' node is kept as theta.
var_id2node_name = {}
assert len(var_table) == 3
for var_id, row in enumerate(var_table):
    assert var_id < 3
    assert row['var_type'] == 'rate'
    assert rate_table[row['rate_id']]['rate_name'] == 'iota'
    node_name = node_table[row['node_id']]['node_name']
    var_id2node_name[var_id] = node_name
    if node_name == 'world':
        theta = fit_var_table[var_id]['fit_var_value']
# -----------------------------------------------------------------------
# check the Hessian of the random effects objective
Exemple #12
0
# Largest relative error of the fitted values against the true rates.
max_abs_err = 0.0
for var_id, var_row in enumerate(var_table):
    fit_row = fit_var_table[var_id]
    rate_id = var_row['rate_id']
    rate_name = rate_table[rate_id]['rate_name']
    fit_var_value = fit_row['fit_var_value']
    relative_err = fit_var_value / rate_true[rate_name] - 1.0
    max_abs_err = max(max_abs_err, abs(relative_err))
if max_abs_err > 0.1:
    # sys.exit with a string prints the message and exits with status 1
    # (the sys module has no 'msg' function).
    sys.exit('csv2db.py: max_abs_err = ' + str(max_abs_err))
#
# check error in mtall approximation
max_abs_res = 0.0
for data_id in range(len(data_table)):
    # the data subset is expected to contain every data row, in order
    assert data_id == data_subset_table[data_id]['data_id']
    integrand_id = data_table[data_id]['integrand_id']
    integrand_name = integrand_table[integrand_id]['integrand_name']
    if integrand_name == 'mtall':
        weighted_residual = fit_data_subset[data_id]['weighted_residual']
        max_abs_res = max(max_abs_res, abs(weighted_residual))
if max_abs_res > 0.1:
    sys.exit('csv2db.py: max_abs_res = ' + str(max_abs_res))
# ---------------------------------------------------------------------------
# csv representation of database
dismod_at.db2csv_command(database)
# ---------------------------------------------------------------------------
print('csv2db.py: OK')
# END PYTHON