def create_report_file(Resistances, initial_resistances, drug_start_time, param_obj, report_name, inset_days, debug):
    """Write the TB drug-resistance test report to ``report_name``.

    Two checks are performed:

    1. For every day in ``inset_days`` the MDR-TB prevalence channel is
       compared with ``Resistances[day] / StatisticalPopulation``; a difference
       above 0.03 is reported as a failure.
    2. For every timestep after ``drug_start_time`` the day-over-day increase
       in ``Resistances`` is tested against the configured
       ``TB_Drug_Resistance_Rate_HIV`` (binomial 99% CI when the expected mean
       is at least 5, a 3-sigma band otherwise). The check fails when more
       than 1% (rounded up) of the tested timesteps fail.

    Args:
        Resistances: per-timestep resistant counts (indexed by timestep).
        initial_resistances: unused here; only referenced by a disabled check.
        drug_start_time: timestep index at which the drug starts.
        param_obj: mapping that provides ``"TB_Drug_Resistance_Rate_HIV"``.
        report_name: path of the report file to (over)write.
        inset_days: per-timestep mappings keyed by ``dts.InsetChart.Channels``.
        debug: when truthy, write per-day details and produce plots.

    Returns:
        None. The verdict is only written to the report file.
    """
    with open(report_name, "w") as outfile:
        channels = dts.InsetChart.Channels
        starting_pop = inset_days[0][channels.KEY_StatisticalPopulation]
        # Disabled upstream:
        # success = sft.test_binomial_95ci( initial_resistances, starting_pop, param_obj["TB_Drug_Resistance_Rate_HIV"], outfile, "???" )
        success = True
        progression = []
        bad_msgs = []

        # --- Check 1: reported prevalence vs. prevalence derived from counts.
        for day, inset_day in enumerate(inset_days):
            reported_prevalence = inset_day[channels.KEY_MdrTbPrevalence]
            resistant_count = Resistances[day]
            if day >= drug_start_time:
                progression.append(resistant_count)
            if debug:
                outfile.write("Day: {0}\n".format(day))
                outfile.write(str(inset_day) + "\n")
                outfile.write("StdOut resistants: {0}\n".format(resistant_count))
            # NOTE(review): the original indentation was lost; the comparison is
            # assumed to run regardless of ``debug`` - confirm.
            population = float(inset_day[channels.KEY_StatisticalPopulation])
            derived_prevalence = resistant_count / population
            if abs(reported_prevalence - derived_prevalence) > 0.03:
                bad_msgs.append(
                    "BAD: at timestep {0}, expected MDR prevalence: {1}, InsetChart had: {2}\n".format(
                        day, derived_prevalence, reported_prevalence))

        # --- Check 2: daily new resistances vs. the configured rate.
        rate = param_obj["TB_Drug_Resistance_Rate_HIV"]
        new_resistances = []
        previous_count = 0
        failed_count = 0
        total_test = 0
        for step in range(drug_start_time + 1, len(Resistances)):
            current_count = Resistances[step]
            gained = current_count - previous_count
            previous_count = current_count
            new_resistances.append(gained)

            remaining = starting_pop - current_count
            expected_mean = remaining * rate
            total_test += 1
            if expected_mean >= 5:
                # The binomial CI test is only used when the expected mean is
                # large enough; small means go through the 3-sigma band below.
                passed = sft.test_binomial_99ci(
                    gained, remaining, rate, outfile,
                    category="time step {}".format(step + 1))
                if not passed:
                    failed_count += 1
                    outfile.write(
                        "Warning: New Resistance test fails for rate = {0} at time step {1}.\n".format(
                            rate, step + 1))
            else:
                # 3 sigma of a binomial(remaining, rate)
                error_tolerance = 3 * math.sqrt(rate * (1 - rate) * remaining)
                passed = math.fabs(gained - expected_mean) <= error_tolerance
                if not passed:
                    failed_count += 1
                    outfile.write(
                        "Warning: New Resistance test fails for rate = {0} at time step {1}, "
                        "new resistance = {2}, expected mean = {3}, error tolerance = {4}.\n".format(
                            rate, step + 1, gained, expected_mean, error_tolerance))

        # Tolerate up to 1% (rounded up) failing timesteps.
        if failed_count > math.ceil(total_test * 0.01):
            success = False
            outfile.write(
                "BAD: test failed {0} times out of {1} timestep, please check the warning message.\n".format(
                    failed_count, total_test))

        if debug:
            sft.plot_data(new_resistances, title="new resistance over time",
                          category="new_resistance", show=True)
            series = sft.create_geometric_dis(
                param_obj["TB_Drug_Resistance_Rate_HIV"], starting_pop,
                len(progression), test_decay=False)
            sft.plot_data(progression, series,
                          label1="progression", label2="geomatric dis",
                          xlabel="days", ylabel="resistance",
                          title="progression vs geomatric",
                          category="progression_vs_geomatric",
                          show=True, line=True)
            sft.plot_cdf(progression, series,
                         label1="progression", label2="geomatric dis",
                         title="progression vs geomatric cdf",
                         category="progression_vs_geomatric_cdf", show=True)
        # Disabled upstream:
        # success = sft.test_geometric_decay(progression, param_obj["TB_Drug_Resistance_Rate_HIV"], starting_pop, test_decay=False, report_file=outfile, debug=debug)

        if bad_msgs:
            success = False
            outfile.writelines(bad_msgs)
        outfile.write(sft.format_success_msg(success))
def create_report_file(param_obj, multipliers, infectiousness, report_name, debug):
    """Write the log-normal infectivity-multiplier test report to ``report_name``.

    When the configured scale (sigma) is positive, ``multipliers`` are tested
    against a log-normal distribution with ``mu = -sigma**2 / 2`` and their
    mean must be within 0.02 of 1.0. Otherwise every multiplier must be
    exactly 1.0.

    Args:
        param_obj: mapping providing ``Param_keys.LOGNORMAL_SCALE`` and
            ``Param_keys.BASE_INFECTIVITY``.
        multipliers: sequence of multipliers under test.
        infectiousness: sequence of infectiousness values (reported and
            plotted only).
        report_name: path of the report file to (over)write.
        debug: when truthy, dump the sorted samples to ``scipy_data.txt`` /
            ``emod_data.txt`` and print the summary.

    Returns:
        True when all checks pass, False otherwise (including when
        ``multipliers`` is empty).
    """
    with open(report_name, "w") as outfile:
        if not multipliers:
            # Without data there is nothing to validate. Previously the
            # function carried on and could still report success (sigma <= 0),
            # so fail fast and emit the summary line.
            outfile.write(sft.sft_no_test_data)
            outfile.write(sft.format_success_msg(False))
            if debug:
                print("SUMMARY: Success={0}\n".format(False))
            return False

        success = True
        sigma = param_obj[Param_keys.LOGNORMAL_SCALE]
        base_infectivity = param_obj[Param_keys.BASE_INFECTIVITY]
        if sigma > 0:
            # mu is chosen so that the log-normal mean, exp(mu + sigma^2/2), is 1.
            mu = - sigma**2 / 2.0
            # test log_normal distribution
            success = sft.test_lognorm(multipliers, mu=mu, sigma=sigma,
                                       report_file=outfile, round=False)

            # test mean_l = 1
            mean_l = np.mean(multipliers)
            mean_infectiousness = np.mean(infectiousness)
            outfile.write("mean of the multipliers is {}, expected 1.0.\n".format(mean_l))
            outfile.write("mean of the Infectiousness is {0}, while base infectivity is {1}.\n".format(
                mean_infectiousness, base_infectivity))
            tolerance = 2e-2
            if math.fabs(mean_l - 1.0) > tolerance:
                outfile.write("BAD: mean of the multipliers is {}, expected 1.0.\n".format(mean_l))
                success = False

            # plotting: compare against a scipy sample of the same size
            size = len(multipliers)
            outfile.write("size is {}\n".format(size))
            scale = math.exp(mu)
            dist_lognormal = stats.lognorm.rvs(sigma, 0, scale, size)
            sft.plot_data(multipliers, dist_lognormal, label1="Emod", label2="Scipy",
                          ylabel="Multiplier", xlabel="data point",
                          category="Emod_vs_Scipy",
                          title="Emod_vs_Scipy, sigma = {}".format(sigma), show=True)
            sft.plot_probability(multipliers, dist_lognormal, precision=1,
                                 label1="Emod", label2="Scipy",
                                 category="Probability_mass_function_Emod_vs_Scipy",
                                 title="Emod_vs_Scipy, sigma = {}".format(sigma), show=True)
            sft.plot_cdf(multipliers, dist_lognormal, label1="Emod", label2="Scipy",
                         category="cdf", title="cdf, sigma = {}".format(sigma),
                         show=True, line=False)
            if debug:
                with open("scipy_data.txt", "w") as scipy_file:
                    scipy_file.writelines(str(n) + "\n" for n in sorted(dist_lognormal))
                with open("emod_data.txt", "w") as emod_file:
                    emod_file.writelines(str(n) + "\n" for n in sorted(multipliers))
        else:
            # sigma = 0, this feature is disabled
            for multiplier in multipliers:
                if multiplier != 1.0:
                    success = False
                    outfile.write("BAD: multiplier is {0} when {1} set to {2}, expected 1.0.\n".format(
                        multiplier, Param_keys.LOGNORMAL_SCALE, sigma))

        # plotting
        # NOTE(review): the original indentation was lost; these plots are
        # assumed to run for every sigma (their titles embed sigma) - confirm.
        sft.plot_data(multipliers, label1="Multiplier", label2="NA",
                      category="Multiplier",
                      title="Multiplier_Sigma={}".format(sigma),
                      ylabel="Multiplier", xlabel="data point", show=True)
        sft.plot_data(infectiousness, label1="Infectiousness", label2="NA",
                      category="Infectiousness",
                      title="Infectiousness_Sigma={0}_BaseInfectivity={1}".format(sigma, base_infectivity),
                      ylabel="Infectiousness", xlabel="data point", show=True)
        outfile.write(sft.format_success_msg(success))

    if debug:
        # print() with a single argument behaves the same on Python 2 and 3.
        print("SUMMARY: Success={0}\n".format(success))
    return success
def create_report_file(drug_start_timestep, inactivation_times, active_count, inactivations,
                       drug_inactivation_rate, report_name, debug = False):
    """Write the drug-inactivation test report to ``report_name``.

    For rates below 0.1 the inactivation times are tested as draws from an
    exponential distribution. For larger rates the per-day inactivation counts
    are compared with ``rate * active_count`` through a chi-squared
    (multinomial) test instead.

    Args:
        drug_start_timestep: timestep index at which the drug starts.
        inactivation_times: individual inactivation durations.
        active_count: per-timestep count used to derive expected inactivations.
        inactivations: per-timestep observed inactivation counts.
        drug_inactivation_rate: configured inactivation rate.
        report_name: path of the report file to (over)write.
        debug: when truthy, also print the summary line.

    Returns:
        True when all checks pass, False otherwise.
    """
    with open(report_name, "w") as outfile:
        success = True
        # The KS exponential test does not behave well for large rates, so a
        # chi-squared test is used there; for small rates the KS test is the
        # more sensitive of the two.
        if drug_inactivation_rate < 0.1:
            header = ("Testing inactivation times as draws from exponential distrib with rate {0}. "
                      "Dataset size = {1}.\n")
            outfile.write(header.format(drug_inactivation_rate, len(inactivation_times)))
            success = dtk_sft.test_exponential(
                inactivation_times, drug_inactivation_rate, outfile,
                integers=True, roundup=True, round_nearest=False)
            if not success:
                outfile.write("BAD: ks test for rate {} is False.\n".format(drug_inactivation_rate))

            # Reference sample for the plots: numpy draws, rounded up.
            size = len(inactivation_times)
            scale = 1.0 / drug_inactivation_rate
            dist_exponential_np = [math.ceil(draw)
                                   for draw in numpy.random.exponential(scale, size)]
            labels = dict(label1="test times", label2="numpy data")
            dtk_sft.plot_data_sorted(inactivation_times, dist_exponential_np,
                                     title="inactivation_times_actual_vs_numpy",
                                     xlabel="data points", ylabel="Inactivation times",
                                     category="inactivation_times",
                                     show = True, line = True, overlap=True, **labels)
            dtk_sft.plot_cdf(inactivation_times, dist_exponential_np,
                             title="inactivation_times_cdf",
                             xlabel="days", ylabel="probability",
                             category="inactivation_times_cdf", show = True, **labels)
            dtk_sft.plot_probability(inactivation_times, dist_exponential_np,
                                     title="inactivation_times_pdf",
                                     xlabel="days", ylabel="probability",
                                     category="inactivation_times_pdf", show = True, **labels)
        else:
            outfile.write("Testing inactivation count per day with rate {0}. \n".format(drug_inactivation_rate))
            expected_inactivation = []
            for t, observed in enumerate(inactivations):
                if t < drug_start_timestep:
                    # Nothing may be inactivated before the drug starts.
                    if observed > 0:
                        success = False
                        outfile.write(
                            "BAD: expected no inactivations on drugs before day {0}, get {1} cases at timestep {2}.\n".format(
                                drug_start_timestep, observed, t))
                elif active_count[t] > 0:
                    expected_inactivation.append(drug_inactivation_rate * active_count[t])

            # Observed counts are taken from the day after the drug start and
            # both series are trimmed to the same length.
            first = drug_start_timestep + 1
            if len(inactivations) > len(expected_inactivation) + drug_start_timestep:
                test_inactivation_dates = inactivations[first:first + len(expected_inactivation)]
            else:
                test_inactivation_dates = inactivations[first:]
                expected_inactivation = expected_inactivation[:len(test_inactivation_dates)]

            dtk_sft.plot_data(test_inactivation_dates, expected_inactivation,
                              label1="actual inactivation", label2="expected inactivation",
                              title="inactivation per day",
                              xlabel="date after drug start day", ylabel="inactivation per day",
                              category="inactivation_counts",
                              show=True, line=True, overlap=True, sort=False)
            chi_result = dtk_sft.test_multinomial(dist=test_inactivation_dates,
                                                  proportions=expected_inactivation,
                                                  report_file=outfile, prob_flag=False)
            if not chi_result:
                success = False
                outfile.write("BAD: Chi-squared test reuslt is False.\n")

        outfile.write(dtk_sft.format_success_msg(success))

    if debug:
        print(dtk_sft.format_success_msg(success))
    return success
def create_report_file(param_obj, multipliers, infectiousness, report_name, debug):
    """Write the log-normal infectivity-multiplier test report to ``report_name``.

    When the configured scale (sigma) is positive, ``multipliers`` are tested
    against a log-normal distribution with ``mu = -sigma**2 / 2`` and their
    mean must be within 0.02 of 1.0. Otherwise every multiplier must be
    exactly 1.0. Plots are produced with ``sft.plot_data_sorted``.

    Args:
        param_obj: mapping providing ``Param_keys.LOGNORMAL_SCALE`` and
            ``Param_keys.BASE_INFECTIVITY``.
        multipliers: sequence of multipliers under test.
        infectiousness: sequence of infectiousness values (reported and
            plotted only).
        report_name: path of the report file to (over)write.
        debug: when truthy, dump the sorted samples to ``scipy_data.txt`` /
            ``emod_data.txt`` and print the summary.

    Returns:
        True when all checks pass, False otherwise (including when
        ``multipliers`` is empty).
    """
    with open(report_name, "w") as outfile:
        if not multipliers:
            # Without data there is nothing to validate. Previously the
            # function carried on and could still report success (sigma <= 0),
            # so fail fast and emit the summary line.
            outfile.write(sft.sft_no_test_data)
            outfile.write(sft.format_success_msg(False))
            if debug:
                print("SUMMARY: Success={0}\n".format(False))
            return False

        success = True
        sigma = param_obj[Param_keys.LOGNORMAL_SCALE]
        base_infectivity = param_obj[Param_keys.BASE_INFECTIVITY]
        if sigma > 0:
            # mu is chosen so that the log-normal mean, exp(mu + sigma^2/2), is 1.
            mu = - sigma**2 / 2.0
            # test log_normal distribution
            success = sft.test_lognorm(multipliers, mu=mu, sigma=sigma,
                                       report_file=outfile, round=False)

            # test mean_l = 1
            mean_l = np.mean(multipliers)
            mean_infectiousness = np.mean(infectiousness)
            outfile.write("mean of the multipliers is {}, expected 1.0.\n".format(mean_l))
            outfile.write("mean of the Infectiousness is {0}, while base infectivity is {1}.\n".format(
                mean_infectiousness, base_infectivity))
            tolerance = 2e-2
            if math.fabs(mean_l - 1.0) > tolerance:
                outfile.write("BAD: mean of the multipliers is {}, expected 1.0.\n".format(mean_l))
                success = False

            # plotting: compare against a scipy sample of the same size
            size = len(multipliers)
            outfile.write("size is {}\n".format(size))
            scale = math.exp(mu)
            dist_lognormal = stats.lognorm.rvs(sigma, 0, scale, size)
            sft.plot_data_sorted(multipliers, dist_lognormal, label1="Emod", label2="Scipy",
                                 ylabel="Multiplier", xlabel="data point",
                                 category="Emod_vs_Scipy",
                                 title="Emod_vs_Scipy, sigma = {}".format(sigma), show=True)
            sft.plot_probability(multipliers, dist_lognormal, precision=1,
                                 label1="Emod", label2="Scipy",
                                 category="Probability_mass_function_Emod_vs_Scipy",
                                 title="Emod_vs_Scipy, sigma = {}".format(sigma), show=True)
            sft.plot_cdf(multipliers, dist_lognormal, label1="Emod", label2="Scipy",
                         category="cdf", title="cdf, sigma = {}".format(sigma),
                         show=True, line=False)
            if debug:
                with open("scipy_data.txt", "w") as scipy_file:
                    scipy_file.writelines(str(n) + "\n" for n in sorted(dist_lognormal))
                with open("emod_data.txt", "w") as emod_file:
                    emod_file.writelines(str(n) + "\n" for n in sorted(multipliers))
        else:
            # sigma = 0, this feature is disabled
            for multiplier in multipliers:
                if multiplier != 1.0:
                    success = False
                    outfile.write("BAD: multiplier is {0} when {1} set to {2}, expected 1.0.\n".format(
                        multiplier, Param_keys.LOGNORMAL_SCALE, sigma))

        # plotting
        # NOTE(review): the original indentation was lost; these plots are
        # assumed to run for every sigma (their titles embed sigma) - confirm.
        sft.plot_data_sorted(multipliers, label1="Multiplier", label2="NA",
                             category="Multiplier",
                             title="Multiplier_Sigma={}".format(sigma),
                             ylabel="Multiplier", xlabel="data point", show=True)
        sft.plot_data_sorted(infectiousness, label1="Infectiousness", label2="NA",
                             category="Infectiousness",
                             title="Infectiousness_Sigma={0}_BaseInfectivity={1}".format(sigma, base_infectivity),
                             ylabel="Infectiousness", xlabel="data point", show=True)
        outfile.write(sft.format_success_msg(success))

    if debug:
        print( "SUMMARY: Success={0}\n".format(success) )
    return success
def create_report_file(drug_start_timestep, disease_deaths, cum_deaths, deaths, infected_individuals,
                       death_times, drug_mortality_rate_HIV, report_name ):
    """Write the on-drug mortality test report to ``report_name``.

    Checks performed:

    * ``disease_deaths`` and ``cum_deaths`` must agree at every timestep.
    * For rates below 0.1, ``death_times`` are tested as draws from an
      exponential distribution.
    * For larger rates, per-day ``deaths`` are compared with
      ``rate * infected_individuals`` through a chi-squared (multinomial)
      test, and no death may occur before day ``drug_start_timestep + 1``.

    Args:
        drug_start_timestep: timestep index at which the drug starts.
        disease_deaths: per-timestep values reported as read from
            InsetChart.json (per the failure message).
        cum_deaths: per-timestep values reported as read from stdout.txt
            (per the failure message).
        deaths: per-timestep death counts.
        infected_individuals: per-timestep infected counts.
        death_times: individual death durations.
        drug_mortality_rate_HIV: configured mortality rate.
        report_name: path of the report file to (over)write.

    Returns:
        True when all checks pass, False otherwise.
    """
    with open(report_name, "w") as outfile:
        success = True
        if sum(disease_deaths) == 0 or sum(cum_deaths) == 0 or len(death_times) == 0:
            success = False
            outfile.write(dtk_sft.no_test_data)

        for x, cum_death in enumerate(cum_deaths):
            if disease_deaths[x] != cum_death:
                success = False
                outfile.write(
                    "BAD: at timestep {0}, disease deaths is {1} in InsetChart.json and {2} in stdout.txt.\n".format(
                        x + 1, disease_deaths[x], cum_death))

        # ks exponential test doesn't work very well with large rate, use chi squared test instead
        # while rate is small ks test for exponential distribution is more sensitive to catch the difference
        if drug_mortality_rate_HIV < 0.1:
            outfile.write("Testing death times as draws from exponential distrib with rate {0}. "
                          "Dataset size = {1}.\n".format(drug_mortality_rate_HIV, len(death_times)))
            ks_result = dtk_sft.test_exponential(death_times, drug_mortality_rate_HIV,
                                                 report_file=outfile,
                                                 integers=True, roundup=True, round_nearest=False)
            if not ks_result:
                success = False
                outfile.write("BAD: ks test reuslt is False.\n")

            # Reference sample for the plots: numpy draws, rounded up.
            size = len(death_times)
            scale = 1.0 / drug_mortality_rate_HIV
            dist_exponential_np = [math.ceil(x) for x in numpy.random.exponential(scale, size)]
            dtk_sft.plot_data_sorted(death_times, dist_exponential_np,
                                     label1="death times", label2="numpy data",
                                     title="death_times_actual_vs_numpy",
                                     xlabel="data points", ylabel="death times",
                                     category="death_times",
                                     show=True, line=True, overlap=True)
            dtk_sft.plot_cdf(death_times, dist_exponential_np,
                             label1="death times", label2="numpy data",
                             title="death_times_cdf",
                             xlabel="days", ylabel="probability",
                             category="death_times_cdf", show=True)
        else:
            outfile.write("Testing death count per day with rate {0}. \n".format(drug_mortality_rate_HIV))
            first_drug_day = drug_start_timestep + 1
            expected_mortality = []
            for t, death_count in enumerate(deaths):
                if t < first_drug_day:
                    # No disease death is expected before the drug takes effect.
                    if death_count > 0:
                        success = False
                        outfile.write(
                            "BAD: expected no disease death on drugs before day {0}, get {1} cases at timestep {2}.\n".format(
                                first_drug_day, death_count, t))
                elif infected_individuals[t] > 0:
                    expected_mortality.append(drug_mortality_rate_HIV * infected_individuals[t])

            if not expected_mortality:
                # pop(0) on an empty list would raise IndexError and leave the
                # report without its summary line; report the failure instead.
                success = False
                outfile.write(
                    "BAD: no infected individuals found on or after timestep {0}, "
                    "cannot test death count per day.\n".format(first_drug_day))
            else:
                expected_mortality.pop(0)  # the Infected is off by one day
                test_death_dates = deaths[first_drug_day:first_drug_day + len(expected_mortality)]
                dtk_sft.plot_data(test_death_dates, expected_mortality,
                                  label1="actual death", label2="expected death",
                                  title="death per day",
                                  xlabel="date after drug start day", ylabel="death per day",
                                  category="death_counts",
                                  show=True, line=True, overlap=True, sort=False)
                chi_result = dtk_sft.test_multinomial(dist=test_death_dates,
                                                      proportions=expected_mortality,
                                                      report_file=outfile, prob_flag=False)
                if not chi_result:
                    success = False
                    outfile.write("BAD: Chi-squared test reuslt is False.\n")

        outfile.write(dtk_sft.format_success_msg(success))
    return success