def create_report_file(param_obj, campaign_obj, report_data_obj, report_name, debug):
    """Write the report comparing 3-month new-infection totals to a constant expectation.

    New infections from ``start_day`` onward are summed into consecutive bins
    of ``3 * dtk_sft.DAYS_IN_MONTH`` days.  The bins are plotted against, and
    chi-squared tested (``dtk_sft.test_multinomial``) against, an expected
    per-bin count of
    ``base_infectivity * DAYS_IN_MONTH * 3 * immunity_acquisition_factor``.

    Args:
        param_obj: mapping read with KEY_CONFIG_NAME, KEY_BASE_INFECTIVITY and
            KEY_IMMUNITY_ACQUISITION_FACTOR.
        campaign_obj: mapping read with KEY_START_DAY.
        report_data_obj: mapping read with KEY_NEW_INFECTION; the value is
            indexed by time step.
        report_name: path of the report file, opened in write mode.
        debug: when truthy, a one-line summary is printed.

    Returns:
        True if no check failed, False otherwise.
    """
    with open(report_name, "w") as outfile:
        outfile.write("Config_name = {}\n".format(param_obj[KEY_CONFIG_NAME]))
        success = True

        base_infectivity = param_obj[KEY_BASE_INFECTIVITY]
        start_day = campaign_obj[KEY_START_DAY]
        new_infection = report_data_obj[KEY_NEW_INFECTION]
        immunity_acquisition_factor = param_obj[KEY_IMMUNITY_ACQUISITION_FACTOR]

        # Expected number of infections for one bin of 3 months; the same
        # value is used for every bin of the year.
        months_per_bin = 3
        days_per_bin = months_per_bin * dtk_sft.DAYS_IN_MONTH
        bin_count = dtk_sft.MONTHS_IN_YEAR // months_per_bin
        per_bin_expectation = (base_infectivity * dtk_sft.DAYS_IN_MONTH
                               * months_per_bin * immunity_acquisition_factor)
        expected = [per_bin_expectation] * bin_count

        minimum_duration = start_day + dtk_sft.DAYS_IN_YEAR
        if len(new_infection) < minimum_duration:
            success = False
            outfile.write(
                "BAD: the simulation duration is too short, please make sure it's at least {} days.\n"
                .format(minimum_duration))

        outfile.write(
            "running chi-squared test for expected new infections for {0} {1}-months time bins: \n"
            "base_infectivity = {2}, immunity_acquisition_factor = {3}.\n".format(
                bin_count, months_per_bin, base_infectivity,
                immunity_acquisition_factor))

        # Group the new infections into bins of `days_per_bin` days.  A
        # trailing partial bin is never appended.
        # NOTE(review): the number of observed bins follows the simulation
        # length and is not capped at `bin_count` -- confirm the simulation
        # duration always yields exactly `bin_count` complete bins.
        value_to_test = []
        running_total = 0
        for elapsed_days, t in enumerate(range(start_day, len(new_infection)), start=1):
            running_total += new_infection[t]
            if elapsed_days % days_per_bin == 0:
                value_to_test.append(running_total)
                running_total = 0

        dtk_sft.plot_data(
            value_to_test,
            dist2=expected,
            label1="actual_new_infections",
            label2="expected_new_infection",
            title="actual vs. expected new infection for every {} months".format(months_per_bin),
            xlabel="every {} months".format(months_per_bin),
            ylabel="# of new infections",
            category='actual_vs_expected_new_infections',
            show=True,
            line=True)

        chi_squared_passed = dtk_sft.test_multinomial(
            dist=value_to_test, proportions=expected, report_file=outfile, prob_flag=False)
        if chi_squared_passed:
            outfile.write(
                "GOOD: The Chi-squared test for number of new infections in every {} months passed.\n"
                .format(months_per_bin))
        else:
            success = False
            outfile.write(
                "BAD: The Chi-squared test for number of new infections in every {} months failed.\n"
                .format(months_per_bin))

        outfile.write(dtk_sft.format_success_msg(success))

    if debug:
        print("SUMMARY: Success={0}\n".format(success))
    return success
def create_report_file(param_obj, node_list, campaign_obj, migration_df, report_data_obj,
                       stdout_filename, report_name, debug):
    """Write the report for monthly new infections with decaying immunity on several cores.

    The function:
      1. checks the simulation duration and the conditions verified by
         ``tms.check_test_condition``;
      2. computes the expected number of new infections for each month of one
         year from ``base_infectivity``, ``immunity_acquisition_factor`` and
         ``decay_rate`` (both rates are multiplied by
         ``dtk_sft.DAYS_IN_YEAR`` first), scaled by ``len(node_list)``;
      3. sums the reported new infections from ``start_day + 1`` onward into
         bins of ``dtk_sft.DAYS_IN_MONTH`` days and chi-squared tests them
         against the expectation with ``dtk_sft.test_multinomial``;
      4. verifies that every time step returned by ``parse_output_file`` was
         reported by every core ``0 .. KEY_NUM_CORES - 1``.

    Args:
        param_obj: mapping read with KEY_CONFIG_NAME, KEY_BASE_INFECTIVITY,
            KEY_IMMUNITY_ACQUISITION_FACTOR, KEY_DECAY_RATE and KEY_NUM_CORES.
        node_list: sized collection of nodes; only its length is used here,
            and it is forwarded to ``tms.check_test_condition``.
        campaign_obj: mapping read with KEY_START_DAY.
        migration_df: forwarded unchanged to ``tms.check_test_condition``.
        report_data_obj: mapping read with KEY_NEW_INFECTION; the value is
            indexed by time step.
        stdout_filename: forwarded to ``parse_output_file``.
        report_name: path of the report file, opened in write mode.
        debug: forwarded to ``parse_output_file``; when truthy a one-line
            summary is also printed.

    Returns:
        True if no check failed, False otherwise.
    """
    with open(report_name, "w") as outfile:
        config_name = param_obj[KEY_CONFIG_NAME]
        outfile.write("Config_name = {}\n".format(config_name))
        success = True

        base_infectivity = param_obj[KEY_BASE_INFECTIVITY]
        start_day = campaign_obj[KEY_START_DAY]
        new_infection = report_data_obj[KEY_NEW_INFECTION]
        immunity_acquisition_factor = param_obj[KEY_IMMUNITY_ACQUISITION_FACTOR]
        decay_rate = param_obj[KEY_DECAY_RATE]

        outfile.write("checking some test conditions:\n")
        outfile.write(" -- simulation duration: {} days\n".format(len(new_infection)))
        minimum_duration = start_day + 1 + dtk_sft.DAYS_IN_YEAR
        if len(new_infection) < minimum_duration:
            success = False
            outfile.write(
                "BAD: the simulation duration is too short, please make sure it's at least {} days.\n"
                .format(minimum_duration))

        # The summary message is written to the report file inside
        # check_test_condition (per the original author's comment).
        if not tms.check_test_condition(param_obj[KEY_NUM_CORES], node_list,
                                        migration_df, outfile):
            success = False

        number_of_month = 1
        outfile.write(
            "calculate expected number of infections for a time period of {} month("
            "unit is 1/years):\n".format(number_of_month))

        # Work in units of years: rates become per-year and the time bins are
        # fractions of a year.
        decay_rate *= dtk_sft.DAYS_IN_YEAR
        base_infectivity *= dtk_sft.DAYS_IN_YEAR
        step = number_of_month / dtk_sft.MONTHS_IN_YEAR

        expected = []
        t_initial = 0
        # The 1.01 upper bound keeps the bin ending at t = 1.0 year inside
        # the half-open arange interval.
        for t_final in np.arange(step, 1.01, step):
            expected_new_infection = base_infectivity * (
                t_final - t_initial) - base_infectivity * (
                    1.0 - immunity_acquisition_factor) / decay_rate * math.exp(
                        -1 * decay_rate * t_initial) * (
                            1.0 - math.exp(-1 * decay_rate * (t_final - t_initial)))
            expected_new_infection *= len(node_list)
            expected.append(expected_new_infection)
            t_initial = t_final

        outfile.write(
            "running chi-squared test for actual vs expected new infections for {0} {1}-months time bins: \n"
            "base_infectivity = {2}, immunity_acquisition_factor = {3}, decay rate = {4}.(unit is 1/years)\n"
            .format(dtk_sft.MONTHS_IN_YEAR // number_of_month, number_of_month,
                    base_infectivity, immunity_acquisition_factor, decay_rate))

        # Group new infections for every month, starting the day after
        # start_day.  A trailing partial bin is never appended.
        days_per_bin = number_of_month * dtk_sft.DAYS_IN_MONTH
        value_to_test = []
        actual_new_infection = 0
        for elapsed_days, t in enumerate(range(start_day + 1, len(new_infection)), start=1):
            actual_new_infection += new_infection[t]
            if elapsed_days % days_per_bin == 0:
                value_to_test.append(actual_new_infection)
                actual_new_infection = 0

        dtk_sft.plot_data(
            value_to_test,
            dist2=expected,
            label1="actual_new_infections",
            label2="expected_new_infection",
            title="actual vs. expected new infection for every {} month".format(number_of_month),
            xlabel="month",
            ylabel="# of new infections",
            category='actual_vs_expected_new_infections',
            show=True,
            line=False)

        result = dtk_sft.test_multinomial(dist=value_to_test, proportions=expected,
                                          report_file=outfile, prob_flag=False)
        if not result:
            success = False
            outfile.write(
                "BAD: The Chi-squared test for number of new infections in every {} months failed.\n"
                .format(number_of_month))
        else:
            outfile.write(
                "GOOD: The Chi-squared test for number of new infections in every {} months passed.\n"
                .format(number_of_month))

        output_dict = parse_output_file(stdout_filename, debug)
        outfile.write(
            "checking if all cores are reporting in every time step in stdout file:\n")
        core_list = [str(n) for n in range(param_obj[KEY_NUM_CORES])]
        # Compare sorted against sorted.  core_list is in numeric order
        # ('9' before '10') while sorted() on strings is lexicographic
        # ('10' before '2'), so comparing core_list directly to sorted(cores)
        # could never match once there are more than 10 cores.
        sorted_core_list = sorted(core_list)
        for t, cores in output_dict.items():
            if sorted_core_list != sorted(cores):
                success = False
                outfile.write(
                    "BAD: at time step {0}, these cores reported to stdout.txt are: {1}, while "
                    "expected cores are: {2}.\n".format(t, cores, core_list))

        outfile.write(dtk_sft.format_success_msg(success))

    if debug:
        print("SUMMARY: Success={0}\n".format(success))
    return success
def create_report_file(drug_start_timestep, inactivation_times, active_count, inactivations,
                       drug_inactivation_rate, report_name, debug = False):
    """Write the report that checks drug inactivation against ``drug_inactivation_rate``.

    For rates below 0.1 the inactivation times are tested as draws from an
    exponential distribution (``dtk_sft.test_exponential``) and plotted
    against a freshly drawn, rounded-up numpy exponential sample.  For larger
    rates the daily inactivation counts are chi-squared tested
    (``dtk_sft.test_multinomial``) against
    ``drug_inactivation_rate * active_count[t]``.

    Args:
        drug_start_timestep: time step at which the drug starts; any
            inactivation before it is reported as BAD.
        inactivation_times: sample of inactivation times (rate < 0.1 branch).
        active_count: per-time-step count, indexed like ``inactivations``.
        inactivations: per-time-step inactivation counts.
        drug_inactivation_rate: the rate under test.
        report_name: path of the report file, opened in write mode.
        debug: when truthy, the success message is also printed.

    Returns:
        True if no check failed, False otherwise.
    """
    with open(report_name, "w") as outfile:
        success = True
        # The ks exponential test doesn't work very well with a large rate, so
        # the chi-squared test is used there instead; for a small rate the ks
        # test is more sensitive to a difference.
        use_ks_test = drug_inactivation_rate < 0.1
        if use_ks_test:
            outfile.write(
                "Testing inactivation times as draws from exponential distrib with rate {0}. "
                "Dataset size = {1}.\n".format(drug_inactivation_rate, len(inactivation_times)))
            success = dtk_sft.test_exponential(inactivation_times, drug_inactivation_rate,
                                               outfile, integers=True, roundup=True,
                                               round_nearest=False)
            if not success:
                outfile.write("BAD: ks test for rate {} is False.\n".format(drug_inactivation_rate))

            # Reference sample for the plots only: exponential draws rounded
            # up to whole numbers.
            sample_size = len(inactivation_times)
            sample_scale = 1.0 / drug_inactivation_rate
            raw_draws = numpy.random.exponential(sample_scale, sample_size)
            dist_exponential_np = [math.ceil(draw) for draw in raw_draws]

            dtk_sft.plot_data_sorted(inactivation_times, dist_exponential_np,
                                     label1="test times", label2="numpy data",
                                     title="inactivation_times_actual_vs_numpy",
                                     xlabel="data points", ylabel="Inactivation times",
                                     category="inactivation_times",
                                     show = True, line = True, overlap=True)
            dtk_sft.plot_cdf(inactivation_times, dist_exponential_np,
                             label1="test times", label2="numpy data",
                             title="inactivation_times_cdf",
                             xlabel="days", ylabel="probability",
                             category="inactivation_times_cdf", show = True)
            dtk_sft.plot_probability(inactivation_times, dist_exponential_np,
                                     label1="test times", label2="numpy data",
                                     title="inactivation_times_pdf",
                                     xlabel="days", ylabel="probability",
                                     category="inactivation_times_pdf", show = True)
        else:
            outfile.write("Testing inactivation count per day with rate {0}. \n".format(
                drug_inactivation_rate))

            expected_inactivation = []
            for t, inactivated in enumerate(inactivations):
                if t < drug_start_timestep:
                    # Nothing may be inactivated before the drug is given.
                    if inactivated > 0:
                        success = False
                        outfile.write(
                            "BAD: expected no inactivations on drugs before day {0}, get {1} cases at timestep {2}.\n"
                            "".format(drug_start_timestep, inactivated, t))
                elif active_count[t] > 0:
                    expected_inactivation.append(drug_inactivation_rate * active_count[t])

            # Observed counts start one step after the drug start; whichever
            # of the two series is longer is trimmed to the other's length.
            first_tested = drug_start_timestep + 1
            if len(inactivations) <= len(expected_inactivation) + drug_start_timestep:
                test_inactivation_dates = inactivations[first_tested:]
                expected_inactivation = expected_inactivation[:len(test_inactivation_dates)]
            else:
                last_tested = first_tested + len(expected_inactivation)
                test_inactivation_dates = inactivations[first_tested:last_tested]

            dtk_sft.plot_data(test_inactivation_dates, expected_inactivation,
                              label1="actual inactivation", label2="expected inactivation",
                              title="inactivation per day",
                              xlabel="date after drug start day", ylabel="inactivation per day",
                              category="inactivation_counts",
                              show=True, line=True, overlap=True, sort=False)

            chi_squared_passed = dtk_sft.test_multinomial(
                dist=test_inactivation_dates, proportions=expected_inactivation,
                report_file=outfile, prob_flag=False)
            if not chi_squared_passed:
                success = False
                outfile.write("BAD: Chi-squared test reuslt is False.\n")

        outfile.write(dtk_sft.format_success_msg(success))

    if debug:
        print(dtk_sft.format_success_msg(success))
    return success
def create_report_file(param_obj, campaign_obj, output_dict, report_dict, report_name, debug):
    """Write the report that checks test positive/negative/default counts per time step.

    For every time step in ``report_dict`` the three counts are chi-squared
    tested (``dtk_sft.test_multinomial``) against the proportions
    ``[sensitivity * treatment_fraction,
    (1 - sensitivity) * treatment_fraction, 1 - treatment_fraction]``.
    The whole test fails when the number of failing time steps exceeds
    ``ceil(5%)`` of the tested time steps, or when there is no data at all.
    Each count series is then plotted against the per-step total.

    Args:
        param_obj: mapping read with KEY_CONFIG_NAME.
        campaign_obj: mapping read with KEY_BASE_SENSITIVITY and
            KEY_TREATMENT_FRACTION.
        output_dict: not used by this function.
        report_dict: mapping of time step -> mapping read with KEY_POSITIVE,
            KEY_NEGATIVE and KEY_DEFAULT.
        report_name: path of the report file, opened in write mode.
        debug: when truthy, a one-line summary is printed.

    Returns:
        True if no check failed, False otherwise.
    """
    with open(report_name, "w") as outfile:
        config_name = param_obj[KEY_CONFIG_NAME]
        outfile.write("Config_name = {}\n".format(config_name))
        success = True

        sensitivity = campaign_obj[KEY_BASE_SENSITIVITY]
        treatment_fraction = campaign_obj[KEY_TREATMENT_FRACTION]
        proportions = [
            sensitivity * treatment_fraction,
            (1.0 - sensitivity) * treatment_fraction,
            1.0 - treatment_fraction,
        ]

        positive = []
        negative = []
        default = []
        total = []
        failed_timestep = []

        timestep_count = len(report_dict)
        if timestep_count == 0:
            success = False
            outfile.write(dtk_sft.sft_no_test_data)

        for t in report_dict:
            counts = report_dict[t]
            value_to_test = [counts[KEY_POSITIVE], counts[KEY_NEGATIVE], counts[KEY_DEFAULT]]
            positive.append(value_to_test[0])
            negative.append(value_to_test[1])
            default.append(value_to_test[2])
            total.append(sum(value_to_test))

            outfile.write("Run Chi-squared test at time step {}.\n".format(t))
            result = dtk_sft.test_multinomial(dist=value_to_test, proportions=proportions,
                                              report_file=outfile)
            if not result:
                failed_timestep.append(t)
                outfile.write(
                    "Warning: At timestep {0}, the Chi-squared test failed.\n".format(t))

        # A small share of failing time steps is tolerated.
        if len(failed_timestep) > math.ceil(0.05 * timestep_count):
            success = False
            outfile.write(
                "BAD: the Chi-squared test failed at timestep {0}.\n".format(
                    ', '.join(str(x) for x in failed_timestep)))
        elif timestep_count > 0:
            # Only claim GOOD when something was actually tested; with no
            # data the BAD "no test data" line above is the verdict.
            outfile.write(
                "GOOD: the Chi-squared test failed {} times, less than 5% of the total timestep.\n"
                .format(len(failed_timestep)))

        plot_specs = (
            (positive, "TBTestPositive",
             "Test positive vs. total, positive proportion = {}",
             proportions[0], 'Test_positive_vs_total'),
            (negative, "TBTestNegative",
             "Test negative vs. total, negative proportion = {}",
             proportions[1], 'Test_negative_vs_total'),
            (default, "TBTestDefault",
             "Test default vs. total, default proportion = {}",
             proportions[2], 'Test_default_vs_total'),
        )
        for series, label, title_template, proportion, category in plot_specs:
            dtk_sft.plot_data(
                series,
                dist2=total,
                label1=label,
                label2="Total tested",
                title=title_template.format(proportion),
                xlabel="time step",
                ylabel="# of individuals",
                category=category,
                show=True,
                line=False)

        # TODO: write test to check if report matches debug logging. Pending on #2279. May not need this part.
        outfile.write(dtk_sft.format_success_msg(success))

    if debug:
        print("SUMMARY: Success={0}\n".format(success))
    return success
def create_report_file(drug_start_timestep, disease_deaths, cum_deaths, deaths,
                       infected_individuals, death_times, drug_mortality_rate_HIV, report_name):
    """Write the report that checks disease deaths on drugs against ``drug_mortality_rate_HIV``.

    The function:
      1. fails if any of the death series carries no data;
      2. compares ``disease_deaths`` with ``cum_deaths`` at every time step
         (per the report message, InsetChart.json versus stdout.txt);
      3. for rates below 0.1, tests ``death_times`` as draws from an
         exponential distribution (``dtk_sft.test_exponential``) and plots
         them against a rounded-up numpy exponential sample; otherwise
         chi-squared tests the daily deaths (``dtk_sft.test_multinomial``)
         against ``drug_mortality_rate_HIV * infected_individuals[t]``.

    Args:
        drug_start_timestep: time step at which the drug starts; deaths at
            time steps up to and including it are reported as BAD.
        disease_deaths: per-time-step series compared with ``cum_deaths``.
        cum_deaths: per-time-step series; its length drives the comparison.
        deaths: per-time-step death counts.
        infected_individuals: per-time-step count, indexed like ``deaths``.
        death_times: sample of death times (rate < 0.1 branch).
        drug_mortality_rate_HIV: the rate under test.
        report_name: path of the report file, opened in write mode.

    Returns:
        True if no check failed, False otherwise.
    """
    with open(report_name, "w") as outfile:
        success = True
        length = len(cum_deaths)
        if sum(disease_deaths) == 0 or sum(cum_deaths) == 0 or len(death_times) == 0:
            success = False
            outfile.write(dtk_sft.no_test_data)

        for x in range(length):
            if disease_deaths[x] != cum_deaths[x]:
                success = False
                outfile.write(
                    "BAD: at timestep {0}, disease deaths is {1} in InsetChart.json and {2} in stdout.txt.\n"
                    .format(x + 1, disease_deaths[x], cum_deaths[x]))

        # The ks exponential test doesn't work very well with a large rate, so
        # the chi-squared test is used there instead; for a small rate the ks
        # test is more sensitive to a difference.
        if drug_mortality_rate_HIV < 0.1:
            outfile.write("Testing death times as draws from exponential distrib with rate {0}. "
                          "Dataset size = {1}.\n".format(drug_mortality_rate_HIV, len(death_times)))
            ks_result = dtk_sft.test_exponential(death_times, drug_mortality_rate_HIV,
                                                 report_file = outfile, integers=True,
                                                 roundup=True, round_nearest=False)
            if not ks_result:
                success = False
                outfile.write("BAD: ks test reuslt is False.\n")

            # Reference sample for the plots only: exponential draws rounded
            # up to whole numbers.
            size = len(death_times)
            scale = 1.0 / drug_mortality_rate_HIV
            dist_exponential_np = numpy.random.exponential(scale, size)
            dist_exponential_np = [math.ceil(x) for x in dist_exponential_np]
            dtk_sft.plot_data_sorted(death_times, dist_exponential_np,
                                     label1="death times", label2="numpy data",
                                     title="death_times_actual_vs_numpy",
                                     xlabel="data points", ylabel="death times",
                                     category="death_times",
                                     show=True, line = True, overlap=True)
            dtk_sft.plot_cdf(death_times, dist_exponential_np,
                             label1="death times", label2="numpy data",
                             title="death_times_cdf",
                             xlabel="days", ylabel="probability",
                             category="death_times_cdf", show=True)
        else:
            outfile.write("Testing death count per day with rate {0}. \n".format(
                drug_mortality_rate_HIV))

            first_drug_day = drug_start_timestep + 1
            expected_mortality = []
            for t in range(len(deaths)):
                if t < first_drug_day:
                    if deaths[t] > 0:
                        success = False
                        outfile.write(
                            "BAD: expected no disease death on drugs before day {0}, get {1} cases at timestep {2}.\n"
                            "".format(first_drug_day, deaths[t], t))
                elif infected_individuals[t] > 0:
                    expected_mortality.append(drug_mortality_rate_HIV * infected_individuals[t])

            if not expected_mortality:
                # Previously this path raised IndexError from pop(0) on an
                # empty list and no verdict was written; report it instead.
                success = False
                outfile.write(
                    "BAD: no time step at or after day {0} has infected individuals, "
                    "the Chi-squared test cannot be run.\n".format(first_drug_day))
            else:
                del expected_mortality[0]  # the Infected is off by one day
                test_death_dates = deaths[first_drug_day:first_drug_day + len(expected_mortality)]
                dtk_sft.plot_data(test_death_dates, expected_mortality,
                                  label1="actual death", label2="expected death",
                                  title="death per day",
                                  xlabel="date after drug start day", ylabel="death per day",
                                  category="death_counts",
                                  show=True, line=True, overlap=True, sort=False)

                chi_result = dtk_sft.test_multinomial(dist=test_death_dates,
                                                      proportions=expected_mortality,
                                                      report_file=outfile, prob_flag=False)
                if not chi_result:
                    success = False
                    outfile.write("BAD: Chi-squared test reuslt is False.\n")

        outfile.write(dtk_sft.format_success_msg(success))
    return success
def create_report_file(param_obj, campaign_obj, output_dict, report_dict, report_name, debug):
    """Write the report that checks MDR test positive/negative/default counts per time step.

    For every time step in ``report_dict`` the three MDR counts are
    chi-squared tested (``dtk_sft.test_multinomial``) against proportions
    derived from the base sensitivity and the two treatment fractions; any
    failing time step fails the whole test.  Each count series is then
    plotted against the per-step total.

    Args:
        param_obj: mapping read with KEY_CONFIG_NAME.
        campaign_obj: mapping read with KEY_BASE_SENSITIVITY,
            KEY_TREATMENT_FRACTION and KEY_TREATMENT_FRACTION_NEGATIVE_DIAGNOSIS.
        output_dict: not used by this function.
        report_dict: mapping of time step -> mapping read with
            KEY_MDR_POSITIVE, KEY_MDR_NEGATIVE and KEY_MDR_DEFAULT.
        report_name: path of the report file, opened in write mode.
        debug: when truthy, a one-line summary is printed.

    Returns:
        True if every time step passed, False otherwise.
    """
    with open(report_name, "w") as outfile:
        outfile.write("Config_name = {}\n".format(param_obj[KEY_CONFIG_NAME]))
        success = True

        sensitivity = campaign_obj[KEY_BASE_SENSITIVITY]
        treatment_fraction = campaign_obj[KEY_TREATMENT_FRACTION]
        treatment_fraction_negative_diagnosis = campaign_obj[
            KEY_TREATMENT_FRACTION_NEGATIVE_DIAGNOSIS]

        # Expected shares of the positive, negative and default outcomes.
        proportions = [
            sensitivity * treatment_fraction,
            (1.0 - sensitivity) * treatment_fraction_negative_diagnosis,
            (1.0 - sensitivity) * (1.0 - treatment_fraction_negative_diagnosis)
            + sensitivity * (1.0 - treatment_fraction),
        ]
        total_proportion = sum(proportions)

        positive, negative, default, total = [], [], [], []
        # NOTE(review): an empty report_dict leaves success True without
        # running any test -- confirm that is intended.
        for t in report_dict:
            step_counts = report_dict[t]
            value_to_test = [
                step_counts[KEY_MDR_POSITIVE],
                step_counts[KEY_MDR_NEGATIVE],
                step_counts[KEY_MDR_DEFAULT],
            ]
            for series, count in zip((positive, negative, default), value_to_test):
                series.append(count)
            total.append(int(sum(value_to_test) / total_proportion))

            outfile.write("Run Chi-squared test at time step {}.\n".format(t))
            step_passed = dtk_sft.test_multinomial(dist=value_to_test,
                                                   proportions=proportions,
                                                   report_file=outfile)
            if not step_passed:
                success = False
                outfile.write(
                    "BAD: At timestep {0}, the Chi-squared test failed.\n".format(t))

        plot_specs = (
            (positive, "TBMDRTestPositive",
             "MDR Test positive vs. total, positive proportion = {}",
             'MDR_Test_positive_vs_total'),
            (negative, "TBMDRTestNegative",
             "MDR Test negative vs. total, negative proportion = {}",
             'MDR_Test_negative_vs_total'),
            (default, "TBMDRTestDefault",
             "MDR Test default vs. total, default proportion = {}",
             'MDR_Test_default_vs_total'),
        )
        for (series, label, title_template, category), proportion in zip(plot_specs, proportions):
            dtk_sft.plot_data(
                series,
                dist2=total,
                label1=label,
                label2="Total tested",
                title=title_template.format(proportion),
                xlabel="time step",
                ylabel="# of individuals",
                category=category,
                show=True,
                line=False)

        outfile.write(dtk_sft.format_success_msg(success))

    if debug:
        print("SUMMARY: Success={0}\n".format(success))
    return success