Esempio n. 1
0
def create_report_file(Resistances, initial_resistances, drug_start_time,
                       param_obj, report_name, inset_days, debug):
    with open(report_name, "w") as outfile:
        starting_pop = inset_days[0][
            dts.InsetChart.Channels.KEY_StatisticalPopulation]
        # success = sft.test_binomial_95ci( initial_resistances, starting_pop, param_obj["TB_Drug_Resistance_Rate_HIV"], outfile, "???" )
        success = True
        progression = []
        bad_msgs = []
        for x in range(len(inset_days)):
            inset_day = inset_days[x]
            inset_mdr_prevalence = inset_day[
                dts.InsetChart.Channels.KEY_MdrTbPrevalence]
            stdout_resistants = Resistances[x]
            if x >= drug_start_time:
                progression.append(stdout_resistants)
            if debug:
                outfile.write("Day: {0}\n".format(x))
                outfile.write(str(inset_day) + "\n")
                outfile.write(
                    "StdOut resistants: {0}\n".format(stdout_resistants))
            stdout_predicted_prevalence = stdout_resistants / float(
                inset_day[dts.InsetChart.Channels.KEY_StatisticalPopulation])
            if abs(inset_mdr_prevalence - stdout_predicted_prevalence) > 0.03:
                bad_msgs.append(
                    "BAD: at timestep {0}, expected MDR prevalence: {1}, InsetChart had: {2}\n"
                    .format(x, stdout_predicted_prevalence,
                            inset_mdr_prevalence))

        tb_drug_resistance_rate_hiv = param_obj["TB_Drug_Resistance_Rate_HIV"]
        new_resistances = []
        pre_resistance = 0
        failed_count = 0
        total_test = 0
        for x in range(drug_start_time + 1, len(Resistances)):
            resistance = Resistances[x]
            new_resistance = resistance - pre_resistance
            pre_resistance = resistance
            new_resistances.append(new_resistance)
            expected_mean = (starting_pop -
                             resistance) * tb_drug_resistance_rate_hiv
            total_test += 1
            if expected_mean >= 5:  # advoid failing with too small mean
                result = sft.test_binomial_99ci(
                    new_resistance,
                    starting_pop - resistance,
                    tb_drug_resistance_rate_hiv,
                    outfile,
                    category="time step {}".format(x + 1))
                if not result:
                    failed_count += 1
                    outfile.write(
                        "Warning: New Resistance test fails for rate = {0} at time step {1}.\n"
                        .format(tb_drug_resistance_rate_hiv, x + 1))
            else:
                error_tolerance = 3 * math.sqrt(
                    tb_drug_resistance_rate_hiv *
                    (1 - tb_drug_resistance_rate_hiv) *
                    (starting_pop - resistance))  # 3 sigma
                result = math.fabs(new_resistance -
                                   expected_mean) <= error_tolerance
                if not result:
                    failed_count += 1
                    outfile.write(
                        "Warning: New Resistance test fails for rate = {0} at time step {1}, "
                        "new resistance = {2}, expected mean = {3}, error tolerance = {4}.\n"
                        .format(tb_drug_resistance_rate_hiv, x + 1,
                                new_resistance, expected_mean,
                                error_tolerance))

        if failed_count > math.ceil(total_test * 0.01):
            success = False
            outfile.write(
                "BAD: test failed {0} times out of {1} timestep, please check the warning message.\n"
                "".format(failed_count, total_test))
        if debug:
            sft.plot_data(new_resistances,
                          title="new resistance over time",
                          category="new_resistance",
                          show=True)
            series = sft.create_geometric_dis(
                param_obj["TB_Drug_Resistance_Rate_HIV"],
                starting_pop,
                len(progression),
                test_decay=False)
            sft.plot_data(progression,
                          series,
                          label1="progression",
                          label2="geomatric dis",
                          xlabel="days",
                          ylabel="resistance",
                          title="progression vs geomatric",
                          category="progression_vs_geomatric",
                          show=True,
                          line=True)
            sft.plot_cdf(progression,
                         series,
                         label1="progression",
                         label2="geomatric dis",
                         title="progression vs geomatric cdf",
                         category="progression_vs_geomatric_cdf",
                         show=True)
        # success = sft.test_geometric_decay(progression, param_obj["TB_Drug_Resistance_Rate_HIV"], starting_pop, test_decay=False, report_file=outfile, debug=debug)

        if len(bad_msgs) > 0:
            success = False
            outfile.writelines(bad_msgs)

        outfile.write(sft.format_success_msg(success))
Esempio n. 2
0
def create_report_file(param_obj, multipliers, infectiousness, report_name, debug):
    with open(report_name, "w") as outfile:
        success = True
        if not multipliers:
            outfile.write(sft.sft_no_test_data)
        sigma = param_obj[Param_keys.LOGNORMAL_SCALE]
        base_infectivity = param_obj[Param_keys.BASE_INFECTIVITY]
        if sigma > 0:
            mu = - sigma**2 / 2.0
            # test log_normal distribution
            success = sft.test_lognorm(multipliers,mu=mu, sigma=sigma,report_file=outfile,round = False)

            # test mean_l = 1
            mean_l = np.mean(multipliers)
            mean_infectiousness = np.mean(infectiousness)
            outfile.write("mean of the multipliers is {}, expected 1.0.\n".format(mean_l))
            outfile.write("mean of the Infectiousness is {0}, while base infectivity is {1}.\n".format(mean_infectiousness,
                                                                                           base_infectivity))

            tolerance  = 2e-2
            if math.fabs(mean_l - 1.0) > tolerance:
                outfile.write("BAD: mean of the multipliers is {}, expected 1.0.\n".format(mean_l))
                success = False
            # plotting
            size = len(multipliers)
            outfile.write("size is {}\n".format(size))
            scale = math.exp(mu)
            dist_lognormal = stats.lognorm.rvs(sigma, 0, scale, size)
            sft.plot_data(multipliers, dist_lognormal,
                          label1="Emod", label2="Scipy",
                          ylabel="Multiplier", xlabel="data point",
                          category="Emod_vs_Scipy",
                          title="Emod_vs_Scipy, sigma = {}".format(sigma),
                          show=True)
            sft.plot_probability(multipliers, dist_lognormal,
                                 precision=1, label1="Emod", label2="Scipy",
                                 category="Probability_mass_function_Emod_vs_Scipy",
                                 title="Emod_vs_Scipy, sigma = {}".format(sigma),
                                 show=True)
            sft.plot_cdf(multipliers,dist_lognormal,label1="Emod", label2="Scipy",
                                 category="cdf",
                                 title="cdf, sigma = {}".format(sigma),
                                 show=True, line = False)
            if debug:
                with open("scipy_data.txt", "w") as file:
                    for n in sorted(dist_lognormal):
                        file.write(str(n) + "\n")
                with open("emod_data.txt", "w") as file:
                    for n in sorted(multipliers):
                        file.write(str(n) + "\n")
        else:
            # sigma = 0, this feature is disabled
            for multiplier in multipliers:
                if multiplier != 1.0:
                    success = False
                    outfile.write("BAD: multiplier is {0} when {1} set to {2}, expected 1.0.\n".format(multiplier, Param_keys.LOGNORMAL_SCALE, sigma))
            # plotting
            sft.plot_data(multipliers, label1="Multiplier", label2="NA",
                          category="Multiplier", title="Multiplier_Sigma={}".format(sigma),
                          ylabel="Multiplier", xlabel="data point",
                          show=True)
            sft.plot_data(infectiousness, label1="Infectiousness",
                          label2="NA",category="Infectiousness",
                          title="Infectiousness_Sigma={0}_BaseInfectivity={1}".format(sigma,base_infectivity),
                          ylabel="Infectiousness",xlabel="data point",
                          show=True)
        outfile.write(sft.format_success_msg(success))

    if debug:
        print "SUMMARY: Success={0}\n".format(success)
    return success
Esempio n. 3
0
def create_report_file(drug_start_timestep, inactivation_times, active_count, inactivations, drug_inactivation_rate, report_name, debug = False):
    with open(report_name, "w") as outfile:
        success = True
        # ks exponential test doesn't work very well with large rate, use chi squared test instead.
        # while rate is small ks test for exponential distribution is more sensitive to catch the difference
        if drug_inactivation_rate < 0.1:
            outfile.write( "Testing inactivation times as draws from exponential distrib with rate {0}. "
                           "Dataset size = {1}.\n".format( drug_inactivation_rate, len( inactivation_times ) ) )
            success = dtk_sft.test_exponential( inactivation_times, drug_inactivation_rate, outfile, integers=True,
                                                roundup=True, round_nearest=False )
            if not success:
                outfile.write("BAD: ks test for rate {} is False.\n".format(drug_inactivation_rate))
            size = len(inactivation_times)
            scale = 1.0 / drug_inactivation_rate
            dist_exponential_np = numpy.random.exponential(scale, size)
            dist_exponential_np = [math.ceil(x) for x in dist_exponential_np]
            dtk_sft.plot_data_sorted(inactivation_times, dist_exponential_np,
                              label1="test times", label2="numpy data",
                              title="inactivation_times_actual_vs_numpy",
                              xlabel="data points", ylabel="Inactivation times",
                              category="inactivation_times", show = True, line = True, overlap=True)
            dtk_sft.plot_cdf(inactivation_times, dist_exponential_np,
                             label1="test times", label2="numpy data",
                             title="inactivation_times_cdf",
                             xlabel="days", ylabel="probability",
                             category="inactivation_times_cdf", show = True)
            dtk_sft.plot_probability(inactivation_times, dist_exponential_np,
                                     label1="test times", label2="numpy data",
                                     title="inactivation_times_pdf",
                                     xlabel="days", ylabel="probability",
                                     category="inactivation_times_pdf", show = True)
        else:
            outfile.write("Testing inactivation count per day with rate {0}. \n".format( drug_inactivation_rate) )
            expected_inactivation = []
            for t in range( len(inactivations)):
                if t < drug_start_timestep :
                    if inactivations[t] > 0:
                        success = False
                        outfile.write("BAD: expected no inactivations on drugs before day {0}, get {1} cases at timestep {2}.\n"
                                      "".format(drug_start_timestep , inactivations[t], t))
                elif active_count[t] > 0:
                    expected_inactivation.append(drug_inactivation_rate * active_count[t])
            if len(inactivations) <= len(expected_inactivation) + drug_start_timestep:
                test_inactivation_dates = inactivations[drug_start_timestep+1:]
                expected_inactivation = expected_inactivation[:len(test_inactivation_dates)]
            else:
                test_inactivation_dates = inactivations[drug_start_timestep + 1:drug_start_timestep + 1 + len(expected_inactivation)]
            #print (len(inactivations), len(test_inactivation_dates), len(expected_inactivation))
            #print (test_inactivation_dates, expected_inactivation)
            dtk_sft.plot_data(test_inactivation_dates, expected_inactivation,
                                     label1="actual inactivation", label2="expected inactivation",
                                     title="inactivation per day",
                                     xlabel="date after drug start day", ylabel="inactivation per day",
                                     category="inactivation_counts", show=True, line=True, overlap=True, sort=False)

            chi_result = dtk_sft.test_multinomial(dist=test_inactivation_dates, proportions=expected_inactivation,
                                                  report_file=outfile, prob_flag=False)
            if not chi_result:
                success = False
                outfile.write("BAD: Chi-squared test reuslt is False.\n")
        outfile.write(dtk_sft.format_success_msg(success))
        if debug:
            print(dtk_sft.format_success_msg(success))
        return success
Esempio n. 4
0
def create_report_file(param_obj, multipliers, infectiousness, report_name, debug):
    with open(report_name, "w") as outfile:
        success = True
        if not multipliers:
            outfile.write(sft.sft_no_test_data)
        sigma = param_obj[Param_keys.LOGNORMAL_SCALE]
        base_infectivity = param_obj[Param_keys.BASE_INFECTIVITY]
        if sigma > 0:
            mu = - sigma**2 / 2.0
            # test log_normal distribution
            success = sft.test_lognorm(multipliers,mu=mu, sigma=sigma,report_file=outfile,round = False)

            # test mean_l = 1
            mean_l = np.mean(multipliers)
            mean_infectiousness = np.mean(infectiousness)
            outfile.write("mean of the multipliers is {}, expected 1.0.\n".format(mean_l))
            outfile.write("mean of the Infectiousness is {0}, while base infectivity is {1}.\n".format(mean_infectiousness,
                                                                                           base_infectivity))

            tolerance  = 2e-2
            if math.fabs(mean_l - 1.0) > tolerance:
                outfile.write("BAD: mean of the multipliers is {}, expected 1.0.\n".format(mean_l))
                success = False
            # plotting
            size = len(multipliers)
            outfile.write("size is {}\n".format(size))
            scale = math.exp(mu)
            dist_lognormal = stats.lognorm.rvs(sigma, 0, scale, size)
            sft.plot_data_sorted(multipliers, dist_lognormal,
                          label1="Emod", label2="Scipy",
                          ylabel="Multiplier", xlabel="data point",
                          category="Emod_vs_Scipy",
                          title="Emod_vs_Scipy, sigma = {}".format(sigma),
                          show=True)
            sft.plot_probability(multipliers, dist_lognormal,
                                 precision=1, label1="Emod", label2="Scipy",
                                 category="Probability_mass_function_Emod_vs_Scipy",
                                 title="Emod_vs_Scipy, sigma = {}".format(sigma),
                                 show=True)
            sft.plot_cdf(multipliers,dist_lognormal,label1="Emod", label2="Scipy",
                                 category="cdf",
                                 title="cdf, sigma = {}".format(sigma),
                                 show=True, line = False)
            if debug:
                with open("scipy_data.txt", "w") as file:
                    for n in sorted(dist_lognormal):
                        file.write(str(n) + "\n")
                with open("emod_data.txt", "w") as file:
                    for n in sorted(multipliers):
                        file.write(str(n) + "\n")
        else:
            # sigma = 0, this feature is disabled
            for multiplier in multipliers:
                if multiplier != 1.0:
                    success = False
                    outfile.write("BAD: multiplier is {0} when {1} set to {2}, expected 1.0.\n".format(multiplier, Param_keys.LOGNORMAL_SCALE, sigma))
            # plotting
            sft.plot_data_sorted(multipliers, label1="Multiplier", label2="NA",
                          category="Multiplier", title="Multiplier_Sigma={}".format(sigma),
                          ylabel="Multiplier", xlabel="data point",
                          show=True)
            sft.plot_data_sorted(infectiousness, label1="Infectiousness",
                          label2="NA",category="Infectiousness",
                          title="Infectiousness_Sigma={0}_BaseInfectivity={1}".format(sigma,base_infectivity),
                          ylabel="Infectiousness",xlabel="data point",
                          show=True)
        outfile.write(sft.format_success_msg(success))

    if debug:
        print( "SUMMARY: Success={0}\n".format(success) )
    return success
Esempio n. 5
0
def create_report_file(drug_start_timestep, disease_deaths, cum_deaths, deaths, infected_individuals, death_times, drug_mortality_rate_HIV, report_name ):
    with open(report_name, "w") as outfile:
        success = True
        length = len(cum_deaths)
        if sum(disease_deaths)==0 or sum(cum_deaths)==0 or len(death_times)==0:
            success = False
            outfile.write(dtk_sft.no_test_data)
        for x in range(length):
            if disease_deaths[x] != cum_deaths[x]:
                success = False
                outfile.write("BAD: at timestep {0}, disease deaths is {1} in InsetChart.json and {2} in stdout.txt.\n".format(x+1, disease_deaths[x], cum_deaths[x]))
        # ks exponential test doesn't work very well with large rate, use chi squared test instead
        # while rate is small ks test for exponential distribution is more sensitive to catch the difference
        if drug_mortality_rate_HIV < 0.1:
            outfile.write("Testing death times as draws from exponential distrib with rate {0}. "
                          "Dataset size = {1}.\n".format(drug_mortality_rate_HIV, len(death_times)))
            ks_result = dtk_sft.test_exponential( death_times, drug_mortality_rate_HIV, report_file = outfile,
                                                  integers=True, roundup=True, round_nearest=False )
            if not ks_result:
                success = False
                outfile.write("BAD: ks test reuslt is False.\n")
            size = len(death_times)
            scale = 1.0 / drug_mortality_rate_HIV
            dist_exponential_np = numpy.random.exponential(scale, size)
            dist_exponential_np = [math.ceil(x) for x in dist_exponential_np]
            dtk_sft.plot_data_sorted(death_times, dist_exponential_np,
                              label1="death times", label2="numpy data",
                              title="death_times_actual_vs_numpy",
                              xlabel="data points", ylabel="death times",
                              category="death_times", show=True, line = True, overlap=True)
            dtk_sft.plot_cdf(death_times, dist_exponential_np,
                             label1="death times", label2="numpy data",
                             title="death_times_cdf",
                             xlabel="days", ylabel="probability",
                             category="death_times_cdf", show=True)
        else:
            outfile.write("Testing death count per day with rate {0}. \n".format(drug_mortality_rate_HIV))
            expected_mortality = []
            for t in range( len(deaths)):
                if t < drug_start_timestep + 1:
                    if deaths[t] > 0:
                        success = False
                        outfile.write("BAD: expected no disease death on drugs before day {0}, get {1} cases at timestep {2}.\n"
                                      "".format(drug_start_timestep + 1, deaths[t], t))
                elif infected_individuals[t] > 0:
                    expected_mortality.append(drug_mortality_rate_HIV * infected_individuals[t])
            expected_mortality.pop(0) # the Infected is off by one day
            test_death_dates = deaths[drug_start_timestep + 1:drug_start_timestep + 1 + len(expected_mortality)]
            dtk_sft.plot_data(test_death_dates, expected_mortality,
                                     label1="actual death", label2="expected death",
                                     title="death per day",
                                     xlabel="date after drug start day", ylabel="death per day",
                                     category="death_counts", show=True, line=True, overlap=True, sort=False)

            chi_result = dtk_sft.test_multinomial(dist=test_death_dates, proportions=expected_mortality,
                                                  report_file=outfile, prob_flag=False)
            if not chi_result:
                success = False
                outfile.write("BAD: Chi-squared test reuslt is False.\n")

        outfile.write(dtk_sft.format_success_msg(success))
        return success