Python test_multinomial Exemples, dtk_sft.test_multinomial Python Exemples

Exemple #1

0

Afficher le fichier

def create_report_file(param_obj, campaign_obj, report_data_obj, report_name,
                       debug):
    """Check binned new-infection counts against a constant expectation.

    New infections from the campaign start day onward are summed into
    consecutive 3-month bins and compared, via ``dtk_sft.test_multinomial``,
    with a flat expected count per bin. All findings are written to
    ``report_name``.

    Args:
        param_obj: mapping read for KEY_CONFIG_NAME, KEY_BASE_INFECTIVITY and
            KEY_IMMUNITY_ACQUISITION_FACTOR.
        campaign_obj: mapping read for KEY_START_DAY.
        report_data_obj: mapping read for KEY_NEW_INFECTION, a sequence that
            is indexed by integer position starting at the start day.
        report_name: path of the report file, opened for writing.
        debug: when truthy, print a one-line summary to stdout.

    Returns:
        True when the duration check and the chi-squared test both pass,
        False otherwise.
    """
    months_per_bin = 3
    with open(report_name, "w") as report:
        report.write("Config_name = {}\n".format(param_obj[KEY_CONFIG_NAME]))
        success = True
        infectivity = param_obj[KEY_BASE_INFECTIVITY]
        first_day = campaign_obj[KEY_START_DAY]
        daily_infections = report_data_obj[KEY_NEW_INFECTION]
        acquisition_factor = param_obj[KEY_IMMUNITY_ACQUISITION_FACTOR]

        # Every bin shares the same expected count; one entry per bin in a year.
        bin_count = dtk_sft.MONTHS_IN_YEAR // months_per_bin
        per_bin_expectation = infectivity * dtk_sft.DAYS_IN_MONTH * months_per_bin * acquisition_factor
        expected = [per_bin_expectation] * bin_count

        required_days = first_day + dtk_sft.DAYS_IN_YEAR
        if len(daily_infections) < required_days:
            success = False
            report.write(
                "BAD: the simulation duration is too short, please make sure it's at least {} days.\n"
                .format(required_days))
        report.write(
            "running chi-squared test for expected new infections for {0} {1}-months time bins: \n"
            "base_infectivity = {2}, immunity_acquisition_factor = {3}.\n".
            format(bin_count, months_per_bin, infectivity, acquisition_factor))

        # Accumulate day by day and close a bin every `bin_length` days; a
        # trailing partial bin is dropped.
        bin_length = months_per_bin * dtk_sft.DAYS_IN_MONTH
        binned_counts = []
        running_total = 0
        day_indices = range(first_day, len(daily_infections))
        for days_elapsed, day in enumerate(day_indices, start=1):
            running_total += daily_infections[day]
            if days_elapsed % bin_length == 0:
                binned_counts.append(running_total)
                running_total = 0

        dtk_sft.plot_data(
            binned_counts,
            dist2=expected,
            label1="actual_new_infections",
            label2="expected_new_infection",
            title="actual vs. expected new infection for every {} months".
            format(months_per_bin),
            xlabel="every {} months".format(months_per_bin),
            ylabel="# of new infections",
            category='actual_vs_expected_new_infections',
            show=True,
            line=True)
        chi_squared_ok = dtk_sft.test_multinomial(dist=binned_counts,
                                                  proportions=expected,
                                                  report_file=report,
                                                  prob_flag=False)

        if chi_squared_ok:
            report.write(
                "GOOD: The Chi-squared test for number of new infections in every {} months passed.\n"
                .format(months_per_bin))
        else:
            success = False
            report.write(
                "BAD: The Chi-squared test for number of new infections in every {} months failed.\n"
                .format(months_per_bin))
        report.write(dtk_sft.format_success_msg(success))

    if debug:
        print("SUMMARY: Success={0}\n".format(success))
    return success

Exemple #2

0

Afficher le fichier

Fichier : dtk_post_process.py Projet : yukikatase/EMOD

def create_report_file(param_obj, node_list, campaign_obj, migration_df,
                       report_data_obj, stdout_filename, report_name, debug):
    """Validate monthly new infections and per-core stdout reporting.

    The function writes its findings to ``report_name`` and performs, in
    order:

    1. a simulation-duration check (at least start day + 1 + one year);
    2. ``tms.check_test_condition`` on the core count, node list and
       migration data (that helper writes its own summary to the report);
    3. a chi-squared comparison (``dtk_sft.test_multinomial``) between the
       monthly sums of new infections and the expected monthly values;
    4. a check that every time step parsed from ``stdout_filename`` was
       reported by exactly the expected set of cores.

    Args:
        param_obj: mapping read for KEY_CONFIG_NAME, KEY_BASE_INFECTIVITY,
            KEY_IMMUNITY_ACQUISITION_FACTOR, KEY_DECAY_RATE and KEY_NUM_CORES.
        node_list: collection of nodes; its length scales the expectation.
        campaign_obj: mapping read for KEY_START_DAY.
        migration_df: forwarded unchanged to ``tms.check_test_condition``.
        report_data_obj: mapping read for KEY_NEW_INFECTION.
        stdout_filename: forwarded to ``parse_output_file``.
        report_name: path of the report file, opened for writing.
        debug: forwarded to ``parse_output_file``; when truthy a summary line
            is also printed.

    Returns:
        True only if every check above passes.
    """
    with open(report_name, "w") as outfile:
        config_name = param_obj[KEY_CONFIG_NAME]
        outfile.write("Config_name = {}\n".format(config_name))
        success = True
        base_infectivity = param_obj[KEY_BASE_INFECTIVITY]
        start_day = campaign_obj[KEY_START_DAY]
        new_infection = report_data_obj[KEY_NEW_INFECTION]
        immunity_acquisition_factor = param_obj[
            KEY_IMMUNITY_ACQUISITION_FACTOR]
        decay_rate = param_obj[KEY_DECAY_RATE]

        outfile.write("checking some test conditions:\n")
        outfile.write("  -- simulation duration: {} days\n".format(
            len(new_infection)))
        if len(new_infection) < start_day + 1 + dtk_sft.DAYS_IN_YEAR:
            success = False
            outfile.write(
                "BAD: the simulation duration is too short, please make sure it's at least {} days.\n"
                .format(start_day + 1 + dtk_sft.DAYS_IN_YEAR))

        result = tms.check_test_condition(param_obj[KEY_NUM_CORES], node_list,
                                          migration_df, outfile)
        if not result:
            success = False
            # summary message is written to the report file in the check_test_condition function

        number_of_month = 1
        outfile.write(
            "calculate expected number of infections for a time period of {} month("
            "unit is 1/years):\n".format(number_of_month))
        t_initial = 0
        expected = []
        # Rescale both rates by DAYS_IN_YEAR so that time below is measured in
        # years (the report messages state "unit is 1/years").
        decay_rate *= dtk_sft.DAYS_IN_YEAR
        base_infectivity *= dtk_sft.DAYS_IN_YEAR
        step = number_of_month / dtk_sft.MONTHS_IN_YEAR
        for t_final in np.arange(step, 1.01, step):
            # Closed-form integral over [t_initial, t_final] of
            #   base_infectivity * (1 - (1 - immunity_acquisition_factor)
            #                           * exp(-decay_rate * t))
            expected_new_infection = base_infectivity * (
                t_final - t_initial) - base_infectivity * (
                    1.0 - immunity_acquisition_factor) / decay_rate * math.exp(
                        -1 * decay_rate *
                        t_initial) * (1.0 - math.exp(-1 * decay_rate *
                                                     (t_final - t_initial)))
            expected_new_infection *= len(node_list)
            expected.append(expected_new_infection)
            t_initial = t_final
        # group new infections for every month:
        value_to_test = []
        outfile.write(
            "running chi-squared test for actual vs expected new infections for {0} {1}-months time bins: \n"
            "base_infectivity = {2}, immunity_acquisition_factor = {3}, decay rate = {4}.(unit is 1/years)\n"
            .format(dtk_sft.MONTHS_IN_YEAR // number_of_month, number_of_month,
                    base_infectivity, immunity_acquisition_factor, decay_rate))
        actual_new_infection = 0
        i = 0
        # Counting starts the day after start_day; a bin is closed every
        # number_of_month * DAYS_IN_MONTH days and a trailing partial bin is
        # discarded.
        for t in range(start_day + 1, len(new_infection)):
            actual_new_infection += new_infection[t]
            i += 1
            if not i % (number_of_month * dtk_sft.DAYS_IN_MONTH):
                value_to_test.append(actual_new_infection)
                actual_new_infection = 0
        dtk_sft.plot_data(
            value_to_test,
            dist2=expected,
            label1="actual_new_infections",
            label2="expected_new_infection",
            title="actual vs. expected new infection for every {} month".
            format(number_of_month),
            xlabel="month",
            ylabel="# of new infections",
            category='actual_vs_expected_new_infections',
            show=True,
            line=False)
        result = dtk_sft.test_multinomial(dist=value_to_test,
                                          proportions=expected,
                                          report_file=outfile,
                                          prob_flag=False)

        if not result:
            success = False
            outfile.write(
                "BAD: The Chi-squared test for number of new infections in every {} months failed.\n"
                .format(number_of_month))
        else:
            outfile.write(
                "GOOD: The Chi-squared test for number of new infections in every {} months passed.\n"
                .format(number_of_month))

        output_dict = parse_output_file(stdout_filename, debug)
        outfile.write(
            "checking if all cores are reporting in every time step in stdout file:\n"
        )
        core_list = [str(n) for n in range(param_obj[KEY_NUM_CORES])]
        # Core ids are strings, so sorted() orders them lexicographically
        # ('0', '1', '10', '2', ...). Sort the expected ids the same way;
        # comparing against the numerically ordered core_list would flag every
        # time step as BAD as soon as there are more than 10 cores.
        expected_cores = sorted(core_list)
        for t, cores in output_dict.items():
            if expected_cores != sorted(cores):
                success = False
                outfile.write(
                    "BAD: at time step {0}, these cores reported to stdout.txt are: {1}, while "
                    "expected cores are: {2}.\n".format(t, cores, core_list))

        outfile.write(dtk_sft.format_success_msg(success))

    if debug:
        print("SUMMARY: Success={0}\n".format(success))
    return success

Exemple #3

0

Afficher le fichier

def create_report_file(drug_start_timestep, inactivation_times, active_count, inactivations, drug_inactivation_rate, report_name, debug = False):
    """Test drug inactivation against ``drug_inactivation_rate`` and report.

    For rates below 0.1 the individual inactivation times are tested as
    exponential draws with ``dtk_sft.test_exponential`` and plotted against
    a numpy-generated reference sample. For larger rates the per-time-step
    inactivation counts are compared with ``rate * active_count`` through
    ``dtk_sft.test_multinomial``.

    Args:
        drug_start_timestep: time step at which the drug starts.
        inactivation_times: observed inactivation times (small-rate branch).
        active_count: per-time-step active counts (large-rate branch).
        inactivations: per-time-step inactivation counts (large-rate branch).
        drug_inactivation_rate: the rate under test.
        report_name: path of the report file, opened for writing.
        debug: when truthy, also print the success message.

    Returns:
        The overall success flag.
    """
    with open(report_name, "w") as report:
        success = True
        # ks exponential test doesn't work very well with large rate, use chi squared test instead.
        # while rate is small ks test for exponential distribution is more sensitive to catch the difference
        if drug_inactivation_rate < 0.1:
            sample_size = len(inactivation_times)
            report.write( "Testing inactivation times as draws from exponential distrib with rate {0}. "
                          "Dataset size = {1}.\n".format( drug_inactivation_rate, sample_size ) )
            success = dtk_sft.test_exponential( inactivation_times, drug_inactivation_rate, report, integers=True,
                                                roundup=True, round_nearest=False )
            if not success:
                report.write("BAD: ks test for rate {} is False.\n".format(drug_inactivation_rate))

            # Reference sample for the plots only: exponential draws rounded
            # up to whole numbers.
            raw_draws = numpy.random.exponential(1.0 / drug_inactivation_rate, sample_size)
            reference_times = [math.ceil(draw) for draw in raw_draws]
            dtk_sft.plot_data_sorted(inactivation_times, reference_times,
                                     label1="test times", label2="numpy data",
                                     title="inactivation_times_actual_vs_numpy",
                                     xlabel="data points", ylabel="Inactivation times",
                                     category="inactivation_times", show=True, line=True, overlap=True)
            dtk_sft.plot_cdf(inactivation_times, reference_times,
                             label1="test times", label2="numpy data",
                             title="inactivation_times_cdf",
                             xlabel="days", ylabel="probability",
                             category="inactivation_times_cdf", show=True)
            dtk_sft.plot_probability(inactivation_times, reference_times,
                                     label1="test times", label2="numpy data",
                                     title="inactivation_times_pdf",
                                     xlabel="days", ylabel="probability",
                                     category="inactivation_times_pdf", show=True)
        else:
            report.write("Testing inactivation count per day with rate {0}. \n".format( drug_inactivation_rate) )
            expected_counts = []
            for step, observed in enumerate(inactivations):
                if step >= drug_start_timestep:
                    # Only steps with active individuals contribute an
                    # expected value.
                    if active_count[step] > 0:
                        expected_counts.append(drug_inactivation_rate * active_count[step])
                elif observed > 0:
                    # Nothing may be inactivated before the drug starts.
                    success = False
                    report.write("BAD: expected no inactivations on drugs before day {0}, get {1} cases at timestep {2}.\n"
                                 .format(drug_start_timestep, observed, step))

            # Observed counts are taken from one step after the drug start and
            # both series are trimmed to a common length.
            first_tested = drug_start_timestep + 1
            if len(inactivations) > len(expected_counts) + drug_start_timestep:
                observed_counts = inactivations[first_tested:first_tested + len(expected_counts)]
            else:
                observed_counts = inactivations[first_tested:]
                expected_counts = expected_counts[:len(observed_counts)]

            dtk_sft.plot_data(observed_counts, expected_counts,
                              label1="actual inactivation", label2="expected inactivation",
                              title="inactivation per day",
                              xlabel="date after drug start day", ylabel="inactivation per day",
                              category="inactivation_counts", show=True, line=True, overlap=True, sort=False)

            chi_squared_ok = dtk_sft.test_multinomial(dist=observed_counts, proportions=expected_counts,
                                                      report_file=report, prob_flag=False)
            if not chi_squared_ok:
                success = False
                report.write("BAD: Chi-squared test reuslt is False.\n")
        summary = dtk_sft.format_success_msg(success)
        report.write(summary)
        if debug:
            print(dtk_sft.format_success_msg(success))
        return success

Exemple #4

0

Afficher le fichier

def create_report_file(param_obj, campaign_obj, output_dict, report_dict,
                       report_name, debug):
    """Chi-squared test of positive/negative/default counts per time step.

    For every time step in ``report_dict`` the three counts are tested with
    ``dtk_sft.test_multinomial`` against proportions derived from the
    campaign's sensitivity and treatment fraction. The run fails if the
    number of failing time steps exceeds ``ceil(5% of the time steps)``, or
    if ``report_dict`` is empty. Three plots (each count series against the
    per-step total) are produced afterwards.

    Args:
        param_obj: mapping read for KEY_CONFIG_NAME.
        campaign_obj: mapping read for KEY_BASE_SENSITIVITY and
            KEY_TREATMENT_FRACTION.
        output_dict: not used by this function.
        report_dict: mapping of time step to a mapping holding KEY_POSITIVE,
            KEY_NEGATIVE and KEY_DEFAULT counts.
        report_name: path of the report file, opened for writing.
        debug: when truthy, print a one-line summary to stdout.

    Returns:
        The overall success flag.
    """
    with open(report_name, "w") as report:
        report.write("Config_name = {}\n".format(param_obj[KEY_CONFIG_NAME]))
        success = True
        sensitivity = campaign_obj[KEY_BASE_SENSITIVITY]
        treatment_fraction = campaign_obj[KEY_TREATMENT_FRACTION]
        # Expected shares, in the order positive / negative / default.
        proportions = [
            sensitivity * treatment_fraction,
            (1.0 - sensitivity) * treatment_fraction,
            1.0 - treatment_fraction,
        ]
        positive, negative, default, total = [], [], [], []
        failed_timestep = []

        if len(report_dict) == 0:
            success = False
            report.write(dtk_sft.sft_no_test_data)

        for step in report_dict:
            entry = report_dict[step]
            counts = [entry[KEY_POSITIVE], entry[KEY_NEGATIVE],
                      entry[KEY_DEFAULT]]
            positive.append(counts[0])
            negative.append(counts[1])
            default.append(counts[2])
            total.append(sum(counts))
            report.write("Run Chi-squared test at time step {}.\n".format(step))
            passed = dtk_sft.test_multinomial(dist=counts,
                                              proportions=proportions,
                                              report_file=report)
            if passed:
                continue
            failed_timestep.append(step)
            report.write(
                "Warning: At timestep {0}, the Chi-squared test failed.\n".
                format(step))

        # Individual failures are tolerated up to ceil(5%) of the time steps.
        allowed_failures = math.ceil(0.05 * len(report_dict))
        if len(failed_timestep) <= allowed_failures:
            report.write(
                "GOOD: the Chi-squared test failed {} times, less than 5% of the total timestep.\n"
                .format(len(failed_timestep)))
        else:
            success = False
            report.write(
                "BAD: the Chi-squared test failed at timestep {0}.\n".format(
                    ', '.join(map(str, failed_timestep))))

        # One plot per outcome: (series, label, title template, share, category).
        plot_specs = (
            (positive, "TBTestPositive",
             "Test positive vs. total, positive proportion = {}",
             sensitivity * treatment_fraction, 'Test_positive_vs_total'),
            (negative, "TBTestNegative",
             "Test negative vs. total, negative proportion = {}",
             (1.0 - sensitivity) * treatment_fraction,
             'Test_negative_vs_total'),
            (default, "TBTestDefault",
             "Test default vs. total, default proportion = {}",
             1.0 - treatment_fraction, 'Test_default_vs_total'),
        )
        for series, label, title_template, share, category in plot_specs:
            dtk_sft.plot_data(
                series,
                dist2=total,
                label1=label,
                label2="Total tested",
                title=title_template.format(share),
                xlabel="time step",
                ylabel="# of individuals",
                category=category,
                show=True,
                line=False)
        # TODO: write test to check if report matches debug logging. Pending on #2279. May not need this part.
        report.write(dtk_sft.format_success_msg(success))
    if debug:
        print("SUMMARY: Success={0}\n".format(success))
    return success

Exemple #5

0

Afficher le fichier

def create_report_file(drug_start_timestep, disease_deaths, cum_deaths, deaths, infected_individuals, death_times, drug_mortality_rate_HIV, report_name ):
    """Test disease deaths on drugs against ``drug_mortality_rate_HIV``.

    Checks performed, with findings written to ``report_name``:

    * test data must be present (non-zero sums of ``disease_deaths`` and
      ``cum_deaths`` and a non-empty ``death_times``);
    * ``disease_deaths`` and ``cum_deaths`` must agree at every index of
      ``cum_deaths``;
    * for rates below 0.1, ``death_times`` are tested as exponential draws
      with ``dtk_sft.test_exponential``; otherwise the per-time-step death
      counts are compared with ``rate * infected_individuals`` through
      ``dtk_sft.test_multinomial``.

    Args:
        drug_start_timestep: time step at which the drug starts.
        disease_deaths: per-time-step values (per the report message, read
            from InsetChart.json).
        cum_deaths: per-time-step values (per the report message, read from
            stdout.txt).
        deaths: per-time-step death counts (large-rate branch).
        infected_individuals: per-time-step infected counts (large-rate
            branch).
        death_times: observed death times (small-rate branch).
        drug_mortality_rate_HIV: the rate under test.
        report_name: path of the report file, opened for writing.

    Returns:
        The overall success flag.
    """
    with open(report_name, "w") as outfile:
        success = True
        if sum(disease_deaths)==0 or sum(cum_deaths)==0 or len(death_times)==0:
            success = False
            outfile.write(dtk_sft.no_test_data)
        for x, cum_death in enumerate(cum_deaths):
            if disease_deaths[x] != cum_death:
                success = False
                outfile.write("BAD: at timestep {0}, disease deaths is {1} in InsetChart.json and {2} in stdout.txt.\n".format(x+1, disease_deaths[x], cum_death))
        # ks exponential test doesn't work very well with large rate, use chi squared test instead
        # while rate is small ks test for exponential distribution is more sensitive to catch the difference
        if drug_mortality_rate_HIV < 0.1:
            outfile.write("Testing death times as draws from exponential distrib with rate {0}. "
                          "Dataset size = {1}.\n".format(drug_mortality_rate_HIV, len(death_times)))
            ks_result = dtk_sft.test_exponential( death_times, drug_mortality_rate_HIV, report_file = outfile,
                                                  integers=True, roundup=True, round_nearest=False )
            if not ks_result:
                success = False
                outfile.write("BAD: ks test reuslt is False.\n")
            # Reference sample for the plots only: exponential draws rounded
            # up to whole numbers.
            size = len(death_times)
            scale = 1.0 / drug_mortality_rate_HIV
            dist_exponential_np = numpy.random.exponential(scale, size)
            dist_exponential_np = [math.ceil(x) for x in dist_exponential_np]
            dtk_sft.plot_data_sorted(death_times, dist_exponential_np,
                              label1="death times", label2="numpy data",
                              title="death_times_actual_vs_numpy",
                              xlabel="data points", ylabel="death times",
                              category="death_times", show=True, line = True, overlap=True)
            dtk_sft.plot_cdf(death_times, dist_exponential_np,
                             label1="death times", label2="numpy data",
                             title="death_times_cdf",
                             xlabel="days", ylabel="probability",
                             category="death_times_cdf", show=True)
        else:
            outfile.write("Testing death count per day with rate {0}. \n".format(drug_mortality_rate_HIV))
            expected_mortality = []
            for t, death_count in enumerate(deaths):
                if t < drug_start_timestep + 1:
                    # No disease death is expected up to and including the
                    # drug start time step.
                    if death_count > 0:
                        success = False
                        outfile.write("BAD: expected no disease death on drugs before day {0}, get {1} cases at timestep {2}.\n"
                                      "".format(drug_start_timestep + 1, death_count, t))
                elif infected_individuals[t] > 0:
                    expected_mortality.append(drug_mortality_rate_HIV * infected_individuals[t])
            if not expected_mortality:
                # No time step after drug start had infected individuals.
                # Previously the pop(0) below raised IndexError here and the
                # report was left without its summary line; record the
                # failure and skip the chi-squared test instead.
                success = False
                outfile.write(dtk_sft.no_test_data)
            else:
                del expected_mortality[0] # the Infected is off by one day
                test_death_dates = deaths[drug_start_timestep + 1:drug_start_timestep + 1 + len(expected_mortality)]
                dtk_sft.plot_data(test_death_dates, expected_mortality,
                                         label1="actual death", label2="expected death",
                                         title="death per day",
                                         xlabel="date after drug start day", ylabel="death per day",
                                         category="death_counts", show=True, line=True, overlap=True, sort=False)

                chi_result = dtk_sft.test_multinomial(dist=test_death_dates, proportions=expected_mortality,
                                                      report_file=outfile, prob_flag=False)
                if not chi_result:
                    success = False
                    outfile.write("BAD: Chi-squared test reuslt is False.\n")

        outfile.write(dtk_sft.format_success_msg(success))
        return success

Exemple #6

0

Afficher le fichier

Fichier : dtk_post_process.py Projet : yukikatase/EMOD

def create_report_file(param_obj, campaign_obj, output_dict, report_dict,
                       report_name, debug):
    """Chi-squared test of MDR positive/negative/default counts per step.

    For every time step in ``report_dict`` the three MDR counts are tested
    with ``dtk_sft.test_multinomial`` against proportions derived from the
    campaign's sensitivity and its two treatment fractions. Any failing
    time step fails the whole run. Three plots (each count series against
    an estimated per-step total) are produced afterwards, and all findings
    are written to ``report_name``.

    Args:
        param_obj: mapping read for KEY_CONFIG_NAME.
        campaign_obj: mapping read for KEY_BASE_SENSITIVITY,
            KEY_TREATMENT_FRACTION and
            KEY_TREATMENT_FRACTION_NEGATIVE_DIAGNOSIS.
        output_dict: not used by this function.
        report_dict: mapping of time step to a mapping holding
            KEY_MDR_POSITIVE, KEY_MDR_NEGATIVE and KEY_MDR_DEFAULT counts.
        report_name: path of the report file, opened for writing.
        debug: when truthy, print a one-line summary to stdout.

    Returns:
        The overall success flag.
    """
    with open(report_name, "w") as report:
        report.write("Config_name = {}\n".format(param_obj[KEY_CONFIG_NAME]))
        success = True
        sensitivity = campaign_obj[KEY_BASE_SENSITIVITY]
        treatment_fraction = campaign_obj[KEY_TREATMENT_FRACTION]
        treatment_fraction_negative_diagnosis = campaign_obj[
            KEY_TREATMENT_FRACTION_NEGATIVE_DIAGNOSIS]
        # Expected shares, in the order positive / negative / default.
        proportions = [
            sensitivity * treatment_fraction,
            (1.0 - sensitivity) * treatment_fraction_negative_diagnosis,
            (1.0 - sensitivity) *
            (1.0 - treatment_fraction_negative_diagnosis) + sensitivity *
            (1.0 - treatment_fraction)
        ]
        total_proportion = sum(proportions)
        positive, negative, default, total = [], [], [], []

        for step in report_dict:
            entry = report_dict[step]
            counts = [
                entry[KEY_MDR_POSITIVE],
                entry[KEY_MDR_NEGATIVE],
                entry[KEY_MDR_DEFAULT]
            ]
            for series, count in zip((positive, negative, default), counts):
                series.append(count)
            # Scale the observed sum by the summed proportions and truncate
            # to an int to get the total used in the plots.
            total.append(int(sum(counts) / total_proportion))
            report.write("Run Chi-squared test at time step {}.\n".format(step))
            passed = dtk_sft.test_multinomial(dist=counts,
                                              proportions=proportions,
                                              report_file=report)
            if passed:
                continue
            success = False
            report.write(
                "BAD: At timestep {0}, the Chi-squared test failed.\n".
                format(step))

        # One plot per outcome: (series, label, title template, share, category).
        plot_specs = (
            (positive, "TBMDRTestPositive",
             "MDR Test positive vs. total, positive proportion = {}",
             proportions[0], 'MDR_Test_positive_vs_total'),
            (negative, "TBMDRTestNegative",
             "MDR Test negative vs. total, negative proportion = {}",
             proportions[1], 'MDR_Test_negative_vs_total'),
            (default, "TBMDRTestDefault",
             "MDR Test default vs. total, default proportion = {}",
             proportions[2], 'MDR_Test_default_vs_total'),
        )
        for series, label, title_template, share, category in plot_specs:
            dtk_sft.plot_data(
                series,
                dist2=total,
                label1=label,
                label2="Total tested",
                title=title_template.format(share),
                xlabel="time step",
                ylabel="# of individuals",
                category=category,
                show=True,
                line=False)
        report.write(dtk_sft.format_success_msg(success))
    if debug:
        print("SUMMARY: Success={0}\n".format(success))
    return success