Ejemplo n.º 1
0
def create_report_file(param_obj, campaign_obj, report_data_obj, report_name, debug):
    """Write the report comparing binned new-infection counts with expectation.

    Daily new infections, starting at the campaign start day, are summed into
    3-month bins and handed to ``sft.test_multinomial`` together with a
    constant expected count per bin. The outcome of every check is written to
    ``report_name``.

    Args:
        param_obj: mapping read with KEY_CONFIG_NAME and KEY_BASE_INFECTIVITY.
        campaign_obj: mapping read with KEY_INITIAL_EFFECT and KEY_START_DAY.
        report_data_obj: mapping read with KEY_NEW_INFECTION; the value is
            indexed by time step.
        report_name: path of the report file, opened in write mode.
        debug: when truthy, a summary line is also printed to stdout.

    Returns:
        True when all checks passed, False otherwise.
    """
    with open(report_name, "w") as outfile:
        outfile.write("Config_name = {}\n".format(param_obj[KEY_CONFIG_NAME]))
        success = True
        base_infectivity = param_obj[KEY_BASE_INFECTIVITY]
        initial_effect = campaign_obj[KEY_INITIAL_EFFECT]
        start_day = campaign_obj[KEY_START_DAY]
        new_infection = report_data_obj[KEY_NEW_INFECTION]

        # Expected number of infections in one 3-month bin; the same value is
        # used for every bin of the one-year test window.
        months_per_bin = 3
        expected_per_bin = base_infectivity * sft.DAYS_IN_MONTH * months_per_bin * (1.0 - initial_effect)
        bins_per_year = sft.MONTHS_IN_YEAR // months_per_bin
        expected = [expected_per_bin] * bins_per_year
        days_per_bin = months_per_bin * sft.DAYS_IN_MONTH

        # The infected individual is imported at the end of the first year,
        # so the simulation has to cover at least two years.
        if len(new_infection) < 2 * sft.DAYS_IN_YEAR:
            success = False
            outfile.write("BAD: the simulation duration is too short, please make sure it's at least 2 years.\n")

        outfile.write("running chi-squared test for expected new infections for {0} {1}-months time bins: \n"
                      "base_infectivity = {2}, initial_effect = {3}.\n".format(
                          bins_per_year, months_per_bin, base_infectivity, initial_effect))

        # Accumulate daily counts and close a bin every `days_per_bin` days.
        value_to_test = []
        running_total = 0
        for days_seen, t in enumerate(range(start_day, len(new_infection)), start=1):
            running_total += new_infection[t]
            if days_seen % days_per_bin == 0:
                value_to_test.append(running_total)
                running_total = 0

        sft.plot_data(
            value_to_test,
            dist2=expected,
            label1="actual_new_infections",
            label2="expected_new_infection",
            title="actual vs. expected new infection for every {} months".format(months_per_bin),
            xlabel="every {} months".format(months_per_bin),
            ylabel="# of new infections",
            category='actual_vs_expected_new_infections',
            show=True,
            line=True)

        chi_squared_passed = sft.test_multinomial(
            dist=value_to_test, proportions=expected, report_file=outfile, prob_flag=False)
        if chi_squared_passed:
            outfile.write(
                "GOOD: The Chi-squared test for number of new infections in every {} months passed.\n".format(
                    months_per_bin))
        else:
            success = False
            outfile.write(
                "BAD: The Chi-squared test for number of new infections in every {} months failed.\n".format(
                    months_per_bin))
        outfile.write(sft.format_success_msg(success))

    if debug:
        print("SUMMARY: Success={0}\n".format(success))
    return success
Ejemplo n.º 2
0
def create_report_file(drug_start_timestep, disease_deaths, cum_deaths, deaths, infected_individuals, death_times, drug_mortality_rate_HIV, report_name ):
    """Write the report for the drug-mortality test and return its verdict.

    The function first cross-checks the per-time-step disease deaths from
    InsetChart.json against the ones parsed from stdout.txt. It then tests the
    deaths themselves:

    * for rates below 0.1 the death times are tested as draws from an
      exponential distribution (``sft.test_exponential``) and plotted against
      a numpy-generated reference sample;
    * otherwise the per-day death counts after the drug start are compared
      with ``rate * infected`` through ``sft.test_multinomial``.

    Args:
        drug_start_timestep: time step at which the drug is given.
        disease_deaths: per-time-step disease deaths from InsetChart.json.
        cum_deaths: per-time-step disease deaths parsed from stdout.txt.
        deaths: per-time-step death counts used for the chi-squared branch.
        infected_individuals: per-time-step infected counts, indexed like
            ``deaths``.
        death_times: individual death times used for the KS branch.
        drug_mortality_rate_HIV: mortality rate under test.
        report_name: path of the report file, opened in write mode.

    Returns:
        True when every check passed, False otherwise.
    """
    with open(report_name, "w") as outfile:
        success = True
        if sum(disease_deaths) == 0 or sum(cum_deaths) == 0 or len(death_times) == 0:
            success = False
            outfile.write(sft.no_test_data)

        # A shorter InsetChart series used to raise IndexError in the loop
        # below; report it as a failure and compare the common prefix instead.
        if len(disease_deaths) < len(cum_deaths):
            success = False
            outfile.write("BAD: InsetChart.json has {0} disease death entries while stdout.txt has {1}.\n".format(
                len(disease_deaths), len(cum_deaths)))
        for x, (chart_deaths, stdout_deaths) in enumerate(zip(disease_deaths, cum_deaths)):
            if chart_deaths != stdout_deaths:
                success = False
                outfile.write("BAD: at timestep {0}, disease deaths is {1} in InsetChart.json and {2} in stdout.txt.\n".format(x+1, chart_deaths, stdout_deaths))

        # ks exponential test doesn't work very well with large rate, use chi squared test instead
        # while rate is small ks test for exponential distribution is more sensitive to catch the difference
        if drug_mortality_rate_HIV < 0.1:
            outfile.write("Testing death times as draws from exponential distrib with rate {0}. "
                          "Dataset size = {1}.\n".format(drug_mortality_rate_HIV, len(death_times)))
            ks_result = sft.test_exponential( death_times, drug_mortality_rate_HIV, report_file = outfile,
                                                  integers=True, roundup=True, round_nearest=False )
            if not ks_result:
                success = False
                outfile.write("BAD: ks test reuslt is False.\n")
            # Reference sample of the same size, rounded up to whole days,
            # used only for the plots.
            size = len(death_times)
            scale = 1.0 / drug_mortality_rate_HIV
            dist_exponential_np = numpy.random.exponential(scale, size)
            dist_exponential_np = [math.ceil(draw) for draw in dist_exponential_np]
            sft.plot_data_sorted(death_times, dist_exponential_np,
                              label1="death times", label2="numpy data",
                              title="death_times_actual_vs_numpy",
                              xlabel="data points", ylabel="death times",
                              category="death_times", show=True, line = True, overlap=True)
            sft.plot_cdf(death_times, dist_exponential_np,
                             label1="death times", label2="numpy data",
                             title="death_times_cdf",
                             xlabel="days", ylabel="probability",
                             category="death_times_cdf", show=True)
        else:
            outfile.write("Testing death count per day with rate {0}. \n".format(drug_mortality_rate_HIV))
            first_drug_day = drug_start_timestep + 1
            expected_mortality = []
            for t, death_count in enumerate(deaths):
                if t < first_drug_day:
                    if death_count > 0:
                        success = False
                        outfile.write("BAD: expected no disease death on drugs before day {0}, get {1} cases at timestep {2}.\n"
                                      "".format(first_drug_day, death_count, t))
                elif infected_individuals[t] > 0:
                    expected_mortality.append(drug_mortality_rate_HIV * infected_individuals[t])

            if not expected_mortality:
                # Nothing to compare: previously pop(0) raised IndexError here
                # and the report was left without its summary line.
                success = False
                outfile.write("BAD: no infected individuals found after day {0}, "
                              "cannot test death count per day.\n".format(first_drug_day))
            else:
                expected_mortality.pop(0) # the Infected is off by one day
                test_death_dates = deaths[first_drug_day:first_drug_day + len(expected_mortality)]
                sft.plot_data(test_death_dates, expected_mortality,
                                         label1="actual death", label2="expected death",
                                         title="death per day",
                                         xlabel="date after drug start day", ylabel="death per day",
                                         category="death_counts", show=True, line=True, overlap=True, sort=False)

                chi_result = sft.test_multinomial(dist=test_death_dates, proportions=expected_mortality,
                                                      report_file=outfile, prob_flag=False)
                if not chi_result:
                    success = False
                    outfile.write("BAD: Chi-squared test reuslt is False.\n")

        outfile.write(sft.format_success_msg(success))
        return success
Ejemplo n.º 3
0
def create_report_file(param_obj, campaign_obj, stdout_df, report_name, debug):
    """Write the report for the environmental-diagnostic test.

    For every time step from 1 up to (not including) the simulation duration
    the environmental sample logged in ``stdout_df`` is compared (1% relative
    tolerance) with ``base_infectivity * infected / stat_pop``. The expected
    positive/negative test probabilities are accumulated over those steps and
    compared with the summed counts from ``stdout_df`` through
    ``sft.test_multinomial``.

    Args:
        param_obj: mapping of config parameters; every item is echoed to the
            report, and the simulation duration and base infectivity are read.
        campaign_obj: mapping read with the ``EnvironmentalDiagnosticKeys``.
        stdout_df: DataFrame of values parsed from stdout, one row per time
            step is expected (only the first matching row is used).
        report_name: path of the report file, opened in write mode.
        debug: when truthy, the success message is also printed.

    Returns:
        True when every check passed, False otherwise.
    """
    with open(report_name, "w") as outfile:
        for name, param in param_obj.items():
            outfile.write("{0} = {1}\n".format(name, param))
        success = True

        diag_keys = CampaignKeys.EnvironmentalDiagnosticKeys
        sample_threshold = float(campaign_obj[diag_keys.Sample_Threshold])
        base_sensitivity = float(campaign_obj[diag_keys.Base_Sensitivity])
        base_specificity = float(campaign_obj[diag_keys.Base_Specificity])
        ip_key_value = campaign_obj[diag_keys.Environment_IP_Key_Value]
        echoed_settings = (
            (diag_keys.Sample_Threshold, sample_threshold),
            (diag_keys.Base_Sensitivity, base_sensitivity),
            (diag_keys.Base_Specificity, base_specificity),
            (diag_keys.Environment_IP_Key_Value, ip_key_value),
        )
        for key, value in echoed_settings:
            outfile.write("{0} = {1}\n".format(key, value))

        # Sanity checks on the campaign settings this test relies on.
        if sample_threshold:
            outfile.write(
                "WARNING: {0} should be 0 in this test, got {1} from compaign file. Please fix the test.\n"
                "".format(diag_keys.Sample_Threshold, sample_threshold))
        if base_specificity != 1:
            outfile.write(
                "WARNING: the {0} is {1}, expected value is 1.\n".format(
                    diag_keys.Base_Specificity, base_specificity))
        if ip_key_value:
            success = False
            outfile.write(
                "BAD: {0} should be empty in this test, got {1} from compaign file. Please fix the test.\n"
                "".format(diag_keys.Environment_IP_Key_Value, ip_key_value))

        duration = param_obj[ConfigKeys.Simulation_Duration]
        base_infectivity = param_obj[ConfigKeys.Base_Infectivity]
        expected_positive_count = expected_negative_count = 0
        contagion_list = []

        stdout_cols = Diagnostic_Support.Stdout
        timestep_col = Diagnostic_Support.ConfigKeys.Simulation_Timestep
        for t in range(1, duration):
            rows_at_t = stdout_df[stdout_df[timestep_col] == t]
            infected = rows_at_t[stdout_cols.infected].iloc[0]
            stat_pop = rows_at_t[stdout_cols.stat_pop].iloc[0]
            envi_sample = rows_at_t[stdout_cols.sample].iloc[0]

            # calculated environmental contagion for next time step
            contagion = base_infectivity * infected / stat_pop
            if math.fabs(contagion - envi_sample) > envi_sample * 1e-2:
                success = False
                outfile.write(
                    "BAD: at time step {0} the environmental sample is {1}, expected value is {2}.\n"
                    .format(t, envi_sample, contagion))

            # Above the threshold the test result follows the sensitivity,
            # otherwise the specificity.
            if contagion > sample_threshold:
                expected_test_positive = base_sensitivity
                expected_test_negative = 1.0 - base_sensitivity
            else:
                expected_test_positive = 1.0 - base_specificity
                expected_test_negative = base_specificity

            contagion_list.append(contagion)
            expected_positive_count += expected_test_positive
            expected_negative_count += expected_test_negative

        stdout_sum = stdout_df.sum()
        actual_positive = stdout_sum[stdout_cols.test_positive]
        actual_negative = stdout_sum[stdout_cols.test_negative]

        result = sft.test_multinomial(
            [actual_positive, actual_negative],
            proportions=[expected_positive_count, expected_negative_count],
            report_file=outfile,
            prob_flag=False,
        )

        message = "{0}: the total test positive and negative counts from StdOut.txt are {1} and {2}, expected values" \
                  " are {3} and {4}.\n"
        if not result:
            success = False
        outfile.write(
            message.format("GOOD" if result else "BAD",
                           actual_positive, actual_negative,
                           expected_positive_count, expected_negative_count))

        sft.plot_data(stdout_df[stdout_cols.sample].tolist()[1:],
                      contagion_list,
                      label1="Actual",
                      label2="Expected",
                      title="Environmental_Contagion",
                      xlabel="Day",
                      ylabel="Environmental_Contagion",
                      category='Environmental_Contagion',
                      overlap=True,
                      alpha=0.5)

        # NOTE: a per-time-step comparison of the positive/negative/default
        # test counts (5% tolerance, with plots) used to be kept here as
        # commented-out code; it was never executed and is not part of this
        # test.
        outfile.write(sft.format_success_msg(success))
        if debug:
            print(sft.format_success_msg(success))
        return success
Ejemplo n.º 4
0
def create_report_file(drug_start_timestep,
                       inactivation_times,
                       active_count,
                       inactivations,
                       drug_inactivation_rate,
                       report_name,
                       debug=False):
    """Write the report for the drug-inactivation test and return its verdict.

    For rates below 0.1 the inactivation times are tested as draws from an
    exponential distribution (``sft.test_exponential``) and plotted against a
    numpy-generated reference sample. For larger rates the per-day
    inactivation counts after the drug start are compared with
    ``rate * active_count`` through ``sft.test_multinomial``.

    Args:
        drug_start_timestep: time step at which the drug is given.
        inactivation_times: individual inactivation times (KS branch).
        active_count: per-time-step counts, indexed like ``inactivations``.
        inactivations: per-time-step inactivation counts.
        drug_inactivation_rate: rate under test.
        report_name: path of the report file, opened in write mode.
        debug: when truthy, the success message is also printed.

    Returns:
        The test verdict: in the KS branch the value returned by
        ``sft.test_exponential``, otherwise True/False.
    """
    with open(report_name, "w") as outfile:
        success = True
        # ks exponential test doesn't work very well with large rate, use chi squared test instead.
        # while rate is small ks test for exponential distribution is more sensitive to catch the difference
        if drug_inactivation_rate < 0.1:
            outfile.write(
                "Testing inactivation times as draws from exponential distrib with rate {0}. "
                "Dataset size = {1}.\n".format(drug_inactivation_rate,
                                               len(inactivation_times)))
            success = sft.test_exponential(inactivation_times,
                                           drug_inactivation_rate,
                                           outfile,
                                           integers=True,
                                           roundup=True,
                                           round_nearest=False)
            if not success:
                outfile.write("BAD: ks test for rate {} is False.\n".format(
                    drug_inactivation_rate))

            # Reference sample of the same size, rounded up to whole days,
            # used only for the plots below.
            reference_draws = numpy.random.exponential(
                1.0 / drug_inactivation_rate, len(inactivation_times))
            dist_exponential_np = [math.ceil(draw) for draw in reference_draws]
            series_labels = {"label1": "test times", "label2": "numpy data"}

            sft.plot_data_sorted(inactivation_times,
                                 dist_exponential_np,
                                 title="inactivation_times_actual_vs_numpy",
                                 xlabel="data points",
                                 ylabel="Inactivation times",
                                 category="inactivation_times",
                                 show=True,
                                 line=True,
                                 overlap=True,
                                 **series_labels)
            sft.plot_cdf(inactivation_times,
                         dist_exponential_np,
                         title="inactivation_times_cdf",
                         xlabel="days",
                         ylabel="probability",
                         category="inactivation_times_cdf",
                         show=True,
                         **series_labels)
            sft.plot_probability(inactivation_times,
                                 dist_exponential_np,
                                 title="inactivation_times_pdf",
                                 xlabel="days",
                                 ylabel="probability",
                                 category="inactivation_times_pdf",
                                 show=True,
                                 **series_labels)
        else:
            outfile.write(
                "Testing inactivation count per day with rate {0}. \n".format(
                    drug_inactivation_rate))
            expected_inactivation = []
            for t, inactivated in enumerate(inactivations):
                if t < drug_start_timestep:
                    if inactivated > 0:
                        success = False
                        outfile.write(
                            "BAD: expected no inactivations on drugs before day {0}, get {1} cases at timestep {2}.\n"
                            "".format(drug_start_timestep, inactivated, t))
                elif active_count[t] > 0:
                    expected_inactivation.append(drug_inactivation_rate *
                                                 active_count[t])

            # Actual counts are taken from the day after the drug start;
            # both series are trimmed to the same length.
            first_tested = drug_start_timestep + 1
            if len(inactivations) <= len(expected_inactivation) + drug_start_timestep:
                test_inactivation_dates = inactivations[first_tested:]
                expected_inactivation = expected_inactivation[:len(test_inactivation_dates)]
            else:
                last_tested = first_tested + len(expected_inactivation)
                test_inactivation_dates = inactivations[first_tested:last_tested]

            sft.plot_data(test_inactivation_dates,
                          expected_inactivation,
                          label1="actual inactivation",
                          label2="expected inactivation",
                          title="inactivation per day",
                          xlabel="date after drug start day",
                          ylabel="inactivation per day",
                          category="inactivation_counts",
                          show=True,
                          line=True,
                          overlap=True,
                          sort=False)

            chi_result = sft.test_multinomial(
                dist=test_inactivation_dates,
                proportions=expected_inactivation,
                report_file=outfile,
                prob_flag=False)
            if not chi_result:
                success = False
                outfile.write("BAD: Chi-squared test reuslt is False.\n")
        summary = sft.format_success_msg(success)
        outfile.write(summary) if not debug else None
        if debug:
            outfile.write(summary)
            print(sft.format_success_msg(success))
        return success
Ejemplo n.º 5
0
def create_report_file(param_obj, node_list, campaign_obj, migration_df,
                       report_data_obj, stdout_filename, report_name, debug):
    """Write the report for the multi-node new-infection test.

    The function checks the simulation length and the test conditions
    (``tms.check_test_condition``), compares monthly new-infection counts with
    a closed-form expectation through ``sft.test_multinomial``, and finally
    verifies that every core reported at every time step in the stdout file.

    Args:
        param_obj: mapping read with KEY_CONFIG_NAME, KEY_BASE_INFECTIVITY,
            KEY_IMMUNITY_ACQUISITION_FACTOR, KEY_DECAY_RATE and KEY_NUM_CORES.
        node_list: nodes of the simulation; its length scales the expectation.
        campaign_obj: mapping read with KEY_START_DAY.
        migration_df: passed through to ``tms.check_test_condition``.
        report_data_obj: mapping read with KEY_NEW_INFECTION; the value is
            indexed by time step.
        stdout_filename: file handed to ``parse_output_file``.
        report_name: path of the report file, opened in write mode.
        debug: forwarded to ``parse_output_file``; when truthy a summary line
            is also printed.

    Returns:
        True when every check passed, False otherwise.
    """
    with open(report_name, "w") as outfile:
        outfile.write("Config_name = {}\n".format(param_obj[KEY_CONFIG_NAME]))
        success = True
        base_infectivity = param_obj[KEY_BASE_INFECTIVITY]
        start_day = campaign_obj[KEY_START_DAY]
        new_infection = report_data_obj[KEY_NEW_INFECTION]
        immunity_acquisition_factor = param_obj[
            KEY_IMMUNITY_ACQUISITION_FACTOR]
        decay_rate = param_obj[KEY_DECAY_RATE]

        outfile.write("checking some test conditions:\n")
        outfile.write("  -- simulation duration: {} days\n".format(
            len(new_infection)))
        minimum_duration = start_day + 1 + sft.DAYS_IN_YEAR
        if len(new_infection) < minimum_duration:
            success = False
            outfile.write(
                "BAD: the simulation duration is too short, please make sure it's at least {} days.\n"
                .format(minimum_duration))

        # The summary message is written to the report file by
        # check_test_condition itself.
        if not tms.check_test_condition(param_obj[KEY_NUM_CORES], node_list,
                                        migration_df, outfile):
            success = False

        number_of_month = 1
        outfile.write(
            "calculate expected number of infections for a time period of {} month("
            "unit is 1/years):\n".format((number_of_month)))

        # Rates are converted from per-day to per-year before use.
        decay_rate = decay_rate * sft.DAYS_IN_YEAR
        base_infectivity = base_infectivity * sft.DAYS_IN_YEAR
        step = number_of_month / sft.MONTHS_IN_YEAR

        def expected_between(t_start, t_end):
            # Expected new infections (single node) between two times in years.
            return base_infectivity * (
                t_end - t_start) - base_infectivity * (
                    1.0 - immunity_acquisition_factor) / decay_rate * math.exp(
                        -1 * decay_rate *
                        t_start) * (1.0 - math.exp(-1 * decay_rate *
                                                   (t_end - t_start)))

        expected = []
        interval_start = 0
        for interval_end in np.arange(step, 1.01, step):
            expected.append(
                expected_between(interval_start, interval_end) * len(node_list))
            interval_start = interval_end

        outfile.write(
            "running chi-squared test for actual vs expected new infections for {0} {1}-months time bins: \n"
            "base_infectivity = {2}, immunity_acquisition_factor = {3}, decay rate = {4}.(unit is 1/years)\n"
            .format(sft.MONTHS_IN_YEAR // number_of_month, number_of_month,
                    base_infectivity, immunity_acquisition_factor, decay_rate))

        # group new infections for every month:
        days_per_bin = number_of_month * sft.DAYS_IN_MONTH
        value_to_test = []
        running_total = 0
        for days_seen, t in enumerate(range(start_day + 1, len(new_infection)),
                                      start=1):
            running_total += new_infection[t]
            if days_seen % days_per_bin == 0:
                value_to_test.append(running_total)
                running_total = 0

        sft.plot_data(
            value_to_test,
            dist2=expected,
            label1="actual_new_infections",
            label2="expected_new_infection",
            title="actual vs. expected new infection for every {} month".
            format(number_of_month),
            xlabel="month",
            ylabel="# of new infections",
            category='actual_vs_expected_new_infections',
            show=True,
            line=False)

        chi_squared_passed = sft.test_multinomial(dist=value_to_test,
                                                  proportions=expected,
                                                  report_file=outfile,
                                                  prob_flag=False)
        if chi_squared_passed:
            outfile.write(
                "GOOD: The Chi-squared test for number of new infections in every {} months passed.\n"
                .format(number_of_month))
        else:
            success = False
            outfile.write(
                "BAD: The Chi-squared test for number of new infections in every {} months failed.\n"
                .format(number_of_month))

        output_dict = parse_output_file(stdout_filename, debug)
        outfile.write(
            "checking if all cores are reporting in every time step in stdout file:\n"
        )
        core_list = list(map(str, range(param_obj[KEY_NUM_CORES])))
        for t, cores in output_dict.items():
            if sorted(cores) == core_list:
                continue
            success = False
            outfile.write(
                "BAD: at time step {0}, these cores reported to stdout.txt are: {1}, while "
                "expected cores are: {2}.\n".format(t, cores, core_list))

        outfile.write(sft.format_success_msg(success))

    if debug:
        print("SUMMARY: Success={0}\n".format(success))
    return success
Ejemplo n.º 6
0
def create_report_file(param_obj, campaign_obj, report_data_obj, report_name,
                       debug):
    """Write the report comparing monthly new infections with expectation.

    Daily new infections after the campaign start day are summed into
    one-month bins and compared, through ``sft.test_multinomial``, with a
    closed-form expectation computed from the base infectivity, the immunity
    acquisition factor and the decay rate (all converted to 1/years).

    Args:
        param_obj: mapping read with KEY_CONFIG_NAME, KEY_BASE_INFECTIVITY,
            KEY_IMMUNITY_ACQUISITION_FACTOR and KEY_DECAY_RATE.
        campaign_obj: mapping read with KEY_START_DAY.
        report_data_obj: mapping read with KEY_NEW_INFECTION; the value is
            indexed by time step.
        report_name: path of the report file, opened in write mode.
        debug: when truthy, a summary line is also printed to stdout.

    Returns:
        True when every check passed, False otherwise.
    """
    with open(report_name, "w") as outfile:
        outfile.write("Config_name = {}\n".format(param_obj[KEY_CONFIG_NAME]))
        success = True
        base_infectivity = param_obj[KEY_BASE_INFECTIVITY]
        start_day = campaign_obj[KEY_START_DAY]
        new_infection = report_data_obj[KEY_NEW_INFECTION]
        immunity_acquisition_factor = param_obj[
            KEY_IMMUNITY_ACQUISITION_FACTOR]
        decay_rate = param_obj[KEY_DECAY_RATE]

        # calculate expected number of infections for a time period of 1 month:
        # unit is 1/years
        number_of_month = 1
        decay_rate = decay_rate * sft.DAYS_IN_YEAR
        base_infectivity = base_infectivity * sft.DAYS_IN_YEAR
        step = number_of_month / sft.MONTHS_IN_YEAR

        def expected_between(t_start, t_end):
            # Expected new infections between two times expressed in years.
            return base_infectivity * (
                t_end - t_start) - base_infectivity * (
                    1.0 - immunity_acquisition_factor) / decay_rate * math.exp(
                        -1 * decay_rate *
                        t_start) * (1.0 - math.exp(-1 * decay_rate *
                                                   (t_end - t_start)))

        expected = []
        interval_start = 0
        for interval_end in np.arange(step, 1.01, step):
            expected.append(expected_between(interval_start, interval_end))
            interval_start = interval_end

        minimum_duration = start_day + 1 + sft.DAYS_IN_YEAR
        if len(new_infection) < minimum_duration:
            success = False
            outfile.write(
                "BAD: the simulation duration is too short, please make sure it's at least {} days.\n"
                .format(minimum_duration))
        outfile.write(
            "running chi-squared test for expected new infections for {0} {1}-months time bins: \n"
            "base_infectivity = {2}, immunity_acquisition_factor = {3}, decay rate = {4}.(unit is 1/years)\n"
            .format(sft.MONTHS_IN_YEAR // number_of_month, number_of_month,
                    base_infectivity, immunity_acquisition_factor, decay_rate))

        # group new infections for every month:
        days_per_bin = number_of_month * sft.DAYS_IN_MONTH
        value_to_test = []
        running_total = 0
        for days_seen, t in enumerate(range(start_day + 1, len(new_infection)),
                                      start=1):
            running_total += new_infection[t]
            if days_seen % days_per_bin == 0:
                value_to_test.append(running_total)
                running_total = 0

        sft.plot_data(
            value_to_test,
            dist2=expected,
            label1="actual_new_infections",
            label2="expected_new_infection",
            title="actual vs. expected new infection for every {} month".
            format(number_of_month),
            xlabel="month",
            ylabel="# of new infections",
            category='actual_vs_expected_new_infections',
            show=True,
            line=False)

        chi_squared_passed = sft.test_multinomial(dist=value_to_test,
                                                  proportions=expected,
                                                  report_file=outfile,
                                                  prob_flag=False)
        if chi_squared_passed:
            outfile.write(
                "GOOD: The Chi-squared test for number of new infections in every {} months passed.\n"
                .format(number_of_month))
        else:
            success = False
            outfile.write(
                "BAD: The Chi-squared test for number of new infections in every {} months failed.\n"
                .format(number_of_month))

        outfile.write(sft.format_success_msg(success))

    if debug:
        print("SUMMARY: Success={0}\n".format(success))
    return success
Ejemplo n.º 7
0
def create_report_file(param_obj, campaign_obj, property_obj, property_df,
                       stdout_df, recorder_obj, report_name,
                       report_event_recorder, stdout_filename, debug):
    """Validate EnvironmentalDiagnostic output for one IP group and write the report.

    For every time step ``t`` in ``range(1, Simulation_Duration)`` the function:

    * Test 1 - recomputes the environmental contagion of the IP group from the
      property report and compares it (1% relative tolerance) with the sample
      logged in StdOut; also checks the logged IP value and that no "default"
      test result was produced.
    * Test 2 - checks that the positive/negative counts logged in StdOut match
      the event counts found in the event recorder data.

    Finally the total positive/negative counts are tested against the summed
    per-step expectations with ``sft.test_multinomial`` and several plots are
    produced.

    Args:
        param_obj: mapping of config parameters; every item is echoed to the
            report, and ``ConfigKeys.Simulation_Duration`` /
            ``ConfigKeys.Base_Infectivity`` are read from it.
        campaign_obj: mapping holding the diagnostic's ``Sample_Threshold``,
            ``Base_Sensitivity``, ``Base_Specificity`` and
            ``Environment_IP_Key_Value``.
        property_obj: not used by this function.
        property_df: DataFrame of the property report; columns whose name
            contains ``Diagnostic_Support.channels[0]`` / ``channels[-1]`` are
            used as the IP group's infected / statistical population channels.
        stdout_df: DataFrame of values parsed from StdOut, one row per time step.
        recorder_obj: two-item sequence; item 1 is the parse-success flag and
            item 0 is the event DataFrame on success or the error on failure.
        report_name: path of the report file to (over)write.
        report_event_recorder: event recorder file name, used in messages and
            plot labels.
        stdout_filename: StdOut file name, used in messages and plot labels.
        debug: when truthy, the success summary is also printed.

    Returns:
        The overall success flag (falsy as soon as any check fails).
    """
    with open(report_name, "w") as outfile:
        diag_keys = CampaignKeys.EnvironmentalDiagnosticKeys
        for name, param in param_obj.items():
            outfile.write("{0} = {1}\n".format(name, param))
        sample_threshold = float(campaign_obj[diag_keys.Sample_Threshold])
        base_sensitivity = float(campaign_obj[diag_keys.Base_Sensitivity])
        base_specificity = float(campaign_obj[diag_keys.Base_Specificity])
        ip_key_value = campaign_obj[diag_keys.Environment_IP_Key_Value]
        outfile.write("{0} = {1}\n".format(diag_keys.Sample_Threshold,
                                           sample_threshold))
        outfile.write("{0} = {1}\n".format(diag_keys.Base_Sensitivity,
                                           base_sensitivity))
        outfile.write("{0} = {1}\n".format(diag_keys.Base_Specificity,
                                           base_specificity))
        outfile.write("{0} = {1}\n".format(diag_keys.Environment_IP_Key_Value,
                                           ip_key_value))
        # The recorder parse result decides whether any validation can run.
        success = recorder_obj[1]

        if not success:
            error_message = recorder_obj[0]
            outfile.write(
                "Failed to parse report file: {0}, get exception: {1}.\n".
                format(report_event_recorder, error_message))
        else:
            # Throw warning messages for condition checks. Make sure all features are enabled.
            if not sample_threshold:
                outfile.write(
                    "WARNING: {0} should not be 0 in this test, got {1} from compaign file. Please fix the test.\n"
                    "".format(diag_keys.Sample_Threshold, sample_threshold))
            if base_specificity == 1:
                outfile.write(
                    "WARNING: the {0} is {1}, expected value is less than 1.\n"
                    .format(diag_keys.Base_Specificity, base_specificity))
            if base_sensitivity == 1:
                outfile.write(
                    "WARNING: the {0} is {1}, expected value is less than 1.\n"
                    .format(diag_keys.Base_Sensitivity, base_sensitivity))
            if not ip_key_value:
                outfile.write(
                    "WARNING: {0} should not be empty in this test, got '{1}' from compaign file. Please fix the test.\n"
                    "".format(diag_keys.Environment_IP_Key_Value,
                              ip_key_value))

            duration = param_obj[ConfigKeys.Simulation_Duration]
            base_infectivity = param_obj[ConfigKeys.Base_Infectivity]

            positive_list = []
            negative_list = []
            positive_event_list = []
            negative_event_list = []

            # get infected and stat_pop channels for the selected IP group from property report
            infected_ip_group_list = property_df[[
                c for c in property_df.columns
                if Diagnostic_Support.channels[0] in c
            ]]
            stat_pop_ip_group_list = property_df[[
                c for c in property_df.columns
                if Diagnostic_Support.channels[-1] in c
            ]]

            # group by time and event name, then count how many event in each time step and put the value into a new
            # column named: "Test result counts"
            event_df = recorder_obj[0]
            event_df = event_df.groupby([
                Diagnostic_Support.ReportColumn.time,
                Diagnostic_Support.ReportColumn.event
            ]).size().reset_index()
            event_df.rename(
                columns={0: Diagnostic_Support.ReportColumn.counts},
                inplace=True)

            def count_events(time_step, event_name):
                """Return the recorded count of ``event_name`` at ``time_step`` (0 if none)."""
                counts = event_df[
                    (event_df[Diagnostic_Support.ReportColumn.time] == time_step)
                    & (event_df[Diagnostic_Support.ReportColumn.event] ==
                       event_name)][
                           Diagnostic_Support.ReportColumn.counts].values
                return counts[0] if len(counts) else 0

            # Message templates are loop-invariant, so build them once.
            default_message = (
                "BAD: at time {0}, group {1} has infected individuals = {2} and susceptible individuals = {3},"
                " expected {4} individuals receive a {5} test result, got {6} from logging.\n"
            )
            # The last placeholder is {6} (the StdOut file name); it used to
            # repeat {5}, which dropped the file name from the message.
            mismatch_message = (
                "BAD: at time {0}, {1} records {2} {3} events, got {4} {5} results from {6}.\n"
            )

            contagion_list = []
            expected_positive_count = expected_negative_count = 0
            for t in range(1, duration):
                # Test 1: make sure we get the correct contagion sample and
                # number of positive and negative results in StdOut.txt
                stdout_t_df = stdout_df[stdout_df[
                    Diagnostic_Support.ConfigKeys.Simulation_Timestep] == t]

                infected = stdout_t_df[
                    Diagnostic_Support.Stdout.infected].iloc[0]
                stat_pop = stdout_t_df[
                    Diagnostic_Support.Stdout.stat_pop].iloc[0]

                test_positive = stdout_t_df[
                    Diagnostic_Support.Stdout.test_positive].iloc[0]
                test_negative = stdout_t_df[
                    Diagnostic_Support.Stdout.test_negative].iloc[0]
                test_default = stdout_t_df[
                    Diagnostic_Support.Stdout.test_default].iloc[0]
                envi_sample = stdout_t_df[
                    Diagnostic_Support.Stdout.sample].iloc[0]
                ip = stdout_t_df[Diagnostic_Support.Stdout.ip_value].iloc[0]

                # Row t - 1 of the property report is paired with StdOut time
                # step t. Purely positional access (row, first matching
                # column) avoids the deprecated integer fallback of
                # Series.__getitem__ on a label-indexed row.
                infected_ip_group = infected_ip_group_list.iloc[t - 1, 0]
                stat_pop_ip_group = stat_pop_ip_group_list.iloc[t - 1, 0]
                if stat_pop == stat_pop_ip_group and infected == infected_ip_group:
                    success = False
                    outfile.write(
                        "BAD: at time step {0} the total stat_pop = {1} and total infect = {2}, we got "
                        "stat_pop_ip_group = {3} and infected_ip_group = {4} in group {5}, we expect to "
                        "see less stat_pop and infected individual in the IP group , this is not a valid test "
                        "for Environment_IP_Key_Value, please check the test condition.\n"
                        .format(t, stat_pop, infected, stat_pop_ip_group,
                                infected_ip_group, ip_key_value))
                if ip_key_value != ip:
                    success = False
                    outfile.write(
                        "BAD: at time step {0}, IP={1} from StdOut.txt, expected IP={2}.\n"
                        .format(t, ip, ip_key_value))
                susceptible = stat_pop_ip_group - infected_ip_group

                # calculated environmental contagion
                contagion = base_infectivity * infected_ip_group / stat_pop_ip_group
                contagion_list.append(contagion)
                if math.fabs(contagion - envi_sample) > envi_sample * 1e-2:
                    success = False
                    outfile.write(
                        "BAD: at time step {0} the environmental sample for IP group {1} is {2}, expected value is {3}"
                        ".\n".format(t, ip_key_value, envi_sample, contagion))
                # positive = real positive or false positive
                # negative = false negative or real negative
                if contagion > sample_threshold:
                    expected_test_positive = base_sensitivity
                    expected_test_negative = 1.0 - base_sensitivity
                else:
                    expected_test_positive = 1.0 - base_specificity
                    expected_test_negative = base_specificity

                expected_positive_count += expected_test_positive
                expected_negative_count += expected_test_negative

                # no test default in this intervention
                expected_test_default = 0
                if test_default != expected_test_default:
                    success = False
                    outfile.write(
                        default_message.format(t, ip_key_value,
                                               infected_ip_group, susceptible,
                                               expected_test_default,
                                               "default", test_default))
                positive_list.append([test_positive, expected_test_positive])
                negative_list.append([test_negative, expected_test_negative])
                # End of Test 1 at this time step

                # Test 2: make sure events reported in ReportEventRecorder.csv and test results from StdOut.txt are matched.
                positive_event = count_events(
                    t, Diagnostic_Support.ReportColumn.positive)

                # StdOut.txt should match ReportEventRecorder.csv
                if test_positive != positive_event:
                    success = False
                    outfile.write(
                        mismatch_message.format(
                            t, report_event_recorder, positive_event,
                            Diagnostic_Support.ReportColumn.positive,
                            test_positive,
                            Diagnostic_Support.Stdout.test_positive,
                            stdout_filename))

                negative_event = count_events(
                    t, Diagnostic_Support.ReportColumn.negative)

                # StdOut.txt should match ReportEventRecorder.csv
                if test_negative != negative_event:
                    success = False
                    outfile.write(
                        mismatch_message.format(
                            t, report_event_recorder, negative_event,
                            Diagnostic_Support.ReportColumn.negative,
                            test_negative,
                            Diagnostic_Support.Stdout.test_negative,
                            stdout_filename))

                positive_event_list.append(positive_event)
                negative_event_list.append(negative_event)
                # End of Test 2 at this time step

            stdout_sum = stdout_df.sum()

            # prob_flag=False: the proportions passed are summed expectations,
            # not probabilities.
            result = sft.test_multinomial(
                [
                    stdout_sum[Diagnostic_Support.Stdout.test_positive],
                    stdout_sum[Diagnostic_Support.Stdout.test_negative]
                ],
                proportions=[expected_positive_count, expected_negative_count],
                report_file=outfile,
                prob_flag=False,
            )

            message = "{0}: the total test positive and negative counts from StdOut.txt are {1} and {2}, expected values" \
                      " are {3} and {4}.\n"

            if result:
                outfile.write(
                    message.format(
                        "GOOD",
                        stdout_sum[Diagnostic_Support.Stdout.test_positive],
                        stdout_sum[Diagnostic_Support.Stdout.test_negative],
                        expected_positive_count, expected_negative_count))
            else:
                success = False
                outfile.write(
                    message.format(
                        "BAD",
                        stdout_sum[Diagnostic_Support.Stdout.test_positive],
                        stdout_sum[Diagnostic_Support.Stdout.test_negative],
                        expected_positive_count, expected_negative_count))

            # Actual counts vs. per-step probability are shown as scatter
            # plots with a fit line (these replaced earlier sft.plot_data plots).
            sft.plot_scatter_fit_line(
                np.array(positive_list)[:, 0],
                dist2=np.array(positive_list)[:, 1],
                label1="Actual",
                label2="Probability of Positive",
                title="Test Positive\n Group {}".format(ip_key_value),
                xlabel="Day",
                ylabel="Positive count",
                category='Test_Positive_Probability_Scatter_Fit_Line')

            sft.plot_scatter_fit_line(
                np.array(negative_list)[:, 0],
                dist2=np.array(negative_list)[:, 1],
                label1="Actual",
                label2="Probability of Negative",
                title="Test Negative\n Group {}".format(ip_key_value),
                xlabel="Day",
                ylabel="Negative count",
                category='Test_Negative_Probability_Scatter_Fit_Line')

            sft.plot_data(
                np.array(positive_list)[:, 0],
                positive_event_list,
                label1=stdout_filename,
                label2=report_event_recorder,
                title="Test Positive\n Group {}".format(ip_key_value),
                xlabel="Day",
                ylabel="Positive count",
                category='Test_Positive_stdout_vs_event_recorder',
                overlap=True,
                alpha=0.5)
            sft.plot_data(
                np.array(negative_list)[:, 0],
                negative_event_list,
                label1=stdout_filename,
                label2=report_event_recorder,
                title="Test Negative\n Group {}".format(ip_key_value),
                xlabel="Day",
                ylabel="Negative count",
                category='Test_Negative_stdout_vs_event_recorder',
                overlap=True,
                alpha=0.5)
            # The first StdOut sample is skipped so the series lines up with
            # contagion_list, which starts at time step 1.
            sft.plot_data(
                stdout_df[Diagnostic_Support.Stdout.sample].tolist()[1:],
                contagion_list,
                label1="Actual",
                label2="Expected",
                title="Environmental_Contagion",
                xlabel="Day",
                ylabel="Environmental_Contagion",
                category='Environmental_Contagion',
                overlap=True,
                alpha=0.5)
        outfile.write(sft.format_success_msg(success))
        if debug:
            print(sft.format_success_msg(success))
        return success
Ejemplo n.º 8
0
def create_report_file(param_obj, campaign_obj, output_dict, report_dict,
                       report_name, debug):
    """Check per-timestep and overall test-result proportions and write the report.

    The expected split between positive, negative and default results is
    derived from the campaign's base sensitivity and treatment fraction. Each
    time step in ``report_dict`` is chi-squared tested against that split;
    the run fails when more than ``ceil(5%)`` of the time steps fail, when the
    test on the summed counts fails, or when ``report_dict`` is empty.

    Args:
        param_obj: mapping holding ``KEY_CONFIG_NAME``.
        campaign_obj: mapping holding ``KEY_BASE_SENSITIVITY`` and
            ``KEY_TREATMENT_FRACTION``.
        output_dict: not used by this function.
        report_dict: mapping of time step -> mapping with ``KEY_POSITIVE``,
            ``KEY_NEGATIVE`` and ``KEY_DEFAULT`` counts.
        report_name: path of the report file to (over)write.
        debug: when truthy, a one-line summary is printed.

    Returns:
        True when all checks pass, otherwise False.
    """
    with open(report_name, "w") as report:
        report.write("Config_name = {}\n".format(param_obj[KEY_CONFIG_NAME]))
        success = True
        sensitivity = campaign_obj[KEY_BASE_SENSITIVITY]
        treatment_fraction = campaign_obj[KEY_TREATMENT_FRACTION]
        positive_share = sensitivity * treatment_fraction
        negative_share = (1.0 - sensitivity) * treatment_fraction
        default_share = 1.0 - treatment_fraction
        proportions = [positive_share, negative_share, default_share]

        positive, negative, default, total = [], [], [], []
        failed_timestep = []

        if len(report_dict) == 0:
            success = False
            report.write(sft.sft_no_test_data)

        for t in report_dict:
            counts = report_dict[t]
            value_to_test = [
                counts[KEY_POSITIVE], counts[KEY_NEGATIVE], counts[KEY_DEFAULT]
            ]
            positive.append(value_to_test[0])
            negative.append(value_to_test[1])
            default.append(value_to_test[2])
            total.append(sum(value_to_test))
            report.write("Run Chi-squared test at time step {}.\n".format(t))
            passed = sft.test_multinomial(dist=value_to_test,
                                          proportions=proportions,
                                          report_file=report)
            if passed:
                continue
            # A single failing step is only a warning; the tally decides below.
            failed_timestep.append(t)
            report.write(
                "Warning: At timestep {0}, the Chi-squared test failed.\n".
                format(t))

        allowed_failures = math.ceil(0.05 * len(report_dict))
        if len(failed_timestep) > allowed_failures:
            success = False
            report.write(
                "BAD: the Chi-squared test failed at timestep {0}.\n".format(
                    ', '.join(str(step) for step in failed_timestep)))
        else:
            report.write(
                "GOOD: the Chi-squared test failed {} times, less than 5% of the total timestep.\n"
                .format(len(failed_timestep)))

        report.write(
            "BIG TEST: Testing the total proportion across the simulation\n")
        summed_counts = [sum(positive), sum(negative), sum(default)]
        if not sft.test_multinomial(dist=summed_counts,
                                    proportions=proportions,
                                    report_file=report):
            success = False
            report.write("FAIL: the total chi-square test fails.\n")

        # One plot per outcome, each against the per-step total:
        # (series, label, title template, expected share, plot category).
        plot_specs = (
            (positive, "TBTestPositive",
             "Test positive vs. total, positive proportion = {}",
             positive_share, 'Test_positive_vs_total'),
            (negative, "TBTestNegative",
             "Test negative vs. total, negative proportion = {}",
             negative_share, 'Test_negative_vs_total'),
            (default, "TBTestDefault",
             "Test default vs. total, default proportion = {}",
             default_share, 'Test_default_vs_total'),
        )
        for series, label, title_template, share, category in plot_specs:
            sft.plot_data(
                series,
                dist2=total,
                label1=label,
                label2="Total tested",
                title=title_template.format(share),
                xlabel="time step",
                ylabel="# of individuals",
                category=category,
                show=True,
                line=False)
        # TODO: write test to check if report matches debug logging. Pending on #2279. May not need this part.
        report.write(sft.format_success_msg(success))
    if debug:
        print("SUMMARY: Success={0}\n".format(success))
    return success
Ejemplo n.º 9
0
def create_report_file(param_obj, campaign_obj, output_dict, report_dict,
                       report_name, debug):
    """Check MDR test-result proportions per time step and overall; write the report.

    Expected proportions of positive / negative / default results are derived
    from the campaign's base sensitivity, treatment fraction and treatment
    fraction for a negative diagnosis. Every time step in ``report_dict`` is
    chi-squared tested; the run fails when more than 20% of the time steps
    fail, when the test on the summed counts fails, or when ``report_dict``
    holds no time steps at all.

    Args:
        param_obj: mapping holding ``KEY_CONFIG_NAME``.
        campaign_obj: mapping holding ``KEY_BASE_SENSITIVITY``,
            ``KEY_TREATMENT_FRACTION`` and
            ``KEY_TREATMENT_FRACTION_NEGATIVE_DIAGNOSIS``.
        output_dict: not used by this function.
        report_dict: mapping of time step -> mapping with ``KEY_MDR_POSITIVE``,
            ``KEY_MDR_NEGATIVE`` and ``KEY_MDR_DEFAULT`` counts.
        report_name: path of the report file to (over)write.
        debug: when truthy, a one-line summary is printed.

    Returns:
        True when all checks pass, otherwise False.
    """
    with open(report_name, "w") as outfile:
        config_name = param_obj[KEY_CONFIG_NAME]
        outfile.write("Config_name = {}\n".format(config_name))
        success = True
        sensitivity = campaign_obj[KEY_BASE_SENSITIVITY]
        treatment_fraction = campaign_obj[KEY_TREATMENT_FRACTION]
        treatment_fraction_negative_diagnosis = campaign_obj[
            KEY_TREATMENT_FRACTION_NEGATIVE_DIAGNOSIS]
        proportions = [
            sensitivity * treatment_fraction,
            (1.0 - sensitivity) * treatment_fraction_negative_diagnosis,
            (1.0 - sensitivity) *
            (1.0 - treatment_fraction_negative_diagnosis) + sensitivity *
            (1.0 - treatment_fraction)
        ]
        total_proportion = sum(proportions)
        positive = []
        negative = []
        default = []
        total = []
        pass_count = 0
        fail_count = 0
        for t in report_dict:
            value_to_test = [
                report_dict[t][KEY_MDR_POSITIVE],
                report_dict[t][KEY_MDR_NEGATIVE],
                report_dict[t][KEY_MDR_DEFAULT]
            ]
            positive.append(value_to_test[0])
            negative.append(value_to_test[1])
            default.append(value_to_test[2])
            # Round instead of truncating: the float sum of the proportions
            # can land a hair above 1, which would otherwise shave one
            # individual off the plotted total.
            total.append(int(round(sum(value_to_test) / total_proportion)))
            outfile.write(f"Timestep {t} Chi-squared test: {value_to_test[0]} positive, {value_to_test[1]} negative," \
                          f" {value_to_test[2]} default.\n")
            result = sft.test_multinomial(dist=value_to_test,
                                          proportions=proportions,
                                          report_file=outfile)
            if not result:
                # A single failing time step does not fail the run by itself;
                # only the failure ratio checked below does.
                fail_count += 1
                outfile.write(
                    "BAD: At timestep {0}, the Chi-squared test failed.\n".
                    format(t))
            else:
                pass_count += 1

        point_count = pass_count + fail_count
        point_check_tolerance = 0.2
        if not point_count:
            # Without any time step the failure ratio is undefined (it used to
            # raise ZeroDivisionError); report the missing data instead.
            success = False
            outfile.write(sft.sft_no_test_data)
        elif fail_count / point_count > point_check_tolerance:
            success = False
            outfile.write(
                f"FAIL: more than {point_check_tolerance} of the points checked failed validation.\n"
            )
        positives = sum(positive)
        negatives = sum(negative)
        defaults = sum(default)
        outfile.write(
            "BIG TEST: testing all of the diagnoses in the sim next!\n")
        sum_result = sft.test_multinomial(
            dist=[positives, negatives, defaults],
            proportions=proportions,
            report_file=outfile)
        if not sum_result:
            success = False
            outfile.write("FAIL: the sum chi-square test fails.\n")

        sft.plot_data(
            positive,
            dist2=total,
            label1="TBMDRTestPositive",
            label2="Total tested",
            title="MDR Test positive vs. total, positive proportion = {}".
            format(proportions[0]),
            xlabel="time step",
            ylabel="# of individuals",
            category='MDR_Test_positive_vs_total',
            show=True,
            line=False)
        sft.plot_data(
            negative,
            dist2=total,
            label1="TBMDRTestNegative",
            label2="Total tested",
            title="MDR Test negative vs. total, negative proportion = {}".
            format(proportions[1]),
            xlabel="time step",
            ylabel="# of individuals",
            category='MDR_Test_negative_vs_total',
            show=True,
            line=False)
        sft.plot_data(
            default,
            dist2=total,
            label1="TBMDRTestDefault",
            label2="Total tested",
            title="MDR Test default vs. total, default proportion = {}".format(
                proportions[2]),
            xlabel="time step",
            ylabel="# of individuals",
            category='MDR_Test_default_vs_total',
            show=True,
            line=False)
        outfile.write(sft.format_success_msg(success))
    if debug:
        print("SUMMARY: Success={0}\n".format(success))
    return success
def create_report_file(param_obj, campaign_obj, stdout_df, report_name, debug):
    """Validate whole-population EnvironmentalDiagnostic output and write the report.

    For each time step ``t`` in ``range(1, Simulation_Duration)`` the
    environmental contagion is recomputed as
    ``Base_Infectivity * infected / stat_pop`` and compared (1% relative
    tolerance) with the sample logged in StdOut. The per-step expected
    positive/negative probabilities are accumulated and the total logged
    positive/negative counts are tested against them with
    ``sft.test_multinomial``.

    Args:
        param_obj: mapping of config parameters; every item is echoed to the
            report, and ``ConfigKeys.Simulation_Duration`` /
            ``ConfigKeys.Base_Infectivity`` are read from it.
        campaign_obj: mapping holding the diagnostic's ``Sample_Threshold``,
            ``Base_Sensitivity``, ``Base_Specificity`` and
            ``Environment_IP_Key_Value``.
        stdout_df: DataFrame of values parsed from StdOut, one row per time step.
        report_name: path of the report file to (over)write.
        debug: when truthy, the success summary is also printed.

    Returns:
        True when every check passes, otherwise False.
    """
    with open(report_name, "w") as report:
        for key, value in param_obj.items():
            report.write("{0} = {1}\n".format(key, value))
        success = True

        diag_keys = CampaignKeys.EnvironmentalDiagnosticKeys
        sample_threshold = float(campaign_obj[diag_keys.Sample_Threshold])
        base_sensitivity = float(campaign_obj[diag_keys.Base_Sensitivity])
        base_specificity = float(campaign_obj[diag_keys.Base_Specificity])
        ip_key_value = campaign_obj[diag_keys.Environment_IP_Key_Value]
        # Echo the campaign settings once all of them have been read.
        for key, value in (
                (diag_keys.Sample_Threshold, sample_threshold),
                (diag_keys.Base_Sensitivity, base_sensitivity),
                (diag_keys.Base_Specificity, base_specificity),
                (diag_keys.Environment_IP_Key_Value, ip_key_value),
        ):
            report.write("{0} = {1}\n".format(key, value))

        # Pre-condition checks on the campaign settings. Only a non-empty IP
        # key value fails the test; the other two are warnings.
        if sample_threshold:
            report.write(
                "WARNING: {0} should be 0 in this test, got {1} from compaign file. Please fix the test.\n"
                .format(diag_keys.Sample_Threshold, sample_threshold))
        if base_specificity != 1:
            report.write(
                "WARNING: the {0} is {1}, expected value is 1.\n".format(
                    diag_keys.Base_Specificity, base_specificity))
        if ip_key_value:
            success = False
            report.write(
                "BAD: {0} should be empty in this test, got {1} from compaign file. Please fix the test.\n"
                .format(diag_keys.Environment_IP_Key_Value, ip_key_value))

        duration = param_obj[ConfigKeys.Simulation_Duration]
        base_infectivity = param_obj[ConfigKeys.Base_Infectivity]
        expected_positive_count = 0
        expected_negative_count = 0
        contagion_list = []

        for t in range(1, duration):
            row = stdout_df[stdout_df[
                Diagnostic_Support.ConfigKeys.Simulation_Timestep] == t]
            infected = row[Diagnostic_Support.Stdout.infected].iloc[0]
            stat_pop = row[Diagnostic_Support.Stdout.stat_pop].iloc[0]
            envi_sample = row[Diagnostic_Support.Stdout.sample].iloc[0]

            # Expected contagion from the logged infected / population counts.
            contagion = base_infectivity * infected / stat_pop
            if math.fabs(contagion - envi_sample) > envi_sample * 1e-2:
                success = False
                report.write(
                    "BAD: at time step {0} the environmental sample is {1}, expected value is {2}.\n"
                    .format(t, envi_sample, contagion))

            # Above the threshold sensitivity drives the outcome, otherwise
            # specificity does.
            if contagion > sample_threshold:
                positive_prob, negative_prob = base_sensitivity, 1.0 - base_sensitivity
            else:
                positive_prob, negative_prob = 1.0 - base_specificity, base_specificity

            contagion_list.append(contagion)
            expected_positive_count += positive_prob
            expected_negative_count += negative_prob

        stdout_sum = stdout_df.sum()
        total_positive = stdout_sum[Diagnostic_Support.Stdout.test_positive]
        total_negative = stdout_sum[Diagnostic_Support.Stdout.test_negative]

        result = sft.test_multinomial(
            [total_positive, total_negative],
            proportions=[expected_positive_count, expected_negative_count],
            report_file=report,
            prob_flag=False,
        )

        message = "{0}: the total test positive and negative counts from StdOut.txt are {1} and {2}, expected values" \
                  " are {3} and {4}.\n"
        if not result:
            success = False
        report.write(
            message.format("GOOD" if result else "BAD", total_positive,
                           total_negative, expected_positive_count,
                           expected_negative_count))

        # The first StdOut sample is skipped so the series lines up with
        # contagion_list, which starts at time step 1.
        actual_samples = stdout_df[Diagnostic_Support.Stdout.sample].tolist()[1:]
        sft.plot_data(actual_samples, contagion_list,
                      label1="Actual",
                      label2="Expected",
                      title="Environmental_Contagion", xlabel="Day",
                      ylabel="Environmental_Contagion",
                      category='Environmental_Contagion', overlap=True, alpha=0.5)

        # NOTE: per-timestep checks of the positive/negative/default counts
        # (5% tolerance) and their plots used to live here as commented-out
        # code; only the aggregate multinomial test above is active.
        report.write(sft.format_success_msg(success))
        if debug:
            print(sft.format_success_msg(success))
        return success
def create_report_file(param_obj, intervention_obj, outbreak_obj, stdout_df, report_name, debug):
    """
    Write the scientific-feature-test report for an EnvironmentalDiagnostic run that must not contain an outbreak.

    The report echoes every entry of ``param_obj`` and the diagnostic settings read from
    ``intervention_obj``, validates the test preconditions, compares the logged environmental sample
    against ``base_infectivity * infected / stat_pop`` for every time step in ``range(1, duration)``,
    and finally hands the summed positive/negative counts to ``sft.test_multinomial``.

    :param param_obj: mapping of config parameters; must provide ``ConfigKeys.Simulation_Duration`` and
        ``ConfigKeys.Base_Infectivity``
    :param intervention_obj: mapping holding the ``CampaignKeys.EnvironmentalDiagnosticKeys`` settings
    :param outbreak_obj: outbreak definition from the campaign; any truthy value fails the test
    :param stdout_df: data frame parsed from StdOut.txt, looked up by simulation time step
    :param report_name: path of the report file to (over)write
    :param debug: when truthy, the final success message is also printed
    :return: True when no check failed, False otherwise
    """
    with open(report_name, "w") as outfile:
        diag_keys = CampaignKeys.EnvironmentalDiagnosticKeys
        line_template = "{0} = {1}\n"

        # echo the whole config at the top of the report
        for param_name, param_value in param_obj.items():
            outfile.write(line_template.format(param_name, param_value))
        success = True

        # read all diagnostic settings first, then echo them in the same order
        sample_threshold = float(intervention_obj[diag_keys.Sample_Threshold])
        base_sensitivity = float(intervention_obj[diag_keys.Base_Sensitivity])
        base_specificity = float(intervention_obj[diag_keys.Base_Specificity])
        ip_key_value = intervention_obj[diag_keys.Environment_IP_Key_Value]
        echoed_settings = (
            (diag_keys.Sample_Threshold, sample_threshold),
            (diag_keys.Base_Sensitivity, base_sensitivity),
            (diag_keys.Base_Specificity, base_specificity),
            (diag_keys.Environment_IP_Key_Value, ip_key_value),
        )
        for setting_name, setting_value in echoed_settings:
            outfile.write(line_template.format(setting_name, setting_value))

        # precondition checks: an outbreak or an IP filter invalidates the test, the rest only warn
        if outbreak_obj:
            success = False
            outfile.write("BAD: Campaign.json should not have {0}, got {1} from the json file.\n".format(
                CampaignKeys.InterventionClassKeys.OutbreakIndividual, outbreak_obj))
        if sample_threshold:
            outfile.write(
                "WARNING: {0} should be 0 in this test, got {1} from compaign file. Please fix the test.\n".format(
                    diag_keys.Sample_Threshold, sample_threshold))
        if base_sensitivity != 1:
            outfile.write("WARNING: the {0} is {1}, expected value is 1.\n".format(
                diag_keys.Base_Sensitivity, base_sensitivity))
        if ip_key_value:
            success = False
            outfile.write(
                "BAD: {0} should be empty in this test, got {1} from compaign file. Please fix the test.\n".format(
                    diag_keys.Environment_IP_Key_Value, ip_key_value))

        duration = param_obj[ConfigKeys.Simulation_Duration]
        base_infectivity = param_obj[ConfigKeys.Base_Infectivity]
        expected_positive_count = expected_negative_count = 0
        contagion_list = []
        stdout_cols = Diagnostic_Support.Stdout
        for step in range(1, duration):
            # first logged row of this time step
            step_rows = stdout_df[stdout_df[Diagnostic_Support.ConfigKeys.Simulation_Timestep] == step]
            infected = step_rows[stdout_cols.infected].iloc[0]
            stat_pop = step_rows[stdout_cols.stat_pop].iloc[0]
            envi_sample = step_rows[stdout_cols.sample].iloc[0]

            # expected environmental contagion; tolerated deviation is 1% of the logged sample
            contagion = base_infectivity * infected / stat_pop
            deviation = math.fabs(contagion - envi_sample)
            if deviation > envi_sample * 1e-2:
                success = False
                outfile.write("BAD: at time step {0} the environmental sample is {1}, expected value is {2}.\n".format(
                    step, envi_sample, contagion))

            # above the threshold the sensitivity drives the outcome, otherwise the specificity does
            positive_probability, negative_probability = (
                (base_sensitivity, 1.0 - base_sensitivity)
                if contagion > sample_threshold
                else (1.0 - base_specificity, base_specificity)
            )
            contagion_list.append(contagion)

            expected_positive_count += positive_probability
            expected_negative_count += negative_probability

        stdout_sum = stdout_df.sum()
        total_positive = stdout_sum[stdout_cols.test_positive]
        total_negative = stdout_sum[stdout_cols.test_negative]

        result = sft.test_multinomial([total_positive, total_negative],
                                      proportions=[expected_positive_count, expected_negative_count],
                                      report_file=outfile,
                                      prob_flag=False)

        message = "{0}: the total test positive and negative counts from StdOut.txt are {1} and {2}, expected values" \
                  " are {3} and {4}.\n"
        if not result:
            success = False
        outfile.write(message.format("GOOD" if result else "BAD", total_positive, total_negative,
                                     expected_positive_count, expected_negative_count))

        # the expected list starts at time step 1, so the first logged sample is dropped
        actual_samples = stdout_df[stdout_cols.sample].tolist()[1:]
        sft.plot_data(actual_samples, contagion_list,
                      label1="Actual",
                      label2="Expected",
                      title="Environmental_Contagion", xlabel="Day",
                      ylabel="Environmental_Contagion",
                      category='Environmental_Contagion', overlap=True, alpha=0.5)
        outfile.write(sft.format_success_msg(success))
        if debug:
            print(sft.format_success_msg(success))
        return success
def create_report_file(param_obj, campaign_obj, property_obj, property_df, stdout_df, recorder_obj,
                       report_name, report_event_recorder, stdout_filename, debug):
    """
    Write the scientific-feature-test report for an EnvironmentalDiagnostic restricted to one IP group.

    For every time step in ``range(1, duration)`` two checks are performed:

    * Test 1 - the environmental sample logged in StdOut.txt must match
      ``base_infectivity * infected_ip_group / stat_pop_ip_group`` (within 1% of the logged sample),
      the logged IP value must equal the configured one, the IP group must differ from the whole
      population, and no "default" test result may be logged.
    * Test 2 - the positive/negative counts in StdOut.txt must equal the number of matching events
      in the event recorder report.

    Afterwards the summed positive/negative counts are handed to ``sft.test_multinomial`` and several
    plots are produced through ``sft``.

    :param param_obj: mapping of config parameters; must provide ``ConfigKeys.Simulation_Duration`` and
        ``ConfigKeys.Base_Infectivity``
    :param campaign_obj: mapping holding the ``CampaignKeys.EnvironmentalDiagnosticKeys`` settings
    :param property_obj: not used by this function; kept for interface compatibility
    :param property_df: data frame of the property report; the first column whose name contains
        ``Diagnostic_Support.channels[0]`` / ``channels[-1]`` supplies the infected / stat-pop values
        of the IP group, row ``t - 1`` being used for time step ``t``
    :param stdout_df: data frame parsed from StdOut.txt, looked up by simulation time step
    :param recorder_obj: indexable pair; ``[1]`` is the parse-success flag and ``[0]`` is the event
        data frame on success or the parse error otherwise
    :param report_name: path of the report file to (over)write
    :param report_event_recorder: name of the event recorder report, used in messages and plot labels
    :param stdout_filename: name of the stdout file, used in messages and plot labels
    :param debug: when truthy, the final success message is also printed
    :return: True when parsing succeeded and no check failed, False otherwise
    """
    with open(report_name, "w") as outfile:
        for name, param in param_obj.items():
            outfile.write("{0} = {1}\n".format(name, param))
        diag_keys = CampaignKeys.EnvironmentalDiagnosticKeys
        sample_threshold = float(campaign_obj[diag_keys.Sample_Threshold])
        base_sensitivity = float(campaign_obj[diag_keys.Base_Sensitivity])
        base_specificity = float(campaign_obj[diag_keys.Base_Specificity])
        ip_key_value = campaign_obj[diag_keys.Environment_IP_Key_Value]
        outfile.write("{0} = {1}\n".format(diag_keys.Sample_Threshold, sample_threshold))
        outfile.write("{0} = {1}\n".format(diag_keys.Base_Sensitivity, base_sensitivity))
        outfile.write("{0} = {1}\n".format(diag_keys.Base_Specificity, base_specificity))
        outfile.write("{0} = {1}\n".format(diag_keys.Environment_IP_Key_Value, ip_key_value))
        success = recorder_obj[1]

        if not success:
            error_message = recorder_obj[0]
            outfile.write("Failed to parse report file: {0}, get exception: {1}.\n".format(report_event_recorder,
                                                                                           error_message))
        else:
            # Throw warning messages for condition checks. Make sure all features are enabled.
            if not sample_threshold:
                outfile.write(
                    "WARNING: {0} should not be 0 in this test, got {1} from compaign file. Please fix the test.\n"
                    "".format(diag_keys.Sample_Threshold, sample_threshold))
            if base_specificity == 1:
                outfile.write("WARNING: the {0} is {1}, expected value is less than 1.\n".format(
                    diag_keys.Base_Specificity, base_specificity))
            if base_sensitivity == 1:
                outfile.write("WARNING: the {0} is {1}, expected value is less than 1.\n".format(
                    diag_keys.Base_Sensitivity, base_sensitivity))
            if not ip_key_value:
                outfile.write(
                    "WARNING: {0} should not be empty in this test, got '{1}' from compaign file. Please fix the test.\n"
                    "".format(diag_keys.Environment_IP_Key_Value, ip_key_value))

            duration = param_obj[ConfigKeys.Simulation_Duration]
            base_infectivity = param_obj[ConfigKeys.Base_Infectivity]

            positive_list = []
            negative_list = []
            positive_event_list = []
            negative_event_list = []

            # get infected and stat_pop channels for the selected IP group from property report
            infected_ip_group_list = property_df[[c for c in property_df.columns if Diagnostic_Support.channels[0] in c]]
            stat_pop_ip_group_list = property_df[[c for c in property_df.columns if Diagnostic_Support.channels[-1] in c]]

            # group by time and event name, then count how many event in each time step and put the value into a new
            # column named: "Test result counts"
            event_df = recorder_obj[0]
            event_df = event_df.groupby([Diagnostic_Support.ReportColumn.time,
                                         Diagnostic_Support.ReportColumn.event]).size().reset_index()
            event_df.rename(columns={0: Diagnostic_Support.ReportColumn.counts}, inplace=True)

            def _event_count(time_step, event_name):
                """Return the recorded number of ``event_name`` events at ``time_step`` (0 when none)."""
                counts = event_df[
                    (event_df[Diagnostic_Support.ReportColumn.time] == time_step) &
                    (event_df[Diagnostic_Support.ReportColumn.event] == event_name)][
                    Diagnostic_Support.ReportColumn.counts].values
                return counts[0] if len(counts) else 0

            # message templates are loop invariant, so they are built once up front
            default_message = "BAD: at time {0}, group {1} has infected individuals = {2} and susceptible " \
                              "individuals = {3}, expected {4} individuals receive a {5} test result, got {6} " \
                              "from logging.\n"
            # the last field is {6} (the stdout file name); it used to be {5}, which repeated the
            # column label and silently dropped the file name from the message
            mismatch_message = "BAD: at time {0}, {1} records {2} {3} events, got {4} {5} results from {6}.\n"

            contagion_list = []
            expected_positive_count = expected_negative_count = 0
            for t in range(1, duration):
                # Test 1: make sure we get the correct contagion sample and
                # number of positive and negative results in StdOut.txt
                stdout_t_df = stdout_df[stdout_df[Diagnostic_Support.ConfigKeys.Simulation_Timestep] == t]

                infected = stdout_t_df[Diagnostic_Support.Stdout.infected].iloc[0]
                stat_pop = stdout_t_df[Diagnostic_Support.Stdout.stat_pop].iloc[0]

                test_positive = stdout_t_df[Diagnostic_Support.Stdout.test_positive].iloc[0]
                test_negative = stdout_t_df[Diagnostic_Support.Stdout.test_negative].iloc[0]
                test_default = stdout_t_df[Diagnostic_Support.Stdout.test_default].iloc[0]
                envi_sample = stdout_t_df[Diagnostic_Support.Stdout.sample].iloc[0]
                ip = stdout_t_df[Diagnostic_Support.Stdout.ip_value].iloc[0]

                # row t - 1, first matching column; a single .iloc[row, col] lookup avoids the
                # deprecated positional ``series[0]`` access on a string-labelled Series
                infected_ip_group = infected_ip_group_list.iloc[t - 1, 0]
                stat_pop_ip_group = stat_pop_ip_group_list.iloc[t - 1, 0]
                if stat_pop == stat_pop_ip_group and infected == infected_ip_group:
                    success = False
                    outfile.write("BAD: at time step {0} the total stat_pop = {1} and total infect = {2}, we got "
                                  "stat_pop_ip_group = {3} and infected_ip_group = {4} in group {5}, we expect to "
                                  "see less stat_pop and infected individual in the IP group , this is not a valid test "
                                  "for Environment_IP_Key_Value, please check the test condition.\n".format(
                                      t, stat_pop, infected, stat_pop_ip_group, infected_ip_group, ip_key_value))
                if ip_key_value != ip:
                    success = False
                    outfile.write("BAD: at time step {0}, IP={1} from StdOut.txt, expected IP={2}.\n".format(
                        t, ip, ip_key_value))
                susceptible = stat_pop_ip_group - infected_ip_group

                # calculated environmental contagion
                contagion = base_infectivity * infected_ip_group / stat_pop_ip_group
                contagion_list.append(contagion)
                if math.fabs(contagion - envi_sample) > envi_sample * 1e-2:
                    success = False
                    outfile.write(
                        "BAD: at time step {0} the environmental sample for IP group {1} is {2}, expected value is {3}"
                        ".\n".format(t, ip_key_value, envi_sample, contagion))
                # positive = real positive or false positive
                # negative = false negative or real negative
                if contagion > sample_threshold:
                    expected_test_positive = base_sensitivity
                    expected_test_negative = 1.0 - base_sensitivity
                else:
                    expected_test_positive = 1.0 - base_specificity
                    expected_test_negative = base_specificity

                expected_positive_count += expected_test_positive
                expected_negative_count += expected_test_negative

                # no test default in this intervention
                expected_test_default = 0
                if test_default != expected_test_default:
                    success = False
                    outfile.write(default_message.format(
                        t, ip_key_value, infected_ip_group, susceptible, expected_test_default,
                        "default", test_default))
                positive_list.append([test_positive, expected_test_positive])
                negative_list.append([test_negative, expected_test_negative])
                # End of Test 1 at this time step

                # Test 2: make sure events reported in ReportEventRecorder.csv and test results from StdOut.txt
                # are matched.
                positive_event = _event_count(t, Diagnostic_Support.ReportColumn.positive)
                if test_positive != positive_event:
                    success = False
                    outfile.write(mismatch_message.format(
                        t, report_event_recorder, positive_event, Diagnostic_Support.ReportColumn.positive,
                        test_positive, Diagnostic_Support.Stdout.test_positive, stdout_filename))

                negative_event = _event_count(t, Diagnostic_Support.ReportColumn.negative)
                if test_negative != negative_event:
                    success = False
                    outfile.write(mismatch_message.format(
                        t, report_event_recorder, negative_event, Diagnostic_Support.ReportColumn.negative,
                        test_negative, Diagnostic_Support.Stdout.test_negative, stdout_filename))

                positive_event_list.append(positive_event)
                negative_event_list.append(negative_event)
                # End of Test 2 at this time step

            stdout_sum = stdout_df.sum()
            total_positive = stdout_sum[Diagnostic_Support.Stdout.test_positive]
            total_negative = stdout_sum[Diagnostic_Support.Stdout.test_negative]

            result = sft.test_multinomial([total_positive, total_negative],
                                          proportions=[expected_positive_count, expected_negative_count],
                                          report_file=outfile,
                                          prob_flag=False)

            message = "{0}: the total test positive and negative counts from StdOut.txt are {1} and {2}, expected values" \
                      " are {3} and {4}.\n"
            if not result:
                success = False
            outfile.write(message.format("GOOD" if result else "BAD", total_positive, total_negative,
                                         expected_positive_count, expected_negative_count))

            # column 0 holds the logged count, column 1 the expected probability for that day
            positive_array = np.array(positive_list)
            negative_array = np.array(negative_list)

            # scatter-with-fit-line plots of logged counts against the expected probabilities
            sft.plot_scatter_fit_line(positive_array[:, 0], dist2=positive_array[:, 1],
                                      label1="Actual", label2="Probability of Positive",
                                      title="Test Positive\n Group {}".format(ip_key_value),
                                      xlabel="Day",
                                      ylabel="Positive count",
                                      category='Test_Positive_Probability_Scatter_Fit_Line')

            sft.plot_scatter_fit_line(negative_array[:, 0], dist2=negative_array[:, 1],
                                      label1="Actual", label2="Probability of Negative",
                                      title="Test Negative\n Group {}".format(ip_key_value),
                                      xlabel="Day",
                                      ylabel="Negative count",
                                      category='Test_Negative_Probability_Scatter_Fit_Line')

            # StdOut.txt counts against event recorder counts
            sft.plot_data(positive_array[:, 0], positive_event_list,
                          label1=stdout_filename,
                          label2=report_event_recorder,
                          title="Test Positive\n Group {}".format(ip_key_value), xlabel="Day",
                          ylabel="Positive count",
                          category='Test_Positive_stdout_vs_event_recorder', overlap=True, alpha=0.5)
            sft.plot_data(negative_array[:, 0], negative_event_list,
                          label1=stdout_filename,
                          label2=report_event_recorder,
                          title="Test Negative\n Group {}".format(ip_key_value), xlabel="Day",
                          ylabel="Negative count",
                          category='Test_Negative_stdout_vs_event_recorder', overlap=True, alpha=0.5)
            # the expected list starts at time step 1, so the first logged sample is dropped
            sft.plot_data(stdout_df[Diagnostic_Support.Stdout.sample].tolist()[1:], contagion_list,
                          label1="Actual",
                          label2="Expected",
                          title="Environmental_Contagion", xlabel="Day",
                          ylabel="Environmental_Contagion",
                          category='Environmental_Contagion', overlap=True, alpha=0.5)
        outfile.write(sft.format_success_msg(success))
        if debug:
            print(sft.format_success_msg(success))
        return success