def test_new_infections(expected_new_infection, actual_new_infection, susceptible_pop,
                        probability, failed_count, route, insetchart_name, binomial_test_file, t):
    """Check one timestep's new-infection count against a binomial expectation.

    If the expected number of infections, or the expected number of
    non-infections (``susceptible_pop * (1 - probability)``), is below 5, the
    exact binomial pmf of the observed count is evaluated and the timestep is
    counted as failed when that pmf is below 5e-2.  Otherwise the decision is
    delegated to ``sft.test_binomial_99ci`` and a falsy result counts as a
    failure.

    Args:
        expected_new_infection: expected number of new infections; drives the
            small-sample decision and is echoed in the failure message.
        actual_new_infection: observed number of new infections (successes).
        susceptible_pop: number of binomial trials.
        probability: per-trial infection probability.
        failed_count: running count of failed timesteps.
        route: route label used in the messages.
        insetchart_name: name echoed in the small-sample failure message.
        binomial_test_file: writable file-like object; receives the failure
            line and is handed to ``sft.test_binomial_99ci`` as its report file.
        t: timestep index; messages report it as ``t + 1``.

    Returns:
        ``failed_count``, increased by one if this timestep failed.
    """
    reported_step = t + 1
    small_sample = expected_new_infection < 5 or susceptible_pop * (1 - probability) < 5

    if not small_sample:
        # Large enough sample: let the shared helper decide and report.
        within_interval = sft.test_binomial_99ci(
            num_success=actual_new_infection,
            num_trials=susceptible_pop,
            prob=probability,
            report_file=binomial_test_file,
            category="new infections by route({0}) at time {1}".format(route, reported_step))
        if within_interval:
            return failed_count
        return failed_count + 1

    # Small sample: judge the observation by its exact binomial probability mass.
    pmf_value = stats.binom.pmf(k=actual_new_infection, n=susceptible_pop, p=probability)
    if pmf_value < 5e-2:
        failure_line = ("BAD: at timestep {0}, total new infections for route {1} is "
                        "{2} in {3}, expected = {4}, calculated binomial pmf is {5}.\n")
        binomial_test_file.write(failure_line.format(
            reported_step, route, actual_new_infection, insetchart_name,
            expected_new_infection, pmf_value))
        return failed_count + 1
    return failed_count
예제 #2
0
def test_new_infections(expected_new_infection, actual_new_infection,
                        susceptible_pop, probability, failed_count, route,
                        insetchart_name, binomial_test_file, t):
    """Validate the new infections of one timestep and update the failure count.

    Two regimes are used.  When ``expected_new_infection`` or
    ``susceptible_pop * (1 - probability)`` is below 5, the exact binomial pmf
    of ``actual_new_infection`` is computed and a value below 5e-2 is a
    failure, reported as a "BAD" line in ``binomial_test_file``.  Otherwise
    ``sft.test_binomial_99ci`` is called with ``binomial_test_file`` as its
    report file and a falsy return value is a failure.

    Args:
        expected_new_infection: expected number of new infections.
        actual_new_infection: observed number of new infections.
        susceptible_pop: number of binomial trials.
        probability: per-trial infection probability.
        failed_count: number of failures accumulated so far.
        route: route label used in the messages.
        insetchart_name: name echoed in the "BAD" line.
        binomial_test_file: writable file-like object for the report output.
        t: timestep index; messages report it as ``t + 1``.

    Returns:
        The updated failure count.
    """
    step_label = t + 1
    use_exact_pmf = (expected_new_infection < 5
                     or susceptible_pop * (1 - probability) < 5)

    if use_exact_pmf:
        observed_pmf = stats.binom.pmf(
            k=actual_new_infection, n=susceptible_pop, p=probability)
        if observed_pmf < 5e-2:
            template = (
                "BAD: at timestep {0}, total new infections for route {1} is "
                "{2} in {3}, expected = {4}, calculated binomial pmf is {5}.\n")
            binomial_test_file.write(
                template.format(step_label, route, actual_new_infection,
                                insetchart_name, expected_new_infection,
                                observed_pmf))
            return failed_count + 1
        return failed_count

    # Sample is large enough: defer to the shared binomial check.
    category = "new infections by route({0}) at time {1}".format(
        route, step_label)
    passed = sft.test_binomial_99ci(num_success=actual_new_infection,
                                    num_trials=susceptible_pop,
                                    prob=probability,
                                    report_file=binomial_test_file,
                                    category=category)
    if passed:
        return failed_count
    return failed_count + 1
예제 #3
0
def test_new_infections(expected_new_infection, actual_new_infection,
                        calculated_prob, failed_count, route, t, group,
                        outfile, susceptible_population, file):
    """Check a group's new infections for one timestep against a binomial model.

    When ``expected_new_infection`` or
    ``susceptible_population * (1 - calculated_prob)`` is below 5, the exact
    binomial pmf of the observation is used and a pmf below 1e-3 is a failure.
    Otherwise ``sft.test_binomial_99ci`` is called (reporting into ``file``)
    and a falsy result is a failure.  Every failure appends a "WARNING" line
    to ``outfile``.

    Args:
        expected_new_infection: expected number of new infections.
        actual_new_infection: observed number of new infections.
        calculated_prob: per-trial infection probability.
        failed_count: number of failures accumulated so far.
        route: route label used in the warning lines.
        t: timestep reported in the messages.
        group: group label used in the messages.
        outfile: writable file-like object that receives the warning lines.
        susceptible_population: number of binomial trials.
        file: writable file-like object handed to ``sft.test_binomial_99ci``.

    Returns:
        ``failed_count``, increased by one if this timestep failed.
    """
    small_sample = (expected_new_infection < 5
                    or susceptible_population * (1 - calculated_prob) < 5)

    if small_sample:
        pmf_value = stats.binom.pmf(k=actual_new_infection,
                                    n=susceptible_population,
                                    p=calculated_prob)
        if pmf_value < 1e-3:
            pmf_warning = (
                "WARNING: at timestep {0}, new infections for {1} group route {2} is "
                "{3}, expected = {4}, calculated binomial pmf is {5}.\n")
            outfile.write(pmf_warning.format(
                t, group, route, actual_new_infection,
                expected_new_infection, pmf_value))
            return failed_count + 1
        return failed_count

    category = "new infections for {0} at time {1}".format(group, t)
    if sft.test_binomial_99ci(num_success=actual_new_infection,
                              num_trials=susceptible_population,
                              prob=calculated_prob,
                              report_file=file,
                              category=category):
        return failed_count

    # Binomial standard deviation: sqrt(p * (1 - p) * n).
    sigma = math.sqrt(
        calculated_prob * (1 - calculated_prob) * susceptible_population)
    # Mean +/- 3 standard deviations, reported in the message as the 99% interval.
    interval_low = expected_new_infection - 3 * sigma
    interval_high = expected_new_infection + 3 * sigma
    interval_warning = (
        "WARNING: at timestep {0}, new infections for {1} group route {2} is {3},"
        " expected within 99% binomial interval ({4}, {5}) with mean = {6}\n")
    outfile.write(interval_warning.format(
        t, group, route, actual_new_infection, interval_low, interval_high,
        expected_new_infection))
    return failed_count + 1
예제 #4
0
def create_report_file(active_TB_treatments, relapses, tb_drug_relapse_rate_hiv, drug_start_timestep, report_name):
    """Write the relapse-rate report to ``report_name`` and return the verdict.

    For every index ``x`` in
    ``range(drug_start_timestep + 1, len(active_TB_treatments) - 1)`` the
    relapse count at ``x + 1`` is checked with ``sft.test_binomial_99ci``
    against ``int(active_TB_treatments[x])`` trials at rate
    ``tb_drug_relapse_rate_hiv``.  A falsy result fails the test and adds a
    "BAD" line.  The test also fails, with ``sft.sft_no_test_data`` written,
    when either input series sums to zero.

    Args:
        active_TB_treatments: per-timestep counts of active TB treatments.
        relapses: per-timestep relapse counts.
        tb_drug_relapse_rate_hiv: relapse rate used as the binomial probability.
        drug_start_timestep: checks start at the index after this one.
        report_name: path of the report file to create.

    Returns:
        True when every check passed and test data was present, else False.
    """
    with open(report_name, "w") as report:
        print(str(relapses), str(tb_drug_relapse_rate_hiv))

        # Both series need at least one non-zero entry for the test to be meaningful.
        success = sum(active_TB_treatments) != 0 and sum(relapses) != 0
        if not success:
            report.write(sft.sft_no_test_data)

        last_checked = len(active_TB_treatments) - 1
        for step in range(drug_start_timestep + 1, last_checked):
            next_step = step + 1
            trials = int(active_TB_treatments[step])
            # Relapses are read one timestep after the treatment count.
            relapse_count = relapses[next_step]
            if sft.test_binomial_99ci(relapse_count, trials, tb_drug_relapse_rate_hiv, report,
                                      category="time step {}".format(next_step)):
                continue
            success = False
            report.write("BAD: test fails for rate = {0} at time step {1}.\n".format(
                tb_drug_relapse_rate_hiv, next_step))

        report.write(sft.format_success_msg(success))
    return success
예제 #5
0
def create_report_file(param_obj, campaign_obj, stdout_df, property_df, property_list, report_name, debug):
    """Write the HINT transmission report and return the overall pass/fail flag.

    Test 1 checks preconditions read from ``param_obj`` and ``campaign_obj``:
    HINT is enabled, every start day is 0, every demographic coverage is 1 and
    the campaign's property restrictions equal ``"<property>:<first value>"``
    for every entry of ``property_list``.  Only when they hold, Tests 2 and 3
    run for each ``t`` in ``range(Simulation_Duration)``:

    * Test 2 compares the contagion and infection probability logged in
      ``stdout_df`` with values computed from the infected seed columns, the
      base infectivity and the transmission matrices (5% relative tolerance).
    * Test 3 compares the new infections of the susceptible-only column with
      a binomial expectation; the test fails when more than 5% of the
      timesteps fail.

    Afterwards every seed-group "new infections" column must sum to zero from
    the second row on, and three comparison plots are produced through
    ``sft.plot_data``.

    Args:
        param_obj: mapping indexed with ``ConfigKeys`` members.
        campaign_obj: mapping indexed with ``CampaignKeys`` members.
        stdout_df: DataFrame holding the logged timestep, group id, contagion,
            probability and stat-pop columns.
        property_df: DataFrame whose column names contain both a group name
            and a channel name from ``hint_support.channels``.
        property_list: property definitions, each providing the Property,
            Values and Matrix entries.
        report_name: path of the report file to create.
        debug: when truthy, the final success message is also printed.

    Returns:
        True when every executed test passed, False otherwise.
    """
    with open(report_name, "w") as outfile:
        config_name = param_obj[ConfigKeys.Config_Name]
        base_infectivity = param_obj[ConfigKeys.Base_Infectivity]
        outfile.write("Config_name = {}\n".format(config_name))
        outfile.write("{0} = {1} {2} = {3}\n".format(
            ConfigKeys.Base_Infectivity, base_infectivity,
            ConfigKeys.Run_Number, param_obj[ConfigKeys.Run_Number]))

        success = True

        outfile.write("Test 1: checking test conditions/setup in config.json and campaign.json:\n")
        if int(param_obj[ConfigKeys.Enable_Heterogeneous_Intranode_Transmission]) != 1:
            success = False
            outfile.write("BAD: HINT is not enabled, please check the test.\n")
        else:
            if not all(x == 0 for x in campaign_obj[CampaignKeys.Start_Day]):
                success = False
                outfile.write("BAD: All intervention should start at day 0, please check campaign.json.\n")

            if not all(x == 1 for x in campaign_obj[CampaignKeys.Demographic_Coverage]):
                success = False
                outfile.write("BAD: {} should be 1, please check campaign.json.\n".format(CampaignKeys.Demographic_Coverage))

            # The first value of each property is treated as its seed group and
            # the second value as its susceptible group.
            expected_property_restrictions = []
            seed_groups = []
            susceptible_groups = []
            for property_obj in property_list:
                property_values = property_obj[DemographicsKeys.PropertyKeys.Values]
                seed_groups.append(property_values[0])
                susceptible_groups.append(property_values[1])
                expected_property_restrictions.append(["{0}:{1}".format(
                    property_obj[DemographicsKeys.PropertyKeys.Property], property_values[0])])
            if campaign_obj[CampaignKeys.Property_Restrictions] != expected_property_restrictions:
                success = False
                outfile.write(
                    "BAD: {0} should be {1}, got {2} in campaign.json, please check campaign.json.\n".format(
                        CampaignKeys.Property_Restrictions, expected_property_restrictions,
                        campaign_obj[CampaignKeys.Property_Restrictions]))

        outfile.write("Test 1: campaign and config met preconditions: {}.\n".format(success))

        if success:
            outfile.write("Test 2: Testing contagion and probability with calculated values for every time step:\n")
            outfile.write("Test 3: Testing New Infection channel from property report based on transmission matrix:\n")
            outfile.write("Test 2 and 3 and running at the same time:\n")
            result_2 = result_3 = True
            stat_pop = stdout_df[hint_support.Stdout.stat_pop]
            duration = param_obj[ConfigKeys.Simulation_Duration]
            timestep_size = param_obj[ConfigKeys.Simulation_Timestep]

            # Group named in the messages and in the debug file name: the
            # susceptible group of the last property.
            susceptible_group = susceptible_groups[-1]

            # Partition the property report columns, keeping the DataFrame's
            # column order so the report is deterministic.
            all_cols = list(property_df.columns)
            # columns that mention at least one seed group
            seed_cols = [c for c in all_cols if any(group in c for group in seed_groups)]
            seed_col_set = set(seed_cols)
            # columns that mention only susceptible groups
            susceptible_only_cols = [c for c in all_cols if c not in seed_col_set]
            # columns that mention at least one susceptible group
            susceptible_col_set = set(
                c for c in all_cols if any(group in c for group in susceptible_groups))
            # columns that mention only seed groups
            seed_only_cols = [c for c in all_cols if c not in susceptible_col_set]

            # test data for seed group
            infected_seed = property_df[[c for c in seed_only_cols if hint_support.channels[0] in c]]

            # test data for susceptible group
            infected_sus = property_df[[c for c in susceptible_only_cols if hint_support.channels[0] in c]]
            new_infection_sus = property_df[[c for c in susceptible_only_cols if hint_support.channels[1] in c]]
            population_sus = property_df[[c for c in susceptible_only_cols if hint_support.channels[2] in c]]

            # The transmission-matrix multiplier of each seed column does not
            # depend on the timestep, so compute it once per column.
            infectivity_mods = {}
            for col in infected_seed:
                infectivity_mod = 1
                for seed_group, paired_susceptible in zip(seed_groups, susceptible_groups):
                    if seed_group not in col:
                        continue
                    for property_obj in property_list:
                        property_values = property_obj[DemographicsKeys.PropertyKeys.Values]
                        if seed_group in property_values:
                            transmission_matrix = property_obj[DemographicsKeys.PropertyKeys.Matrix]
                            infectivity_mod *= transmission_matrix[property_values.index(seed_group)][
                                property_values.index(paired_susceptible)]
                infectivity_mods[col] = infectivity_mod

            # group_id is the last element in all group_ids: (total number of
            # property values) - 1.  It is the same for every timestep.
            group_id = sum(len(property_obj[DemographicsKeys.PropertyKeys.Values])
                           for property_obj in property_list) - 1

            contagion_list = []
            prob_list = []
            expected_new_infection_list = []
            failed_count = 0
            # Open the debug file once for the whole run: reopening it in 'w'
            # mode for every timestep would keep only the last timestep's output.
            with open("DEBUG_binomial_test_{}.txt".format(susceptible_group), 'w') as debug_file:
                for t in range(duration):
                    calculated_contagion = 0
                    for col in infected_seed:
                        # infectivity of the seed group, normalized with total population
                        # NOTE(review): stat_pop is looked up by label t; this assumes
                        # row t of stdout_df carries the population for timestep t - confirm.
                        infectivity_seed = base_infectivity * infected_seed[col].iloc[t] / stat_pop[t]
                        # contribution of this seed column to the susceptible group's contagion
                        calculated_contagion += infectivity_seed * infectivity_mods[col]

                    # round the calculated value to 6 decimal places
                    calculated_contagion = round(calculated_contagion, 6)

                    # logged values of the susceptible group at this timestep
                    logged_rows = stdout_df[(stdout_df[ConfigKeys.Simulation_Timestep] == t) &
                                            (stdout_df[hint_support.Stdout.group_id] == group_id)]

                    actual_contagion = logged_rows[hint_support.Stdout.contagion].values[0]
                    contagion_list.append([actual_contagion, calculated_contagion])
                    if math.fabs(calculated_contagion - actual_contagion) > 5e-2 * calculated_contagion:
                        result_2 = success = False
                        outfile.write("    BAD: at time step {0}, for group {1} id {2}, the total contagion is {3}, "
                                      "expected {4}.\n".format(t, susceptible_group, group_id, actual_contagion,
                                                               calculated_contagion))

                    # infection probability derived from the contagion, rounded to 6 decimal places
                    calculated_prob = 1.0 - math.exp(-1 * calculated_contagion * timestep_size)
                    calculated_prob = round(calculated_prob, 6)
                    actual_prob = logged_rows[hint_support.Stdout.prob].values[0]
                    prob_list.append([actual_prob, calculated_prob])
                    if math.fabs(calculated_prob - actual_prob) > 5e-2 * calculated_prob:
                        result_2 = success = False
                        outfile.write("    BAD: at time step {0}, for group {1} id {2}, the infected probability is "
                                      "{3}, expected {4}.\n".format(t, susceptible_group, group_id, actual_prob,
                                                                     calculated_prob))

                    # expected new infections for the susceptible group; only the first
                    # matching column is used (explicit positional access)
                    susceptible_population = population_sus.iloc[t, 0] - infected_sus.iloc[t, 0]
                    expected_new_infection = susceptible_population * calculated_prob
                    expected_new_infection_list.append(expected_new_infection)
                    actual_new_infection = new_infection_sus.iloc[t, 0]

                    if expected_new_infection < 5 or susceptible_population * (1 - calculated_prob) < 5:
                        # small sample: judge the observation by its exact binomial pmf
                        binom_pmf = stats.binom.pmf(k=actual_new_infection, n=susceptible_population, p=calculated_prob)
                        if binom_pmf < 1e-3:
                            failed_count += 1
                            outfile.write("WARNING: at timestep {0}, new infections for {1} group is {2}, expected "
                                          " = {3}, calculated binomial pmf is {4}.\n"
                                          "".format(t, susceptible_group, actual_new_infection,
                                                    expected_new_infection, binom_pmf))

                    elif not sft.test_binomial_99ci(num_success=actual_new_infection, num_trials=susceptible_population,
                                                    prob=calculated_prob, report_file=debug_file,
                                                    category="new infections for {0} at time {1}".format(susceptible_group, t)):
                        failed_count += 1
                        standard_deviation = math.sqrt(
                            calculated_prob * (1 - calculated_prob) * susceptible_population)
                        # mean +/- 3 standard deviations, reported as the 99% interval
                        lower_bound = expected_new_infection - 3 * standard_deviation
                        upper_bound = expected_new_infection + 3 * standard_deviation
                        outfile.write("WARNING: at timestep {0}, new infections for {1} group is {2}, expected "
                                      "within 99% binomial interval ({3}, {4}) with mean = {5}\n".format(
                                          t, susceptible_group, actual_new_infection,
                                          lower_bound, upper_bound, expected_new_infection))

            # make sure non-susceptible groups have no new infections after outbreak
            new_infection_non_susceptible = property_df[
                [c for c in seed_cols if hint_support.channels[1] in c]]
            message_template = "{0}: total new infection after outbreak for {1} is {2}, expected 0.\n"
            for col in new_infection_non_susceptible:
                # Skip the first row.  DataFrame.ix was removed in pandas 1.0;
                # .iloc selects the same rows for a default 0-based index.
                total_new_infection = new_infection_non_susceptible[col].iloc[1:].sum()
                if total_new_infection != 0:
                    success = False
                    outfile.write(message_template.format("BAD", col, total_new_infection))
                else:
                    outfile.write(message_template.format("GOOD", col, total_new_infection))

            # plotting
            # get the group name for the susceptible only group
            group_name = susceptible_only_cols[0].replace(":", " ", 1).split()[-1]
            # ":" is not allowed in filename, replace it with "-" to avoid OSError
            group_name_modified = group_name.replace(":", "-")
            contagion_data = np.array(contagion_list)
            prob_data = np.array(prob_list)
            sft.plot_data(contagion_data[:, 0], contagion_data[:, 1],
                          label1='contagion from logging', label2="calculated contagion",
                          title="{}\ncontagion".format(group_name), xlabel='day', ylabel='contagion',
                          category="contagion_{}".format(group_name_modified),
                          line=True, alpha=0.5, overlap=True)
            sft.plot_data(prob_data[:, 0], prob_data[:, 1],
                          label1='probability from logging', label2="calculated probability",
                          title="{}\nprobability".format(group_name), xlabel='day', ylabel='probability',
                          category="probability_{}".format(group_name_modified),
                          line=True, alpha=0.5, overlap=True)

            # first susceptible-only "new infections" column, selected by position
            sft.plot_data(new_infection_sus.iloc[:, 0].tolist(), expected_new_infection_list,
                          label1='from property report',
                          label2="calculated data",
                          title="{}\nnew infections".format(group_name),
                          xlabel='day', ylabel='new_infections',
                          category="new_infections_{}".format(group_name_modified),
                          line=False, alpha=0.5, overlap=True, sort=False)

            message_template = "{0}: binomial test for {1} failed {2} times within {3} total timesteps, which is " \
                               "{4}% fail rate, test is {5}.\n"
            fail_rate = failed_count / duration
            if fail_rate > 5e-2:
                result_3 = success = False
                status, verdict = "BAD", "failed"
            else:
                status, verdict = "GOOD", "passed"
            outfile.write(message_template.format(status, new_infection_sus.columns.values, failed_count,
                                                  duration, fail_rate * 100, verdict))

            outfile.write("Test 2: result is: {}.\n".format(result_2))
            outfile.write("Test 3: result is: {}.\n".format(result_3))

        outfile.write(sft.format_success_msg(success))
    if debug:
        print(sft.format_success_msg(success))
    return success
def create_report_file(param_obj, campaign_obj, stdout_df, property_df, property_obj, report_name, debug):
    """Write the single-property HINT transmission report and return the verdict.

    Test 1 checks preconditions read from ``param_obj`` and ``campaign_obj``:
    HINT is enabled, every start day is 0, every demographic coverage is 1 and
    the campaign's property restrictions equal ``"<property>:<seed value>"``
    for every property value whose name contains "seed".  Only when they hold,
    Tests 2 and 3 run for every seed group and every ``t`` in
    ``range(Simulation_Duration)``:

    * Test 2 compares the contagion and infection probability logged in
      ``stdout_df`` for the matching susceptible group with values computed
      from the infected seed count, the base infectivity and the transmission
      matrix (5% relative tolerance).
    * Test 3 compares the susceptible group's new infections with a binomial
      expectation; a seed group fails when more than 5% of its timesteps fail.

    Three comparison plots per seed group are produced through
    ``sft.plot_data``.

    Args:
        param_obj: mapping indexed with ``ConfigKeys`` members.
        campaign_obj: mapping indexed with ``CampaignKeys`` members.
        stdout_df: DataFrame holding the logged timestep, group id, contagion,
            probability and stat-pop columns.
        property_df: DataFrame whose column names contain both a group name
            and a channel name from ``channels``.
        property_obj: property definition providing the Property, Values and
            Matrix entries.
        report_name: path of the report file to create.
        debug: when truthy, the final success message is also printed.

    Returns:
        True when every executed test passed, False otherwise.
    """
    with open(report_name, "w") as outfile:
        config_name = param_obj[ConfigKeys.Config_Name]
        base_infectivity = param_obj[ConfigKeys.Base_Infectivity]
        outfile.write("Config_name = {}\n".format(config_name))
        outfile.write("{0} = {1} {2} = {3}\n".format(
            ConfigKeys.Base_Infectivity, base_infectivity,
            ConfigKeys.Run_Number, param_obj[ConfigKeys.Run_Number]))

        success = True

        outfile.write("Test 1: checking test conditions/setup in config.json and campaign.json:\n")
        if int(param_obj[ConfigKeys.Enable_Heterogeneous_Intranode_Transmission]) != 1:
            success = False
            outfile.write("BAD: HINT is not enabled, please check the test.\n")
        else:
            if not all(x == 0 for x in campaign_obj[CampaignKeys.Start_Day]):
                success = False
                outfile.write("BAD: All intervention should start at day 0, please check campaign.json.\n")

            if not all(x == 1 for x in campaign_obj[CampaignKeys.Demographic_Coverage]):
                success = False
                outfile.write("BAD: {} should be 1, please check campaign.json.\n".format(CampaignKeys.Demographic_Coverage))

            # Seed groups are the property values whose name contains "seed".
            property_values = property_obj[DemographicsKeys.PropertyKeys.Values]
            seed_groups = [value for value in property_values if 'seed' in value.lower()]
            expected_property_restrictions = [
                ["{0}:{1}".format(property_obj[DemographicsKeys.PropertyKeys.Property], value)]
                for value in seed_groups]
            if campaign_obj[CampaignKeys.Property_Restrictions] != expected_property_restrictions:
                success = False
                outfile.write(
                    "BAD: {0} should be {1}, got {2} in campaign.json, please check campaign.json.\n".format(
                        CampaignKeys.Property_Restrictions, expected_property_restrictions,
                        campaign_obj[CampaignKeys.Property_Restrictions]))
        outfile.write("Test 1: campaign and config met preconditions: {}.\n".format(success))

        if success:
            outfile.write("Test 2: Testing contagion and probability with calculated values for every time step:\n")
            outfile.write("Test 3: Testing New Infection channel from property report based on transmission matrix:\n")
            outfile.write("Test 2 and 3 and running at the same time:\n")
            result_2 = result_3 = True
            stat_pop = stdout_df[Stdout.stat_pop]
            duration = param_obj[ConfigKeys.Simulation_Duration]
            timestep_size = param_obj[ConfigKeys.Simulation_Timestep]
            transmission_matrix = property_obj[DemographicsKeys.PropertyKeys.Matrix]
            message_template = "{0}: binomial test for {1} failed {2} times within {3} total timesteps, which is " \
                               "{4}% fail rate, test is {5}.\n"

            for seed_group in seed_groups:
                contagion_list = []
                prob_list = []
                # The susceptible group is the first value that contains "susceptible"
                # and shares the seed group's last "_"-separated token.
                seed_suffix = seed_group.split('_')[-1]
                susceptible_group = [value for value in property_values
                                     if "susceptible" in value.lower() and seed_suffix in value][0]
                # get all the column names that contain the seed group
                seed_cols = [c for c in property_df.columns if seed_group in c]
                # get all the column names that contain the susceptible group
                susceptible_cols = [c for c in property_df.columns if susceptible_group in c]

                # test data for seed group
                infected_seed = property_df[[c for c in seed_cols if channels[0] in c]]

                # test data for susceptible group
                infected_sus = property_df[[c for c in susceptible_cols if channels[0] in c]]
                new_infection_sus = property_df[[c for c in susceptible_cols if channels[1] in c]]
                population_sus = property_df[[c for c in susceptible_cols if channels[2] in c]]

                # Neither the group id nor the transmission-matrix entry depends on
                # the timestep, so look them up once per seed group.
                group_id = property_values.index(susceptible_group)
                matrix_entry = transmission_matrix[property_values.index(seed_group)][group_id]

                expected_new_infection_list = []
                failed_count = 0
                # Open the debug file once per group: reopening it in 'w' mode for
                # every timestep would keep only the last timestep's output.
                with open("DEBUG_binomial_test_{}.txt".format(susceptible_group), 'w') as debug_file:
                    for t in range(duration):
                        # infectivity of the seed group, normalized with total population
                        # NOTE(review): stat_pop is looked up by label t; this assumes
                        # row t of stdout_df carries the population for timestep t - confirm.
                        infectivity_seed = base_infectivity * infected_seed.iloc[t, 0] / stat_pop[t]

                        # contagion of the susceptible group
                        calculated_contagion = infectivity_seed * matrix_entry

                        # logged values of the susceptible group at this timestep
                        logged_rows = stdout_df[(stdout_df[ConfigKeys.Simulation_Timestep] == t) &
                                                (stdout_df[Stdout.group_id] == group_id)]

                        actual_contagion = logged_rows[Stdout.contagion].values[0]
                        contagion_list.append([actual_contagion, calculated_contagion])
                        if math.fabs(calculated_contagion - actual_contagion) > 5e-2 * calculated_contagion:
                            result_2 = success = False
                            outfile.write("    BAD: at time step {0}, for group {1} id {2}, the total contagion is {3}, "
                                          "expected {4}.\n".format(t, susceptible_group, group_id, actual_contagion,
                                                                   calculated_contagion))

                        # infection probability derived from the contagion
                        calculated_prob = 1.0 - math.exp(-1 * calculated_contagion * timestep_size)
                        actual_prob = logged_rows[Stdout.prob].values[0]
                        prob_list.append([actual_prob, calculated_prob])
                        if math.fabs(calculated_prob - actual_prob) > 5e-2 * calculated_prob:
                            result_2 = success = False
                            outfile.write("    BAD: at time step {0}, for group {1} id {2}, the infected probability is "
                                          "{3}, expected {4}.\n".format(t, susceptible_group, group_id, actual_prob,
                                                                         calculated_prob))

                        # expected new infections for the susceptible group; only the first
                        # matching column is used (explicit positional access)
                        susceptible_population = population_sus.iloc[t, 0] - infected_sus.iloc[t, 0]
                        expected_new_infection = susceptible_population * calculated_prob
                        expected_new_infection_list.append(expected_new_infection)
                        actual_new_infection = new_infection_sus.iloc[t, 0]

                        if expected_new_infection < 5 or susceptible_population * (1 - calculated_prob) < 5:
                            # small sample: judge the observation by its exact binomial pmf
                            binom_pmf = stats.binom.pmf(k=actual_new_infection, n=susceptible_population,
                                                        p=calculated_prob)
                            if binom_pmf < 1e-3:
                                failed_count += 1
                                outfile.write("WARNING: at timestep {0}, new infections for {1} group is {2}, expected "
                                              " = {3}, calculated binomial pmf is {4}.\n"
                                              "".format(t, susceptible_group, actual_new_infection,
                                                        expected_new_infection, binom_pmf))

                        elif not sft.test_binomial_99ci(num_success=actual_new_infection,
                                                        num_trials=susceptible_population,
                                                        prob=calculated_prob, report_file=debug_file,
                                                        category="new infections for {0} at time {1}".format(
                                                            susceptible_group, t)):
                            failed_count += 1
                            standard_deviation = math.sqrt(
                                calculated_prob * (1 - calculated_prob) * susceptible_population)
                            # mean +/- 3 standard deviations, reported as the 99% interval
                            lower_bound = expected_new_infection - 3 * standard_deviation
                            upper_bound = expected_new_infection + 3 * standard_deviation
                            outfile.write("WARNING: at timestep {0}, new infections for {1} group is {2}, expected "
                                          "within 99% binomial interval ({3}, {4}) with mean = {5}\n".format(
                                              t, susceptible_group, actual_new_infection,
                                              lower_bound, upper_bound, expected_new_infection))

                contagion_data = np.array(contagion_list)
                prob_data = np.array(prob_list)
                sft.plot_data(contagion_data[:, 0], contagion_data[:, 1],
                              label1='contagion from logging', label2="calculated contagion",
                              title="actual vs. expected contagion", xlabel='day', ylabel='contagion',
                              category="contagion_{}".format(susceptible_group),
                              line=True, alpha=0.5, overlap=True)
                sft.plot_data(prob_data[:, 0], prob_data[:, 1],
                              label1='probability from logging', label2="calculated probability",
                              title="actual vs. expected probability", xlabel='day', ylabel='probability',
                              category="probability_{}".format(susceptible_group),
                              line=True, alpha=0.5, overlap=True)

                # First "new infections" column of the susceptible group, selected by
                # position (DataFrame.ix was removed in pandas 1.0).
                sft.plot_data(new_infection_sus.iloc[:, 0].tolist(), expected_new_infection_list,
                              label1='from property report',
                              label2="calculated data",
                              title="new infections for {}".format(susceptible_group),
                              xlabel='day', ylabel='new_infections',
                              category="new_infections_{}".format(susceptible_group),
                              line=False, alpha=0.5, overlap=True, sort=False)

                fail_rate = failed_count / duration
                if fail_rate > 5e-2:
                    result_3 = success = False
                    status, verdict = "BAD", "failed"
                else:
                    status, verdict = "GOOD", "passed"
                outfile.write(message_template.format(status, new_infection_sus.columns.values, failed_count,
                                                      duration, fail_rate * 100, verdict))

            outfile.write("Test 2: result is: {}.\n".format(result_2))
            outfile.write("Test 3: result is: {}.\n".format(result_3))

        outfile.write(sft.format_success_msg(success))
    if debug:
        print(sft.format_success_msg(success))
    return success
예제 #7
0
def create_report_file(param_obj, campaign_obj, stdout_df, property_df,
                       property_obj, report_name, debug):
    """Check HINT transmission from seed groups to susceptible groups.

    Three tests are written to the report file:

    * Test 1 verifies the preconditions: HINT is enabled, every campaign
      event starts at day 0 with demographic coverage 1, and the campaign's
      property restrictions match the "seed" property values.
    * Test 2 compares, for every timestep, the contagion and infection
      probability logged in ``stdout_df`` against values calculated from the
      base infectivity and the transmission matrix (5% relative tolerance).
    * Test 3 checks the new-infection counts from the property report against
      a binomial distribution; a group fails when more than 5% of the
      timesteps fall outside the accepted range.

    Tests 2 and 3 only run when Test 1 passes.

    Args:
        param_obj: mapping of config values, indexed with ``ConfigKeys`` names.
        campaign_obj: mapping of campaign values, indexed with
            ``CampaignKeys`` names; each entry is iterated/compared as a list.
        stdout_df: DataFrame parsed from the logging output; filtered by the
            timestep and group-id columns to read contagion and probability.
        property_df: DataFrame of property report channels; columns are
            selected by substring match on group name and channel name.
        property_obj: mapping with the property name, its values and the
            transmission matrix (``DemographicsKeys.PropertyKeys``).
        report_name: path of the report file to (over)write.
        debug: when truthy, the final success message is also printed.

    Returns:
        bool: True when every executed test passed, False otherwise.

    Side effects:
        Writes ``report_name``, one ``DEBUG_binomial_test_<group>.txt`` file
        per susceptible group, and calls ``sft.plot_data`` three times per
        seed group.
    """
    with open(report_name, "w") as outfile:
        config_name = param_obj[ConfigKeys.Config_Name]
        base_infectivity = param_obj[ConfigKeys.Base_Infectivity]
        outfile.write("Config_name = {}\n".format(config_name))
        outfile.write("{0} = {1} {2} = {3}\n".format(
            ConfigKeys.Base_Infectivity, base_infectivity,
            ConfigKeys.Run_Number, param_obj[ConfigKeys.Run_Number]))

        success = True

        outfile.write(
            "Test 1: checking test conditions/setup in config.json and campaign.json:\n"
        )
        if int(param_obj[
                ConfigKeys.Enable_Heterogeneous_Intranode_Transmission]) != 1:
            success = False
            outfile.write("BAD: HINT is not enabled, please check the test.\n")
        else:
            if not all(x == 0 for x in campaign_obj[CampaignKeys.Start_Day]):
                success = False
                outfile.write(
                    "BAD: All intervention should start at day 0, please check campaign.json.\n"
                )

            if not all(
                    x == 1
                    for x in campaign_obj[CampaignKeys.Demographic_Coverage]):
                success = False
                outfile.write(
                    "BAD: {} should be 1, please check campaign.json.\n".
                    format(CampaignKeys.Demographic_Coverage))

            property_name = property_obj[
                DemographicsKeys.PropertyKeys.Property]
            property_values = property_obj[
                DemographicsKeys.PropertyKeys.Values]
            seed_groups = [
                value for value in property_values if 'seed' in value.lower()
            ]
            # The campaign is expected to carry one single-entry restriction
            # list ("Property:Value") per seed group, in the same order.
            expected_property_restrictions = [
                ["{0}:{1}".format(property_name, value)]
                for value in seed_groups
            ]
            if campaign_obj[
                    CampaignKeys.
                    Property_Restrictions] != expected_property_restrictions:
                success = False
                outfile.write(
                    "BAD: {0} should be {1}, got {2} in campaign.json, please check campaign.json.\n"
                    .format(CampaignKeys.Property_Restrictions,
                            expected_property_restrictions,
                            campaign_obj[CampaignKeys.Property_Restrictions]))
        outfile.write(
            "Test 1: campaign and config met preconditions: {}.\n".format(
                success))

        if success:
            outfile.write(
                "Test 2: Testing contagion and probability with calculated values for every time step:\n"
            )
            outfile.write(
                "Test 3: Testing New Infection channel from property report based on transmission matrix:\n"
            )
            outfile.write("Test 2 and 3 and running at the same time:\n")
            result_2 = result_3 = True
            stat_pop = stdout_df[Stdout.stat_pop]
            duration = param_obj[ConfigKeys.Simulation_Duration]
            transmission_matrix = property_obj[
                DemographicsKeys.PropertyKeys.Matrix]
            for seed_group in seed_groups:
                contagion_list = []
                prob_list = []
                # get the name of susceptible group, there should be only one
                # susceptible group sharing the seed group's suffix
                susceptible_group = [
                    value for value in property_values
                    if "susceptible" in value.lower()
                    and seed_group.split('_')[-1] in value
                ][0]
                # positions of both groups in the property value list; they
                # index the transmission matrix and identify the group in
                # the logging output
                seed_index = property_values.index(seed_group)
                group_id = property_values.index(susceptible_group)

                # get all the column names that contains the seed group
                seed_cols = [c for c in property_df.columns if seed_group in c]
                # get all the column names that contains the susceptible group
                susceptible_cols = [
                    c for c in property_df.columns if susceptible_group in c
                ]

                # test data for seed group
                infected_seed = property_df[[
                    c for c in seed_cols if channels[0] in c
                ]]

                # test data for susceptible group
                infected_sus = property_df[[
                    c for c in susceptible_cols if channels[0] in c
                ]]
                new_infection_sus = property_df[[
                    c for c in susceptible_cols if channels[1] in c
                ]]
                population_sus = property_df[[
                    c for c in susceptible_cols if channels[2] in c
                ]]

                expected_new_infection_list = []
                failed_count = 0
                # Open the debug file once per group: reopening it in 'w' mode
                # at every timestep would truncate it and keep only the output
                # of the last timestep.
                debug_file_name = "DEBUG_binomial_test_{}.txt".format(
                    susceptible_group)
                with open(debug_file_name, 'w') as binomial_test_file:
                    for t in range(duration):
                        # infectivity of the seed group, normalized with the
                        # total population
                        infectivity_seed = base_infectivity * infected_seed.iloc[
                            t, 0] / stat_pop[t]

                        # calculate contagion of susceptible group
                        calculated_contagion = infectivity_seed * transmission_matrix[
                            seed_index][group_id]

                        # logged values of the susceptible group at this
                        # timestep (filtered once, used for both checks)
                        stdout_row = stdout_df[
                            (stdout_df[ConfigKeys.Simulation_Timestep] == t)
                            & (stdout_df[Stdout.group_id] == group_id)]

                        actual_contagion = stdout_row[
                            Stdout.contagion].values[0]
                        contagion_list.append(
                            [actual_contagion, calculated_contagion])
                        if math.fabs(calculated_contagion - actual_contagion
                                     ) > 5e-2 * calculated_contagion:
                            result_2 = success = False
                            outfile.write(
                                "    BAD: at time step {0}, for group {1} id {2}, the total contagion is {3}, "
                                "expected {4}.\n".format(t, susceptible_group,
                                                         group_id,
                                                         actual_contagion,
                                                         calculated_contagion))

                        # calculate infection probability based on contagion
                        calculated_prob = 1.0 - math.exp(
                            -1 * calculated_contagion *
                            param_obj[ConfigKeys.Simulation_Timestep])
                        actual_prob = stdout_row[Stdout.prob].values[0]
                        prob_list.append([actual_prob, calculated_prob])
                        if math.fabs(calculated_prob -
                                     actual_prob) > 5e-2 * calculated_prob:
                            result_2 = success = False
                            outfile.write(
                                "    BAD: at time step {0}, for group {1} id {2}, the infected probability is "
                                "{3}, expected {4}.\n".format(
                                    t, susceptible_group, group_id,
                                    actual_prob, calculated_prob))

                        # calculate expected new infection for susceptible group
                        susceptible_population = population_sus.iloc[
                            t, 0] - infected_sus.iloc[t, 0]
                        expected_new_infection = susceptible_population * calculated_prob
                        expected_new_infection_list.append(
                            expected_new_infection)
                        actual_new_infection = new_infection_sus.iloc[t, 0]

                        if expected_new_infection < 5 or susceptible_population * (
                                1 - calculated_prob) < 5:
                            # Too few expected successes or failures for the
                            # confidence-interval test: use the exact pmf.
                            binom_pmf = stats.binom.pmf(
                                k=actual_new_infection,
                                n=susceptible_population,
                                p=calculated_prob)
                            if binom_pmf < 1e-3:
                                failed_count += 1
                                outfile.write(
                                    "WARNING: at timestep {0}, new infections for {1} group is {2}, expected "
                                    " = {3}, calculated binomial pmf is {4}.\n"
                                    "".format(t, susceptible_group,
                                              actual_new_infection,
                                              expected_new_infection,
                                              binom_pmf))

                        elif not sft.test_binomial_99ci(
                                num_success=actual_new_infection,
                                num_trials=susceptible_population,
                                prob=calculated_prob,
                                report_file=binomial_test_file,
                                category="new infections for {0} at time {1}".
                                format(susceptible_group, t)):
                            failed_count += 1
                            standard_deviation = math.sqrt(
                                calculated_prob * (1 - calculated_prob) *
                                susceptible_population)
                            # bounds reported in the warning: mean +/- 3 sigma
                            lower_bound = expected_new_infection - 3 * standard_deviation
                            upper_bound = expected_new_infection + 3 * standard_deviation
                            outfile.write(
                                "WARNING: at timestep {0}, new infections for {1} group is {2}, expected "
                                "within 99% binomial interval ({3}, {4}) with mean = {5}\n"
                                .format(t, susceptible_group,
                                        actual_new_infection, lower_bound,
                                        upper_bound, expected_new_infection))

                contagion_array = np.array(contagion_list)
                sft.plot_data(
                    contagion_array[:, 0],
                    contagion_array[:, 1],
                    label1='contagion from logging',
                    label2="calculated contagion",
                    title="actual vs. expected contagion",
                    xlabel='day',
                    ylabel='contagion',
                    category="contagion_{}".format(susceptible_group),
                    line=True,
                    alpha=0.5,
                    overlap=True)
                prob_array = np.array(prob_list)
                sft.plot_data(
                    prob_array[:, 0],
                    prob_array[:, 1],
                    label1='probability from logging',
                    label2="calculated probability",
                    title="actual vs. expected probability",
                    xlabel='day',
                    ylabel='probability',
                    category="probability_{}".format(susceptible_group),
                    line=True,
                    alpha=0.5,
                    overlap=True)

                # .ix was removed in pandas 1.0; .iloc selects the first
                # column by position, which is what .ix did for these
                # string-labelled columns.
                sft.plot_data(
                    new_infection_sus.iloc[:, 0].tolist(),
                    expected_new_infection_list,
                    label1='from property report',
                    label2="calculated data",
                    title="new infections for {}".format(susceptible_group),
                    xlabel='day',
                    ylabel='new_infections',
                    category="new_infections_{}".format(susceptible_group),
                    line=False,
                    alpha=0.5,
                    overlap=True,
                    sort=False)

                message_template = "{0}: binomial test for {1} failed {2} times within {3} total timesteps, which is " \
                                   "{4}% fail rate, test is {5}.\n"
                # float() keeps the ratio fractional even under Python 2
                # integer division.
                fail_rate = failed_count / float(duration)
                if fail_rate > 5e-2:
                    result_3 = success = False
                    outfile.write(
                        message_template.format(
                            "BAD", new_infection_sus.columns.values,
                            failed_count, duration, fail_rate * 100,
                            "failed"))
                else:
                    outfile.write(
                        message_template.format(
                            "GOOD", new_infection_sus.columns.values,
                            failed_count, duration, fail_rate * 100,
                            "passed"))

            outfile.write("Test 2: result is: {}.\n".format(result_2))
            outfile.write("Test 3: result is: {}.\n".format(result_3))

        outfile.write(sft.format_success_msg(success))
    if debug:
        print(sft.format_success_msg(success))
    return success
예제 #8
0
def create_report_file(Resistances, initial_resistances, drug_start_time,
                       param_obj, report_name, inset_days, debug):
    """Check drug-resistance acquisition against the configured rate.

    Two checks are written to the report file:

    * For every day, the MDR prevalence from the InsetChart is compared with
      ``Resistances[x] / statistical population``; a difference above 0.03
      is recorded as a BAD message and fails the test.
    * For every day after ``drug_start_time``, the number of new resistances
      is tested against a binomial distribution with probability
      ``param_obj["TB_Drug_Resistance_Rate_HIV"]``; the test fails when more
      than ``ceil(1%)`` of these daily checks fail.

    Args:
        Resistances: per-day resistant counts, indexed by day.
        initial_resistances: not used by the code visible here (only
            mentioned in a commented-out check).
        drug_start_time: day index from which resistances are collected.
        param_obj: mapping holding "TB_Drug_Resistance_Rate_HIV".
        report_name: path of the report file to (over)write.
        inset_days: per-day InsetChart records, indexed by channel key.
        debug: when truthy, writes per-day details and shows plots.
    """
    with open(report_name, "w") as outfile:
        # population on day 0; used below as the pool for the binomial tests
        starting_pop = inset_days[0][
            dts.InsetChart.Channels.KEY_StatisticalPopulation]
        # success = sft.test_binomial_95ci( initial_resistances, starting_pop, param_obj["TB_Drug_Resistance_Rate_HIV"], outfile, "???" )
        success = True
        progression = []  # resistant counts from drug_start_time onwards
        bad_msgs = []  # prevalence mismatches, written at the end of the report
        for x in range(len(inset_days)):
            inset_day = inset_days[x]
            inset_mdr_prevalence = inset_day[
                dts.InsetChart.Channels.KEY_MdrTbPrevalence]
            stdout_resistants = Resistances[x]
            if x >= drug_start_time:
                progression.append(stdout_resistants)
            if debug:
                outfile.write("Day: {0}\n".format(x))
                outfile.write(str(inset_day) + "\n")
                outfile.write(
                    "StdOut resistants: {0}\n".format(stdout_resistants))
            # prevalence implied by the logged resistant count for this day
            stdout_predicted_prevalence = stdout_resistants / float(
                inset_day[dts.InsetChart.Channels.KEY_StatisticalPopulation])
            # absolute tolerance of 0.03 between the two prevalence values
            if abs(inset_mdr_prevalence - stdout_predicted_prevalence) > 0.03:
                bad_msgs.append(
                    "BAD: at timestep {0}, expected MDR prevalence: {1}, InsetChart had: {2}\n"
                    .format(x, stdout_predicted_prevalence,
                            inset_mdr_prevalence))

        tb_drug_resistance_rate_hiv = param_obj["TB_Drug_Resistance_Rate_HIV"]
        new_resistances = []
        # NOTE(review): the previous count starts at 0, so the first tested day
        # (drug_start_time + 1) treats its whole cumulative count as "new" --
        # confirm this should not be seeded with Resistances[drug_start_time].
        pre_resistance = 0
        failed_count = 0
        total_test = 0
        for x in range(drug_start_time + 1, len(Resistances)):
            resistance = Resistances[x]
            # new resistances of the day = increase over the previous day
            new_resistance = resistance - pre_resistance
            pre_resistance = resistance
            new_resistances.append(new_resistance)
            # NOTE(review): the trial count uses the current day's resistant
            # count (starting_pop - resistance), i.e. after today's new cases
            # are already included -- confirm this is intended.
            expected_mean = (starting_pop -
                             resistance) * tb_drug_resistance_rate_hiv
            total_test += 1
            if expected_mean >= 5:  # mean large enough for the binomial CI test; smaller means use the 3-sigma check below
                result = sft.test_binomial_99ci(
                    new_resistance,
                    starting_pop - resistance,
                    tb_drug_resistance_rate_hiv,
                    outfile,
                    category="time step {}".format(x + 1))
                if not result:
                    failed_count += 1
                    outfile.write(
                        "Warning: New Resistance test fails for rate = {0} at time step {1}.\n"
                        .format(tb_drug_resistance_rate_hiv, x + 1))
            else:
                # small expected mean: accept values within three binomial
                # standard deviations of the mean
                error_tolerance = 3 * math.sqrt(
                    tb_drug_resistance_rate_hiv *
                    (1 - tb_drug_resistance_rate_hiv) *
                    (starting_pop - resistance))  # 3 sigma
                result = math.fabs(new_resistance -
                                   expected_mean) <= error_tolerance
                if not result:
                    failed_count += 1
                    outfile.write(
                        "Warning: New Resistance test fails for rate = {0} at time step {1}, "
                        "new resistance = {2}, expected mean = {3}, error tolerance = {4}.\n"
                        .format(tb_drug_resistance_rate_hiv, x + 1,
                                new_resistance, expected_mean,
                                error_tolerance))

        # allow up to ceil(1%) of the daily checks to fail before failing the test
        if failed_count > math.ceil(total_test * 0.01):
            success = False
            outfile.write(
                "BAD: test failed {0} times out of {1} timestep, please check the warning message.\n"
                "".format(failed_count, total_test))
        if debug:
            # diagnostic plots only; they do not affect the test result
            sft.plot_data(new_resistances,
                          title="new resistance over time",
                          category="new_resistance",
                          show=True)
            # reference series from sft.create_geometric_dis to compare with
            # the observed progression
            series = sft.create_geometric_dis(
                param_obj["TB_Drug_Resistance_Rate_HIV"],
                starting_pop,
                len(progression),
                test_decay=False)
            sft.plot_data(progression,
                          series,
                          label1="progression",
                          label2="geomatric dis",
                          xlabel="days",
                          ylabel="resistance",
                          title="progression vs geomatric",
                          category="progression_vs_geomatric",
                          show=True,
                          line=True)
            sft.plot_cdf(progression,
                         series,
                         label1="progression",
                         label2="geomatric dis",
                         title="progression vs geomatric cdf",
                         category="progression_vs_geomatric_cdf",
                         show=True)
        # success = sft.test_geometric_decay(progression, param_obj["TB_Drug_Resistance_Rate_HIV"], starting_pop, test_decay=False, report_file=outfile, debug=debug)

        # any prevalence mismatch collected in the first loop fails the test
        if len(bad_msgs) > 0:
            success = False
            outfile.writelines(bad_msgs)

        outfile.write(sft.format_success_msg(success))