def test_new_infections(expected_new_infection, actual_new_infection, susceptible_pop, probability, failed_count,
                        route, insetchart_name, binomial_test_file, t):
    """
    Score one timestep of new infections for a route against a binomial model.

    If expected_new_infection, or susceptible_pop * (1 - probability), is below 5,
    the binomial pmf of the observed count is computed directly and the timestep
    counts as failed when that pmf is under 5e-2. In every other case the decision
    is delegated to sft.test_binomial_99ci, which is handed binomial_test_file as
    its report file.

    :param expected_new_infection: value compared against 5 and echoed in the BAD message
    :param actual_new_infection: observed count, used as the number of successes
    :param susceptible_pop: number of binomial trials
    :param probability: per-trial success probability
    :param failed_count: running tally of failed timesteps
    :param route: route label inserted into the messages
    :param insetchart_name: name inserted after "in" in the BAD message
    :param binomial_test_file: open, writable report file
    :param t: timestep index; messages report it as t + 1
    :return: failed_count, plus one if this timestep failed
    """
    use_exact_pmf = expected_new_infection < 5 or susceptible_pop * (1 - probability) < 5

    if use_exact_pmf:
        pmf_of_observed = stats.binom.pmf(k=actual_new_infection, n=susceptible_pop, p=probability)
        if pmf_of_observed < 5e-2:
            failed_count += 1
            bad_message = ("BAD: at timestep {0}, total new infections for route {1} is "
                           "{2} in {3}, expected = {4}, calculated binomial pmf is {5}.\n"
                           "").format(t + 1, route, actual_new_infection, insetchart_name,
                                      expected_new_infection, pmf_of_observed)
            binomial_test_file.write(bad_message)
        return failed_count

    # Enough expected successes and failures: let the shared helper decide.
    within_interval = sft.test_binomial_99ci(
        num_success=actual_new_infection,
        num_trials=susceptible_pop,
        prob=probability,
        report_file=binomial_test_file,
        category="new infections by route({0}) at time {1}".format(route, t + 1))
    if not within_interval:
        failed_count += 1
    return failed_count
def test_new_infections(expected_new_infection, actual_new_infection, susceptible_pop, probability, failed_count,
                        route, insetchart_name, binomial_test_file, t):
    """
    Check the new infections of one route at one timestep and update the failure tally.

    Two strategies are used:
      * small samples (expected_new_infection < 5 or susceptible_pop * (1 - probability) < 5):
        the binomial pmf of the observed count is evaluated; a pmf below 5e-2 is a failure
        and a BAD line is written to binomial_test_file;
      * otherwise: sft.test_binomial_99ci is called with binomial_test_file as its report
        file, and a falsy result is a failure.

    :param expected_new_infection: value compared against 5 and echoed in the BAD message
    :param actual_new_infection: observed number of new infections
    :param susceptible_pop: number of binomial trials
    :param probability: per-trial probability
    :param failed_count: number of failures counted so far
    :param route: route label used in the messages
    :param insetchart_name: name inserted after "in" in the BAD message
    :param binomial_test_file: open, writable report file
    :param t: timestep index; messages report it as t + 1
    :return: the updated failure count
    """
    small_sample = expected_new_infection < 5 or susceptible_pop * (1 - probability) < 5

    if not small_sample:
        passed = sft.test_binomial_99ci(
            num_success=actual_new_infection,
            num_trials=susceptible_pop,
            prob=probability,
            report_file=binomial_test_file,
            category="new infections by route({0}) at time {1}".format(route, t + 1))
        if not passed:
            failed_count += 1
    else:
        point_probability = stats.binom.pmf(k=actual_new_infection, n=susceptible_pop, p=probability)
        if point_probability < 5e-2:
            failed_count += 1
            binomial_test_file.write(
                "BAD: at timestep {0}, total new infections for route {1} is "
                "{2} in {3}, expected = {4}, calculated binomial pmf is {5}.\n"
                "".format(t + 1, route, actual_new_infection, insetchart_name,
                          expected_new_infection, point_probability))

    return failed_count
def test_new_infections(expected_new_infection, actual_new_infection, calculated_prob, failed_count, route, t, group,
                        outfile, susceptible_population, file):
    """
    Check the new infections of one group/route at one timestep against a binomial model.

    When expected_new_infection, or susceptible_population * (1 - calculated_prob), is below 5,
    the binomial pmf of the observed count is evaluated and a pmf under 1e-3 counts as a failure.
    Otherwise sft.test_binomial_99ci decides (it receives ``file`` as its report file); on failure
    a WARNING with the mean +/- 3 standard deviations interval is written to ``outfile``.

    :param expected_new_infection: value compared against 5 and used as the interval mean
    :param actual_new_infection: observed number of new infections
    :param calculated_prob: per-trial infection probability
    :param failed_count: number of failures counted so far
    :param route: route label used in the WARNING messages
    :param t: timestep, reported unchanged in the messages
    :param group: group label used in the messages
    :param outfile: open, writable file that receives the WARNING lines
    :param susceptible_population: number of binomial trials
    :param file: open, writable file passed to sft.test_binomial_99ci as report_file
    :return: the updated failure count
    """
    small_sample = expected_new_infection < 5 or susceptible_population * (1 - calculated_prob) < 5

    if small_sample:
        point_probability = stats.binom.pmf(k=actual_new_infection, n=susceptible_population, p=calculated_prob)
        if point_probability < 1e-3:
            failed_count += 1
            outfile.write(
                "WARNING: at timestep {0}, new infections for {1} group route {2} is "
                "{3}, expected = {4}, calculated binomial pmf is {5}.\n"
                "".format(t, group, route, actual_new_infection, expected_new_infection, point_probability))
        return failed_count

    if sft.test_binomial_99ci(num_success=actual_new_infection,
                              num_trials=susceptible_population,
                              prob=calculated_prob,
                              report_file=file,
                              category="new infections for {0} at time {1}".format(group, t)):
        return failed_count

    failed_count += 1
    # math.sqrt(prob * (1 - prob) * num_trials)
    sigma = math.sqrt(calculated_prob * (1 - calculated_prob) * susceptible_population)
    half_width = 3 * sigma
    interval_low = expected_new_infection - half_width
    interval_high = expected_new_infection + half_width
    outfile.write(
        "WARNING: at timestep {0}, new infections for {1} group route {2} is {3},"
        " expected within 99% binomial interval ({4}, {5}) with mean = {6}\n"
        "".format(t, group, route, actual_new_infection, interval_low, interval_high, expected_new_infection))
    return failed_count
def create_report_file(active_TB_treatments, relapses, tb_drug_relapse_rate_hiv, drug_start_timestep, report_name):
    """
    Write the relapse report: binomial-test the relapses of each step against the treatments of the step before.

    For every index x from drug_start_timestep + 1 up to (but excluding) the last index of
    active_TB_treatments, relapses[x + 1] is tested as the number of successes out of
    int(active_TB_treatments[x]) trials at rate tb_drug_relapse_rate_hiv, using
    sft.test_binomial_99ci. The test is also marked as failed when either input sums to zero.

    :param active_TB_treatments: per-timestep counts of active treatments
    :param relapses: per-timestep counts of relapses
    :param tb_drug_relapse_rate_hiv: relapse rate used as the binomial probability
    :param drug_start_timestep: index after which the per-step checks start
    :param report_name: path of the report file to write
    :return: True when every check passed, otherwise False
    """
    with open(report_name, "w") as report:
        print(str(relapses), str(tb_drug_relapse_rate_hiv))

        success = True
        has_data = sum(active_TB_treatments) != 0 and sum(relapses) != 0
        if not has_data:
            success = False
            report.write(sft.sft_no_test_data)

        first_step = drug_start_timestep + 1
        stop_step = len(active_TB_treatments) - 1
        for step in range(first_step, stop_step):
            treated = int(active_TB_treatments[step])
            relapsed = relapses[step + 1]
            passed = sft.test_binomial_99ci(relapsed, treated, tb_drug_relapse_rate_hiv, report,
                                            category="time step {}".format(step + 1))
            if passed:
                continue
            success = False
            report.write("BAD: test fails for rate = {0} at time step {1}.\n".format(
                tb_drug_relapse_rate_hiv, step + 1))

        report.write(sft.format_success_msg(success))
    return success
def create_report_file(param_obj, campaign_obj, stdout_df, property_df, property_list, report_name, debug):
    """
    Validate HINT transmission across several individual properties and write the test report.

    Test 1 checks the preconditions: HINT enabled, every campaign Start_Day equal to 0, every
    Demographic_Coverage equal to 1, and Property_Restrictions matching the first value of each
    property in property_list. Tests 2 and 3 run only when Test 1 passes. For every timestep they
    compare the contagion and infection probability found in stdout_df with values computed from
    the property report and the transmission matrices, then check the reported new infections
    against a binomial expectation. Seed-group columns must show no new infections after the
    first row. Plots are produced through sft.plot_data.

    :param param_obj: mapping of config parameters, indexed with ConfigKeys members
    :param campaign_obj: mapping of campaign values, indexed with CampaignKeys members
    :param stdout_df: DataFrame with the timestep, group id, contagion, probability and
                      statistical population columns named by ConfigKeys / hint_support.Stdout
    :param property_df: DataFrame of property report channels; column names contain the group
                        names and the channel names from hint_support.channels
    :param property_list: list of property definitions; Values[0] of each is used as the seed
                          group and Values[1] as the susceptible group
    :param report_name: path of the report file to write
    :param debug: when truthy, the final success message is also printed
    :return: True when every check passed, otherwise False
    """
    with open(report_name, "w") as outfile:
        config_name = param_obj[ConfigKeys.Config_Name]
        base_infectivity = param_obj[ConfigKeys.Base_Infectivity]
        outfile.write("Config_name = {}\n".format(config_name))
        outfile.write("{0} = {1} {2} = {3}\n".format(
            ConfigKeys.Base_Infectivity, base_infectivity,
            ConfigKeys.Run_Number, param_obj[ConfigKeys.Run_Number]))

        success = True

        outfile.write("Test 1: checking test conditions/setup in config.json and campaign.json:\n")
        if int(param_obj[ConfigKeys.Enable_Heterogeneous_Intranode_Transmission]) != 1:
            success = False
            outfile.write("BAD: HINT is not enabled, please check the test.\n")
        else:
            if not all(x == 0 for x in campaign_obj[CampaignKeys.Start_Day]):
                success = False
                outfile.write("BAD: All intervention should start at day 0, please check campaign.json.\n")

            if not all(x == 1 for x in campaign_obj[CampaignKeys.Demographic_Coverage]):
                success = False
                outfile.write("BAD: {} should be 1, please check campaign.json.\n".format(
                    CampaignKeys.Demographic_Coverage))

            expected_property_restrictions = []
            seed_groups = []
            susceptible_groups = []
            for property_obj in property_list:
                property_values = property_obj[DemographicsKeys.PropertyKeys.Values]
                seed_groups.append(property_values[0])
                susceptible_groups.append(property_values[1])
                expected_property_restrictions.append(["{0}:{1}".format(
                    property_obj[DemographicsKeys.PropertyKeys.Property], property_values[0])])
            if campaign_obj[CampaignKeys.Property_Restrictions] != expected_property_restrictions:
                success = False
                outfile.write(
                    "BAD: {0} should be {1}, got {2} in campaign.json, please check campaign.json.\n".format(
                        CampaignKeys.Property_Restrictions, expected_property_restrictions,
                        campaign_obj[CampaignKeys.Property_Restrictions]))

        outfile.write("Test 1: campaign and config met preconditions: {}.\n".format(success))

        if success:
            outfile.write("Test 2: Testing contagion and probability with calculated values for every time step:\n")
            outfile.write("Test 3: Testing New Infection channel from property report based on transmission matrix:\n")
            outfile.write("Test 2 and 3 and running at the same time:\n")
            result_2 = result_3 = True
            stat_pop = stdout_df[hint_support.Stdout.stat_pop]
            duration = param_obj[ConfigKeys.Simulation_Duration]

            contagion_list = []
            prob_list = []

            # Column selections keep the order of property_df.columns so the report is deterministic
            # (going through set() made the order of the per-column messages arbitrary).
            all_cols = list(property_df.columns)
            # all the column names that contain any seed group
            seed_cols = [c for c in all_cols if any(group in c for group in seed_groups)]
            # columns that only contain susceptible groups: property_df.columns - seed_cols
            susceptible_only_cols = [c for c in all_cols if c not in seed_cols]
            # all the column names that contain any susceptible group
            susceptible_cols = [c for c in all_cols if any(group in c for group in susceptible_groups)]
            # columns that only contain seed groups: property_df.columns - susceptible_cols
            seed_only_cols = [c for c in all_cols if c not in susceptible_cols]

            # test data for seed group
            infected_seed = property_df[[c for c in seed_only_cols if hint_support.channels[0] in c]]
            # test data for susceptible group
            infected_sus = property_df[[c for c in susceptible_only_cols if hint_support.channels[0] in c]]
            new_infection_sus = property_df[[c for c in susceptible_only_cols if hint_support.channels[1] in c]]
            population_sus = property_df[[c for c in susceptible_only_cols if hint_support.channels[2] in c]]

            # group_id is the last element in all group_ids; it does not depend on the timestep
            group_id = sum(len(property_obj[DemographicsKeys.PropertyKeys.Values])
                           for property_obj in property_list) - 1
            # Messages and the debug file name always refer to the last susceptible group.
            reported_group = susceptible_groups[-1]

            expected_new_infection_list = []
            failed_count = 0
            # Open the debug file once: reopening it with 'w' on every timestep truncated the
            # output of all previous timesteps.
            with open("DEBUG_binomial_test_{}.txt".format(reported_group), 'w') as debug_file:
                for t in range(duration):
                    calculated_contagion = 0
                    for col in infected_seed:
                        # infectivity of this seed column, normalized with total population
                        infectivity_seed = base_infectivity * infected_seed[col].iloc[t] / stat_pop[t]
                        # product of the matrix entries of every property whose seed group is in this column
                        infectivity_mod = 1
                        for seed_group, susceptible_group in zip(seed_groups, susceptible_groups):
                            if seed_group not in col:
                                continue
                            for property_obj in property_list:
                                property_values = property_obj[DemographicsKeys.PropertyKeys.Values]
                                if seed_group in property_values:
                                    transmission_matrix = property_obj[DemographicsKeys.PropertyKeys.Matrix]
                                    infectivity_mod *= transmission_matrix[property_values.index(seed_group)][
                                        property_values.index(susceptible_group)]
                        # accumulate contagion of susceptible group
                        calculated_contagion += infectivity_seed * infectivity_mod

                    # round the calculated value to 6 decimal places
                    calculated_contagion = round(calculated_contagion, 6)

                    # rows of stdout_df for this timestep and group
                    stdout_rows = stdout_df[(stdout_df[ConfigKeys.Simulation_Timestep] == t) &
                                            (stdout_df[hint_support.Stdout.group_id] == group_id)]

                    # get contagion of susceptible group from stdout
                    actual_contagion = stdout_rows[hint_support.Stdout.contagion].values[0]
                    contagion_list.append([actual_contagion, calculated_contagion])
                    if math.fabs(calculated_contagion - actual_contagion) > 5e-2 * calculated_contagion:
                        result_2 = success = False
                        outfile.write(" BAD: at time step {0}, for group {1} id {2}, the total contagion is {3}, "
                                      "expected {4}.\n".format(t, reported_group, group_id, actual_contagion,
                                                               calculated_contagion))

                    # calculate infection probability based on contagion
                    calculated_prob = 1.0 - math.exp(
                        -1 * calculated_contagion * param_obj[ConfigKeys.Simulation_Timestep])
                    # round the calculated value to 6 decimal places
                    calculated_prob = round(calculated_prob, 6)

                    # get infection probability of susceptible group from stdout_df
                    actual_prob = stdout_rows[hint_support.Stdout.prob].values[0]
                    prob_list.append([actual_prob, calculated_prob])
                    if math.fabs(calculated_prob - actual_prob) > 5e-2 * calculated_prob:
                        result_2 = success = False
                        outfile.write(" BAD: at time step {0}, for group {1} id {2}, the infected probability is "
                                      "{3}, expected {4}.\n".format(t, reported_group, group_id, actual_prob,
                                                                    calculated_prob))

                    # calculate expected new infection for susceptible group
                    # (.iloc[t, 0] instead of .iloc[t][0]: positional Series[int] access is deprecated)
                    susceptible_population = population_sus.iloc[t, 0] - infected_sus.iloc[t, 0]
                    expected_new_infection = susceptible_population * calculated_prob
                    expected_new_infection_list.append(expected_new_infection)
                    actual_new_infection = new_infection_sus.iloc[t, 0]

                    if expected_new_infection < 5 or susceptible_population * (1 - calculated_prob) < 5:
                        # too few expected successes or failures: use the binomial pmf directly
                        binom_pmf = stats.binom.pmf(k=actual_new_infection, n=susceptible_population,
                                                    p=calculated_prob)
                        if binom_pmf < 1e-3:
                            failed_count += 1
                            outfile.write("WARNING: at timestep {0}, new infections for {1} group is {2}, expected "
                                          " = {3}, calculated binomial pmf is {4}.\n"
                                          "".format(t, reported_group, actual_new_infection,
                                                    expected_new_infection, binom_pmf))
                    elif not sft.test_binomial_99ci(num_success=actual_new_infection,
                                                    num_trials=susceptible_population,
                                                    prob=calculated_prob, report_file=debug_file,
                                                    category="new infections for {0} at time {1}".format(
                                                        reported_group, t)):
                        failed_count += 1
                        standard_deviation = math.sqrt(
                            calculated_prob * (1 - calculated_prob) * susceptible_population)
                        # interval reported in the warning: mean +/- 3 standard deviations
                        lower_bound = expected_new_infection - 3 * standard_deviation
                        upper_bound = expected_new_infection + 3 * standard_deviation
                        outfile.write("WARNING: at timestep {0}, new infections for {1} group is {2}, expected "
                                      "within 99% binomial interval ({3}, {4}) with mean = {5}\n".format(
                                          t, reported_group, actual_new_infection, lower_bound, upper_bound,
                                          expected_new_infection))

            # make sure non-susceptible groups has no new infections after outbreak
            new_infection_non_susceptible = property_df[
                [c for c in seed_cols if hint_support.channels[1] in c]]
            message_template = "{0}: total new infection after outbreak for {1} is {2}, expected 0.\n"
            for col in new_infection_non_susceptible:
                # skip the first row (the outbreak itself); DataFrame.ix no longer exists in pandas >= 1.0
                total_new_infection = new_infection_non_susceptible[col].iloc[1:].sum()
                if total_new_infection != 0:
                    success = False
                    outfile.write(message_template.format("BAD", col, total_new_infection))
                else:
                    outfile.write(message_template.format("GOOD", col, total_new_infection))

            # plotting
            # get the group name for the susceptible only group
            group_name = susceptible_only_cols[0].replace(":", " ", 1).split()[-1]
            # ":" is not allowed in filename, replace it with "-" to avoid OSError
            group_name_modified = group_name.replace(":", "-")
            contagion_array = np.array(contagion_list)
            sft.plot_data(contagion_array[:, 0], contagion_array[:, 1],
                          label1='contagion from logging', label2="calculated contagion",
                          title="{}\ncontagion".format(group_name),
                          xlabel='day', ylabel='contagion',
                          category="contagion_{}".format(group_name_modified),
                          line=True, alpha=0.5, overlap=True)
            prob_array = np.array(prob_list)
            sft.plot_data(prob_array[:, 0], prob_array[:, 1],
                          label1='probability from logging', label2="calculated probability",
                          title="{}\nprobability".format(group_name),
                          xlabel='day', ylabel='probability',
                          category="probability_{}".format(group_name_modified),
                          line=True, alpha=0.5, overlap=True)
            sft.plot_data(new_infection_sus.iloc[:, 0].tolist(), expected_new_infection_list,
                          label1='from property report', label2="calculated data",
                          title="{}\nnew infections".format(group_name),
                          xlabel='day', ylabel='new_infections',
                          category="new_infections_{}".format(group_name_modified),
                          line=False, alpha=0.5, overlap=True, sort=False)

            message_template = "{0}: binomial test for {1} failed {2} times within {3} total timesteps, which is " \
                               "{4}% fail rate, test is {5}.\n"
            # tolerate up to 5% of the timesteps failing the binomial check
            if failed_count / duration > 5e-2:
                result_3 = success = False
                outfile.write(message_template.format("BAD", new_infection_sus.columns.values, failed_count,
                                                      duration, (failed_count / duration) * 100, "failed"))
            else:
                outfile.write(message_template.format("GOOD", new_infection_sus.columns.values, failed_count,
                                                      duration, (failed_count / duration) * 100, "passed"))

            outfile.write("Test 2: result is: {}.\n".format(result_2))
            outfile.write("Test 3: result is: {}.\n".format(result_3))

        outfile.write(sft.format_success_msg(success))
    if debug:
        print(sft.format_success_msg(success))
    return success
def create_report_file(param_obj, campaign_obj, stdout_df, property_df, property_obj, report_name, debug):
    """
    Validate HINT transmission for a single individual property and write the test report.

    Test 1 checks the preconditions: HINT enabled, every campaign Start_Day equal to 0, every
    Demographic_Coverage equal to 1, and Property_Restrictions matching the property values whose
    name contains "seed". Tests 2 and 3 run only when Test 1 passes. For each seed group and
    every timestep they compare the contagion and infection probability found in stdout_df with
    values computed from the property report and the transmission matrix, then check the reported
    new infections of the matching susceptible group against a binomial expectation. Plots are
    produced through sft.plot_data.

    :param param_obj: mapping of config parameters, indexed with ConfigKeys members
    :param campaign_obj: mapping of campaign values, indexed with CampaignKeys members
    :param stdout_df: DataFrame with the timestep, group id, contagion, probability and
                      statistical population columns named by ConfigKeys / Stdout
    :param property_df: DataFrame of property report channels; column names contain the group
                        names and the channel names from ``channels``
    :param property_obj: the property definition (Property name, Values and Matrix)
    :param report_name: path of the report file to write
    :param debug: when truthy, the final success message is also printed
    :return: True when every check passed, otherwise False
    """
    with open(report_name, "w") as outfile:
        config_name = param_obj[ConfigKeys.Config_Name]
        base_infectivity = param_obj[ConfigKeys.Base_Infectivity]
        outfile.write("Config_name = {}\n".format(config_name))
        outfile.write("{0} = {1} {2} = {3}\n".format(
            ConfigKeys.Base_Infectivity, base_infectivity,
            ConfigKeys.Run_Number, param_obj[ConfigKeys.Run_Number]))

        success = True

        outfile.write("Test 1: checking test conditions/setup in config.json and campaign.json:\n")
        if int(param_obj[ConfigKeys.Enable_Heterogeneous_Intranode_Transmission]) != 1:
            success = False
            outfile.write("BAD: HINT is not enabled, please check the test.\n")
        else:
            if not all(x == 0 for x in campaign_obj[CampaignKeys.Start_Day]):
                success = False
                outfile.write("BAD: All intervention should start at day 0, please check campaign.json.\n")

            if not all(x == 1 for x in campaign_obj[CampaignKeys.Demographic_Coverage]):
                success = False
                outfile.write("BAD: {} should be 1, please check campaign.json.\n".format(
                    CampaignKeys.Demographic_Coverage))

            property_values = property_obj[DemographicsKeys.PropertyKeys.Values]
            seed_groups = [value for value in property_values if 'seed' in value.lower()]
            expected_property_restrictions = [
                ["{0}:{1}".format(property_obj[DemographicsKeys.PropertyKeys.Property], value)]
                for value in seed_groups]
            if campaign_obj[CampaignKeys.Property_Restrictions] != expected_property_restrictions:
                success = False
                outfile.write(
                    "BAD: {0} should be {1}, got {2} in campaign.json, please check campaign.json.\n".format(
                        CampaignKeys.Property_Restrictions, expected_property_restrictions,
                        campaign_obj[CampaignKeys.Property_Restrictions]))

        outfile.write("Test 1: campaign and config met preconditions: {}.\n".format(success))

        if success:
            outfile.write("Test 2: Testing contagion and probability with calculated values for every time step:\n")
            outfile.write("Test 3: Testing New Infection channel from property report based on transmission matrix:\n")
            outfile.write("Test 2 and 3 and running at the same time:\n")
            result_2 = result_3 = True
            stat_pop = stdout_df[Stdout.stat_pop]
            duration = param_obj[ConfigKeys.Simulation_Duration]
            transmission_matrix = property_obj[DemographicsKeys.PropertyKeys.Matrix]

            for seed_group in seed_groups:
                contagion_list = []
                prob_list = []
                # name of the susceptible group paired with this seed group: the first value that
                # contains "susceptible" and the last "_"-separated token of the seed group name
                susceptible_group = [value for value in property_values
                                     if "susceptible" in value.lower()
                                     and seed_group.split('_')[-1] in value][0]
                # get all the column names that contains the seed group
                seed_cols = [c for c in property_df.columns if seed_group in c]
                # get all the column names that contains the susceptible group
                susceptible_cols = [c for c in property_df.columns if susceptible_group in c]

                # test data for seed group
                infected_seed = property_df[[c for c in seed_cols if channels[0] in c]]
                # test data for susceptible group
                infected_sus = property_df[[c for c in susceptible_cols if channels[0] in c]]
                new_infection_sus = property_df[[c for c in susceptible_cols if channels[1] in c]]
                population_sus = property_df[[c for c in susceptible_cols if channels[2] in c]]

                # Neither the group id nor the matrix entry depends on the timestep.
                group_id = property_values.index(susceptible_group)
                matrix_entry = transmission_matrix[property_values.index(seed_group)][group_id]

                expected_new_infection_list = []
                failed_count = 0
                # Open the debug file once per group: reopening it with 'w' on every timestep
                # truncated the output of all previous timesteps.
                with open("DEBUG_binomial_test_{}.txt".format(susceptible_group), 'w') as debug_file:
                    for t in range(duration):
                        # infectivity of seed group, normalized with total population
                        # (.iloc[t, 0] instead of .iloc[t][0]: positional Series[int] access is deprecated)
                        infectivity_seed = base_infectivity * infected_seed.iloc[t, 0] / stat_pop[t]
                        # calculate contagion of susceptible group
                        calculated_contagion = infectivity_seed * matrix_entry

                        # rows of stdout_df for this timestep and group
                        stdout_rows = stdout_df[(stdout_df[ConfigKeys.Simulation_Timestep] == t) &
                                                (stdout_df[Stdout.group_id] == group_id)]

                        # get contagion of susceptible group from stdout_df
                        actual_contagion = stdout_rows[Stdout.contagion].values[0]
                        contagion_list.append([actual_contagion, calculated_contagion])
                        if math.fabs(calculated_contagion - actual_contagion) > 5e-2 * calculated_contagion:
                            result_2 = success = False
                            outfile.write(
                                " BAD: at time step {0}, for group {1} id {2}, the total contagion is {3}, "
                                "expected {4}.\n".format(t, susceptible_group, group_id, actual_contagion,
                                                         calculated_contagion))

                        # calculate infection probability based on contagion
                        calculated_prob = 1.0 - math.exp(
                            -1 * calculated_contagion * param_obj[ConfigKeys.Simulation_Timestep])
                        # get infection probability of susceptible group from stdout_df
                        actual_prob = stdout_rows[Stdout.prob].values[0]
                        prob_list.append([actual_prob, calculated_prob])
                        if math.fabs(calculated_prob - actual_prob) > 5e-2 * calculated_prob:
                            result_2 = success = False
                            outfile.write(
                                " BAD: at time step {0}, for group {1} id {2}, the infected probability is "
                                "{3}, expected {4}.\n".format(t, susceptible_group, group_id, actual_prob,
                                                              calculated_prob))

                        # calculate expected new infection for susceptible group
                        susceptible_population = population_sus.iloc[t, 0] - infected_sus.iloc[t, 0]
                        expected_new_infection = susceptible_population * calculated_prob
                        expected_new_infection_list.append(expected_new_infection)
                        actual_new_infection = new_infection_sus.iloc[t, 0]

                        if expected_new_infection < 5 or susceptible_population * (1 - calculated_prob) < 5:
                            # too few expected successes or failures: use the binomial pmf directly
                            binom_pmf = stats.binom.pmf(k=actual_new_infection, n=susceptible_population,
                                                        p=calculated_prob)
                            if binom_pmf < 1e-3:
                                failed_count += 1
                                outfile.write(
                                    "WARNING: at timestep {0}, new infections for {1} group is {2}, expected "
                                    " = {3}, calculated binomial pmf is {4}.\n"
                                    "".format(t, susceptible_group, actual_new_infection,
                                              expected_new_infection, binom_pmf))
                        elif not sft.test_binomial_99ci(num_success=actual_new_infection,
                                                        num_trials=susceptible_population,
                                                        prob=calculated_prob, report_file=debug_file,
                                                        category="new infections for {0} at time {1}".format(
                                                            susceptible_group, t)):
                            failed_count += 1
                            standard_deviation = math.sqrt(
                                calculated_prob * (1 - calculated_prob) * susceptible_population)
                            # interval reported in the warning: mean +/- 3 standard deviations
                            lower_bound = expected_new_infection - 3 * standard_deviation
                            upper_bound = expected_new_infection + 3 * standard_deviation
                            outfile.write(
                                "WARNING: at timestep {0}, new infections for {1} group is {2}, expected "
                                "within 99% binomial interval ({3}, {4}) with mean = {5}\n".format(
                                    t, susceptible_group, actual_new_infection, lower_bound, upper_bound,
                                    expected_new_infection))

                contagion_array = np.array(contagion_list)
                sft.plot_data(contagion_array[:, 0], contagion_array[:, 1],
                              label1='contagion from logging', label2="calculated contagion",
                              title="actual vs. expected contagion",
                              xlabel='day', ylabel='contagion',
                              category="contagion_{}".format(susceptible_group),
                              line=True, alpha=0.5, overlap=True)
                prob_array = np.array(prob_list)
                sft.plot_data(prob_array[:, 0], prob_array[:, 1],
                              label1='probability from logging', label2="calculated probability",
                              title="actual vs. expected probability",
                              xlabel='day', ylabel='probability',
                              category="probability_{}".format(susceptible_group),
                              line=True, alpha=0.5, overlap=True)
                # DataFrame.ix no longer exists in pandas >= 1.0; the first column is selected by position
                sft.plot_data(new_infection_sus.iloc[:, 0].tolist(), expected_new_infection_list,
                              label1='from property report', label2="calculated data",
                              title="new infections for {}".format(susceptible_group),
                              xlabel='day', ylabel='new_infections',
                              category="new_infections_{}".format(susceptible_group),
                              line=False, alpha=0.5, overlap=True, sort=False)

                message_template = "{0}: binomial test for {1} failed {2} times within {3} total timesteps, " \
                                   "which is {4}% fail rate, test is {5}.\n"
                # tolerate up to 5% of the timesteps failing the binomial check
                if failed_count / duration > 5e-2:
                    result_3 = success = False
                    outfile.write(message_template.format("BAD", new_infection_sus.columns.values, failed_count,
                                                          duration, (failed_count / duration) * 100, "failed"))
                else:
                    outfile.write(message_template.format("GOOD", new_infection_sus.columns.values, failed_count,
                                                          duration, (failed_count / duration) * 100, "passed"))

            outfile.write("Test 2: result is: {}.\n".format(result_2))
            outfile.write("Test 3: result is: {}.\n".format(result_3))

        outfile.write(sft.format_success_msg(success))
    if debug:
        print(sft.format_success_msg(success))
    return success
def create_report_file(param_obj, campaign_obj, stdout_df, property_df, property_obj, report_name, debug):
    """
    Validate HINT transmission for a single individual property and write the test report.

    Test 1 checks the preconditions: HINT enabled, every campaign Start_Day equal to 0, every
    Demographic_Coverage equal to 1, and Property_Restrictions matching the property values whose
    name contains "seed". Tests 2 and 3 run only when Test 1 passes. For each seed group and
    every timestep they compare the contagion and infection probability found in stdout_df with
    values computed from the property report and the transmission matrix, then check the reported
    new infections of the matching susceptible group against a binomial expectation. Plots are
    produced through sft.plot_data.

    :param param_obj: mapping of config parameters, indexed with ConfigKeys members
    :param campaign_obj: mapping of campaign values, indexed with CampaignKeys members
    :param stdout_df: DataFrame with the timestep, group id, contagion, probability and
                      statistical population columns named by ConfigKeys / Stdout
    :param property_df: DataFrame of property report channels; column names contain the group
                        names and the channel names from ``channels``
    :param property_obj: the property definition (Property name, Values and Matrix)
    :param report_name: path of the report file to write
    :param debug: when truthy, the final success message is also printed
    :return: True when every check passed, otherwise False
    """
    with open(report_name, "w") as outfile:
        config_name = param_obj[ConfigKeys.Config_Name]
        base_infectivity = param_obj[ConfigKeys.Base_Infectivity]
        outfile.write("Config_name = {}\n".format(config_name))
        outfile.write("{0} = {1} {2} = {3}\n".format(
            ConfigKeys.Base_Infectivity, base_infectivity,
            ConfigKeys.Run_Number, param_obj[ConfigKeys.Run_Number]))

        success = True

        outfile.write("Test 1: checking test conditions/setup in config.json and campaign.json:\n")
        if int(param_obj[ConfigKeys.Enable_Heterogeneous_Intranode_Transmission]) != 1:
            success = False
            outfile.write("BAD: HINT is not enabled, please check the test.\n")
        else:
            if not all(x == 0 for x in campaign_obj[CampaignKeys.Start_Day]):
                success = False
                outfile.write("BAD: All intervention should start at day 0, please check campaign.json.\n")

            if not all(x == 1 for x in campaign_obj[CampaignKeys.Demographic_Coverage]):
                success = False
                outfile.write("BAD: {} should be 1, please check campaign.json.\n".format(
                    CampaignKeys.Demographic_Coverage))

            property_values = property_obj[DemographicsKeys.PropertyKeys.Values]
            seed_groups = [value for value in property_values if 'seed' in value.lower()]
            expected_property_restrictions = [
                ["{0}:{1}".format(property_obj[DemographicsKeys.PropertyKeys.Property], value)]
                for value in seed_groups]
            if campaign_obj[CampaignKeys.Property_Restrictions] != expected_property_restrictions:
                success = False
                outfile.write(
                    "BAD: {0} should be {1}, got {2} in campaign.json, please check campaign.json.\n".format(
                        CampaignKeys.Property_Restrictions, expected_property_restrictions,
                        campaign_obj[CampaignKeys.Property_Restrictions]))

        outfile.write("Test 1: campaign and config met preconditions: {}.\n".format(success))

        if success:
            outfile.write("Test 2: Testing contagion and probability with calculated values for every time step:\n")
            outfile.write("Test 3: Testing New Infection channel from property report based on transmission matrix:\n")
            outfile.write("Test 2 and 3 and running at the same time:\n")
            result_2 = result_3 = True
            stat_pop = stdout_df[Stdout.stat_pop]
            duration = param_obj[ConfigKeys.Simulation_Duration]
            transmission_matrix = property_obj[DemographicsKeys.PropertyKeys.Matrix]

            for seed_group in seed_groups:
                contagion_list = []
                prob_list = []
                # name of the susceptible group paired with this seed group: the first value that
                # contains "susceptible" and the last "_"-separated token of the seed group name
                susceptible_group = [value for value in property_values
                                     if "susceptible" in value.lower()
                                     and seed_group.split('_')[-1] in value][0]
                # get all the column names that contains the seed group
                seed_cols = [c for c in property_df.columns if seed_group in c]
                # get all the column names that contains the susceptible group
                susceptible_cols = [c for c in property_df.columns if susceptible_group in c]

                # test data for seed group
                infected_seed = property_df[[c for c in seed_cols if channels[0] in c]]
                # test data for susceptible group
                infected_sus = property_df[[c for c in susceptible_cols if channels[0] in c]]
                new_infection_sus = property_df[[c for c in susceptible_cols if channels[1] in c]]
                population_sus = property_df[[c for c in susceptible_cols if channels[2] in c]]

                # Neither the group id nor the matrix entry depends on the timestep.
                group_id = property_values.index(susceptible_group)
                matrix_entry = transmission_matrix[property_values.index(seed_group)][group_id]

                expected_new_infection_list = []
                failed_count = 0
                # Open the debug file once per group: reopening it with 'w' on every timestep
                # truncated the output of all previous timesteps.
                with open("DEBUG_binomial_test_{}.txt".format(susceptible_group), 'w') as debug_file:
                    for t in range(duration):
                        # infectivity of seed group, normalized with total population
                        # (.iloc[t, 0] instead of .iloc[t][0]: positional Series[int] access is deprecated)
                        infectivity_seed = base_infectivity * infected_seed.iloc[t, 0] / stat_pop[t]
                        # calculate contagion of susceptible group
                        calculated_contagion = infectivity_seed * matrix_entry

                        # rows of stdout_df for this timestep and group
                        stdout_rows = stdout_df[(stdout_df[ConfigKeys.Simulation_Timestep] == t) &
                                                (stdout_df[Stdout.group_id] == group_id)]

                        # get contagion of susceptible group from stdout_df
                        actual_contagion = stdout_rows[Stdout.contagion].values[0]
                        contagion_list.append([actual_contagion, calculated_contagion])
                        if math.fabs(calculated_contagion - actual_contagion) > 5e-2 * calculated_contagion:
                            result_2 = success = False
                            outfile.write(
                                " BAD: at time step {0}, for group {1} id {2}, the total contagion is {3}, "
                                "expected {4}.\n".format(t, susceptible_group, group_id, actual_contagion,
                                                         calculated_contagion))

                        # calculate infection probability based on contagion
                        calculated_prob = 1.0 - math.exp(
                            -1 * calculated_contagion * param_obj[ConfigKeys.Simulation_Timestep])
                        # get infection probability of susceptible group from stdout_df
                        actual_prob = stdout_rows[Stdout.prob].values[0]
                        prob_list.append([actual_prob, calculated_prob])
                        if math.fabs(calculated_prob - actual_prob) > 5e-2 * calculated_prob:
                            result_2 = success = False
                            outfile.write(
                                " BAD: at time step {0}, for group {1} id {2}, the infected probability is "
                                "{3}, expected {4}.\n".format(t, susceptible_group, group_id, actual_prob,
                                                              calculated_prob))

                        # calculate expected new infection for susceptible group
                        susceptible_population = population_sus.iloc[t, 0] - infected_sus.iloc[t, 0]
                        expected_new_infection = susceptible_population * calculated_prob
                        expected_new_infection_list.append(expected_new_infection)
                        actual_new_infection = new_infection_sus.iloc[t, 0]

                        if expected_new_infection < 5 or susceptible_population * (1 - calculated_prob) < 5:
                            # too few expected successes or failures: use the binomial pmf directly
                            binom_pmf = stats.binom.pmf(k=actual_new_infection, n=susceptible_population,
                                                        p=calculated_prob)
                            if binom_pmf < 1e-3:
                                failed_count += 1
                                outfile.write(
                                    "WARNING: at timestep {0}, new infections for {1} group is {2}, expected "
                                    " = {3}, calculated binomial pmf is {4}.\n"
                                    "".format(t, susceptible_group, actual_new_infection,
                                              expected_new_infection, binom_pmf))
                        elif not sft.test_binomial_99ci(num_success=actual_new_infection,
                                                        num_trials=susceptible_population,
                                                        prob=calculated_prob, report_file=debug_file,
                                                        category="new infections for {0} at time {1}".format(
                                                            susceptible_group, t)):
                            failed_count += 1
                            standard_deviation = math.sqrt(
                                calculated_prob * (1 - calculated_prob) * susceptible_population)
                            # interval reported in the warning: mean +/- 3 standard deviations
                            lower_bound = expected_new_infection - 3 * standard_deviation
                            upper_bound = expected_new_infection + 3 * standard_deviation
                            outfile.write(
                                "WARNING: at timestep {0}, new infections for {1} group is {2}, expected "
                                "within 99% binomial interval ({3}, {4}) with mean = {5}\n".format(
                                    t, susceptible_group, actual_new_infection, lower_bound, upper_bound,
                                    expected_new_infection))

                contagion_array = np.array(contagion_list)
                sft.plot_data(contagion_array[:, 0], contagion_array[:, 1],
                              label1='contagion from logging', label2="calculated contagion",
                              title="actual vs. expected contagion",
                              xlabel='day', ylabel='contagion',
                              category="contagion_{}".format(susceptible_group),
                              line=True, alpha=0.5, overlap=True)
                prob_array = np.array(prob_list)
                sft.plot_data(prob_array[:, 0], prob_array[:, 1],
                              label1='probability from logging', label2="calculated probability",
                              title="actual vs. expected probability",
                              xlabel='day', ylabel='probability',
                              category="probability_{}".format(susceptible_group),
                              line=True, alpha=0.5, overlap=True)
                # DataFrame.ix no longer exists in pandas >= 1.0; the first column is selected by position
                sft.plot_data(new_infection_sus.iloc[:, 0].tolist(), expected_new_infection_list,
                              label1='from property report', label2="calculated data",
                              title="new infections for {}".format(susceptible_group),
                              xlabel='day', ylabel='new_infections',
                              category="new_infections_{}".format(susceptible_group),
                              line=False, alpha=0.5, overlap=True, sort=False)

                message_template = "{0}: binomial test for {1} failed {2} times within {3} total timesteps, " \
                                   "which is {4}% fail rate, test is {5}.\n"
                # tolerate up to 5% of the timesteps failing the binomial check
                if failed_count / duration > 5e-2:
                    result_3 = success = False
                    outfile.write(message_template.format("BAD", new_infection_sus.columns.values, failed_count,
                                                          duration, (failed_count / duration) * 100, "failed"))
                else:
                    outfile.write(message_template.format("GOOD", new_infection_sus.columns.values, failed_count,
                                                          duration, (failed_count / duration) * 100, "passed"))

            outfile.write("Test 2: result is: {}.\n".format(result_2))
            outfile.write("Test 3: result is: {}.\n".format(result_3))

        outfile.write(sft.format_success_msg(success))
    if debug:
        print(sft.format_success_msg(success))
    return success
def create_report_file(Resistances, initial_resistances, drug_start_time, param_obj, report_name, inset_days, debug):
    """Write the drug-resistance test report to ``report_name``.

    Two checks are performed and their outcome is written to the report:

    1. For every entry of ``inset_days``, the MDR prevalence channel is
       compared with ``Resistances[day] / statistical population``; a
       difference larger than 0.03 is recorded as a failure message.
    2. For every time step after ``drug_start_time``, the increase in
       ``Resistances`` is tested against the rate
       ``param_obj["TB_Drug_Resistance_Rate_HIV"]``: with
       ``sft.test_binomial_99ci`` when the expected mean is at least 5,
       otherwise with a 3-sigma tolerance around the expected mean.
       The check fails when more than ``ceil(1%)`` of the steps fail.

    :param Resistances: per-time-step resistant counts, indexed by time step
        (presumably parsed from stdout -- confirm against the caller).
    :param initial_resistances: not used by the active code; only the
        disabled initial binomial test referenced it.
    :param drug_start_time: time step index from which progression is tracked.
    :param param_obj: mapping that provides "TB_Drug_Resistance_Rate_HIV".
    :param report_name: path of the report file to (over)write.
    :param inset_days: per-day mappings of InsetChart channel values.
    :param debug: when truthy, writes per-day details and produces plots.
    """
    with open(report_name, "w") as outfile:
        first_day = inset_days[0]
        population_key = dts.InsetChart.Channels.KEY_StatisticalPopulation
        mdr_prevalence_key = dts.InsetChart.Channels.KEY_MdrTbPrevalence
        starting_pop = first_day[population_key]
        # NOTE: a binomial test on initial_resistances used to run here; it is
        # disabled, so the overall result starts out as passing.
        success = True

        # --- Check 1: InsetChart MDR prevalence vs. counts from Resistances ---
        progression = []
        bad_msgs = []
        for day, chart_row in enumerate(inset_days):
            chart_prevalence = chart_row[mdr_prevalence_key]
            resistant_count = Resistances[day]
            if day >= drug_start_time:
                progression.append(resistant_count)
            if debug:
                outfile.write("Day: {0}\n".format(day))
                outfile.write(str(chart_row) + "\n")
                outfile.write(
                    "StdOut resistants: {0}\n".format(resistant_count))
            predicted_prevalence = resistant_count / float(
                chart_row[population_key])
            if abs(chart_prevalence - predicted_prevalence) > 0.03:
                bad_msgs.append(
                    "BAD: at timestep {0}, expected MDR prevalence: {1}, InsetChart had: {2}\n"
                    .format(day, predicted_prevalence, chart_prevalence))

        # --- Check 2: newly acquired resistance per time step ---
        rate = param_obj["TB_Drug_Resistance_Rate_HIV"]
        new_resistances = []
        failed_count = 0
        # NOTE(review): the baseline is 0, so the first tested step counts every
        # resistance present at that step as "new" -- confirm this is intended.
        previous = 0
        for step in range(drug_start_time + 1, len(Resistances)):
            current = Resistances[step]
            gained = current - previous
            previous = current
            new_resistances.append(gained)

            # NOTE(review): the trial count uses the post-step resistant count;
            # verify against the model that this is the intended population.
            at_risk = starting_pop - current
            expected_mean = at_risk * rate
            if expected_mean >= 5:  # avoid failing with too small a mean
                passed = sft.test_binomial_99ci(
                    gained, at_risk, rate, outfile,
                    category="time step {}".format(step + 1))
                warning = (
                    "Warning: New Resistance test fails for rate = {0} at time step {1}.\n"
                    .format(rate, step + 1))
            else:
                # Small expected mean: fall back to a 3-sigma tolerance.
                error_tolerance = 3 * math.sqrt(rate * (1 - rate) * at_risk)
                passed = math.fabs(gained - expected_mean) <= error_tolerance
                warning = (
                    "Warning: New Resistance test fails for rate = {0} at time step {1}, "
                    "new resistance = {2}, expected mean = {3}, error tolerance = {4}.\n"
                    .format(rate, step + 1, gained, expected_mean,
                            error_tolerance))
            if not passed:
                failed_count += 1
                outfile.write(warning)

        # One test was run per recorded step.
        total_tests = len(new_resistances)
        if failed_count > math.ceil(total_tests * 0.01):
            success = False
            outfile.write(
                "BAD: test failed {0} times out of {1} timestep, please check the warning message.\n"
                "".format(failed_count, total_tests))

        if debug:
            sft.plot_data(new_resistances,
                          title="new resistance over time",
                          category="new_resistance",
                          show=True)
            series = sft.create_geometric_dis(
                rate, starting_pop, len(progression), test_decay=False)
            sft.plot_data(progression, series,
                          label1="progression",
                          label2="geomatric dis",
                          xlabel="days",
                          ylabel="resistance",
                          title="progression vs geomatric",
                          category="progression_vs_geomatric",
                          show=True,
                          line=True)
            sft.plot_cdf(progression, series,
                         label1="progression",
                         label2="geomatric dis",
                         title="progression vs geomatric cdf",
                         category="progression_vs_geomatric_cdf",
                         show=True)

        # NOTE: a geometric-decay test on `progression` is disabled here.
        if bad_msgs:
            success = False
            outfile.writelines(bad_msgs)
        outfile.write(sft.format_success_msg(success))