def create_report_file(data):
    """
    Validate that time from TB activation to CoInfection death follows the
    expected exponential distribution and write a pass/fail report.

    :param data: sequence of [report_name, stdout log lines,
                 coinfection mortality rate off ART,
                 coinfection mortality rate on ART]
    :return: None (writes report_name and a plot as side effects)
    """
    report_name = data[0]
    lines = data[1]
    coinfection_mortality_rate_off_art = data[2]
    coinfection_mortality_rate_on_art = data[3]  # NOTE(review): unused in this check — confirm intended
    died_of_coinfection = "died of CoInfection"
    state_active_symptomatic = "infectionstatechange TBActivation "
    time_to_death_data = []
    active_infections_dictionary = {}
    success = True
    with open(report_name, "w") as outfile:
        if not lines:
            outfile.write("BAD: No relevant test data found.\n")
            success = False
        for line in lines:
            if died_of_coinfection in line:
                ind_id = int(dtk_sft.get_val("Individual ", line))
                time_stamp = int(dtk_sft.get_val("time= ", line))
                # idiom fix: direct membership test instead of .keys()
                if ind_id in active_infections_dictionary:
                    time_to_death_data.append(time_stamp - active_infections_dictionary[ind_id])
                else:
                    success = False
                    outfile.write("BAD: Individual {} died of coinfection without going active, at time {}."
                                  "\n".format(ind_id, time_stamp))
            elif state_active_symptomatic in line:
                ind_id = int(dtk_sft.get_val("Individual ", line))
                start_time_stamp = int(dtk_sft.get_val("time= ", line))
                if ind_id in active_infections_dictionary:
                    # Duplicate activation is logged but does not fail the test.
                    # fix: the two concatenated literals were missing a separating space.
                    outfile.write("Individual {} went active symptomatic while already being active symptomatic"
                                  " at time {}. \n".format(ind_id, start_time_stamp))
                else:
                    active_infections_dictionary[ind_id] = start_time_stamp
        # expected_data here only used for graphing purposes.
        # fix: materialize map() into a list — a bare map iterator is single-use in Python 3.
        expected_data = list(map(int, np.random.exponential(1 / coinfection_mortality_rate_off_art,
                                                            len(time_to_death_data))))
        if not dtk_sft.test_exponential(time_to_death_data, coinfection_mortality_rate_off_art, outfile,
                                        integers=True, roundup=False, round_nearest=False):
            success = False
        outfile.write("Data points checked = {}.\n".format(len(time_to_death_data)))
        outfile.write("SUMMARY: Success={0}\n".format(success))
    dtk_sft.plot_data(sorted(time_to_death_data), sorted(expected_data),
                      label1="Actual", label2="Expected",
                      title="Time from Smear Negative Off ART TBHIV to Death",
                      xlabel="Data Points", ylabel="Days",
                      category="tbhiv_mortality_smear_negative_off_art",
                      line=True, overlap=True)
def create_report_file(report_data_obj, report_name, debug):
    """
    Compare reported new infections per property group against the expected
    count implied by the calculated immunity, for each outbreak repetition.

    :param report_data_obj: dict of channel name -> per-timestep data
    :param report_name: path of the text report to write
    :param debug: when True, print the summary line to stdout
    :return: True when every group matched within tolerance
    """
    with open(report_name, "w") as outfile:
        success = True
        immunity = calc_immunity(debug)
        new_infections = []
        expected_new_infections = []
        timestep = Outbreak_Start_Day
        if not report_data_obj:
            success = False
            outfile.write("BAD: There is no data in the PropertyReport report")
        else:
            for rep in range(Number_Repetitions):
                for grp, channel in enumerate(KEY_NEW_INFECTIONS_GROUP):
                    observed = report_data_obj[channel][timestep]
                    population = report_data_obj[KEY_STATISTICAL_POPULATION_GROUP[grp]][timestep]
                    expected = population * (1.0 - immunity[grp][rep])
                    # zero expectation demands an exact match; otherwise 2% of population
                    margin = 2e-2 * population if expected != 0.0 else 0.0
                    if math.fabs(observed - expected) > margin:
                        success = False
                        outfile.write(
                            "BAD: At time step {0}, {1} has {2} reported, expected {3}.\n"
                            .format(timestep, channel, observed, expected))
                    new_infections.append(observed)
                    expected_new_infections.append(expected)
                timestep += Timesteps_Between_Repetitions
        outfile.write(sft.format_success_msg(success))
    sft.plot_data(new_infections, expected_new_infections,
                  label1="Actual", label2="Expected",
                  xlabel="group: 0-4 outbreak 1, 5-9 outbreak 2",
                  ylabel="new infection",
                  title="Actual new infection vs. expected new infection",
                  category='New_infections', show=True)
    if debug:
        print("SUMMARY: Success={0}\n".format(success))
    return success
def application(output_folder="output", stdout_filename="test.txt",
                config_filename="config.json", chart_name="InsetChart.json",
                report_name=sft.sft_output_filename, debug=False):
    """
    SFT entry point: wait for the simulation to finish, parse its outputs,
    plot cumulative resistances, and write the scientific feature test report.
    """
    if debug:
        print("output_folder: " + output_folder)
        print("stdout_filename: " + stdout_filename + "\n")
        print("config_filename: " + config_filename + "\n")
        print("chart_name: " + chart_name + "\n")
        print("report_name: " + report_name + "\n")
        print("debug: " + str(debug) + "\n")
    sft.wait_for_done()
    param_obj = load_emod_parameters(config_filename)
    total_timesteps = param_obj[dts.ConfigKeys.KEY_SimulationDuration]
    drug_start_time = param_obj["Drug_Start_Time"]
    start_timestep = param_obj[dts.ConfigKeys.KEY_StartTime]
    # Now process log output (probably) and compare to theory (not in this example) or to another report.
    cum_resistances, initial_resistances = parse_stdout_file(drug_start_time,
                                                            stdout_filename,
                                                            debug)
    sft.plot_data(cum_resistances,
                  label1="Cumulative Resistances", label2="NA",
                  title="Cumulative Resistances over Time",
                  xlabel="Timestep", ylabel="Resistances",
                  category="Cumulative_Resistances", show=True)
    inset_days = parse_json_report(start_timestep, output_folder, chart_name, debug)
    # Now we've ingested all relevant inputs, let's do analysis
    create_report_file(cum_resistances, initial_resistances, drug_start_time,
                       param_obj, report_name, inset_days, debug)
    return None
def parse_json_report(keys, insetchart_name="InsetChart.json", output_folder="output", debug=False):
    """
    Create report_data_obj structure with keys.

    :param keys: channel names to extract from the inset chart
    :param insetchart_name: file to parse (InsetChart.json)
    :param output_folder: folder containing the inset chart
    :param debug: when True, plot the first channel and dump the parsed data
    :return: report_data_obj dict mapping each key to its "Data" list;
             keys that fail to parse are simply absent (exception is printed)
    """
    insetchart_path = os.path.join(output_folder, insetchart_name)
    with open(insetchart_path) as infile:
        icj = json.load(infile)["Channels"]
    report_data_obj = {}
    try:
        for key in keys:
            data = icj[key]["Data"]
            report_data_obj[key] = data
        if debug:
            # this plot is for debugging only.
            # fix: plot the channel's data, not the key string itself.
            dtk_sft.plot_data(report_data_obj[keys[0]], dist2=None,
                              label1=keys[0] + " channel", label2="NA",
                              title=keys[0], xlabel="time step", ylabel=keys[0],
                              category=keys[0], show=True, line=False)
            with open("DEBUG_data_InsetChart.json", "w") as outfile:
                json.dump(report_data_obj, outfile, indent=4)
    except Exception as ex:
        # best-effort: report the failure but return whatever parsed so far
        print("Failed to parse {0}, got exception: {1}".format(insetchart_name, ex))
    return report_data_obj
def create_report_file(param_obj, report_data_obj, report_name, debug):
    """
    Check the second outbreak's new infections per group against the expected
    portion computed from the configuration, and write a pass/fail report.

    :return: True when every group matched within tolerance
    """
    with open(report_name, "w") as outfile:
        success = True
        new_infection_portions = calc_expected_new_infection_portion(param_obj, debug)
        new_infections = []
        expected_new_infections = []
        # skip the first outbreak, which gives the natural immunity
        timestep = Outbreak_Start_Day + Timesteps_Between_Repetitions
        for idx, channel in enumerate(KEY_NEW_INFECTIONS_GROUP):
            observed = report_data_obj[channel][timestep]
            population = report_data_obj[KEY_STATISTICAL_POPULATION_GROUP[idx]][timestep]
            expected = population * (new_infection_portions[idx])
            # zero expectation demands an exact match; otherwise 2% of population
            margin = 2e-2 * population if expected != 0.0 else 0.0
            if math.fabs(observed - expected) > margin:
                success = False
                outfile.write(
                    "BAD: At time step {0}, {1} has {2} reported, expected {3}.\n"
                    .format(timestep, channel, observed, expected))
            new_infections.append(observed)
            expected_new_infections.append(expected)
        outfile.write(sft.format_success_msg(success))
    sft.plot_data(new_infections, expected_new_infections,
                  label1="Actual", label2="Expected",
                  xlabel="0: Control group, 1: Test group",
                  ylabel="new infection",
                  title="Actual new infection vs. expected new infection",
                  category='New_infections', show=True)
    if debug:
        print("SUMMARY: Success={0}\n".format(success))
    return success
def application(output_folder="output", stdout_filename="test.txt",
                config_filename="config.json", campaign_filename="campaign.json",
                demographics_filename="demographics_multiplenodes.json",
                insetchart_name="InsetChart.json",
                report_name=sft.sft_output_filename, debug=False):
    """
    Multi-node SFT entry point: load config/campaign/demographics, parse the
    inset chart, plot the raw channels, and delegate to create_report_file.
    """
    if debug:
        print("output_folder: " + output_folder)
        print("stdout_filename: " + stdout_filename + "\n")
        print("config_filename: " + config_filename + "\n")
        print("campaign_filename: " + campaign_filename + "\n")
        print("demographics_filename: " + demographics_filename + "\n")
        print("insetchart_name: " + insetchart_name + "\n")
        print("report_name: " + report_name + "\n")
        print("debug: " + str(debug) + "\n")
    sft.wait_for_done()
    param_obj = ips.load_emod_parameters(config_filename, debug)
    campaign_obj = load_campaign_file(campaign_filename, debug)
    demographics_obj = load_demographics_file(demographics_filename, debug)
    report_data_obj = ips.parse_json_report(output_folder, insetchart_name, debug)
    sft.plot_data(report_data_obj[KEY_NEW_INFECTIONS],
                  title="new infections", label1="New Infections", label2="NA",
                  xlabel="time steps", ylabel="new infection",
                  category='New_infections', show=True)
    sft.plot_data(report_data_obj[KEY_STATISTICAL_POPULATION],
                  title="Statistical Population", label1="Statistical Population",
                  label2="NA", xlabel="time steps", ylabel="Statistical Population",
                  category='Statistical_popupation', show=True, line=True)
    create_report_file(param_obj, campaign_obj, demographics_obj,
                       report_data_obj, report_name, debug)
def create_report_file(param_obj, multipliers, infectiousness, report_name, debug):
    """
    Verify infectiousness multipliers follow the configured log-normal
    distribution (or are exactly 1.0 when the feature is disabled).

    :param param_obj: dict with LOGNORMAL_SCALE (sigma) and BASE_INFECTIVITY
    :param multipliers: list of observed multipliers
    :param infectiousness: list of observed infectiousness values
    :param report_name: path of the text report to write
    :param debug: when True, dump scipy/emod samples and print the summary
    :return: True on success
    """
    with open(report_name, "w") as outfile:
        success = True
        if not multipliers:
            # NOTE(review): execution continues on empty input — confirm intended
            outfile.write(sft.sft_no_test_data)
        sigma = param_obj[Param_keys.LOGNORMAL_SCALE]
        base_infectivity = param_obj[Param_keys.BASE_INFECTIVITY]
        if sigma > 0:
            # mean of ln(X) chosen so the multiplier's expectation is 1.0
            mu = - sigma**2 / 2.0
            # test log_normal distribution
            success = sft.test_lognorm(multipliers, mu=mu, sigma=sigma,
                                       report_file=outfile, round=False)
            # test mean_l = 1
            mean_l = np.mean(multipliers)
            mean_infectiousness = np.mean(infectiousness)
            outfile.write("mean of the multipliers is {}, expected 1.0.\n".format(mean_l))
            outfile.write("mean of the Infectiousness is {0}, while base infectivity is {1}.\n".format(
                mean_infectiousness, base_infectivity))
            tolerance = 2e-2
            if math.fabs(mean_l - 1.0) > tolerance:
                outfile.write("BAD: mean of the multipliers is {}, expected 1.0.\n".format(mean_l))
                success = False
            # plotting
            size = len(multipliers)
            outfile.write("size is {}\n".format(size))
            scale = math.exp(mu)
            dist_lognormal = stats.lognorm.rvs(sigma, 0, scale, size)
            sft.plot_data(multipliers, dist_lognormal,
                          label1="Emod", label2="Scipy",
                          ylabel="Multiplier", xlabel="data point",
                          category="Emod_vs_Scipy",
                          title="Emod_vs_Scipy, sigma = {}".format(sigma),
                          show=True)
            sft.plot_probability(multipliers, dist_lognormal,
                                 precision=1, label1="Emod", label2="Scipy",
                                 category="Probability_mass_function_Emod_vs_Scipy",
                                 title="Emod_vs_Scipy, sigma = {}".format(sigma),
                                 show=True)
            sft.plot_cdf(multipliers, dist_lognormal,
                         label1="Emod", label2="Scipy", category="cdf",
                         title="cdf, sigma = {}".format(sigma),
                         show=True, line=False)
            if debug:
                with open("scipy_data.txt", "w") as file:
                    for n in sorted(dist_lognormal):
                        file.write(str(n) + "\n")
                with open("emod_data.txt", "w") as file:
                    for n in sorted(multipliers):
                        file.write(str(n) + "\n")
        else:
            # sigma = 0, this feature is disabled
            for multiplier in multipliers:
                if multiplier != 1.0:
                    success = False
                    outfile.write("BAD: multiplier is {0} when {1} set to {2}, expected 1.0.\n".format(
                        multiplier, Param_keys.LOGNORMAL_SCALE, sigma))
            # plotting
            sft.plot_data(multipliers, label1="Multiplier", label2="NA",
                          category="Multiplier", title="Multiplier_Sigma={}".format(sigma),
                          ylabel="Multiplier", xlabel="data point", show=True)
            sft.plot_data(infectiousness, label1="Infectiousness", label2="NA",
                          category="Infectiousness",
                          title="Infectiousness_Sigma={0}_BaseInfectivity={1}".format(sigma, base_infectivity),
                          ylabel="Infectiousness", xlabel="data point", show=True)
        outfile.write(sft.format_success_msg(success))
    if debug:
        # fix: was a Python-2 print statement, inconsistent with the rest of the file
        print("SUMMARY: Success={0}\n".format(success))
    return success
def create_report_file_incidence(column_diseasedeath, column_hivdeath, column_year, json_obj, param_obj,
                                 reporter_df, migration_df, node_list, stdout_filename, report_name, debug):
    """
    Cross-check the custom reporter's death counts (by year) against the
    stdout log (per core) and the inset-chart json, writing a pass/fail report.

    On Windows the stdout comparison is skipped (multi-core stdout merge issue)
    and only the report-vs-insetchart comparison runs.

    :return: True on success
    """
    with open(report_name, "w") as outfile:
        config_name = param_obj[trs.Config.config_name]
        outfile.write("Config_name = {}\n".format(config_name))
        success = True
        simulation_duration = param_obj[Config.duration]
        # timestep = param_obj[Config.simulation_timestep]
        outfile.write("Group the column_to_test by year and get the sum for all age bins:\n")
        # the year column becomes the index of the groupby_df
        groupby_df = reporter_df.groupby(column_year).sum()
        if debug:
            with open("DEBUG_groupby_dataframe.csv", "w") as groupby_file:
                groupby_df.to_csv(groupby_file, header=True)
        outfile.write("checking some test conditions:\n")
        success = tms.check_test_condition(param_obj[Config.num_core], node_list, migration_df, outfile)
        # fix: was `platform.system() is 'Windows'` — identity comparison with a
        # string literal is implementation-dependent; use equality.
        if platform.system() == 'Windows':
            # in Windows, there is issue when merging multiple stdouts(from different cores) into a single one.
            outfile.write("OS is {0}, let's only compare {1} column in report with {2} channel in insetchart output."
                          "\n".format(platform.system(), column_diseasedeath, InsetChart.disease_death))
            # actual test code is outside the if block, result = compare_report_json()
            # skip the following if OS is Windows
        else:
            outfile.write("OS is {}, let's compare report with insetchart and stdout output.\n"
                          .format(platform.system()))
            outfile.write("parse stdout({}) file:\n".format(stdout_filename))
            output_dict, exception_message = parse_output_file(stdout_filename, debug)
            if exception_message:
                success = False
                outfile.write(exception_message + '\n')
                outfile.write("parse stdout file failed, let's still compare report with insetchart output.\n")
                # actual test code is outside the first if block, result = compare_report_json()
                # skip the following if we can't parse the stdout file
            else:
                outfile.write("parse stdout file succeed, let's compare report with both stdout and insetchart output.\n")
                outfile.write("-- compare report with stdout file:\n")
                result1 = True
                if not groupby_df[column_diseasedeath].sum() and not groupby_df[column_hivdeath].sum():
                    success = False
                    outfile.write("BAD: there in no {0} or {1} in the report, please check the test.\n"
                                  .format(column_diseasedeath, column_hivdeath))
                    # skip the following if no interested data in report.
                else:
                    if not len(output_dict):
                        success = False
                        outfile.write(dtk_sft.sft_no_test_data)
                        outfile.write("BAD: stdout file has no test data")
                        # skip the following if parsing stdout doesn't throw exception but there is no
                        # test data in stdout file.
                    else:
                        outfile.write("Testing the {} count with log_valid for all year buckets:\n"
                                      .format(column_diseasedeath + " and " + column_hivdeath))
                        # two test columns that we are looking at
                        test_columns = [column_diseasedeath, column_hivdeath]
                        for n in range(len(test_columns)):
                            column_to_test = test_columns[n]
                            i = incidence_count = 0
                            years = groupby_df.index.values
                            incidence_counts = []
                            for t in output_dict:
                                if i < len(years):
                                    year = years[i]
                                    if t <= round(year * dtk_sft.DAYS_IN_YEAR):
                                        for core in output_dict[t]:
                                            incidence_count += output_dict[t][core][n]
                                    else:
                                        # after the last time step of the reporting window
                                        reporter_sum = int(groupby_df[groupby_df.index == year][column_to_test])
                                        # collected for plot method
                                        incidence_counts.append(incidence_count)
                                        if incidence_count != reporter_sum:
                                            success = result1 = False
                                            outfile.write(
                                                "BAD: in year {0} the {1} count get from reporter is {2}, while test.txt reports"
                                                " {3} cases.\n".format(year, column_to_test, reporter_sum, incidence_count))
                                        # initialize for next test window
                                        incidence_count = 0
                                        # collect the first time step data for each time window
                                        for core in output_dict[t]:
                                            incidence_count += output_dict[t][core][n]
                                        i += 1
                                else:
                                    break
                            dtk_sft.plot_data(incidence_counts, dist2=np.array(groupby_df[column_to_test]),
                                              label1="log_valid", label2="reporter",
                                              title=str(column_to_test),
                                              xlabel="every half year", ylabel=str(column_to_test),
                                              category=str(column_to_test) + "_log_valid",
                                              show=True, line=False, alpha=0.8, overlap=True)
                            outfile.write("Testing whether the reporter year matches the simulation duration:\n")
                            if i != len(years):
                                success = result1 = False
                                outfile.write(
                                    "BAD: the reporter has data up to year {0} but the simulation duration is {1}, "
                                    "we are expecting not more than year {2} from reporter."
                                    "".format(max(years), simulation_duration,
                                              math.floor(simulation_duration / 180)))
                            if simulation_duration > round(max(years) * dtk_sft.DAYS_IN_YEAR) + 180:
                                success = result1 = False
                                outfile.write(
                                    "BAD: the reporter has data up to year {0} but the simulation duration is {1}, "
                                    "we are expecting data after year {0} from reporter."
                                    "".format(max(years), simulation_duration))
                        outfile.write("compare report with stdout file result is {}.\n".format(result1))
                        outfile.write("-- checking if all cores are reporting in every time step in stdout file:\n")
                        core_list = [int(n) for n in (range(param_obj[Config.num_core]))]
                        result2 = True
                        for t, cores in output_dict.items():
                            cores = list(cores.keys())
                            # compare two list of cores
                            if core_list != sorted(cores):
                                result2 = success = False
                                outfile.write(
                                    "BAD: at time step {0}, these cores reported to stdout.txt are: {1}, while "
                                    "expected cores are: {2}.\n".format(t, cores, core_list))
                        outfile.write("checking if all cores are reporting in every time step in stdout file: result is "
                                      "{}.\n".format(result2))
        outfile.write("-- compare {} report with json output(insetchart).\n".format(column_diseasedeath))
        # actual test code is outside the first if block, result = compare_report_json()
        result = tms.compare_report_json(column_diseasedeath, InsetChart.disease_death,
                                         groupby_df, json_obj, outfile)
        if not result:
            success = False
            outfile.write("BAD: report doesn't match insetchart.\n")
        outfile.write("compare {0} in report with json output result is {1}.\n"
                      .format(column_diseasedeath, result))
        outfile.write(dtk_sft.format_success_msg(success))
    if debug:
        print("SUMMARY: Success={0}\n".format(success))
    return success
def create_report_file(param_obj, campaign_obj, report_data_obj, report_name, debug):
    """
    Chi-squared check that new infections binned into 3-month windows match the
    constant expectation implied by base infectivity and the acquisition factor.

    :return: True on success
    """
    with open(report_name, "w") as outfile:
        config_name = param_obj[KEY_CONFIG_NAME]
        outfile.write("Config_name = {}\n".format(config_name))
        success = True
        base_infectivity = param_obj[KEY_BASE_INFECTIVITY]
        start_day = campaign_obj[KEY_START_DAY]
        new_infection = report_data_obj[KEY_NEW_INFECTION]
        immunity_acquisition_factor = param_obj[KEY_IMMUNITY_ACQUISITION_FACTOR]
        # calculate expected number of infections for a time period of 3 months:
        number_of_month = 3
        expected_new_infection = base_infectivity * dtk_sft.DAYS_IN_MONTH * number_of_month * immunity_acquisition_factor
        expected = [expected_new_infection] * (dtk_sft.MONTHS_IN_YEAR // number_of_month)
        # group new infections for every 3 months:
        value_to_test = []
        if len(new_infection) < start_day + dtk_sft.DAYS_IN_YEAR:
            success = False
            outfile.write(
                "BAD: the simulation duration is too short, please make sure it's at least {} days.\n"
                .format(start_day + dtk_sft.DAYS_IN_YEAR))
        outfile.write(
            "running chi-squared test for expected new infections for {0} {1}-months time bins: \n"
            "base_infectivity = {2}, immunity_acquisition_factor = {3}.\n"
            .format(dtk_sft.MONTHS_IN_YEAR // number_of_month, number_of_month,
                    base_infectivity, immunity_acquisition_factor))
        running_total = 0
        # accumulate daily counts; close a bin every number_of_month months
        for day_index, t in enumerate(range(start_day, len(new_infection)), start=1):
            running_total += new_infection[t]
            if not day_index % (number_of_month * dtk_sft.DAYS_IN_MONTH):
                value_to_test.append(running_total)
                running_total = 0
        dtk_sft.plot_data(value_to_test, dist2=expected,
                          label1="actual_new_infections",
                          label2="expected_new_infection",
                          title="actual vs. expected new infection for every {} months".format(number_of_month),
                          xlabel="every {} months".format(number_of_month),
                          ylabel="# of new infections",
                          category='actual_vs_expected_new_infections',
                          show=True, line=True)
        result = dtk_sft.test_multinomial(dist=value_to_test, proportions=expected,
                                          report_file=outfile, prob_flag=False)
        if result:
            outfile.write(
                "GOOD: The Chi-squared test for number of new infections in every {} months passed.\n"
                .format(number_of_month))
        else:
            success = False
            outfile.write(
                "BAD: The Chi-squared test for number of new infections in every {} months failed.\n"
                .format(number_of_month))
        outfile.write(dtk_sft.format_success_msg(success))
    if debug:
        print("SUMMARY: Success={0}\n".format(success))
    return success
def create_report_file(param_obj, output_dict, reporter_df, report_name, debug):
    """
    Compare the TB incidence counted from the stdout log (output_dict,
    time step -> count) against the custom reporter's yearly sums, and check
    that both cover the whole simulation duration.

    :return: True on success
    """
    with open(report_name, "w") as outfile:
        config_name = param_obj[Config.config_name]
        outfile.write("Config_name = {}\n".format(config_name))
        success = True
        simulation_duration = param_obj[Config.duration]
        timestep = param_obj[Config.simulation_timestep]
        if not len(output_dict):
            success = False
            outfile.write(dtk_sft.sft_no_test_data)
        else:
            outfile.write("Group the incidence by year and get the sum for all age bins:\n")
            # the year column becomes the index of the groupby_df
            groupby_df = reporter_df.groupby(ReportColumn.year).sum()
            if debug:
                with open("DEBUG_groupby_dataframe.csv", "w") as groupby_file:
                    groupby_df.to_csv(groupby_file, header=True)
            # last time step the simulation can actually reach given the step size
            expected_max_time_step = math.floor(simulation_duration / timestep) * timestep
            if simulation_duration <= 180 or expected_max_time_step <= 180:
                success = False
                outfile.write("BAD: the simulation duration is too short, please increase the duration.\n")
            elif not groupby_df[ReportColumn.incidence].sum():
                success = False
                outfile.write("BAD: there in no TB incidence in the test, please check the test.\n")
            else:
                outfile.write("Testing the incidence count with log_valid for all year buckets:\n")
                i = incidence_count = 0
                years = groupby_df.index.values
                incidence_counts = []
                for t in output_dict:
                    if i < len(years):
                        year = years[i]
                        if t <= round(year * dtk_sft.DAYS_IN_YEAR):
                            incidence_count += output_dict[t]
                        else:
                            reporter_sum = int(groupby_df[groupby_df.index == year][ReportColumn.incidence])
                            incidence_counts.append(incidence_count)
                            if incidence_count != reporter_sum:
                                success = False
                                outfile.write(
                                    "BAD: in year {0} the incidence count get from reporter is {1}, while test.txt reports"
                                    " {2} cases.\n".format(year, reporter_sum, incidence_count))
                            # this time step belongs to the next window
                            incidence_count = output_dict[t]
                            i += 1
                    else:
                        break
                dtk_sft.plot_data(incidence_counts,
                                  dist2=np.array(groupby_df[ReportColumn.incidence]),
                                  label1="reporter", label2="log_valid",
                                  title="incidence", xlabel="every half year",
                                  ylabel="incidence", category='incidence',
                                  show=True, line=False, alpha=0.8)
                # fix: message typo "mathces" -> "matches"
                outfile.write("Testing whether the time step in log matches the simulation duration:\n")
                max_time_step = max(output_dict.keys())
                if max_time_step != expected_max_time_step:
                    success = False
                    outfile.write("BAD: the last time step in simulation is {0}, expected {1}."
                                  "\n".format(max_time_step, expected_max_time_step))
                outfile.write("Testing whether the reporter year matches the simulation duration:\n")
                if i != len(years):
                    success = False
                    outfile.write(
                        "BAD: the reporter has data up to year {0} but the simulation duration is {1}, we are expecting "
                        "not more than year {2} from reporter.\n".format(
                            max(years), simulation_duration,
                            math.floor(simulation_duration / dtk_sft.DAYS_IN_YEAR)))
                    outfile.write("i={0}, len(years)={1}\n".format(i, len(years)))
                if simulation_duration > round(max(years) * dtk_sft.DAYS_IN_YEAR) + 180:
                    success = False
                    outfile.write(
                        "BAD: the reporter has data up to year {0} but the simulation duration is {1}, we are expecting "
                        "data after year {0} from reporter.\n".format(max(years), simulation_duration))
        outfile.write(dtk_sft.format_success_msg(success))
    if debug:
        print("SUMMARY: Success={0}\n".format(success))
    return success
def create_report_file(param_obj, campaign_obj, output_df, reporter, report_name, debug):
    """
    Compare the negative/default custom-event counts from the stdout log
    (output_df, indexed by time step) against the custom reporter's yearly
    sums. When the campaign treatment fraction is below 1, only the default
    column is verified.

    :return: True on success
    """
    with open(report_name, "w") as outfile:
        config_name = param_obj[Config.config_name]
        outfile.write("Config_name = {}\n".format(config_name))
        success = True
        simulation_duration = param_obj[Config.duration]
        timestep = param_obj[Config.simulation_timestep]
        if not len(output_df):
            success = False
            outfile.write(dtk_sft.sft_no_test_data)
        # reporter[1] is a boolean.
        # True means parse_custom_reporter succeeded and reporter[0] is a dataframe collected from the csv report.
        # False means parse_custom_reporter failed and reporter[0] is an error message.
        elif not reporter[1]:
            success = False
            outfile.write("BAD: failed to parse report, get exception : {}.\n".format(reporter[0]))
        else:
            outfile.write("GOOD: parse report successfully.\n")
            reporter_df = reporter[0]
            # fix: message typo "envent" -> "event"
            outfile.write("Group the custom event column by year and get the sum for all age bins:\n")
            # the year column becomes the index of the groupby_df
            groupby_df = reporter_df.groupby(ReportColumn.year).sum()
            if debug:
                with open("DEBUG_groupby_dataframe.csv", "w") as groupby_file:
                    groupby_df.to_csv(groupby_file, header=True)
            outfile.write("Checking whether we have enough test data:\n")
            expected_max_time_step = math.floor(simulation_duration / timestep) * timestep
            if simulation_duration <= 180 or expected_max_time_step <= 180:
                success = False
                outfile.write("BAD: the simulation duration is too short, please increase the duration.\n")
            elif (not groupby_df[ReportColumn.negative].sum()) and (not groupby_df[ReportColumn.default].sum()):
                success = False
                outfile.write("BAD: there in no {0} and {1} in the test, please check the test.\n"
                              .format(ReportColumn.negative, ReportColumn.default))
            else:
                outfile.write("Checking more test condition:\n")
                treatment_fraction = float(campaign_obj[Campaign.treatment])
                outfile.write("{0} in Campaign.json is {1}.\n".format(Campaign.treatment, treatment_fraction))
                if treatment_fraction == 1:
                    test_treatment_only = False
                    outfile.write("Testing the {0} and {1} count with log_valid for all year buckets:\n"
                                  .format(ReportColumn.negative, ReportColumn.default))
                else:
                    test_treatment_only = True
                    outfile.write("Testing the {0} count with log_valid for all year buckets:\n"
                                  .format(ReportColumn.default))
                i = default_count = negative_count = 0
                years = groupby_df.index.values
                negative_counts = []
                default_counts = []
                for t in output_df.index.values.tolist():
                    if i < len(years):
                        year = years[i]
                        if t <= round(year * dtk_sft.DAYS_IN_YEAR):
                            default_count += output_df.loc[t][ReportColumn.default]
                            negative_count += output_df.loc[t][ReportColumn.negative]
                        else:
                            reporter_negative_sum = int(groupby_df[groupby_df.index == year][ReportColumn.negative])
                            reporter_default_sum = int(groupby_df[groupby_df.index == year][ReportColumn.default])
                            negative_counts.append(negative_count)
                            default_counts.append(default_count)
                            # negative column is only meaningful when everyone is treated
                            if not test_treatment_only:
                                if negative_count != reporter_negative_sum:
                                    success = False
                                    outfile.write(
                                        "BAD: in year {0} the {1} count get from reporter is {2}, while "
                                        "test.txt reports {3} cases.\n".format(
                                            year, ReportColumn.negative,
                                            reporter_negative_sum, negative_count))
                            # NOTE(review): default column is checked in both modes,
                            # consistent with the "Testing the {default} count" message above — confirm
                            if default_count != reporter_default_sum:
                                success = False
                                outfile.write(
                                    "BAD: in year {0} the {1} count get from reporter is {2}, while test.txt reports"
                                    " {3} cases.\n".format(
                                        year, ReportColumn.default,
                                        reporter_default_sum, default_count))
                            # start the next window with this time step's counts
                            default_count = output_df.loc[t][ReportColumn.default]
                            negative_count = output_df.loc[t][ReportColumn.negative]
                            i += 1
                    else:
                        break
                if not test_treatment_only:
                    dtk_sft.plot_data(negative_counts,
                                      dist2=np.array(groupby_df[ReportColumn.negative]),
                                      label1="reporter", label2="log_valid",
                                      title=ReportColumn.negative,
                                      xlabel="every half year", ylabel=ReportColumn.negative,
                                      category=ReportColumn.negative,
                                      show=True, line=True, alpha=0.8, overlap=True)
                dtk_sft.plot_data(default_counts,
                                  dist2=np.array(groupby_df[ReportColumn.default]),
                                  label1="reporter", label2="log_valid",
                                  title=ReportColumn.default,
                                  xlabel="every half year", ylabel=ReportColumn.default,
                                  category=ReportColumn.default,
                                  show=True, line=True, alpha=0.8, overlap=True)
                # fix: message typo "mathces" -> "matches"
                outfile.write("Testing whether the time step in log matches the simulation duration:\n")
                max_time_step = max(output_df.index.values)
                if abs(max_time_step - expected_max_time_step) > 1:
                    success = False
                    outfile.write("BAD: the last time step in simulation is {0}, expected {1}."
                                  "\n".format(max_time_step, expected_max_time_step))
                outfile.write("Testing whether the reporter year matches the simulation duration:\n")
                if i != len(years):
                    success = False
                    outfile.write(
                        "BAD: the reporter has data up to year {0} but the simulation duration is {1}, we are expecting "
                        "not more than year {2} from reporter.".format(
                            max(years), simulation_duration,
                            math.floor(simulation_duration / 180)))
                if simulation_duration > round(max(years) * dtk_sft.DAYS_IN_YEAR) + 180:
                    success = False
                    outfile.write(
                        "BAD: the reporter has data up to year {0} but the simulation duration is {1}, we are expecting "
                        "data after year {0} from reporter.".format(max(years), simulation_duration))
        outfile.write(dtk_sft.format_success_msg(success))
    if debug:
        print("SUMMARY: Success={0}\n".format(success))
    return success
def create_report_file(report_data_obj, report_name, debug):
    """
    Verify acquisition, transmission, and mortality blocking of the vaccine
    across the four property groups at the outbreak time step.

    :return: True on success
    """
    with open(report_name, "w") as outfile:
        success = True
        timestep = Outbreak_Start_Day
        effect = prime
        new_infections = []
        statistical_populations = []
        disease_deaths = []
        for i in range(len(KEY_NEW_INFECTIONS_GROUP)):
            new_infection = report_data_obj[KEY_NEW_INFECTIONS_GROUP[i]][timestep]
            statistical_population = report_data_obj[KEY_STATISTICAL_POPULATION_GROUP[i]][timestep]
            # disease death in the last two groups happen 1 day later than the first two groups.
            disease_death = report_data_obj[KEY_DISEASE_DEATHS_GROUP[i]][int(timestep + i / 2)]
            new_infections.append(new_infection)
            statistical_populations.append(statistical_population)
            disease_deaths.append(disease_death)
        # test acquisition blocking
        new_infection_seed_test = new_infections[1]
        statistical_population_seed_test = statistical_populations[1]
        expected_new_infection_seed_test = statistical_population_seed_test * (1.0 - effect)
        tolerance_1 = 0.0 if expected_new_infection_seed_test == 0.0 else 2e-2 * statistical_population_seed_test
        if math.fabs(new_infection_seed_test - expected_new_infection_seed_test) > tolerance_1:
            success = False
            outfile.write(
                "BAD: At time step {0}, {1} reported new infections in Group 2_Seed_Test, expected {2}.\n"
                .format(timestep, new_infection_seed_test, expected_new_infection_seed_test))
        # test transmission blocking
        new_infection_seed_control = new_infections[0]
        new_infection_control = new_infections[2]
        new_infection_test = new_infections[3]
        expected_new_infection_test = (1.0 - effect) * new_infection_control * \
            new_infection_seed_test / float(new_infection_seed_control)
        statistical_population_test = statistical_populations[3]
        tolerance_2 = 0.0 if expected_new_infection_test == 0.0 else 2e-2 * statistical_population_test
        if math.fabs(new_infection_test - expected_new_infection_test) > tolerance_2:
            success = False
            # fix: was `new_infectio_test`, an undefined name (NameError at runtime)
            outfile.write(
                "BAD: At time step {0}, {1} reported new infections in Group 4_Test, expected {2}.\n"
                .format(timestep, new_infection_test, expected_new_infection_test))
        # test mortality blocking
        disease_death_seed_test = disease_deaths[1]
        expected_disease_death_seed_test = new_infection_seed_test * (1.0 - effect)
        tolerance_3 = 0.0 if expected_disease_death_seed_test == 0.0 else 2e-2 * new_infection_seed_test
        if math.fabs(disease_death_seed_test - expected_disease_death_seed_test) > tolerance_3:
            success = False
            outfile.write(
                "BAD: At time step {0}, {1} reported disease deaths in Group 2_Seed_Test, expected {2}.\n"
                .format(timestep, disease_death_seed_test, expected_disease_death_seed_test))
        outfile.write(sft.format_success_msg(success))
    sft.plot_data(new_infections, disease_deaths,
                  label1="new_infections", label2="disease_death",
                  xlabel="0:1_Seed_Control, 1:2_Seed_Test, 2:3_Control, 4:3_Test",
                  title="new_infections vs. disease_death",
                  category='New_infections_vs_disease_death', show=True)
    if debug:
        print("SUMMARY: Success={0}\n".format(success))
    return success
def create_report_file(report_data_obj, report_name, debug):
    """Check per-outbreak new disease deaths against the expected mortality-blocking effects.

    For each outbreak in `Interventions`, the expected number of new disease deaths is
    (1 - effect) * new_infections, where `effect` comes from calc_effect(debug).
    Failures are written to `report_name`; plots compare actual vs. expected deaths
    and deaths vs. infections.

    :param report_data_obj: dict of channel name -> per-timestep values (parsed report)
    :param report_name: path of the text report file to write
    :param debug: when True, dump intermediate data files and print the summary
    :return: True if every outbreak passed the tolerance check, False otherwise
    """
    with open(report_name, "w") as outfile:
        success = True
        timestep = Outbreak_Start_Day
        effects = calc_effect(debug)
        new_infections = []
        new_disease_deaths = []
        expected_new_disease_deaths = []
        actual_effects = []
        # deaths channel is cumulative; track previous value to get per-outbreak deltas
        pre_disease_death = 0
        for i in range(len(Interventions)):
            new_infection = report_data_obj[KEY_NEW_INFECTIONS][timestep]
            disease_death = report_data_obj[KEY_DISEASE_DEATHS][timestep]
            new_disease_death = disease_death - pre_disease_death
            effect = effects[i]
            expected_new_disease_death = (1.0 - effect) * new_infection
            tolerance = 0.0 if expected_new_disease_death == 0.0 else 3e-2 * new_infection
            # back out the effect actually observed (guard against division by zero)
            actual_effect = 1.0 - new_disease_death / float(new_infection) if new_infection != 0 else 0.0
            if math.fabs(new_disease_death - expected_new_disease_death) > tolerance:
                success = False
                outfile.write(
                    "BAD: At time step {0}, outbreak {1}, {2} reported new disease death, expected {3}.\n"
                    .format(timestep, Interventions[i], new_disease_death, expected_new_disease_death))
                outfile.write(
                    "actual MortalityBlocking effect is {0}, expected {1}.\n".
                    format(actual_effect, effect))
            new_disease_deaths.append(new_disease_death)
            expected_new_disease_deaths.append(expected_new_disease_death)
            actual_effects.append(actual_effect)
            new_infections.append(new_infection)
            # advance to the next outbreak's time step
            timestep += Timesteps_Between_Repetitions
            pre_disease_death = disease_death
        sft.plot_data(new_disease_deaths, expected_new_disease_deaths,
                      label1="Actual", label2="Expected",
                      xlabel="outbreak", ylabel="disease death",
                      title="Actual disease death vs. expected disease death",
                      category='Disease_death', show=True)
        sft.plot_data(new_disease_deaths, new_infections,
                      label1="death", label2="infection",
                      xlabel="outbreak", ylabel="population",
                      title="Actual disease death vs. new infections",
                      category='disease_death_vs_new_infections', show=True)
        if debug:
            with open("New_disease_death.txt", "w") as file:
                for i in range(len(new_disease_deaths)):
                    file.write("{0}, {1}.\n".format(
                        new_disease_deaths[i], expected_new_disease_deaths[i]))
            with open("Effects.txt", "w") as file:
                for i in range(len(actual_effects)):
                    file.write("{0}, {1}.\n".format(actual_effects[i], effects[i]))
        outfile.write(sft.format_success_msg(success))
        if debug:
            print("SUMMARY: Success={0}\n".format(success))
        return success
def create_report_file(param_obj, node_list, campaign_obj, migration_df, report_data_obj,
                       stdout_filename, report_name, debug):
    """Chi-squared test of monthly new infections against a waning-immunity model, multi-core.

    Computes the expected number of new infections per monthly bin from base infectivity
    and an exponentially decaying acquisition-immunity factor, compares with actual
    binned counts via a chi-squared (multinomial) test, and verifies every core
    reported at every time step in stdout.

    :param param_obj: dict of config parameters (base infectivity, decay rate, cores, ...)
    :param node_list: list of simulation nodes (expected counts scale by its length)
    :param campaign_obj: dict with the campaign start day
    :param migration_df: migration data passed to the test-condition check
    :param report_data_obj: dict of channel name -> per-timestep values
    :param stdout_filename: stdout log to parse for per-core reporting
    :param report_name: path of the text report file to write
    :param debug: passed through to the stdout parser
    :return: True if all checks pass, False otherwise
    """
    with open(report_name, "w") as outfile:
        config_name = param_obj[KEY_CONFIG_NAME]
        outfile.write("Config_name = {}\n".format(config_name))
        success = True
        base_infectivity = param_obj[KEY_BASE_INFECTIVITY]
        start_day = campaign_obj[KEY_START_DAY]
        new_infection = report_data_obj[KEY_NEW_INFECTION]
        immunity_acquisition_factor = param_obj[KEY_IMMUNITY_ACQUISITION_FACTOR]
        decay_rate = param_obj[KEY_DECAY_RATE]
        outfile.write("checking some test conditions:\n")
        outfile.write("  -- simulation duration: {} days\n".format(len(new_infection)))
        # need at least one full year of data after the campaign start
        if len(new_infection) < start_day + 1 + dtk_sft.DAYS_IN_YEAR:
            success = False
            outfile.write(
                "BAD: the simulation duration is too short, please make sure it's at least {} days.\n"
                .format(start_day + 1 + dtk_sft.DAYS_IN_YEAR))
        result = tms.check_test_condition(param_obj[KEY_NUM_CORES], node_list, migration_df, outfile)
        if not result:
            success = False
            # summary message is writen to the report file in the check_test_condition function
        number_of_month = 1
        outfile.write(
            "calculate expected number of infections for a time period of {} month("
            "unit is 1/years):\n".format((number_of_month)))
        t_initial = 0
        expected = []
        # convert per-day rates to per-year so t runs over [0, 1] years below
        decay_rate *= dtk_sft.DAYS_IN_YEAR
        base_infectivity *= dtk_sft.DAYS_IN_YEAR
        step = number_of_month / dtk_sft.MONTHS_IN_YEAR
        for t_final in np.arange(step, 1.01, step):
            # closed-form integral of infectivity with exponentially decaying immunity
            expected_new_infection = base_infectivity * (t_final - t_initial) - base_infectivity * (
                1.0 - immunity_acquisition_factor) / decay_rate * math.exp(
                    -1 * decay_rate * t_initial) * (1.0 - math.exp(-1 * decay_rate * (t_final - t_initial)))
            expected_new_infection *= len(node_list)
            expected.append(expected_new_infection)
            t_initial = t_final
        # group new infections for every month:
        value_to_test = []
        outfile.write(
            "running chi-squared test for actual vs expected new infections for {0} {1}-months time bins: \n"
            "base_infectivity = {2}, immunity_acquisition_factor = {3}, decay rate = {4}.(unit is 1/years)\n"
            .format(dtk_sft.MONTHS_IN_YEAR // number_of_month, number_of_month,
                    base_infectivity, immunity_acquisition_factor, decay_rate))
        actual_new_infection = 0
        i = 0
        for t in range(start_day + 1, len(new_infection)):
            actual_new_infection += new_infection[t]
            i += 1
            # close out a bin every number_of_month months
            if not i % (number_of_month * dtk_sft.DAYS_IN_MONTH):
                value_to_test.append(actual_new_infection)
                actual_new_infection = 0
        dtk_sft.plot_data(
            value_to_test,
            dist2=expected,
            label1="actual_new_infections",
            label2="expected_new_infection",
            title="actual vs. expected new infection for every {} month".format(number_of_month),
            xlabel="month",
            ylabel="# of new infections",
            category='actual_vs_expected_new_infections',
            show=True,
            line=False)
        result = dtk_sft.test_multinomial(dist=value_to_test,
                                          proportions=expected,
                                          report_file=outfile,
                                          prob_flag=False)
        if not result:
            success = False
            outfile.write(
                "BAD: The Chi-squared test for number of new infections in every {} months failed.\n"
                .format(number_of_month))
        else:
            outfile.write(
                "GOOD: The Chi-squared test for number of new infections in every {} months passed.\n"
                .format(number_of_month))
        output_dict = parse_output_file(stdout_filename, debug)
        outfile.write("checking if all cores are reporting in every time step in stdout file:\n")
        core_list = [str(n) for n in (range(param_obj[KEY_NUM_CORES]))]
        for t, cores in output_dict.items():
            if core_list != sorted(cores):
                success = False
                outfile.write(
                    "BAD: at time step {0}, these cores reported to stdout.txt are: {1}, while "
                    "expected cores are: {2}.\n".format(t, cores, core_list))
        outfile.write(dtk_sft.format_success_msg(success))
        if debug:
            print("SUMMARY: Success={0}\n".format(success))
        return success
def create_report_file_incidence(column_to_test, column_year, param_obj, output_dict,
                                 reporter_df, report_name, debug):
    """Compare incidence counts from the reporter CSV with counts parsed from log output.

    Groups the reporter dataframe by year (summing over age bins), then walks the
    per-timestep log counts in `output_dict`, accumulating them into year buckets and
    checking each bucket against the reporter's sum. Also checks that the reporter
    covers the simulation duration.

    :param column_to_test: reporter column holding the incidence counts
    :param column_year: reporter column holding the (fractional) report year
    :param param_obj: dict of config parameters (config name, duration)
    :param output_dict: timestep -> count parsed from the simulation log
    :param reporter_df: pandas DataFrame loaded from the reporter output
    :param report_name: path of the text report file to write
    :param debug: when True, dump the grouped dataframe and print the summary
    :return: True if all checks pass, False otherwise
    """
    with open(report_name, "w") as outfile:
        config_name = param_obj[Config.config_name]
        outfile.write("Config_name = {}\n".format(config_name))
        success = True
        simulation_duration = param_obj[Config.duration]
        # timestep = param_obj[Config.simulation_timestep]
        if not len(output_dict):
            success = False
            outfile.write(dtk_sft.sft_no_test_data)
        else:
            outfile.write(
                "Group the {} by year and get the sum for all age bins:\n".format(column_to_test))
            # the year column becomes the index of the groupby_df
            groupby_df = reporter_df.groupby(column_year).sum()
            if debug:
                with open("DEBUG_groupby_dataframe.csv", "w") as groupby_file:
                    groupby_df.to_csv(groupby_file, header=True)
            if not groupby_df[column_to_test].sum():
                success = False
                outfile.write(
                    "BAD: there in no {} in the test, please check the test.\n".format(column_to_test))
            else:
                outfile.write(
                    "Testing the {} count with log_valid for all year buckets:\n".format(column_to_test))
                i = incidence_count = 0
                years = groupby_df.index.values
                incidence_counts = []
                for t in output_dict:
                    if i < len(years):
                        year = years[i]
                        # still inside the current year bucket: keep accumulating
                        if t <= round(year * dtk_sft.DAYS_IN_YEAR):
                            incidence_count += output_dict[t]
                        else:
                            # bucket complete: compare with the reporter's sum for that year
                            reporter_sum = int(groupby_df[groupby_df.index == year][column_to_test])
                            incidence_counts.append(incidence_count)
                            if incidence_count != reporter_sum:
                                success = False
                                outfile.write(
                                    "BAD: in year {0} the {1} count get from reporter is {2}, while test.txt reports"
                                    " {3} cases.\n".format(year, column_to_test, reporter_sum, incidence_count))
                            # current timestep starts the next bucket
                            incidence_count = output_dict[t]
                            i += 1
                    else:
                        break
                dtk_sft.plot_data(incidence_counts,
                                  dist2=np.array(groupby_df[column_to_test]),
                                  label1="reporter",
                                  label2="log_valid",
                                  title=str(column_to_test),
                                  xlabel="every half year",
                                  ylabel=str(column_to_test),
                                  category=str(column_to_test),
                                  show=True,
                                  line=False,
                                  alpha=0.8,
                                  overlap=True)
                outfile.write("Testing whether the reporter year matches the simulation duration:\n")
                # reporter stopped early relative to the simulation duration
                if i != len(years):
                    success = False
                    outfile.write(
                        "BAD: the reporter has data up to year {0} but the simulation duration is {1}, we are expecting "
                        "not more than year {2} from reporter.".format(
                            max(years), simulation_duration,
                            math.floor(simulation_duration / 180)))
                # simulation ran long enough that another half-year report was expected
                if simulation_duration > round(max(years) * dtk_sft.DAYS_IN_YEAR) + 180:
                    success = False
                    outfile.write(
                        "BAD: the reporter has data up to year {0} but the simulation duration is {1}, we are expecting "
                        "data after year {0} from reporter.".format(max(years), simulation_duration))
        outfile.write(dtk_sft.format_success_msg(success))
        if debug:
            print("SUMMARY: Success={0}\n".format(success))
        return success
def create_report_file_prevalence(column_to_test, column_year, param_obj, output_dict,
                                  reporter_df, report_name, debug):
    """Compare prevalence from the reporter CSV with log output at each report window's last step.

    Groups the reporter dataframe by year (summing over age bins) and checks that the
    prevalence logged at the last time step of each report window equals the reporter's
    value. Average-prevalence checking is present but commented out.

    :param column_to_test: reporter column holding the prevalence counts
    :param column_year: reporter column holding the (fractional) report year
    :param param_obj: dict of config parameters (config name)
    :param output_dict: timestep -> prevalence count parsed from the simulation log
    :param reporter_df: pandas DataFrame loaded from the reporter output
    :param report_name: path of the text report file to write
    :param debug: when True, dump intermediate data and print the summary
    :return: True if all checks pass, False otherwise
    """
    with open(report_name, "w") as outfile:
        config_name = param_obj[Config.config_name]
        outfile.write("Config_name = {}\n".format(config_name))
        success = True
        # simulation_duration = param_obj[Config.duration]
        # timestep = param_obj[Config.simulation_timestep]
        if not len(output_dict):
            success = False
            outfile.write(dtk_sft.sft_no_test_data)
        outfile.write(
            "Group the {} prevalence by year and get the sum for all age bins:\n".format(column_to_test))
        # the year column becomes the index of the groupby_df
        groupby_df = reporter_df.groupby(column_year).sum()
        if not groupby_df[column_to_test].sum():
            success = False
            outfile.write(
                "BAD: there in no {} prevalence in the test, please check the test.\n".format(column_to_test))
        if debug:
            with open("DEBUG_groupby_dataframe.csv", "w") as groupby_file:
                groupby_df.to_csv(groupby_file, header=True)
        outfile.write(
            "Testing the {} prevalence count with log_valid for all year buckets:\n".format(column_to_test))
        i = prevalence_sum = 0
        years = groupby_df.index.values
        prevalence_counts = []
        # timestep -> logged prevalence at the final step of each report window
        prevalence_at_last_time_step = {}
        for t in output_dict:
            if i < len(years):
                year = years[i]
                if t <= round(year * dtk_sft.DAYS_IN_YEAR):
                    prevalence_sum += output_dict[t]
                    # remember the value at the window's closing time step
                    if t == round(year * dtk_sft.DAYS_IN_YEAR):
                        prevalence_at_last_time_step[t] = output_dict[t]
                else:
                    reporter_sum = int(groupby_df[groupby_df.index == year][column_to_test])
                    # average prevalence over the window (graphing only; check is commented out)
                    prevalence = prevalence_sum / (years[0] * dtk_sft.DAYS_IN_YEAR)
                    prevalence_counts.append(prevalence)
                    # uncomment the following lines to test average prevalence
                    # if prevalence != reporter_sum:
                    #     success = False
                    #     outfile.write("BAD: in year {0} the HIV prevalence count get from reporter is {1}, while test.txt reports"
                    #                   " {2} cases.\n".format(year, reporter_sum, prevalence))
                    if prevalence_at_last_time_step[sorted(
                            prevalence_at_last_time_step.keys())[-1]] != reporter_sum:
                        success = False
                        outfile.write(
                            "BAD: in year {0} the {1} prevalence count get from reporter is {2}, while test.txt reports"
                            " {3} cases at the last time step of this report time window.\n"
                            .format(year, column_to_test, reporter_sum,
                                    prevalence_at_last_time_step[sorted(
                                        prevalence_at_last_time_step.keys())[-1]]))
                    # current timestep starts the next window's accumulation
                    prevalence_sum = output_dict[t]
                    i += 1
            else:
                break
        # uncomment the following lines to plot average prevalence from logging and prevalence from reporter
        # dtk_sft.plot_data(prevalence_counts, dist2=np.array(groupby_df[ReportColumn.HIV]), label1="log_valid_on_average",
        #                   label2="reporter", title="HIV prevalence",
        #                   xlabel="every half year", ylabel="HIV prevalence", category='HIV_prevalence',
        #                   show=True, line=True, alpha=0.8, overlap=True)
        dtk_sft.plot_data(
            [
                prevalence_at_last_time_step[key]
                for key in sorted(prevalence_at_last_time_step.keys())
            ],
            dist2=np.array(groupby_df[column_to_test]),
            label1="log_valid",
            label2="reporter",
            title="{} prevalence at last timestep of each report time window".format(column_to_test),
            xlabel="every half year",
            ylabel="{} prevalence".format(column_to_test),
            category='{}_prevalence_last_time_step'.format(column_to_test),
            show=True,
            line=True,
            alpha=0.8,
            overlap=True)
        if debug:
            with open('DEBUG_prevalence_at_last_time_step.json', 'w') as file:
                json.dump(prevalence_at_last_time_step, file, indent=4)
        outfile.write(dtk_sft.format_success_msg(success))
        if debug:
            print("SUMMARY: Success={0}\n".format(success))
        return success
def create_report_file(report_data_obj, report_name, debug):
    """Validate prime + boost acquisition/transmission/mortality blocking over repeated outbreaks.

    Runs the three blocking checks once per repetition, boosting each effect between
    repetitions via effect += (1 - effect) * Boost_*. Writes failures to `report_name`,
    plots infections vs. deaths, and returns the overall success flag.

    :param report_data_obj: dict of channel name -> per-timestep values (parsed report)
    :param report_name: path of the text report file to write
    :param debug: when True, also print the summary line to stdout
    :return: True if all checks in all repetitions pass, False otherwise
    """
    with open(report_name, "w") as outfile:
        success = True
        timestep = Outbreak_Start_Day
        effect_a = Prime_Acquire
        effect_t = Prime_Transmit
        effect_m = Prime_Mortality
        new_infections = []
        statistical_populations = []
        new_disease_deaths = []
        for x in range(Number_Repetitions):
            num_group = len(KEY_NEW_INFECTIONS_GROUP)
            for i in range(num_group):
                new_infection = report_data_obj[KEY_NEW_INFECTIONS_GROUP[i]][timestep]
                statistical_population = report_data_obj[KEY_STATISTICAL_POPULATION_GROUP[i]][timestep]
                # disease death in the last 2 groups happen 1 day later than the first 2 groups.
                pre_disease_death = report_data_obj[KEY_DISEASE_DEATHS_GROUP[i]][int(timestep + i/2 - 1)]
                disease_death = report_data_obj[KEY_DISEASE_DEATHS_GROUP[i]][int(timestep + i/2)]
                new_disease_death = disease_death - pre_disease_death
                new_infections.append(new_infection)
                statistical_populations.append(statistical_population)
                new_disease_deaths.append(new_disease_death)

            # test acquisition blocking
            new_infection_seed_test = new_infections[1 + x * num_group]
            statistical_population_seed_test = statistical_populations[1 + x * num_group]
            expected_new_infection_seed_test = statistical_population_seed_test * (1.0 - effect_a) * Outbreak_Demographic_Coverage
            tolerance_1 = 0.0 if expected_new_infection_seed_test == 0.0 else 2e-2 * statistical_population_seed_test
            if math.fabs(new_infection_seed_test - expected_new_infection_seed_test) > tolerance_1:
                success = False
                outfile.write("BAD: At time step {0}, {1} reported new infections in Group 2_Seed_Test, expected {2}.\n".format(
                    timestep, new_infection_seed_test, expected_new_infection_seed_test))

            # test transmission blocking
            new_infection_seed_control = new_infections[0 + x * num_group]
            new_infection_control = new_infections[2 + x * num_group]
            new_infection_test = new_infections[3 + x * num_group]
            expected_new_infection_test = (1.0 - effect_t) * new_infection_control * new_infection_seed_test / float(new_infection_seed_control)
            # BUGFIX: tolerance must use the current repetition's population; the
            # original indexed statistical_populations[3] (repetition 0) for every x.
            statistical_population_test = statistical_populations[3 + x * num_group]
            tolerance_2 = 0.0 if expected_new_infection_test == 0.0 else 2e-2 * statistical_population_test
            if math.fabs(new_infection_test - expected_new_infection_test) > tolerance_2:
                success = False
                outfile.write("BAD: At time step {0}, {1} reported new infections in Group 4_Test, expected {2}.\n".format(
                    timestep, new_infection_test, expected_new_infection_test))

            # test mortality blocking
            disease_death_seed_test = new_disease_deaths[1 + x * num_group]
            expected_disease_death_seed_test = new_infection_seed_test * (1.0 - effect_m)
            tolerance_3 = 0.0 if expected_disease_death_seed_test == 0.0 else 2e-2 * new_infection_seed_test
            if math.fabs(disease_death_seed_test - expected_disease_death_seed_test) > tolerance_3:
                success = False
                outfile.write("BAD: At time step {0}, {1} reported disease deaths in Group 2_Seed_Test, expected {2}.\n".format(
                    timestep, disease_death_seed_test, expected_disease_death_seed_test))

            # advance to the next repetition and apply the boost to each effect
            timestep += Timesteps_Between_Repetitions
            effect_a = effect_a + (1.0 - effect_a) * Boost_Acquire
            effect_t = effect_t + (1.0 - effect_t) * Boost_Transmit
            effect_m = effect_m + (1.0 - effect_m) * Boost_Mortality

        outfile.write(sft.format_success_msg(success))
        # FIX: axis label previously read "4&7:Test"; group indices 3 & 7 are the Test group
        sft.plot_data(new_infections, new_disease_deaths,
                      label1="new_infections", label2="disease_death",
                      xlabel="0&4:Seed_Control, 1&5:Seed_Test, 2&6:Control, 3&7:Test",
                      title="new_infections vs. new_disease_death",
                      category='New_infections_vs_new_disease_death', show=True)
        if debug:
            print("SUMMARY: Success={0}\n".format(success))
        return success
def create_report_file(param_obj, output_df, report_df, report_name, debug):
    """Check per-timestep infectiousness against an exponential-ramp infectivity model.

    Accumulates expected infectiousness contributions from each cohort of newly
    infected individuals (via calculate_infectiousness), normalizes by the
    statistical population, and compares with the reported infectiousness channel
    within a 3% relative tolerance.

    :param param_obj: dict of config parameters (duration, timestep, infectivity params)
    :param output_df: DataFrame of stdout channels (infected, infectiousness, stat pop)
    :param report_df: DataFrame of report channels (new/cumulative infections)
    :param report_name: path of the text report file to write
    :param debug: when True, dump the actual-vs-expected series to a file
    :return: True if every time step is within tolerance, False otherwise
    """
    total_timesteps = int(param_obj[KEY_TOTAL_TIMESTEPS])
    simulation_timestep = float(param_obj[KEY_SIMULATION_TIMESTEP])
    base_infectivity = float(param_obj[KEY_BASE_INFECTIVITY])
    baseline = float(param_obj[KEY_INFECTIVITY_EXPONENTIAL_BASELINE])
    delay = float(param_obj[KEY_INFECTIVITY_EXPONENTIAL_DELAY])
    rate = float(param_obj[KEY_INFECTIVITY_EXPONENTIAL_RATE])
    infected = output_df[KEY_INFECTED]
    infectiousness = output_df[KEY_INFECTIOUSNESS]
    statpop = output_df[KEY_STAT_POP]
    new_infections = report_df[KEY_NEW_INFECTIONS]
    cumulative_infections = report_df[KEY_CUMULATIVE_INFECTIONS]
    dtk_sft.plot_data(
        new_infections,
        cumulative_infections,
        label1="new infections",
        label2="cumulative infections",
        title="Exponential_Delay: {0} days, Exponential_Rate: {1} ".format(delay, rate),
        xlabel="time step / simulation_timestep{0}".format(simulation_timestep),
        ylabel=None,
        category='New_infections_vs_cumulative_infections',
        show=True,
        line=True)
    with open(report_name, "w") as outfile:
        # one slot per simulated time step (plus one for t=0)
        expected_infectiousness = [0] * (int(total_timesteps / simulation_timestep) + 1)
        pre_infected = int(infected[0])
        for index in range(1, len(infected)):
            # infected channel is cumulative; delta gives this step's new infections
            new_infected = int(infected[index]) - pre_infected
            pre_infected = int(infected[index])
            if new_infected:
                # each new cohort contributes a time-shifted infectiousness profile;
                # sum it element-wise into the running expectation
                new_expected_infectiousness = calculate_infectiousness(
                    new_infected, index, simulation_timestep, total_timesteps,
                    base_infectivity, baseline, delay, rate, debug)
                expected_infectiousness = list(
                    map(sum, zip(expected_infectiousness, new_expected_infectiousness)))
        success = True
        actual_infectiousness_all = []
        calc_infectiousness_all = []
        for index in range(len(infectiousness)):
            timestep = index * simulation_timestep
            actual_infectiousness = float(infectiousness[index])
            # per-capita expected infectiousness
            calc_infectiousness = expected_infectiousness[index] / float(statpop[index])
            actual_infectiousness_all.append(actual_infectiousness)
            calc_infectiousness_all.append(calc_infectiousness)
            tolerance = 0 if calc_infectiousness == 0 else 3e-2 * calc_infectiousness
            if math.fabs(actual_infectiousness - calc_infectiousness) > tolerance:
                success = False
                outfile.write(
                    "BAD: actual infectiousness at time step {0} is {1}, expected {2}.\n"
                    .format(timestep, actual_infectiousness, calc_infectiousness))
        if debug:
            with open("actual_vs_calc_infectiousness.txt", "w") as file:
                for i in range(len(actual_infectiousness_all)):
                    file.write("Time Step: {0}, actual infectiousnes: {1},"
                               " expected_infectiousness: {2}.\n".format(
                                   i * simulation_timestep,
                                   actual_infectiousness_all[i],
                                   calc_infectiousness_all[i]))
        dtk_sft.plot_data(
            actual_infectiousness_all,
            calc_infectiousness_all,
            label1="actual infectiousness",
            label2="calc infectiousness",
            title="Exponential_Delay: {0} days, Exponential_Rate: {1} ".format(delay, rate),
            xlabel="time step / simulation_timestep{0}".format(simulation_timestep),
            ylabel="Infectiousness",
            category='Infectiousness',
            show=True,
            line=True)
        outfile.write(dtk_sft.format_success_msg(success))
    return success
def create_report_file(data):
    """Check TB latency timer updates triggered by starting/stopping ART.

    Pairs each LifeCourseLatencyTimerUpdate log line with the individual's most
    recent StartedART/StoppedART event, then verifies the StoppedART latency
    durations do NOT follow the (much faster) StartedART exponential and are
    predominantly long durations.

    :param data: [report_name, log lines, tb_cd4_activation_vector]
                 (this test assumes the activation vector is constant)
    """
    report_name = data[0]
    lines = data[1]
    tb_cd4_activation_vector = data[2]  # this test assumes the vector is constant
    # StartedArt distribution is currently based on 0.01, while StoppedArt is on mostly 0.0000001, so we expect
    # much longer latency durations for the StoppedArt data, using big arbitrary # that is noticeably bigger than
    # what StartedArt distribution would extremely likely give us
    big_magic_number = 2000
    stopped_art_latency_data = []
    started_art_latency_data = []
    art_events_dict = {}
    success = True
    with open(report_name, "w") as outfile:
        if not lines:
            outfile.write("BAD: No relevant test data found.\n")
            success = False
        for line in lines:
            if "has event" in line:
                ind_id = int(dtk_sft.get_val("Individual ", line))
                art_status = line.split(" ")[9].strip(".")  # get_val only gets digits
                art_events_dict[ind_id] = art_status
            if "LifeCourseLatencyTimerUpdate" in line:
                ind_id = int(dtk_sft.get_val("Individual ", line))
                new_incubation_timer = float(dtk_sft.get_val("timer ", line))
                if ind_id in art_events_dict.keys():
                    if art_events_dict.get(ind_id) == "StartedART":
                        started_art_latency_data.append(new_incubation_timer)
                    else:
                        stopped_art_latency_data.append(new_incubation_timer)
                    art_events_dict.pop(ind_id)
                else:
                    success = False
                    outfile.write(
                        "BAD: No art-related event found in the logs for this timer update for Individual {},"
                        " at time {}.\n".format(ind_id, int(dtk_sft.get_val("time= ", line))))
        # we want the stopped art latency data to NOT match the started art latency data
        # and we expect the stopped art latency data to be long period times as made my our cd4_Activation_vector
        if dtk_sft.test_exponential(stopped_art_latency_data,
                                    tb_cd4_activation_vector[2],
                                    integers=False,
                                    roundup=False,
                                    round_nearest=False):
            outfile.write(
                "BAD: The StoppedArt latency data distribution matches the StartedArt latency data"
                " distribution, but shouldn't.\n")
            success = False
        # expected data only used for graphing purposes
        expected_stopped_art_data = np.random.exponential(
            1 / tb_cd4_activation_vector[0], len(stopped_art_latency_data))
        small_duration_count = 0
        for duration in stopped_art_latency_data:
            if duration < big_magic_number:
                small_duration_count += 1
        proportion_small = small_duration_count / float(len(stopped_art_latency_data))
        if proportion_small > 0.01:
            # BUGFIX: message previously said "More than 0.5%" while the threshold
            # tested above is 0.01, i.e. 1%; message now matches the code.
            outfile.write(
                "BAD: More than 1% of our durations are suspiciously small, it is {}. "
                "Please Investigate.\n".format(proportion_small))
            success = False
        outfile.write("Data points checked = {}.\n".format(len(stopped_art_latency_data)))
        outfile.write("SUMMARY: Success={0}\n".format(success))
        dtk_sft.plot_data(
            sorted(stopped_art_latency_data),
            sorted(expected_stopped_art_data),
            label1="Actual",
            label2="Expected",
            title="StoppedART Latency data should have a similar shape/scale of duration but will not "
            "match",
            xlabel="Data Points",
            ylabel="Days",
            category="tb_activation_and_cd4_hiv_first_on_art_off_art",
            line=True,
            overlap=True)
def create_report_file(param_obj, campaign_obj, output_dict, report_dict, report_name, debug):
    """Binomial 95% CI test of TB-test-positive counts per time step.

    The probability of a positive report is base_sensitivity * treatment_fraction;
    each time step's positive count is tested against that probability over the
    total tested population, skipping steps with insufficient sample size.

    :param param_obj: dict of config parameters (config name)
    :param campaign_obj: dict with base sensitivity and treatment fraction
    :param output_dict: timestep -> positive/negative human counts from logging
    :param report_dict: timestep -> positive counts from the report
    :param report_name: path of the text report file to write
    :param debug: when True, also print the summary line to stdout
    :return: True if all runnable binomial tests pass, False otherwise
    """
    with open(report_name, "w") as outfile:
        config_name = param_obj[KEY_CONFIG_NAME]
        outfile.write("Config_name = {}\n".format(config_name))
        success = True
        sensitivity = campaign_obj[KEY_BASE_SENSITIVITY]
        treatment_fraction = campaign_obj[KEY_TREATMENT_FRACTION]
        prob = sensitivity * treatment_fraction
        # number of time steps where a binomial test could actually be run
        binomial_test_count = 0
        positive = []
        total = []
        if not len(report_dict):
            success = False
            outfile.write(dtk_sft.sft_no_test_data)
        for t in report_dict:
            num_success = report_dict[t][KEY_POSITIVE]
            num_trials = output_dict[t][KEY_POSITIVE_HUMAN] + output_dict[t][KEY_NEGATIVE_HUMAN]
            positive.append(num_success)
            total.append(num_trials)
            # the logging includes positive, negative and default, please see issue #2279
            # rule-of-thumb normal-approximation check: need mean and (n - mean) >= 5
            if num_trials * prob < 5 or num_trials * (1 - prob) < 5:
                outfile.write(
                    "At timestep {0}, there is not enough sample size : mean = {1}, sample size - mean = {2}"
                    ".\n".format(t, num_trials * prob, num_trials * (1 - prob)))
            else:
                result = dtk_sft.test_binomial_95ci(num_success,
                                                    num_trials,
                                                    prob,
                                                    report_file=outfile,
                                                    category="TB test positive")
                outfile.write(
                    "At timestep {0}, the binomial 95% test result is {1}.\n".format(t, result))
                binomial_test_count += 1
                if not result:
                    success = False
        if not binomial_test_count:
            success = False
            outfile.write(
                "BAD: There is not enough sample size for binomial test in every time step, please fix the test.\n")
        dtk_sft.plot_data(
            positive,
            dist2=total,
            label1="TBTestPositive",
            label2="Total tested",
            title="Test positive vs. total, positive proportion = {}".format(prob),
            xlabel="time step",
            ylabel="# of individuals",
            category='Test_positive_vs_total',
            show=True,
            line=False)
        # TODO: write test to check if report matches debug logging. Pending on #2279. May not need this part.
        outfile.write(dtk_sft.format_success_msg(success))
        if debug:
            print("SUMMARY: Success={0}\n".format(success))
        return success
def create_report_file(data, debug=False):
    """Test TB latency durations (HIV-first individuals) against an exponential distribution.

    Pairs InitializeLatentInfection with the subsequent TBActivationPresymptomatic
    event per individual to get latency durations, then runs an exponential
    goodness-of-fit test with rate tb_cd4_activation_vector[0].

    :param data: [report_name, log lines, tb_cd4_activation_vector]
                 (this test assumes the activation vector is constant)
    :param debug: when True, dump the per-individual durations to a JSON file
    """
    report_name = data[0]
    lines = data[1]
    tb_cd4_activation_vector = data[2]  # this test assumes the vector is constant
    latency_data = {}      # individual id -> latency start time
    duration_data = {}     # individual id -> latency duration (days)
    success = True
    with open(report_name, "w") as outfile:
        if not lines:
            outfile.write("BAD: No relevant test data found.\n")
            success = False
        for line in lines:
            if "InitializeLatentInfection" in line:
                ind_id = int(dtk_sft.get_val("Individual ", line))
                start_time_stamp = int(dtk_sft.get_val("time= ", line))
                # a second initialization for the same individual resets the timer; note it
                if ind_id in latency_data.keys():
                    outfile.write(
                        "Individual {} incubation timer reset at time {}. Please check. "
                        "\n".format(ind_id, start_time_stamp))
                latency_data[ind_id] = start_time_stamp
            elif "TBActivationPresymptomatic" in line:
                ind_id = int(dtk_sft.get_val("Individual ", line))
                end_time_stamp = int(dtk_sft.get_val("time= ", line))
                if ind_id not in latency_data.keys():
                    outfile.write(
                        "Individual {} went presymptomatic without incubation timer update at time {}. "
                        "Please check. \n".format(ind_id, end_time_stamp))
                else:
                    duration = end_time_stamp - latency_data.get(ind_id)
                    duration_data[ind_id] = duration
        if debug:
            with open("DEBUG_duration_data.json", "w") as debug_outfile:
                json.dump(duration_data, debug_outfile, indent=4)
        durations = list(duration_data.values())
        # expected_data here only used for graphing purposes
        expected_data = [
            int(x + 1) for x in np.random.exponential(
                1 / tb_cd4_activation_vector[0], len(duration_data))
        ]
        success = dtk_sft.test_exponential(durations,
                                           tb_cd4_activation_vector[0],
                                           outfile,
                                           integers=True,
                                           roundup=True,
                                           round_nearest=False)
        outfile.write("Data points checked = {}.\n".format(len(duration_data)))
        outfile.write("SUMMARY: Success={0}\n".format(success))
        dtk_sft.plot_data(sorted(durations),
                          sorted(expected_data),
                          label1="Actual",
                          label2="Expected",
                          title="Latency Duration HIV then TB (Sorted)",
                          xlabel="Data Points",
                          ylabel="Days",
                          category="tb_activation_and_cd4_hiv_first",
                          line=True,
                          overlap=True)
def create_report_file(param_obj, campaign_obj, output_dict, report_dict, report_name, debug):
    """Chi-squared (multinomial) test of TB test positive/negative/default proportions.

    Expected proportions are [sensitivity * tf, (1 - sensitivity) * tf, 1 - tf]
    where tf is the treatment fraction. The overall test passes if the chi-squared
    test fails at no more than 5% of the time steps.

    :param param_obj: dict of config parameters (config name)
    :param campaign_obj: dict with base sensitivity and treatment fraction
    :param output_dict: parsed logging output (unused here beyond signature parity)
    :param report_dict: timestep -> positive/negative/default counts from the report
    :param report_name: path of the text report file to write
    :param debug: when True, also print the summary line to stdout
    :return: True if the failure rate is within the 5% allowance, False otherwise
    """
    with open(report_name, "w") as outfile:
        config_name = param_obj[KEY_CONFIG_NAME]
        outfile.write("Config_name = {}\n".format(config_name))
        success = True
        sensitivity = campaign_obj[KEY_BASE_SENSITIVITY]
        treatment_fraction = campaign_obj[KEY_TREATMENT_FRACTION]
        # expected proportions of [positive, negative, default] outcomes
        proportions = [
            sensitivity * treatment_fraction,
            (1.0 - sensitivity) * treatment_fraction,
            1.0 - treatment_fraction
        ]
        positive = []
        negative = []
        default = []
        total = []
        failed_timestep = []
        if not len(report_dict):
            success = False
            outfile.write(dtk_sft.sft_no_test_data)
        for t in report_dict:
            value_to_test = [
                report_dict[t][KEY_POSITIVE],
                report_dict[t][KEY_NEGATIVE],
                report_dict[t][KEY_DEFAULT]
            ]
            positive.append(report_dict[t][KEY_POSITIVE])
            negative.append(report_dict[t][KEY_NEGATIVE])
            default.append(report_dict[t][KEY_DEFAULT])
            total.append(sum(value_to_test))
            outfile.write("Run Chi-squared test at time step {}.\n".format(t))
            result = dtk_sft.test_multinomial(dist=value_to_test,
                                              proportions=proportions,
                                              report_file=outfile)
            if not result:
                failed_timestep.append(t)
                outfile.write(
                    "Warning: At timestep {0}, the Chi-squared test failed.\n".format(t))
        # allow up to 5% of time steps to fail before declaring overall failure
        if len(failed_timestep) > math.ceil(0.05 * len(report_dict)):
            success = False
            outfile.write(
                "BAD: the Chi-squared test failed at timestep {0}.\n".format(
                    ', '.join(str(x) for x in failed_timestep)))
        else:
            outfile.write(
                "GOOD: the Chi-squared test failed {} times, less than 5% of the total timestep.\n"
                .format(len(failed_timestep)))
        dtk_sft.plot_data(
            positive,
            dist2=total,
            label1="TBTestPositive",
            label2="Total tested",
            title="Test positive vs. total, positive proportion = {}".format(
                sensitivity * treatment_fraction),
            xlabel="time step",
            ylabel="# of individuals",
            category='Test_positive_vs_total',
            show=True,
            line=False)
        dtk_sft.plot_data(
            negative,
            dist2=total,
            label1="TBTestNegative",
            label2="Total tested",
            title="Test negative vs. total, negative proportion = {}".format(
                (1.0 - sensitivity) * treatment_fraction),
            xlabel="time step",
            ylabel="# of individuals",
            category='Test_negative_vs_total',
            show=True,
            line=False)
        dtk_sft.plot_data(
            default,
            dist2=total,
            label1="TBTestDefault",
            label2="Total tested",
            title="Test default vs. total, default proportion = {}".format(
                1.0 - treatment_fraction),
            xlabel="time step",
            ylabel="# of individuals",
            category='Test_default_vs_total',
            show=True,
            line=False)
        # TODO: write test to check if report matches debug logging. Pending on #2279. May not need this part.
        outfile.write(dtk_sft.format_success_msg(success))
        if debug:
            print("SUMMARY: Success={0}\n".format(success))
        return success
def create_report_file(Resistances, initial_resistances, drug_start_time, param_obj,
                       report_name, inset_days, debug):
    """Validate MDR-TB resistance acquisition rate and InsetChart MDR prevalence.

    Cross-checks the stdout resistant counts against InsetChart MDR prevalence
    (3% absolute tolerance), then tests each time step's new resistances against a
    binomial with rate TB_Drug_Resistance_Rate_HIV — using a 99% CI when the
    expected mean is large enough, otherwise a 3-sigma band — allowing up to 1% of
    time steps to fail.

    :param Resistances: per-day cumulative resistant counts parsed from stdout
    :param initial_resistances: initial resistant count (only used by commented-out check)
    :param drug_start_time: day index when the drug campaign starts
    :param param_obj: dict of config parameters (TB_Drug_Resistance_Rate_HIV)
    :param report_name: path of the text report file to write
    :param inset_days: per-day InsetChart channel dicts
    :param debug: when True, write verbose per-day data and plot diagnostics
    """
    with open(report_name, "w") as outfile:
        starting_pop = inset_days[0][dts.InsetChart.Channels.KEY_StatisticalPopulation]
        # success = sft.test_binomial_95ci( initial_resistances, starting_pop, param_obj["TB_Drug_Resistance_Rate_HIV"], outfile, "???" )
        success = True
        progression = []
        bad_msgs = []
        for x in range(len(inset_days)):
            inset_day = inset_days[x]
            inset_mdr_prevalence = inset_day[dts.InsetChart.Channels.KEY_MdrTbPrevalence]
            stdout_resistants = Resistances[x]
            if x >= drug_start_time:
                progression.append(stdout_resistants)
            if debug:
                outfile.write("Day: {0}\n".format(x))
                outfile.write(str(inset_day) + "\n")
                outfile.write("StdOut resistants: {0}\n".format(stdout_resistants))
            # prevalence implied by the stdout count over the day's population
            stdout_predicted_prevalence = stdout_resistants / float(
                inset_day[dts.InsetChart.Channels.KEY_StatisticalPopulation])
            if abs(inset_mdr_prevalence - stdout_predicted_prevalence) > 0.03:
                bad_msgs.append(
                    "BAD: at timestep {0}, expected MDR prevalence: {1}, InsetChart had: {2}\n"
                    .format(x, stdout_predicted_prevalence, inset_mdr_prevalence))

        tb_drug_resistance_rate_hiv = param_obj["TB_Drug_Resistance_Rate_HIV"]
        new_resistances = []
        pre_resistance = 0
        failed_count = 0
        total_test = 0
        for x in range(drug_start_time + 1, len(Resistances)):
            resistance = Resistances[x]
            # Resistances is cumulative; delta gives this step's new resistances
            new_resistance = resistance - pre_resistance
            pre_resistance = resistance
            new_resistances.append(new_resistance)
            expected_mean = (starting_pop - resistance) * tb_drug_resistance_rate_hiv
            total_test += 1
            if expected_mean >= 5:  # advoid failing with too small mean
                result = sft.test_binomial_99ci(new_resistance,
                                                starting_pop - resistance,
                                                tb_drug_resistance_rate_hiv,
                                                outfile,
                                                category="time step {}".format(x + 1))
                if not result:
                    failed_count += 1
                    outfile.write(
                        "Warning: New Resistance test fails for rate = {0} at time step {1}.\n"
                        .format(tb_drug_resistance_rate_hiv, x + 1))
            else:
                # small-mean fallback: accept within 3 standard deviations of the binomial mean
                error_tolerance = 3 * math.sqrt(
                    tb_drug_resistance_rate_hiv * (1 - tb_drug_resistance_rate_hiv) *
                    (starting_pop - resistance))  # 3 sigma
                result = math.fabs(new_resistance - expected_mean) <= error_tolerance
                if not result:
                    failed_count += 1
                    outfile.write(
                        "Warning: New Resistance test fails for rate = {0} at time step {1}, "
                        "new resistance = {2}, expected mean = {3}, error tolerance = {4}.\n"
                        .format(tb_drug_resistance_rate_hiv, x + 1, new_resistance,
                                expected_mean, error_tolerance))
        # allow up to 1% of time steps to fail the per-step test
        if failed_count > math.ceil(total_test * 0.01):
            success = False
            outfile.write(
                "BAD: test failed {0} times out of {1} timestep, please check the warning message.\n"
                "".format(failed_count, total_test))
        if debug:
            sft.plot_data(new_resistances,
                          title="new resistance over time",
                          category="new_resistance",
                          show=True)
        series = sft.create_geometric_dis(param_obj["TB_Drug_Resistance_Rate_HIV"],
                                          starting_pop,
                                          len(progression),
                                          test_decay=False)
        sft.plot_data(progression,
                      series,
                      label1="progression",
                      label2="geomatric dis",
                      xlabel="days",
                      ylabel="resistance",
                      title="progression vs geomatric",
                      category="progression_vs_geomatric",
                      show=True,
                      line=True)
        sft.plot_cdf(progression,
                     series,
                     label1="progression",
                     label2="geomatric dis",
                     title="progression vs geomatric cdf",
                     category="progression_vs_geomatric_cdf",
                     show=True)
        # success = sft.test_geometric_decay(progression, param_obj["TB_Drug_Resistance_Rate_HIV"], starting_pop, test_decay=False, report_file=outfile, debug=debug)
        if len(bad_msgs) > 0:
            success = False
            outfile.writelines(bad_msgs)
        outfile.write(sft.format_success_msg(success))
def create_report_file(param_obj, multipliers, infectiousness, report_name, debug):
    """Validate lognormal infectiousness multipliers and write a report.

    When LOGNORMAL_SCALE (sigma) > 0, tests that ``multipliers`` are lognormal
    with mu = -sigma^2/2 (so the distribution mean is 1.0) and that their
    sample mean is within 2e-2 of 1.0. When sigma == 0 the feature is disabled
    and every multiplier must be exactly 1.0. Plots model vs scipy reference
    data and returns the boolean test outcome.

    Fix: the final summary used a Python-2-only ``print`` statement; converted
    to a ``print(...)`` call, consistent with the other report functions.
    """
    with open(report_name, "w") as outfile:
        success = True
        if not multipliers:
            # NOTE(review): success is not set False here — presumably sft_no_test_data
            # is informational; confirm intended behavior for empty input.
            outfile.write(sft.sft_no_test_data)
        sigma = param_obj[Param_keys.LOGNORMAL_SCALE]
        base_infectivity = param_obj[Param_keys.BASE_INFECTIVITY]
        if sigma > 0:
            # mu chosen so exp(mu + sigma^2/2) == 1, i.e. mean multiplier is 1.
            mu = -sigma**2 / 2.0
            # test log_normal distribution
            success = sft.test_lognorm(multipliers, mu=mu, sigma=sigma,
                                       report_file=outfile, round=False)
            # test mean_l = 1
            mean_l = np.mean(multipliers)
            mean_infectiousness = np.mean(infectiousness)
            outfile.write("mean of the multipliers is {}, expected 1.0.\n".format(mean_l))
            outfile.write("mean of the Infectiousness is {0}, while base infectivity is {1}.\n"
                          .format(mean_infectiousness, base_infectivity))
            tolerance = 2e-2
            if math.fabs(mean_l - 1.0) > tolerance:
                outfile.write("BAD: mean of the multipliers is {}, expected 1.0.\n".format(mean_l))
                success = False
            # plotting
            size = len(multipliers)
            outfile.write("size is {}\n".format(size))
            scale = math.exp(mu)
            dist_lognormal = stats.lognorm.rvs(sigma, 0, scale, size)
            sft.plot_data(multipliers, dist_lognormal,
                          label1="Emod", label2="Scipy",
                          ylabel="Multiplier", xlabel="data point",
                          category="Emod_vs_Scipy",
                          title="Emod_vs_Scipy, sigma = {}".format(sigma), show=True)
            sft.plot_probability(multipliers, dist_lognormal, precision=1,
                                 label1="Emod", label2="Scipy",
                                 category="Probability_mass_function_Emod_vs_Scipy",
                                 title="Emod_vs_Scipy, sigma = {}".format(sigma), show=True)
            sft.plot_cdf(multipliers, dist_lognormal,
                         label1="Emod", label2="Scipy", category="cdf",
                         title="cdf, sigma = {}".format(sigma), show=True, line=False)
            if debug:
                with open("scipy_data.txt", "w") as file:
                    for n in sorted(dist_lognormal):
                        file.write(str(n) + "\n")
                with open("emod_data.txt", "w") as file:
                    for n in sorted(multipliers):
                        file.write(str(n) + "\n")
        else:
            # sigma = 0, this feature is disabled
            for multiplier in multipliers:
                if multiplier != 1.0:
                    success = False
                    outfile.write("BAD: multiplier is {0} when {1} set to {2}, expected 1.0.\n"
                                  .format(multiplier, Param_keys.LOGNORMAL_SCALE, sigma))
            # plotting
            sft.plot_data(multipliers, label1="Multiplier", label2="NA",
                          category="Multiplier",
                          title="Multiplier_Sigma={}".format(sigma),
                          ylabel="Multiplier", xlabel="data point", show=True)
            sft.plot_data(infectiousness, label1="Infectiousness", label2="NA",
                          category="Infectiousness",
                          title="Infectiousness_Sigma={0}_BaseInfectivity={1}".format(
                              sigma, base_infectivity),
                          ylabel="Infectiousness", xlabel="data point", show=True)
        outfile.write(sft.format_success_msg(success))
    if debug:
        # Was a Python-2 print statement; now a function call (py2/py3 compatible).
        print("SUMMARY: Success={0}\n".format(success))
    return success
def create_report_file(param_obj, output_df, report_df, report_name, debug):
    """Validate per-timestep infectiousness against a calculated seasonal value.

    For each timestep, computes the expected per-capita infectiousness via
    ``calculate_infectiousness(...) / statpop`` and compares it with the value
    reported in ``output_df`` using a 5% relative tolerance (exact match
    required when the expected value is 0). Writes BAD lines and a summary to
    ``report_name``, plots actual vs calculated, and returns success.

    NOTE(review): reconstructed from whitespace-mangled source; statement
    grouping (which calls sit inside the ``with`` block) is inferred.
    """
    total_timesteps = int(param_obj[KEY_TOTAL_TIMESTEPS])
    simulation_timestep = float(param_obj[KEY_SIMULATION_TIMESTEP])
    base_infectivity = float(param_obj[KEY_BASE_INFECTIVITY])
    amplitude = float(param_obj[KEY_AMPLITUDE])
    phase = float(param_obj[KEY_PHASE])
    infected = output_df[KEY_INFECTED]
    infectiousness = output_df[KEY_INFECTIOUSNESS]
    statpop = output_df[KEY_STAT_POP]
    new_infections = report_df[KEY_NEW_INFECTIONS]
    if debug:
        dtk_sft.plot_data(new_infections, label1="new infections", label2="NA",
                          title="Phase: {0} day, amplitude: {1}, base_infectivity: {2}"
                          .format(phase, amplitude, base_infectivity),
                          xlabel="Time_Step_{0}_Days".format(simulation_timestep),
                          ylabel=None, category='New_infections', show=True, line=True)
    with open(report_name, "w") as outfile:
        # Expected total infectiousness per timestep from the seasonal model.
        expected_infectiousness = []
        for index in range(len(infected)):
            infected_pop = int(infected[index])
            expected_infectiousness.append(
                calculate_infectiousness(infected_pop, index, simulation_timestep,
                                         phase, base_infectivity, amplitude, debug))
        success = True
        actual_infectiousness_all = []
        calc_infectiousness_all = []
        for index in range(len(infectiousness)):
            timestep = index * simulation_timestep
            actual_infectiousness = float(infectiousness[index])
            # Normalize expected total by the statistical population.
            calc_infectiousness = expected_infectiousness[index] / float(statpop[index])
            actual_infectiousness_all.append(actual_infectiousness)
            calc_infectiousness_all.append(calc_infectiousness)
            # 5% relative tolerance; exact match required when expected is 0.
            tolerance = 0 if calc_infectiousness == 0 else 5e-2 * calc_infectiousness
            if math.fabs(actual_infectiousness - calc_infectiousness) > tolerance:
                success = False
                outfile.write(
                    "BAD: actual infectiousness at time step {0} is {1}, expected {2}.\n"
                    .format(timestep, actual_infectiousness, calc_infectiousness))
        outfile.write(dtk_sft.format_success_msg(success))
    dtk_sft.plot_data(actual_infectiousness_all, calc_infectiousness_all,
                      label1="actual infectiousness", label2="calc infectiousness",
                      title="Phase: {0} day, amplitude: {1}, base_infectivity: {2}".format(
                          phase, amplitude, base_infectivity),
                      xlabel="Time_Step_{0}_Days".format(simulation_timestep),
                      ylabel="Infectiousness", category='Infectiousness',
                      show=True, line=True)
    return success
def create_report_file(drug_start_timestep, inactivation_times, active_count, inactivations, drug_inactivation_rate, report_name, debug = False):
    """Validate TB drug inactivation timing and write a report.

    For a small rate (< 0.1) the individual inactivation times are tested as
    draws from an exponential distribution (KS test); for a large rate the
    daily inactivation counts after drug start are tested against expected
    per-day counts (rate * active_count) with a chi-squared/multinomial test.
    Returns the boolean test outcome.

    Fix: corrected misspelled output message "reuslt" -> "result".
    """
    with open(report_name, "w") as outfile:
        success = True
        # ks exponential test doesn't work very well with large rate, use chi squared test instead.
        # while rate is small ks test for exponential distribution is more sensitive to catch the difference
        if drug_inactivation_rate < 0.1:
            outfile.write("Testing inactivation times as draws from exponential distrib with rate {0}. "
                          "Dataset size = {1}.\n".format(drug_inactivation_rate, len(inactivation_times)))
            success = dtk_sft.test_exponential(inactivation_times, drug_inactivation_rate, outfile,
                                               integers=True, roundup=True, round_nearest=False)
            if not success:
                outfile.write("BAD: ks test for rate {} is False.\n".format(drug_inactivation_rate))
            # Build a numpy reference sample (rounded up, matching roundup=True) for plots.
            size = len(inactivation_times)
            scale = 1.0 / drug_inactivation_rate
            dist_exponential_np = numpy.random.exponential(scale, size)
            dist_exponential_np = [math.ceil(x) for x in dist_exponential_np]
            dtk_sft.plot_data_sorted(inactivation_times, dist_exponential_np,
                                     label1="test times", label2="numpy data",
                                     title="inactivation_times_actual_vs_numpy",
                                     xlabel="data points", ylabel="Inactivation times",
                                     category="inactivation_times",
                                     show=True, line=True, overlap=True)
            dtk_sft.plot_cdf(inactivation_times, dist_exponential_np,
                             label1="test times", label2="numpy data",
                             title="inactivation_times_cdf",
                             xlabel="days", ylabel="probability",
                             category="inactivation_times_cdf", show=True)
            dtk_sft.plot_probability(inactivation_times, dist_exponential_np,
                                     label1="test times", label2="numpy data",
                                     title="inactivation_times_pdf",
                                     xlabel="days", ylabel="probability",
                                     category="inactivation_times_pdf", show=True)
        else:
            outfile.write("Testing inactivation count per day with rate {0}. \n".format(
                drug_inactivation_rate))
            expected_inactivation = []
            for t in range(len(inactivations)):
                if t < drug_start_timestep:
                    # No drug yet, so no drug-driven inactivations are expected.
                    if inactivations[t] > 0:
                        success = False
                        outfile.write("BAD: expected no inactivations on drugs before day {0}, get {1} cases at timestep {2}.\n"
                                      "".format(drug_start_timestep, inactivations[t], t))
                elif active_count[t] > 0:
                    expected_inactivation.append(drug_inactivation_rate * active_count[t])
            # Align observed counts (after drug start) with the expected series,
            # trimming whichever side is longer.
            if len(inactivations) <= len(expected_inactivation) + drug_start_timestep:
                test_inactivation_dates = inactivations[drug_start_timestep + 1:]
                expected_inactivation = expected_inactivation[:len(test_inactivation_dates)]
            else:
                test_inactivation_dates = inactivations[drug_start_timestep + 1:
                                                        drug_start_timestep + 1 + len(expected_inactivation)]
            dtk_sft.plot_data(test_inactivation_dates, expected_inactivation,
                              label1="actual inactivation", label2="expected inactivation",
                              title="inactivation per day",
                              xlabel="date after drug start day",
                              ylabel="inactivation per day",
                              category="inactivation_counts",
                              show=True, line=True, overlap=True, sort=False)
            chi_result = dtk_sft.test_multinomial(dist=test_inactivation_dates,
                                                  proportions=expected_inactivation,
                                                  report_file=outfile, prob_flag=False)
            if not chi_result:
                success = False
                outfile.write("BAD: Chi-squared test result is False.\n")
        outfile.write(dtk_sft.format_success_msg(success))
    if debug:
        print(dtk_sft.format_success_msg(success))
    return success
def create_report_file(param_obj, campaign_obj, output_dict, report_dict, report_name, debug):
    """Validate TB diagnostic test-positive counts and report/log consistency.

    Part 1: per timestep, tests the positive count from ReportEventRecorder
    against a binomial with p = (1 - specificity) * treatment_fraction (95% CI),
    skipping timesteps with too small a sample.
    Part 2: checks that ReportEventRecorder counts match the debug-log counts.

    Fixes:
    * ``match_error`` was reset to 0 inside the per-timestep loop, so the final
      GOOD/BAD matching summary only reflected the last timestep; it is now
      initialized once before the loop.
    * The mismatch message for the "log has no counts" branch used format
      indices {2}/{3}, printing report_negative twice and never showing
      log_negative; corrected to {3}/{4}.
    """
    with open(report_name, "w") as outfile:
        config_name = param_obj[KEY_CONFIG_NAME]
        outfile.write("Config_name = {}\n".format(config_name))
        success = True
        specificity = campaign_obj[KEY_BASE_SPECIFICITY]
        treatment_fraction = campaign_obj[KEY_TREATMENT_FRACTION]
        # Probability that an individual tests positive and is treated.
        prob = (1.0 - specificity) * treatment_fraction
        binomial_test_count = 0
        positive = []
        total = []
        if not len(report_dict):
            success = False
            outfile.write(dtk_sft.sft_no_test_data)
        for t in report_dict:
            num_success = report_dict[t][KEY_POSITIVE]
            num_trials = report_dict[t][KEY_NEGATIVE] + num_success
            positive.append(num_success)
            total.append(num_trials)
            # Require both np >= 5 and n(1-p) >= 5 for the normal-approx CI.
            if num_trials * prob < 5 or num_trials * (1 - prob) < 5:
                outfile.write(
                    "At timestep {0}, there is not enough sample size : mean = {1}, sample size - mean = {2}"
                    ".\n".format(t, num_trials * prob, num_trials * (1 - prob)))
            else:
                result = dtk_sft.test_binomial_95ci(num_success, num_trials, prob,
                                                    report_file=outfile,
                                                    category="TB positive")
                outfile.write("At timestep {0}, the binomial 95% test result is {1}.\n"
                              .format(t, result))
                binomial_test_count += 1
                if not result:
                    success = False
        if not binomial_test_count:
            success = False
            outfile.write(
                "BAD: There is not enough sample size for binomial test in every time step, please fix the test.\n")
        dtk_sft.plot_data(positive, dist2=total,
                          label1="TBTestPositive", label2="Total tested",
                          title="Test positive vs. total, positive proportion = {}".format(prob),
                          xlabel="time step", ylabel="# of individuals",
                          category='Test_positive_vs_total', show=True, line=False)
        # When Treatment_fraction is set to 1, the report should match debug log. Here is the test for it:
        match_error = 0  # counts mismatches across ALL timesteps (was reset per-iteration)
        for t in output_dict:
            log_positive = output_dict[t][KEY_POSITIVE]
            log_negative = output_dict[t][KEY_NEGATIVE]
            if log_negative or log_positive:
                report_positive = report_dict[t][KEY_POSITIVE]
                report_negative = report_dict[t][KEY_NEGATIVE]
                if report_positive != log_positive:
                    match_error += 1
                    success = False
                    outfile.write(
                        "BAD: at time step {0} the TBTestPositive is {1} from ReportEventRecorder.csv and {2} from"
                        "debug logging.\n".format(t, report_positive, log_positive))
                if report_negative != log_negative:
                    match_error += 1
                    success = False
                    outfile.write(
                        "BAD: at time step {0} the TBTestNegative is {1} from ReportEventRecorder.csv and {2} from"
                        "debug logging.\n".format(t, report_negative, log_negative))
            else:
                # Log shows no tests at t; the report must not have counts either.
                if t in report_dict:
                    report_positive = report_dict[t][KEY_POSITIVE]
                    report_negative = report_dict[t][KEY_NEGATIVE]
                    match_error += 1
                    success = False
                    outfile.write(
                        "BAD: at time step {0} the TBTestPositive and TBTestNegative are {1} and {2} from "
                        "ReportEventRecorder.csv and {3} and {4} from debug logging. They should be matched\n"
                        "".format(t, report_positive, report_negative, log_positive, log_negative))
        if not match_error:
            outfile.write("GOOD: The ReportEventRecorder.csv matches the debug logging.\n")
        else:
            outfile.write("BAD: The ReportEventRecorder.csv doesn't match the debug logging.\n")
        outfile.write(dtk_sft.format_success_msg(success))
    if debug:
        print("SUMMARY: Success={0}\n".format(success))
    return success
def create_report_file(report_data_obj, report_name, debug):
    """Validate transmission-blocking vaccine effects across repeated outbreaks.

    Uses the first outbreak's new-infection count as a baseline and checks
    that each subsequent outbreak's count scales by (1 - tb_effect) /
    (1 - tb_effect_baseline), within 2% of the statistical population.
    Writes a report, plots actual vs expected, and returns success.
    """
    with open(report_name, "w") as outfile:
        success = True
        timestep = Outbreak_Start_Day
        tb_effects = calc_tb_effect(debug)
        tb_effect_baseline = float(tb_effects[0])
        # use the number of new infection from the 1st outbreak as a baseline
        new_infection_baseline = report_data_obj[KEY_NEW_INFECTIONS_GROUP[1]][timestep]
        statistical_population = report_data_obj[KEY_STATISTICAL_POPULATION_GROUP[1]][timestep]  # no any death
        new_infections = []
        expected_new_infections = []
        new_infections.append(new_infection_baseline)
        expected_new_infections.append(new_infection_baseline)
        actual_tb_effects = []
        actual_tb_effects.append(tb_effect_baseline)
        for i in range(1, len(Interventions)):  # no need to test the 1st outbreak
            timestep += Timesteps_Between_Repetitions
            new_infection = report_data_obj[KEY_NEW_INFECTIONS_GROUP[1]][timestep]
            tb_effect = tb_effects[i]
            # because expected_new_infection / (1.0 - tb_effect) = new_infection_baseline / (1.0- tb_effect_baseline), so
            expected_new_infection = (1.0 - tb_effect) * new_infection_baseline / (1.0 - tb_effect_baseline)
            # 2% of the population as tolerance; exact match required when expected is 0.
            tolerance = 0.0 if expected_new_infection == 0.0 else 2e-2 * statistical_population
            actual_tb_effect = 1.0 - (new_infection * (1.0 - tb_effect_baseline) / new_infection_baseline)
            if math.fabs(new_infection - expected_new_infection) > tolerance:
                success = False
                outfile.write(
                    "BAD: At time step {0}, outbreak {1}, {2} reported new infections, expected {3}.\n"
                    .format(timestep, Interventions[i], new_infection, expected_new_infection))
                outfile.write(
                    "actual TransmissionBlocking effect is {0}, expected {1}.\n"
                    .format(actual_tb_effect, tb_effect))
            new_infections.append(new_infection)
            expected_new_infections.append(expected_new_infection)
            actual_tb_effects.append(actual_tb_effect)
        sft.plot_data(new_infections, expected_new_infections,
                      label1="Actual", label2="Expected",
                      xlabel="outbreak", ylabel="new infection",
                      title="Actual new infection vs. expected new infection",
                      category='New_infections', show=True)
        if debug:
            # Dump raw series for offline inspection.
            with open("New_infections.txt", "w") as file:
                for i in range(len(new_infections)):
                    file.write("{0}, {1}.\n".format(new_infections[i], expected_new_infections[i]))
            with open("Effects.txt", "w") as file:
                for i in range(len(actual_tb_effects)):
                    file.write("{0}, {1}.\n".format(actual_tb_effects[i], tb_effects[i]))
        outfile.write(sft.format_success_msg(success))
    if debug:
        print("SUMMARY: Success={0}\n".format(success))
    return success
def create_report_file(param_obj, campaign_obj, demographics_obj, report_data_obj, report_name, debug):
    """Validate importation-pressure new infections and population growth.

    Groups per-timestep new-infection counts by campaign rate, checks the final
    statistical population against initial_population + sum(rate * duration)
    (10% tolerance), and KS-tests each rate group as Poisson draws. Writes a
    report, dumps the grouped counts to new_infections_parsed.json, and
    returns success.

    NOTE(review): this is a near-duplicate (Python-3 print style) of the
    following function; reconstructed from whitespace-mangled source.
    """
    with open(report_name, "w") as outfile:
        success = True
        total_timesteps = param_obj[KEY_TOTAL_TIMESTEPS]
        start_timestep = param_obj[KEY_START_TIME]
        initial_population = demographics_obj[KEY_INITIAL_POPULATION]
        rates = campaign_obj[KEY_CAMPAIGN_DIP]
        durations = campaign_obj[KEY_CAMPAIGN_DURATIONS]
        if not report_data_obj:  # todo: maybe use try
            success = False
            outfile.write("BAD: There is no data in the InsetChart report")
        else:
            new_infections = report_data_obj[KEY_NEW_INFECTIONS]
            statistical_population = report_data_obj[KEY_STATISTICAL_POPULATION]
            length = len(rates)
            start_duration = start_timestep
            new_infections_dict = {}
            calculate_new_population = initial_population
            # Bucket new-infection counts by campaign rate; each rate runs for
            # its configured duration, truncated at the simulation end.
            for i in range(length):
                rate = rates[i]
                duration = durations[i]
                calculate_new_population = rate * duration + calculate_new_population
                end_duration = duration + start_duration
                if rate not in new_infections_dict:
                    new_infections_dict[rate] = []
                for j in range(start_duration + 1, end_duration + 1):
                    if j < total_timesteps + start_timestep:
                        new_infections_dict[rate].append(new_infections[j])
                        j += 1  # NOTE(review): redundant — range() controls j
                    else:
                        break
                if end_duration > total_timesteps + start_timestep:
                    # Campaign extends past the simulation: remove the overshoot.
                    calculate_new_population -= rate * (end_duration - total_timesteps - start_timestep)
                    break
                start_duration = end_duration
            # Any trailing timesteps after the last campaign get rate 0.
            if end_duration < total_timesteps + start_timestep:
                rate = 0.0
                if rate not in new_infections_dict:
                    new_infections_dict[rate] = []
                for j in range(end_duration + 1, len(new_infections)):
                    new_infections_dict[rate].append(new_infections[j])
            with open("new_infections_parsed.json", "w") as file:
                json.dump(new_infections_dict, file, indent=4)
            # test statistical population channel
            diff_population = math.fabs(calculate_new_population - statistical_population[-1])
            if debug:
                print(
                    "calculated population is {0}, statistical population " \
                    "from InsetChart is {1}.".format(calculate_new_population,
                                                     statistical_population[-1]))
            # 10% of the expected net growth as tolerance.
            error_tolerance = math.fabs(calculate_new_population - initial_population) * 0.1
            if debug:
                print("diff_population is {0}, error_tolerance is {1}".format(
                    diff_population, error_tolerance))
            if diff_population > error_tolerance:
                success = False
                outfile.write(
                    "BAD: statistical population is {0}, expected about {1}.\n"
                    .format(statistical_population[-1], calculate_new_population))
            # test poisson distribution for new infections
            for rate in new_infections_dict:
                dist = new_infections_dict[rate]
                title = "rate = " + str(rate)
                result = sft.test_poisson(dist, rate, route=title,
                                          report_file=outfile,
                                          normal_approximation=False)
                # print result, rate, len(dist)
                if not result:
                    success = False
                    outfile.write("BAD: ks poisson test for {0} is {1}.\n".format(title, result))
                numpy_distro = np.random.poisson(rate, len(dist))
                sft.plot_data(dist, numpy_distro,
                              title="new infections for {}".format(title),
                              label1="new infection from model, {}".format(title),
                              label2="Poisson distro from numpy",
                              xlabel="data points", ylabel="new infection",
                              category="plot_data_{0}".format(title), show=True)
                sft.plot_probability(dist, numpy_distro,
                                     title="probability mass function for {}".format(title),
                                     label1="new infection probability from model",
                                     label2="new infection probability from numpy distro",
                                     category="plot_probability_{0}".format(title), show=True)
        outfile.write(sft.format_success_msg(success))
    if debug:
        print("SUMMARY: Success={0}\n".format(success))
    return success
def create_report_file(param_obj, campaign_obj, demographics_obj, report_data_obj, report_name, debug):
    """Validate importation-pressure new infections and population growth.

    Groups per-timestep new-infection counts by campaign rate, checks the final
    statistical population against initial_population + sum(rate * duration)
    (10% tolerance), and KS-tests each rate group as Poisson draws. Writes a
    report, dumps the grouped counts to new_infections_parsed.json, and
    returns success.

    Fix: this variant used Python-2-only ``print`` statements; converted to
    ``print(...)`` calls for py3 compatibility and consistency with the
    sibling implementation above. Logic is otherwise unchanged.
    """
    with open(report_name, "w") as outfile:
        success = True
        total_timesteps = param_obj[KEY_TOTAL_TIMESTEPS]
        start_timestep = param_obj[KEY_START_TIME]
        initial_population = demographics_obj[KEY_INITIAL_POPULATION]
        rates = campaign_obj[KEY_CAMPAIGN_DIP]
        durations = campaign_obj[KEY_CAMPAIGN_DURATIONS]
        if not report_data_obj:  # todo: maybe use try
            success = False
            outfile.write("BAD: There is no data in the InsetChart report")
        else:
            new_infections = report_data_obj[KEY_NEW_INFECTIONS]
            statistical_population = report_data_obj[KEY_STATISTICAL_POPULATION]
            length = len(rates)
            start_duration = start_timestep
            new_infections_dict = {}
            calculate_new_population = initial_population
            # Bucket new-infection counts by campaign rate; each rate runs for
            # its configured duration, truncated at the simulation end.
            for i in range(length):
                rate = rates[i]
                duration = durations[i]
                calculate_new_population = rate * duration + calculate_new_population
                end_duration = duration + start_duration
                if rate not in new_infections_dict:
                    new_infections_dict[rate] = []
                for j in range(start_duration + 1, end_duration + 1):
                    if j < total_timesteps + start_timestep:
                        new_infections_dict[rate].append(new_infections[j])
                        j += 1
                    else:
                        break
                if end_duration > total_timesteps + start_timestep:
                    # Campaign extends past the simulation: remove the overshoot.
                    calculate_new_population -= rate * (end_duration - total_timesteps - start_timestep)
                    break
                start_duration = end_duration
            # Any trailing timesteps after the last campaign get rate 0.
            if end_duration < total_timesteps + start_timestep:
                rate = 0.0
                if rate not in new_infections_dict:
                    new_infections_dict[rate] = []
                for j in range(end_duration + 1, len(new_infections)):
                    new_infections_dict[rate].append(new_infections[j])
            with open("new_infections_parsed.json", "w") as file:
                json.dump(new_infections_dict, file, indent=4)
            # test statistical population channel
            diff_population = math.fabs(calculate_new_population - statistical_population[-1])
            if debug:
                print("calculated population is {0}, statistical population "
                      "from InsetChart is {1}.".format(calculate_new_population,
                                                      statistical_population[-1]))
            # 10% of the expected net growth as tolerance.
            error_tolerance = math.fabs(calculate_new_population - initial_population) * 0.1
            if debug:
                print("diff_population is {0}, error_tolerance is {1}".format(
                    diff_population, error_tolerance))
            if diff_population > error_tolerance:
                success = False
                outfile.write("BAD: statistical population is {0}, expected about {1}.\n".format(
                    statistical_population[-1], calculate_new_population))
            # test poisson distribution for new infections
            for rate in new_infections_dict:
                dist = new_infections_dict[rate]
                title = "rate = " + str(rate)
                result = sft.test_poisson(dist, rate, route=title, report_file=outfile,
                                          normal_approximation=False)
                # print result, rate, len(dist)
                if not result:
                    success = False
                    outfile.write("BAD: ks poisson test for {0} is {1}.\n".format(title, result))
                numpy_distro = np.random.poisson(rate, len(dist))
                sft.plot_data(dist, sorted(numpy_distro),
                              title="new infections for {}".format(title),
                              label1="new infection from model, {}".format(title),
                              label2="Poisson distro from numpy",
                              xlabel="data points", ylabel="new infection",
                              category="plot_data_{0}".format(title), show=True)
                sft.plot_probability(dist, numpy_distro,
                                     title="probability mass function for {}".format(title),
                                     label1="new infection probability from model",
                                     label2="new infection probability from numpy distro",
                                     category="plot_probability_{0}".format(title), show=True)
        outfile.write(sft.format_success_msg(success))
    if debug:
        print("SUMMARY: Success={0}\n".format(success))
    return success
def create_report_file(data):
    """Validate TB latency timers around ART start/stop events.

    Parses log lines to collect (a) incubation timers drawn while on ART
    ("reconstitute" lines) and (b) latency timer updates triggered by ART
    events, split into StartedART vs StoppedART populations. Checks that
    StoppedArt latencies do NOT look exponential with the on-ART rate, that
    few of them are suspiciously short, and that the on-ART timers DO match
    the expected exponential distribution. Writes the report and plots.

    data layout: [report_name, log_lines, tb_cd4_activation_vector].
    """
    report_name = data[0]
    lines = data[1]
    tb_cd4_activation_vector = data[2]  # this test assumes the vector is constant
    # StartedArt distribution is currently based on 0.01, while StoppedArt is on mostly 0.0000001, so we expect
    # much longer latency durations for the StoppedArt data, using big arbitrary number that is noticeably
    # bigger than what StartedArt distribution would extremely likely give us
    big_magic_number = 2000
    stopped_art_latency_data = []
    started_art_latency_data = []
    tb_on_art_latency_data = []
    # Maps individual id -> last ART event name seen, consumed on timer update.
    art_events_dict = {}
    success = True
    with open(report_name, "w") as outfile:
        if not lines:
            outfile.write("BAD: No relevant test data found.\n")
            success = False
        for line in lines:
            if "has event" in line:
                ind_id = int(dtk_sft.get_val("Individual ", line))
                art_status = line.split(" ")[9].strip(".")  # get_val only gets digits
                art_events_dict[ind_id] = art_status
            if "Incubation_timer calculated as" in line:
                ind_id = int(dtk_sft.get_val("Individual ", line))
                infection_timer = float(dtk_sft.get_val("calculated as ", line))
                reconstitute = int(dtk_sft.get_val("reconstitute=", line))
                if reconstitute:  # ignore people who are not reconstituting.
                    tb_on_art_latency_data.append(infection_timer)
            if "LifeCourseLatencyTimerUpdate" in line:
                ind_id = int(dtk_sft.get_val("Individual ", line))
                new_incubation_timer = float(dtk_sft.get_val("timer ", line))
                if ind_id in art_events_dict.keys():
                    if art_events_dict.get(ind_id) == "StartedART":
                        # we ignore this for this test, people are already on art when they get TB
                        started_art_latency_data.append(new_incubation_timer)
                    else:
                        stopped_art_latency_data.append(new_incubation_timer)
                    art_events_dict.pop(ind_id)
                else:
                    success = False
                    outfile.write("BAD: No art-related event found in the logs for this timer update for Individual {},"
                                  " at time {}.\n".format(ind_id, int(dtk_sft.get_val("time= ", line))))
        # we want the stopped art latency data to NOT match the started art latency data
        # and we expect the stopped art latency data to be long period times as made by our cd4_Activation_vector
        # NOTE(review): no report_file is passed here (unlike the call below) — presumably
        # intentional to suppress the KS-test output; confirm.
        if dtk_sft.test_exponential(stopped_art_latency_data, tb_cd4_activation_vector[2], integers=True,
                                    roundup=True, round_nearest=False):
            outfile.write("BAD: The StoppedArt latency data distribution matches the initial latency data"
                          " distribution, but shouldn't.\n")
            success = False
        small_duration_count = 0
        for duration in stopped_art_latency_data:
            if duration < big_magic_number:
                small_duration_count += 1
        # NOTE(review): raises ZeroDivisionError if stopped_art_latency_data is empty.
        proportion_small = small_duration_count / float(len(stopped_art_latency_data))
        if proportion_small > 0.006:
            outfile.write("BAD: More than 0.006 of our durations are suspiciously small, it is {}. "
                          "Please Investigate.\n".format(proportion_small))
            success = False
        if not dtk_sft.test_exponential(tb_on_art_latency_data, tb_cd4_activation_vector[2], outfile,
                                        integers=False, roundup=False, round_nearest=False):
            # this is testing the internal timer which is float type so 'integers=False'
            success = False
            outfile.write("BAD: Initial TB infection (with HIV and ART) latency doesn't match expected distribution.")
        # NOTE(review): the extra 0 argument to format() is unused (single placeholder).
        outfile.write("Data points checked = {}."
                      "\n".format(len(tb_on_art_latency_data), 0))
        outfile.write("SUMMARY: Success={0}\n".format(success))
        # for graphing purposes only
        expected_tb_on_art_latency_data = np.random.exponential(1/tb_cd4_activation_vector[2],
                                                                len(tb_on_art_latency_data))
        dtk_sft.plot_data(sorted(tb_on_art_latency_data), sorted(expected_tb_on_art_latency_data),
                          label1="Actual", label2="Expected",
                          title="HIV+ART then TB latency data",
                          xlabel="Data Points", ylabel="Days",
                          category="tb_activation_and_cd4_hiv_art_tb_offart",
                          line = True, overlap=True)
def create_report_file(drug_start_timestep, disease_deaths, cum_deaths, deaths, infected_individuals, death_times, drug_mortality_rate_HIV, report_name ):
    """Validate HIV drug-related mortality and write a report.

    First checks InsetChart disease deaths against stdout cumulative deaths
    per timestep. Then, for a small rate (< 0.1), KS-tests the individual
    death times as exponential draws; for a large rate, chi-squared-tests the
    daily death counts after drug start against rate * infected_individuals.
    Returns the boolean test outcome.

    NOTE(review): the emitted messages contain the typo "reuslt" — left
    byte-identical here; fix alongside any downstream log consumers.
    """
    with open(report_name, "w") as outfile:
        success = True
        length = len(cum_deaths)
        if sum(disease_deaths)==0 or sum(cum_deaths)==0 or len(death_times)==0:
            success = False
            outfile.write(dtk_sft.no_test_data)
        # InsetChart and stdout must agree on cumulative disease deaths.
        for x in range(length):
            if disease_deaths[x] != cum_deaths[x]:
                success = False
                outfile.write("BAD: at timestep {0}, disease deaths is {1} in InsetChart.json and {2} in stdout.txt.\n".format(
                    x+1, disease_deaths[x], cum_deaths[x]))
        # ks exponential test doesn't work very well with large rate, use chi squared test instead
        # while rate is small ks test for exponential distribution is more sensitive to catch the difference
        if drug_mortality_rate_HIV < 0.1:
            outfile.write("Testing death times as draws from exponential distrib with rate {0}. "
                          "Dataset size = {1}.\n".format(drug_mortality_rate_HIV, len(death_times)))
            ks_result = dtk_sft.test_exponential(death_times, drug_mortality_rate_HIV, report_file = outfile,
                                                 integers=True, roundup=True, round_nearest=False)
            if not ks_result:
                success = False
                outfile.write("BAD: ks test reuslt is False.\n")
            # Numpy reference sample (rounded up, matching roundup=True) for plots.
            size = len(death_times)
            scale = 1.0 / drug_mortality_rate_HIV
            dist_exponential_np = numpy.random.exponential(scale, size)
            dist_exponential_np = [math.ceil(x) for x in dist_exponential_np]
            dtk_sft.plot_data_sorted(death_times, dist_exponential_np,
                                     label1="death times", label2="numpy data",
                                     title="death_times_actual_vs_numpy",
                                     xlabel="data points", ylabel="death times",
                                     category="death_times",
                                     show=True, line = True, overlap=True)
            dtk_sft.plot_cdf(death_times, dist_exponential_np,
                             label1="death times", label2="numpy data",
                             title="death_times_cdf",
                             xlabel="days", ylabel="probability",
                             category="death_times_cdf", show=True)
        else:
            outfile.write("Testing death count per day with rate {0}. \n".format(drug_mortality_rate_HIV))
            expected_mortality = []
            for t in range( len(deaths)):
                if t < drug_start_timestep + 1:
                    # No drug yet, so no drug-driven deaths are expected.
                    if deaths[t] > 0:
                        success = False
                        outfile.write("BAD: expected no disease death on drugs before day {0}, get {1} cases at timestep {2}.\n"
                                      "".format(drug_start_timestep + 1, deaths[t], t))
                elif infected_individuals[t] > 0:
                    expected_mortality.append(drug_mortality_rate_HIV * infected_individuals[t])
            expected_mortality.pop(0)  # the Infected is off by one day
            test_death_dates = deaths[drug_start_timestep + 1:drug_start_timestep + 1 + len(expected_mortality)]
            dtk_sft.plot_data(test_death_dates, expected_mortality,
                              label1="actual death", label2="expected death",
                              title="death per day",
                              xlabel="date after drug start day", ylabel="death per day",
                              category="death_counts",
                              show=True, line=True, overlap=True, sort=False)
            chi_result = dtk_sft.test_multinomial(dist=test_death_dates, proportions=expected_mortality,
                                                  report_file=outfile, prob_flag=False)
            if not chi_result:
                success = False
                outfile.write("BAD: Chi-squared test reuslt is False.\n")
        outfile.write(dtk_sft.format_success_msg(success))
        return success