def create_report_file(data):
    """
    Write the report for the "latency timer recalculated on ART" check.

    Timers logged with "Incubation_timer calculated as" (original) and with
    "LifeCourseLatencyTimerUpdate" (updated) are collected from the log lines.
    The check passes when the updated timers fit an exponential distribution
    with rate ``data[2][2]`` while the original timers do not.

    :param data: indexable holding the report file name at [0], the log lines
        at [1] and the TB CD4 activation vector at [2]
    :return: None; the verdict is written to the report file
    """
    report_name = data[0]
    lines = data[1]
    activation_vector = data[2]  # this test assumes the vector is constant

    initial_timers = []
    updated_timers = []
    passed = True

    with open(report_name, "w") as report:
        if not lines:
            report.write("BAD: No relevant test data found.\n")
            passed = False

        for log_line in lines:
            # Two independent checks: both markers are looked for on every line.
            if "Incubation_timer calculated as" in log_line:
                initial_timers.append(float(sft.get_val("as ", log_line)))
            if "LifeCourseLatencyTimerUpdate" in log_line:
                updated_timers.append(float(sft.get_val("timer ", log_line)))

        on_art_rate = activation_vector[2]
        rounding = dict(integers=True, roundup=True, round_nearest=False)

        # The original timers are expected NOT to follow the ART-triggered
        # update distribution, so a match here is a failure.
        original_matches = sft.test_exponential(initial_timers, on_art_rate,
                                                **rounding)
        if original_matches:
            report.write(
                "BAD: The updated latency data matches the original distribution.\n")
            passed = False

        # Reference draws, only used for the plot below.
        expected_timers = np.random.exponential(1 / on_art_rate,
                                                len(updated_timers))

        updated_matches = sft.test_exponential(updated_timers, on_art_rate,
                                               report, **rounding)
        if not updated_matches:
            report.write(
                "BAD: The updated latency data does not match the expected distribution.\n")
            passed = False

        report.write("Data points checked = {}.\n".format(len(updated_timers)))
        report.write("SUMMARY: Success={0}\n".format(passed))

        sft.plot_data(sorted(updated_timers),
                      sorted(expected_timers),
                      label1="Actual",
                      label2="Expected",
                      title="Latency Duration recalculated for ART",
                      xlabel="Data Points",
                      ylabel="Days",
                      category="tb_activation_and_cd4_hiv_first_on_art",
                      line=True,
                      overlap=True)
def create_report_file(data, debug=False):
    """
    Write the report for the recalculated latency duration (TB first, then HIV).

    For every individual, the duration between its last
    "LifeCourseLatencyTimerUpdate" line and its "TBActivationPresymptomatic"
    line is computed and the durations are tested against an exponential
    distribution with rate ``data[2][0]``.

    :param data: indexable holding the report file name at [0], the log lines
        at [1] and the TB CD4 activation vector at [2]
    :param debug: when true, dump the per-individual durations to
        DEBUG_duration_data.json
    :return: None; the verdict is written to the report file
    """
    report_name = data[0]
    lines = data[1]
    activation_vector = data[2]  # this test assumes the vector is constant

    timer_start_by_id = {}
    duration_by_id = {}
    passed = True

    with open(report_name, "w") as report:
        if not lines:
            report.write("BAD: No relevant test data found.\n")
            passed = False

        for log_line in lines:
            is_timer_update = "LifeCourseLatencyTimerUpdate" in log_line
            is_presymptomatic = "TBActivationPresymptomatic" in log_line
            if not (is_timer_update or is_presymptomatic):
                continue

            person = int(sft.get_val("Individual ", log_line))
            stamp = int(sft.get_val("time= ", log_line))

            if is_timer_update:
                if person in timer_start_by_id:
                    report.write(
                        "Individual {} incubation timer reset at time {}. Please check. \n"
                        .format(person, stamp))
                # The latest update always wins.
                timer_start_by_id[person] = stamp
            elif person not in timer_start_by_id:
                report.write(
                    "Individual {} went presymptomatic without incubation timer update at time {}. "
                    "Please check. \n".format(person, stamp))
            else:
                duration_by_id[person] = stamp - timer_start_by_id[person]

        if debug:
            with open("DEBUG_duration_data.json", "w") as debug_file:
                json.dump(duration_by_id, debug_file, indent=4)

        durations = list(duration_by_id.values())
        rate = activation_vector[0]
        fits = sft.test_exponential(durations, rate, report,
                                    integers=True, roundup=True,
                                    round_nearest=False)
        if not fits:
            passed = False

        report.write("Data points checked = {}.\n".format(len(duration_by_id)))
        report.write("SUMMARY: Success={0}\n".format(passed))

        # Reference draws, only used for graphing purposes.
        reference_draws = np.random.exponential(1 / rate, len(duration_by_id))
        expected_durations = [math.ceil(draw) for draw in reference_draws]

        sft.plot_data_sorted(durations,
                             expected_durations,
                             label1="Actual",
                             label2="Expected",
                             title="Recalculated Latency Duration TB then HIV(Sorted)",
                             xlabel="Data Points",
                             ylabel="Days",
                             category="tb_activation_and_cd4_tb_first",
                             line=True,
                             overlap=True)
def create_report_file(data):
    """
    Write the report for time from active TB to coinfection death while on ART.

    For every "died of CoInfection" line, the elapsed time since the same
    individual's "infectionstatechange TBActivation " line is collected. The
    elapsed times are tested against an exponential distribution whose rate is
    the on-ART coinfection mortality rate.

    :param data: indexable holding the report file name at [0], the log lines
        at [1] and the on-ART coinfection mortality rate at [3] (the value at
        [2], the off-ART rate, is not used by this check)
    :return: None; the verdict is written to the report file
    """
    report_name = data[0]
    lines = data[1]
    coinfection_mortality_rate_on_art = data[3]
    died_of_coinfection = "died of CoInfection"
    state_active_symptomatic = "infectionstatechange TBActivation "
    time_to_death_data = []
    activation_time_by_id = {}
    success = True
    with open(report_name, "w") as outfile:
        if not lines:
            outfile.write("BAD: No relevant test data found.\n")
            success = False
        for line in lines:
            if died_of_coinfection in line:
                ind_id = int(sft.get_val("Individual ", line))
                time_stamp = int(sft.get_val("time= ", line))
                if ind_id in activation_time_by_id:
                    time_to_death_data.append(
                        time_stamp - activation_time_by_id[ind_id])
                else:
                    success = False
                    outfile.write(
                        "BAD: Individual {} died of coinfection without going active, at time {}."
                        "\n".format(ind_id, time_stamp))
            elif state_active_symptomatic in line:
                ind_id = int(sft.get_val("Individual ", line))
                start_time_stamp = int(sft.get_val("time= ", line))
                if ind_id in activation_time_by_id:
                    # Informational only: the first activation time is kept.
                    # The two literals used to be joined without a separating
                    # space ("symptomaticat time").
                    outfile.write(
                        "Individual {} went active symptomatic while already being active symptomatic "
                        "at time {}. \n".format(ind_id, start_time_stamp))
                else:
                    activation_time_by_id[ind_id] = start_time_stamp

        # expected_data here is only used for graphing purposes
        expected_data = map(int, np.random.exponential(
            1 / coinfection_mortality_rate_on_art, len(time_to_death_data)))
        if not sft.test_exponential(time_to_death_data,
                                    coinfection_mortality_rate_on_art,
                                    outfile,
                                    integers=True,
                                    roundup=False,
                                    round_nearest=False):
            success = False
        outfile.write("Data points checked = {}.\n".format(len(time_to_death_data)))
        outfile.write("SUMMARY: Success={0}\n".format(success))

        sft.plot_data(sorted(time_to_death_data),
                      sorted(expected_data),
                      label1="Actual",
                      label2="Expected",
                      title="Time from Smear Positive On ART TBHIV to Death",
                      xlabel="Data Points",
                      ylabel="Days",
                      category="tbhiv_mortality_smear_positive_on_art",
                      line=True,
                      overlap=True)
def create_report_file(clearance_times, param_obj, report_name, stdout_days,
                       inset_days, debug):
    """
    Write the report for TB drug clearance times.

    The clearance times are tested against an exponential distribution with
    rate ``param_obj["TB_Drug_Cure_Rate_HIV"]``, then the per-day clear counts
    from the inset chart are compared with the counts taken from stdout.

    :param clearance_times: observed clearance durations
    :param param_obj: mapping that provides "TB_Drug_Cure_Rate_HIV"
    :param report_name: path of the report file to write
    :param stdout_days: per-day clear counts from stdout, indexed by day
    :param inset_days: per-day clear counts from the inset chart, keyed by day
    :param debug: unused
    :return: None; the verdict is written to the report file
    """
    cure_rate_key = "TB_Drug_Cure_Rate_HIV"

    with open(report_name, "w") as report:
        report.write("Cleared infections: {0} \n".format(len(clearance_times)))

        passed = sft.test_exponential(clearance_times,
                                      param_obj[cure_rate_key],
                                      report,
                                      integers=True,
                                      roundup=True,
                                      round_nearest=False)

        for day in sorted(inset_days.keys()):
            from_inset = inset_days[day]
            from_stdout = stdout_days[day]
            counts_differ = from_inset != from_stdout
            if counts_differ:
                report.write(
                    "BAD: day {0} std out has {1} clears, inset chart has {2}\n"
                    .format(day, from_stdout, from_inset))
                passed = False

        report.write("SUMMARY: Success={0}\n".format(passed))
def create_report_file(param_obj, output_dict, report_name, debug):
    """
    Write the report for the PreSymptomatic -> Cleared duration check.

    The function first verifies the test preconditions (competing rates small
    enough that 100 exponential draws all exceed the simulation duration, and
    zero base infectivity). It then checks, per individual, that the observed
    PreSymptomatic -> Cleared duration equals the ceiling of the logged
    internal timer, and finally tests the internal timers against an
    exponential distribution with the presymptomatic cure rate.

    :param param_obj: mapping of config values, read through the module's
        KEY_* constants
    :param output_dict: per-individual mapping; entries may hold KEY_CLEARED
        (indexable: time at [0], timer at [1]) and KEY_PRESYMPTOMATIC
        (indexable: time at [0])
    :param report_name: path of the report file to write
    :param debug: when true, also print the summary line to stdout
    :return: True when every check passed, False otherwise
    """
    with open(report_name, "w") as outfile:
        config_name = param_obj[KEY_CONFIG_NAME]
        outfile.write("Config_name = {}\n".format(config_name))
        success = True
        slow_progressor_rate = param_obj[KEY_SLOW_PROGRESSOR_RATE]
        latent_cure_rate = param_obj[KEY_LATENT_CURE_RATE]
        presymptomatic_cure_rate = param_obj[KEY_PRESYMPTOMATIC_CURE_RATE]
        presymptomatic_rate = param_obj[KEY_PRESYMPTOMATIC_RATE]
        base_infectivity = param_obj[KEY_BASE_INFECTIVITY]
        simulation_duration = param_obj[KEY_DURATION]

        if not len(output_dict):
            success = False
            outfile.write(sft.sft_no_test_data)

        outfile.write("checking test conditions: \n")
        # Each competing rate must be small enough that none of 100 random
        # exponential draws falls inside the simulation duration. The order
        # of the checks (and therefore of the random draws) is unchanged.
        negligible_rate_checks = (
            (KEY_SLOW_PROGRESSOR_RATE, slow_progressor_rate,
             "BAD: expected a small {0} to avoid moving individual to active disease state, "
             "got {1} from config.json. Please "
             "fix the test.\n"),
            (KEY_LATENT_CURE_RATE, latent_cure_rate,
             "BAD: expected a small {0} to avoid Latent to Cleared state transition"
             "(all Latent state will progress to "
             "PreSymptomatic), got {1} from config.json. Please fix the test.\n"),
            (KEY_PRESYMPTOMATIC_RATE, presymptomatic_rate,
             "BAD: expected a small {0} to avoid PreSymptomatic to Symptomatic state transition"
             "(all PreSymptomatic "
             "state will progress to Cleared), got {1} from config.json. Please fix the test.\n"),
        )
        for rate_key, rate, message in negligible_rate_checks:
            draws = np.random.exponential(1 / rate, 100)
            if min(draws) < simulation_duration:
                success = False
                outfile.write(message.format(rate_key, rate))

        if base_infectivity:
            success = False
            # The two literals used to be joined without a separating space
            # ("Please fixthe test.").
            outfile.write(
                "BAD: expected {0} = 0 to look only at progression, got {1} from config.json. "
                "Please fix the test.\n".format(KEY_BASE_INFECTIVITY,
                                                base_infectivity))
        outfile.write("conditional check result is {}.\n".format(success))

        actual_timer = []
        internal_timer = []
        outfile.write(
            "collecting the actual timestep between PreSymptomatic and Cleared:\n")
        outfile.write(
            "checking if the internal timer matches the PreSymptomatic to Cleared duration:\n")
        for individual_id in output_dict:
            events = output_dict[individual_id]
            cleared_time = presymptomatic_time = timer = None
            if KEY_CLEARED in events:
                cleared_time = events[KEY_CLEARED][0]
                timer = events[KEY_CLEARED][1]
                internal_timer.append(timer)
            if KEY_PRESYMPTOMATIC in events:
                presymptomatic_time = events[KEY_PRESYMPTOMATIC][0]

            # NOTE: these are truthiness checks, so a timestamp of 0 is
            # treated the same as a missing one.
            if presymptomatic_time:
                if cleared_time:
                    elapsed = cleared_time - presymptomatic_time
                    actual_timer.append(elapsed)
                    if elapsed != math.ceil(timer):
                        success = False
                        outfile.write(
                            "BAD: individual {0} has internal timer = {1} but the actual timer is {2} (enter "
                            "PreSymptomatic state at timestep {3}, moved to Cleared state at "
                            "timestep {4}).\n".format(individual_id, timer,
                                                      elapsed,
                                                      presymptomatic_time,
                                                      cleared_time))
                else:
                    # Some individuals may not reach the cleared state before
                    # the simulation ends; this is informational only.
                    outfile.write(
                        "Individual {0} moved to PreSymptomatic state at timestep {1} and is not cleared yet at the "
                        "end of simulation (duration = {2})."
                        "\n".format(individual_id, presymptomatic_time,
                                    simulation_duration))
            else:
                success = False
                outfile.write(
                    "BAD: individual {0} moved to cleared state at timerstep {1} before entering "
                    "PreSymptomatic state.\n".format(individual_id,
                                                     cleared_time))

        if not len(actual_timer):
            success = False
            outfile.write(
                "BAD: There is no PreSymptomatic to cleared transition in this test, please fix the test.\n")

        outfile.write(
            "Running ks test for PreSymptomatic to cleared internal timer and numpy exponential distribution: \n")
        size = len(internal_timer)
        scale = 1.0 / presymptomatic_cure_rate
        # Reference draws, only used for the plot.
        dist_exponential_np = np.random.exponential(scale, size)
        sft.plot_data_sorted(
            internal_timer,
            dist2=np.array(dist_exponential_np),
            label1="PreSymptomatic to cleared duration",
            label2="numpy exponential",
            title="exponential rate = {}".format(presymptomatic_cure_rate),
            xlabel="data point",
            ylabel="PreSymptomatic to cleared duration",
            category='PreSymptomatic_to_cleared_duration',
            show=True,
            line=True,
            overlap=True)
        result = sft.test_exponential(internal_timer,
                                      p1=presymptomatic_cure_rate,
                                      report_file=outfile,
                                      integers=False,
                                      roundup=False,
                                      round_nearest=False)
        outfile.write(
            "ks test result is {0}, exponential rate = {1}, # of data point = {2}.\n"
            .format(result, presymptomatic_cure_rate, size))
        if not result:
            success = False
            outfile.write(
                "BAD: test exponential for PreSymptomatic to cleared duration failed with {0} "
                "= {1}.\n".format(KEY_PRESYMPTOMATIC_CURE_RATE,
                                  presymptomatic_cure_rate))
        else:
            outfile.write(
                "GOOD: test exponential for PreSymptomatic to cleared duration passed with {0} "
                "= {1}.\n".format(KEY_PRESYMPTOMATIC_CURE_RATE,
                                  presymptomatic_cure_rate))
        outfile.write(sft.format_success_msg(success))
    if debug:
        print("SUMMARY: Success={0}\n".format(success))
    return success
def create_report_file(drug_start_timestep, disease_deaths, cum_deaths, deaths,
                       infected_individuals, death_times,
                       drug_mortality_rate_HIV, report_name):
    """
    Write the report for TB drug mortality with HIV.

    The cumulative disease deaths from InsetChart.json are compared with the
    ones from stdout.txt for every timestep. Then, depending on the rate:

    * rate < 0.1: the death times are tested against an exponential
      distribution with that rate;
    * otherwise: the deaths per day after the drug start are compared with
      ``rate * infected`` per day through ``sft.test_multinomial``.

    :param drug_start_timestep: timestep at which the drug is distributed
    :param disease_deaths: cumulative disease deaths from InsetChart.json
    :param cum_deaths: cumulative disease deaths from stdout.txt
    :param deaths: deaths per timestep
    :param infected_individuals: infected individuals per timestep
    :param death_times: observed times to death
    :param drug_mortality_rate_HIV: configured mortality rate
    :param report_name: path of the report file to write
    :return: True when every check passed, False otherwise
    """
    with open(report_name, "w") as outfile:
        success = True
        if sum(disease_deaths) == 0 or sum(cum_deaths) == 0 or len(death_times) == 0:
            success = False
            outfile.write(sft.no_test_data)

        for step, stdout_deaths in enumerate(cum_deaths):
            if disease_deaths[step] != stdout_deaths:
                success = False
                outfile.write(
                    "BAD: at timestep {0}, disease deaths is {1} in InsetChart.json and {2} in stdout.txt.\n"
                    .format(step + 1, disease_deaths[step], stdout_deaths))

        # ks exponential test doesn't work very well with large rate, use chi squared test instead
        # while rate is small ks test for exponential distribution is more sensitive to catch the difference
        if drug_mortality_rate_HIV < 0.1:
            outfile.write(
                "Testing death times as draws from exponential distrib with rate {0}. "
                "Dataset size = {1}.\n".format(drug_mortality_rate_HIV,
                                               len(death_times)))
            ks_result = sft.test_exponential(death_times,
                                             drug_mortality_rate_HIV,
                                             report_file=outfile,
                                             integers=True,
                                             roundup=True,
                                             round_nearest=False)
            if not ks_result:
                success = False
                outfile.write("BAD: ks test reuslt is False.\n")

            # Reference draws, rounded up like the observed data, only used
            # for the plots.
            size = len(death_times)
            scale = 1.0 / drug_mortality_rate_HIV
            dist_exponential_np = numpy.random.exponential(scale, size)
            dist_exponential_np = [math.ceil(draw) for draw in dist_exponential_np]
            sft.plot_data_sorted(death_times, dist_exponential_np,
                                 label1="death times", label2="numpy data",
                                 title="death_times_actual_vs_numpy",
                                 xlabel="data points", ylabel="death times",
                                 category="death_times", show=True,
                                 line=True, overlap=True)
            sft.plot_cdf(death_times, dist_exponential_np,
                         label1="death times", label2="numpy data",
                         title="death_times_cdf", xlabel="days",
                         ylabel="probability", category="death_times_cdf",
                         show=True)
        else:
            outfile.write("Testing death count per day with rate {0}. \n".format(
                drug_mortality_rate_HIV))
            expected_mortality = []
            first_drug_day = drug_start_timestep + 1
            for t, death_count in enumerate(deaths):
                if t < first_drug_day:
                    if death_count > 0:
                        success = False
                        outfile.write(
                            "BAD: expected no disease death on drugs before day {0}, get {1} cases at timestep {2}.\n"
                            "".format(first_drug_day, death_count, t))
                elif infected_individuals[t] > 0:
                    expected_mortality.append(
                        drug_mortality_rate_HIV * infected_individuals[t])

            if not expected_mortality:
                # Previously pop(0) raised IndexError here and the report was
                # left without a summary line; report the missing data instead.
                success = False
                outfile.write(
                    "BAD: no timestep after the drug start day has infected individuals, "
                    "cannot test death count per day.\n")
            else:
                expected_mortality.pop(0)  # the Infected is off by one day
                test_death_dates = deaths[first_drug_day:
                                          first_drug_day + len(expected_mortality)]
                sft.plot_data(test_death_dates, expected_mortality,
                              label1="actual death", label2="expected death",
                              title="death per day",
                              xlabel="date after drug start day",
                              ylabel="death per day",
                              category="death_counts", show=True, line=True,
                              overlap=True, sort=False)

                chi_result = sft.test_multinomial(dist=test_death_dates,
                                                  proportions=expected_mortality,
                                                  report_file=outfile,
                                                  prob_flag=False)
                if not chi_result:
                    success = False
                    outfile.write("BAD: Chi-squared test reuslt is False.\n")

        outfile.write(sft.format_success_msg(success))
    return success
def create_report_file(param_obj, output_dict, report_name, debug):
    """
    Write the report for the fast-progressor latent -> presymptomatic check.

    After verifying the test preconditions (child fast fraction of 0 and a
    slow progressor rate small enough that 100 exponential draws all exceed
    the simulation duration), the function:

    * checks per individual that the observed latent -> presymptomatic
      duration equals the ceiling of the logged internal timer;
    * tests the internal timers against an exponential distribution with the
      fast progressor rate;
    * runs ``sft.test_binomial_95ci`` on the share of individuals that
      progressed, against ``adult fast fraction * mean CD4 multiplier``.

    :param param_obj: mapping of config values, read through the module's
        KEY_* constants
    :param output_dict: per-individual mapping; entries may hold
        KEY_PRESYMPTOMATIC (indexable: time at [0], timer at [1]) and
        KEY_LATENT (time)
    :param report_name: path of the report file to write
    :param debug: when true, list the individuals that never progressed and
        print the summary line to stdout
    :return: True when every check passed, False otherwise
    """
    with open(report_name, "w") as outfile:
        config_name = param_obj[KEY_CONFIG_NAME]
        outfile.write("Config_name = {}\n".format(config_name))
        success = True
        slow_progressor_rate = param_obj[KEY_SLOW_PROGRESSOR_RATE]
        fast_progressor_rate = param_obj[KEY_FAST_PROGRESSOR_RATE]
        child_fast_fraction = param_obj[KEY_CHILD_FRACTION]
        adult_fast_fraction = param_obj[KEY_ADULT_FRACTION]
        progression_multiplier = np.mean(param_obj[KEY_CD4_PROGRESSION_MULTIPLIER])
        simulation_duration = param_obj[KEY_DURATION]

        if not len(output_dict):
            success = False
            outfile.write(sft.sft_no_test_data)

        outfile.write("checking test conditions: \n")
        if child_fast_fraction:
            success = False
            outfile.write("BAD: expected {0} = 0, got {1} from config.json. "
                          "Please fix the test.\n".format(KEY_CHILD_FRACTION,
                                                          child_fast_fraction))
        # The slow rate must be small enough that none of 100 random draws
        # falls inside the simulation duration.
        dist_exponential_np_slow = np.random.exponential(1 / slow_progressor_rate, 100)
        if min(dist_exponential_np_slow) < simulation_duration:
            success = False
            outfile.write(
                "BAD: expected a small {0} to distinguish fast and slow progress TB, "
                "got {1} from config.json. Please "
                "fix the test.\n".format(KEY_SLOW_PROGRESSOR_RATE,
                                         slow_progressor_rate))
        outfile.write("conditional check result is {}.\n".format(success))

        actual_timer = []
        internal_timer = []
        slow_count = 0
        outfile.write("collecting the actual timestep between latent and presymptomatic:\n")
        # This line used to announce the "PreSymptomatic to Cleared" duration,
        # a leftover from another test; this test checks latent to
        # presymptomatic.
        outfile.write("checking if the internal timer matches the latent to PreSymptomatic duration:\n")
        for individual_id in output_dict:
            events = output_dict[individual_id]
            presymptomatic_time = timer = latent_time = None
            if KEY_PRESYMPTOMATIC in events:
                presymptomatic_time = events[KEY_PRESYMPTOMATIC][0]
                timer = events[KEY_PRESYMPTOMATIC][1]
                internal_timer.append(timer)
            if KEY_LATENT in events:
                latent_time = events[KEY_LATENT]

            # NOTE: these are truthiness checks, so a timestamp of 0 is
            # treated the same as a missing one.
            if latent_time:
                if presymptomatic_time:
                    elapsed = presymptomatic_time - latent_time
                    actual_timer.append(elapsed)
                    if elapsed != math.ceil(timer):
                        success = False
                        outfile.write(
                            "BAD: individual {0} has internal timer = {1} but the actual timer is {2} (enter "
                            "latent state at timestep {3}, enter presymptomatic active state at "
                            "timestep {4}).\n".format(individual_id, timer,
                                                      elapsed, latent_time,
                                                      presymptomatic_time))
                else:
                    # Some individuals may not reach the presymptomatic state
                    # before the simulation ends; they are counted as slow.
                    slow_count += 1
                    if debug:
                        outfile.write(
                            "Individual {0} moved to latent state at timestep {1} and is not move to "
                            "presymptomatic active yet at the end of simulation (duration = {2})."
                            "\n".format(individual_id, latent_time,
                                        simulation_duration))
            else:
                success = False
                outfile.write(
                    "BAD: individual {0} moved to presymptomatic active state at timerstep {1} before entering "
                    "latent state.\n".format(individual_id, presymptomatic_time))

        if not len(actual_timer):
            success = False
            outfile.write(
                "BAD: There is no latent to presymptomatic state transition in this test, please fix the test.\n")

        outfile.write(
            "Running ks test for latent to presymptomatic internal timer and numpy exponential distribution: \n")
        size = len(internal_timer)
        scale = 1.0 / fast_progressor_rate
        # Reference draws, only used for the plot.
        dist_exponential_np = np.random.exponential(scale, size)
        sft.plot_data_sorted(internal_timer,
                             dist2=np.array(dist_exponential_np),
                             label1="internal timer",
                             label2="numpy exponential",
                             title="exponential rate = {}".format(fast_progressor_rate),
                             xlabel="data point",
                             ylabel="latent to presymptomatic internal timer",
                             category='latent_to_presymptomatic_internal_timer',
                             show=True, line=False, overlap=True)
        result = sft.test_exponential(internal_timer,
                                      p1=fast_progressor_rate,
                                      report_file=outfile,
                                      integers=True,
                                      roundup=True,
                                      round_nearest=False)
        outfile.write(
            "ks test result is {0}, exponential rate = {1}, # of data point = {2}.\n".format(
                result, fast_progressor_rate, size))
        if not result:
            success = False
            outfile.write(
                "BAD: test exponential for latent to presymptomatic duration failed with fast_progressor_rate "
                "= {}.\n".format(fast_progressor_rate))
        else:
            outfile.write(
                "GOOD: test exponential for latent to presymptomatic duration passed with fast_progressor_rate "
                "= {}.\n".format(fast_progressor_rate))

        outfile.write("running binomial test with 95% confidence for Fast_Progressor_Fraction_Adult:\n")
        fast_count = len(internal_timer)
        result2 = sft.test_binomial_95ci(num_success=fast_count,
                                         num_trials=fast_count + slow_count,
                                         prob=adult_fast_fraction * progression_multiplier,
                                         report_file=outfile,
                                         category="Fast_Progressor_Fraction_Adult")
        outfile.write(
            "number of slow progressor is {0} and number of fast progressor is {1}.\n".format(
                slow_count, fast_count))
        if not result2:
            success = False
            outfile.write(
                "BAD: binomial test for Fast_Progressor_Fraction_Adult = {0} and TB_CD4_Primary_Progression= {1} failed"
                ".\n".format(adult_fast_fraction, progression_multiplier))
        else:
            outfile.write(
                "GOOD: binomial test for Fast_Progressor_Fraction_Adult = {0} and TB_CD4_Primary_Progression= {1} passed"
                ".\n".format(adult_fast_fraction, progression_multiplier))

        outfile.write(sft.format_success_msg(success))
    if debug:
        print("SUMMARY: Success={0}\n".format(success))
    return success
def create_report_file(param_obj, output_dict, report_name, debug):
    """
    Write the report for the slow-progressor latent -> presymptomatic check.

    The function checks per individual that the observed latent ->
    presymptomatic duration equals the ceiling of the logged internal timer,
    then tests the internal timers against an exponential distribution with
    the slow progressor rate.

    :param param_obj: mapping of config values, read through the module's
        KEY_* constants
    :param output_dict: per-individual mapping; entries may hold
        KEY_PRESYMPTOMATIC (indexable: time at [0], timer at [1]) and
        KEY_LATENT (time)
    :param report_name: path of the report file to write
    :param debug: when true, also print the summary line to stdout
    :return: True when every check passed, False otherwise
    """
    with open(report_name, "w") as outfile:
        config_name = param_obj[KEY_CONFIG_NAME]
        outfile.write("Config_name = {}\n".format(config_name))
        success = True
        slow_progressor_rate = param_obj[KEY_SLOW_PROGRESSOR_RATE]
        simulation_duration = param_obj[KEY_DURATION]

        if not len(output_dict):
            success = False
            outfile.write(sft.sft_no_test_data)

        actual_timer = []
        internal_timer = []
        outfile.write("collecting the actual timestep between latent and presymptomatic:\n")
        # This line used to announce the "PreSymptomatic to Cleared" duration,
        # a leftover from another test; this test checks latent to
        # presymptomatic.
        outfile.write("checking if the internal timer matches the latent to PreSymptomatic duration:\n")
        for individual_id in output_dict:
            events = output_dict[individual_id]
            presymptomatic_time = timer = latent_time = None
            if KEY_PRESYMPTOMATIC in events:
                presymptomatic_time = events[KEY_PRESYMPTOMATIC][0]
                timer = events[KEY_PRESYMPTOMATIC][1]
                internal_timer.append(timer)
            if KEY_LATENT in events:
                latent_time = events[KEY_LATENT]

            # NOTE: these are truthiness checks, so a timestamp of 0 is
            # treated the same as a missing one.
            if latent_time:
                if presymptomatic_time:
                    elapsed = presymptomatic_time - latent_time
                    actual_timer.append(elapsed)
                    if elapsed != math.ceil(timer):
                        success = False
                        outfile.write(
                            "BAD: individual {0} has internal timer = {1} but the actual timer is {2} (enter "
                            "latent state at timestep {3}, enter presymptomatic active state at "
                            "timestep {4}).\n".format(individual_id, timer,
                                                      elapsed, latent_time,
                                                      presymptomatic_time))
                else:
                    # Some individuals may not reach the presymptomatic state
                    # before the simulation ends; this is informational only.
                    outfile.write(
                        "Individual {0} moved to latent state at timestep {1} and is not move to "
                        "presymptomatic active yet at the end of simulation (duration = {2})."
                        "\n".format(individual_id, latent_time,
                                    simulation_duration))
            else:
                success = False
                outfile.write(
                    "BAD: individual {0} moved to presymptomatic active state at timerstep {1} before entering "
                    "latent state.\n".format(individual_id, presymptomatic_time))

        if not len(actual_timer):
            success = False
            outfile.write(
                "BAD: There is no latent to presymptomatic state transition in this test, please fix the test.\n")

        outfile.write(
            "Running ks test for latent to presymptomatic internal timer and numpy exponential distribution: \n")
        size = len(internal_timer)
        scale = 1.0 / slow_progressor_rate
        # Reference draws, only used for the plot.
        dist_exponential_np = np.random.exponential(scale, size)
        sft.plot_data_sorted(internal_timer,
                             dist2=np.array(dist_exponential_np),
                             label1="latent to presymptomatic internal timer",
                             label2="numpy exponential",
                             title="exponential rate = {}".format(slow_progressor_rate),
                             xlabel="data point",
                             ylabel="latent to presymptomatic internal timer",
                             category='latent_to_presymptomatic_internal_timer',
                             show=True, line=False, overlap=True)
        result = sft.test_exponential(internal_timer,
                                      p1=slow_progressor_rate,
                                      report_file=outfile,
                                      integers=True,
                                      roundup=True,
                                      round_nearest=False)
        outfile.write(
            "ks test result is {0}, exponential rate = {1}, # of data point = {2}.\n".format(
                result, slow_progressor_rate, size))
        if not result:
            success = False
            outfile.write(
                "BAD: test exponential for latent to presymptomatic internal timer failed with slow_progressor_rate "
                "= {}.\n".format(slow_progressor_rate))
        else:
            outfile.write(
                "GOOD: test exponential for latent to presymptomatic internal timer passed with slow_progressor_rate "
                "= {}.\n".format(slow_progressor_rate))

        outfile.write(sft.format_success_msg(success))
    if debug:
        print("SUMMARY: Success={0}\n".format(success))
    return success
def create_report_file(drug_start_timestep, inactivation_times, active_count,
                       inactivations, drug_inactivation_rate, report_name,
                       debug=False):
    """
    Write the report for the TB drug inactivation rate check.

    * rate < 0.1: the inactivation times are tested against an exponential
      distribution with that rate and plotted next to reference draws;
    * otherwise: the inactivations per day after the drug start are compared
      with ``rate * active_count`` per day through ``sft.test_multinomial``.

    :param drug_start_timestep: timestep at which the drug is distributed
    :param inactivation_times: observed times to inactivation
    :param active_count: active infections per timestep
    :param inactivations: inactivations per timestep
    :param drug_inactivation_rate: configured inactivation rate
    :param report_name: path of the report file to write
    :param debug: when true, also print the summary message to stdout
    :return: the overall verdict (in the small-rate branch this is whatever
        ``sft.test_exponential`` returned)
    """
    with open(report_name, "w") as report:
        success = True
        # ks exponential test doesn't work very well with large rate, use chi squared test instead.
        # while rate is small ks test for exponential distribution is more sensitive to catch the difference
        use_ks_test = drug_inactivation_rate < 0.1
        if use_ks_test:
            report.write(
                "Testing inactivation times as draws from exponential distrib with rate {0}. "
                "Dataset size = {1}.\n".format(drug_inactivation_rate,
                                               len(inactivation_times)))
            success = sft.test_exponential(inactivation_times,
                                           drug_inactivation_rate,
                                           report,
                                           integers=True,
                                           roundup=True,
                                           round_nearest=False)
            if not success:
                report.write("BAD: ks test for rate {} is False.\n".format(
                    drug_inactivation_rate))

            # Reference draws, rounded up, only used for the plots.
            sample_count = len(inactivation_times)
            reference_draws = numpy.random.exponential(
                1.0 / drug_inactivation_rate, sample_count)
            reference_days = [math.ceil(draw) for draw in reference_draws]

            shared_labels = dict(label1="test times", label2="numpy data")
            sft.plot_data_sorted(inactivation_times,
                                 reference_days,
                                 title="inactivation_times_actual_vs_numpy",
                                 xlabel="data points",
                                 ylabel="Inactivation times",
                                 category="inactivation_times",
                                 show=True,
                                 line=True,
                                 overlap=True,
                                 **shared_labels)
            sft.plot_cdf(inactivation_times,
                         reference_days,
                         title="inactivation_times_cdf",
                         xlabel="days",
                         ylabel="probability",
                         category="inactivation_times_cdf",
                         show=True,
                         **shared_labels)
            sft.plot_probability(inactivation_times,
                                 reference_days,
                                 title="inactivation_times_pdf",
                                 xlabel="days",
                                 ylabel="probability",
                                 category="inactivation_times_pdf",
                                 show=True,
                                 **shared_labels)
        else:
            report.write(
                "Testing inactivation count per day with rate {0}. \n".format(
                    drug_inactivation_rate))

            expected_counts = []
            for step, observed in enumerate(inactivations):
                if step < drug_start_timestep:
                    if observed > 0:
                        success = False
                        report.write(
                            "BAD: expected no inactivations on drugs before day {0}, get {1} cases at timestep {2}.\n"
                            "".format(drug_start_timestep, observed, step))
                elif active_count[step] > 0:
                    expected_counts.append(
                        drug_inactivation_rate * active_count[step])

            # Align the observed counts (starting the day after the drug
            # start) with the expected counts, truncating the longer list.
            first_day = drug_start_timestep + 1
            if len(inactivations) <= len(expected_counts) + drug_start_timestep:
                observed_counts = inactivations[first_day:]
                expected_counts = expected_counts[:len(observed_counts)]
            else:
                observed_counts = inactivations[
                    first_day:first_day + len(expected_counts)]

            sft.plot_data(observed_counts,
                          expected_counts,
                          label1="actual inactivation",
                          label2="expected inactivation",
                          title="inactivation per day",
                          xlabel="date after drug start day",
                          ylabel="inactivation per day",
                          category="inactivation_counts",
                          show=True,
                          line=True,
                          overlap=True,
                          sort=False)

            chi_result = sft.test_multinomial(dist=observed_counts,
                                              proportions=expected_counts,
                                              report_file=report,
                                              prob_flag=False)
            if not chi_result:
                success = False
                report.write("BAD: Chi-squared test reuslt is False.\n")

        report.write(sft.format_success_msg(success))
    if debug:
        print(sft.format_success_msg(success))
    return success
def create_report_file(param_obj, output_dict, report_name, debug):
    """
    Write the report for the symptomatic active -> cleared (cure) timer check.

    The function checks per individual that the observed symptomatic ->
    cleared duration equals the ceiling of the logged cure timer, then tests
    the cure timers against an exponential distribution with the cure rate.

    :param param_obj: mapping of config values, read through the module's
        KEY_* constants
    :param output_dict: per-individual mapping; entries may hold KEY_CLEARED
        (indexable: time at [0], timer at [1]) and KEY_SYMPTOMATIC
        (indexable: time at [0])
    :param report_name: path of the report file to write
    :param debug: when true, also print the summary line to stdout
    :return: True when every check passed, False otherwise
    """
    with open(report_name, "w") as outfile:
        config_name = param_obj[KEY_CONFIG_NAME]
        outfile.write("Config_name = {}\n".format(config_name))
        success = True
        cure_rate = param_obj[KEY_CURE_RATE]
        simulation_duration = param_obj[KEY_DURATION]

        if not len(output_dict):
            success = False
            outfile.write(sft.sft_no_test_data)

        cure_timer = []
        actual_timer = []
        outfile.write(
            "Checking the timer and actual timestep between active and cure:\n")
        # This line used to announce the "PreSymptomatic to Cleared" duration,
        # a leftover from another test; this test checks symptomatic active
        # to cleared.
        outfile.write(
            "checking if the cure timer matches the symptomatic active to cleared duration:\n")
        for individual_id in output_dict:
            events = output_dict[individual_id]
            cure_time = timer = active_time = None
            if KEY_CLEARED in events:
                cure_time = events[KEY_CLEARED][0]
                timer = events[KEY_CLEARED][1]
                cure_timer.append(timer)
            if KEY_SYMPTOMATIC in events:
                active_time = events[KEY_SYMPTOMATIC][0]

            # NOTE: these are truthiness checks, so a timestamp of 0 is
            # treated the same as a missing one.
            if active_time:
                if cure_time:
                    elapsed = cure_time - active_time
                    actual_timer.append(elapsed)
                    if elapsed != math.ceil(timer):
                        success = False
                        outfile.write(
                            "BAD: individual {0} has cure timer = {1} but the actual cure time is {2} (enter "
                            "symptomatic active state at timestep {3}, cleared at timestep {4}).\n"
                            .format(individual_id, timer, elapsed,
                                    active_time, cure_time))
                else:
                    # Some individuals may not be cleared before the
                    # simulation ends; this is informational only.
                    outfile.write(
                        "Individual {0} moved to symptomatic active at timestep {1} and is not cleared yet at "
                        "the end of simulation (duration = {2}).\n".format(
                            individual_id, active_time, simulation_duration))
            else:
                success = False
                outfile.write(
                    "BAD: individual {0} is cleared before entering active symptomatic state.\n"
                    .format(individual_id))

        if not len(actual_timer):
            success = False
            outfile.write(
                "BAD: There is no recovered individual in this test, please fix the test.\n")
        outfile.write(
            "Result is {0}. # of recovered individual = {1}\n".format(
                success, len(actual_timer)))

        outfile.write(
            "Running ks test for timer and numpy exponential distribution: \n")
        size = len(cure_timer)
        scale = 1.0 / cure_rate
        # Reference draws, only used for the plot.
        dist_exponential_np = np.random.exponential(scale, size)
        sft.plot_data_sorted(cure_timer,
                             dist2=np.array(dist_exponential_np),
                             label1="cure timer",
                             label2="numpy exponential",
                             title="exponential rate = {}".format(cure_rate),
                             xlabel="data point",
                             ylabel="cure timer",
                             category='Cure_timer',
                             show=True,
                             line=False,
                             overlap=True)
        result = sft.test_exponential(cure_timer,
                                      p1=cure_rate,
                                      report_file=outfile,
                                      integers=False,
                                      roundup=False,
                                      round_nearest=False)
        outfile.write(
            "ks test result is {0}, exponential rate = {1}, # of data point = {2}.\n"
            .format(result, cure_rate, size))
        if not result:
            success = False
            outfile.write(
                "BAD: test exponential for cure timer failed with cure rate = {}.\n"
                .format(cure_rate))

        outfile.write(sft.format_success_msg(success))
    if debug:
        print("SUMMARY: Success={0}\n".format(success))
    return success
def create_report_file(data):
    """
    Write the report for latency timer updates after stopping ART.

    Each "LifeCourseLatencyTimerUpdate" line is attributed to the most recent
    "has event" line of the same individual. Timers that follow a
    "StartedART" event are set aside; all others are treated as StoppedART
    timers. The check passes when the StoppedART timers do NOT fit an
    exponential distribution with rate ``data[2][2]`` and at most 1% of them
    are shorter than ``big_magic_number`` days.

    :param data: indexable holding the report file name at [0], the log lines
        at [1] and the TB CD4 activation vector at [2] (assumed constant
        during the test)
    :return: None; the verdict is written to the report file
    """
    report_name = data[0]
    lines = data[1]
    tb_cd4_activation_vector = data[2]  # this test assumes the vector is constant

    # StartedArt distribution is currently based on 0.01, while StoppedArt is on mostly 0.0000001, so we expect
    # much longer latency durations for the StoppedArt data, using big arbitrary # that is noticeably bigger than
    # what StartedArt distribution would extremely likely give us
    big_magic_number = 2000
    # Largest tolerated share of StoppedART durations below big_magic_number.
    max_small_proportion = 0.01

    stopped_art_latency_data = []
    started_art_latency_data = []  # collected but not evaluated by this check
    art_events_dict = {}
    success = True
    with open(report_name, "w") as outfile:
        if not lines:
            outfile.write("BAD: No relevant test data found.\n")
            success = False
        for line in lines:
            if "has event" in line:
                ind_id = int(sft.get_val("Individual ", line))
                # get_val only gets digits, so the event name is taken by
                # position (10th space-separated token, trailing "." removed)
                art_status = line.split(" ")[9].strip(".")
                art_events_dict[ind_id] = art_status
            if "LifeCourseLatencyTimerUpdate" in line:
                ind_id = int(sft.get_val("Individual ", line))
                new_incubation_timer = float(sft.get_val("timer ", line))
                if ind_id in art_events_dict:
                    # Each ART event accounts for exactly one timer update.
                    art_status = art_events_dict.pop(ind_id)
                    if art_status == "StartedART":
                        started_art_latency_data.append(new_incubation_timer)
                    else:
                        stopped_art_latency_data.append(new_incubation_timer)
                else:
                    success = False
                    outfile.write(
                        "BAD: No art-related event found in the logs for this timer update for Individual {},"
                        " at time {}.\n".format(
                            ind_id, int(sft.get_val("time= ", line))))

        # we want the stopped art latency data to NOT match the started art latency data
        # and we expect the stopped art latency data to be long period times as made by our cd4_Activation_vector
        if sft.test_exponential(stopped_art_latency_data,
                                tb_cd4_activation_vector[2],
                                integers=False,
                                roundup=False,
                                round_nearest=False):
            outfile.write(
                "BAD: The StoppedArt latency data distribution matches the StartedArt latency data"
                " distribution, but shouldn't.\n")
            success = False

        # Reference draws, only used for the plot.
        expected_stopped_art_data = np.random.exponential(
            1 / tb_cd4_activation_vector[0], len(stopped_art_latency_data))

        if not stopped_art_latency_data:
            # Previously the proportion below divided by zero here, which
            # also crashed the "no relevant test data" case.
            outfile.write("BAD: No StoppedART latency timer updates found.\n")
            success = False
        else:
            small_duration_count = sum(
                1 for duration in stopped_art_latency_data
                if duration < big_magic_number)
            proportion_small = small_duration_count / float(
                len(stopped_art_latency_data))
            if proportion_small > max_small_proportion:
                # The message used to say 0.5% although the threshold is 1%;
                # it now reports the threshold that is actually applied.
                outfile.write(
                    "BAD: More than {:.1%} of our durations are suspiciously small, it is {}. "
                    "Please Investigate.\n".format(max_small_proportion,
                                                   proportion_small))
                success = False

        outfile.write("Data points checked = {}.\n".format(
            len(stopped_art_latency_data)))
        outfile.write("SUMMARY: Success={0}\n".format(success))

        sft.plot_data(
            sorted(stopped_art_latency_data),
            sorted(expected_stopped_art_data),
            label1="Actual",
            label2="Expected",
            title="StoppedART Latency data should have a similar shape/scale of duration but will not "
                  "match",
            xlabel="Data Points",
            ylabel="Days",
            category="tb_activation_and_cd4_hiv_first_on_art_off_art",
            line=True,
            overlap=True)
def create_report_file(param_obj, output_dict, report_name, debug):
    """Write the latent-to-cleared duration report and return the verdict.

    The report first validates the test configuration, then collects the
    duration between the latent and cleared timestamps of every individual in
    ``output_dict`` and compares those durations with an exponential
    distribution whose rate is the configured latent cure rate.

    Args:
        param_obj: Mapping of configuration values, read through the
            module-level ``KEY_*`` constants.
        output_dict: Mapping of individual id to a mapping that may hold the
            ``KEY_LATENT``, ``KEY_CLEARED`` and ``KEY_PRESYMPTOMATIC``
            timestamps of that individual.
        report_name: Path of the report file to write.
        debug: When truthy, the summary line is also printed to stdout.

    Returns:
        bool: True when every check passed, False otherwise.
    """
    with open(report_name, "w") as outfile:
        config_name = param_obj[KEY_CONFIG_NAME]
        outfile.write("Config_name = {}\n".format(config_name))
        success = True
        fast_progressor_rate = param_obj[KEY_FAST_PROGRESSOR_RATE]
        latent_cure_rate = param_obj[KEY_LATENT_CURE_RATE]
        child_fast_fraction = param_obj[KEY_CHILD_FRACTION]
        adult_fast_fraction = param_obj[KEY_ADULT_FRACTION]
        simulation_duration = param_obj[KEY_DURATION]
        if not output_dict:
            success = False
            outfile.write(sft.sft_no_test_data)

        outfile.write("checking test conditions: \n")
        if not child_fast_fraction or not adult_fast_fraction:
            success = False
            outfile.write("BAD: expected {0} and {1} = 1, got {2} and {3} from config.json. "
                          "Please fix the test.\n".format(KEY_CHILD_FRACTION, KEY_ADULT_FRACTION,
                                                          child_fast_fraction, adult_fast_fraction))
        # A rate of exactly 0 cannot be turned into a scale (1 / 0 raises);
        # presumably it also means no fast progression at all, which is the
        # condition this check wants, so the sampling is skipped.
        if fast_progressor_rate:
            dist_exponential_np_fast = np.random.exponential(1 / fast_progressor_rate, 100)
            if min(dist_exponential_np_fast) < simulation_duration:
                success = False
                outfile.write("BAD: expected a small {0} to avoid moving individual to active disease state, "
                              "got {1} from config.json. Please "
                              "fix the test.\n".format(KEY_FAST_PROGRESSOR_RATE, fast_progressor_rate))
        outfile.write("conditional check result is {}.\n".format(success))

        actual_timer = []
        outfile.write("collecting the actual timestep between latent and cleared:\n")
        for ind_id in output_dict:
            states = output_dict[ind_id]
            # A missing state stays None; 0 is a valid timestamp, so presence
            # is tested with "is not None" rather than truthiness.
            cleared_time = states.get(KEY_CLEARED)
            latent_time = states.get(KEY_LATENT)
            presymptomatic_time = states.get(KEY_PRESYMPTOMATIC)
            if latent_time is not None:
                if cleared_time is not None:
                    actual_timer.append(cleared_time - latent_time)
                else:
                    # some individual may not move to cleared state at the end of the simulation
                    outfile.write("Individual {0} moved to latent state at timestep {1} and is not cleared yet at the "
                                  "end of simulation (duration = {2})."
                                  "\n".format(ind_id, latent_time, simulation_duration))
            else:
                success = False
                outfile.write("BAD: individual {0} moved to cleared state at timerstep {1} before entering "
                              "latent state.\n".format(ind_id, cleared_time))
            if presymptomatic_time is not None:
                success = False
                outfile.write("BAD: individual {0} moved to presymptomatic at timestep {1}, expected no active disease"
                              " in this simulation, please double check the config.\n".format(ind_id,
                                                                                              presymptomatic_time))

        if not actual_timer:
            success = False
            outfile.write("BAD: There is no latent to cleared transition in this test, please fix the test.\n")

        outfile.write("Running ks test for latent to cleared duration and numpy exponential distribution: \n")
        size = len(actual_timer)
        scale = 1.0 / latent_cure_rate
        # Reference sample, used for the plot only.
        dist_exponential_np = np.random.exponential(scale, size)
        sft.plot_data_sorted(actual_timer, dist2=np.array(dist_exponential_np),
                             label1="latent to cleared duration",
                             label2="numpy exponential",
                             title="exponential rate = {}".format(latent_cure_rate),
                             xlabel="data point", ylabel="latent to cleared duration",
                             category='latent_to_cleared_duration', show=True, line=False,
                             overlap=True)
        result = sft.test_exponential(actual_timer, p1=latent_cure_rate, report_file=outfile,
                                      integers=True, roundup=True, round_nearest=False)
        outfile.write("ks test result is {0}, exponential rate = {1}, # of data point = {2}.\n".format(
            result, latent_cure_rate, size))
        if not result:
            success = False
            outfile.write("BAD: test exponential for latent to cleared duration failed with {0} "
                          "= {1}.\n".format(KEY_LATENT_CURE_RATE, latent_cure_rate))
        else:
            outfile.write("GOOD: test exponential for latent to cleared duration passed with {0} "
                          "= {1}.\n".format(KEY_LATENT_CURE_RATE, latent_cure_rate))

        outfile.write(sft.format_success_msg(success))

    if debug:
        print("SUMMARY: Success={0}\n".format(success))
    return success
def create_report_file(data):
    """Write the "HIV + ART, then TB, then off ART" latency report and plot it.

    Scans the log lines for ART events, initial incubation timers and latency
    timer updates.  Two things are checked: the timers updated after a
    non-"StartedART" event (they must not follow the exponential with rate
    ``data[2][2]`` and must almost all be at least ``big_magic_number``), and
    the initial incubation timers of reconstituting individuals (they must
    follow that exponential).  Findings are written to the report file, ending
    with a ``SUMMARY: Success=...`` line.  Nothing is returned.

    Args:
        data: Indexable with at least three entries: ``data[0]`` is the path
            of the report file to write, ``data[1]`` is the collection of log
            lines to scan, and ``data[2]`` is the TB CD4 activation vector
            (entry 2 is used as the exponential rate).
    """
    report_name = data[0]
    lines = data[1]
    tb_cd4_activation_vector = data[2]  # this test assumes the vector is constant
    # StartedArt distribution is currently based on 0.01, while StoppedArt is on
    # mostly 0.0000001, so we expect much longer latency durations for the
    # StoppedArt data, using a big arbitrary number that is noticeably bigger
    # than what the StartedArt distribution would extremely likely give us.
    big_magic_number = 2000
    stopped_art_latency_data = []
    # Collected for completeness; no check in this function reads it.
    started_art_latency_data = []
    tb_on_art_latency_data = []
    art_events_dict = {}
    success = True
    with open(report_name, "w") as outfile:
        if not lines:
            outfile.write("BAD: No relevant test data found.\n")
            success = False
        for line in lines:
            if "has event" in line:
                ind_id = int(sft.get_val("Individual ", line))
                # get_val only gets digits, so the event name is taken by position
                art_status = line.split(" ")[9].strip(".")
                art_events_dict[ind_id] = art_status
            if "Incubation_timer calculated as" in line:
                infection_timer = float(sft.get_val("calculated as ", line))
                reconstitute = int(sft.get_val("reconstitute=", line))
                if reconstitute:  # ignore people who are not reconstituting.
                    tb_on_art_latency_data.append(infection_timer)
            if "LifeCourseLatencyTimerUpdate" in line:
                ind_id = int(sft.get_val("Individual ", line))
                new_incubation_timer = float(sft.get_val("timer ", line))
                if ind_id in art_events_dict:
                    # Consume the event so it explains at most one timer update.
                    if art_events_dict.pop(ind_id) == "StartedART":
                        # we ignore this for this test, people are already on art when they get TB
                        started_art_latency_data.append(new_incubation_timer)
                    else:
                        stopped_art_latency_data.append(new_incubation_timer)
                else:
                    success = False
                    outfile.write(
                        "BAD: No art-related event found in the logs for this timer update for Individual {},"
                        " at time {}.\n".format(
                            ind_id, int(sft.get_val("time= ", line))))

        if stopped_art_latency_data:
            # we want the stopped art latency data to NOT match the started art latency data
            # and we expect the stopped art latency data to be long period times as made
            # by our cd4_Activation_vector
            if sft.test_exponential(stopped_art_latency_data,
                                    tb_cd4_activation_vector[2],
                                    integers=True,
                                    roundup=True,
                                    round_nearest=False):
                outfile.write(
                    "BAD: The StoppedArt latency data distribution matches the initial latency data"
                    " distribution, but shouldn't.\n")
                success = False
            small_duration_count = sum(
                1 for duration in stopped_art_latency_data
                if duration < big_magic_number)
            proportion_small = small_duration_count / float(
                len(stopped_art_latency_data))
            if proportion_small > 0.006:
                outfile.write(
                    "BAD: More than 0.006 of our durations are suspiciously small, it is {}. "
                    "Please Investigate.\n".format(proportion_small))
                success = False
        else:
            # Without any StoppedART timer there is nothing to verify, and the
            # proportion above would divide by zero.
            outfile.write(
                "BAD: No StoppedART latency data found, cannot check the distribution.\n"
            )
            success = False

        # this is testing the internal timer which is float type
        # so 'integers=False'
        if not sft.test_exponential(tb_on_art_latency_data,
                                    tb_cd4_activation_vector[2],
                                    outfile,
                                    integers=False,
                                    roundup=False,
                                    round_nearest=False):
            success = False
            # Trailing newline keeps the next report line on its own line.
            outfile.write(
                "BAD: Initial TB infection (with HIV and ART) latency doesn't match expected distribution.\n"
            )
        outfile.write("Data points checked = {}."
                      "\n".format(len(tb_on_art_latency_data)))
        outfile.write("SUMMARY: Success={0}\n".format(success))

    # for graphing purposes only
    expected_tb_on_art_latency_data = np.random.exponential(
        1 / tb_cd4_activation_vector[2], len(tb_on_art_latency_data))
    sft.plot_data(sorted(tb_on_art_latency_data),
                  sorted(expected_tb_on_art_latency_data),
                  label1="Actual",
                  label2="Expected",
                  title="HIV+ART then TB latency data",
                  xlabel="Data Points",
                  ylabel="Days",
                  category="tb_activation_and_cd4_hiv_art_tb_offart",
                  line=True,
                  overlap=True)