def parse_output_file(output_filename="test.txt", debug=False):
    """
    Creates a dictionary to store filtered information for each time step.
    :param output_filename: file to parse (test.txt)
    :return: output_dict
    """
    filtered_lines = []
    with open(output_filename) as logfile:
        for line in logfile:
            if dtk_sft.has_match(line, [matches[0]]):
                filtered_lines.append(line)
    if debug:
        with open("DEBUG_filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)

    # initialize variables
    time_step = 0
    core = None
    output_dict = {}
    for line in filtered_lines:
        time_step = dtk_sft.get_val(matches[0], line)
        core = dtk_sft.get_val(matches[1], line)
        if time_step not in output_dict:
            output_dict[time_step] = [core]
        else:
            output_dict[time_step].append(core)
    if debug:
        res_path = r'./DEBUG_core_per_time_step.json'
        with open(res_path, "w") as file:
            json.dump(output_dict, file, indent=4)
    return output_dict
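# A minimal usage sketch (not from the original scripts): checking that every core reported on
# every time step in the dictionary built above. The expected core count is a hypothetical
# parameter, and check_cores_reported() is an illustrative helper, not part of the test suite.
def check_cores_reported(output_dict, expected_num_cores=4):
    bad_steps = {}
    for time_step, cores in output_dict.items():
        # each time step should carry one entry per core
        if len(set(cores)) != expected_num_cores:
            bad_steps[time_step] = sorted(set(cores))
    return bad_steps  # an empty dict means every core logged at every time step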
def create_report_file(data):
    report_name = data[0]
    lines = data[1]
    cd4_strata = data[2]
    mod_array = data[3]
    actual_data = []
    expected_data = []
    success = True
    epsilon = 0.000002
    with open(report_name, "w") as outfile:
        if not lines:
            outfile.write("BAD: No relevant test data found.\n")
            success = False
        for line in lines:
            cd4_count = float(dtk_sft.get_val("CD4count=", line))
            cd4_mod_actual = float(dtk_sft.get_val("CD4mod=", line))
            cd4_mod_expected = tb_cd4_susceptibility_calc([mod_array, cd4_strata, cd4_count])
            actual_data.append(cd4_mod_actual)
            expected_data.append(cd4_mod_expected)
            if abs(cd4_mod_actual - cd4_mod_expected) > epsilon:
                success = False
                outfile.write("BAD: At Time: {} for Individual {} with CD4 count {} expected susceptibility "
                              "modifier was {}, but actual was {}.\n".format(dtk_sft.get_val("time= ", line),
                                                                             dtk_sft.get_val("Individual ", line),
                                                                             cd4_count, cd4_mod_expected,
                                                                             cd4_mod_actual))
        outfile.write("Data points checked = {}.\n".format(len(lines)))
        outfile.write("SUMMARY: Success={0}\n".format(success))
    dtk_sft.plot_data_sorted(actual_data, expected_data,
                             label1="Actual", label2="Expected",
                             title="Susceptibility Modifier", xlabel="Data Points",
                             ylabel="Modifying Multiplier",
                             category="tb_susceptibility_and_cd4", line=True, overlap=True)
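# tb_cd4_susceptibility_calc() is referenced above but not shown here. A minimal sketch of what
# such a calculation could look like, assuming the modifier is linearly interpolated between CD4
# strata; this is an illustration of the technique, not the model's confirmed formula.
def tb_cd4_susceptibility_calc_sketch(data):
    mod_array, cd4_strata, cd4_count = data
    if cd4_count <= cd4_strata[0]:
        return mod_array[0]
    if cd4_count >= cd4_strata[-1]:
        return mod_array[-1]
    for i in range(len(cd4_strata) - 1):
        if cd4_strata[i] <= cd4_count < cd4_strata[i + 1]:
            # linear interpolation within the stratum
            fraction = (cd4_count - cd4_strata[i]) / (cd4_strata[i + 1] - cd4_strata[i])
            return mod_array[i] + fraction * (mod_array[i + 1] - mod_array[i])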
def create_report_file(data):
    report_name = data[0]
    lines = data[1]
    tb_cd4_activation_vector = data[2]  # this test assumes the vector is constant
    latency_update_data = []
    original_latency_data = []
    success = True
    with open(report_name, "w") as outfile:
        if not lines:
            outfile.write("BAD: No relevant test data found.\n")
            success = False
        for line in lines:
            if "Incubation_timer calculated as" in line:
                incubation_timer = float(dtk_sft.get_val("as ", line))
                original_latency_data.append(incubation_timer)
            if "LifeCourseLatencyTimerUpdate" in line:
                new_incubation_timer = float(dtk_sft.get_val("timer ", line))
                latency_update_data.append(new_incubation_timer)
        # expecting the original distribution to NOT match the ART-triggered update distribution
        if dtk_sft.test_exponential(original_latency_data, tb_cd4_activation_vector[2],
                                    integers=True, roundup=True, round_nearest=False):
            outfile.write("BAD: The original latency data matches the ART-triggered update distribution, "
                          "but shouldn't.\n")
            success = False
        expected_update_data = np.random.exponential(1 / tb_cd4_activation_vector[2], len(latency_update_data))
        # the updated timers, however, should fit the ART-triggered distribution
        if not dtk_sft.test_exponential(latency_update_data, tb_cd4_activation_vector[2], outfile,
                                        integers=True, roundup=True, round_nearest=False):
            outfile.write("BAD: The updated latency data does not match the expected distribution.\n")
            success = False
        outfile.write("Data points checked = {}.\n".format(len(latency_update_data)))
        outfile.write("SUMMARY: Success={0}\n".format(success))
    dtk_sft.plot_data(sorted(latency_update_data), sorted(expected_update_data),
                      label1="Actual", label2="Expected",
                      title="Latency Duration recalculated for ART",
                      xlabel="Data Points", ylabel="Days",
                      category="tb_activation_and_cd4_hiv_first_on_art", line=True, overlap=True)
def parse_output_file(output_filename="test.txt", debug=False):
    """
    Creates a dictionary to store stat populations, infected population, and MDR test results
    for each time step.
    :param output_filename: file to parse (test.txt)
    :return: output_dict
    """
    filtered_lines = []
    with open(output_filename) as logfile:
        for line in logfile:
            if dtk_sft.has_match(line, matches):
                filtered_lines.append(line)
    if debug:
        with open("DEBUG_filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)

    # initialize variables
    time_step = 0
    infected = 0
    statpop = 0
    simulation_timestep = 1
    positive = 0
    negative = 0
    default = 0
    output_dict = {}
    for line in filtered_lines:
        if matches[0] in line:
            output_dict[time_step] = {KEY_STAT_POP: statpop,
                                      KEY_INFECTED: infected,
                                      KEY_POSITIVE: positive,
                                      KEY_NEGATIVE: negative,
                                      KEY_DEFAULT: default}
            infected = dtk_sft.get_val(matches[2], line)
            statpop = dtk_sft.get_val(matches[1], line)
            time_step += simulation_timestep
            positive = 0
            negative = 0
            default = 0
        if matches[3] in line:
            result = int(dtk_sft.get_val(matches[3], line))
            if result:
                positive += 1
            else:
                negative += 1
        if matches[4] in line:
            default += 1
    res_path = r'./tb_test_result_from_logging.json'
    with open(res_path, "w") as file:
        json.dump(output_dict, file, indent=4)
    return output_dict
def parse_output_file(output_filename="test.txt", simulation_timestep=1, debug=False):
    """
    Creates a dataframe of time step, infected, infectiousness, and stat populations.
    :param output_filename: file to parse (test.txt)
    :param simulation_timestep: simulation time step, in days
    :return: output_df: data frame containing, at each time step:
             1. time step
             2. # of infected population
             3. infectiousness
             4. statistical population
    """
    filtered_lines = []
    with open(output_filename) as logfile:
        for line in logfile:
            if dtk_sft.has_match(line, matches):
                filtered_lines.append(line)
    if debug:
        with open("filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)

    # initialize variables
    time_step = 0
    infectiousness = 0  # must be initialized before the first Time line, or it is referenced before assignment
    infected = 0
    statpop = 0
    output_df = pd.DataFrame(columns=[KEY_SIMULATION_TIMESTEP, KEY_INFECTED, KEY_INFECTIOUSNESS, KEY_STAT_POP])
    output_df.index.name = "index"
    index = 0
    for line in filtered_lines:
        if matches[0] in line:
            infected = dtk_sft.get_val(matches[1], line)
            statpop = dtk_sft.get_val(matches[3], line)
            output_df.loc[index] = [time_step, infected, infectiousness, statpop]
            index += 1
            time_step += simulation_timestep
            infectiousness = 0
            continue
        if matches[2] in line:
            infectiousness = dtk_sft.get_val(matches[2], line)
            continue
    res_path = r'./infected_vs_infectiousness.csv'
    if not os.path.exists(os.path.dirname(res_path)):
        os.makedirs(os.path.dirname(res_path))
    output_df.to_csv(res_path)
    return output_df
def parse_stdout_file(initial_timestep=0, stdout_filename="test.txt", debug=False):
    """
    :param initial_timestep: first timestep from config
    :param stdout_filename: file to parse (test.txt)
    :param debug: whether or not to write an additional file with all the matched lines
    :return: [exogenous_infected_dict, exogenous_infected_list]
    """
    exogenous = "EXOGENOUS infected"
    infectiousness = "UpdateInfectiousness"
    state_latent = "state= Latent"
    filtered_lines = []
    exogenous_infected_count = 0
    latent_count = 0
    initial_population = 5000  # placeholder until we get to time 1 and read it from there
    exogenous_infected_dict = {}
    exogenous_infected_list = []
    update_time = "Update(): Time: "
    time = initial_timestep
    with open(stdout_filename) as logfile:
        for line in logfile:
            if update_time in line:
                exogenous_infected_list.append(exogenous_infected_count)
                exogenous_infected_dict[time] = [exogenous_infected_count, latent_count]
                time += 1
                # resetting for the next time step
                exogenous_infected_count = 0
                latent_count = 0
                filtered_lines.append(line)
            elif exogenous in line:
                ind_id = int(float(dtk_sft.get_val("Individual ", line)))
                if ind_id <= initial_population:  # ignoring imported people
                    exogenous_infected_count += 1
                    filtered_lines.append(line)
            elif infectiousness in line and state_latent in line:
                ind_id = int(float(dtk_sft.get_val("Individual ", line)))
                fast_progressor = int(float(dtk_sft.get_val("progressor=", line)))
                if not fast_progressor and ind_id <= initial_population:  # ignoring imported people
                    latent_count += 1
                    filtered_lines.append(line)
    if debug:
        with open("filtered_lines.txt", "w") as outfile:
            for line in filtered_lines:
                outfile.write(line)
    return [exogenous_infected_dict, exogenous_infected_list]
def parse_stdout_file(curr_timestep=0, stdout_filename="test.txt", debug=False):
    """
    :param curr_timestep: first timestep from config
    :param stdout_filename: file to parse (test.txt)
    :param debug: whether or not to write an additional file with all the matched lines
    :return: array of lines of interest
    """
    expose = "Expose: Individual"
    filtered_lines = []
    update_time = "Update(): Time:"
    time = 0
    with open(stdout_filename) as logfile:
        for line in logfile:
            if update_time in line:
                time += 1
            if expose in line:
                ind = int(dtk_sft.get_val("Individual ", line))
                if ind <= 100:  # do not look at imported people
                    new_line = dtk_sft.add_time_stamp(time, line)
                    filtered_lines.append(new_line)
    if debug:
        with open("filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)
    return filtered_lines
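# dtk_sft.add_time_stamp() is a support-library helper whose internals are not shown here. A
# stand-in under the assumption that it simply prefixes the matched line with its time step (the
# real helper's formatting may differ):
def add_time_stamp_sketch(time_step, line):
    return "TimeStep: {} {}".format(time_step, line)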
def parse_output_file(output_filename="test.txt", simulation_timestep=1, debug=False):
    """
    Creates a dataframe of time step, infected, infectiousness, and stat populations.
    :param output_filename: file to parse (test.txt)
    :param simulation_timestep: simulation time step, in days
    :return: output_df: data frame containing, at each time step:
             1. time step
             2. # of infected population
             3. infectiousness
             4. statistical population
    """
    filtered_lines = []
    with open(output_filename) as logfile:
        for line in logfile:
            if dtk_sft.has_match(line, matches):
                filtered_lines.append(line)
    if debug:
        with open("filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)

    # initialize variables
    time_step = 0
    infectiousness = 0  # must be initialized before the first Time line, or it is referenced before assignment
    infected = 0
    statpop = 0
    output_df = pd.DataFrame(columns=[KEY_SIMULATION_TIMESTEP, KEY_INFECTED, KEY_INFECTIOUSNESS, KEY_STAT_POP])
    output_df.index.name = "index"
    index = 0
    for line in filtered_lines:
        if matches[0] in line:
            infected = dtk_sft.get_val(matches[1], line)
            statpop = dtk_sft.get_val(matches[3], line)
            output_df.loc[index] = [time_step, infected, infectiousness, statpop]
            index += 1
            time_step += simulation_timestep
            infectiousness = 0
            continue
        if matches[2] in line:
            infectiousness = dtk_sft.get_val(matches[2], line)
            continue
    res_path = r'./infected_vs_infectiousness.csv'
    if not os.path.exists(os.path.dirname(res_path)):
        os.makedirs(os.path.dirname(res_path))
    output_df.to_csv(res_path)
    return output_df
def create_report_file(data):
    report_name = data[0]
    lines = data[1]
    coinfection_mortality_rate_off_art = data[2]
    coinfection_mortality_rate_on_art = data[3]
    died_of_coinfection = "died of CoInfection"
    state_active_symptomatic = "infectionstatechange TBActivation "
    time_to_death_data = []
    active_infections_dictionary = {}
    success = True
    with open(report_name, "w") as outfile:
        if not lines:
            outfile.write("BAD: No relevant test data found.\n")
            success = False
        for line in lines:
            if died_of_coinfection in line:
                ind_id = int(dtk_sft.get_val("Individual ", line))
                time_stamp = int(dtk_sft.get_val("time= ", line))
                if ind_id in active_infections_dictionary.keys():
                    time_to_death_data.append(time_stamp - active_infections_dictionary[ind_id])
                else:
                    success = False
                    outfile.write("BAD: Individual {} died of coinfection without going active, at time {}."
                                  "\n".format(ind_id, time_stamp))
            elif state_active_symptomatic in line:
                ind_id = int(dtk_sft.get_val("Individual ", line))
                start_time_stamp = int(dtk_sft.get_val("time= ", line))
                if ind_id in active_infections_dictionary.keys():
                    outfile.write("Individual {} went active symptomatic while already being active symptomatic "
                                  "at time {}.\n".format(ind_id, start_time_stamp))
                else:
                    active_infections_dictionary[ind_id] = start_time_stamp
        # expected_data here is only used for graphing purposes
        expected_data = map(int, np.random.exponential(1 / coinfection_mortality_rate_off_art,
                                                       len(time_to_death_data)))
        if not dtk_sft.test_exponential(time_to_death_data, coinfection_mortality_rate_off_art, outfile,
                                        integers=True, roundup=False, round_nearest=False):
            success = False
        outfile.write("Data points checked = {}.\n".format(len(time_to_death_data)))
        outfile.write("SUMMARY: Success={0}\n".format(success))
    dtk_sft.plot_data(sorted(time_to_death_data), sorted(expected_data),
                      label1="Actual", label2="Expected",
                      title="Time from Smear Negative Off ART TBHIV to Death",
                      xlabel="Data Points", ylabel="Days",
                      category="tbhiv_mortality_smear_negative_off_art", line=True, overlap=True)
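# dtk_sft.test_exponential() is provided by the SFT support library; its internals are not shown
# in this section. A minimal stand-in with the same intent, assuming a one-sample
# Kolmogorov-Smirnov test against an exponential distribution with the given rate (the real
# helper's algorithm, signature, and significance handling may differ):
from scipy import stats

def test_exponential_sketch(durations, rate, report_file=None, significance=0.05):
    # scipy parameterizes the exponential by scale = 1/rate (loc fixed at 0)
    result = stats.kstest(durations, "expon", args=(0, 1.0 / rate))
    if report_file:
        report_file.write("KS statistic = {}, p-value = {}\n".format(result.statistic, result.pvalue))
    return result.pvalue > significance  # True means the data are consistent with the distribution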
def parse_output_file(output_filename="test.txt", debug=False):
    """
    Creates a dictionary to store filtered information for each time step.
    :param output_filename: file to parse (test.txt)
    :return: output_dict
    """
    filtered_lines = []
    with open(output_filename) as logfile:
        for line in logfile:
            if dtk_sft.has_match(line, matches):
                filtered_lines.append(line)
    if debug:
        with open("DEBUG_filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)

    # initialize variables
    time_step = 0
    simulation_timestep = 1
    output_dict = {}
    for line in filtered_lines:
        if matches[0] in line:
            time_step += simulation_timestep
        elif matches[1] in line:  # this individual is Cleared
            individual_id = dtk_sft.get_val(KEY_INDIVIDUAL, line)
            if individual_id in output_dict:
                output_dict[individual_id][KEY_CLEARED] = time_step
            else:
                output_dict[individual_id] = {KEY_CLEARED: time_step}
        elif matches[2] in line:  # this individual is Latent
            individual_id = dtk_sft.get_val(KEY_INDIVIDUAL, line)
            if individual_id in output_dict:
                output_dict[individual_id][KEY_LATENT] = time_step
            else:
                output_dict[individual_id] = {KEY_LATENT: time_step}
        elif matches[3] in line:  # this individual is PreSymptomatic active
            individual_id = dtk_sft.get_val(KEY_INDIVIDUAL, line)
            if individual_id in output_dict:
                output_dict[individual_id][KEY_PRESYMPTOMATIC] = time_step
            else:
                output_dict[individual_id] = {KEY_PRESYMPTOMATIC: time_step}
    res_path = r'./SEIR_Latent_Cure_from_logging.json'
    with open(res_path, "w") as file:
        json.dump(output_dict, file, indent=4)
    return output_dict
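# A usage sketch (illustrative only): with the per-individual dictionary built above, the latency
# duration for an individual is the gap between the Latent and PreSymptomatic time steps, when
# both were logged.
output_dict = parse_output_file(debug=True)
latency_durations = [times[KEY_PRESYMPTOMATIC] - times[KEY_LATENT]
                     for times in output_dict.values()
                     if KEY_LATENT in times and KEY_PRESYMPTOMATIC in times]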
def parse_output_file(output_filename="test.txt", debug=False):
    """
    Creates a dictionary to store filtered information for each time step.
    :param output_filename: file to parse (test.txt)
    :return: output_dict
    """
    filtered_lines = []
    with open(output_filename) as logfile:
        for line in logfile:
            if dtk_sft.has_match(line, matches):
                filtered_lines.append(line)
    if debug:
        with open("DEBUG_filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)

    # initialize variables
    time_step = 0
    simulation_timestep = 1
    output_dict = {}
    for line in filtered_lines:
        if matches[0] in line:
            time_step += simulation_timestep
        if matches[1] in line:
            individual_id = dtk_sft.get_val(KEY_INDIVIDUAL, line)
            if individual_id in output_dict:
                output_dict[individual_id][KEY_DEATH] = time_step
            else:
                output_dict[individual_id] = {KEY_DEATH: time_step}
        if matches[2] in line:
            individual_id = dtk_sft.get_val(KEY_INDIVIDUAL, line)
            timer = float(dtk_sft.get_val(KEY_TIMER, line))
            if individual_id in output_dict:
                output_dict[individual_id][KEY_SYMPTOMATIC] = [time_step, timer]
            else:
                output_dict[individual_id] = {KEY_SYMPTOMATIC: [time_step, timer]}
    res_path = r'./SEIR_Death_from_logging.json'
    with open(res_path, "w") as file:
        json.dump(output_dict, file, indent=4)
    return output_dict
def parse_stdout_file(output_filename="test.txt", debug=False):
    """
    Reads the stdout file and creates an individual-ID-indexed DataFrame of ages and mod_acquire.
    :param output_filename: stdout filename (test.txt)
    :param debug: generate filtered-lines and DataFrame csv files for debugging
    :return: dataframe of all individuals from a single timestep
    """
    matches = ["{} = ".format(DataframeKeys.MOD_ACQUIRE),
               "{} = ".format(DataframeKeys.AGE),
               "{} = ".format(DataframeKeys.ID),
               "Update(): Time: "]
    filtered_lines = []
    with open(output_filename) as logfile:
        for line in logfile:
            if sft.has_match(line, matches):
                if matches[-1] in line:
                    break  # only collect lines from the first timestep
                filtered_lines.append(line)
    if debug:
        with open("filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)

    individuals_df = pd.DataFrame(columns=[DataframeKeys.AGE, DataframeKeys.MOD_ACQUIRE])
    individuals_df.index.name = 'index'
    for line in filtered_lines:
        age = float(sft.get_val(matches[1], line))
        acquire = float(sft.get_val(matches[0], line))
        ind_id = int(sft.get_val(matches[2], line))
        individuals_df.loc[ind_id] = [age, acquire]
    if debug:
        with open("DEBUG_individuals_dataframe.csv", "w") as outfile:
            outfile.write(individuals_df.to_csv())
    return individuals_df
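# A usage sketch for the DataFrame built above (the bin edges are hypothetical and assume age is
# logged in days): grouping mod_acquire by age bin to eyeball the age-dependent acquisition
# modifier.
import pandas as pd

individuals_df = parse_stdout_file(debug=True)
age_bins = pd.cut(individuals_df[DataframeKeys.AGE], bins=[0, 5 * 365, 15 * 365, 200 * 365])
print(individuals_df.groupby(age_bins)[DataframeKeys.MOD_ACQUIRE].describe())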
def parse_output_file(output_filename="test.txt", simulation_timestep=1, debug=False):
    """
    Creates a dataframe of per-time-step test results (positive, negative, default).
    :param output_filename: file to parse (test.txt)
    :return: output_df
    """
    filtered_lines = []
    with open(output_filename) as logfile:
        for line in logfile:
            if dtk_sft.has_match(line, matches):
                filtered_lines.append(line)
    if debug:
        with open("DEBUG_filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)

    # initialize variables
    time_step = 0
    positive = 0
    negative = 0
    default = 0
    output_df = pd.DataFrame(columns=[ReportColumn.negative, ReportColumn.default, ReportColumn.positive])
    output_df.index.name = Config.simulation_timestep
    for line in filtered_lines:
        if matches[0] in line:
            output_df.loc[time_step] = pd.Series({ReportColumn.positive: positive,
                                                  ReportColumn.negative: negative,
                                                  ReportColumn.default: default})
            time_step += simulation_timestep
            positive = 0
            negative = 0
            default = 0
        if matches[1] in line:
            result = int(dtk_sft.get_val(matches[1], line))
            if result:
                positive += 1
            else:
                negative += 1
        if matches[2] in line:
            default += 1
    res_path = r'./DEBUG_tb_test_result_from_logging.csv'
    with open(res_path, "w") as file:
        output_df.to_csv(file)
    return output_df
def parse_stdout_file(start_timestep, duration_of_interest, stdout_filename="test.txt", debug=False):
    """
    Creates cum_deaths and death_times arrays.
    :param start_timestep: drug start time
    :param duration_of_interest: window (in time steps) after the drug start during which death times are recorded
    :param stdout_filename: file to parse (test.txt)
    :return: cum_deaths, deaths, infected_individuals, death_times
    """
    filtered_lines = []
    with open(stdout_filename) as logfile:
        for line in logfile:
            if dtk_sft.has_match(line, matches):
                filtered_lines.append(line)
    if debug:
        with open("filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)

    # initialize variables
    cum_deaths = []
    deaths = []
    death_times = []
    timestep = 0
    cum = death_daily = infected = 0
    infected_individuals = []
    for line in filtered_lines:
        if "Time:" in line:
            timestep += 1
            infected_individuals.append(infected)
            cum_deaths.append(cum)
            deaths.append(death_daily)
            death_daily = 0
            infected = int(dtk_sft.get_val("Infected: ", line))
        else:
            cum += 1
            death_daily += 1
            # cum_deaths[-1] = cum
            if timestep <= duration_of_interest + start_timestep:
                death_times.append(timestep - start_timestep)
    if debug:
        with open("cum_deaths.txt", "w") as file:
            file.writelines(str(cum_deaths))
        with open("death_times.txt", "w") as file:
            file.writelines(str(death_times))
    return cum_deaths, deaths, infected_individuals, death_times
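# A small consistency check (illustration only, with hypothetical arguments): the cumulative and
# daily death series built above should agree at every recorded time step, since cum_deaths is
# appended after the just-ended interval's deaths have been counted.
cum_deaths, deaths, infected_individuals, death_times = parse_stdout_file(
    start_timestep=10, duration_of_interest=120)
running_total = 0
for day, daily in enumerate(deaths):
    running_total += daily
    assert cum_deaths[day] == running_total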
def parse_output_file(output_filename="test.txt", debug=False):
    """
    Creates an object which contains the heterogeneity multiplier and infectiousness.
    :param output_filename: file to parse (test.txt)
    :return: output_obj: heterogeneity multiplier and infectiousness for each infection
    """
    filtered_lines = []
    output_obj = {}
    for match in matches:
        output_obj[match] = []
    with open(output_filename) as logfile:
        for line in logfile:
            for match in matches:
                if match in line:
                    output_obj[match].append(float(sft.get_val(match, line)))
                    filtered_lines.append(line)
                    break
    if debug:
        with open("filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)
    return output_obj
def create_report_file(data):
    report_name = data[0]
    lines = data[1]
    tb_cd4_activation_vector = data[2]  # this test assumes the vector is constant
    # The StartedART distribution is currently based on 0.01, while StoppedART is mostly based on
    # 0.0000001, so we expect much longer latency durations for the StoppedART data. We use a big
    # arbitrary number that is noticeably bigger than what the StartedART distribution would
    # extremely likely give us.
    big_magic_number = 2000
    stopped_art_latency_data = []
    started_art_latency_data = []
    tb_on_art_latency_data = []
    art_events_dict = {}
    success = True
    with open(report_name, "w") as outfile:
        if not lines:
            outfile.write("BAD: No relevant test data found.\n")
            success = False
        for line in lines:
            if "has event" in line:
                ind_id = int(dtk_sft.get_val("Individual ", line))
                art_status = line.split(" ")[9].strip(".")  # get_val only gets digits
                art_events_dict[ind_id] = art_status
            if "Incubation_timer calculated as" in line:
                ind_id = int(dtk_sft.get_val("Individual ", line))
                infection_timer = float(dtk_sft.get_val("calculated as ", line))
                reconstitute = int(dtk_sft.get_val("reconstitute=", line))
                if reconstitute:  # ignore people who are not reconstituting
                    tb_on_art_latency_data.append(infection_timer)
            if "LifeCourseLatencyTimerUpdate" in line:
                ind_id = int(dtk_sft.get_val("Individual ", line))
                new_incubation_timer = float(dtk_sft.get_val("timer ", line))
                if ind_id in art_events_dict.keys():
                    if art_events_dict.get(ind_id) == "StartedART":
                        # we ignore this for this test; people are already on ART when they get TB
                        started_art_latency_data.append(new_incubation_timer)
                    else:
                        stopped_art_latency_data.append(new_incubation_timer)
                    art_events_dict.pop(ind_id)
                else:
                    success = False
                    outfile.write("BAD: No ART-related event found in the logs for this timer update for "
                                  "Individual {}, at time {}.\n".format(ind_id,
                                                                        int(dtk_sft.get_val("time= ", line))))
        # we want the stopped-ART latency data to NOT match the started-ART latency data,
        # and we expect the stopped-ART latency data to be long durations, as driven by our cd4_activation_vector
        if dtk_sft.test_exponential(stopped_art_latency_data, tb_cd4_activation_vector[2],
                                    integers=True, roundup=True, round_nearest=False):
            outfile.write("BAD: The StoppedART latency data distribution matches the initial latency data"
                          " distribution, but shouldn't.\n")
            success = False
        small_duration_count = 0
        for duration in stopped_art_latency_data:
            if duration < big_magic_number:
                small_duration_count += 1
        proportion_small = small_duration_count / float(len(stopped_art_latency_data))
        if proportion_small > 0.006:
            outfile.write("BAD: More than 0.006 of our durations are suspiciously small, it is {}. "
                          "Please investigate.\n".format(proportion_small))
            success = False
        # this is testing the internal timer, which is a float, so 'integers=False'
        if not dtk_sft.test_exponential(tb_on_art_latency_data, tb_cd4_activation_vector[2], outfile,
                                        integers=False, roundup=False, round_nearest=False):
            success = False
            outfile.write("BAD: Initial TB infection (with HIV and ART) latency doesn't match the expected "
                          "distribution.\n")
        outfile.write("Data points checked = {}.\n".format(len(tb_on_art_latency_data)))
        outfile.write("SUMMARY: Success={0}\n".format(success))
    # for graphing purposes only
    expected_tb_on_art_latency_data = np.random.exponential(1 / tb_cd4_activation_vector[2],
                                                            len(tb_on_art_latency_data))
    dtk_sft.plot_data(sorted(tb_on_art_latency_data), sorted(expected_tb_on_art_latency_data),
                      label1="Actual", label2="Expected",
                      title="HIV+ART then TB latency data", xlabel="Data Points", ylabel="Days",
                      category="tb_activation_and_cd4_hiv_art_tb_offart", line=True, overlap=True)
def parse_output_file(output_filename="test.txt", debug=False):
    """
    Creates a dictionary to store filtered information for each time step.
    :param output_filename: file to parse (test.txt)
    :return: output_dict, message
    """
    filtered_lines = []
    with open(output_filename) as logfile:
        for line in logfile:
            if dtk_sft.has_match(line, matches):
                filtered_lines.append(line)
    if debug:
        with open("DEBUG_filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)

    # initialize variables
    time_step = 0
    core = 0
    output_dict = {}
    exception_message = None
    for line in filtered_lines:
        try:
            if matches[0] in line:
                # this may raise LookupError
                value = dtk_sft.get_val(matches[0], line)
                if debug:
                    print("time value I get is '{}'".format(value))
                # this may raise ValueError
                time_step = int(float(value))
                if matches[1] in line:
                    # this may raise ValueError or LookupError
                    core = int(dtk_sft.get_val(matches[1], line))
                else:
                    print(line)
                    raise Exception("at timestep = {0}, {1} and {2} are not in the same line.\n".format(
                        time_step, matches[0], matches[1]))
                if debug:
                    print("core is {}".format(core))
                if time_step not in output_dict:
                    output_dict[time_step] = {core: [0, 0]}
                elif core not in output_dict[time_step]:
                    output_dict[time_step][core] = [0, 0]
            elif matches[2] in line:  # this individual died from TB Symptomatic active
                output_dict[time_step][core][0] += 1
            elif matches[3] in line:  # this individual died from HIV
                output_dict[time_step][core][1] += 1
        except Exception as ex:
            exception_message = "failed to parse {0}, got exception: {1}.".format(output_filename, ex)
            print(exception_message)
            return None, exception_message
    if debug:
        res_path = r'./DEBUG_stdout_parsed.json'
        with open(res_path, "w") as file:
            json.dump(output_dict, file, indent=4)
    return output_dict, exception_message
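# A usage sketch for the (output_dict, message) return convention above: the caller can surface a
# parse failure in its report instead of crashing (the report path here is illustrative).
output_dict, exception_message = parse_output_file(debug=True)
if output_dict is None:
    with open("report.txt", "w") as outfile:
        outfile.write("BAD: {}\n".format(exception_message))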
def parse_stdout_file(drug_start_time, start_timestep, stdout_filename="test.txt", debug=False):
    """
    Creates cum_inactivations and inactivation_times arrays.
    :param drug_start_time: drug start time
    :param start_timestep: first timestep from config
    :param stdout_filename: file to parse (test.txt)
    :return: cum_inactivations, inactivation_times, active_count, inactivations
    """
    filtered_lines = []
    with open(stdout_filename) as logfile:
        for line in logfile:
            if dtk_sft.has_match(line, matches):
                filtered_lines.append(line)
    if debug:
        with open("filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)

    # initialize variables
    cum_inactivations = []
    inactivation_times = []
    timestep = 0
    cum = inactivation_daily = infected = active = 0
    infected_individuals = []
    inactivations = []
    active_count = []
    reactivation_times = {}
    for line in filtered_lines:
        if matches[2] in line:
            cum_inactivations.append(cum)
            infected_individuals.append(infected)
            inactivations.append(inactivation_daily)
            active_count.append(active)
            infected = int(dtk_sft.get_val("Infected: ", line))
            inactivation_daily = 0
            timestep += 1
        elif matches[0] in line:
            # deactivation: have to track the individual
            individual = int(dtk_sft.get_val("TB drug deactivated my \\(", line))
            # if timestep <= duration_of_interest + start_timestep + drug_start_time:
            inactivation_time = timestep - start_timestep - drug_start_time
            if individual in reactivation_times:
                inactivation_time = timestep - reactivation_times[individual]
                reactivation_times.pop(individual)
                # not including the reactivations: some data points are lost because the simulation
                # ends before the timers end
                # inactivation_times.append(inactivation_time)
            else:
                cum += 1
                active -= 1
                inactivation_daily += 1
                inactivation_times.append(inactivation_time)
        elif matches[1] in line:
            # "progressing from Latent to Active Presymptomatic while on TB Drugs"
            # activation: have to track the individual
            individual = int(dtk_sft.get_val("Individual ", line))
            reactivation_times[individual] = timestep
        elif matches[3] in line:
            # move to active
            active += 1
        else:
            # died from HIV
            individual = int(dtk_sft.get_val("individual ", line))
            if individual in reactivation_times:
                active -= 1
                reactivation_times.pop(individual)
    if debug:
        with open("Cum_Inactivations.txt", "w") as outfile:
            outfile.write(str(cum_inactivations))
        with open("Inactivations.txt", "w") as outfile:
            outfile.write(str(inactivation_times))
    print("there are {} individuals in the reactivation state at the end of the simulation.\n".format(
        len(reactivation_times)))
    return cum_inactivations, inactivation_times, active_count, inactivations
def create_report_file(data):
    report_name = data[0]
    lines = data[1]
    cd4_strata = data[2]
    mod_array = data[3]
    base_inf = data[4]
    presymp_mult = data[5]
    smear_neg_mult = data[6]
    latent_data_points = 0
    smear_negative_data_points = 0
    smear_positive_data_points = 0
    presymptomatic_data_points = 0
    extrapulmonary_data_points = 0
    success = True
    epsilon = 0.000002
    with open(report_name, "w") as outfile:
        if not lines:
            outfile.write("BAD: No relevant test data found.\n")
            success = False
        for line in lines:
            tot_inf_actual = float(dtk_sft.get_val("total_infectiousness= ", line))
            cd4_count = float(dtk_sft.get_val("CD4count= ", line))
            cd4_mod_actual = float(dtk_sft.get_val("CD4mod= ", line))
            cd4_mod_expected = tb_cd4_infectiousness_calc([mod_array, cd4_strata, cd4_count])
            if "Latent" in line:
                latent_data_points += 1
                if tot_inf_actual != 0:
                    success = False
                    outfile.write("BAD, found Latent infection with total_infectiousness= {} at time= {}. "
                                  "Expected 0.\n".format(tot_inf_actual, dtk_sft.get_val("time= ", line)))
            elif "SmearNegative" in line:
                smear_negative_data_points += 1
                tot_inf_expected = cd4_mod_expected * base_inf * smear_neg_mult
                if abs(tot_inf_expected - tot_inf_actual) > epsilon:
                    success = False
                    outfile.write("BAD, found SmearNegative infection with total_infectiousness= {} at time= {}, "
                                  "Expected {}.\n{}\n".format(tot_inf_actual, dtk_sft.get_val("time= ", line),
                                                              tot_inf_expected, line))
            elif "SmearPositive" in line:
                smear_positive_data_points += 1
                tot_inf_expected = cd4_mod_expected * base_inf
                if abs(tot_inf_expected - tot_inf_actual) > epsilon:
                    success = False
                    outfile.write("BAD, found SmearPositive infection with total_infectiousness= {} at time= {}, "
                                  "Expected {}.\n{}\n".format(tot_inf_actual, dtk_sft.get_val("time= ", line),
                                                              tot_inf_expected, line))
            elif "Presymptomatic" in line:
                presymptomatic_data_points += 1
                tot_inf_expected = cd4_mod_expected * base_inf * presymp_mult
                if abs(tot_inf_expected - tot_inf_actual) > epsilon:
                    success = False
                    outfile.write("BAD, found Presymptomatic infection with total_infectiousness= {} at time= {}, "
                                  "Expected {}.\n{}\n".format(tot_inf_actual, dtk_sft.get_val("time= ", line),
                                                              tot_inf_expected, line))
            elif "Extrapulmonary" in line:
                extrapulmonary_data_points += 1
                if tot_inf_actual != 0:
                    success = False
                    outfile.write("BAD, found Extrapulmonary infection with total_infectiousness= {} at time= {}. "
                                  "Should be 0.\n".format(tot_inf_actual, dtk_sft.get_val("time= ", line)))
        outfile.write("Data points for each TBHIV infection state:\nLatent = {}\nPresymptomatic = {}\n"
                      "Smear Negative = {}\nSmear Positive = {}\nExtrapulmonary = {}\n".format(
                          latent_data_points, presymptomatic_data_points, smear_negative_data_points,
                          smear_positive_data_points, extrapulmonary_data_points))
        outfile.write("SUMMARY: Success={0}\n".format(success))
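# Worked example of the expectation checked above (all numbers are illustrative, not from the test
# config): a SmearNegative infection multiplies the CD4 modifier, the base infectivity, and the
# smear-negative multiplier.
base_inf = 0.25          # hypothetical Base_Infectivity
smear_neg_mult = 0.5     # hypothetical smear-negative multiplier
cd4_mod_expected = 1.2   # hypothetical CD4 modifier from tb_cd4_infectiousness_calc
tot_inf_expected = cd4_mod_expected * base_inf * smear_neg_mult  # 1.2 * 0.25 * 0.5 = 0.15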
def create_report_file(data, debug=False):
    report_name = data[0]
    lines = data[1]
    tb_cd4_activation_vector = data[2]  # this test assumes the vector is constant
    latency_data = {}
    duration_data = {}
    success = True
    with open(report_name, "w") as outfile:
        if not lines:
            outfile.write("BAD: No relevant test data found.\n")
            success = False
        for line in lines:
            if "LifeCourseLatencyTimerUpdate" in line:
                ind_id = int(dtk_sft.get_val("Individual ", line))
                start_time_stamp = int(dtk_sft.get_val("time= ", line))
                if ind_id in latency_data.keys():
                    outfile.write("Individual {} incubation timer reset at time {}. Please check.\n".format(
                        ind_id, start_time_stamp))
                latency_data[ind_id] = start_time_stamp
            elif "TBActivationPresymptomatic" in line:
                ind_id = int(dtk_sft.get_val("Individual ", line))
                end_time_stamp = int(dtk_sft.get_val("time= ", line))
                if ind_id not in latency_data.keys():
                    outfile.write("Individual {} went presymptomatic without an incubation timer update at "
                                  "time {}. Please check.\n".format(ind_id, end_time_stamp))
                else:
                    duration = end_time_stamp - latency_data.get(ind_id)
                    duration_data[ind_id] = duration
        if debug:
            with open("DEBUG_duration_data.json", "w") as debug_outfile:
                json.dump(duration_data, debug_outfile, indent=4)
        durations = list(duration_data.values())
        if not dtk_sft.test_exponential(durations, tb_cd4_activation_vector[0], outfile,
                                        integers=True, roundup=True, round_nearest=False):
            success = False
        outfile.write("Data points checked = {}.\n".format(len(duration_data)))
        outfile.write("SUMMARY: Success={0}\n".format(success))
    # only used for graphing purposes
    expected_data = map(math.ceil, np.random.exponential(1 / tb_cd4_activation_vector[0], len(duration_data)))
    expected_durations = list(expected_data)
    dtk_sft.plot_data_sorted(durations, expected_durations,
                             label1="Actual", label2="Expected",
                             title="Recalculated Latency Duration TB then HIV (Sorted)",
                             xlabel="Data Points", ylabel="Days",
                             category="tb_activation_and_cd4_tb_first", line=True, overlap=True)
def create_report_file(data):
    report_name = data[0]
    lines = data[1]
    tb_cd4_activation_vector = data[2]  # this test assumes the vector is constant
    # The StartedART distribution is currently based on 0.01, while StoppedART is mostly based on
    # 0.0000001, so we expect much longer latency durations for the StoppedART data. We use a big
    # arbitrary number that is noticeably bigger than what the StartedART distribution would
    # extremely likely give us.
    big_magic_number = 2000
    stopped_art_latency_data = []
    started_art_latency_data = []
    art_events_dict = {}
    success = True
    with open(report_name, "w") as outfile:
        if not lines:
            outfile.write("BAD: No relevant test data found.\n")
            success = False
        for line in lines:
            if "has event" in line:
                ind_id = int(dtk_sft.get_val("Individual ", line))
                art_status = line.split(" ")[9].strip(".")  # get_val only gets digits
                art_events_dict[ind_id] = art_status
            if "LifeCourseLatencyTimerUpdate" in line:
                ind_id = int(dtk_sft.get_val("Individual ", line))
                new_incubation_timer = float(dtk_sft.get_val("timer ", line))
                if ind_id in art_events_dict.keys():
                    if art_events_dict.get(ind_id) == "StartedART":
                        started_art_latency_data.append(new_incubation_timer)
                    else:
                        stopped_art_latency_data.append(new_incubation_timer)
                    art_events_dict.pop(ind_id)
                else:
                    success = False
                    outfile.write("BAD: No ART-related event found in the logs for this timer update for "
                                  "Individual {}, at time {}.\n".format(ind_id,
                                                                        int(dtk_sft.get_val("time= ", line))))
        # we want the stopped-ART latency data to NOT match the started-ART latency data,
        # and we expect the stopped-ART latency data to be long durations, as driven by our cd4_activation_vector
        if dtk_sft.test_exponential(stopped_art_latency_data, tb_cd4_activation_vector[2],
                                    integers=False, roundup=False, round_nearest=False):
            outfile.write("BAD: The StoppedART latency data distribution matches the StartedART latency data"
                          " distribution, but shouldn't.\n")
            success = False
        expected_stopped_art_data = np.random.exponential(1 / tb_cd4_activation_vector[0],
                                                          len(stopped_art_latency_data))
        small_duration_count = 0
        for duration in stopped_art_latency_data:
            if duration < big_magic_number:
                small_duration_count += 1
        proportion_small = small_duration_count / float(len(stopped_art_latency_data))
        if proportion_small > 0.01:
            outfile.write("BAD: More than 1% of our durations are suspiciously small, it is {}. "
                          "Please investigate.\n".format(proportion_small))
            success = False
        outfile.write("Data points checked = {}.\n".format(len(stopped_art_latency_data)))
        outfile.write("SUMMARY: Success={0}\n".format(success))
    dtk_sft.plot_data(sorted(stopped_art_latency_data), sorted(expected_stopped_art_data),
                      label1="Actual", label2="Expected",
                      title="StoppedART Latency data should have a similar shape/scale of duration "
                            "but will not match",
                      xlabel="Data Points", ylabel="Days",
                      category="tb_activation_and_cd4_hiv_first_on_art_off_art", line=True, overlap=True)