def parse_output_file(output_filename="test.txt", simulation_timestep=1, debug=False):
    """
    Create a dictionary counting Symptomatic-active events per time step.

    :param output_filename: file to parse (test.txt)
    :param simulation_timestep: days advanced per "Time" marker line
    :param debug: if True, dump intermediate data to disk
    :return: output_dict mapping time step -> count of Symptomatic-active events
    """
    filtered_lines = []
    with open(output_filename) as logfile:
        for line in logfile:
            if dtk_sft.has_match(line, matches):
                filtered_lines.append(line)
    if debug:
        with open("DEBUG_filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)

    # initialize variables
    time_step = 0
    output_dict = {}
    for line in filtered_lines:
        if matches[0] in line:  # time marker: advance the clock
            time_step += simulation_timestep
            output_dict[time_step] = 0
        elif matches[1] in line:  # this individual is Symptomatic active
            # BUGFIX: use get() so an event logged before the first time marker
            # cannot raise KeyError (output_dict[time_step] would be missing).
            output_dict[time_step] = output_dict.get(time_step, 0) + 1
    if debug:
        res_path = r'./DEBUG_incidence.json'
        with open(res_path, "w") as file:
            json.dump(output_dict, file, indent=4)
    return output_dict
def parse_output_file(output_filename="test.txt", debug=False):
    """
    Create a dictionary mapping each time step to the list of cores that
    logged that step.

    :param output_filename: file to parse (test.txt)
    :param debug: if True, dump intermediate data to disk
    :return: output_dict {time_step: [core, ...]}
    """
    with open(output_filename) as logfile:
        filtered_lines = [line for line in logfile
                          if dtk_sft.has_match(line, [matches[0]])]
    if debug:
        with open("DEBUG_filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)

    output_dict = {}
    for line in filtered_lines:
        time_step = dtk_sft.get_val(matches[0], line)
        core = dtk_sft.get_val(matches[1], line)
        # append this core to the (possibly new) list for the time step
        output_dict.setdefault(time_step, []).append(core)
    if debug:
        res_path = r'./DEBUG_core_per_time_step.json'
        with open(res_path, "w") as file:
            json.dump(output_dict, file, indent=4)
    return output_dict
def parse_output_file(output_filename="test.txt", debug=False):
    """
    Build a per-time-step dictionary of stat population, infected population,
    and MDR test outcomes (positive / negative / default) parsed from stdout.

    :param output_filename: file to parse (test.txt)
    :param debug: if True, dump the filtered log lines to disk
    :return: output_dict keyed by time step
    """
    with open(output_filename) as logfile:
        lines_of_interest = [line for line in logfile
                             if dtk_sft.has_match(line, matches)]
    if debug:
        with open("DEBUG_filtered_lines.txt", "w") as outfile:
            outfile.writelines(lines_of_interest)

    # running state for the current time step
    time_step = 0
    simulation_timestep = 1
    infected = statpop = 0
    positive = negative = default = 0
    output_dict = {}
    for line in lines_of_interest:
        if matches[0] in line:
            # close out the counters accumulated for the previous step
            output_dict[time_step] = {KEY_STAT_POP: statpop,
                                      KEY_INFECTED: infected,
                                      KEY_POSITIVE: positive,
                                      KEY_NEGATIVE: negative,
                                      KEY_DEFAULT: default}
            infected = dtk_sft.get_val(matches[2], line)
            statpop = dtk_sft.get_val(matches[1], line)
            time_step += simulation_timestep
            positive = negative = default = 0
        if matches[3] in line:
            # nonzero test value -> positive result
            if int(dtk_sft.get_val(matches[3], line)):
                positive += 1
            else:
                negative += 1
        if matches[4] in line:
            default += 1

    res_path = r'./tb_test_result_from_logging.json'
    with open(res_path, "w") as file:
        json.dump(output_dict, file, indent=4)
    return output_dict
def parse_output_file(output_filename="test.txt", simulation_timestep=1, debug=False):
    """
    Create a dataframe of time step, infected, infectiousness and stat populations.

    :param output_filename: file to parse (test.txt)
    :param simulation_timestep: simulation time step, * days
    :param debug: if True, dump the filtered log lines to disk
    :return: output_df: data frame containing, per time step:
             1, time step
             2, # of infected population
             3, infectiousness
             4, statistical population
    """
    filtered_lines = []
    with open(output_filename) as logfile:
        for line in logfile:
            if dtk_sft.has_match(line, matches):
                filtered_lines.append(line)
    if debug:
        with open("filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)

    # initialize variables
    time_step = 0
    # BUGFIX: infectiousness must be initialized before the loop (it was
    # commented out); otherwise a time line (matches[0]) encountered before any
    # infectiousness line (matches[2]) raises UnboundLocalError.
    infectiousness = 0
    infected = 0
    statpop = 0
    output_df = pd.DataFrame(columns=[KEY_SIMULATION_TIMESTEP,
                                      KEY_INFECTED,
                                      KEY_INFECTIOUSNESS,
                                      KEY_STAT_POP])
    output_df.index.name = "index"
    index = 0
    for line in filtered_lines:
        if matches[0] in line:
            infected = dtk_sft.get_val(matches[1], line)
            statpop = dtk_sft.get_val(matches[3], line)
            output_df.loc[index] = [time_step, infected, infectiousness, statpop]
            index += 1
            time_step += simulation_timestep
            infectiousness = 0  # reset the per-step accumulator
            continue
        if matches[2] in line:
            infectiousness = dtk_sft.get_val(matches[2], line)
            continue
    res_path = r'./infected_vs_infectiousness.csv'
    if not os.path.exists(os.path.dirname(res_path)):
        os.makedirs(os.path.dirname(res_path))
    output_df.to_csv(res_path)
    return output_df
def parse_output_file(output_filename="test.txt", simulation_timestep=1, debug=False):
    """
    Build a DataFrame of TB test results (positive / negative / default) per
    time step.

    :param output_filename: file to parse (test.txt)
    :param simulation_timestep: days advanced per time-marker line
    :param debug: if True, dump the filtered log lines to disk
    :return: output_df indexed by time step
    """
    with open(output_filename) as logfile:
        filtered_lines = [line for line in logfile
                          if dtk_sft.has_match(line, matches)]
    if debug:
        with open("DEBUG_filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)

    # counters accumulated over the current time step
    time_step = 0
    positive = negative = default = 0
    output_df = pd.DataFrame(columns=[ReportColumn.negative,
                                      ReportColumn.default,
                                      ReportColumn.positive])
    output_df.index.name = Config.simulation_timestep
    for line in filtered_lines:
        if matches[0] in line:
            # flush the previous step's counters into the dataframe
            output_df.loc[time_step] = pd.Series({ReportColumn.positive: positive,
                                                  ReportColumn.negative: negative,
                                                  ReportColumn.default: default})
            time_step += simulation_timestep
            positive = negative = default = 0
        if matches[1] in line:
            # nonzero test value -> positive result
            if int(dtk_sft.get_val(matches[1], line)):
                positive += 1
            else:
                negative += 1
        if matches[2] in line:
            default += 1
    res_path = r'./DEBUG_tb_test_result_from_logging.csv'
    with open(res_path, "w") as file:
        output_df.to_csv(file)
    return output_df
def parse_stdout_file(start_timestep, stdout_filename="test.txt", debug=False):
    """
    Build per-timestep relapse counts and cumulative relapse counts from stdout.

    :param start_timestep: first timestep from config
    :param stdout_filename: file to parse (test.txt)
    :param debug: if True, write intermediate artifacts to disk
    :return: (relapses, cum_relapses) lists indexed by timestep
    """
    with open(stdout_filename) as logfile:
        filtered_lines = [line for line in logfile
                          if sft.has_match(line, matches)]
    if debug:
        with open("filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)

    # All will-relapse calculations are done at time of clearance; with a 100%
    # clearance rate they all happen at once, so counting the will-relapses at
    # time-of-clearance lets the caller check them with
    # sft.test_binomial_95ci(relapses, N, 'rate', None, None).
    # NOTE(review): there was a known bug where fast progressors cleared by
    # drugs bounce between active and latent — confirm it is fixed upstream.
    cum_relapses = []
    relapses = []
    timestep = 0
    initial_relapses = 0
    previous_total = 0
    running_total = 0
    for line in filtered_lines:
        if "Time:" in line:
            timestep += 1
            cum_relapses.append(running_total)
            relapses.append(running_total - previous_total)
            previous_total = running_total
        else:
            # every non-time line is one relapse event
            if timestep == start_timestep + 1:
                initial_relapses += 1
            running_total += 1
    if debug:
        with open("relapses.txt", "w") as file:
            file.writelines(str(relapses))
        with open("cum_relapses.txt", "w") as file:
            file.writelines(str(cum_relapses))
        print("initial relapses is {}.\n".format(initial_relapses))
    return relapses, cum_relapses
def parse_output_file(output_filename="test.txt", debug=False):
    """
    Build a per-individual dictionary of the time steps at which each
    individual became Cleared, Latent, or PreSymptomatic active.

    :param output_filename: file to parse (test.txt)
    :param debug: if True, dump the filtered log lines to disk
    :return: output_dict keyed by individual id
    """
    with open(output_filename) as logfile:
        filtered_lines = [line for line in logfile
                          if dtk_sft.has_match(line, matches)]
    if debug:
        with open("DEBUG_filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)

    # initialize variables
    time_step = 0
    simulation_timestep = 1
    # map match index -> the state key recorded for the individual
    state_keys = {1: KEY_CLEARED, 2: KEY_LATENT, 3: KEY_PRESYMPTOMATIC}
    output_dict = {}
    for line in filtered_lines:
        if matches[0] in line:  # time marker
            time_step += simulation_timestep
            continue
        for match_index, state_key in state_keys.items():
            if matches[match_index] in line:
                individual_id = dtk_sft.get_val(KEY_INDIVIDUAL, line)
                output_dict.setdefault(individual_id, {})[state_key] = time_step
                break
    res_path = r'./SEIR_Latent_Cure_from_logging.json'
    with open(res_path, "w") as file:
        json.dump(output_dict, file, indent=4)
    return output_dict
def parse_output_file(output_filename="test.txt", simulation_timestep=1, debug=False):
    """
    Create a dataframe of time step, infected, infectiousness and stat populations.

    :param output_filename: file to parse (test.txt)
    :param simulation_timestep: simulation time step, * days
    :param debug: if True, dump the filtered log lines to disk
    :return: output_df: data frame containing, per time step:
             1, time step
             2, # of infected population
             3, infectiousness
             4, statistical population
    """
    filtered_lines = []
    with open(output_filename) as logfile:
        for line in logfile:
            if dtk_sft.has_match(line, matches):
                filtered_lines.append(line)
    if debug:
        with open("filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)

    # initialize variables
    time_step = 0
    # BUGFIX: infectiousness must be initialized before the loop (it was
    # commented out); otherwise a time line (matches[0]) encountered before any
    # infectiousness line (matches[2]) raises UnboundLocalError.
    infectiousness = 0
    infected = 0
    statpop = 0
    output_df = pd.DataFrame(columns=[KEY_SIMULATION_TIMESTEP,
                                      KEY_INFECTED,
                                      KEY_INFECTIOUSNESS,
                                      KEY_STAT_POP])
    output_df.index.name = "index"
    index = 0
    for line in filtered_lines:
        if matches[0] in line:
            infected = dtk_sft.get_val(matches[1], line)
            statpop = dtk_sft.get_val(matches[3], line)
            output_df.loc[index] = [time_step, infected, infectiousness, statpop]
            index += 1
            time_step += simulation_timestep
            infectiousness = 0  # reset the per-step accumulator
            continue
        if matches[2] in line:
            infectiousness = dtk_sft.get_val(matches[2], line)
            continue
    res_path = r'./infected_vs_infectiousness.csv'
    if not os.path.exists(os.path.dirname(res_path)):
        os.makedirs(os.path.dirname(res_path))
    output_df.to_csv(res_path)
    return output_df
def parse_stdout_file(start_timestep, duration_of_interest, stdout_filename="test.txt", debug=False):
    """
    Extract cumulative deaths, daily deaths, infected counts, and death times
    from stdout.

    :param start_timestep: drug start time
    :param duration_of_interest: window (in timesteps) after start in which
           death times are recorded
    :param stdout_filename: file to parse (test.txt)
    :param debug: if True, write intermediate artifacts to disk
    :return: (cum_deaths, deaths, infected_individuals, death_times)
    """
    with open(stdout_filename) as logfile:
        filtered_lines = [line for line in logfile
                          if dtk_sft.has_match(line, matches)]
    if debug:
        with open("filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)

    # initialize variables
    cum_deaths = []
    deaths = []
    death_times = []
    infected_individuals = []
    timestep = 0
    total_deaths = 0
    daily_deaths = 0
    infected = 0
    for line in filtered_lines:
        if "Time:" in line:
            timestep += 1
            infected_individuals.append(infected)
            cum_deaths.append(total_deaths)
            deaths.append(daily_deaths)
            daily_deaths = 0
            infected = int(dtk_sft.get_val("Infected: ", line))
        else:
            # every non-time line is one death event
            total_deaths += 1
            daily_deaths += 1
            # only keep death times inside the window of interest
            if timestep <= duration_of_interest + start_timestep:
                death_times.append(timestep - start_timestep)
    if debug:
        with open("cum_deaths.txt", "w") as file:
            file.writelines(str(cum_deaths))
        with open("death_times.txt", "w") as file:
            file.writelines(str(death_times))
    return cum_deaths, deaths, infected_individuals, death_times
def parse_output_file(output_filename="test.txt", debug=False):
    """
    Build a per-individual dictionary of death and symptomatic event times.

    :param output_filename: file to parse (test.txt)
    :param debug: if True, dump the filtered log lines to disk
    :return: output_dict keyed by individual id
    """
    with open(output_filename) as logfile:
        filtered_lines = [line for line in logfile
                          if dtk_sft.has_match(line, matches)]
    if debug:
        with open("DEBUG_filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)

    # initialize variables
    time_step = 0
    simulation_timestep = 1
    output_dict = {}
    for line in filtered_lines:
        if matches[0] in line:  # time marker
            time_step += simulation_timestep
        if matches[1] in line:  # death event
            individual_id = dtk_sft.get_val(KEY_INDIVIDUAL, line)
            output_dict.setdefault(individual_id, {})[KEY_DEATH] = time_step
        if matches[2] in line:  # symptomatic event, with its timer
            individual_id = dtk_sft.get_val(KEY_INDIVIDUAL, line)
            timer = float(dtk_sft.get_val(KEY_TIMER, line))
            output_dict.setdefault(individual_id, {})[KEY_SYMPTOMATIC] = [time_step, timer]
    res_path = r'./SEIR_Death_from_logging.json'
    with open(res_path, "w") as file:
        json.dump(output_dict, file, indent=4)
    return output_dict
def parse_stdout_file(output_filename="test.txt", debug=False):
    """
    Read the stdout file and build an individual-ID-indexed DataFrame of ages
    and mod_acquire values for the individuals logged before the first
    "Update(): Time: " line.

    :param output_filename: stdout filename (test.txt)
    :param debug: if True, write intermediate artifacts to disk
    :return: DataFrame of all individuals from a single timestep
    """
    matches = ["{} = ".format(DataframeKeys.MOD_ACQUIRE),
               "{} = ".format(DataframeKeys.AGE),
               "{} = ".format(DataframeKeys.ID),
               "Update(): Time: "]
    filtered_lines = []
    with open(output_filename) as logfile:
        for line in logfile:
            if not sft.has_match(line, matches):
                continue
            if matches[-1] in line:  # stop at the first timestep marker
                break
            filtered_lines.append(line)
    if debug:
        with open("filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)

    individuals_df = pd.DataFrame(columns=[DataframeKeys.AGE,
                                           DataframeKeys.MOD_ACQUIRE])
    individuals_df.index.name = 'index'
    for line in filtered_lines:
        ind_id = int(sft.get_val(matches[2], line))
        individuals_df.loc[ind_id] = [float(sft.get_val(matches[1], line)),
                                      float(sft.get_val(matches[0], line))]
    if debug:
        with open("DEBUG_individuals_dataframe.csv", "w") as outfile:
            outfile.write(individuals_df.to_csv())
    return individuals_df
def parse_output_file(output_filename="test.txt", debug=False):
    """
    Create a dataframe of per-individual susceptibility data (id, age,
    mod_acquire, immune_failage) captured for the first time step only.

    :param output_filename: file to parse (test.txt)
    :param debug: if True then print debug_info and write output_df to disk as
                  './individual_susceptibility.csv'
    :return: output_df: # of infected population and infectiousness per person
             at each time step
    """
    filtered_lines = []
    with open(output_filename) as logfile:
        for line in logfile:
            # search for "Update(): time" | Susceptibility update
            if dtk_sft.has_match(line, ["Update(): Time: ", KEY_INDIVIDUAL_MOD_ACQUIRE]):
                filtered_lines.append(line)
    if debug:
        with open("filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)
    # initialize variables
    time_step = 0
    output_df = pd.DataFrame(columns=[KEY_SIMULATION_TIMESTEP,
                                      KEY_INDIVIDUAL_ID,
                                      KEY_INDIVIDUAL_AGE,
                                      KEY_INDIVIDUAL_MOD_ACQUIRE,
                                      KEY_INDIVIDUAL_IMMUNE_FAILAGE])
    output_df.index.name = "index"
    index = 0
    for line in filtered_lines:
        if "Update(): Time:" in line:
            if debug:
                print("working on... \n" + line)
                print("time_step = " + str(time_step))
            time_step += 1
            continue
        # DEVNOTE: we just validate time_step 1 and all individuals.
        # NOTE(review): individuals are recorded only while time_step == 0,
        # i.e. before the first time marker; the loop stops at the marker —
        # confirm this matches the log ordering of the producing simulation.
        if time_step == 1:
            break
        a = parse_name_value_pair(line, print_indvidiual_susceptibility)
        # DEVNOTE: somehow we have duplicate Susceptibility lines when
        # evaluating getModAcquire, not sure why; skip consecutive duplicates.
        if index > 0 and output_df.loc[index - 1].id == int(a[KEY_INDIVIDUAL_ID]):
            continue
        output_df.loc[index] = [int(time_step),
                                int(a[KEY_INDIVIDUAL_ID]),
                                float(a[KEY_INDIVIDUAL_AGE]),
                                float(a[KEY_INDIVIDUAL_MOD_ACQUIRE]),
                                float(a[KEY_INDIVIDUAL_IMMUNE_FAILAGE])]
        # output_df.id = pd.to_numeric(output_df.id, downcast='signed')
        # not sure why id didn't cast to int, but we will cast it anyway
        output_df.id = output_df[KEY_INDIVIDUAL_ID].astype(int)
        index += 1
        continue
    # drop duplicates as dTK will call getModAquire() multiple times per
    # individual per timestep
    output_df = output_df.drop_duplicates(subset=[KEY_SIMULATION_TIMESTEP, KEY_INDIVIDUAL_ID])
    if debug:
        res_path = r'./individual_susceptibility.csv'
        if not os.path.exists(os.path.dirname(res_path)):
            os.makedirs(os.path.dirname(res_path))
        output_df.to_csv(res_path)
    return output_df
def parse_stdout_file(drug_start_time, start_timestep, stdout_filename="test.txt", debug=False):
    """
    Create cum_inactivations and inactivation_times arrays, tracking
    per-individual reactivation so that re-activated individuals are not
    double-counted.

    :param drug_start_time: drug start time
    :param start_timestep: first timestep from config
    :param stdout_filename: file to parse (test.txt)
    :return: (cum_inactivations, inactivation_times, active_count, inactivations)
    """
    filtered_lines = []
    with open(stdout_filename) as logfile:
        for line in logfile:
            if dtk_sft.has_match(line, matches):
                filtered_lines.append(line)
    if debug:
        with open("filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)
    # initialize variables
    cum_inactivations = []       # cumulative inactivations per timestep
    inactivation_times = []      # time-to-inactivation samples
    timestep = 0
    cum = inactivation_daily = infected = active = 0
    infected_individuals = []    # infected count per timestep
    inactivations = []           # daily inactivation count per timestep
    active_count = []            # active count per timestep
    reactivation_times = {}      # individual id -> timestep of (re)activation
    for line in filtered_lines:
        if matches[2] in line:
            # time marker: flush the per-step counters
            cum_inactivations.append(cum)
            infected_individuals.append(infected)
            inactivations.append(inactivation_daily)
            active_count.append(active)
            infected = int(dtk_sft.get_val("Infected: ", line))
            inactivation_daily = 0
            timestep += 1
        elif matches[0] in line:
            # deactivation: have to track the individual
            individual = int(dtk_sft.get_val("TB drug deactivated my \(", line))
            # if timestep <= duration_of_interest + start_timestep + drug_start_time:
            inactivation_time = timestep - start_timestep - drug_start_time
            if individual in reactivation_times:
                # measured from the individual's own reactivation instead
                inactivation_time = timestep - reactivation_times[individual]
                reactivation_times.pop(individual)
                # not including the reactivations: there are some data points
                # lost due to the simulation ending before timers end
                # inactivation_times.append( inactivation_time )
            else:
                cum += 1
                active -= 1
                inactivation_daily += 1
                inactivation_times.append(inactivation_time)
        elif matches[1] in line:
            # "progressing from Latent to Active Presymptomatic while on TB Drugs"
            # activation: have to track the individual
            individual = int(dtk_sft.get_val("Individual ", line))
            reactivation_times[individual] = timestep
        elif matches[3] in line:
            # move to active
            active += 1
        else:
            # die from HIV
            individual = int(dtk_sft.get_val("individual ", line))
            if individual in reactivation_times:
                active -= 1
                reactivation_times.pop(individual)
    if debug:
        with open("Cum_Inactivations.txt", "w") as outfile:
            outfile.write(str(cum_inactivations))
        with open("Inactivations.txt", "w") as outfile:
            outfile.write(str(inactivation_times))
        print("there are {} individual in reactivation state at the end of simulation.\n".format(len(reactivation_times)))
    return cum_inactivations, inactivation_times, active_count, inactivations
def parse_output_file(output_filename="test.txt", debug=False):
    """
    Create a dictionary of per-time-step, per-core death counts parsed from
    stdout.

    :param output_filename: file to parse (test.txt)
    :param debug: if True, dump intermediate data to disk
    :return: (output_dict, exception_message) — output_dict maps
             time step -> {core: [tb_deaths, hiv_deaths]}; on a parsing
             failure, returns (None, message) instead of raising.
    """
    filtered_lines = []
    with open(output_filename) as logfile:
        for line in logfile:
            if dtk_sft.has_match(line, matches):
                filtered_lines.append(line)
    if debug:
        with open("DEBUG_filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)

    # initialize variables
    time_step = 0
    core = 0
    output_dict = {}
    exception_message = None
    for line in filtered_lines:
        try:
            if matches[0] in line:
                # this may raise LookupError
                value = dtk_sft.get_val(matches[0], line)
                if debug:
                    print("time value I get is '{}'".format(value))
                # this may raise ValueError
                time_step = int(float(value))
                if matches[1] in line:
                    # this may raise ValueError or LookupError
                    core = int(dtk_sft.get_val(matches[1], line))
                else:
                    print(line)
                    # BUGFIX: the placeholder was "{2)" (mismatched brace),
                    # which made .format() itself raise ValueError and masked
                    # the intended error message.
                    raise Exception("at timestep = {0}, {1} and {2} are not in the same line.\n"
                                    .format(time_step, matches[0], matches[1]))
                if debug:
                    print("core is {}".format(core))
                if time_step not in output_dict:
                    output_dict[time_step] = {core: [0, 0]}
                elif core not in output_dict[time_step]:
                    output_dict[time_step][core] = [0, 0]
            elif matches[2] in line:  # this individual died from TB Symptomatic active
                output_dict[time_step][core][0] += 1
            elif matches[3] in line:  # this individual died from HIV
                output_dict[time_step][core][1] += 1
        except Exception as ex:
            exception_message = "failed to parse {0}, got exception: {1}.".format(output_filename, ex)
            print(exception_message)
            return None, exception_message
    if debug:
        res_path = r'./DEBUG_stdout_parsed.json'
        with open(res_path, "w") as file:
            json.dump(output_dict, file, indent=4)
    return output_dict, exception_message