def parse_output_file(output_filename="test.txt", simulation_timestep=1, debug=False):
    """
    Build a per-time-step incidence dictionary from a filtered simulation log.

    :param output_filename: file to parse (test.txt)
    :param simulation_timestep: clock increment applied for every time-step marker line
    :param debug: if True, dump the filtered lines and the result dict to disk
    :return: output_dict mapping time step -> count of Symptomatic-active lines
    """
    with open(output_filename) as logfile:
        filtered_lines = [entry for entry in logfile if sft.has_match(entry, matches)]
    if debug:
        with open("DEBUG_filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)

    output_dict = {}
    time_step = 0
    for entry in filtered_lines:
        if matches[0] in entry:
            # Time-step marker: advance the clock and open a fresh bucket.
            time_step += simulation_timestep
            output_dict[time_step] = 0
        elif matches[1] in entry:
            # One more Symptomatic-active individual in the current time step.
            output_dict[time_step] += 1

    if debug:
        with open(r'./DEBUG_incidence.json', "w") as file:
            json.dump(output_dict, file, indent=4)
    return output_dict
def parse_stdout_file(stdout_filename="StdOut.txt", simulation_timestep=1, debug=False):
    """
    Collect statistical-population and infected counts, one DataFrame row per
    time-step summary line.

    :param stdout_filename: file to parse (StdOut.txt)
    :param simulation_timestep: clock increment per summary line
    :param debug: if True, dump the filtered lines and the resulting csv to disk
    :return: stdout_df
    """
    filtered_lines = []
    with open(stdout_filename) as logfile:
        for logline in logfile:
            if sft.has_match(logline, matches):
                filtered_lines.append(logline)
    if debug:
        with open("DEBUG_filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)

    stdout_df = pd.DataFrame(columns=[ConfigKeys.Simulation_Timestep,
                                      Stdout.stat_pop,
                                      Stdout.infected])
    stdout_df.index.name = 'index'
    time_step = 0
    row = 0
    for logline in filtered_lines:
        if matches[0] not in logline:
            continue
        # Pull both counters off the time-step summary line.
        stat_pop = int(sft.get_val(Stdout.stat_pop, logline))
        infected = int(sft.get_val(Stdout.infected, logline))
        stdout_df.loc[row] = [time_step, stat_pop, infected]
        row += 1
        time_step += simulation_timestep

    if debug:
        stdout_df.to_csv(r'./DEBUG_filtered_from_logging.csv')
    return stdout_df
def parse_output_file(output_filename="test.txt", debug=False):
    """
    Group core ids by time step from a filtered simulation log.

    :param output_filename: file to parse (test.txt)
    :param debug: if True, dump the filtered lines and the result dict to disk
    :return: output_dict mapping time-step value -> list of core values seen there
    """
    with open(output_filename) as logfile:
        filtered_lines = [entry for entry in logfile if sft.has_match(entry, [matches[0]])]
    if debug:
        with open("DEBUG_filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)

    output_dict = {}
    for entry in filtered_lines:
        # Every filtered line carries both a time-step value and a core id.
        time_step = sft.get_val(matches[0], entry)
        core = sft.get_val(matches[1], entry)
        output_dict.setdefault(time_step, []).append(core)

    if debug:
        with open(r'./DEBUG_core_per_time_step.json', "w") as file:
            json.dump(output_dict, file, indent=4)
    return output_dict
def parse_output_file(output_filename="test.txt", simulation_timestep=1, debug=False):
    """
    creates a dictionary to store stat populations, infected population, and MDR
    test result for each time step
    :param output_filename: file to parse (test.txt)
    :param simulation_timestep: clock increment per time-step summary line
        (was a hard-coded local of 1; promoted to a defaulted parameter for
        consistency with the sibling parse functions — default keeps old behavior)
    :param debug: if True, also dump the filtered lines to disk
    :return: output_dict mapping time step -> {stat_pop, infected, positive, negative, default}
    """
    filtered_lines = []
    with open(output_filename) as logfile:
        for line in logfile:
            if sft.has_match(line, matches):
                filtered_lines.append(line)
    if debug:
        with open("DEBUG_filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)

    # initialize variables
    time_step = 0
    infected = 0
    statpop = 0
    positive = 0
    negative = 0
    default = 0
    output_dict = {}
    for line in filtered_lines:
        if matches[0] in line:
            # Close out the previous time step with the counters gathered so far,
            # then read the new population counts and reset the per-step tallies.
            output_dict[time_step] = {KEY_STAT_POP: statpop,
                                      KEY_INFECTED: infected,
                                      KEY_POSITIVE: positive,
                                      KEY_NEGATIVE: negative,
                                      KEY_DEFAULT: default}
            infected = sft.get_val(matches[2], line)
            statpop = sft.get_val(matches[1], line)
            time_step += simulation_timestep
            positive = 0
            negative = 0
            default = 0
        if matches[3] in line:
            # Test-result line: non-zero counts as positive, zero as negative.
            result = int(sft.get_val(matches[3], line))
            if result:
                positive += 1
            else:
                negative += 1
        if matches[4] in line:
            default += 1

    # NOTE(review): written unconditionally (not debug-guarded) in the original;
    # kept as-is to preserve behavior.
    res_path = r'./tb_test_result_from_logging.json'
    with open(res_path, "w") as file:
        json.dump(output_dict, file, indent=4)
    return output_dict
def parse_output_file(output_filename="test.txt", simulation_timestep=1, debug=False):
    """
    creates a dataframe of time step, infected, infectiousness and stat populations
    :param output_filename: file to parse (test.txt)
    :param simulation_timestep: simulation time step, * days
    :param debug: if True, also dump the filtered lines to disk
    :return: output_df: data frame contains:
        1, time step,
        2, # of infected population,
        3, infectiousness,
        4, statistical populations, at each time step
    """
    filtered_lines = []
    with open(output_filename) as logfile:
        for line in logfile:
            if sft.has_match(line, matches):
                filtered_lines.append(line)
    if debug:
        with open("filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)

    # initialize variables
    time_step = 0
    # BUGFIX: this initialization was commented out in the original; if a
    # time-step summary line (matches[0]) appears before any infectiousness
    # line (matches[2]), the first row append raised NameError.
    infectiousness = 0
    infected = 0
    statpop = 0
    output_df = pd.DataFrame(columns=[KEY_SIMULATION_TIMESTEP,
                                      KEY_INFECTED,
                                      KEY_INFECTIOUSNESS,
                                      KEY_STAT_POP])
    output_df.index.name = "index"
    index = 0
    for line in filtered_lines:
        if matches[0] in line:
            # Summary line: append the row for this step, advance the clock and
            # reset infectiousness for the next step.
            infected = sft.get_val(matches[1], line)
            statpop = sft.get_val(matches[3], line)
            output_df.loc[index] = [time_step, infected, infectiousness, statpop]
            index += 1
            time_step += simulation_timestep
            infectiousness = 0
            continue
        if matches[2] in line:
            infectiousness = sft.get_val(matches[2], line)
            continue

    # NOTE(review): written unconditionally in the original; kept as-is.
    res_path = r'./infected_vs_infectiousness.csv'
    if not os.path.exists(os.path.dirname(res_path)):
        os.makedirs(os.path.dirname(res_path))
    output_df.to_csv(res_path)
    return output_df
def parse_stdout_file(stdout_filename="StdOut.txt", simulation_timestep=1, debug=False):
    """
    Build a DataFrame of per-group contagion/probability values plus population
    counters, one row per (time step, group id).

    :param stdout_filename: file to parse (StdOut.txt)
    :param simulation_timestep: clock increment per time-step summary line
    :param debug: if True, dump the filtered lines and the csv to disk
    :return: stdout_df
    """
    filtered_lines = []
    with open(stdout_filename) as logfile:
        for raw in logfile:
            if sft.has_match(raw, matches):
                filtered_lines.append(raw)
    if debug:
        with open("DEBUG_filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)

    stdout_df = pd.DataFrame(columns=[ConfigKeys.Simulation_Timestep,
                                      Stdout.stat_pop,
                                      Stdout.infected,
                                      Stdout.group_id,
                                      Stdout.contagion,
                                      Stdout.prob])
    stdout_df.index.name = 'index'
    time_step = 0
    row = 0
    group_contagion = {}
    for raw in filtered_lines:
        if matches[0] in raw:
            # Time-step summary: flush the groups gathered since the last summary,
            # in ascending group-id order.
            stat_pop = int(sft.get_val(Stdout.stat_pop, raw))
            infected = int(sft.get_val(Stdout.infected, raw))
            for gid in sorted(group_contagion):
                stdout_df.loc[row] = [time_step, stat_pop, infected, gid,
                                      group_contagion[gid][0],
                                      group_contagion[gid][1]]
                row += 1
            group_contagion = {}
            time_step += simulation_timestep
        elif len(group_contagion) < 10 and matches[1] in raw:
            # Record at most 10 distinct groups per time step; first sighting wins.
            contagion_value = float(sft.get_val(matches[1], raw))
            prob_value = float(sft.get_val(Stdout.prob, raw))
            gid = int(sft.get_val(matches[2], raw))
            if gid not in group_contagion:
                group_contagion[gid] = [contagion_value, prob_value]

    if debug:
        stdout_df.to_csv(r'./DEBUG_filtered_from_logging.csv')
    return stdout_df
def parse_output_file(output_filename="test.txt", simulation_timestep=1, debug=False):
    """
    creates a dictionary to store filtered information for each time step
    :param output_filename: file to parse (test.txt)
    :param simulation_timestep: clock increment per time-step marker line
        (was a hard-coded local of 1; promoted to a defaulted parameter for
        consistency with the sibling parse functions — default keeps old behavior)
    :param debug: if True, also dump the filtered lines to disk
    :return: output_dict keyed by individual id; each value maps
        KEY_PRESYMPTOMATIC / KEY_SYMPTOMATIC -> [time_step, timer]
    """
    filtered_lines = []
    with open(output_filename) as logfile:
        for line in logfile:
            if sft.has_match(line, matches):
                filtered_lines.append(line)
    if debug:
        with open("DEBUG_filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)

    # initialize variables
    time_step = 0
    output_dict = {}
    for line in filtered_lines:
        if matches[0] in line:
            time_step += simulation_timestep
        elif matches[1] in line:
            # this individual became PreSymptomatic active
            individual_id = sft.get_val(KEY_INDIVIDUAL, line)
            timer = float(sft.get_val(KEY_TIMER, line))
            output_dict.setdefault(individual_id, {})[KEY_PRESYMPTOMATIC] = [time_step, timer]
        elif matches[2] in line:
            # this individual became Symptomatic active
            individual_id = sft.get_val(KEY_INDIVIDUAL, line)
            timer = float(sft.get_val(KEY_TIMER, line))
            output_dict.setdefault(individual_id, {})[KEY_SYMPTOMATIC] = [time_step, timer]

    # NOTE(review): written unconditionally (not debug-guarded) in the original;
    # kept as-is to preserve behavior.
    res_path = r'./SEIR_Presymptomatic_from_logging.json'
    with open(res_path, "w") as file:
        json.dump(output_dict, file, indent=4)
    return output_dict
def parse_stdout_file( start_timestep, stdout_filename="test.txt", debug=False ):
    """
    Parse relapse events out of a stdout log, bucketed per "Time:" line.

    :param start_timestep: first timestep from config; relapse lines seen while
        timestep == start_timestep + 1 are tallied as initial_relapses
    :param stdout_filename: file to parse (test.txt)
    :param debug: if True, dump the filtered lines and both result lists to disk
    :return: (relapses, cum_relapses) — per-timestep relapse counts, and the
        cumulative count snapshotted at the start of each timestep
    """
    filtered_lines = []
    with open(stdout_filename) as logfile:
        for line in logfile:
            if sft.has_match(line,matches):
                filtered_lines.append(line)
    if debug:
        with open("filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)
    # initialize variables
    # so actually all the will-relapse calculations are done at time of clearance.
    # our clearance rate is 100%, so they all happen at once. So we just calculate
    # the number of will-relapses at time-of-clearance and use
    #   sft.test_binomial_95ci( relapses, N, 'rate', None, None )
    # to figure out if everything was right.
    # But first we have to fix the bug where fast progressors who get cleared by
    # drugs bounce back-and-forth between active and latent.
    cum_relapses = []
    relapses = []
    timestep = 0
    initial_relapses = 0
    pre_cum = 0
    cum = 0
    for line in filtered_lines:
        if "Time:" in line:
            # New timestep: snapshot the cumulative total, and record the delta
            # accumulated during the timestep that just ended.
            timestep += 1
            cum_relapses.append(cum)
            relapses.append(cum - pre_cum)
            pre_cum = cum
        else:
            # Any other matched line is a single relapse event.
            if timestep == start_timestep + 1:
                initial_relapses += 1
            cum += 1
    if debug:
        with open("relapses.txt", "w") as file:
            file.writelines(str(relapses))
        with open("cum_relapses.txt", "w") as file:
            file.writelines(str(cum_relapses))
        print( "initial relapses is {}.\n".format(initial_relapses ) )
    return relapses, cum_relapses
def parse_stdout_file( start_timestep, duration_of_interest, stdout_filename="test.txt", debug=False):
    """
    creates cum_death and death_times arrays from a stdout log

    :param start_timestep: drug start time
    :param duration_of_interest: window (timesteps past start_timestep) during
        which death times are collected into death_times
    :param stdout_filename: file to parse (test.txt)
    :param debug: if True, dump the filtered lines and result arrays to disk
    :return: cum_deaths, deaths, infected_individuals, death_times
    """
    filtered_lines = []
    with open(stdout_filename) as logfile:
        for line in logfile:
            if sft.has_match(line, matches):
                filtered_lines.append(line)
    if debug:
        with open("filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)
    # initialize variables
    cum_deaths = []     # cumulative deaths, snapshotted at the start of each timestep
    deaths = []         # deaths during each preceding timestep
    death_times = []    # per-death time offsets relative to start_timestep
    timestep = 0
    cum = death_daily = infected = 0
    infected_individuals = []  # infected count, snapshotted per timestep
    for line in filtered_lines:
        if "Time:" in line:
            # New timestep: snapshot the running totals gathered so far, then
            # read the infected count off the summary line.
            timestep += 1
            infected_individuals.append(infected)
            cum_deaths.append(cum)
            deaths.append(death_daily)
            death_daily = 0
            infected = int(sft.get_val("Infected: ", line))
        else:
            # Any other matched line is a single death event.
            cum += 1
            death_daily += 1
            # cum_deaths[-1] = cum
            if timestep <= duration_of_interest + start_timestep:
                # Record the death time relative to the drug start.
                death_times.append( timestep-start_timestep )
    if debug:
        with open("cum_deaths.txt","w") as file:
            file.writelines( str( cum_deaths ) )
        with open("death_times.txt","w") as file:
            file.writelines( str( death_times ) )
    return cum_deaths, deaths, infected_individuals, death_times
def parse_stdout_file(stdout_filename="StdOut.txt", simulation_timestep=1, debug=False):
    """
    Parse per-group contagion and probability values from StdOut, emitting one
    DataFrame row per (time step, group id) pair.

    :param stdout_filename: file to parse (StdOut.txt)
    :param simulation_timestep: increment added to the clock per summary line
    :param debug: if True, write the filtered lines and the csv to disk
    :return: stdout_df
    """
    kept = []
    with open(stdout_filename) as logfile:
        for entry in logfile:
            if sft.has_match(entry, matches):
                kept.append(entry)
    if debug:
        with open("DEBUG_filtered_lines.txt", "w") as outfile:
            outfile.writelines(kept)

    columns = [ConfigKeys.Simulation_Timestep, Stdout.stat_pop, Stdout.infected,
               Stdout.group_id, Stdout.contagion, Stdout.prob]
    stdout_df = pd.DataFrame(columns=columns)
    stdout_df.index.name = 'index'
    clock = 0
    next_row = 0
    pending = {}
    for entry in kept:
        if matches[0] not in entry:
            # Group line: remember the first sighting of each group id, capped
            # at 10 distinct groups per time step.
            if len(pending) < 10 and matches[1] in entry:
                contagion = float(sft.get_val(matches[1], entry))
                prob = float(sft.get_val(Stdout.prob, entry))
                group = int(sft.get_val(matches[2], entry))
                pending.setdefault(group, [contagion, prob])
            continue
        # Summary line: flush all pending groups in ascending group-id order.
        stat_pop = int(sft.get_val(Stdout.stat_pop, entry))
        infected = int(sft.get_val(Stdout.infected, entry))
        for group in sorted(pending):
            stdout_df.loc[next_row] = [clock, stat_pop, infected, group,
                                       pending[group][0], pending[group][1]]
            next_row += 1
        pending = {}
        clock += simulation_timestep

    if debug:
        stdout_df.to_csv(r'./DEBUG_filtered_from_logging.csv')
    return stdout_df
def parse_output_file(output_filename="test.txt", simulation_timestep=1, debug=False):
    """
    Tally positive/negative/default test results per time step into a DataFrame.

    :param output_filename: file to parse (test.txt)
    :param simulation_timestep: increment applied for every time-step marker line
    :param debug: if True, dump the filtered lines to disk
    :return: output_df indexed by time step
    """
    filtered_lines = []
    with open(output_filename) as logfile:
        for row in logfile:
            if sft.has_match(row, matches):
                filtered_lines.append(row)
    if debug:
        with open("DEBUG_filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)

    output_df = pd.DataFrame(columns=[ReportColumn.negative,
                                      ReportColumn.default,
                                      ReportColumn.positive])
    output_df.index.name = Config.simulation_timestep
    time_step = 0
    positive = negative = default = 0
    for row in filtered_lines:
        if matches[0] in row:
            # Time-step marker: store the counters for the step just finished
            # and reset them for the next one.
            output_df.loc[time_step] = pd.Series({ReportColumn.positive: positive,
                                                  ReportColumn.negative: negative,
                                                  ReportColumn.default: default})
            time_step += simulation_timestep
            positive = negative = default = 0
        if matches[1] in row:
            # Test-result line: 1 counts as positive, 0 as negative.
            if int(sft.get_val(matches[1], row)):
                positive += 1
            else:
                negative += 1
        if matches[2] in row:
            default += 1

    # Written unconditionally (not debug-guarded) in the original; preserved.
    res_path = r'./DEBUG_tb_test_result_from_logging.csv'
    with open(res_path, "w") as file:
        output_df.to_csv(file)
    return output_df
def parse_stdout_file(output_filename="test.txt", debug=False):
    """
    Reads stdout file and creates an individual-ID-indexed DataFrame of ages
    and mod_aquire values, using only the lines that precede the first
    "Update(): Time: " entry (i.e. the initial population dump).

    :param output_filename: stdout filename (test.txt)
    :param debug: if True, write the filtered lines and the DataFrame to disk
    :return: dataframe of all individuals from a single timestep,
        indexed by individual id with AGE and MOD_ACQUIRE columns
    """
    matches = [
        "{} = ".format(DataframeKeys.MOD_ACQUIRE),
        "{} = ".format(DataframeKeys.AGE),
        "{} = ".format(DataframeKeys.ID),
        "Update(): Time: "
    ]
    filtered_lines = []
    with open(output_filename) as logfile:
        for line in logfile:
            if sft.has_match(line, matches):
                # Stop at the first time-step update line: only the
                # pre-simulation individual dump is of interest.
                if matches[-1] in line:
                    break
                filtered_lines.append(line)
    if debug:
        with open("filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)

    individuals_df = pd.DataFrame(
        columns=[DataframeKeys.AGE, DataframeKeys.MOD_ACQUIRE])
    individuals_df.index.name = 'index'
    for line in filtered_lines:
        age = float(sft.get_val(matches[1], line))
        acquire = float(sft.get_val(matches[0], line))
        # FIX: renamed local from `id`, which shadowed the builtin.
        individual_id = int(sft.get_val(matches[2], line))
        individuals_df.loc[individual_id] = [age, acquire]
    if debug:
        with open("DEBUG_individuals_dataframe.csv", "w") as outfile:
            outfile.write(individuals_df.to_csv())
    return individuals_df
def parse_stdout_file(drug_start_time, start_timestep, stdout_filename="test.txt", debug=False):
    """creates cum_inactivations, inactivation_times arrays
    :param drug_start_time: drug start time
    :param start_timestep: first timestep from config
    :param stdout_filename: file to parse (test.txt)
    :param debug: if True, dump the filtered lines and result arrays to disk
    :return: cum_inactivations, inactivation_times, active_count, inactivations
    """
    filtered_lines = []
    with open(stdout_filename) as logfile:
        for line in logfile:
            if sft.has_match(line, matches):
                filtered_lines.append(line)
    if debug:
        with open("filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)
    # initialize variables
    cum_inactivations = []   # cumulative inactivations, snapshotted per timestep
    inactivation_times = []  # time from drug start (or reactivation) to inactivation
    timestep = 0
    cum = inactivation_daily = infected = active = 0
    infected_individuals = []  # infected count, snapshotted per timestep
    inactivations = []         # daily inactivation count, snapshotted per timestep
    active_count = []          # running active count, snapshotted per timestep
    reactivation_times = {}    # individual id -> timestep of latest reactivation
    for line in filtered_lines:
        if matches[2] in line:
            # Timestep summary line: snapshot the running counters first, then
            # read the new infected count and start a new day.
            cum_inactivations.append(cum)
            infected_individuals.append(infected)
            inactivations.append(inactivation_daily)
            active_count.append(active)
            infected = int(sft.get_val("Infected: ", line))
            inactivation_daily = 0
            timestep += 1
        elif matches[0] in line:
            # deactivation: have to track individual
            individual = int(sft.get_val("TB drug deactivated my \(", line))
            # if timestep <= duration_of_interest + start_timestep + drug_start_time:
            inactivation_time = timestep - start_timestep - drug_start_time
            if individual in reactivation_times:
                # This individual reactivated earlier; measure from that point
                # instead of from the drug start.
                inactivation_time = timestep - reactivation_times[individual]
                reactivation_times.pop(individual)
                # not including the reactivations: there are some data point lost
                # due to simulation end before timers end
                # inactivation_times.append( inactivation_time )
            else:
                cum += 1
                active -= 1
                inactivation_daily += 1
                inactivation_times.append(inactivation_time)
        elif matches[1] in line:
            # "progressing from Latent to Active Presymptomatic while on TB Drugs"
            # activation: have to track individual
            individual = int(sft.get_val("Individual ", line))
            reactivation_times[individual] = timestep
        elif matches[3] in line:
            # move to active
            active += 1
        else:
            # die from HIV: drop the individual from reactivation tracking
            individual = int(sft.get_val("individual ", line))
            if individual in reactivation_times:
                active -= 1
                reactivation_times.pop(individual)
    if debug:
        with open("Cum_Inactivations.txt", "w") as outfile:
            outfile.write(str(cum_inactivations))
        with open("Inactivations.txt", "w") as outfile:
            outfile.write(str(inactivation_times))
        print(
            "there are {} individual in reactivation state at the end of simulation.\n"
            .format(len(reactivation_times)))
    return cum_inactivations, inactivation_times, active_count, inactivations
def parse_output_file(output_filename="test.txt", debug=False):
    """
    creates a dictionary to store filtered information for each time step
    :param output_filename: file to parse (test.txt)
    :param debug: if True, print parsing details and dump the result to disk
    :return: (output_dict, message) — output_dict maps
        time step -> {core: [tb_symptomatic_deaths, hiv_deaths]}; message is
        None on success. On any parse failure returns (None, exception message).
    """
    filtered_lines = []
    with open(output_filename) as logfile:
        for line in logfile:
            if sft.has_match(line, matches):
                filtered_lines.append(line)
    if debug:
        with open("DEBUG_filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)

    # initialize variables
    time_step = 0
    core = 0
    output_dict = {}
    exception_message = None
    for line in filtered_lines:
        try:
            if matches[0] in line:
                # this may raise LookupError
                value = sft.get_val(matches[0], line)
                if debug:
                    print("time value I get is '{}'".format(value))
                # this may raise ValueError
                time_step = int(float(value))
                if matches[1] in line:
                    # this may raise ValueError or LookupError
                    core = int(sft.get_val(matches[1], line))
                else:
                    print(line)
                    # BUGFIX: the message used "{2)" (mismatched brace), which made
                    # str.format itself raise ValueError instead of reporting the
                    # intended diagnostic.
                    raise Exception(
                        "at timestep = {0}, {1} and {2} are not in the same line.\n"
                        .format(time_step, matches[0], matches[1]))
                if debug:
                    print("core is {}".format(core))
                if time_step not in output_dict:
                    output_dict[time_step] = {core: [0, 0]}
                elif core not in output_dict[time_step]:
                    output_dict[time_step][core] = [0, 0]
            elif matches[2] in line:
                # this individual died from TB Symptomatic active
                output_dict[time_step][core][0] += 1
            elif matches[3] in line:
                # this individual died from HIV
                output_dict[time_step][core][1] += 1
        except Exception as ex:
            exception_message = "failed to parse {0}, got exception: {1}.".format(
                output_filename, ex)
            print(exception_message)
            return None, exception_message
    if debug:
        res_path = r'./DEBUG_stdout_parsed.json'
        with open(res_path, "w") as file:
            json.dump(output_dict, file, indent=4)
    return output_dict, exception_message
def parse_stdout_file(stdout_filename="StdOut.txt", simulation_timestep=1, debug=False):
    """
    creates a dataframe to store filtered information for each time step
    :param stdout_filename: file to parse (StdOut.txt)
    :param simulation_timestep: clock increment per time-step summary line
    :param debug: if True, dump the filtered lines and the csv to disk
    :return: stdout_df
    """
    filtered_lines = []
    with open(stdout_filename) as logfile:
        for line in logfile:
            if sft.has_match(line, matches):
                filtered_lines.append(line)
    if debug:
        with open("DEBUG_filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)
    # initialize variables
    time_step = index = envi_sample = 0
    stdout_df = pd.DataFrame(columns=[
        ConfigKeys.Simulation_Timestep, Stdout.stat_pop, Stdout.infected,
        Stdout.test_positive, Stdout.test_negative, Stdout.test_default,
        Stdout.sample, Stdout.ip_value
    ])
    stdout_df.index.name = 'index'
    stat_pop = infected = None
    ip_value = ""
    test_positive = test_negative = test_default = 0
    for line in filtered_lines:
        if matches[0] in line:
            # Time-step summary: the row is appended BEFORE the new counters
            # are read, so each row holds the values accumulated during the
            # previous time step (the very first row carries the initial
            # None/zero values).
            stdout_df.loc[index] = [
                time_step, stat_pop, infected, test_positive, test_negative,
                test_default, envi_sample, ip_value
            ]
            index += 1
            time_step += simulation_timestep
            infected = int(sft.get_val(Stdout.infected, line))
            stat_pop = int(sft.get_val(Stdout.stat_pop, line))
            test_positive = test_negative = test_default = 0
        elif matches[1] in line:
            # Classify the test-result text on the line.
            test_result = sft.get_char(matches[1], line)
            if 'positive' in test_result.lower():
                test_positive += 1
            elif 'negative' in test_result.lower():
                test_negative += 1
            else:
                test_default += 1
        else:
            if matches[2] in line:
                envi_sample = float(sft.get_val(matches[2], line))
            # matches[2] and matches[3] may and may not be in the same line
            if matches[3] in line:
                # ip_value = str(sft.get_char(matches[3], line))
                # Extract the token that carries the IP key and strip the key
                # prefix, keeping only the value part.
                for s in line.split():
                    if matches[3] in s:
                        ip_value = s.replace(matches[3], '')
    if debug:
        res_path = r'./DEBUG_filtered_from_logging.csv'
        stdout_df.to_csv(res_path)
    return stdout_df
def parse_output_file(output_filename="test.txt", debug=False):
    """
    creates a dataframe of time step and infected, infectiouness and stat populations
    :param output_filename: file to parse (test.txt)
    :param debug: if True then print debug_info and write output_df to disk as
        './individual_susceptibility.csv'
    :return: output_df: # of infected population and infectiousness per person
        at each time step
    """
    filtered_lines = []
    with open(output_filename) as logfile:
        for line in logfile:
            if sft.has_match(
                    line,
                    ["Update(): Time: ", KEY_INDIVIDUAL_MOD_ACQUIRE
                     ]):  # search for "Update(): time" | Susceptibility update
                filtered_lines.append(line)
    if debug:
        with open("filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)
    # initialize variables
    time_step = 0
    output_df = pd.DataFrame(columns=[
        KEY_SIMULATION_TIMESTEP, KEY_INDIVIDUAL_ID, KEY_INDIVIDUAL_AGE,
        KEY_INDIVIDUAL_MOD_ACQUIRE, KEY_INDIVIDUAL_IMMUNE_FAILAGE
    ])
    output_df.index.name = "index"
    index = 0
    for line in filtered_lines:
        if "Update(): Time:" in line:
            if debug:
                print("working on... " + line)
                print("time_step = " + str(time_step))
            time_step += 1
            continue
        if time_step == 1:
            # DEVNOTE: we just validate time_step 1 and all individuals
            # NOTE(review): this breaks as soon as the clock reaches 1, so only
            # susceptibility lines printed before the second "Update(): Time:"
            # marker are collected — confirm this matches the log ordering.
            break
        a = parse_name_value_pair(line, print_indvidiual_susceptibility)
        # NOTE(review): `.id` attribute access assumes the KEY_INDIVIDUAL_ID
        # column is literally named "id" — verify against the key constants.
        if index > 0 and output_df.loc[index - 1].id == int(
                a[KEY_INDIVIDUAL_ID]
        ):  # DEVNOTE: somehow we have duplicate Susceptibility where evaluating getModAcquire, not sure why, add this to skip
            continue
        output_df.loc[index] = [
            int(time_step),
            int(a[KEY_INDIVIDUAL_ID]),
            float(a[KEY_INDIVIDUAL_AGE]),
            float(a[KEY_INDIVIDUAL_MOD_ACQUIRE]),
            float(a[KEY_INDIVIDUAL_IMMUNE_FAILAGE])
        ]
        # output_df.id = pd.to_numeric(output_df.id, downcast='signed')
        # not sure why id didn't case to int, but we will cast it anyway
        output_df.id = output_df[KEY_INDIVIDUAL_ID].astype(int)
        index += 1
        continue
    # drop duplicates as dTK will call getModAquire() multiple time per individual per timesteps
    output_df = output_df.drop_duplicates(
        subset=[KEY_SIMULATION_TIMESTEP, KEY_INDIVIDUAL_ID])
    if debug:
        res_path = r'./individual_susceptibility.csv'
        if not os.path.exists(os.path.dirname(res_path)):
            os.makedirs(os.path.dirname(res_path))
        output_df.to_csv(res_path)
    return output_df