def parse_output_file(output_filename="test.txt", debug=False):
    """
    Collect, per time step, the list of core values reported in the log.

    :param output_filename: file to parse (test.txt)
    :param debug: when True, also write the matched lines and the parsed
        dictionary to debug files
    :return: output_dict keyed by time step, each value a list of cores
    """
    # Keep only the lines carrying the time-step marker (matches[0]).
    with open(output_filename) as logfile:
        relevant_lines = [candidate for candidate in logfile
                          if sft.has_match(candidate, [matches[0]])]

    if debug:
        with open("DEBUG_filtered_lines.txt", "w") as outfile:
            outfile.writelines(relevant_lines)

    output_dict = {}
    for entry in relevant_lines:
        step = sft.get_val(matches[0], entry)
        core_value = sft.get_val(matches[1], entry)
        # group every core reported during the same time step together
        output_dict.setdefault(step, []).append(core_value)

    if debug:
        res_path = r'./DEBUG_core_per_time_step.json'
        with open(res_path, "w") as file:
            json.dump(output_dict, file, indent=4)
    return output_dict
def parse_stdout_file(stdout_filename="StdOut.txt", simulation_timestep=1, debug=False):
    """
    Build a DataFrame of (time step, StatPop, Infected) rows from the log.

    :param stdout_filename: file to parse (StdOut.txt)
    :param simulation_timestep: days advanced per summary line
    :param debug: when True, also dump matched lines and the resulting csv
    :return: stdout_df
    """
    with open(stdout_filename) as logfile:
        kept = [candidate for candidate in logfile if sft.has_match(candidate, matches)]

    if debug:
        with open("DEBUG_filtered_lines.txt", "w") as outfile:
            outfile.writelines(kept)

    stdout_df = pd.DataFrame(columns=[ConfigKeys.Simulation_Timestep, Stdout.stat_pop, Stdout.infected])
    stdout_df.index.name = 'index'

    current_time = 0
    row = 0
    for entry in kept:
        # Only the per-timestep summary lines (matches[0]) contribute rows.
        if matches[0] not in entry:
            continue
        population = int(sft.get_val(Stdout.stat_pop, entry))
        infected_count = int(sft.get_val(Stdout.infected, entry))
        stdout_df.loc[row] = [current_time, population, infected_count]
        row += 1
        current_time += simulation_timestep

    if debug:
        res_path = r'./DEBUG_filtered_from_logging.csv'
        stdout_df.to_csv(res_path)
    return stdout_df
def create_report_file(data):
    """
    Validate ART-triggered TB latency timer updates against the expected
    exponential distribution and write a pass/fail report.

    :param data: [report_name, log lines, tb_cd4_activation_vector]
    """
    report_name = data[0]
    lines = data[1]
    tb_cd4_activation_vector = data[2]  # this test assumes the vector is constant
    latency_update_data = []    # timers re-drawn on ART (LifeCourseLatencyTimerUpdate lines)
    original_latency_data = []  # timers drawn at initial infection
    success = True
    with open(report_name, "w") as outfile:
        if not lines:
            outfile.write("BAD: No relevant test data found.\n")
            success = False
        for line in lines:
            if "Incubation_timer calculated as" in line:
                incubation_timer = float(sft.get_val("as ", line))
                original_latency_data.append(incubation_timer)
            if "LifeCourseLatencyTimerUpdate" in line:
                new_incubation_timer = float(sft.get_val("timer ", line))
                latency_update_data.append(new_incubation_timer)
        # expecting the original distribution to NOT match the art-triggered update distribution
        if sft.test_exponential(original_latency_data, tb_cd4_activation_vector[2], integers=True, roundup=True,
                                round_nearest=False):
            outfile.write("BAD: The updated latency data matches the original distribution.\n")
            success = False
        # reference sample drawn from the expected exponential; used only for plotting below
        expected_update_data = np.random.exponential(1 / tb_cd4_activation_vector[2], len(latency_update_data))
        if not sft.test_exponential(latency_update_data, tb_cd4_activation_vector[2], outfile, integers=True,
                                    roundup=True, round_nearest=False):
            # as it should fail , success = bad.
            outfile.write("BAD: The updated latency data does not match the expected distribution.\n")
            success = False
        outfile.write("Data points checked = {}.\n".format(len(latency_update_data)))
        outfile.write("SUMMARY: Success={0}\n".format(success))
        sft.plot_data(sorted(latency_update_data), sorted(expected_update_data), label1="Actual", label2="Expected",
                      title="Latency Duration recalculated for ART", xlabel="Data Points", ylabel="Days",
                      category="tb_activation_and_cd4_hiv_first_on_art", line=True, overlap=True)
def parse_stdout_file(initial_timestep=0, stdout_filename="test.txt", debug=False):
    """
    Count exogenous infections and slow-progressor latent cases per time step
    for the original (non-imported) population.

    :param initial_timestep: first timestep from config
    :param stdout_filename: file to parse (test.txt)
    :param debug: whether or not we write an additional file that's full of the matched lines
    :return: [exogenous_infected_dict (time -> [exogenous count, latent count]),
        exogenous_infected_list (per-step exogenous counts)]
    """
    exogenous = "EXOGENOUS infected"  # "EXOGENOUS infected"
    infectiousness = "UpdateInfectiousness"
    state_latent = "state= Latent"
    filtered_lines = []
    exogenous_infected_count = 0
    latent_count = 0
    exogenous_infected_dict = {}
    exogenous_infected_list = []
    infections_start_time = 12  # this will vary depending when imported people will begin to be infectious.
    initial_population = 0  # placeholder for the first time step
    update_time = "Update(): Time: "
    time = initial_timestep
    with open(stdout_filename) as logfile:
        for line in logfile:
            if update_time in line:
                # a day boundary: flush the counters accumulated since the
                # previous boundary (only after infections can have started)
                if time > infections_start_time:
                    exogenous_infected_list.append(exogenous_infected_count)
                    exogenous_infected_dict[time] = [exogenous_infected_count, latent_count]
                time += 1
                if time == 1:
                    # population is read once, from the first day line
                    initial_population = int(sft.get_val("StatPop: ", line))
                exogenous_infected_count = 0  # resetting for the next time step
                latent_count = 0
                filtered_lines.append(line)
            elif exogenous in line:
                ind_id = int(float(sft.get_val("Individual ", line)))
                if ind_id <= initial_population:  # ignoring imported people
                    exogenous_infected_count += 1
                    filtered_lines.append(line)
            elif infectiousness in line and state_latent in line:
                ind_id = int(float(sft.get_val("Individual ", line)))
                fast_progressor = int(float(sft.get_val("progressor=", line)))
                filtered_lines.append(line)
                # only slow progressors count toward the latent tally
                if not fast_progressor and ind_id <= initial_population:  # ignoring imported people
                    latent_count += 1
    if debug:
        with open("filtered_lines.txt", "w") as outfile:
            for line in filtered_lines:
                outfile.write(line)
    return [exogenous_infected_dict, exogenous_infected_list]
def parse_output_file(output_filename="test.txt", debug=False):
    """
    creates a dictionary to store stat populations, infected population, and MDR test result for each time step
    :param output_filename: file to parse (test.txt)
    :param debug: when True, dump the matched lines to DEBUG_filtered_lines.txt
    :return: output_dict
    """
    filtered_lines = []
    with open(output_filename) as logfile:
        for line in logfile:
            if sft.has_match(line, matches):
                filtered_lines.append(line)
    if debug:
        with open("DEBUG_filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)
    # initialize variables
    time_step = 0
    infected = 0
    statpop = 0
    simulation_timestep = 1
    positive = 0
    negative = 0
    default = 0
    output_dict = {}
    for line in filtered_lines:
        if matches[0] in line:
            # Summary line marks the end of a time step: store the counters
            # gathered so far. Note statpop/infected still hold the values
            # parsed from the PREVIOUS summary line at this point.
            output_dict[time_step] = {KEY_STAT_POP: statpop, KEY_INFECTED: infected, KEY_POSITIVE: positive,
                                      KEY_NEGATIVE: negative, KEY_DEFAULT: default}
            infected = sft.get_val(matches[2], line)
            statpop = sft.get_val(matches[1], line)
            time_step += simulation_timestep
            # reset the per-timestep test-result counters
            positive = 0
            negative = 0
            default = 0
        if matches[3] in line:
            # MDR test result line: nonzero => positive, zero => negative
            result = int(sft.get_val(matches[3], line))
            if result:
                positive += 1
            else:
                negative += 1
        if matches[4] in line:
            # test "default" event line
            default += 1
    res_path = r'./tb_test_result_from_logging.json'
    with open(res_path, "w") as file:
        json.dump(output_dict, file, indent=4)
    return output_dict
def parse_output_file(output_filename="test.txt", simulation_timestep=1, debug=False):
    """
    Create a dataframe of time step, infected count, infectiousness and
    statistical population.

    :param output_filename: file to parse (test.txt)
    :param simulation_timestep: simulation time step, * days
    :param debug: when True, write the matched lines to filtered_lines.txt
    :return: output_df: data frame contains:
        1, time step,
        2, # of infected population,
        3, infectiousness,
        4, statistical populations, at each time step
    """
    filtered_lines = []
    with open(output_filename) as logfile:
        for line in logfile:
            if sft.has_match(line, matches):
                filtered_lines.append(line)
    if debug:
        with open("filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)
    # initialize variables
    time_step = 0
    # BUGFIX: infectiousness was previously only set inside the matches[2]
    # branch (its initializer was commented out), so a matches[0] summary
    # line arriving first raised UnboundLocalError. Initialize it here.
    infectiousness = 0
    infected = 0
    statpop = 0
    output_df = pd.DataFrame(columns=[KEY_SIMULATION_TIMESTEP, KEY_INFECTED, KEY_INFECTIOUSNESS, KEY_STAT_POP])
    output_df.index.name = "index"
    index = 0
    for line in filtered_lines:
        if matches[0] in line:
            # end-of-timestep summary line: emit one row using the
            # infectiousness captured since the previous summary
            infected = sft.get_val(matches[1], line)
            statpop = sft.get_val(matches[3], line)
            output_df.loc[index] = [time_step, infected, infectiousness, statpop]
            index += 1
            time_step += simulation_timestep
            infectiousness = 0
            continue
        if matches[2] in line:
            infectiousness = sft.get_val(matches[2], line)
            continue
    res_path = r'./infected_vs_infectiousness.csv'
    if not os.path.exists(os.path.dirname(res_path)):
        os.makedirs(os.path.dirname(res_path))
    output_df.to_csv(res_path)
    return output_df
def parse_stdout_file(stdout_filename="StdOut.txt", simulation_timestep=1, debug=False):
    """
    creates a dataframe to store filtered information for each time step
    :param stdout_filename: file to parse (StdOut.txt)
    :param simulation_timestep: days advanced per summary line
    :param debug: when True, dump matched lines and the dataframe to debug files
    :return: stdout_df
    """
    filtered_lines = []
    with open(stdout_filename) as logfile:
        for line in logfile:
            if sft.has_match(line, matches):
                filtered_lines.append(line)
    if debug:
        with open("DEBUG_filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)
    # initialize variables
    time_step = index = 0
    stdout_df = pd.DataFrame(columns=[ConfigKeys.Simulation_Timestep, Stdout.stat_pop, Stdout.infected,
                                      Stdout.group_id, Stdout.contagion, Stdout.prob])
    stdout_df.index.name = 'index'
    stat_pop = infected = contagion = prob = group_id = None
    group_contagion = {}  # group_id -> [contagion, prob] gathered during the current time step
    for line in filtered_lines:
        if matches[0] in line:
            # Summary line ends the time step: emit one dataframe row per
            # group collected since the previous summary line.
            stat_pop = int(sft.get_val(Stdout.stat_pop, line))
            infected = int(sft.get_val(Stdout.infected, line))
            for group_id in sorted(group_contagion):
                stdout_df.loc[index] = [time_step, stat_pop, infected, group_id, group_contagion[group_id][0],
                                        group_contagion[group_id][1]]
                index += 1
            group_contagion = {}
            time_step += simulation_timestep
        elif len(group_contagion) < 10 and matches[1] in line:
            # keep only the first value per group, and at most 10 groups per step
            contagion = float(sft.get_val(matches[1], line))
            prob = float(sft.get_val(Stdout.prob, line))
            group_id = int(sft.get_val(matches[2], line))
            if group_id not in group_contagion:
                group_contagion[group_id] = [contagion, prob]
    if debug:
        res_path = r'./DEBUG_filtered_from_logging.csv'
        stdout_df.to_csv(res_path)
    return stdout_df
def parse_output_file(output_filename="test.txt", debug=False):
    """
    Record, per individual, the time step and timer at which they became
    PreSymptomatic and/or Symptomatic active.

    :param output_filename: file to parse (test.txt)
    :param debug: when True, dump the matched lines to DEBUG_filtered_lines.txt
    :return: output_dict keyed by individual id
    """
    with open(output_filename) as logfile:
        relevant = [candidate for candidate in logfile if sft.has_match(candidate, matches)]

    if debug:
        with open("DEBUG_filtered_lines.txt", "w") as outfile:
            outfile.writelines(relevant)

    clock = 0
    tick = 1
    output_dict = {}
    for entry in relevant:
        if matches[0] in entry:
            # a new simulation day begins
            clock += tick
            continue
        if matches[1] in entry:
            # this individual is PreSymptomatic active
            state_key = KEY_PRESYMPTOMATIC
        elif matches[2] in entry:
            # this individual is Symptomatic active
            state_key = KEY_SYMPTOMATIC
        else:
            continue
        person = sft.get_val(KEY_INDIVIDUAL, entry)
        countdown = float(sft.get_val(KEY_TIMER, entry))
        output_dict.setdefault(person, {})[state_key] = [clock, countdown]

    res_path = r'./SEIR_Presymptomatic_from_logging.json'
    with open(res_path, "w") as file:
        json.dump(output_dict, file, indent=4)
    return output_dict
def parse_stdout_file(curr_timestep=0, stdout_filename="test.txt", debug=False):
    """
    Collect time-stamped "Expose: Individual" lines for the original
    (non-imported) population.

    :param curr_timestep: first timestep from config
    :param stdout_filename: file to parse (test.txt)
    :param debug: whether or not we write an additional file that's full of the matched lines
    :return: array of lines of interest
    """
    expose = "Expose: Individual"
    filtered_lines = []
    update_time = "Update(): Time:"
    # BUGFIX: the clock previously started at a hard-coded 0, silently
    # ignoring the curr_timestep parameter documented as "first timestep
    # from config". Default behavior (curr_timestep=0) is unchanged.
    time = curr_timestep
    with open(stdout_filename) as logfile:
        for line in logfile:
            if update_time in line:
                time += 1
            if expose in line:
                ind = int(sft.get_val("Individual ", line))
                # do not look at imported people — assumes the initial
                # population occupies ids <= 100 (TODO confirm against config)
                if ind <= 100:
                    new_line = sft.add_time_stamp(time, line)
                    filtered_lines.append(new_line)
    if debug:
        with open("filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)
    return filtered_lines
def parse_stdout_file(self, initial_timestep=0, stdout_filename="test.txt", debug=False):
    """
    :param initial_timestep: first timestep from config
    :param stdout_filename: file to parse (test.txt)
    :param debug: whether or not we write an additional file that's full of the matched lines
    :return: array of attenuation for each day
    """
    attenuation = []
    filtered_lines = []
    time_step = initial_timestep
    latest_time = initial_timestep  # last day for which an attenuation value was recorded
    with open(stdout_filename) as logfile:
        for line in logfile:
            if "Time: " in line:
                # a new simulation day begins
                time_step += 1
            if self.attenuation_text in line:
                # record at most one attenuation value per day (the first one seen)
                if time_step > latest_time:
                    attenuation.append(float(sft.get_val(self.attenuation_text, line)))
                    latest_time = time_step
                if debug:
                    filtered_lines.append(line)
    if debug:
        with open("filtered_lines.txt", "w") as outfile:
            for line in filtered_lines:
                outfile.write(line)
    return attenuation
def calc_expected_infectiousness(line, treatment_list):
    """
    calculate the infectiousness based on the state and the params in config.json.

    :param line: log line naming a disease state and an individual id
    :param treatment_list: ids of individuals under treatment
    :return: dict with 'expected_infectiousness' (None if no state matched)
        and 'state' (human-readable state name, or None)
    """
    # NOTE: the trailing "." in each pattern is a regex wildcard (matches any
    # character), since these strings are passed to re.search.
    states = ["state PRE.", "state SUB.", "state CHR.", "state ACU."]
    expected_infectiousness = None  # stays None when the line matches no state
    state = None
    for regex in states:
        match = re.search(regex, line)
        if match:
            # tai/tpri/tsri/tcri and treatment_multiplier are module-level
            # values parsed from config.json
            if regex == "state PRE.":
                state = "Prepatent"
                expected_infectiousness = tai * tpri
            elif regex == "state SUB.":
                state = "Subclinical"
                expected_infectiousness = tai * tsri
            elif regex == "state CHR.":
                state = "Chronic"
                expected_infectiousness = tai * tcri
            elif regex == "state ACU.":
                state = "Acute"
                expected_infectiousness = tai  # acute uses the base infectiousness
            # individuals currently under treatment get an extra multiplier
            ind_id = int(sft.get_val("Individual ", line))
            if ind_id in treatment_list:
                expected_infectiousness *= treatment_multiplier
    return {'expected_infectiousness': expected_infectiousness, 'state': state}
def parse_stdout_file(time_step=0, stdout_filename="test.txt",
                      filtered_filename="filtered_lines.txt", debug=False):
    """
    Tally the running total of susceptible people per day (each day's entry
    is the previous day's total plus newly susceptible people logged since).

    :param time_step: first timestep from config
    :param stdout_filename: file to parse (test.txt)
    :param filtered_filename: file to which we write lines of interest when debug = True
    :param debug: flag indicating whether or not we want debug data written out
    :return: a list of total people susceptible each day, total population
    """
    kept_lines = []
    daily_totals = []
    population = 0
    running_total = 0
    with open(stdout_filename, "r") as infile:
        for line in infile:
            if UPDATE_TIME in line:
                kept_lines.append(line)
                # snapshot the running total at each day boundary
                daily_totals.append(running_total)
                time_step += 1
                if population == 0:
                    # population is read once, from the first day line
                    population = int(sft.get_val(POPULATION, line))
            elif SUSCEPTIBLE in line:
                kept_lines.append(line)
                running_total += 1
    if debug:
        with open(filtered_filename, "w") as outfile:
            outfile.writelines(kept_lines)
    return [daily_totals, population]
def create_report_file(data, debug=False):
    """
    Check that TB latency durations (timer update -> presymptomatic) follow
    the expected exponential distribution and write a pass/fail report.

    :param data: [report_name, log lines, tb_cd4_activation_vector]
    :param debug: when True, dump the per-individual durations to json
    """
    report_name = data[0]
    lines = data[1]
    tb_cd4_activation_vector = data[2]  # this test assumes the vector is constant
    latency_data = {}   # individual id -> time the latency timer was (re)set
    duration_data = {}  # individual id -> days from timer update to presymptomatic
    success = True
    with open(report_name, "w") as outfile:
        if not lines:
            outfile.write("BAD: No relevant test data found.\n")
            success = False
        for line in lines:
            if "LifeCourseLatencyTimerUpdate" in line:
                ind_id = int(sft.get_val("Individual ", line))
                start_time_stamp = int(sft.get_val("time= ", line))
                # a second update for the same individual is suspicious; flag it
                if ind_id in latency_data.keys():
                    outfile.write("Individual {} incubation timer reset at time {}. Please check. "
                                  "\n".format(ind_id, start_time_stamp))
                latency_data[ind_id] = start_time_stamp
            elif "TBActivationPresymptomatic" in line:
                ind_id = int(sft.get_val("Individual ", line))
                end_time_stamp = int(sft.get_val("time= ", line))
                if ind_id not in latency_data.keys():
                    outfile.write("Individual {} went presymptomatic without incubation timer update at time {}. "
                                  "Please check. \n".format(ind_id, end_time_stamp))
                else:
                    duration = end_time_stamp - latency_data.get(ind_id)
                    duration_data[ind_id] = duration
        if debug:
            with open("DEBUG_duration_data.json", "w") as debug_outfile:
                json.dump(duration_data, debug_outfile, indent=4)
        durations = list(duration_data.values())
        if not sft.test_exponential(durations, tb_cd4_activation_vector[0], outfile, integers=True, roundup=True,
                                    round_nearest=False):
            success = False
        outfile.write("Data points checked = {}.\n".format(len(duration_data)))
        outfile.write("SUMMARY: Success={0}\n".format(success))
        # only used for graphing purposes
        expected_data = map(math.ceil, np.random.exponential(1 / tb_cd4_activation_vector[0], len(duration_data)))
        expected_durations = list(expected_data)
        sft.plot_data_sorted(durations, expected_durations, label1="Actual", label2="Expected",
                             title="Recalculated Latency Duration TB then HIV(Sorted)", xlabel="Data Points",
                             ylabel="Days", category="tb_activation_and_cd4_tb_first", line=True, overlap=True)
def create_report_file(data, inset_days, report_name=sft.sft_output_filename):
    """
    Compare logged dose-response probabilities against theoretical values for
    the contact and environment routes, and write a pass/fail report.

    :param data: [contact-route lines, environment-route lines, typhoid_acute_infectiousness]
    :param inset_days: NOTE(review): accepted but not used in this function — confirm with callers
    :param report_name: output report path
    :return: [contact actual, contact theoretic, environment actual, environment theoretic]
    """
    lines_c = data[0]
    lines_e = data[1]
    typhoid_acute_infectiousness = data[2]
    # beta-Poisson dose-response parameters for the environment route
    ALPHA = 0.175
    N50 = 1.11e6
    dose_response_data_c = []
    dose_response_data_e = []
    dose_response_data_theoretic_c = []
    dose_response_data_theoretic_e = []
    success = True
    with open(report_name, "w") as report_file:
        if len(lines_c) == 0:
            success = False
            report_file.write("Found no individual exposed from route contact.\n")
        else:
            for line in lines_c:
                dose_response_c = float(sft.get_val("infects=", line))
                dose_response_data_c.append(dose_response_c)
                ind_id_c = int(sft.get_val("individual ", line))
                timestep_c = int(sft.get_val("TimeStep: ", line))
                fContact = float(sft.get_val("fContact=", line))
                # contact route expectation is a simple linear scaling
                dose_response_theoretic_c = float(fContact / typhoid_acute_infectiousness)
                dose_response_data_theoretic_c.append(dose_response_theoretic_c)
                if math.fabs(dose_response_theoretic_c - dose_response_c) > 5e-2:
                    success = False
                    report_file.write(
                        "BAD: Dose-response probability for individual {0} at time {1}, route contact is {2}, "
                        "expected {3}.\n".format(ind_id_c, timestep_c, dose_response_c, dose_response_theoretic_c))
        if len(lines_e) == 0:
            success = False
            report_file.write("Found no individual exposed from route environment.\n")
        else:
            for line in lines_e:
                dose_response_e = float(sft.get_val("infects=", line))
                dose_response_data_e.append(dose_response_e)
                ind_id = int(sft.get_val("individual ", line))
                timestep = int(sft.get_val("TimeStep: ", line))
                exposure = float(sft.get_val("exposure=", line))
                # environment route uses the beta-Poisson dose-response model
                dose_response_theoretic_e = 1.0 - math.pow(1.0 + exposure * (math.pow(2.0, 1.0 / ALPHA) - 1.0) / N50,
                                                           -1.0 * ALPHA)
                dose_response_data_theoretic_e.append(dose_response_theoretic_e)
                if math.fabs(dose_response_theoretic_e - dose_response_e) > 5e-2:
                    success = False
                    report_file.write(
                        "BAD: Dose-response probability for individual {0} at time {1}, route environment is {2}, "
                        "expected {3}.\n".format(ind_id, timestep, dose_response_e, dose_response_theoretic_e))
        report_file.write("Sample size of dose response is {0} for route contact and {1} for route environment."
                          "\n".format(len(lines_c), len(lines_e)))
        report_file.write(sft.format_success_msg(success))
    return [dose_response_data_c, dose_response_data_theoretic_c, dose_response_data_e, dose_response_data_theoretic_e]
def create_report_file(data):
    """
    Check that time from active symptomatic TB (on ART) to coinfection death
    follows the expected exponential distribution and write a report.

    :param data: [report_name, log lines, off-ART mortality rate, on-ART mortality rate]
    """
    report_name = data[0]
    lines = data[1]
    # NOTE(review): the off-ART rate is unpacked but not used below; this
    # test validates the on-ART rate only — confirm intent.
    coinfection_mortality_rate_off_art = data[2]
    coinfection_mortality_rate_on_art = data[3]
    died_of_coinfection = "died of CoInfection"
    state_active_symptomatic = "infectionstatechange TBActivation "
    time_to_death_data = []
    active_infections_dictionary = {}  # individual id -> time they went active symptomatic
    success = True
    with open(report_name, "w") as outfile:
        if not lines:
            outfile.write("BAD: No relevant test data found.\n")
            success = False
        for line in lines:
            if died_of_coinfection in line:
                ind_id = int(sft.get_val("Individual ", line))
                time_stamp = int(sft.get_val("time= ", line))
                if ind_id in active_infections_dictionary.keys():
                    time_to_death_data.append(time_stamp - active_infections_dictionary[ind_id])
                else:
                    success = False
                    outfile.write("BAD: Individual {} died of coinfection without going active, at time {}."
                                  "\n".format(ind_id, time_stamp))
            elif state_active_symptomatic in line:
                ind_id = int(sft.get_val("Individual ", line))
                start_time_stamp = int(sft.get_val("time= ", line))
                if ind_id in active_infections_dictionary.keys():
                    outfile.write("Individual {} went active symptomatic while already being active symptomatic"
                                  "at time {}. \n".format(ind_id, start_time_stamp))
                else:
                    active_infections_dictionary[ind_id] = start_time_stamp
        # expected_data here only used for graphing purposes
        expected_data = map(int, np.random.exponential(1 / coinfection_mortality_rate_on_art,
                                                       len(time_to_death_data)))
        if not sft.test_exponential(time_to_death_data, coinfection_mortality_rate_on_art, outfile, integers=True,
                                    roundup=False, round_nearest=False):
            success = False
        outfile.write("Data points checked = {}.\n".format(len(time_to_death_data)))
        outfile.write("SUMMARY: Success={0}\n".format(success))
        sft.plot_data(sorted(time_to_death_data), sorted(expected_data), label1="Actual", label2="Expected",
                      title="Time from Smear Positive On ART TBHIV to Death", xlabel="Data Points", ylabel="Days",
                      category="tbhiv_mortality_smear_positive_on_art", line=True, overlap=True)
def create_report_file(data):
    """
    Compare logged CD4 susceptibility modifiers against values recomputed
    from the CD4 strata / modifier arrays, and write a pass/fail report.

    :param data: [report_name, log lines, cd4_strata, mod_array]
    """
    report_name, lines, cd4_strata, mod_array = data[0], data[1], data[2], data[3]
    actual_data = []
    expected_data = []
    success = True
    epsilon = 0.000002  # tolerance for the float comparison
    with open(report_name, "w") as outfile:
        if not lines:
            outfile.write("BAD: No relevant test data found.\n")
            success = False
        for line in lines:
            count = float(sft.get_val("CD4count=", line))
            logged_mod = float(sft.get_val("CD4mod=", line))
            recomputed_mod = tb_cd4_susceptibility_calc([mod_array, cd4_strata, count])
            actual_data.append(logged_mod)
            expected_data.append(recomputed_mod)
            if abs(logged_mod - recomputed_mod) > epsilon:
                success = False
                outfile.write(
                    "BAD: At Time: {} for Individual {} with CD4 count {} Expected susceptibility modifier "
                    "was {}, but actual was {}.\n".format(sft.get_val("time= ", line),
                                                          sft.get_val("Individual ", line),
                                                          count, recomputed_mod, logged_mod))
        outfile.write("Data points checked = {} .\n".format(len(lines)))
        outfile.write("SUMMARY: Success={0}\n".format(success))
    sft.plot_data_sorted(actual_data, expected_data,
                         label1="Actual", label2="Expected",
                         title="Susceptibility Modifier", xlabel="Data Points",
                         ylabel="Modifying Multiplier",
                         category="tb_susceptibility_and_cd4", line=True, overlap=True)
def parse_stdout_file(stdout_filename="StdOut.txt", simulation_timestep=1, debug=False):
    """
    creates a dataframe to store filtered information for each time step
    :param stdout_filename: file to parse (StdOut.txt)
    :param simulation_timestep: days advanced per summary line
    :param debug: when True, dump matched lines and the dataframe to debug files
    :return: stdout_df
    """
    filtered_lines = []
    with open(stdout_filename) as logfile:
        for line in logfile:
            if sft.has_match(line, matches):
                filtered_lines.append(line)
    if debug:
        with open("DEBUG_filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)
    # initialize variables
    time_step = index = 0
    stdout_df = pd.DataFrame(columns=[ConfigKeys.Simulation_Timestep, Stdout.stat_pop, Stdout.infected,
                                      Stdout.group_id, Stdout.contagion, Stdout.prob])
    stdout_df.index.name = 'index'
    stat_pop = infected = contagion = prob = group_id = None
    group_contagion = {}  # group_id -> [contagion, prob] gathered during the current time step
    for line in filtered_lines:
        if matches[0] in line:
            # Summary line ends the time step: emit one dataframe row per
            # group collected since the previous summary line.
            stat_pop = int(sft.get_val(Stdout.stat_pop, line))
            infected = int(sft.get_val(Stdout.infected, line))
            for group_id in sorted(group_contagion):
                stdout_df.loc[index] = [time_step, stat_pop, infected, group_id, group_contagion[group_id][0],
                                        group_contagion[group_id][1]]
                index += 1
            group_contagion = {}
            time_step += simulation_timestep
        elif len(group_contagion) < 10 and matches[1] in line:
            # keep only the first value per group, and at most 10 groups per step
            contagion = float(sft.get_val(matches[1], line))
            prob = float(sft.get_val(Stdout.prob, line))
            group_id = int(sft.get_val(matches[2], line))
            if group_id not in group_contagion:
                group_contagion[group_id] = [contagion, prob]
    if debug:
        res_path = r'./DEBUG_filtered_from_logging.csv'
        stdout_df.to_csv(res_path)
    return stdout_df
def parse_stdout_file(output_filename="test.txt", debug=False):
    """
    Read the stdout file and build an individual-ID-indexed DataFrame of ages
    and mod_acquire values, using only lines seen before the first
    "Update(): Time:" marker (i.e. a single timestep).

    :param output_filename: stdout filename (test.txt)
    :param debug: when True, dump matched lines and the resulting dataframe
    :return: dataframe of all individuals from a single timestep
    """
    patterns = ["{} = ".format(DataframeKeys.MOD_ACQUIRE),
                "{} = ".format(DataframeKeys.AGE),
                "{} = ".format(DataframeKeys.ID),
                "Update(): Time: "]
    captured = []
    with open(output_filename) as logfile:
        for line in logfile:
            if not sft.has_match(line, patterns):
                continue
            if patterns[-1] in line:
                # first timestep boundary reached: stop scanning
                break
            captured.append(line)

    if debug:
        with open("filtered_lines.txt", "w") as outfile:
            outfile.writelines(captured)

    individuals_df = pd.DataFrame(columns=[DataframeKeys.AGE, DataframeKeys.MOD_ACQUIRE])
    individuals_df.index.name = 'index'
    for line in captured:
        ind_age = float(sft.get_val(patterns[1], line))
        ind_acquire = float(sft.get_val(patterns[0], line))
        ind_id = int(sft.get_val(patterns[2], line))
        individuals_df.loc[ind_id] = [ind_age, ind_acquire]

    if debug:
        with open("DEBUG_individuals_dataframe.csv", "w") as outfile:
            outfile.write(individuals_df.to_csv())
    return individuals_df
def parse_stdout_file(start_timestep, duration_of_interest, stdout_filename="test.txt", debug=False):
    """
    creates cum_death and death_times array
    :param start_timestep: drug start time
    :param duration_of_interest: length (in timesteps) of the observation window after drug start
    :param stdout_filename: file to parse (test.txt)
    :param debug: when True, dump matched lines and result arrays to files
    :return: cum_deaths, deaths, infected_individuals, death_times
    """
    filtered_lines = []
    with open(stdout_filename) as logfile:
        for line in logfile:
            if sft.has_match(line, matches):
                filtered_lines.append(line)
    if debug:
        with open("filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)
    # initialize variables
    cum_deaths = []
    deaths = []
    death_times = []
    timestep = 0
    cum = death_daily = infected = 0
    infected_individuals = []
    for line in filtered_lines:
        if "Time:" in line:
            # day boundary: snapshot the counters accumulated so far
            timestep += 1
            infected_individuals.append(infected)
            cum_deaths.append(cum)
            deaths.append(death_daily)
            death_daily = 0
            infected = int(sft.get_val("Infected: ", line))
        else:
            # every non-"Time:" matched line represents one death
            cum += 1
            death_daily += 1
            # cum_deaths[-1] = cum
            # record deaths that fall inside the observation window,
            # expressed relative to the drug start time
            if timestep <= duration_of_interest + start_timestep:
                death_times.append(timestep - start_timestep)
    if debug:
        with open("cum_deaths.txt", "w") as file:
            file.writelines(str(cum_deaths))
        with open("death_times.txt", "w") as file:
            file.writelines(str(death_times))
    return cum_deaths, deaths, infected_individuals, death_times
def parse_output_file(output_filename="test.txt", simulation_timestep=1, debug=False):
    """
    Tally TB test outcomes (positive / negative / default) per time step into
    a DataFrame.

    :param output_filename: file to parse (test.txt)
    :param simulation_timestep: days advanced per summary line
    :param debug: when True, dump the matched lines to DEBUG_filtered_lines.txt
    :return: output_df indexed by time step
    """
    with open(output_filename) as logfile:
        kept = [candidate for candidate in logfile if sft.has_match(candidate, matches)]

    if debug:
        with open("DEBUG_filtered_lines.txt", "w") as outfile:
            outfile.writelines(kept)

    day = 0
    n_positive = n_negative = n_default = 0
    output_df = pd.DataFrame(columns=[ReportColumn.negative, ReportColumn.default, ReportColumn.positive])
    output_df.index.name = Config.simulation_timestep
    for entry in kept:
        if matches[0] in entry:
            # flush the counters accumulated for the day that just ended
            output_df.loc[day] = pd.Series({ReportColumn.positive: n_positive,
                                            ReportColumn.negative: n_negative,
                                            ReportColumn.default: n_default})
            day += simulation_timestep
            n_positive = n_negative = n_default = 0
        if matches[1] in entry:
            # test result line: nonzero => positive, zero => negative
            if int(sft.get_val(matches[1], entry)):
                n_positive += 1
            else:
                n_negative += 1
        if matches[2] in entry:
            n_default += 1

    res_path = r'./DEBUG_tb_test_result_from_logging.csv'
    with open(res_path, "w") as file:
        output_df.to_csv(file)
    return output_df
def parse_output_file(output_filename="test.txt", debug=False):
    """
    Collect the heterogeneity multiplier and infectiousness values per match
    pattern from the log.

    :param output_filename: file to parse (test.txt)
    :param debug: when True, dump the matched lines to filtered_lines.txt
    :return: output_obj mapping each pattern in matches to its list of values
    """
    kept_lines = []
    output_obj = {pattern: [] for pattern in matches}
    with open(output_filename) as logfile:
        for line in logfile:
            for pattern in matches:
                if pattern in line:
                    # a line contributes to the first pattern it contains only
                    output_obj[pattern].append(float(sft.get_val(pattern, line)))
                    kept_lines.append(line)
                    break
    if debug:
        with open("filtered_lines.txt", "w") as outfile:
            outfile.writelines(kept_lines)
    return output_obj
def parse_output_file(output_filename="test.txt", debug=False):
    """
    Gather per-pattern numeric values (heterogeneity multiplier and
    infectiousness) from the log file.

    :param output_filename: file to parse (test.txt)
    :param debug: when True, dump the matched lines to filtered_lines.txt
    :return: output_obj mapping each pattern in matches to its list of values
    """
    output_obj = dict((key, []) for key in matches)
    debug_lines = []
    with open(output_filename) as logfile:
        for raw_line in logfile:
            # first pattern contained in the line wins; others are ignored
            hit = next((key for key in matches if key in raw_line), None)
            if hit is not None:
                output_obj[hit].append(float(sft.get_val(hit, raw_line)))
                debug_lines.append(raw_line)
    if debug:
        with open("filtered_lines.txt", "w") as outfile:
            outfile.writelines(debug_lines)
    return output_obj
def application(report_file):
    """
    Verify that every newborn's Typhoid immunity modifier is initialized to
    1.0, write the report, and plot actual vs expected values.

    :param report_file: NOTE(review): this parameter is shadowed by the
        opened report file handle below and its incoming value is never used.
    """
    sft.wait_for_done()
    lines = []
    with open("test.txt") as logfile:
        for line in logfile:
            # only newborn (age 0) immunity-initialization lines are relevant
            if re.search("Initializing Typhoid immunity object", line) and re.search("age=0.000000", line):
                lines.append(line)
    success = True
    actual_newborn_immunity_data = []
    expected_newborn_immunity_data = []
    with open(sft.sft_output_filename, "w") as report_file:
        if not lines:
            success = False
            report_file.write("BAD: Found no data matching test case.\n")
        else:
            for line in lines:
                immunity = float(sft.get_val(" immunity modifier", line))
                actual_newborn_immunity_data.append(immunity)
                expected_newborn_immunity_data.append(1)
                # exact comparison is intended: newborn immunity must be exactly 1.0
                if immunity != 1.000:
                    success = False
                    report_file.write("BAD: immunity for newborn={0} instead of 1.0\n".format(immunity))
        report_file.write(sft.format_success_msg(success))
    sft.plot_data_sorted(actual_newborn_immunity_data, expected_newborn_immunity_data, label1="Actual",
                         label2="Expected", title="Newborn Immunity Data", xlabel="Individuals", ylabel="Immunity",
                         category='multi_mode_intervention')
def application(report_file):
    """
    Validate contact-route infection probabilities: per-exposure probability
    against the theoretical formula, and total infections (from InsetChart
    and from logging) against a Poisson-binomial confidence interval.

    :param report_file: NOTE(review): shadowed by the opened report handle
        below; the incoming value is never used.
    """
    # pdb.set_trace()
    # print( "Post-processing: " + report_file )
    sft.wait_for_done()
    cdj = json.loads(open("config.json").read())["parameters"]
    start_time = cdj["Start_Time"]
    timestep = start_time
    lines_c = []
    count_contact = 0
    with open("test.txt") as logfile:
        for line in logfile:
            if "Update(): Time:" in line:
                # calculate time step
                timestep = int(float(sft.get_val('Time: ', line)))
            elif ("Exposing" in line) and ("route 'contact'" in line):
                # collect dose_response probabilities and dose for route contact
                line = "TimeStep: " + str(timestep) + " " + line
                lines_c.append(line)
            # route=0, outbreak; route=1, contact; route=2, environment
            elif ("AcquireNewInfection:" in line) and ("route=1" in line):
                count_contact += 1
    success = True
    infection_prob_c_all = []
    infection_prob_c_theoretic_all = []
    with open(sft.sft_output_filename, "w") as report_file:
        if not lines_c:
            success = False
            report_file.write("Found no individual exposed from route contact.\n")
        else:
            prob_per_num_exposures = {}
            prob_per_num_exposures_expected = {}
            random_dose_response = None
            for line in lines_c:
                dose_response = float(sft.get_val("infects=", line))
                # some version of logging as a typo
                ind_id = int(sft.get_val("individual ", line)) if "individual " in line else \
                    int(sft.get_val("inividual ", line))
                immunity = float(sft.get_val("immunity=", line))
                num_exposures = float(sft.get_val("num_exposures=", line))
                infection_prob = float(sft.get_val("prob=", line))
                timestep = int(sft.get_val("TimeStep: ", line))
                # probability of at least one success over num_exposures
                # independent exposures, each succeeding with immunity * dose_response
                infection_prob_theoretic = 1.0 - math.pow((1.0 - immunity * dose_response), num_exposures)
                infection_prob_c_all.append(infection_prob)
                infection_prob_c_theoretic_all.append(round(infection_prob_theoretic, 6))
                if math.fabs(infection_prob_theoretic - infection_prob) > 5e-2:
                    success = False
                    report_file.write("BAD: Infection probability for individual {0} at time {1}, route contact is {2},"
                                      " expected {3}.\n".format(ind_id, timestep, infection_prob,
                                                                infection_prob_theoretic))
                # pick a random dose_response, for plot to be added to spec
                if random_dose_response is None:
                    random_dose_response = dose_response
                if immunity == 1.0 and dose_response == random_dose_response:
                    if num_exposures not in prob_per_num_exposures:
                        prob_per_num_exposures[num_exposures] = infection_prob
                        prob_per_num_exposures_expected[num_exposures] = infection_prob_theoretic
            if success:
                report_file.write("GOOD: Infection probability matches expected value for every exposure.\n")
            else:
                report_file.write("BAD: Infection probability doesn't match expected value for every exposure.\n")
            # plot for Spec
            # sort dictionary by keys
            prob_per_num_exposures = dict(sorted(prob_per_num_exposures.items()))
            prob_per_num_exposures_expected = dict(sorted(prob_per_num_exposures_expected.items()))
            sft.plot_data([v for k, v in prob_per_num_exposures.items()],
                          [v for k, v in prob_per_num_exposures_expected.items()],
                          label1="Actual", label2="Expected",
                          title="Infection Probability(Contact)\nimmunity=1, dose_response={}".format(
                              random_dose_response),
                          xlabel="num_exposures", ylabel="Infection Probability",
                          category='infection_probability_contact', alpha=0.5, overlap=True,
                          xticks=range(len(prob_per_num_exposures)),
                          xtickslabel=[str(int(i)) for i in prob_per_num_exposures])
            # sum of independent Bernoulli trials is Poisson binomial distribution
            # contact is calculated first
            # mean = average number of events per interval
            mean_c = sft.calc_poisson_binomial(infection_prob_c_all)['mean']
            sd_c = sft.calc_poisson_binomial(infection_prob_c_all)['standard_deviation']
            num_trials_c = len(infection_prob_c_all)
            prob_c = mean_c / float(num_trials_c)
            # report_file.write("contact:trials = {2}, num_infection = {3},prob_c = {4}, mean= {0}, sd = {1}.
            # \n".format(mean_c, sd_c,num_trials_c,count_contact,prob_c))
            # TODO: comment out line 101 to 115 once #2895 is fixed. we are going to test New Infections By Route
            # channels in NewInfection SFT, we only test the logging in this test.
            isj = json.loads(open("output/InsetChart.json").read())["Channels"]
            new_infection_contact = isj["New Infections By Route (CONTACT)"]["Data"]
            insetchart_total_contact_infection = sum(new_infection_contact)
            message_template = "{0}: {1} is {2}, while total number of exposure = {3}, sum of contact " \
                               "infection_probability = {4}, expected total contact infections = {5} " \
                               "with standard_deviation = {6}.\n"
            if not sft.test_binomial_95ci(insetchart_total_contact_infection, num_trials_c, prob_c, report_file,
                                          'contact infection'):
                success = False
                report_file.write(message_template.format("BAD", "sum of 'New Infections By Route (CONTACT)' in "
                                                                 "InsetChart.json", insetchart_total_contact_infection,
                                                          num_trials_c, prob_c, mean_c, sd_c))
            else:
                report_file.write(message_template.format("GOOD", "sum of 'New Infections By Route (CONTACT)' in "
                                                                  "InsetChart.json", insetchart_total_contact_infection,
                                                          num_trials_c, prob_c, mean_c, sd_c))
            if not sft.test_binomial_95ci(count_contact, num_trials_c, prob_c, report_file, 'contact infection'):
                success = False
                report_file.write(message_template.format("BAD", "total contact infections from StdOut logging",
                                                          count_contact, num_trials_c, prob_c, mean_c, sd_c))
            else:
                report_file.write(message_template.format("GOOD", "total contact infections from StdOut logging",
                                                          count_contact, num_trials_c, prob_c, mean_c, sd_c))
            sft.plot_data(infection_prob_c_all, infection_prob_c_theoretic_all,
                          # filter(lambda a: a != 0, infection_prob_c_all),
                          # filter(lambda a: a != 0, infection_prob_c_theoretic_all),
                          label1="Actual", label2="Expected", title="Infection Probability Contact",
                          xlabel="Occurrence", ylabel="Infection Probability",
                          category='immunity_probability_contact', alpha=0.5, overlap=True, sort=False)
        report_file.write(sft.format_success_msg(success))
def application(report_file, debug=True):
    """SFT entry point: verify seasonal environmental amplification and the
    environmental contagion an individual is exposed to, by re-deriving both
    from the depositing/Exposing lines in test.txt and comparing against the
    logged values and InsetChart.json."""
    sft.wait_for_done()
    """
    Parse this line from test.txt:
    00:00:00 [0] [V] [IndividualTyphoid] amplification calculated as 0.997059: day of year=1, start=360.000000, end=365.000000, ramp_up=30.000000, ramp_down=170.000000, cutoff=160.000000.
    00:00:00 [0] [V] [IndividualTyphoid] Exposing individual 2 age 8582.488281 on route 'environment': prob=0.000000, infects=0.000008, immunity=1.000000, num_exposures=0, exposure=0.997059, environment=1.000000, iv_mult=1.000000.
    """
    #print( "Post-processing: " + report_file )
    # get params from config.json
    cdj = json.loads( open( "config.json" ).read() )["parameters"]
    ncdr = cdj["Node_Contagion_Decay_Rate"]
    start_time=cdj["Start_Time"]
    lines = []
    timestep=start_time
    count =0
    amp = {}          # timestep -> amplification parsed from log
    exposure = {}     # timestep -> first "exposure=" value seen that step
    environment = {}  # timestep -> first "environment=" value seen that step
    # NOTE(review): `cum` is never read after this line; only cum_shedding is used.
    cum_shedding = cum = 0
    cum_shedding_all = {}  # timestep -> accumulated (decayed) shedding at step start
    Statpop = []
    with open( "test.txt" ) as logfile:
        for line in logfile:
            if "Update(): Time:" in line:
                # store the accumulated shedding and reset the counter at the end of each time step.
                cum_shedding_all[timestep] = cum_shedding
                pop = float(sft.get_val("StatPop: ", line))
                Statpop.append(pop)
                # environmental cp decay
                cum_shedding *= 1.0 - ncdr
                # resetting shedding variables
                shedding = 0
                #calculate time step
                timestep += 1
                line = "TimeStep: " + str(timestep) + " " + line
                lines.append(line)
            elif "amplification calculated as" in line:
                count += 1
                line = "TimeStep: " + str(timestep) + " " + line
                lines.append(line)
                amp[timestep] = float(sft.get_val("amplification calculated as ", line))
            elif ("Exposing" in line) and ("environment" in line):
                line = "TimeStep: " + str(timestep) + " " + line
                lines.append(line)
                # only the first exposure per time step is recorded
                if timestep not in exposure:
                    exposure[timestep] = float(sft.get_val("exposure=", line))
                    environment[timestep] = float(sft.get_val("environment=", line))
            elif ("depositing" in line) and ("route environment" in line):
                # get shedding of contact route and add it to accumulated shedding
                shedding = float(sft.get_val("depositing ", line))
                cum_shedding += shedding
                line = "TimeStep: " + str(timestep) + " " + line
                lines.append(line)
    if debug:
        with open("DEBUG_filtered_line.txt","w") as filter_file:
            filter_file.write("".join(lines))
    #more params from config file
    rud=cdj["Environmental_Ramp_Up_Duration"]
    rdd=cdj["Environmental_Ramp_Down_Duration"]
    ecd=cdj["Environmental_Cutoff_Days"]
    eps=cdj["Environmental_Peak_Start"]
    peak_duration=365-rud-rdd-ecd
    # for eps > 365
    peak_starttime = eps % 365
    peak_endtime = peak_starttime + peak_duration
    cutoff_starttime = peak_starttime + peak_duration + rdd
    #cutoff_endtime = peak_starttime + peak_duration + rdd + ecd
    success = True
    expected_e_contagion = {}
    environmental_amp = {}
    inset_chart_obj = general_support.parse_inset_chart("output", "InsetChart.json",
                                                       insetkey_list=[InsetKeys.ChannelsKeys.Environmental_Contagion_Population])
    # NOTE(review): the report_file parameter is shadowed here; the function
    # writes to sft.sft_output_filename, not to the argument it was given.
    with open( sft.sft_output_filename, "w" ) as report_file:
        report_file.write("Peak_Start={0}, Peak_Duration={1}, Peak_End={2}, Ramp_Down={3}, Ramp_Up={4},"
                          "Cutoff_Start={5}, Cutoff_Duration={6}.\n".format(peak_starttime, peak_duration,
                                                                            peak_endtime, rdd, rud,
                                                                            cutoff_starttime, ecd))
        if ncdr != 1:
            report_file.write("WARNING: Node_Contagion_Decay_Rate is {}, suggest to set it to 1.\n".format( ncdr ))
        if count == 0:
            success = False
            report_file.write( "Found no data matching test case.\n" )
        elif peak_duration < 0:
            success = False
            report_file.write("BAD: Environmental peak duration should be larger or equal to 0, the actual value is {}."
                              "\n The ramp durations and cutoff days need to follow a rule where: ramp_up_duration + "
                              "ramp_down_duration + cutoff_days < 365.\n".format(peak_duration))
        else:
            # adjust the times so that the ramp up starts at time 0, which means the cut off ends at time 0 too.
            adjust_time = peak_starttime - rud
            peak_starttime -= adjust_time
            peak_endtime -= adjust_time
            cutoff_starttime -= adjust_time
            # cutoff_endtime -= adjust_time
            with open("DEBUG_contagion_data.txt", "w") as debug_file:
                for t in range(timestep - 2):
                    amplification = environmental_amplification = None
                    if t in amp:
                        TimeStep = t - adjust_time
                        day_in_year = TimeStep%365
                        amplification = amp[t]
                        # piecewise-linear seasonal schedule: ramp up -> peak -> ramp down -> cutoff
                        # Environment Ramp Up
                        if day_in_year < peak_starttime:
                            environmental_amplification = day_in_year/float(rud)
                        # Environment peak
                        elif peak_starttime <= day_in_year <= peak_endtime:
                            environmental_amplification = 1
                        # Environment Ramp Down
                        elif peak_endtime < day_in_year < cutoff_starttime:
                            environmental_amplification = (cutoff_starttime - day_in_year) / float(rdd)
                        # Environment cutoff
                        elif day_in_year >= cutoff_starttime:
                            environmental_amplification = 0
                        # 5% relative tolerance against the logged amplification
                        if math.fabs(amplification - environmental_amplification) > 5e-2 * environmental_amplification:
                            success =False
                            report_file.write("BAD: at time {0}, day of year = {1}, the environmental amplification is {2},"
                                              " expected {3}.\n".format(t, t%365, amplification,
                                                                        environmental_amplification))
                        environmental_amp[t] = environmental_amplification
                    if t in exposure:
                        exposure_t = exposure[t]
                        # only checkable on steps where an amplification line was also seen
                        if amplification is not None:
                            # expected exposure = (decayed accumulated shedding / population) * amplification
                            environment_contagion = cum_shedding_all[t-start_time] / Statpop[t -start_time]
                            expected_exposure = environment_contagion * amplification
                            if math.fabs(expected_exposure - exposure_t) > 5e-2 * expected_exposure:
                                success =False
                                report_file.write("BAD: at time {0}, day of year = {1}, the amount of environmental contagion "
                                                  "that individual is exposed is {2}, expected {3}"
                                                  ".\n".format(t, t%365, exposure_t, expected_exposure))
                            expected_e_contagion[t] = expected_exposure
                            if debug:
                                environment_t = environment[t]
                                inserchart_contagion_e = inset_chart_obj \
                                    [InsetKeys.ChannelsKeys.Environmental_Contagion_Population][t]
                                debug_file.write("At time step {0}: environment={1}, exposure={2} from loggind, "
                                                 "{3}={4} from InsetChart, expected value={5}."
                                                 "accumulated shedding after decay={6} calculated from logging, "
                                                 "environmental amplification={7}.\n".format(
                                    t, environment_t,exposure_t,
                                    InsetKeys.ChannelsKeys.Environmental_Contagion_Population,
                                    inserchart_contagion_e, expected_exposure, environment_contagion,
                                    amplification))
        sft.plot_data(amp.values(), environmental_amp.values(),
                      label1="Actual Seasonal Attenuation",
                      label2="Expected Seasonal Attenuation",
                      title="Seasonal Attenuation", xlabel="Time",
                      ylabel="Attenuation",
                      category='seasonal_attenuation')
        # pad missing time steps with zeros so the two plotted series align
        for t in range(len(amp)):
            if t not in exposure:
                exposure[t] = 0
                expected_e_contagion[t] = 0
        sft.plot_data([exposure[key] for key in sorted(exposure.keys())],
                      [expected_e_contagion[key] for key in sorted(expected_e_contagion.keys())],
                      label1="exposure contagion(from StdOut)", label2="Expected",
                      title="Environmental Contagion", xlabel="Time",
                      ylabel="Environmental Contagion",
                      category='Environmental_Contagion')
        report_file.write( sft.format_success_msg( success ) )
def parse_stdout_file(drug_start_time, start_timestep, stdout_filename="test.txt", debug=False):
    """creates cum_inactivations, inactivation_times arrays

    Walks the filtered stdout log one time step at a time, tracking TB-drug
    deactivations, reactivations and HIV deaths per individual.

    :param drug_start_time: drug start time
    :param start_timestep: first timestep from config
    :param stdout_filename: file to parse (test.txt)
    :param debug: when True, dump filtered lines and result arrays to files
    :return: cum_inactivations, inactivation_times, active_count, inactivations
    """
    filtered_lines = []
    with open(stdout_filename) as logfile:
        for line in logfile:
            if sft.has_match(line, matches):
                filtered_lines.append(line)
    if debug:
        with open("filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)
    # initialize variables
    cum_inactivations = []   # cumulative inactivation count snapshot per time step
    inactivation_times = []  # days from (drug start) to each first inactivation
    timestep = 0
    cum = inactivation_daily = infected = active = 0
    infected_individuals = []
    inactivations = []       # inactivations per day
    active_count = []        # running active count snapshot per time step
    reactivation_times = {}  # ind_id -> time step the individual reactivated
    for line in filtered_lines:
        if matches[2] in line:
            # time-step boundary: snapshot running counters, then reset the daily one
            cum_inactivations.append(cum)
            infected_individuals.append(infected)
            inactivations.append(inactivation_daily)
            active_count.append(active)
            infected = int(sft.get_val("Infected: ", line))
            inactivation_daily = 0
            timestep += 1
        elif matches[0] in line:
            # deactivation: have to track individual
            individual = int(sft.get_val("TB drug deactivated my \(", line))
            # if timestep <= duration_of_interest + start_timestep + drug_start_time:
            inactivation_time = timestep - start_timestep - drug_start_time
            if individual in reactivation_times:
                # deactivation after a reactivation: measured from the reactivation instead
                inactivation_time = timestep - reactivation_times[individual]
                reactivation_times.pop(individual)
                # not including the reactivations: there are some data point lost due to simulation end before timers end
                # inactivation_times.append( inactivation_time )
            else:
                cum += 1
                active -= 1
                inactivation_daily += 1
                inactivation_times.append(inactivation_time)
        elif matches[1] in line:  # "progressing from Latent to Active Presymptomatic while on TB Drugs"
            # activation: have to track individual
            individual = int(sft.get_val("Individual ", line))
            reactivation_times[individual] = timestep
        elif matches[3] in line:  # move to active
            active += 1
        else:  # die from HIV
            individual = int(sft.get_val("individual ", line))
            if individual in reactivation_times:
                # a reactivated individual who dies no longer counts as active
                active -= 1
                reactivation_times.pop(individual)
    if debug:
        with open("Cum_Inactivations.txt", "w") as outfile:
            outfile.write(str(cum_inactivations))
        with open("Inactivations.txt", "w") as outfile:
            outfile.write(str(inactivation_times))
        print( "there are {} individual in reactivation state at the end of simulation.\n"
              .format(len(reactivation_times)))
    return cum_inactivations, inactivation_times, active_count, inactivations
def application(report_file, debug=False):
    """
    Verify the seasonal environmental amplification schedule and the per-individual
    environmental exposure (exposure == environment * amplification), parsing lines like:

    00:00:00 [0] [V] [IndividualTyphoid] amplification calculated as 0.997059: day of year=1, start=360.000000, end=365.000000, ramp_up=30.000000, ramp_down=170.000000, cutoff=160.000000.
    00:00:00 [0] [V] [IndividualTyphoid] Exposing individual 2 age 8582.488281 on route 'environment': prob=0.000000, infects=0.000008, immunity=1.000000, num_exposures=0, exposure=0.997059, environment=1.000000, iv_mult=1.000000.

    :param report_file: unused; results are written to sft.sft_output_filename
    :param debug: unused; kept for interface compatibility with sibling tests
    """
    sft.wait_for_done()
    # print( "Post-processing: " + report_file )
    # get params from config.json
    cdj = json.loads(open("config.json").read())["parameters"]
    start_time = cdj["Start_Time"]
    lines = []
    timestep = start_time
    count = 0
    with open("test.txt") as logfile:
        for line in logfile:
            if re.search("Update\(\): Time:", line):
                # calculate time step
                timestep += 1
                line = "TimeStep: " + str(timestep) + " " + line
                lines.append(line)
            if re.search("amplification calculated as", line):
                count += 1
                line = "TimeStep: " + str(timestep) + " " + line
                lines.append(line)
            if re.search("Exposing", line) and re.search("environment", line):
                line = "TimeStep: " + str(timestep) + " " + line
                lines.append(line)
    # more params from config file
    rud = cdj["Environmental_Ramp_Up_Duration"]
    rdd = cdj["Environmental_Ramp_Down_Duration"]
    ecd = cdj["Environmental_Cutoff_Days"]
    eps = cdj["Environmental_Peak_Start"]
    peak_duration = 365 - rud - rdd - ecd
    # for eps > 365
    peak_starttime = eps % 365
    peak_endtime = peak_starttime + peak_duration
    cutoff_starttime = peak_starttime + peak_duration + rdd
    success = True
    amp = []
    environmental_amp = []
    # NOTE(review): the report_file parameter is shadowed here; output goes to
    # sft.sft_output_filename regardless of the argument.
    with open(sft.sft_output_filename, "w") as report_file:
        if count == 0:
            success = False
            report_file.write("Found no data matching test case.\n")
        elif peak_duration < 0:
            success = False
            report_file.write("BAD: Environmental peak duration should be larger or equal to 0, the actual value is {}."
                              "\n The ramp durations and cutoff days need to follow a rule where: ramp_up_duration + "
                              "ramp_down_duration + cutoff_days < 365.\n".format(peak_duration))
        else:
            # adjust the times so that the ramp up starts at time 0, which means the cut off ends at time 0 too.
            adjust_time = peak_starttime - rud
            peak_starttime -= adjust_time
            peak_endtime -= adjust_time
            cutoff_starttime -= adjust_time
            # assumes the first filtered line is an Update() line so day_in_year /
            # amplification are bound before they are read — TODO confirm in logs
            for line in lines:
                if re.search("Update\(\): Time:", line):
                    TimeStep = int(sft.get_val("TimeStep: ", line))
                    TimeStep -= adjust_time
                    day_in_year = TimeStep % 365
                elif re.search("amplification calculated as", line):
                    amplification = float(sft.get_val("amplification calculated as ", line))
                    # piecewise-linear seasonal schedule: ramp up -> peak -> ramp down -> cutoff
                    # Environment Ramp Up
                    if day_in_year < peak_starttime:
                        environmental_amplification = day_in_year/float(rud)
                    # Environment peak
                    elif peak_starttime <= day_in_year <= peak_endtime:
                        environmental_amplification = 1
                    # Environment Ramp Down
                    elif peak_endtime < day_in_year < cutoff_starttime:
                        environmental_amplification = (cutoff_starttime - day_in_year) / float(rdd)
                    # Environment cutoff
                    elif day_in_year >= cutoff_starttime:
                        environmental_amplification = 0
                    if math.fabs(amplification - environmental_amplification) > 5e-2:
                        success = False
                        TimeStep = int(sft.get_val("TimeStep: ", line))
                        report_file.write("BAD: at time {0}, day of year = {1}, the environmental amplification is {2},"
                                          " expected {3}.\n".format(TimeStep, day_in_year+adjust_time, amplification,
                                                                    environmental_amplification))
                    amp.append(amplification)
                    environmental_amp.append(environmental_amplification)
                elif re.search("Exposing", line):
                    ind_id = sft.get_val("individual ", line)
                    environment = float(sft.get_val("environment=", line))
                    exposure = float(sft.get_val("exposure=", line))
                    expected_exposure = environment * amplification
                    if math.fabs(expected_exposure - exposure) > 1e-2:
                        success = False
                        TimeStep = int(sft.get_val("TimeStep: ", line))
                        # fixed broken format string: "(2)" -> "{2}" so ind_id is actually rendered
                        report_file.write("BAD: at time {0}, day of year = {1}, the amount of environmental contagion "
                                          "that individual {2} is exposed is {3}, expected {4}"
                                          ".\n".format(TimeStep, day_in_year + adjust_time, ind_id, exposure,
                                                       expected_exposure))
        sft.plot_data_sorted(amp, environmental_amp,
                             label1="Actual Seasonal Attenuation",
                             label2="Expected Seasonal Attenuation",
                             title="Seasonal Attenuation Peak Equals RUD plus CD plus RDD",
                             xlabel="Time",
                             ylabel="Attenuation",
                             category='seasonal_attenuation_peakstartequalsRUDplusCDplusRDD')
        report_file.write(sft.format_success_msg(success))
def create_report_file(data):
    """Write the SFT report comparing logged TBHIV infectiousness per disease
    state against the expected value (CD4 modifier * base infectiousness *
    state multiplier); Latent and Extrapulmonary must have 0 infectiousness.

    :param data: [report_name, lines, cd4_strata, mod_array, base_inf,
                  presymp_mult, smear_neg_mult]
    """
    report_name = data[0]
    lines = data[1]
    cd4_strata = data[2]
    mod_array = data[3]
    base_inf = data[4]
    presymp_mult = data[5]
    smear_neg_mult = data[6]
    # per-state counters, reported at the end for coverage visibility
    latent_data_points = 0
    smear_negative_data_points = 0
    smear_positive_data_points = 0
    presymptomatic_data_points = 0
    extrapulmonary_data_points = 0
    success = True
    epsilon = 0.000002  # absolute tolerance for float comparison
    with open(report_name, "w") as outfile:
        if not lines:
            outfile.write("BAD: No relevant test data found.\n")
            success = False
        for line in lines:
            tot_inf_actual = float(sft.get_val("total_infectiousness= ", line))
            cd4_count = float(sft.get_val("CD4count= ", line))
            # NOTE(review): cd4_mod_actual is parsed but never used below
            cd4_mod_actual = float(sft.get_val("CD4mod= ", line))
            cd4_mod_expected = tb_cd4_infectiousness_calc([mod_array, cd4_strata, cd4_count])
            if "Latent" in line:
                latent_data_points += 1
                # Latent infections must not be infectious at all
                if tot_inf_actual != 0:
                    success = False
                    outfile.write("BAD, found Latent infection with total_infectiousness= {} at time= {}. "
                                  "Expected 0. \n".format(tot_inf_actual, sft.get_val("time= ", line)))
            elif "SmearNegative" in line:
                smear_negative_data_points += 1
                tot_inf_expected = cd4_mod_expected * base_inf * smear_neg_mult
                if abs(tot_inf_expected - tot_inf_actual) > epsilon:
                    success = False
                    outfile.write("BAD, found SmearNegative infection with total_infectiousness= {} at time= {}, "
                                  "Expected {}. \n {} \n ".format(tot_inf_actual, sft.get_val("time= ", line),
                                                                  tot_inf_expected, line))
            elif "SmearPositive" in line:
                smear_positive_data_points += 1
                # smear positive uses the base infectiousness with no extra multiplier
                tot_inf_expected = cd4_mod_expected * base_inf
                if abs(tot_inf_expected - tot_inf_actual) > epsilon:
                    success = False
                    outfile.write("BAD, found SmearPositive infection with total_infectiousness= {} at time= {}, "
                                  "Expected {}. \n {} \n".format(tot_inf_actual, sft.get_val("time= ", line),
                                                                 tot_inf_expected, line))
            elif "Presymptomatic" in line:
                presymptomatic_data_points += 1
                tot_inf_expected = cd4_mod_expected * base_inf * presymp_mult
                if abs(tot_inf_expected - tot_inf_actual) > epsilon:
                    success = False
                    outfile.write("BAD, found Presymptomatic infection with total_infectiousness= {} at time= {}, "
                                  "Expected {}. \n {}\n ".format(tot_inf_actual, sft.get_val("time= ", line),
                                                                 tot_inf_expected, line))
            elif "Extrapulmonary" in line:
                extrapulmonary_data_points += 1
                # Extrapulmonary infections must not be infectious at all
                if tot_inf_actual != 0:
                    success = False
                    outfile.write("BAD, found Extrapulmonary infection with total_infectiousness= {} at time= {}. "
                                  "Should be 0. \n".format(tot_inf_actual, sft.get_val("time= ", line)))
        outfile.write("Data points for each TBHIV infection state:\nLatent = {} \nPresymptomatic = {} "
                      "\nSmear Negative = {} \nSmear Positive = {} \nExtrapulmonary = {} "
                      "\n".format(latent_data_points, presymptomatic_data_points, smear_negative_data_points,
                                  smear_positive_data_points, extrapulmonary_data_points))
        outfile.write("SUMMARY: Success={0}\n".format(success))
def application(report_file):
    """
    Multi-route shedding SFT: every depositing individual must shed the same
    infectiousness into the 'contact' and 'environment' pools at every time
    step, and the daily counts of shedders on both routes must match.

    :param report_file: unused; results are written to sft.sft_output_filename
    """
    sft.wait_for_done()
    # print( "Post-processing: " + report_file )
    cdj = json.loads(open("config.json").read())["parameters"]
    start_time = cdj["Start_Time"]
    timestep = start_time
    lines_contact = []
    lines_environment = []
    contact_shedding_count = 0
    contact_shedding_count_per_day = []
    env_shedding_count = 0
    env_shedding_count_per_day = []
    with open("test.txt") as logfile:
        for line in logfile:
            if re.search("Update\(\): Time:", line):
                # time-step boundary: bank the daily counters and reset them
                timestep += 1
                contact_shedding_count_per_day.append(contact_shedding_count)
                env_shedding_count_per_day.append(env_shedding_count)
                contact_shedding_count = env_shedding_count = 0  # reset counter
            elif re.search("depositing", line):
                # prefix time step and individual id so lines can be matched later
                ind_id = sft.get_val("Individual ", line)
                line = "TimeStep: " + str(timestep) + " ind_id: " + str(ind_id) + " " + line
                if re.search("contact", line):
                    lines_contact.append(line)
                    contact_shedding_count += 1
                elif re.search("environment", line):
                    lines_environment.append(line)
                    env_shedding_count += 1
    success = True
    # NOTE(review): the report_file parameter is shadowed here; output goes to
    # sft.sft_output_filename regardless of the argument.
    with open(sft.sft_output_filename, "w") as report_file:
        if contact_shedding_count_per_day != env_shedding_count_per_day:
            success = False
            # fixed message: "the # od ... doesn't matches" (typos, missing newline)
            report_file.write("BAD: the # of individuals shedding to contact and environment pool doesn't match.\n")
        sft.plot_data(contact_shedding_count_per_day, env_shedding_count_per_day,
                      label1="Contact", label2="Environmental",
                      title="Multi-route Shedding(count of individual shedding per day)",
                      xlabel="Time", ylabel="# of individuals shedding into pool",
                      category='shedding_multiroute_count', overlap=True, alpha=0.5)
        if not lines_contact or not lines_environment:
            success = False
            report_file.write("BAD: Found no data matching test case.\n")
        else:
            contact_shedding_per_day = []
            env_shedding_per_day = []
            for line in lines_contact:
                infectiousness2 = None
                ind_id = int(sft.get_val("ind_id: ", line))
                timestep = sft.get_val("TimeStep: ", line)
                infectiousness1 = float(sft.get_val("depositing ", line))
                # find the matching environment deposit for the same individual/time step
                for l in lines_environment:
                    if "ind_id: " + str(ind_id) + " " in l and \
                            "TimeStep: " + str(timestep) + " " in l:
                        infectiousness2 = float(sft.get_val("depositing ", l))
                        if infectiousness1 != infectiousness2:
                            success = False
                            report_file.write("BAD: Individual {0} is depositing {1} to route {2} and {3} to route {4} "
                                              "at time {5}.\n".format(ind_id, infectiousness1, "contact",
                                                                      infectiousness2, "environment", timestep))
                        # for plotting
                        if ind_id == 1:
                            contact_shedding_per_day.append(round(infectiousness1))
                            env_shedding_per_day.append(round(infectiousness2))
                        # consume the matched line so leftovers are unmatched deposits
                        lines_environment.remove(l)
                        break
                if infectiousness2 is None:
                    success = False
                    report_file.write("BAD: Individual {0} is not depositing to route environment at time {1}."
                                      "\n".format(ind_id, timestep))
            sft.plot_data(contact_shedding_per_day, env_shedding_per_day,
                          label1="Contact", label2="Environmental",
                          title="Multi-route Shedding(Infectiousness of individual 1)",
                          xlabel="Time", ylabel="Infectiousness",
                          category='shedding_multiroute', overlap=True, alpha=0.5)
            if lines_environment:
                success = False
                # fixed message: leftovers are environment-only deposits, so the second
                # route name must be "environment" (original said "contact" twice and
                # passed an unused extra format argument)
                report_file.write("BAD: {0} Individuals are not depositing to route contact while they are depositing "
                                  "to route environment.\n".format(len(lines_environment)))
                for l in lines_environment:
                    report_file.write(l)
        report_file.write(sft.format_success_msg(success))
def application(report_file, debug=False):
    """
    Verify that the per-individual number of exposures on each route follows a
    Poisson distribution with the configured rate (Typhoid_Contact_Exposure_Rate
    for route 'contact', Typhoid_Environmental_Exposure_Rate for 'environment').

    :param report_file: unused; results are written to sft.sft_output_filename
    :param debug: unused; kept for interface compatibility with sibling tests
    """
    sft.wait_for_done()
    cdj = json.loads(open("config.json").read())["parameters"]
    tcer = float(cdj["Typhoid_Contact_Exposure_Rate"])
    teer = float(cdj["Typhoid_Environmental_Exposure_Rate"])
    num_exposures_contact = []
    num_exposures_enviro = []
    with open("test.txt") as logfile:
        for line in logfile:
            if "Exposing " in line and "route 'environment'" in line:
                # collect # of exposures for route environment
                num_exp_e = int(sft.get_val("num_exposures=", line))
                num_exposures_enviro.append(num_exp_e)
            elif "Exposing " in line and "route 'contact'" in line:
                # collect # of exposures for route contact
                # (bug fix: the original tested the bare literal "Exposing ", which is
                # always truthy, so ANY line containing "route 'contact'" matched)
                num_exp_c = int(sft.get_val("num_exposures=", line))
                num_exposures_contact.append(num_exp_c)
    success = True
    # NOTE(review): the report_file parameter is shadowed here; output goes to
    # sft.sft_output_filename regardless of the argument.
    with open(sft.sft_output_filename, "w") as report_file:
        # report_file.write("len1={0}, len2={1}, teer = {2}, tcer = {3}.\n".format(len(num_exposures_enviro),
        #                   len(num_exposures_contact), teer, tcer))
        if not num_exposures_enviro:
            success = False
            report_file.write("BAD: Found no individual exposed from route environment.\n")
        elif not sft.test_poisson(num_exposures_enviro, teer, report_file, 'environment'):
            success = False
        if not num_exposures_contact:
            success = False
            report_file.write("BAD: Found no individual exposed from route contact.\n")
        elif not sft.test_poisson(num_exposures_contact, tcer, report_file, 'contact'):
            success = False
        report_file.write("num_exposures_enviro = {0}, num_exposures_contact = {1}"
                          "\n".format(len(num_exposures_enviro), len(num_exposures_contact)))
        # Write summary message to report file
        report_file.write(sft.format_success_msg(success))
    with open("error_larger_than_tolerance_log.txt", "w") as my_file:
        # "Funciton" typo in the plot titles corrected
        sft.plot_poisson_probability(teer, num_exposures_enviro, my_file, 'environmental route',
                                     xlabel="Number of Exposures", ylabel="Probability",
                                     label2="Emod data",
                                     title="Poisson Probability Mass Function\n"
                                           "environmental route: Exposure_Rate = {}".format(teer))
        sft.plot_poisson_probability(tcer, num_exposures_contact, my_file, 'contact route',
                                     xlabel="Number of Exposures", ylabel="Probability",
                                     label2="Emod data",
                                     title="Poisson Probability Mass Function\n"
                                           "contact route: Exposure_Rate = {}".format(tcer))
def application(report_file):
    """Verify the 'Environmental Contagion Population' channel: re-accumulate
    environmental shedding (with decay ncdr) from the stdout log, normalize by
    StatPop, and compare against both the Exposing log lines and InsetChart.json."""
    sft.wait_for_done()
    # print( "Post-processing: " + report_file )
    cdj = json.loads(open("config.json").read())["parameters"]
    ncdr = cdj["Node_Contagion_Decay_Rate"]
    start_time = cdj["Start_Time"]
    simulation_duration = int(cdj["Simulation_Duration"])
    isj = json.loads(open("output/InsetChart.json").read())["Channels"]
    environmental_contagion_population = isj["Environmental Contagion Population"]["Data"]
    timestep = start_time
    lines = []
    cum_all = []   # accumulated (decayed) shedding snapshot per time step
    cum = 0
    Statpop = []
    adding_cp_log = []  # "Adding" contagion values per time step (kept for reference)
    adding_cp = 0
    shedding = 0
    with open("test.txt") as logfile:
        for line in logfile:
            if "Update(): Time:" in line:
                # calculate time step
                timestep += 1
                pop = float(sft.get_val("StatPop: ", line))
                Statpop.append(pop)
                # append the accumulated shedding and reset the counter at the end of each time step.
                # data for timestep 1 is stored in cum_all[1]
                cum_all.append(cum)
                # environmental cp decay
                cum *= 1.0 - ncdr
                adding_cp_log.append(adding_cp)
                # resetting shedding and adding_cp variables
                shedding = 0
                adding_cp = 0
            elif "[MultiRouteTransmissionGroups] Adding " in line and "route:1" in line:
                # append time step and total shedding line to lists
                line = "TimeStep: " + str(timestep) + " " + line
                lines.append(line)
                adding_cp = float(sft.get_val("Adding ", line))
            elif "Exposing " in line and "route 'environment'" in line:
                # append time step and Exposing line to lists
                line = "TimeStep: " + str(timestep) + " " + line
                lines.append(line)
            elif "depositing" in line and "route environment" in line:
                # get shedding of contact route and add it to accumulated shedding
                shedding = float(sft.get_val("depositing ", line))
                cum += shedding
                line = "TimeStep: " + str(timestep) + " " + line
                lines.append(line)
            # elif "scaled by " in line and "route:1" in line:
            #     # get population
            #     if timestep > 0:
            #         # start collecting population at time 1
            #         pop = float(sft.get_val("scaled by ", line))
            #         Statpop.append(pop)
    with open("DEBUG_filtered_from_StdOut.txt", "w") as file:
        file.write("".join(lines))
    expected_cp_insetchart = []
    cp_stdout_dict = {}  # timestep -> [logged environment value, expected contagion]
    success = True
    # NOTE(review): the report_file parameter is shadowed here; output goes to
    # sft.sft_output_filename regardless of the argument.
    with open(sft.sft_output_filename, "w") as report_file:
        if len(lines) == 0:
            success = False
            report_file.write("Found no data matching test case.\n")
        else:
            if len(cum_all) != simulation_duration or len(Statpop) != simulation_duration:
                report_file.write("WARNING: the test.txt file is incomplete. len(cum_all)={0}, len(Statpop)={1},"
                                  " expected {2}.\n".format(len(cum_all), len(Statpop), simulation_duration))
            for line in lines:
                if re.search("Exposing", line):
                    environment = float(sft.get_val("environment=", line))
                    timestep = int(sft.get_val("TimeStep: ", line))
                    # expected contagion = accumulated shedding / previous step's StatPop
                    expected_cp = cum_all[timestep - start_time] / Statpop[timestep - start_time - 1]
                    if timestep not in cp_stdout_dict:
                        cp_stdout_dict[timestep] = [environment, expected_cp]
                    # 1% relative tolerance
                    if math.fabs(environment - expected_cp) > 1e-2 * expected_cp:
                        success = False
                        # NOTE(review): "inividual " looks like a typo for "individual " —
                        # this only executes on failure; ind_id is parsed but never used. Verify.
                        ind_id = int(sft.get_val("inividual ", line))
                        report_file.write("BAD: at time {0}, individuals are exposed on route environment with contagion "
                                          "population = {1} StatPop = {2}, expected {3}."
                                          "\n".format(timestep, environment, Statpop[timestep - start_time - 1],
                                                      expected_cp))
            # NOTE(review): pre_cum is a leftover from the commented-out accumulated-shedding check below
            pre_cum = 0
            for timestep in range(len(cum_all) - 1):
                # report_file.write("timestep={0}, StatPop = {1}, cum_all= {2}, adding_cp_log = {3}.
                # \n".format(x,Statpop[x+1],cum_all[x], adding_cp_log[x]))
                # the InsetChart is missing the first time step, so we're taking it out from the log. #2890
                # cur_cum = cum_all[timestep + 1]
                cur_cum = cum_all[timestep]
                # comment out these code since we are no longer logging the accumulated shedding in StdOut file.
                # adding_cp = cur_cum - pre_cum * (1.0 - ncdr)
                # # pass cur_cum to pre_cum for next time step
                # pre_cum = cur_cum
                # if math.fabs(adding_cp - adding_cp_log[x]) > 1e-1:
                #     success = False
                #     report_file.write(
                #         "BAD: At time {0}, the accumulated shedding is {1} from StdOut, expected {2}."
                #         "\n".format(timestep, adding_cp_log[x], adding_cp))
                expected_cp = cur_cum / Statpop[timestep]
                expected_cp_insetchart.append(expected_cp)
                if math.fabs(expected_cp - environmental_contagion_population[timestep]) > 1e-2 * expected_cp:
                    success = False
                    report_file.write("BAD: At time {0}, the environmental contagion population is {1} from InsetChart.json, "
                                      "expected {2}.\n".format(timestep,
                                                               environmental_contagion_population[timestep],
                                                               expected_cp))
        report_file.write(sft.format_success_msg(success))
        sft.plot_data(environmental_contagion_population, expected_cp_insetchart,
                      label1="InsetChart", label2="expected data",
                      title="Environmental Contagion Population\n InsetChart vs. expected",
                      xlabel="Day", ylabel="Environmental Contagion Population",
                      category='environmental_contagion_population_InsetChart',
                      alpha=0.5, overlap=True)
        sft.plot_data(np.array(list(cp_stdout_dict.values()))[:, 0],
                      np.array(list(cp_stdout_dict.values()))[:, 1],
                      label1="StdOut", label2="expected data",
                      title="Exposure(Contagion)\n StdOut vs. expected",
                      xlabel="Day", ylabel="Contagion",
                      category='exposure_StdOut',
                      alpha=0.5, overlap=True)
        # pair each logged exposure value with the InsetChart value of the previous day
        cp_list_for_plot = []
        for key in sorted(cp_stdout_dict):
            cp_list_for_plot.append([cp_stdout_dict[key][0],
                                     environmental_contagion_population[key - 1]])
        sft.plot_data(np.array(cp_list_for_plot)[:, 1][1:],
                      np.array(cp_list_for_plot)[:, 0],
                      label1="InsetChart", label2="StdOut",
                      title="Environmental Contagion Population\n InsetChart vs. StdOut",
                      xlabel="Day", ylabel="Contagion",
                      category='environmental_contagion_population',
                      alpha=0.5, overlap=True)
def parse_stdout_file(start_time=1, stdout_filename="test.txt", filtered_filename="filtered_lines.txt", debug=False):
    """
    Scan the stdout log and collect, per time step, the number of new acute
    infections (verified later against InsetChart.json), the acute-to-treatment
    waiting times, and any anomalous state transitions (going acute while
    already acute, treated/recovered/chronic without ever being acute, etc.).

    :param start_time: first time step from config
    :param stdout_filename: file to parse (test.txt)
    :param filtered_filename: file the matched lines are dumped to (debug only)
    :param debug: whether the matched lines get saved to filtered_filename
    :return: [acute_to_treatment_times, acute_infections_daily, exceptions]
    """
    day = start_time
    anomalies = []          # human-readable descriptions of unexpected transitions
    matched_lines = []      # raw log lines kept for the debug dump
    treatment_waits = []    # time steps between going acute and getting treated
    onset_day = {}          # ind_id -> time step the individual went acute
    daily_new_acute = []    # count of new acute infections banked per time step
    new_acute_today = 0
    # Nobody dies of the infection in the current application; this bookkeeping
    # would need changes if deaths started removing tracked individuals.
    # Individuals who self-cure or go chronic are dropped and contribute no wait time.
    with open(stdout_filename) as infile:
        for line in infile:
            if UPDATE_TIME in line:
                matched_lines.append(line)
                # time-step boundary: advance the clock, bank today's count, reset
                day += 1
                daily_new_acute.append(new_acute_today)
                new_acute_today = 0
            elif TO_ACUTE in line:
                matched_lines.append(line)
                new_acute_today += 1
                ind_id = int(sft.get_val(IND_ID_1, line))
                if ind_id in onset_day:
                    anomalies.append("Individual {} at time {} just became acute while already being "
                                     "acute\n".format(ind_id, day))
                else:
                    onset_day[ind_id] = day
            elif TREATMENT in line:
                matched_lines.append(line)
                # treatment closes out the acute episode: record the waiting time
                ind_id = int(sft.get_val(IND_ID_2, line))
                if ind_id in onset_day:
                    treatment_waits.append(day - onset_day.get(ind_id))
                else:
                    anomalies.append("Individual {} at time {} got treatment without being acute\n"
                                     .format(ind_id, day))
            elif RECOVERED in line:
                matched_lines.append(line)
                # self-cured: stop tracking this individual
                ind_id = int(sft.get_val(IND_ID_3, line))
                if ind_id in onset_day:
                    del onset_day[ind_id]
                else:
                    anomalies.append("Individual {} at time {} recovered from acute without being acute\n"
                                     .format(ind_id, day))
            elif CHRONIC in line:
                matched_lines.append(line)
                # went chronic: stop tracking this individual
                ind_id = int(sft.get_val(IND_ID_3, line))
                if ind_id in onset_day:
                    del onset_day[ind_id]
                else:
                    anomalies.append("Individual {} at time {} just went chronic from acute without "
                                     "being acute\n".format(ind_id, day))
    if debug:
        with open(filtered_filename, "w") as outfile:
            outfile.writelines(matched_lines)
    return [treatment_waits, daily_new_acute, anomalies]