def parse_output_file(output_filename="test.txt", simulation_timestep=1, debug=False):
    """
    Build a per-time-step incidence dictionary from a filtered simulation log.

    :param output_filename: file to parse (test.txt)
    :param simulation_timestep: clock increment applied for every time-step marker line
    :param debug: if True, dump the filtered lines and the result dict to disk
    :return: output_dict mapping time step -> count of Symptomatic-active lines
    """
    with open(output_filename) as logfile:
        filtered_lines = [entry for entry in logfile if sft.has_match(entry, matches)]
    if debug:
        with open("DEBUG_filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)

    output_dict = {}
    time_step = 0
    for entry in filtered_lines:
        if matches[0] in entry:
            # Time-step marker: advance the clock and open a fresh bucket.
            time_step += simulation_timestep
            output_dict[time_step] = 0
        elif matches[1] in entry:
            # One more Symptomatic-active individual in the current time step.
            output_dict[time_step] += 1

    if debug:
        with open(r'./DEBUG_incidence.json', "w") as file:
            json.dump(output_dict, file, indent=4)
    return output_dict
def parse_stdout_file(stdout_filename="StdOut.txt", simulation_timestep=1, debug=False):
    """
    Collect statistical-population and infected counts, one DataFrame row per
    time-step summary line.

    :param stdout_filename: file to parse (StdOut.txt)
    :param simulation_timestep: clock increment per summary line
    :param debug: if True, dump the filtered lines and the resulting csv to disk
    :return: stdout_df
    """
    filtered_lines = []
    with open(stdout_filename) as logfile:
        for logline in logfile:
            if sft.has_match(logline, matches):
                filtered_lines.append(logline)
    if debug:
        with open("DEBUG_filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)

    stdout_df = pd.DataFrame(columns=[ConfigKeys.Simulation_Timestep,
                                      Stdout.stat_pop,
                                      Stdout.infected])
    stdout_df.index.name = 'index'
    time_step = 0
    row = 0
    for logline in filtered_lines:
        if matches[0] not in logline:
            continue
        # Pull both counters off the time-step summary line.
        stat_pop = int(sft.get_val(Stdout.stat_pop, logline))
        infected = int(sft.get_val(Stdout.infected, logline))
        stdout_df.loc[row] = [time_step, stat_pop, infected]
        row += 1
        time_step += simulation_timestep

    if debug:
        stdout_df.to_csv(r'./DEBUG_filtered_from_logging.csv')
    return stdout_df
def parse_output_file(output_filename="test.txt", debug=False):
    """
    Group core ids by time step from a filtered simulation log.

    :param output_filename: file to parse (test.txt)
    :param debug: if True, dump the filtered lines and the result dict to disk
    :return: output_dict mapping time-step value -> list of core values seen there
    """
    with open(output_filename) as logfile:
        filtered_lines = [entry for entry in logfile if sft.has_match(entry, [matches[0]])]
    if debug:
        with open("DEBUG_filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)

    output_dict = {}
    for entry in filtered_lines:
        # Every filtered line carries both a time-step value and a core id.
        time_step = sft.get_val(matches[0], entry)
        core = sft.get_val(matches[1], entry)
        output_dict.setdefault(time_step, []).append(core)

    if debug:
        with open(r'./DEBUG_core_per_time_step.json', "w") as file:
            json.dump(output_dict, file, indent=4)
    return output_dict
def parse_output_file(output_filename="test.txt", simulation_timestep=1, debug=False):
    """
    creates a dictionary to store stat populations, infected population, and MDR
    test result for each time step
    :param output_filename: file to parse (test.txt)
    :param simulation_timestep: clock increment per time-step summary line
        (was a hard-coded local of 1; promoted to a defaulted parameter for
        consistency with the sibling parse functions — default keeps old behavior)
    :param debug: if True, also dump the filtered lines to disk
    :return: output_dict mapping time step -> {stat_pop, infected, positive, negative, default}
    """
    filtered_lines = []
    with open(output_filename) as logfile:
        for line in logfile:
            if sft.has_match(line, matches):
                filtered_lines.append(line)
    if debug:
        with open("DEBUG_filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)

    # initialize variables
    time_step = 0
    infected = 0
    statpop = 0
    positive = 0
    negative = 0
    default = 0
    output_dict = {}
    for line in filtered_lines:
        if matches[0] in line:
            # Close out the previous time step with the counters gathered so far,
            # then read the new population counts and reset the per-step tallies.
            output_dict[time_step] = {KEY_STAT_POP: statpop,
                                      KEY_INFECTED: infected,
                                      KEY_POSITIVE: positive,
                                      KEY_NEGATIVE: negative,
                                      KEY_DEFAULT: default}
            infected = sft.get_val(matches[2], line)
            statpop = sft.get_val(matches[1], line)
            time_step += simulation_timestep
            positive = 0
            negative = 0
            default = 0
        if matches[3] in line:
            # Test-result line: non-zero counts as positive, zero as negative.
            result = int(sft.get_val(matches[3], line))
            if result:
                positive += 1
            else:
                negative += 1
        if matches[4] in line:
            default += 1

    # NOTE(review): written unconditionally (not debug-guarded) in the original;
    # kept as-is to preserve behavior.
    res_path = r'./tb_test_result_from_logging.json'
    with open(res_path, "w") as file:
        json.dump(output_dict, file, indent=4)
    return output_dict
def parse_output_file(output_filename="test.txt", simulation_timestep=1, debug=False):
    """
    creates a dataframe of time step, infected, infectiousness and stat populations
    :param output_filename: file to parse (test.txt)
    :param simulation_timestep: simulation time step, * days
    :param debug: if True, also dump the filtered lines to disk
    :return: output_df: data frame contains:
        1, time step,
        2, # of infected population,
        3, infectiousness,
        4, statistical populations, at each time step
    """
    filtered_lines = []
    with open(output_filename) as logfile:
        for line in logfile:
            if sft.has_match(line, matches):
                filtered_lines.append(line)
    if debug:
        with open("filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)

    # initialize variables
    time_step = 0
    # BUGFIX: this initialization was commented out in the original; if a
    # time-step summary line (matches[0]) appears before any infectiousness
    # line (matches[2]), the first row append raised NameError.
    infectiousness = 0
    infected = 0
    statpop = 0
    output_df = pd.DataFrame(columns=[KEY_SIMULATION_TIMESTEP,
                                      KEY_INFECTED,
                                      KEY_INFECTIOUSNESS,
                                      KEY_STAT_POP])
    output_df.index.name = "index"
    index = 0
    for line in filtered_lines:
        if matches[0] in line:
            # Summary line: append the row for this step, advance the clock and
            # reset infectiousness for the next step.
            infected = sft.get_val(matches[1], line)
            statpop = sft.get_val(matches[3], line)
            output_df.loc[index] = [time_step, infected, infectiousness, statpop]
            index += 1
            time_step += simulation_timestep
            infectiousness = 0
            continue
        if matches[2] in line:
            infectiousness = sft.get_val(matches[2], line)
            continue

    # NOTE(review): written unconditionally in the original; kept as-is.
    res_path = r'./infected_vs_infectiousness.csv'
    if not os.path.exists(os.path.dirname(res_path)):
        os.makedirs(os.path.dirname(res_path))
    output_df.to_csv(res_path)
    return output_df
def parse_stdout_file(stdout_filename="StdOut.txt", simulation_timestep=1, debug=False):
    """
    Build a DataFrame of per-group contagion/probability values plus population
    counters, one row per (time step, group id).

    :param stdout_filename: file to parse (StdOut.txt)
    :param simulation_timestep: clock increment per time-step summary line
    :param debug: if True, dump the filtered lines and the csv to disk
    :return: stdout_df
    """
    filtered_lines = []
    with open(stdout_filename) as logfile:
        for raw in logfile:
            if sft.has_match(raw, matches):
                filtered_lines.append(raw)
    if debug:
        with open("DEBUG_filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)

    stdout_df = pd.DataFrame(columns=[ConfigKeys.Simulation_Timestep,
                                      Stdout.stat_pop,
                                      Stdout.infected,
                                      Stdout.group_id,
                                      Stdout.contagion,
                                      Stdout.prob])
    stdout_df.index.name = 'index'
    time_step = 0
    row = 0
    group_contagion = {}
    for raw in filtered_lines:
        if matches[0] in raw:
            # Time-step summary: flush the groups gathered since the last summary,
            # in ascending group-id order.
            stat_pop = int(sft.get_val(Stdout.stat_pop, raw))
            infected = int(sft.get_val(Stdout.infected, raw))
            for gid in sorted(group_contagion):
                stdout_df.loc[row] = [time_step, stat_pop, infected, gid,
                                      group_contagion[gid][0],
                                      group_contagion[gid][1]]
                row += 1
            group_contagion = {}
            time_step += simulation_timestep
        elif len(group_contagion) < 10 and matches[1] in raw:
            # Record at most 10 distinct groups per time step; first sighting wins.
            contagion_value = float(sft.get_val(matches[1], raw))
            prob_value = float(sft.get_val(Stdout.prob, raw))
            gid = int(sft.get_val(matches[2], raw))
            if gid not in group_contagion:
                group_contagion[gid] = [contagion_value, prob_value]

    if debug:
        stdout_df.to_csv(r'./DEBUG_filtered_from_logging.csv')
    return stdout_df
def parse_output_file(output_filename="test.txt", simulation_timestep=1, debug=False):
    """
    creates a dictionary to store filtered information for each time step
    :param output_filename: file to parse (test.txt)
    :param simulation_timestep: clock increment per time-step marker line
        (was a hard-coded local of 1; promoted to a defaulted parameter for
        consistency with the sibling parse functions — default keeps old behavior)
    :param debug: if True, also dump the filtered lines to disk
    :return: output_dict keyed by individual id; each value maps
        KEY_PRESYMPTOMATIC / KEY_SYMPTOMATIC -> [time_step, timer]
    """
    filtered_lines = []
    with open(output_filename) as logfile:
        for line in logfile:
            if sft.has_match(line, matches):
                filtered_lines.append(line)
    if debug:
        with open("DEBUG_filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)

    # initialize variables
    time_step = 0
    output_dict = {}
    for line in filtered_lines:
        if matches[0] in line:
            time_step += simulation_timestep
        elif matches[1] in line:
            # this individual became PreSymptomatic active
            individual_id = sft.get_val(KEY_INDIVIDUAL, line)
            timer = float(sft.get_val(KEY_TIMER, line))
            output_dict.setdefault(individual_id, {})[KEY_PRESYMPTOMATIC] = [time_step, timer]
        elif matches[2] in line:
            # this individual became Symptomatic active
            individual_id = sft.get_val(KEY_INDIVIDUAL, line)
            timer = float(sft.get_val(KEY_TIMER, line))
            output_dict.setdefault(individual_id, {})[KEY_SYMPTOMATIC] = [time_step, timer]

    # NOTE(review): written unconditionally (not debug-guarded) in the original;
    # kept as-is to preserve behavior.
    res_path = r'./SEIR_Presymptomatic_from_logging.json'
    with open(res_path, "w") as file:
        json.dump(output_dict, file, indent=4)
    return output_dict
def parse_stdout_file( start_timestep, stdout_filename="test.txt", debug=False ):
    """
    Parse relapse events out of a stdout log, bucketed per "Time:" line.

    :param start_timestep: first timestep from config; relapse lines seen while
        timestep == start_timestep + 1 are tallied as initial_relapses
    :param stdout_filename: file to parse (test.txt)
    :param debug: if True, dump the filtered lines and both result lists to disk
    :return: (relapses, cum_relapses) — per-timestep relapse counts, and the
        cumulative count snapshotted at the start of each timestep
    """
    filtered_lines = []
    with open(stdout_filename) as logfile:
        for line in logfile:
            if sft.has_match(line,matches):
                filtered_lines.append(line)
    if debug:
        with open("filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)
    # initialize variables
    # so actually all the will-relapse calculations are done at time of clearance.
    # our clearance rate is 100%, so they all happen at once. So we just calculate
    # the number of will-relapses at time-of-clearance and use
    #   sft.test_binomial_95ci( relapses, N, 'rate', None, None )
    # to figure out if everything was right.
    # But first we have to fix the bug where fast progressors who get cleared by
    # drugs bounce back-and-forth between active and latent.
    cum_relapses = []
    relapses = []
    timestep = 0
    initial_relapses = 0
    pre_cum = 0
    cum = 0
    for line in filtered_lines:
        if "Time:" in line:
            # New timestep: snapshot the cumulative total, and record the delta
            # accumulated during the timestep that just ended.
            timestep += 1
            cum_relapses.append(cum)
            relapses.append(cum - pre_cum)
            pre_cum = cum
        else:
            # Any other matched line is a single relapse event.
            if timestep == start_timestep + 1:
                initial_relapses += 1
            cum += 1
    if debug:
        with open("relapses.txt", "w") as file:
            file.writelines(str(relapses))
        with open("cum_relapses.txt", "w") as file:
            file.writelines(str(cum_relapses))
        print( "initial relapses is {}.\n".format(initial_relapses ) )
    return relapses, cum_relapses
def parse_stdout_file( start_timestep, duration_of_interest, stdout_filename="test.txt", debug=False):
    """
    creates cum_death and death_times arrays from a stdout log

    :param start_timestep: drug start time
    :param duration_of_interest: window (timesteps past start_timestep) during
        which death times are collected into death_times
    :param stdout_filename: file to parse (test.txt)
    :param debug: if True, dump the filtered lines and result arrays to disk
    :return: cum_deaths, deaths, infected_individuals, death_times
    """
    filtered_lines = []
    with open(stdout_filename) as logfile:
        for line in logfile:
            if sft.has_match(line, matches):
                filtered_lines.append(line)
    if debug:
        with open("filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)
    # initialize variables
    cum_deaths = []     # cumulative deaths, snapshotted at the start of each timestep
    deaths = []         # deaths during each preceding timestep
    death_times = []    # per-death time offsets relative to start_timestep
    timestep = 0
    cum = death_daily = infected = 0
    infected_individuals = []  # infected count, snapshotted per timestep
    for line in filtered_lines:
        if "Time:" in line:
            # New timestep: snapshot the running totals gathered so far, then
            # read the infected count off the summary line.
            timestep += 1
            infected_individuals.append(infected)
            cum_deaths.append(cum)
            deaths.append(death_daily)
            death_daily = 0
            infected = int(sft.get_val("Infected: ", line))
        else:
            # Any other matched line is a single death event.
            cum += 1
            death_daily += 1
            # cum_deaths[-1] = cum
            if timestep <= duration_of_interest + start_timestep:
                # Record the death time relative to the drug start.
                death_times.append( timestep-start_timestep )
    if debug:
        with open("cum_deaths.txt","w") as file:
            file.writelines( str( cum_deaths ) )
        with open("death_times.txt","w") as file:
            file.writelines( str( death_times ) )
    return cum_deaths, deaths, infected_individuals, death_times
def parse_stdout_file(stdout_filename="StdOut.txt", simulation_timestep=1, debug=False):
    """
    Parse per-group contagion and probability values from StdOut, emitting one
    DataFrame row per (time step, group id) pair.

    :param stdout_filename: file to parse (StdOut.txt)
    :param simulation_timestep: increment added to the clock per summary line
    :param debug: if True, write the filtered lines and the csv to disk
    :return: stdout_df
    """
    kept = []
    with open(stdout_filename) as logfile:
        for entry in logfile:
            if sft.has_match(entry, matches):
                kept.append(entry)
    if debug:
        with open("DEBUG_filtered_lines.txt", "w") as outfile:
            outfile.writelines(kept)

    columns = [ConfigKeys.Simulation_Timestep, Stdout.stat_pop, Stdout.infected,
               Stdout.group_id, Stdout.contagion, Stdout.prob]
    stdout_df = pd.DataFrame(columns=columns)
    stdout_df.index.name = 'index'
    clock = 0
    next_row = 0
    pending = {}
    for entry in kept:
        if matches[0] not in entry:
            # Group line: remember the first sighting of each group id, capped
            # at 10 distinct groups per time step.
            if len(pending) < 10 and matches[1] in entry:
                contagion = float(sft.get_val(matches[1], entry))
                prob = float(sft.get_val(Stdout.prob, entry))
                group = int(sft.get_val(matches[2], entry))
                pending.setdefault(group, [contagion, prob])
            continue
        # Summary line: flush all pending groups in ascending group-id order.
        stat_pop = int(sft.get_val(Stdout.stat_pop, entry))
        infected = int(sft.get_val(Stdout.infected, entry))
        for group in sorted(pending):
            stdout_df.loc[next_row] = [clock, stat_pop, infected, group,
                                       pending[group][0], pending[group][1]]
            next_row += 1
        pending = {}
        clock += simulation_timestep

    if debug:
        stdout_df.to_csv(r'./DEBUG_filtered_from_logging.csv')
    return stdout_df
def parse_output_file(output_filename="test.txt", simulation_timestep=1, debug=False):
    """
    Tally positive/negative/default test results per time step into a DataFrame.

    :param output_filename: file to parse (test.txt)
    :param simulation_timestep: increment applied for every time-step marker line
    :param debug: if True, dump the filtered lines to disk
    :return: output_df indexed by time step
    """
    filtered_lines = []
    with open(output_filename) as logfile:
        for row in logfile:
            if sft.has_match(row, matches):
                filtered_lines.append(row)
    if debug:
        with open("DEBUG_filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)

    output_df = pd.DataFrame(columns=[ReportColumn.negative,
                                      ReportColumn.default,
                                      ReportColumn.positive])
    output_df.index.name = Config.simulation_timestep
    time_step = 0
    positive = negative = default = 0
    for row in filtered_lines:
        if matches[0] in row:
            # Time-step marker: store the counters for the step just finished
            # and reset them for the next one.
            output_df.loc[time_step] = pd.Series({ReportColumn.positive: positive,
                                                  ReportColumn.negative: negative,
                                                  ReportColumn.default: default})
            time_step += simulation_timestep
            positive = negative = default = 0
        if matches[1] in row:
            # Test-result line: 1 counts as positive, 0 as negative.
            if int(sft.get_val(matches[1], row)):
                positive += 1
            else:
                negative += 1
        if matches[2] in row:
            default += 1

    # Written unconditionally (not debug-guarded) in the original; preserved.
    res_path = r'./DEBUG_tb_test_result_from_logging.csv'
    with open(res_path, "w") as file:
        output_df.to_csv(file)
    return output_df
def parse_stdout_file(output_filename="test.txt", debug=False):
    """
    Reads stdout file and creates an individual-ID-indexed DataFrame of ages
    and mod_aquire values, using only the lines that precede the first
    "Update(): Time: " entry (i.e. the initial population dump).

    :param output_filename: stdout filename (test.txt)
    :param debug: if True, write the filtered lines and the DataFrame to disk
    :return: dataframe of all individuals from a single timestep,
        indexed by individual id with AGE and MOD_ACQUIRE columns
    """
    matches = [
        "{} = ".format(DataframeKeys.MOD_ACQUIRE),
        "{} = ".format(DataframeKeys.AGE),
        "{} = ".format(DataframeKeys.ID),
        "Update(): Time: "
    ]
    filtered_lines = []
    with open(output_filename) as logfile:
        for line in logfile:
            if sft.has_match(line, matches):
                # Stop at the first time-step update line: only the
                # pre-simulation individual dump is of interest.
                if matches[-1] in line:
                    break
                filtered_lines.append(line)
    if debug:
        with open("filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)

    individuals_df = pd.DataFrame(
        columns=[DataframeKeys.AGE, DataframeKeys.MOD_ACQUIRE])
    individuals_df.index.name = 'index'
    for line in filtered_lines:
        age = float(sft.get_val(matches[1], line))
        acquire = float(sft.get_val(matches[0], line))
        # FIX: renamed local from `id`, which shadowed the builtin.
        individual_id = int(sft.get_val(matches[2], line))
        individuals_df.loc[individual_id] = [age, acquire]
    if debug:
        with open("DEBUG_individuals_dataframe.csv", "w") as outfile:
            outfile.write(individuals_df.to_csv())
    return individuals_df
def parse_stdout_file(drug_start_time, start_timestep, stdout_filename="test.txt", debug=False):
    """creates cum_inactivations, inactivation_times arrays
    :param drug_start_time: drug start time
    :param start_timestep: first timestep from config
    :param stdout_filename: file to parse (test.txt)
    :param debug: if True, dump the filtered lines and result arrays to disk
    :return: cum_inactivations, inactivation_times, active_count, inactivations
    """
    filtered_lines = []
    with open(stdout_filename) as logfile:
        for line in logfile:
            if sft.has_match(line, matches):
                filtered_lines.append(line)
    if debug:
        with open("filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)
    # initialize variables
    cum_inactivations = []   # cumulative inactivations, snapshotted per timestep
    inactivation_times = []  # time from drug start (or reactivation) to inactivation
    timestep = 0
    cum = inactivation_daily = infected = active = 0
    infected_individuals = []  # infected count, snapshotted per timestep
    inactivations = []         # daily inactivation count, snapshotted per timestep
    active_count = []          # running active count, snapshotted per timestep
    reactivation_times = {}    # individual id -> timestep of latest reactivation
    for line in filtered_lines:
        if matches[2] in line:
            # Timestep summary line: snapshot the running counters first, then
            # read the new infected count and start a new day.
            cum_inactivations.append(cum)
            infected_individuals.append(infected)
            inactivations.append(inactivation_daily)
            active_count.append(active)
            infected = int(sft.get_val("Infected: ", line))
            inactivation_daily = 0
            timestep += 1
        elif matches[0] in line:
            # deactivation: have to track individual
            individual = int(sft.get_val("TB drug deactivated my \(", line))
            # if timestep <= duration_of_interest + start_timestep + drug_start_time:
            inactivation_time = timestep - start_timestep - drug_start_time
            if individual in reactivation_times:
                # This individual reactivated earlier; measure from that point
                # instead of from the drug start.
                inactivation_time = timestep - reactivation_times[individual]
                reactivation_times.pop(individual)
                # not including the reactivations: there are some data point lost
                # due to simulation end before timers end
                # inactivation_times.append( inactivation_time )
            else:
                cum += 1
                active -= 1
                inactivation_daily += 1
                inactivation_times.append(inactivation_time)
        elif matches[1] in line:
            # "progressing from Latent to Active Presymptomatic while on TB Drugs"
            # activation: have to track individual
            individual = int(sft.get_val("Individual ", line))
            reactivation_times[individual] = timestep
        elif matches[3] in line:
            # move to active
            active += 1
        else:
            # die from HIV: drop the individual from reactivation tracking
            individual = int(sft.get_val("individual ", line))
            if individual in reactivation_times:
                active -= 1
                reactivation_times.pop(individual)
    if debug:
        with open("Cum_Inactivations.txt", "w") as outfile:
            outfile.write(str(cum_inactivations))
        with open("Inactivations.txt", "w") as outfile:
            outfile.write(str(inactivation_times))
        print(
            "there are {} individual in reactivation state at the end of simulation.\n"
            .format(len(reactivation_times)))
    return cum_inactivations, inactivation_times, active_count, inactivations
def parse_output_file(output_filename="test.txt", debug=False):
    """
    creates a dictionary to store filtered information for each time step
    :param output_filename: file to parse (test.txt)
    :param debug: if True, print parsing details and dump the result to disk
    :return: (output_dict, message) — output_dict maps
        time step -> {core: [tb_symptomatic_deaths, hiv_deaths]}; message is
        None on success. On any parse failure returns (None, exception message).
    """
    filtered_lines = []
    with open(output_filename) as logfile:
        for line in logfile:
            if sft.has_match(line, matches):
                filtered_lines.append(line)
    if debug:
        with open("DEBUG_filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)

    # initialize variables
    time_step = 0
    core = 0
    output_dict = {}
    exception_message = None
    for line in filtered_lines:
        try:
            if matches[0] in line:
                # this may raise LookupError
                value = sft.get_val(matches[0], line)
                if debug:
                    print("time value I get is '{}'".format(value))
                # this may raise ValueError
                time_step = int(float(value))
                if matches[1] in line:
                    # this may raise ValueError or LookupError
                    core = int(sft.get_val(matches[1], line))
                else:
                    print(line)
                    # BUGFIX: the message used "{2)" (mismatched brace), which made
                    # str.format itself raise ValueError instead of reporting the
                    # intended diagnostic.
                    raise Exception(
                        "at timestep = {0}, {1} and {2} are not in the same line.\n"
                        .format(time_step, matches[0], matches[1]))
                if debug:
                    print("core is {}".format(core))
                if time_step not in output_dict:
                    output_dict[time_step] = {core: [0, 0]}
                elif core not in output_dict[time_step]:
                    output_dict[time_step][core] = [0, 0]
            elif matches[2] in line:
                # this individual died from TB Symptomatic active
                output_dict[time_step][core][0] += 1
            elif matches[3] in line:
                # this individual died from HIV
                output_dict[time_step][core][1] += 1
        except Exception as ex:
            exception_message = "failed to parse {0}, got exception: {1}.".format(
                output_filename, ex)
            print(exception_message)
            return None, exception_message
    if debug:
        res_path = r'./DEBUG_stdout_parsed.json'
        with open(res_path, "w") as file:
            json.dump(output_dict, file, indent=4)
    return output_dict, exception_message
def parse_stdout_file(stdout_filename="StdOut.txt", simulation_timestep=1, debug=False):
    """
    creates a dataframe to store filtered information for each time step
    :param stdout_filename: file to parse (StdOut.txt)
    :param simulation_timestep: clock increment per time-step summary line
    :param debug: if True, dump the filtered lines and the csv to disk
    :return: stdout_df
    """
    filtered_lines = []
    with open(stdout_filename) as logfile:
        for line in logfile:
            if sft.has_match(line, matches):
                filtered_lines.append(line)
    if debug:
        with open("DEBUG_filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)
    # initialize variables
    time_step = index = envi_sample = 0
    stdout_df = pd.DataFrame(columns=[
        ConfigKeys.Simulation_Timestep, Stdout.stat_pop, Stdout.infected,
        Stdout.test_positive, Stdout.test_negative, Stdout.test_default,
        Stdout.sample, Stdout.ip_value
    ])
    stdout_df.index.name = 'index'
    stat_pop = infected = None
    ip_value = ""
    test_positive = test_negative = test_default = 0
    for line in filtered_lines:
        if matches[0] in line:
            # Time-step summary: the row is appended BEFORE the new counters
            # are read, so each row holds the values accumulated during the
            # previous time step (the very first row carries the initial
            # None/zero values).
            stdout_df.loc[index] = [
                time_step, stat_pop, infected, test_positive, test_negative,
                test_default, envi_sample, ip_value
            ]
            index += 1
            time_step += simulation_timestep
            infected = int(sft.get_val(Stdout.infected, line))
            stat_pop = int(sft.get_val(Stdout.stat_pop, line))
            test_positive = test_negative = test_default = 0
        elif matches[1] in line:
            # Classify the test-result text on the line.
            test_result = sft.get_char(matches[1], line)
            if 'positive' in test_result.lower():
                test_positive += 1
            elif 'negative' in test_result.lower():
                test_negative += 1
            else:
                test_default += 1
        else:
            if matches[2] in line:
                envi_sample = float(sft.get_val(matches[2], line))
            # matches[2] and matches[3] may and may not be in the same line
            if matches[3] in line:
                # ip_value = str(sft.get_char(matches[3], line))
                # Extract the token that carries the IP key and strip the key
                # prefix, keeping only the value part.
                for s in line.split():
                    if matches[3] in s:
                        ip_value = s.replace(matches[3], '')
    if debug:
        res_path = r'./DEBUG_filtered_from_logging.csv'
        stdout_df.to_csv(res_path)
    return stdout_df
def parse_output_file(output_filename="test.txt", debug=False):
    """
    creates a dataframe of time step and infected, infectiouness and stat populations
    :param output_filename: file to parse (test.txt)
    :param debug: if True then print debug_info and write output_df to disk as
        './individual_susceptibility.csv'
    :return: output_df: # of infected population and infectiousness per person
        at each time step
    """
    filtered_lines = []
    with open(output_filename) as logfile:
        for line in logfile:
            if sft.has_match(
                    line,
                    ["Update(): Time: ", KEY_INDIVIDUAL_MOD_ACQUIRE
                     ]):  # search for "Update(): time" | Susceptibility update
                filtered_lines.append(line)
    if debug:
        with open("filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)
    # initialize variables
    time_step = 0
    output_df = pd.DataFrame(columns=[
        KEY_SIMULATION_TIMESTEP, KEY_INDIVIDUAL_ID, KEY_INDIVIDUAL_AGE,
        KEY_INDIVIDUAL_MOD_ACQUIRE, KEY_INDIVIDUAL_IMMUNE_FAILAGE
    ])
    output_df.index.name = "index"
    index = 0
    for line in filtered_lines:
        if "Update(): Time:" in line:
            if debug:
                print("working on... " + line)
                print("time_step = " + str(time_step))
            time_step += 1
            continue
        if time_step == 1:
            # DEVNOTE: we just validate time_step 1 and all individuals
            # NOTE(review): this breaks as soon as the clock reaches 1, so only
            # susceptibility lines printed before the second "Update(): Time:"
            # marker are collected — confirm this matches the log ordering.
            break
        a = parse_name_value_pair(line, print_indvidiual_susceptibility)
        # NOTE(review): `.id` attribute access assumes the KEY_INDIVIDUAL_ID
        # column is literally named "id" — verify against the key constants.
        if index > 0 and output_df.loc[index - 1].id == int(
                a[KEY_INDIVIDUAL_ID]
        ):  # DEVNOTE: somehow we have duplicate Susceptibility where evaluating getModAcquire, not sure why, add this to skip
            continue
        output_df.loc[index] = [
            int(time_step),
            int(a[KEY_INDIVIDUAL_ID]),
            float(a[KEY_INDIVIDUAL_AGE]),
            float(a[KEY_INDIVIDUAL_MOD_ACQUIRE]),
            float(a[KEY_INDIVIDUAL_IMMUNE_FAILAGE])
        ]
        # output_df.id = pd.to_numeric(output_df.id, downcast='signed')
        # not sure why id didn't case to int, but we will cast it anyway
        output_df.id = output_df[KEY_INDIVIDUAL_ID].astype(int)
        index += 1
        continue
    # drop duplicates as dTK will call getModAquire() multiple time per individual per timesteps
    output_df = output_df.drop_duplicates(
        subset=[KEY_SIMULATION_TIMESTEP, KEY_INDIVIDUAL_ID])
    if debug:
        res_path = r'./individual_susceptibility.csv'
        if not os.path.exists(os.path.dirname(res_path)):
            os.makedirs(os.path.dirname(res_path))
        output_df.to_csv(res_path)
    return output_df