def create_report_file(data):
    """
    Validate that time from TB activation to CoInfection death follows the
    expected exponential distribution and write a pass/fail report.

    :param data: sequence of [report_name, stdout log lines,
                 coinfection mortality rate off ART,
                 coinfection mortality rate on ART]
    :return: None (writes report_name and a plot as side effects)
    """
    report_name = data[0]
    lines = data[1]
    coinfection_mortality_rate_off_art = data[2]
    coinfection_mortality_rate_on_art = data[3]  # NOTE(review): unused in this check — confirm intended
    died_of_coinfection = "died of CoInfection"
    state_active_symptomatic = "infectionstatechange TBActivation "
    time_to_death_data = []
    active_infections_dictionary = {}
    success = True
    with open(report_name, "w") as outfile:
        if not lines:
            outfile.write("BAD: No relevant test data found.\n")
            success = False
        for line in lines:
            if died_of_coinfection in line:
                ind_id = int(dtk_sft.get_val("Individual ", line))
                time_stamp = int(dtk_sft.get_val("time= ", line))
                # idiom fix: direct membership test instead of .keys()
                if ind_id in active_infections_dictionary:
                    time_to_death_data.append(time_stamp - active_infections_dictionary[ind_id])
                else:
                    success = False
                    outfile.write("BAD: Individual {} died of coinfection without going active, at time {}."
                                  "\n".format(ind_id, time_stamp))
            elif state_active_symptomatic in line:
                ind_id = int(dtk_sft.get_val("Individual ", line))
                start_time_stamp = int(dtk_sft.get_val("time= ", line))
                if ind_id in active_infections_dictionary:
                    # Duplicate activation is logged but does not fail the test.
                    # fix: the two concatenated literals were missing a separating space.
                    outfile.write("Individual {} went active symptomatic while already being active symptomatic"
                                  " at time {}. \n".format(ind_id, start_time_stamp))
                else:
                    active_infections_dictionary[ind_id] = start_time_stamp
        # expected_data here only used for graphing purposes.
        # fix: materialize map() into a list — a bare map iterator is single-use in Python 3.
        expected_data = list(map(int, np.random.exponential(1 / coinfection_mortality_rate_off_art,
                                                            len(time_to_death_data))))
        if not dtk_sft.test_exponential(time_to_death_data, coinfection_mortality_rate_off_art, outfile,
                                        integers=True, roundup=False, round_nearest=False):
            success = False
        outfile.write("Data points checked = {}.\n".format(len(time_to_death_data)))
        outfile.write("SUMMARY: Success={0}\n".format(success))
    dtk_sft.plot_data(sorted(time_to_death_data), sorted(expected_data),
                      label1="Actual", label2="Expected",
                      title="Time from Smear Negative Off ART TBHIV to Death",
                      xlabel="Data Points", ylabel="Days",
                      category="tbhiv_mortality_smear_negative_off_art",
                      line=True, overlap=True)
def create_report_file(report_data_obj, report_name, debug):
    """
    Compare reported new infections per property group against the expected
    count implied by the calculated immunity, for each outbreak repetition.

    :param report_data_obj: dict of channel name -> per-timestep data
    :param report_name: path of the text report to write
    :param debug: when True, print the summary line to stdout
    :return: True when every group matched within tolerance
    """
    with open(report_name, "w") as outfile:
        success = True
        immunity = calc_immunity(debug)
        new_infections = []
        expected_new_infections = []
        timestep = Outbreak_Start_Day
        if not report_data_obj:
            success = False
            outfile.write("BAD: There is no data in the PropertyReport report")
        else:
            for rep in range(Number_Repetitions):
                for grp, channel in enumerate(KEY_NEW_INFECTIONS_GROUP):
                    observed = report_data_obj[channel][timestep]
                    population = report_data_obj[KEY_STATISTICAL_POPULATION_GROUP[grp]][timestep]
                    expected = population * (1.0 - immunity[grp][rep])
                    # zero expectation demands an exact match; otherwise 2% of population
                    margin = 2e-2 * population if expected != 0.0 else 0.0
                    if math.fabs(observed - expected) > margin:
                        success = False
                        outfile.write(
                            "BAD: At time step {0}, {1} has {2} reported, expected {3}.\n"
                            .format(timestep, channel, observed, expected))
                    new_infections.append(observed)
                    expected_new_infections.append(expected)
                timestep += Timesteps_Between_Repetitions
        outfile.write(sft.format_success_msg(success))
    sft.plot_data(new_infections, expected_new_infections,
                  label1="Actual", label2="Expected",
                  xlabel="group: 0-4 outbreak 1, 5-9 outbreak 2",
                  ylabel="new infection",
                  title="Actual new infection vs. expected new infection",
                  category='New_infections', show=True)
    if debug:
        print("SUMMARY: Success={0}\n".format(success))
    return success
def application(output_folder="output", stdout_filename="test.txt",
                config_filename="config.json", chart_name="InsetChart.json",
                report_name=sft.sft_output_filename, debug=False):
    """
    SFT entry point: wait for the simulation to finish, parse its outputs,
    plot cumulative resistances, and write the scientific feature test report.
    """
    if debug:
        print("output_folder: " + output_folder)
        print("stdout_filename: " + stdout_filename + "\n")
        print("config_filename: " + config_filename + "\n")
        print("chart_name: " + chart_name + "\n")
        print("report_name: " + report_name + "\n")
        print("debug: " + str(debug) + "\n")
    sft.wait_for_done()
    param_obj = load_emod_parameters(config_filename)
    total_timesteps = param_obj[dts.ConfigKeys.KEY_SimulationDuration]
    drug_start_time = param_obj["Drug_Start_Time"]
    start_timestep = param_obj[dts.ConfigKeys.KEY_StartTime]
    # Now process log output (probably) and compare to theory (not in this example) or to another report.
    cum_resistances, initial_resistances = parse_stdout_file(drug_start_time,
                                                            stdout_filename,
                                                            debug)
    sft.plot_data(cum_resistances,
                  label1="Cumulative Resistances", label2="NA",
                  title="Cumulative Resistances over Time",
                  xlabel="Timestep", ylabel="Resistances",
                  category="Cumulative_Resistances", show=True)
    inset_days = parse_json_report(start_timestep, output_folder, chart_name, debug)
    # Now we've ingested all relevant inputs, let's do analysis
    create_report_file(cum_resistances, initial_resistances, drug_start_time,
                       param_obj, report_name, inset_days, debug)
    return None
def parse_json_report(keys, insetchart_name="InsetChart.json", output_folder="output", debug=False):
    """
    Create report_data_obj structure with keys.

    :param keys: channel names to extract from the inset chart
    :param insetchart_name: file to parse (InsetChart.json)
    :param output_folder: folder containing the inset chart
    :param debug: when True, plot the first channel and dump the parsed data
    :return: report_data_obj dict mapping each key to its "Data" list;
             keys that fail to parse are simply absent (exception is printed)
    """
    insetchart_path = os.path.join(output_folder, insetchart_name)
    with open(insetchart_path) as infile:
        icj = json.load(infile)["Channels"]
    report_data_obj = {}
    try:
        for key in keys:
            data = icj[key]["Data"]
            report_data_obj[key] = data
        if debug:
            # this plot is for debugging only.
            # fix: plot the channel's data, not the key string itself.
            dtk_sft.plot_data(report_data_obj[keys[0]], dist2=None,
                              label1=keys[0] + " channel", label2="NA",
                              title=keys[0], xlabel="time step", ylabel=keys[0],
                              category=keys[0], show=True, line=False)
            with open("DEBUG_data_InsetChart.json", "w") as outfile:
                json.dump(report_data_obj, outfile, indent=4)
    except Exception as ex:
        # best-effort: report the failure but return whatever parsed so far
        print("Failed to parse {0}, got exception: {1}".format(insetchart_name, ex))
    return report_data_obj
def create_report_file(param_obj, report_data_obj, report_name, debug):
    """
    Check the second outbreak's new infections per group against the expected
    portion computed from the configuration, and write a pass/fail report.

    :return: True when every group matched within tolerance
    """
    with open(report_name, "w") as outfile:
        success = True
        new_infection_portions = calc_expected_new_infection_portion(param_obj, debug)
        new_infections = []
        expected_new_infections = []
        # skip the first outbreak, which gives the natural immunity
        timestep = Outbreak_Start_Day + Timesteps_Between_Repetitions
        for idx, channel in enumerate(KEY_NEW_INFECTIONS_GROUP):
            observed = report_data_obj[channel][timestep]
            population = report_data_obj[KEY_STATISTICAL_POPULATION_GROUP[idx]][timestep]
            expected = population * (new_infection_portions[idx])
            # zero expectation demands an exact match; otherwise 2% of population
            margin = 2e-2 * population if expected != 0.0 else 0.0
            if math.fabs(observed - expected) > margin:
                success = False
                outfile.write(
                    "BAD: At time step {0}, {1} has {2} reported, expected {3}.\n"
                    .format(timestep, channel, observed, expected))
            new_infections.append(observed)
            expected_new_infections.append(expected)
        outfile.write(sft.format_success_msg(success))
    sft.plot_data(new_infections, expected_new_infections,
                  label1="Actual", label2="Expected",
                  xlabel="0: Control group, 1: Test group",
                  ylabel="new infection",
                  title="Actual new infection vs. expected new infection",
                  category='New_infections', show=True)
    if debug:
        print("SUMMARY: Success={0}\n".format(success))
    return success
def application(output_folder="output", stdout_filename="test.txt",
                config_filename="config.json", campaign_filename="campaign.json",
                demographics_filename="demographics_multiplenodes.json",
                insetchart_name="InsetChart.json",
                report_name=sft.sft_output_filename, debug=False):
    """
    Multi-node SFT entry point: load config/campaign/demographics, parse the
    inset chart, plot the raw channels, and delegate to create_report_file.
    """
    if debug:
        print("output_folder: " + output_folder)
        print("stdout_filename: " + stdout_filename + "\n")
        print("config_filename: " + config_filename + "\n")
        print("campaign_filename: " + campaign_filename + "\n")
        print("demographics_filename: " + demographics_filename + "\n")
        print("insetchart_name: " + insetchart_name + "\n")
        print("report_name: " + report_name + "\n")
        print("debug: " + str(debug) + "\n")
    sft.wait_for_done()
    param_obj = ips.load_emod_parameters(config_filename, debug)
    campaign_obj = load_campaign_file(campaign_filename, debug)
    demographics_obj = load_demographics_file(demographics_filename, debug)
    report_data_obj = ips.parse_json_report(output_folder, insetchart_name, debug)
    sft.plot_data(report_data_obj[KEY_NEW_INFECTIONS],
                  title="new infections", label1="New Infections", label2="NA",
                  xlabel="time steps", ylabel="new infection",
                  category='New_infections', show=True)
    sft.plot_data(report_data_obj[KEY_STATISTICAL_POPULATION],
                  title="Statistical Population", label1="Statistical Population",
                  label2="NA", xlabel="time steps", ylabel="Statistical Population",
                  category='Statistical_popupation', show=True, line=True)
    create_report_file(param_obj, campaign_obj, demographics_obj,
                       report_data_obj, report_name, debug)
def create_report_file(param_obj, multipliers, infectiousness, report_name, debug):
    """
    Verify infectiousness multipliers follow the configured log-normal
    distribution (or are exactly 1.0 when the feature is disabled).

    :param param_obj: dict with LOGNORMAL_SCALE (sigma) and BASE_INFECTIVITY
    :param multipliers: list of observed multipliers
    :param infectiousness: list of observed infectiousness values
    :param report_name: path of the text report to write
    :param debug: when True, dump scipy/emod samples and print the summary
    :return: True on success
    """
    with open(report_name, "w") as outfile:
        success = True
        if not multipliers:
            # NOTE(review): execution continues on empty input — confirm intended
            outfile.write(sft.sft_no_test_data)
        sigma = param_obj[Param_keys.LOGNORMAL_SCALE]
        base_infectivity = param_obj[Param_keys.BASE_INFECTIVITY]
        if sigma > 0:
            # mean of ln(X) chosen so the multiplier's expectation is 1.0
            mu = - sigma**2 / 2.0
            # test log_normal distribution
            success = sft.test_lognorm(multipliers, mu=mu, sigma=sigma,
                                       report_file=outfile, round=False)
            # test mean_l = 1
            mean_l = np.mean(multipliers)
            mean_infectiousness = np.mean(infectiousness)
            outfile.write("mean of the multipliers is {}, expected 1.0.\n".format(mean_l))
            outfile.write("mean of the Infectiousness is {0}, while base infectivity is {1}.\n".format(
                mean_infectiousness, base_infectivity))
            tolerance = 2e-2
            if math.fabs(mean_l - 1.0) > tolerance:
                outfile.write("BAD: mean of the multipliers is {}, expected 1.0.\n".format(mean_l))
                success = False
            # plotting
            size = len(multipliers)
            outfile.write("size is {}\n".format(size))
            scale = math.exp(mu)
            dist_lognormal = stats.lognorm.rvs(sigma, 0, scale, size)
            sft.plot_data(multipliers, dist_lognormal,
                          label1="Emod", label2="Scipy",
                          ylabel="Multiplier", xlabel="data point",
                          category="Emod_vs_Scipy",
                          title="Emod_vs_Scipy, sigma = {}".format(sigma),
                          show=True)
            sft.plot_probability(multipliers, dist_lognormal,
                                 precision=1, label1="Emod", label2="Scipy",
                                 category="Probability_mass_function_Emod_vs_Scipy",
                                 title="Emod_vs_Scipy, sigma = {}".format(sigma),
                                 show=True)
            sft.plot_cdf(multipliers, dist_lognormal,
                         label1="Emod", label2="Scipy", category="cdf",
                         title="cdf, sigma = {}".format(sigma),
                         show=True, line=False)
            if debug:
                with open("scipy_data.txt", "w") as file:
                    for n in sorted(dist_lognormal):
                        file.write(str(n) + "\n")
                with open("emod_data.txt", "w") as file:
                    for n in sorted(multipliers):
                        file.write(str(n) + "\n")
        else:
            # sigma = 0, this feature is disabled
            for multiplier in multipliers:
                if multiplier != 1.0:
                    success = False
                    outfile.write("BAD: multiplier is {0} when {1} set to {2}, expected 1.0.\n".format(
                        multiplier, Param_keys.LOGNORMAL_SCALE, sigma))
            # plotting
            sft.plot_data(multipliers, label1="Multiplier", label2="NA",
                          category="Multiplier", title="Multiplier_Sigma={}".format(sigma),
                          ylabel="Multiplier", xlabel="data point", show=True)
            sft.plot_data(infectiousness, label1="Infectiousness", label2="NA",
                          category="Infectiousness",
                          title="Infectiousness_Sigma={0}_BaseInfectivity={1}".format(sigma, base_infectivity),
                          ylabel="Infectiousness", xlabel="data point", show=True)
        outfile.write(sft.format_success_msg(success))
    if debug:
        # fix: was a Python-2 print statement, inconsistent with the rest of the file
        print("SUMMARY: Success={0}\n".format(success))
    return success
def create_report_file_incidence(column_diseasedeath, column_hivdeath, column_year, json_obj, param_obj,
                                 reporter_df, migration_df, node_list, stdout_filename, report_name, debug):
    """
    Cross-check the custom reporter's death counts (by year) against the
    stdout log (per core) and the inset-chart json, writing a pass/fail report.

    On Windows the stdout comparison is skipped (multi-core stdout merge issue)
    and only the report-vs-insetchart comparison runs.

    :return: True on success
    """
    with open(report_name, "w") as outfile:
        config_name = param_obj[trs.Config.config_name]
        outfile.write("Config_name = {}\n".format(config_name))
        success = True
        simulation_duration = param_obj[Config.duration]
        # timestep = param_obj[Config.simulation_timestep]
        outfile.write("Group the column_to_test by year and get the sum for all age bins:\n")
        # the year column becomes the index of the groupby_df
        groupby_df = reporter_df.groupby(column_year).sum()
        if debug:
            with open("DEBUG_groupby_dataframe.csv", "w") as groupby_file:
                groupby_df.to_csv(groupby_file, header=True)
        outfile.write("checking some test conditions:\n")
        success = tms.check_test_condition(param_obj[Config.num_core], node_list, migration_df, outfile)
        # fix: was `platform.system() is 'Windows'` — identity comparison with a
        # string literal is implementation-dependent; use equality.
        if platform.system() == 'Windows':
            # in Windows, there is issue when merging multiple stdouts(from different cores) into a single one.
            outfile.write("OS is {0}, let's only compare {1} column in report with {2} channel in insetchart output."
                          "\n".format(platform.system(), column_diseasedeath, InsetChart.disease_death))
            # actual test code is outside the if block, result = compare_report_json()
            # skip the following if OS is Windows
        else:
            outfile.write("OS is {}, let's compare report with insetchart and stdout output.\n"
                          .format(platform.system()))
            outfile.write("parse stdout({}) file:\n".format(stdout_filename))
            output_dict, exception_message = parse_output_file(stdout_filename, debug)
            if exception_message:
                success = False
                outfile.write(exception_message + '\n')
                outfile.write("parse stdout file failed, let's still compare report with insetchart output.\n")
                # actual test code is outside the first if block, result = compare_report_json()
                # skip the following if we can't parse the stdout file
            else:
                outfile.write("parse stdout file succeed, let's compare report with both stdout and insetchart output.\n")
                outfile.write("-- compare report with stdout file:\n")
                result1 = True
                if not groupby_df[column_diseasedeath].sum() and not groupby_df[column_hivdeath].sum():
                    success = False
                    outfile.write("BAD: there in no {0} or {1} in the report, please check the test.\n"
                                  .format(column_diseasedeath, column_hivdeath))
                    # skip the following if no interested data in report.
                else:
                    if not len(output_dict):
                        success = False
                        outfile.write(dtk_sft.sft_no_test_data)
                        outfile.write("BAD: stdout file has no test data")
                        # skip the following if parsing stdout doesn't throw exception but there is no
                        # test data in stdout file.
                    else:
                        outfile.write("Testing the {} count with log_valid for all year buckets:\n"
                                      .format(column_diseasedeath + " and " + column_hivdeath))
                        # two test columns that we are looking at
                        test_columns = [column_diseasedeath, column_hivdeath]
                        for n in range(len(test_columns)):
                            column_to_test = test_columns[n]
                            i = incidence_count = 0
                            years = groupby_df.index.values
                            incidence_counts = []
                            for t in output_dict:
                                if i < len(years):
                                    year = years[i]
                                    if t <= round(year * dtk_sft.DAYS_IN_YEAR):
                                        for core in output_dict[t]:
                                            incidence_count += output_dict[t][core][n]
                                    else:
                                        # after the last time step of the reporting window
                                        reporter_sum = int(groupby_df[groupby_df.index == year][column_to_test])
                                        # collected for plot method
                                        incidence_counts.append(incidence_count)
                                        if incidence_count != reporter_sum:
                                            success = result1 = False
                                            outfile.write(
                                                "BAD: in year {0} the {1} count get from reporter is {2}, while test.txt reports"
                                                " {3} cases.\n".format(year, column_to_test, reporter_sum, incidence_count))
                                        # initialize for next test window
                                        incidence_count = 0
                                        # collect the first time step data for each time window
                                        for core in output_dict[t]:
                                            incidence_count += output_dict[t][core][n]
                                        i += 1
                                else:
                                    break
                            dtk_sft.plot_data(incidence_counts, dist2=np.array(groupby_df[column_to_test]),
                                              label1="log_valid", label2="reporter",
                                              title=str(column_to_test),
                                              xlabel="every half year", ylabel=str(column_to_test),
                                              category=str(column_to_test) + "_log_valid",
                                              show=True, line=False, alpha=0.8, overlap=True)
                            outfile.write("Testing whether the reporter year matches the simulation duration:\n")
                            if i != len(years):
                                success = result1 = False
                                outfile.write(
                                    "BAD: the reporter has data up to year {0} but the simulation duration is {1}, "
                                    "we are expecting not more than year {2} from reporter."
                                    "".format(max(years), simulation_duration,
                                              math.floor(simulation_duration / 180)))
                            if simulation_duration > round(max(years) * dtk_sft.DAYS_IN_YEAR) + 180:
                                success = result1 = False
                                outfile.write(
                                    "BAD: the reporter has data up to year {0} but the simulation duration is {1}, "
                                    "we are expecting data after year {0} from reporter."
                                    "".format(max(years), simulation_duration))
                        outfile.write("compare report with stdout file result is {}.\n".format(result1))
                        outfile.write("-- checking if all cores are reporting in every time step in stdout file:\n")
                        core_list = [int(n) for n in (range(param_obj[Config.num_core]))]
                        result2 = True
                        for t, cores in output_dict.items():
                            cores = list(cores.keys())
                            # compare two list of cores
                            if core_list != sorted(cores):
                                result2 = success = False
                                outfile.write(
                                    "BAD: at time step {0}, these cores reported to stdout.txt are: {1}, while "
                                    "expected cores are: {2}.\n".format(t, cores, core_list))
                        outfile.write("checking if all cores are reporting in every time step in stdout file: result is "
                                      "{}.\n".format(result2))
        outfile.write("-- compare {} report with json output(insetchart).\n".format(column_diseasedeath))
        # actual test code is outside the first if block, result = compare_report_json()
        result = tms.compare_report_json(column_diseasedeath, InsetChart.disease_death,
                                         groupby_df, json_obj, outfile)
        if not result:
            success = False
            outfile.write("BAD: report doesn't match insetchart.\n")
        outfile.write("compare {0} in report with json output result is {1}.\n"
                      .format(column_diseasedeath, result))
        outfile.write(dtk_sft.format_success_msg(success))
    if debug:
        print("SUMMARY: Success={0}\n".format(success))
    return success
def create_report_file(param_obj, campaign_obj, report_data_obj, report_name, debug):
    """
    Chi-squared check that new infections binned into 3-month windows match the
    constant expectation implied by base infectivity and the acquisition factor.

    :return: True on success
    """
    with open(report_name, "w") as outfile:
        config_name = param_obj[KEY_CONFIG_NAME]
        outfile.write("Config_name = {}\n".format(config_name))
        success = True
        base_infectivity = param_obj[KEY_BASE_INFECTIVITY]
        start_day = campaign_obj[KEY_START_DAY]
        new_infection = report_data_obj[KEY_NEW_INFECTION]
        immunity_acquisition_factor = param_obj[KEY_IMMUNITY_ACQUISITION_FACTOR]
        # calculate expected number of infections for a time period of 3 months:
        number_of_month = 3
        expected_new_infection = base_infectivity * dtk_sft.DAYS_IN_MONTH * number_of_month * immunity_acquisition_factor
        expected = [expected_new_infection] * (dtk_sft.MONTHS_IN_YEAR // number_of_month)
        # group new infections for every 3 months:
        value_to_test = []
        if len(new_infection) < start_day + dtk_sft.DAYS_IN_YEAR:
            success = False
            outfile.write(
                "BAD: the simulation duration is too short, please make sure it's at least {} days.\n"
                .format(start_day + dtk_sft.DAYS_IN_YEAR))
        outfile.write(
            "running chi-squared test for expected new infections for {0} {1}-months time bins: \n"
            "base_infectivity = {2}, immunity_acquisition_factor = {3}.\n"
            .format(dtk_sft.MONTHS_IN_YEAR // number_of_month, number_of_month,
                    base_infectivity, immunity_acquisition_factor))
        running_total = 0
        # accumulate daily counts; close a bin every number_of_month months
        for day_index, t in enumerate(range(start_day, len(new_infection)), start=1):
            running_total += new_infection[t]
            if not day_index % (number_of_month * dtk_sft.DAYS_IN_MONTH):
                value_to_test.append(running_total)
                running_total = 0
        dtk_sft.plot_data(value_to_test, dist2=expected,
                          label1="actual_new_infections",
                          label2="expected_new_infection",
                          title="actual vs. expected new infection for every {} months".format(number_of_month),
                          xlabel="every {} months".format(number_of_month),
                          ylabel="# of new infections",
                          category='actual_vs_expected_new_infections',
                          show=True, line=True)
        result = dtk_sft.test_multinomial(dist=value_to_test, proportions=expected,
                                          report_file=outfile, prob_flag=False)
        if result:
            outfile.write(
                "GOOD: The Chi-squared test for number of new infections in every {} months passed.\n"
                .format(number_of_month))
        else:
            success = False
            outfile.write(
                "BAD: The Chi-squared test for number of new infections in every {} months failed.\n"
                .format(number_of_month))
        outfile.write(dtk_sft.format_success_msg(success))
    if debug:
        print("SUMMARY: Success={0}\n".format(success))
    return success
def create_report_file(param_obj, output_dict, reporter_df, report_name, debug):
    """
    Compare the TB incidence counted from the stdout log (output_dict,
    time step -> count) against the custom reporter's yearly sums, and check
    that both cover the whole simulation duration.

    :return: True on success
    """
    with open(report_name, "w") as outfile:
        config_name = param_obj[Config.config_name]
        outfile.write("Config_name = {}\n".format(config_name))
        success = True
        simulation_duration = param_obj[Config.duration]
        timestep = param_obj[Config.simulation_timestep]
        if not len(output_dict):
            success = False
            outfile.write(dtk_sft.sft_no_test_data)
        else:
            outfile.write("Group the incidence by year and get the sum for all age bins:\n")
            # the year column becomes the index of the groupby_df
            groupby_df = reporter_df.groupby(ReportColumn.year).sum()
            if debug:
                with open("DEBUG_groupby_dataframe.csv", "w") as groupby_file:
                    groupby_df.to_csv(groupby_file, header=True)
            # last time step the simulation can actually reach given the step size
            expected_max_time_step = math.floor(simulation_duration / timestep) * timestep
            if simulation_duration <= 180 or expected_max_time_step <= 180:
                success = False
                outfile.write("BAD: the simulation duration is too short, please increase the duration.\n")
            elif not groupby_df[ReportColumn.incidence].sum():
                success = False
                outfile.write("BAD: there in no TB incidence in the test, please check the test.\n")
            else:
                outfile.write("Testing the incidence count with log_valid for all year buckets:\n")
                i = incidence_count = 0
                years = groupby_df.index.values
                incidence_counts = []
                for t in output_dict:
                    if i < len(years):
                        year = years[i]
                        if t <= round(year * dtk_sft.DAYS_IN_YEAR):
                            incidence_count += output_dict[t]
                        else:
                            reporter_sum = int(groupby_df[groupby_df.index == year][ReportColumn.incidence])
                            incidence_counts.append(incidence_count)
                            if incidence_count != reporter_sum:
                                success = False
                                outfile.write(
                                    "BAD: in year {0} the incidence count get from reporter is {1}, while test.txt reports"
                                    " {2} cases.\n".format(year, reporter_sum, incidence_count))
                            # this time step belongs to the next window
                            incidence_count = output_dict[t]
                            i += 1
                    else:
                        break
                dtk_sft.plot_data(incidence_counts,
                                  dist2=np.array(groupby_df[ReportColumn.incidence]),
                                  label1="reporter", label2="log_valid",
                                  title="incidence", xlabel="every half year",
                                  ylabel="incidence", category='incidence',
                                  show=True, line=False, alpha=0.8)
                # fix: message typo "mathces" -> "matches"
                outfile.write("Testing whether the time step in log matches the simulation duration:\n")
                max_time_step = max(output_dict.keys())
                if max_time_step != expected_max_time_step:
                    success = False
                    outfile.write("BAD: the last time step in simulation is {0}, expected {1}."
                                  "\n".format(max_time_step, expected_max_time_step))
                outfile.write("Testing whether the reporter year matches the simulation duration:\n")
                if i != len(years):
                    success = False
                    outfile.write(
                        "BAD: the reporter has data up to year {0} but the simulation duration is {1}, we are expecting "
                        "not more than year {2} from reporter.\n".format(
                            max(years), simulation_duration,
                            math.floor(simulation_duration / dtk_sft.DAYS_IN_YEAR)))
                    outfile.write("i={0}, len(years)={1}\n".format(i, len(years)))
                if simulation_duration > round(max(years) * dtk_sft.DAYS_IN_YEAR) + 180:
                    success = False
                    outfile.write(
                        "BAD: the reporter has data up to year {0} but the simulation duration is {1}, we are expecting "
                        "data after year {0} from reporter.\n".format(max(years), simulation_duration))
        outfile.write(dtk_sft.format_success_msg(success))
    if debug:
        print("SUMMARY: Success={0}\n".format(success))
    return success
def create_report_file(param_obj, campaign_obj, output_df, reporter, report_name, debug):
    """
    Compare the negative/default custom-event counts from the stdout log
    (output_df, indexed by time step) against the custom reporter's yearly
    sums. When the campaign treatment fraction is below 1, only the default
    column is verified.

    :return: True on success
    """
    with open(report_name, "w") as outfile:
        config_name = param_obj[Config.config_name]
        outfile.write("Config_name = {}\n".format(config_name))
        success = True
        simulation_duration = param_obj[Config.duration]
        timestep = param_obj[Config.simulation_timestep]
        if not len(output_df):
            success = False
            outfile.write(dtk_sft.sft_no_test_data)
        # reporter[1] is a boolean.
        # True means parse_custom_reporter succeeded and reporter[0] is a dataframe collected from the csv report.
        # False means parse_custom_reporter failed and reporter[0] is an error message.
        elif not reporter[1]:
            success = False
            outfile.write("BAD: failed to parse report, get exception : {}.\n".format(reporter[0]))
        else:
            outfile.write("GOOD: parse report successfully.\n")
            reporter_df = reporter[0]
            # fix: message typo "envent" -> "event"
            outfile.write("Group the custom event column by year and get the sum for all age bins:\n")
            # the year column becomes the index of the groupby_df
            groupby_df = reporter_df.groupby(ReportColumn.year).sum()
            if debug:
                with open("DEBUG_groupby_dataframe.csv", "w") as groupby_file:
                    groupby_df.to_csv(groupby_file, header=True)
            outfile.write("Checking whether we have enough test data:\n")
            expected_max_time_step = math.floor(simulation_duration / timestep) * timestep
            if simulation_duration <= 180 or expected_max_time_step <= 180:
                success = False
                outfile.write("BAD: the simulation duration is too short, please increase the duration.\n")
            elif (not groupby_df[ReportColumn.negative].sum()) and (not groupby_df[ReportColumn.default].sum()):
                success = False
                outfile.write("BAD: there in no {0} and {1} in the test, please check the test.\n"
                              .format(ReportColumn.negative, ReportColumn.default))
            else:
                outfile.write("Checking more test condition:\n")
                treatment_fraction = float(campaign_obj[Campaign.treatment])
                outfile.write("{0} in Campaign.json is {1}.\n".format(Campaign.treatment, treatment_fraction))
                if treatment_fraction == 1:
                    test_treatment_only = False
                    outfile.write("Testing the {0} and {1} count with log_valid for all year buckets:\n"
                                  .format(ReportColumn.negative, ReportColumn.default))
                else:
                    test_treatment_only = True
                    outfile.write("Testing the {0} count with log_valid for all year buckets:\n"
                                  .format(ReportColumn.default))
                i = default_count = negative_count = 0
                years = groupby_df.index.values
                negative_counts = []
                default_counts = []
                for t in output_df.index.values.tolist():
                    if i < len(years):
                        year = years[i]
                        if t <= round(year * dtk_sft.DAYS_IN_YEAR):
                            default_count += output_df.loc[t][ReportColumn.default]
                            negative_count += output_df.loc[t][ReportColumn.negative]
                        else:
                            reporter_negative_sum = int(groupby_df[groupby_df.index == year][ReportColumn.negative])
                            reporter_default_sum = int(groupby_df[groupby_df.index == year][ReportColumn.default])
                            negative_counts.append(negative_count)
                            default_counts.append(default_count)
                            # negative column is only meaningful when everyone is treated
                            if not test_treatment_only:
                                if negative_count != reporter_negative_sum:
                                    success = False
                                    outfile.write(
                                        "BAD: in year {0} the {1} count get from reporter is {2}, while "
                                        "test.txt reports {3} cases.\n".format(
                                            year, ReportColumn.negative,
                                            reporter_negative_sum, negative_count))
                            # NOTE(review): default column is checked in both modes,
                            # consistent with the "Testing the {default} count" message above — confirm
                            if default_count != reporter_default_sum:
                                success = False
                                outfile.write(
                                    "BAD: in year {0} the {1} count get from reporter is {2}, while test.txt reports"
                                    " {3} cases.\n".format(
                                        year, ReportColumn.default,
                                        reporter_default_sum, default_count))
                            # start the next window with this time step's counts
                            default_count = output_df.loc[t][ReportColumn.default]
                            negative_count = output_df.loc[t][ReportColumn.negative]
                            i += 1
                    else:
                        break
                if not test_treatment_only:
                    dtk_sft.plot_data(negative_counts,
                                      dist2=np.array(groupby_df[ReportColumn.negative]),
                                      label1="reporter", label2="log_valid",
                                      title=ReportColumn.negative,
                                      xlabel="every half year", ylabel=ReportColumn.negative,
                                      category=ReportColumn.negative,
                                      show=True, line=True, alpha=0.8, overlap=True)
                dtk_sft.plot_data(default_counts,
                                  dist2=np.array(groupby_df[ReportColumn.default]),
                                  label1="reporter", label2="log_valid",
                                  title=ReportColumn.default,
                                  xlabel="every half year", ylabel=ReportColumn.default,
                                  category=ReportColumn.default,
                                  show=True, line=True, alpha=0.8, overlap=True)
                # fix: message typo "mathces" -> "matches"
                outfile.write("Testing whether the time step in log matches the simulation duration:\n")
                max_time_step = max(output_df.index.values)
                if abs(max_time_step - expected_max_time_step) > 1:
                    success = False
                    outfile.write("BAD: the last time step in simulation is {0}, expected {1}."
                                  "\n".format(max_time_step, expected_max_time_step))
                outfile.write("Testing whether the reporter year matches the simulation duration:\n")
                if i != len(years):
                    success = False
                    outfile.write(
                        "BAD: the reporter has data up to year {0} but the simulation duration is {1}, we are expecting "
                        "not more than year {2} from reporter.".format(
                            max(years), simulation_duration,
                            math.floor(simulation_duration / 180)))
                if simulation_duration > round(max(years) * dtk_sft.DAYS_IN_YEAR) + 180:
                    success = False
                    outfile.write(
                        "BAD: the reporter has data up to year {0} but the simulation duration is {1}, we are expecting "
                        "data after year {0} from reporter.".format(max(years), simulation_duration))
        outfile.write(dtk_sft.format_success_msg(success))
    if debug:
        print("SUMMARY: Success={0}\n".format(success))
    return success
def create_report_file(report_data_obj, report_name, debug):
    """
    Verify acquisition, transmission, and mortality blocking of the vaccine
    across the four property groups at the outbreak time step.

    :return: True on success
    """
    with open(report_name, "w") as outfile:
        success = True
        timestep = Outbreak_Start_Day
        effect = prime
        new_infections = []
        statistical_populations = []
        disease_deaths = []
        for i in range(len(KEY_NEW_INFECTIONS_GROUP)):
            new_infection = report_data_obj[KEY_NEW_INFECTIONS_GROUP[i]][timestep]
            statistical_population = report_data_obj[KEY_STATISTICAL_POPULATION_GROUP[i]][timestep]
            # disease death in the last two groups happen 1 day later than the first two groups.
            disease_death = report_data_obj[KEY_DISEASE_DEATHS_GROUP[i]][int(timestep + i / 2)]
            new_infections.append(new_infection)
            statistical_populations.append(statistical_population)
            disease_deaths.append(disease_death)
        # test acquisition blocking
        new_infection_seed_test = new_infections[1]
        statistical_population_seed_test = statistical_populations[1]
        expected_new_infection_seed_test = statistical_population_seed_test * (1.0 - effect)
        tolerance_1 = 0.0 if expected_new_infection_seed_test == 0.0 else 2e-2 * statistical_population_seed_test
        if math.fabs(new_infection_seed_test - expected_new_infection_seed_test) > tolerance_1:
            success = False
            outfile.write(
                "BAD: At time step {0}, {1} reported new infections in Group 2_Seed_Test, expected {2}.\n"
                .format(timestep, new_infection_seed_test, expected_new_infection_seed_test))
        # test transmission blocking
        new_infection_seed_control = new_infections[0]
        new_infection_control = new_infections[2]
        new_infection_test = new_infections[3]
        expected_new_infection_test = (1.0 - effect) * new_infection_control * \
            new_infection_seed_test / float(new_infection_seed_control)
        statistical_population_test = statistical_populations[3]
        tolerance_2 = 0.0 if expected_new_infection_test == 0.0 else 2e-2 * statistical_population_test
        if math.fabs(new_infection_test - expected_new_infection_test) > tolerance_2:
            success = False
            # fix: was `new_infectio_test`, an undefined name (NameError at runtime)
            outfile.write(
                "BAD: At time step {0}, {1} reported new infections in Group 4_Test, expected {2}.\n"
                .format(timestep, new_infection_test, expected_new_infection_test))
        # test mortality blocking
        disease_death_seed_test = disease_deaths[1]
        expected_disease_death_seed_test = new_infection_seed_test * (1.0 - effect)
        tolerance_3 = 0.0 if expected_disease_death_seed_test == 0.0 else 2e-2 * new_infection_seed_test
        if math.fabs(disease_death_seed_test - expected_disease_death_seed_test) > tolerance_3:
            success = False
            outfile.write(
                "BAD: At time step {0}, {1} reported disease deaths in Group 2_Seed_Test, expected {2}.\n"
                .format(timestep, disease_death_seed_test, expected_disease_death_seed_test))
        outfile.write(sft.format_success_msg(success))
    sft.plot_data(new_infections, disease_deaths,
                  label1="new_infections", label2="disease_death",
                  xlabel="0:1_Seed_Control, 1:2_Seed_Test, 2:3_Control, 4:3_Test",
                  title="new_infections vs. disease_death",
                  category='New_infections_vs_disease_death', show=True)
    if debug:
        print("SUMMARY: Success={0}\n".format(success))
    return success
def create_report_file(report_data_obj, report_name, debug):
    """Check per-outbreak new disease deaths against the expected mortality-blocking effects.

    For each outbreak in `Interventions`, the expected number of new disease deaths is
    (1 - effect) * new_infections, where `effect` comes from calc_effect(debug).
    Failures are written to `report_name`; plots compare actual vs. expected deaths
    and deaths vs. infections.

    :param report_data_obj: dict of channel name -> per-timestep values (parsed report)
    :param report_name: path of the text report file to write
    :param debug: when True, dump intermediate data files and print the summary
    :return: True if every outbreak passed the tolerance check, False otherwise
    """
    with open(report_name, "w") as outfile:
        success = True
        timestep = Outbreak_Start_Day
        effects = calc_effect(debug)
        new_infections = []
        new_disease_deaths = []
        expected_new_disease_deaths = []
        actual_effects = []
        # deaths channel is cumulative; track previous value to get per-outbreak deltas
        pre_disease_death = 0
        for i in range(len(Interventions)):
            new_infection = report_data_obj[KEY_NEW_INFECTIONS][timestep]
            disease_death = report_data_obj[KEY_DISEASE_DEATHS][timestep]
            new_disease_death = disease_death - pre_disease_death
            effect = effects[i]
            expected_new_disease_death = (1.0 - effect) * new_infection
            tolerance = 0.0 if expected_new_disease_death == 0.0 else 3e-2 * new_infection
            # back out the effect actually observed (guard against division by zero)
            actual_effect = 1.0 - new_disease_death / float(new_infection) if new_infection != 0 else 0.0
            if math.fabs(new_disease_death - expected_new_disease_death) > tolerance:
                success = False
                outfile.write(
                    "BAD: At time step {0}, outbreak {1}, {2} reported new disease death, expected {3}.\n"
                    .format(timestep, Interventions[i], new_disease_death, expected_new_disease_death))
                outfile.write(
                    "actual MortalityBlocking effect is {0}, expected {1}.\n".
                    format(actual_effect, effect))
            new_disease_deaths.append(new_disease_death)
            expected_new_disease_deaths.append(expected_new_disease_death)
            actual_effects.append(actual_effect)
            new_infections.append(new_infection)
            # advance to the next outbreak's time step
            timestep += Timesteps_Between_Repetitions
            pre_disease_death = disease_death
        sft.plot_data(new_disease_deaths, expected_new_disease_deaths,
                      label1="Actual", label2="Expected",
                      xlabel="outbreak", ylabel="disease death",
                      title="Actual disease death vs. expected disease death",
                      category='Disease_death', show=True)
        sft.plot_data(new_disease_deaths, new_infections,
                      label1="death", label2="infection",
                      xlabel="outbreak", ylabel="population",
                      title="Actual disease death vs. new infections",
                      category='disease_death_vs_new_infections', show=True)
        if debug:
            with open("New_disease_death.txt", "w") as file:
                for i in range(len(new_disease_deaths)):
                    file.write("{0}, {1}.\n".format(
                        new_disease_deaths[i], expected_new_disease_deaths[i]))
            with open("Effects.txt", "w") as file:
                for i in range(len(actual_effects)):
                    file.write("{0}, {1}.\n".format(actual_effects[i], effects[i]))
        outfile.write(sft.format_success_msg(success))
        if debug:
            print("SUMMARY: Success={0}\n".format(success))
        return success
def create_report_file(param_obj, node_list, campaign_obj, migration_df, report_data_obj,
                       stdout_filename, report_name, debug):
    """Chi-squared test of monthly new infections against a waning-immunity model, multi-core.

    Computes the expected number of new infections per monthly bin from base infectivity
    and an exponentially decaying acquisition-immunity factor, compares with actual
    binned counts via a chi-squared (multinomial) test, and verifies every core
    reported at every time step in stdout.

    :param param_obj: dict of config parameters (base infectivity, decay rate, cores, ...)
    :param node_list: list of simulation nodes (expected counts scale by its length)
    :param campaign_obj: dict with the campaign start day
    :param migration_df: migration data passed to the test-condition check
    :param report_data_obj: dict of channel name -> per-timestep values
    :param stdout_filename: stdout log to parse for per-core reporting
    :param report_name: path of the text report file to write
    :param debug: passed through to the stdout parser
    :return: True if all checks pass, False otherwise
    """
    with open(report_name, "w") as outfile:
        config_name = param_obj[KEY_CONFIG_NAME]
        outfile.write("Config_name = {}\n".format(config_name))
        success = True
        base_infectivity = param_obj[KEY_BASE_INFECTIVITY]
        start_day = campaign_obj[KEY_START_DAY]
        new_infection = report_data_obj[KEY_NEW_INFECTION]
        immunity_acquisition_factor = param_obj[KEY_IMMUNITY_ACQUISITION_FACTOR]
        decay_rate = param_obj[KEY_DECAY_RATE]
        outfile.write("checking some test conditions:\n")
        outfile.write("  -- simulation duration: {} days\n".format(len(new_infection)))
        # need at least one full year of data after the campaign start
        if len(new_infection) < start_day + 1 + dtk_sft.DAYS_IN_YEAR:
            success = False
            outfile.write(
                "BAD: the simulation duration is too short, please make sure it's at least {} days.\n"
                .format(start_day + 1 + dtk_sft.DAYS_IN_YEAR))
        result = tms.check_test_condition(param_obj[KEY_NUM_CORES], node_list, migration_df, outfile)
        if not result:
            success = False
            # summary message is writen to the report file in the check_test_condition function
        number_of_month = 1
        outfile.write(
            "calculate expected number of infections for a time period of {} month("
            "unit is 1/years):\n".format((number_of_month)))
        t_initial = 0
        expected = []
        # convert per-day rates to per-year so t runs over [0, 1] years below
        decay_rate *= dtk_sft.DAYS_IN_YEAR
        base_infectivity *= dtk_sft.DAYS_IN_YEAR
        step = number_of_month / dtk_sft.MONTHS_IN_YEAR
        for t_final in np.arange(step, 1.01, step):
            # closed-form integral of infectivity with exponentially decaying immunity
            expected_new_infection = base_infectivity * (t_final - t_initial) - base_infectivity * (
                1.0 - immunity_acquisition_factor) / decay_rate * math.exp(
                    -1 * decay_rate * t_initial) * (1.0 - math.exp(-1 * decay_rate * (t_final - t_initial)))
            expected_new_infection *= len(node_list)
            expected.append(expected_new_infection)
            t_initial = t_final
        # group new infections for every month:
        value_to_test = []
        outfile.write(
            "running chi-squared test for actual vs expected new infections for {0} {1}-months time bins: \n"
            "base_infectivity = {2}, immunity_acquisition_factor = {3}, decay rate = {4}.(unit is 1/years)\n"
            .format(dtk_sft.MONTHS_IN_YEAR // number_of_month, number_of_month,
                    base_infectivity, immunity_acquisition_factor, decay_rate))
        actual_new_infection = 0
        i = 0
        for t in range(start_day + 1, len(new_infection)):
            actual_new_infection += new_infection[t]
            i += 1
            # close out a bin every number_of_month months
            if not i % (number_of_month * dtk_sft.DAYS_IN_MONTH):
                value_to_test.append(actual_new_infection)
                actual_new_infection = 0
        dtk_sft.plot_data(
            value_to_test,
            dist2=expected,
            label1="actual_new_infections",
            label2="expected_new_infection",
            title="actual vs. expected new infection for every {} month".format(number_of_month),
            xlabel="month",
            ylabel="# of new infections",
            category='actual_vs_expected_new_infections',
            show=True,
            line=False)
        result = dtk_sft.test_multinomial(dist=value_to_test,
                                          proportions=expected,
                                          report_file=outfile,
                                          prob_flag=False)
        if not result:
            success = False
            outfile.write(
                "BAD: The Chi-squared test for number of new infections in every {} months failed.\n"
                .format(number_of_month))
        else:
            outfile.write(
                "GOOD: The Chi-squared test for number of new infections in every {} months passed.\n"
                .format(number_of_month))
        output_dict = parse_output_file(stdout_filename, debug)
        outfile.write("checking if all cores are reporting in every time step in stdout file:\n")
        core_list = [str(n) for n in (range(param_obj[KEY_NUM_CORES]))]
        for t, cores in output_dict.items():
            if core_list != sorted(cores):
                success = False
                outfile.write(
                    "BAD: at time step {0}, these cores reported to stdout.txt are: {1}, while "
                    "expected cores are: {2}.\n".format(t, cores, core_list))
        outfile.write(dtk_sft.format_success_msg(success))
        if debug:
            print("SUMMARY: Success={0}\n".format(success))
        return success
def create_report_file_incidence(column_to_test, column_year, param_obj, output_dict,
                                 reporter_df, report_name, debug):
    """Compare incidence counts from the reporter CSV with counts parsed from log output.

    Groups the reporter dataframe by year (summing over age bins), then walks the
    per-timestep log counts in `output_dict`, accumulating them into year buckets and
    checking each bucket against the reporter's sum. Also checks that the reporter
    covers the simulation duration.

    :param column_to_test: reporter column holding the incidence counts
    :param column_year: reporter column holding the (fractional) report year
    :param param_obj: dict of config parameters (config name, duration)
    :param output_dict: timestep -> count parsed from the simulation log
    :param reporter_df: pandas DataFrame loaded from the reporter output
    :param report_name: path of the text report file to write
    :param debug: when True, dump the grouped dataframe and print the summary
    :return: True if all checks pass, False otherwise
    """
    with open(report_name, "w") as outfile:
        config_name = param_obj[Config.config_name]
        outfile.write("Config_name = {}\n".format(config_name))
        success = True
        simulation_duration = param_obj[Config.duration]
        # timestep = param_obj[Config.simulation_timestep]
        if not len(output_dict):
            success = False
            outfile.write(dtk_sft.sft_no_test_data)
        else:
            outfile.write(
                "Group the {} by year and get the sum for all age bins:\n".format(column_to_test))
            # the year column becomes the index of the groupby_df
            groupby_df = reporter_df.groupby(column_year).sum()
            if debug:
                with open("DEBUG_groupby_dataframe.csv", "w") as groupby_file:
                    groupby_df.to_csv(groupby_file, header=True)
            if not groupby_df[column_to_test].sum():
                success = False
                outfile.write(
                    "BAD: there in no {} in the test, please check the test.\n".format(column_to_test))
            else:
                outfile.write(
                    "Testing the {} count with log_valid for all year buckets:\n".format(column_to_test))
                i = incidence_count = 0
                years = groupby_df.index.values
                incidence_counts = []
                for t in output_dict:
                    if i < len(years):
                        year = years[i]
                        # still inside the current year bucket: keep accumulating
                        if t <= round(year * dtk_sft.DAYS_IN_YEAR):
                            incidence_count += output_dict[t]
                        else:
                            # bucket complete: compare with the reporter's sum for that year
                            reporter_sum = int(groupby_df[groupby_df.index == year][column_to_test])
                            incidence_counts.append(incidence_count)
                            if incidence_count != reporter_sum:
                                success = False
                                outfile.write(
                                    "BAD: in year {0} the {1} count get from reporter is {2}, while test.txt reports"
                                    " {3} cases.\n".format(year, column_to_test, reporter_sum, incidence_count))
                            # current timestep starts the next bucket
                            incidence_count = output_dict[t]
                            i += 1
                    else:
                        break
                dtk_sft.plot_data(incidence_counts,
                                  dist2=np.array(groupby_df[column_to_test]),
                                  label1="reporter",
                                  label2="log_valid",
                                  title=str(column_to_test),
                                  xlabel="every half year",
                                  ylabel=str(column_to_test),
                                  category=str(column_to_test),
                                  show=True,
                                  line=False,
                                  alpha=0.8,
                                  overlap=True)
                outfile.write("Testing whether the reporter year matches the simulation duration:\n")
                # reporter stopped early relative to the simulation duration
                if i != len(years):
                    success = False
                    outfile.write(
                        "BAD: the reporter has data up to year {0} but the simulation duration is {1}, we are expecting "
                        "not more than year {2} from reporter.".format(
                            max(years), simulation_duration,
                            math.floor(simulation_duration / 180)))
                # simulation ran long enough that another half-year report was expected
                if simulation_duration > round(max(years) * dtk_sft.DAYS_IN_YEAR) + 180:
                    success = False
                    outfile.write(
                        "BAD: the reporter has data up to year {0} but the simulation duration is {1}, we are expecting "
                        "data after year {0} from reporter.".format(max(years), simulation_duration))
        outfile.write(dtk_sft.format_success_msg(success))
        if debug:
            print("SUMMARY: Success={0}\n".format(success))
        return success
def create_report_file_prevalence(column_to_test, column_year, param_obj, output_dict,
                                  reporter_df, report_name, debug):
    """Compare prevalence from the reporter CSV with log output at each report window's last step.

    Groups the reporter dataframe by year (summing over age bins) and checks that the
    prevalence logged at the last time step of each report window equals the reporter's
    value. Average-prevalence checking is present but commented out.

    :param column_to_test: reporter column holding the prevalence counts
    :param column_year: reporter column holding the (fractional) report year
    :param param_obj: dict of config parameters (config name)
    :param output_dict: timestep -> prevalence count parsed from the simulation log
    :param reporter_df: pandas DataFrame loaded from the reporter output
    :param report_name: path of the text report file to write
    :param debug: when True, dump intermediate data and print the summary
    :return: True if all checks pass, False otherwise
    """
    with open(report_name, "w") as outfile:
        config_name = param_obj[Config.config_name]
        outfile.write("Config_name = {}\n".format(config_name))
        success = True
        # simulation_duration = param_obj[Config.duration]
        # timestep = param_obj[Config.simulation_timestep]
        if not len(output_dict):
            success = False
            outfile.write(dtk_sft.sft_no_test_data)
        outfile.write(
            "Group the {} prevalence by year and get the sum for all age bins:\n".format(column_to_test))
        # the year column becomes the index of the groupby_df
        groupby_df = reporter_df.groupby(column_year).sum()
        if not groupby_df[column_to_test].sum():
            success = False
            outfile.write(
                "BAD: there in no {} prevalence in the test, please check the test.\n".format(column_to_test))
        if debug:
            with open("DEBUG_groupby_dataframe.csv", "w") as groupby_file:
                groupby_df.to_csv(groupby_file, header=True)
        outfile.write(
            "Testing the {} prevalence count with log_valid for all year buckets:\n".format(column_to_test))
        i = prevalence_sum = 0
        years = groupby_df.index.values
        prevalence_counts = []
        # timestep -> logged prevalence at the final step of each report window
        prevalence_at_last_time_step = {}
        for t in output_dict:
            if i < len(years):
                year = years[i]
                if t <= round(year * dtk_sft.DAYS_IN_YEAR):
                    prevalence_sum += output_dict[t]
                    # remember the value at the window's closing time step
                    if t == round(year * dtk_sft.DAYS_IN_YEAR):
                        prevalence_at_last_time_step[t] = output_dict[t]
                else:
                    reporter_sum = int(groupby_df[groupby_df.index == year][column_to_test])
                    # average prevalence over the window (graphing only; check is commented out)
                    prevalence = prevalence_sum / (years[0] * dtk_sft.DAYS_IN_YEAR)
                    prevalence_counts.append(prevalence)
                    # uncomment the following lines to test average prevalence
                    # if prevalence != reporter_sum:
                    #     success = False
                    #     outfile.write("BAD: in year {0} the HIV prevalence count get from reporter is {1}, while test.txt reports"
                    #                   " {2} cases.\n".format(year, reporter_sum, prevalence))
                    if prevalence_at_last_time_step[sorted(
                            prevalence_at_last_time_step.keys())[-1]] != reporter_sum:
                        success = False
                        outfile.write(
                            "BAD: in year {0} the {1} prevalence count get from reporter is {2}, while test.txt reports"
                            " {3} cases at the last time step of this report time window.\n"
                            .format(year, column_to_test, reporter_sum,
                                    prevalence_at_last_time_step[sorted(
                                        prevalence_at_last_time_step.keys())[-1]]))
                    # current timestep starts the next window's accumulation
                    prevalence_sum = output_dict[t]
                    i += 1
            else:
                break
        # uncomment the following lines to plot average prevalence from logging and prevalence from reporter
        # dtk_sft.plot_data(prevalence_counts, dist2=np.array(groupby_df[ReportColumn.HIV]), label1="log_valid_on_average",
        #                   label2="reporter", title="HIV prevalence",
        #                   xlabel="every half year", ylabel="HIV prevalence", category='HIV_prevalence',
        #                   show=True, line=True, alpha=0.8, overlap=True)
        dtk_sft.plot_data(
            [
                prevalence_at_last_time_step[key]
                for key in sorted(prevalence_at_last_time_step.keys())
            ],
            dist2=np.array(groupby_df[column_to_test]),
            label1="log_valid",
            label2="reporter",
            title="{} prevalence at last timestep of each report time window".format(column_to_test),
            xlabel="every half year",
            ylabel="{} prevalence".format(column_to_test),
            category='{}_prevalence_last_time_step'.format(column_to_test),
            show=True,
            line=True,
            alpha=0.8,
            overlap=True)
        if debug:
            with open('DEBUG_prevalence_at_last_time_step.json', 'w') as file:
                json.dump(prevalence_at_last_time_step, file, indent=4)
        outfile.write(dtk_sft.format_success_msg(success))
        if debug:
            print("SUMMARY: Success={0}\n".format(success))
        return success
def create_report_file(report_data_obj, report_name, debug):
    """Validate prime + boost acquisition/transmission/mortality blocking over repeated outbreaks.

    Runs the three blocking checks once per repetition, boosting each effect between
    repetitions via effect += (1 - effect) * Boost_*. Writes failures to `report_name`,
    plots infections vs. deaths, and returns the overall success flag.

    :param report_data_obj: dict of channel name -> per-timestep values (parsed report)
    :param report_name: path of the text report file to write
    :param debug: when True, also print the summary line to stdout
    :return: True if all checks in all repetitions pass, False otherwise
    """
    with open(report_name, "w") as outfile:
        success = True
        timestep = Outbreak_Start_Day
        effect_a = Prime_Acquire
        effect_t = Prime_Transmit
        effect_m = Prime_Mortality
        new_infections = []
        statistical_populations = []
        new_disease_deaths = []
        for x in range(Number_Repetitions):
            num_group = len(KEY_NEW_INFECTIONS_GROUP)
            for i in range(num_group):
                new_infection = report_data_obj[KEY_NEW_INFECTIONS_GROUP[i]][timestep]
                statistical_population = report_data_obj[KEY_STATISTICAL_POPULATION_GROUP[i]][timestep]
                # disease death in the last 2 groups happen 1 day later than the first 2 groups.
                pre_disease_death = report_data_obj[KEY_DISEASE_DEATHS_GROUP[i]][int(timestep + i/2 - 1)]
                disease_death = report_data_obj[KEY_DISEASE_DEATHS_GROUP[i]][int(timestep + i/2)]
                new_disease_death = disease_death - pre_disease_death
                new_infections.append(new_infection)
                statistical_populations.append(statistical_population)
                new_disease_deaths.append(new_disease_death)

            # test acquisition blocking
            new_infection_seed_test = new_infections[1 + x * num_group]
            statistical_population_seed_test = statistical_populations[1 + x * num_group]
            expected_new_infection_seed_test = statistical_population_seed_test * (1.0 - effect_a) * Outbreak_Demographic_Coverage
            tolerance_1 = 0.0 if expected_new_infection_seed_test == 0.0 else 2e-2 * statistical_population_seed_test
            if math.fabs(new_infection_seed_test - expected_new_infection_seed_test) > tolerance_1:
                success = False
                outfile.write("BAD: At time step {0}, {1} reported new infections in Group 2_Seed_Test, expected {2}.\n".format(
                    timestep, new_infection_seed_test, expected_new_infection_seed_test))

            # test transmission blocking
            new_infection_seed_control = new_infections[0 + x * num_group]
            new_infection_control = new_infections[2 + x * num_group]
            new_infection_test = new_infections[3 + x * num_group]
            expected_new_infection_test = (1.0 - effect_t) * new_infection_control * new_infection_seed_test / float(new_infection_seed_control)
            # BUGFIX: tolerance must use the current repetition's population; the
            # original indexed statistical_populations[3] (repetition 0) for every x.
            statistical_population_test = statistical_populations[3 + x * num_group]
            tolerance_2 = 0.0 if expected_new_infection_test == 0.0 else 2e-2 * statistical_population_test
            if math.fabs(new_infection_test - expected_new_infection_test) > tolerance_2:
                success = False
                outfile.write("BAD: At time step {0}, {1} reported new infections in Group 4_Test, expected {2}.\n".format(
                    timestep, new_infection_test, expected_new_infection_test))

            # test mortality blocking
            disease_death_seed_test = new_disease_deaths[1 + x * num_group]
            expected_disease_death_seed_test = new_infection_seed_test * (1.0 - effect_m)
            tolerance_3 = 0.0 if expected_disease_death_seed_test == 0.0 else 2e-2 * new_infection_seed_test
            if math.fabs(disease_death_seed_test - expected_disease_death_seed_test) > tolerance_3:
                success = False
                outfile.write("BAD: At time step {0}, {1} reported disease deaths in Group 2_Seed_Test, expected {2}.\n".format(
                    timestep, disease_death_seed_test, expected_disease_death_seed_test))

            # advance to the next repetition and apply the boost to each effect
            timestep += Timesteps_Between_Repetitions
            effect_a = effect_a + (1.0 - effect_a) * Boost_Acquire
            effect_t = effect_t + (1.0 - effect_t) * Boost_Transmit
            effect_m = effect_m + (1.0 - effect_m) * Boost_Mortality

        outfile.write(sft.format_success_msg(success))
        # FIX: axis label previously read "4&7:Test"; group indices 3 & 7 are the Test group
        sft.plot_data(new_infections, new_disease_deaths,
                      label1="new_infections", label2="disease_death",
                      xlabel="0&4:Seed_Control, 1&5:Seed_Test, 2&6:Control, 3&7:Test",
                      title="new_infections vs. new_disease_death",
                      category='New_infections_vs_new_disease_death', show=True)
        if debug:
            print("SUMMARY: Success={0}\n".format(success))
        return success
def create_report_file(param_obj, output_df, report_df, report_name, debug):
    """Check per-timestep infectiousness against an exponential-ramp infectivity model.

    Accumulates expected infectiousness contributions from each cohort of newly
    infected individuals (via calculate_infectiousness), normalizes by the
    statistical population, and compares with the reported infectiousness channel
    within a 3% relative tolerance.

    :param param_obj: dict of config parameters (duration, timestep, infectivity params)
    :param output_df: DataFrame of stdout channels (infected, infectiousness, stat pop)
    :param report_df: DataFrame of report channels (new/cumulative infections)
    :param report_name: path of the text report file to write
    :param debug: when True, dump the actual-vs-expected series to a file
    :return: True if every time step is within tolerance, False otherwise
    """
    total_timesteps = int(param_obj[KEY_TOTAL_TIMESTEPS])
    simulation_timestep = float(param_obj[KEY_SIMULATION_TIMESTEP])
    base_infectivity = float(param_obj[KEY_BASE_INFECTIVITY])
    baseline = float(param_obj[KEY_INFECTIVITY_EXPONENTIAL_BASELINE])
    delay = float(param_obj[KEY_INFECTIVITY_EXPONENTIAL_DELAY])
    rate = float(param_obj[KEY_INFECTIVITY_EXPONENTIAL_RATE])
    infected = output_df[KEY_INFECTED]
    infectiousness = output_df[KEY_INFECTIOUSNESS]
    statpop = output_df[KEY_STAT_POP]
    new_infections = report_df[KEY_NEW_INFECTIONS]
    cumulative_infections = report_df[KEY_CUMULATIVE_INFECTIONS]
    dtk_sft.plot_data(
        new_infections,
        cumulative_infections,
        label1="new infections",
        label2="cumulative infections",
        title="Exponential_Delay: {0} days, Exponential_Rate: {1} ".format(delay, rate),
        xlabel="time step / simulation_timestep{0}".format(simulation_timestep),
        ylabel=None,
        category='New_infections_vs_cumulative_infections',
        show=True,
        line=True)
    with open(report_name, "w") as outfile:
        # one slot per simulated time step (plus one for t=0)
        expected_infectiousness = [0] * (int(total_timesteps / simulation_timestep) + 1)
        pre_infected = int(infected[0])
        for index in range(1, len(infected)):
            # infected channel is cumulative; delta gives this step's new infections
            new_infected = int(infected[index]) - pre_infected
            pre_infected = int(infected[index])
            if new_infected:
                # each new cohort contributes a time-shifted infectiousness profile;
                # sum it element-wise into the running expectation
                new_expected_infectiousness = calculate_infectiousness(
                    new_infected, index, simulation_timestep, total_timesteps,
                    base_infectivity, baseline, delay, rate, debug)
                expected_infectiousness = list(
                    map(sum, zip(expected_infectiousness, new_expected_infectiousness)))
        success = True
        actual_infectiousness_all = []
        calc_infectiousness_all = []
        for index in range(len(infectiousness)):
            timestep = index * simulation_timestep
            actual_infectiousness = float(infectiousness[index])
            # per-capita expected infectiousness
            calc_infectiousness = expected_infectiousness[index] / float(statpop[index])
            actual_infectiousness_all.append(actual_infectiousness)
            calc_infectiousness_all.append(calc_infectiousness)
            tolerance = 0 if calc_infectiousness == 0 else 3e-2 * calc_infectiousness
            if math.fabs(actual_infectiousness - calc_infectiousness) > tolerance:
                success = False
                outfile.write(
                    "BAD: actual infectiousness at time step {0} is {1}, expected {2}.\n"
                    .format(timestep, actual_infectiousness, calc_infectiousness))
        if debug:
            with open("actual_vs_calc_infectiousness.txt", "w") as file:
                for i in range(len(actual_infectiousness_all)):
                    file.write("Time Step: {0}, actual infectiousnes: {1},"
                               " expected_infectiousness: {2}.\n".format(
                                   i * simulation_timestep,
                                   actual_infectiousness_all[i],
                                   calc_infectiousness_all[i]))
        dtk_sft.plot_data(
            actual_infectiousness_all,
            calc_infectiousness_all,
            label1="actual infectiousness",
            label2="calc infectiousness",
            title="Exponential_Delay: {0} days, Exponential_Rate: {1} ".format(delay, rate),
            xlabel="time step / simulation_timestep{0}".format(simulation_timestep),
            ylabel="Infectiousness",
            category='Infectiousness',
            show=True,
            line=True)
        outfile.write(dtk_sft.format_success_msg(success))
    return success
def create_report_file(data):
    """Check TB latency timer updates triggered by starting/stopping ART.

    Pairs each LifeCourseLatencyTimerUpdate log line with the individual's most
    recent StartedART/StoppedART event, then verifies the StoppedART latency
    durations do NOT follow the (much faster) StartedART exponential and are
    predominantly long durations.

    :param data: [report_name, log lines, tb_cd4_activation_vector]
                 (this test assumes the activation vector is constant)
    """
    report_name = data[0]
    lines = data[1]
    tb_cd4_activation_vector = data[2]  # this test assumes the vector is constant
    # StartedArt distribution is currently based on 0.01, while StoppedArt is on mostly 0.0000001, so we expect
    # much longer latency durations for the StoppedArt data, using big arbitrary # that is noticeably bigger than
    # what StartedArt distribution would extremely likely give us
    big_magic_number = 2000
    stopped_art_latency_data = []
    started_art_latency_data = []
    art_events_dict = {}
    success = True
    with open(report_name, "w") as outfile:
        if not lines:
            outfile.write("BAD: No relevant test data found.\n")
            success = False
        for line in lines:
            if "has event" in line:
                ind_id = int(dtk_sft.get_val("Individual ", line))
                art_status = line.split(" ")[9].strip(".")  # get_val only gets digits
                art_events_dict[ind_id] = art_status
            if "LifeCourseLatencyTimerUpdate" in line:
                ind_id = int(dtk_sft.get_val("Individual ", line))
                new_incubation_timer = float(dtk_sft.get_val("timer ", line))
                if ind_id in art_events_dict.keys():
                    if art_events_dict.get(ind_id) == "StartedART":
                        started_art_latency_data.append(new_incubation_timer)
                    else:
                        stopped_art_latency_data.append(new_incubation_timer)
                    art_events_dict.pop(ind_id)
                else:
                    success = False
                    outfile.write(
                        "BAD: No art-related event found in the logs for this timer update for Individual {},"
                        " at time {}.\n".format(ind_id, int(dtk_sft.get_val("time= ", line))))
        # we want the stopped art latency data to NOT match the started art latency data
        # and we expect the stopped art latency data to be long period times as made my our cd4_Activation_vector
        if dtk_sft.test_exponential(stopped_art_latency_data,
                                    tb_cd4_activation_vector[2],
                                    integers=False,
                                    roundup=False,
                                    round_nearest=False):
            outfile.write(
                "BAD: The StoppedArt latency data distribution matches the StartedArt latency data"
                " distribution, but shouldn't.\n")
            success = False
        # expected data only used for graphing purposes
        expected_stopped_art_data = np.random.exponential(
            1 / tb_cd4_activation_vector[0], len(stopped_art_latency_data))
        small_duration_count = 0
        for duration in stopped_art_latency_data:
            if duration < big_magic_number:
                small_duration_count += 1
        proportion_small = small_duration_count / float(len(stopped_art_latency_data))
        if proportion_small > 0.01:
            # BUGFIX: message previously said "More than 0.5%" while the threshold
            # tested above is 0.01, i.e. 1%; message now matches the code.
            outfile.write(
                "BAD: More than 1% of our durations are suspiciously small, it is {}. "
                "Please Investigate.\n".format(proportion_small))
            success = False
        outfile.write("Data points checked = {}.\n".format(len(stopped_art_latency_data)))
        outfile.write("SUMMARY: Success={0}\n".format(success))
        dtk_sft.plot_data(
            sorted(stopped_art_latency_data),
            sorted(expected_stopped_art_data),
            label1="Actual",
            label2="Expected",
            title="StoppedART Latency data should have a similar shape/scale of duration but will not "
            "match",
            xlabel="Data Points",
            ylabel="Days",
            category="tb_activation_and_cd4_hiv_first_on_art_off_art",
            line=True,
            overlap=True)
def create_report_file(param_obj, campaign_obj, output_dict, report_dict, report_name, debug):
    """Binomial 95% CI test of TB-test-positive counts per time step.

    The probability of a positive report is base_sensitivity * treatment_fraction;
    each time step's positive count is tested against that probability over the
    total tested population, skipping steps with insufficient sample size.

    :param param_obj: dict of config parameters (config name)
    :param campaign_obj: dict with base sensitivity and treatment fraction
    :param output_dict: timestep -> positive/negative human counts from logging
    :param report_dict: timestep -> positive counts from the report
    :param report_name: path of the text report file to write
    :param debug: when True, also print the summary line to stdout
    :return: True if all runnable binomial tests pass, False otherwise
    """
    with open(report_name, "w") as outfile:
        config_name = param_obj[KEY_CONFIG_NAME]
        outfile.write("Config_name = {}\n".format(config_name))
        success = True
        sensitivity = campaign_obj[KEY_BASE_SENSITIVITY]
        treatment_fraction = campaign_obj[KEY_TREATMENT_FRACTION]
        prob = sensitivity * treatment_fraction
        # number of time steps where a binomial test could actually be run
        binomial_test_count = 0
        positive = []
        total = []
        if not len(report_dict):
            success = False
            outfile.write(dtk_sft.sft_no_test_data)
        for t in report_dict:
            num_success = report_dict[t][KEY_POSITIVE]
            num_trials = output_dict[t][KEY_POSITIVE_HUMAN] + output_dict[t][KEY_NEGATIVE_HUMAN]
            positive.append(num_success)
            total.append(num_trials)
            # the logging includes positive, negative and default, please see issue #2279
            # rule-of-thumb normal-approximation check: need mean and (n - mean) >= 5
            if num_trials * prob < 5 or num_trials * (1 - prob) < 5:
                outfile.write(
                    "At timestep {0}, there is not enough sample size : mean = {1}, sample size - mean = {2}"
                    ".\n".format(t, num_trials * prob, num_trials * (1 - prob)))
            else:
                result = dtk_sft.test_binomial_95ci(num_success,
                                                    num_trials,
                                                    prob,
                                                    report_file=outfile,
                                                    category="TB test positive")
                outfile.write(
                    "At timestep {0}, the binomial 95% test result is {1}.\n".format(t, result))
                binomial_test_count += 1
                if not result:
                    success = False
        if not binomial_test_count:
            success = False
            outfile.write(
                "BAD: There is not enough sample size for binomial test in every time step, please fix the test.\n")
        dtk_sft.plot_data(
            positive,
            dist2=total,
            label1="TBTestPositive",
            label2="Total tested",
            title="Test positive vs. total, positive proportion = {}".format(prob),
            xlabel="time step",
            ylabel="# of individuals",
            category='Test_positive_vs_total',
            show=True,
            line=False)
        # TODO: write test to check if report matches debug logging. Pending on #2279. May not need this part.
        outfile.write(dtk_sft.format_success_msg(success))
        if debug:
            print("SUMMARY: Success={0}\n".format(success))
        return success
def create_report_file(data, debug=False):
    """Test TB latency durations (HIV-first individuals) against an exponential distribution.

    Pairs InitializeLatentInfection with the subsequent TBActivationPresymptomatic
    event per individual to get latency durations, then runs an exponential
    goodness-of-fit test with rate tb_cd4_activation_vector[0].

    :param data: [report_name, log lines, tb_cd4_activation_vector]
                 (this test assumes the activation vector is constant)
    :param debug: when True, dump the per-individual durations to a JSON file
    """
    report_name = data[0]
    lines = data[1]
    tb_cd4_activation_vector = data[2]  # this test assumes the vector is constant
    latency_data = {}      # individual id -> latency start time
    duration_data = {}     # individual id -> latency duration (days)
    success = True
    with open(report_name, "w") as outfile:
        if not lines:
            outfile.write("BAD: No relevant test data found.\n")
            success = False
        for line in lines:
            if "InitializeLatentInfection" in line:
                ind_id = int(dtk_sft.get_val("Individual ", line))
                start_time_stamp = int(dtk_sft.get_val("time= ", line))
                # a second initialization for the same individual resets the timer; note it
                if ind_id in latency_data.keys():
                    outfile.write(
                        "Individual {} incubation timer reset at time {}. Please check. "
                        "\n".format(ind_id, start_time_stamp))
                latency_data[ind_id] = start_time_stamp
            elif "TBActivationPresymptomatic" in line:
                ind_id = int(dtk_sft.get_val("Individual ", line))
                end_time_stamp = int(dtk_sft.get_val("time= ", line))
                if ind_id not in latency_data.keys():
                    outfile.write(
                        "Individual {} went presymptomatic without incubation timer update at time {}. "
                        "Please check. \n".format(ind_id, end_time_stamp))
                else:
                    duration = end_time_stamp - latency_data.get(ind_id)
                    duration_data[ind_id] = duration
        if debug:
            with open("DEBUG_duration_data.json", "w") as debug_outfile:
                json.dump(duration_data, debug_outfile, indent=4)
        durations = list(duration_data.values())
        # expected_data here only used for graphing purposes
        expected_data = [
            int(x + 1) for x in np.random.exponential(
                1 / tb_cd4_activation_vector[0], len(duration_data))
        ]
        success = dtk_sft.test_exponential(durations,
                                           tb_cd4_activation_vector[0],
                                           outfile,
                                           integers=True,
                                           roundup=True,
                                           round_nearest=False)
        outfile.write("Data points checked = {}.\n".format(len(duration_data)))
        outfile.write("SUMMARY: Success={0}\n".format(success))
        dtk_sft.plot_data(sorted(durations),
                          sorted(expected_data),
                          label1="Actual",
                          label2="Expected",
                          title="Latency Duration HIV then TB (Sorted)",
                          xlabel="Data Points",
                          ylabel="Days",
                          category="tb_activation_and_cd4_hiv_first",
                          line=True,
                          overlap=True)
def create_report_file(param_obj, campaign_obj, output_dict, report_dict, report_name, debug):
    """Chi-squared (multinomial) test of TB test positive/negative/default proportions.

    Expected proportions are [sensitivity * tf, (1 - sensitivity) * tf, 1 - tf]
    where tf is the treatment fraction. The overall test passes if the chi-squared
    test fails at no more than 5% of the time steps.

    :param param_obj: dict of config parameters (config name)
    :param campaign_obj: dict with base sensitivity and treatment fraction
    :param output_dict: parsed logging output (unused here beyond signature parity)
    :param report_dict: timestep -> positive/negative/default counts from the report
    :param report_name: path of the text report file to write
    :param debug: when True, also print the summary line to stdout
    :return: True if the failure rate is within the 5% allowance, False otherwise
    """
    with open(report_name, "w") as outfile:
        config_name = param_obj[KEY_CONFIG_NAME]
        outfile.write("Config_name = {}\n".format(config_name))
        success = True
        sensitivity = campaign_obj[KEY_BASE_SENSITIVITY]
        treatment_fraction = campaign_obj[KEY_TREATMENT_FRACTION]
        # expected proportions of [positive, negative, default] outcomes
        proportions = [
            sensitivity * treatment_fraction,
            (1.0 - sensitivity) * treatment_fraction,
            1.0 - treatment_fraction
        ]
        positive = []
        negative = []
        default = []
        total = []
        failed_timestep = []
        if not len(report_dict):
            success = False
            outfile.write(dtk_sft.sft_no_test_data)
        for t in report_dict:
            value_to_test = [
                report_dict[t][KEY_POSITIVE],
                report_dict[t][KEY_NEGATIVE],
                report_dict[t][KEY_DEFAULT]
            ]
            positive.append(report_dict[t][KEY_POSITIVE])
            negative.append(report_dict[t][KEY_NEGATIVE])
            default.append(report_dict[t][KEY_DEFAULT])
            total.append(sum(value_to_test))
            outfile.write("Run Chi-squared test at time step {}.\n".format(t))
            result = dtk_sft.test_multinomial(dist=value_to_test,
                                              proportions=proportions,
                                              report_file=outfile)
            if not result:
                failed_timestep.append(t)
                outfile.write(
                    "Warning: At timestep {0}, the Chi-squared test failed.\n".format(t))
        # allow up to 5% of time steps to fail before declaring overall failure
        if len(failed_timestep) > math.ceil(0.05 * len(report_dict)):
            success = False
            outfile.write(
                "BAD: the Chi-squared test failed at timestep {0}.\n".format(
                    ', '.join(str(x) for x in failed_timestep)))
        else:
            outfile.write(
                "GOOD: the Chi-squared test failed {} times, less than 5% of the total timestep.\n"
                .format(len(failed_timestep)))
        dtk_sft.plot_data(
            positive,
            dist2=total,
            label1="TBTestPositive",
            label2="Total tested",
            title="Test positive vs. total, positive proportion = {}".format(
                sensitivity * treatment_fraction),
            xlabel="time step",
            ylabel="# of individuals",
            category='Test_positive_vs_total',
            show=True,
            line=False)
        dtk_sft.plot_data(
            negative,
            dist2=total,
            label1="TBTestNegative",
            label2="Total tested",
            title="Test negative vs. total, negative proportion = {}".format(
                (1.0 - sensitivity) * treatment_fraction),
            xlabel="time step",
            ylabel="# of individuals",
            category='Test_negative_vs_total',
            show=True,
            line=False)
        dtk_sft.plot_data(
            default,
            dist2=total,
            label1="TBTestDefault",
            label2="Total tested",
            title="Test default vs. total, default proportion = {}".format(
                1.0 - treatment_fraction),
            xlabel="time step",
            ylabel="# of individuals",
            category='Test_default_vs_total',
            show=True,
            line=False)
        # TODO: write test to check if report matches debug logging. Pending on #2279. May not need this part.
        outfile.write(dtk_sft.format_success_msg(success))
        if debug:
            print("SUMMARY: Success={0}\n".format(success))
        return success
def create_report_file(Resistances, initial_resistances, drug_start_time, param_obj,
                       report_name, inset_days, debug):
    """Validate MDR-TB resistance acquisition rate and InsetChart MDR prevalence.

    Cross-checks the stdout resistant counts against InsetChart MDR prevalence
    (3% absolute tolerance), then tests each time step's new resistances against a
    binomial with rate TB_Drug_Resistance_Rate_HIV — using a 99% CI when the
    expected mean is large enough, otherwise a 3-sigma band — allowing up to 1% of
    time steps to fail.

    :param Resistances: per-day cumulative resistant counts parsed from stdout
    :param initial_resistances: initial resistant count (only used by commented-out check)
    :param drug_start_time: day index when the drug campaign starts
    :param param_obj: dict of config parameters (TB_Drug_Resistance_Rate_HIV)
    :param report_name: path of the text report file to write
    :param inset_days: per-day InsetChart channel dicts
    :param debug: when True, write verbose per-day data and plot diagnostics
    """
    with open(report_name, "w") as outfile:
        starting_pop = inset_days[0][dts.InsetChart.Channels.KEY_StatisticalPopulation]
        # success = sft.test_binomial_95ci( initial_resistances, starting_pop, param_obj["TB_Drug_Resistance_Rate_HIV"], outfile, "???" )
        success = True
        progression = []
        bad_msgs = []
        for x in range(len(inset_days)):
            inset_day = inset_days[x]
            inset_mdr_prevalence = inset_day[dts.InsetChart.Channels.KEY_MdrTbPrevalence]
            stdout_resistants = Resistances[x]
            if x >= drug_start_time:
                progression.append(stdout_resistants)
            if debug:
                outfile.write("Day: {0}\n".format(x))
                outfile.write(str(inset_day) + "\n")
                outfile.write("StdOut resistants: {0}\n".format(stdout_resistants))
            # prevalence implied by the stdout count over the day's population
            stdout_predicted_prevalence = stdout_resistants / float(
                inset_day[dts.InsetChart.Channels.KEY_StatisticalPopulation])
            if abs(inset_mdr_prevalence - stdout_predicted_prevalence) > 0.03:
                bad_msgs.append(
                    "BAD: at timestep {0}, expected MDR prevalence: {1}, InsetChart had: {2}\n"
                    .format(x, stdout_predicted_prevalence, inset_mdr_prevalence))

        tb_drug_resistance_rate_hiv = param_obj["TB_Drug_Resistance_Rate_HIV"]
        new_resistances = []
        pre_resistance = 0
        failed_count = 0
        total_test = 0
        for x in range(drug_start_time + 1, len(Resistances)):
            resistance = Resistances[x]
            # Resistances is cumulative; delta gives this step's new resistances
            new_resistance = resistance - pre_resistance
            pre_resistance = resistance
            new_resistances.append(new_resistance)
            expected_mean = (starting_pop - resistance) * tb_drug_resistance_rate_hiv
            total_test += 1
            if expected_mean >= 5:  # advoid failing with too small mean
                result = sft.test_binomial_99ci(new_resistance,
                                                starting_pop - resistance,
                                                tb_drug_resistance_rate_hiv,
                                                outfile,
                                                category="time step {}".format(x + 1))
                if not result:
                    failed_count += 1
                    outfile.write(
                        "Warning: New Resistance test fails for rate = {0} at time step {1}.\n"
                        .format(tb_drug_resistance_rate_hiv, x + 1))
            else:
                # small-mean fallback: accept within 3 standard deviations of the binomial mean
                error_tolerance = 3 * math.sqrt(
                    tb_drug_resistance_rate_hiv * (1 - tb_drug_resistance_rate_hiv) *
                    (starting_pop - resistance))  # 3 sigma
                result = math.fabs(new_resistance - expected_mean) <= error_tolerance
                if not result:
                    failed_count += 1
                    outfile.write(
                        "Warning: New Resistance test fails for rate = {0} at time step {1}, "
                        "new resistance = {2}, expected mean = {3}, error tolerance = {4}.\n"
                        .format(tb_drug_resistance_rate_hiv, x + 1, new_resistance,
                                expected_mean, error_tolerance))
        # allow up to 1% of time steps to fail the per-step test
        if failed_count > math.ceil(total_test * 0.01):
            success = False
            outfile.write(
                "BAD: test failed {0} times out of {1} timestep, please check the warning message.\n"
                "".format(failed_count, total_test))
        if debug:
            sft.plot_data(new_resistances,
                          title="new resistance over time",
                          category="new_resistance",
                          show=True)
        series = sft.create_geometric_dis(param_obj["TB_Drug_Resistance_Rate_HIV"],
                                          starting_pop,
                                          len(progression),
                                          test_decay=False)
        sft.plot_data(progression,
                      series,
                      label1="progression",
                      label2="geomatric dis",
                      xlabel="days",
                      ylabel="resistance",
                      title="progression vs geomatric",
                      category="progression_vs_geomatric",
                      show=True,
                      line=True)
        sft.plot_cdf(progression,
                     series,
                     label1="progression",
                     label2="geomatric dis",
                     title="progression vs geomatric cdf",
                     category="progression_vs_geomatric_cdf",
                     show=True)
        # success = sft.test_geometric_decay(progression, param_obj["TB_Drug_Resistance_Rate_HIV"], starting_pop, test_decay=False, report_file=outfile, debug=debug)
        if len(bad_msgs) > 0:
            success = False
            outfile.writelines(bad_msgs)
        outfile.write(sft.format_success_msg(success))
def create_report_file(param_obj, multipliers, infectiousness, report_name, debug):
    """Validate lognormal infectiousness multipliers and write a report.

    When LOGNORMAL_SCALE (sigma) > 0, tests that ``multipliers`` are lognormal
    with mu = -sigma^2/2 (so the distribution mean is 1.0) and that their
    sample mean is within 2e-2 of 1.0. When sigma == 0 the feature is disabled
    and every multiplier must be exactly 1.0. Plots model vs scipy reference
    data and returns the boolean test outcome.

    Fix: the final summary used a Python-2-only ``print`` statement; converted
    to a ``print(...)`` call, consistent with the other report functions.
    """
    with open(report_name, "w") as outfile:
        success = True
        if not multipliers:
            # NOTE(review): success is not set False here — presumably sft_no_test_data
            # is informational; confirm intended behavior for empty input.
            outfile.write(sft.sft_no_test_data)
        sigma = param_obj[Param_keys.LOGNORMAL_SCALE]
        base_infectivity = param_obj[Param_keys.BASE_INFECTIVITY]
        if sigma > 0:
            # mu chosen so exp(mu + sigma^2/2) == 1, i.e. mean multiplier is 1.
            mu = -sigma**2 / 2.0
            # test log_normal distribution
            success = sft.test_lognorm(multipliers, mu=mu, sigma=sigma,
                                       report_file=outfile, round=False)
            # test mean_l = 1
            mean_l = np.mean(multipliers)
            mean_infectiousness = np.mean(infectiousness)
            outfile.write("mean of the multipliers is {}, expected 1.0.\n".format(mean_l))
            outfile.write("mean of the Infectiousness is {0}, while base infectivity is {1}.\n"
                          .format(mean_infectiousness, base_infectivity))
            tolerance = 2e-2
            if math.fabs(mean_l - 1.0) > tolerance:
                outfile.write("BAD: mean of the multipliers is {}, expected 1.0.\n".format(mean_l))
                success = False
            # plotting
            size = len(multipliers)
            outfile.write("size is {}\n".format(size))
            scale = math.exp(mu)
            dist_lognormal = stats.lognorm.rvs(sigma, 0, scale, size)
            sft.plot_data(multipliers, dist_lognormal,
                          label1="Emod", label2="Scipy",
                          ylabel="Multiplier", xlabel="data point",
                          category="Emod_vs_Scipy",
                          title="Emod_vs_Scipy, sigma = {}".format(sigma), show=True)
            sft.plot_probability(multipliers, dist_lognormal, precision=1,
                                 label1="Emod", label2="Scipy",
                                 category="Probability_mass_function_Emod_vs_Scipy",
                                 title="Emod_vs_Scipy, sigma = {}".format(sigma), show=True)
            sft.plot_cdf(multipliers, dist_lognormal,
                         label1="Emod", label2="Scipy", category="cdf",
                         title="cdf, sigma = {}".format(sigma), show=True, line=False)
            if debug:
                with open("scipy_data.txt", "w") as file:
                    for n in sorted(dist_lognormal):
                        file.write(str(n) + "\n")
                with open("emod_data.txt", "w") as file:
                    for n in sorted(multipliers):
                        file.write(str(n) + "\n")
        else:
            # sigma = 0, this feature is disabled
            for multiplier in multipliers:
                if multiplier != 1.0:
                    success = False
                    outfile.write("BAD: multiplier is {0} when {1} set to {2}, expected 1.0.\n"
                                  .format(multiplier, Param_keys.LOGNORMAL_SCALE, sigma))
            # plotting
            sft.plot_data(multipliers, label1="Multiplier", label2="NA",
                          category="Multiplier",
                          title="Multiplier_Sigma={}".format(sigma),
                          ylabel="Multiplier", xlabel="data point", show=True)
            sft.plot_data(infectiousness, label1="Infectiousness", label2="NA",
                          category="Infectiousness",
                          title="Infectiousness_Sigma={0}_BaseInfectivity={1}".format(
                              sigma, base_infectivity),
                          ylabel="Infectiousness", xlabel="data point", show=True)
        outfile.write(sft.format_success_msg(success))
    if debug:
        # Was a Python-2 print statement; now a function call (py2/py3 compatible).
        print("SUMMARY: Success={0}\n".format(success))
    return success
def create_report_file(param_obj, output_df, report_df, report_name, debug):
    """Validate per-timestep infectiousness against a calculated seasonal value.

    For each timestep, computes the expected per-capita infectiousness via
    ``calculate_infectiousness(...) / statpop`` and compares it with the value
    reported in ``output_df`` using a 5% relative tolerance (exact match
    required when the expected value is 0). Writes BAD lines and a summary to
    ``report_name``, plots actual vs calculated, and returns success.

    NOTE(review): reconstructed from whitespace-mangled source; statement
    grouping (which calls sit inside the ``with`` block) is inferred.
    """
    total_timesteps = int(param_obj[KEY_TOTAL_TIMESTEPS])
    simulation_timestep = float(param_obj[KEY_SIMULATION_TIMESTEP])
    base_infectivity = float(param_obj[KEY_BASE_INFECTIVITY])
    amplitude = float(param_obj[KEY_AMPLITUDE])
    phase = float(param_obj[KEY_PHASE])
    infected = output_df[KEY_INFECTED]
    infectiousness = output_df[KEY_INFECTIOUSNESS]
    statpop = output_df[KEY_STAT_POP]
    new_infections = report_df[KEY_NEW_INFECTIONS]
    if debug:
        dtk_sft.plot_data(new_infections, label1="new infections", label2="NA",
                          title="Phase: {0} day, amplitude: {1}, base_infectivity: {2}"
                          .format(phase, amplitude, base_infectivity),
                          xlabel="Time_Step_{0}_Days".format(simulation_timestep),
                          ylabel=None, category='New_infections', show=True, line=True)
    with open(report_name, "w") as outfile:
        # Expected total infectiousness per timestep from the seasonal model.
        expected_infectiousness = []
        for index in range(len(infected)):
            infected_pop = int(infected[index])
            expected_infectiousness.append(
                calculate_infectiousness(infected_pop, index, simulation_timestep,
                                         phase, base_infectivity, amplitude, debug))
        success = True
        actual_infectiousness_all = []
        calc_infectiousness_all = []
        for index in range(len(infectiousness)):
            timestep = index * simulation_timestep
            actual_infectiousness = float(infectiousness[index])
            # Normalize expected total by the statistical population.
            calc_infectiousness = expected_infectiousness[index] / float(statpop[index])
            actual_infectiousness_all.append(actual_infectiousness)
            calc_infectiousness_all.append(calc_infectiousness)
            # 5% relative tolerance; exact match required when expected is 0.
            tolerance = 0 if calc_infectiousness == 0 else 5e-2 * calc_infectiousness
            if math.fabs(actual_infectiousness - calc_infectiousness) > tolerance:
                success = False
                outfile.write(
                    "BAD: actual infectiousness at time step {0} is {1}, expected {2}.\n"
                    .format(timestep, actual_infectiousness, calc_infectiousness))
        outfile.write(dtk_sft.format_success_msg(success))
    dtk_sft.plot_data(actual_infectiousness_all, calc_infectiousness_all,
                      label1="actual infectiousness", label2="calc infectiousness",
                      title="Phase: {0} day, amplitude: {1}, base_infectivity: {2}".format(
                          phase, amplitude, base_infectivity),
                      xlabel="Time_Step_{0}_Days".format(simulation_timestep),
                      ylabel="Infectiousness", category='Infectiousness',
                      show=True, line=True)
    return success
def create_report_file(drug_start_timestep, inactivation_times, active_count, inactivations, drug_inactivation_rate, report_name, debug = False):
    """Validate TB drug inactivation timing and write a report.

    For a small rate (< 0.1) the individual inactivation times are tested as
    draws from an exponential distribution (KS test); for a large rate the
    daily inactivation counts after drug start are tested against expected
    per-day counts (rate * active_count) with a chi-squared/multinomial test.
    Returns the boolean test outcome.

    Fix: corrected misspelled output message "reuslt" -> "result".
    """
    with open(report_name, "w") as outfile:
        success = True
        # ks exponential test doesn't work very well with large rate, use chi squared test instead.
        # while rate is small ks test for exponential distribution is more sensitive to catch the difference
        if drug_inactivation_rate < 0.1:
            outfile.write("Testing inactivation times as draws from exponential distrib with rate {0}. "
                          "Dataset size = {1}.\n".format(drug_inactivation_rate, len(inactivation_times)))
            success = dtk_sft.test_exponential(inactivation_times, drug_inactivation_rate, outfile,
                                               integers=True, roundup=True, round_nearest=False)
            if not success:
                outfile.write("BAD: ks test for rate {} is False.\n".format(drug_inactivation_rate))
            # Build a numpy reference sample (rounded up, matching roundup=True) for plots.
            size = len(inactivation_times)
            scale = 1.0 / drug_inactivation_rate
            dist_exponential_np = numpy.random.exponential(scale, size)
            dist_exponential_np = [math.ceil(x) for x in dist_exponential_np]
            dtk_sft.plot_data_sorted(inactivation_times, dist_exponential_np,
                                     label1="test times", label2="numpy data",
                                     title="inactivation_times_actual_vs_numpy",
                                     xlabel="data points", ylabel="Inactivation times",
                                     category="inactivation_times",
                                     show=True, line=True, overlap=True)
            dtk_sft.plot_cdf(inactivation_times, dist_exponential_np,
                             label1="test times", label2="numpy data",
                             title="inactivation_times_cdf",
                             xlabel="days", ylabel="probability",
                             category="inactivation_times_cdf", show=True)
            dtk_sft.plot_probability(inactivation_times, dist_exponential_np,
                                     label1="test times", label2="numpy data",
                                     title="inactivation_times_pdf",
                                     xlabel="days", ylabel="probability",
                                     category="inactivation_times_pdf", show=True)
        else:
            outfile.write("Testing inactivation count per day with rate {0}. \n".format(
                drug_inactivation_rate))
            expected_inactivation = []
            for t in range(len(inactivations)):
                if t < drug_start_timestep:
                    # No drug yet, so no drug-driven inactivations are expected.
                    if inactivations[t] > 0:
                        success = False
                        outfile.write("BAD: expected no inactivations on drugs before day {0}, get {1} cases at timestep {2}.\n"
                                      "".format(drug_start_timestep, inactivations[t], t))
                elif active_count[t] > 0:
                    expected_inactivation.append(drug_inactivation_rate * active_count[t])
            # Align observed counts (after drug start) with the expected series,
            # trimming whichever side is longer.
            if len(inactivations) <= len(expected_inactivation) + drug_start_timestep:
                test_inactivation_dates = inactivations[drug_start_timestep + 1:]
                expected_inactivation = expected_inactivation[:len(test_inactivation_dates)]
            else:
                test_inactivation_dates = inactivations[drug_start_timestep + 1:
                                                        drug_start_timestep + 1 + len(expected_inactivation)]
            dtk_sft.plot_data(test_inactivation_dates, expected_inactivation,
                              label1="actual inactivation", label2="expected inactivation",
                              title="inactivation per day",
                              xlabel="date after drug start day",
                              ylabel="inactivation per day",
                              category="inactivation_counts",
                              show=True, line=True, overlap=True, sort=False)
            chi_result = dtk_sft.test_multinomial(dist=test_inactivation_dates,
                                                  proportions=expected_inactivation,
                                                  report_file=outfile, prob_flag=False)
            if not chi_result:
                success = False
                outfile.write("BAD: Chi-squared test result is False.\n")
        outfile.write(dtk_sft.format_success_msg(success))
    if debug:
        print(dtk_sft.format_success_msg(success))
    return success
def create_report_file(param_obj, campaign_obj, output_dict, report_dict, report_name, debug):
    """Validate TB diagnostic test-positive counts and report/log consistency.

    Part 1: per timestep, tests the positive count from ReportEventRecorder
    against a binomial with p = (1 - specificity) * treatment_fraction (95% CI),
    skipping timesteps with too small a sample.
    Part 2: checks that ReportEventRecorder counts match the debug-log counts.

    Fixes:
    * ``match_error`` was reset to 0 inside the per-timestep loop, so the final
      GOOD/BAD matching summary only reflected the last timestep; it is now
      initialized once before the loop.
    * The mismatch message for the "log has no counts" branch used format
      indices {2}/{3}, printing report_negative twice and never showing
      log_negative; corrected to {3}/{4}.
    """
    with open(report_name, "w") as outfile:
        config_name = param_obj[KEY_CONFIG_NAME]
        outfile.write("Config_name = {}\n".format(config_name))
        success = True
        specificity = campaign_obj[KEY_BASE_SPECIFICITY]
        treatment_fraction = campaign_obj[KEY_TREATMENT_FRACTION]
        # Probability that an individual tests positive and is treated.
        prob = (1.0 - specificity) * treatment_fraction
        binomial_test_count = 0
        positive = []
        total = []
        if not len(report_dict):
            success = False
            outfile.write(dtk_sft.sft_no_test_data)
        for t in report_dict:
            num_success = report_dict[t][KEY_POSITIVE]
            num_trials = report_dict[t][KEY_NEGATIVE] + num_success
            positive.append(num_success)
            total.append(num_trials)
            # Require both np >= 5 and n(1-p) >= 5 for the normal-approx CI.
            if num_trials * prob < 5 or num_trials * (1 - prob) < 5:
                outfile.write(
                    "At timestep {0}, there is not enough sample size : mean = {1}, sample size - mean = {2}"
                    ".\n".format(t, num_trials * prob, num_trials * (1 - prob)))
            else:
                result = dtk_sft.test_binomial_95ci(num_success, num_trials, prob,
                                                    report_file=outfile,
                                                    category="TB positive")
                outfile.write("At timestep {0}, the binomial 95% test result is {1}.\n"
                              .format(t, result))
                binomial_test_count += 1
                if not result:
                    success = False
        if not binomial_test_count:
            success = False
            outfile.write(
                "BAD: There is not enough sample size for binomial test in every time step, please fix the test.\n")
        dtk_sft.plot_data(positive, dist2=total,
                          label1="TBTestPositive", label2="Total tested",
                          title="Test positive vs. total, positive proportion = {}".format(prob),
                          xlabel="time step", ylabel="# of individuals",
                          category='Test_positive_vs_total', show=True, line=False)
        # When Treatment_fraction is set to 1, the report should match debug log. Here is the test for it:
        match_error = 0  # counts mismatches across ALL timesteps (was reset per-iteration)
        for t in output_dict:
            log_positive = output_dict[t][KEY_POSITIVE]
            log_negative = output_dict[t][KEY_NEGATIVE]
            if log_negative or log_positive:
                report_positive = report_dict[t][KEY_POSITIVE]
                report_negative = report_dict[t][KEY_NEGATIVE]
                if report_positive != log_positive:
                    match_error += 1
                    success = False
                    outfile.write(
                        "BAD: at time step {0} the TBTestPositive is {1} from ReportEventRecorder.csv and {2} from"
                        "debug logging.\n".format(t, report_positive, log_positive))
                if report_negative != log_negative:
                    match_error += 1
                    success = False
                    outfile.write(
                        "BAD: at time step {0} the TBTestNegative is {1} from ReportEventRecorder.csv and {2} from"
                        "debug logging.\n".format(t, report_negative, log_negative))
            else:
                # Log shows no tests at t; the report must not have counts either.
                if t in report_dict:
                    report_positive = report_dict[t][KEY_POSITIVE]
                    report_negative = report_dict[t][KEY_NEGATIVE]
                    match_error += 1
                    success = False
                    outfile.write(
                        "BAD: at time step {0} the TBTestPositive and TBTestNegative are {1} and {2} from "
                        "ReportEventRecorder.csv and {3} and {4} from debug logging. They should be matched\n"
                        "".format(t, report_positive, report_negative, log_positive, log_negative))
        if not match_error:
            outfile.write("GOOD: The ReportEventRecorder.csv matches the debug logging.\n")
        else:
            outfile.write("BAD: The ReportEventRecorder.csv doesn't match the debug logging.\n")
        outfile.write(dtk_sft.format_success_msg(success))
    if debug:
        print("SUMMARY: Success={0}\n".format(success))
    return success
def create_report_file(report_data_obj, report_name, debug):
    """Validate transmission-blocking vaccine effects across repeated outbreaks.

    Uses the first outbreak's new-infection count as a baseline and checks
    that each subsequent outbreak's count scales by (1 - tb_effect) /
    (1 - tb_effect_baseline), within 2% of the statistical population.
    Writes a report, plots actual vs expected, and returns success.
    """
    with open(report_name, "w") as outfile:
        success = True
        timestep = Outbreak_Start_Day
        tb_effects = calc_tb_effect(debug)
        tb_effect_baseline = float(tb_effects[0])
        # use the number of new infection from the 1st outbreak as a baseline
        new_infection_baseline = report_data_obj[KEY_NEW_INFECTIONS_GROUP[1]][timestep]
        statistical_population = report_data_obj[KEY_STATISTICAL_POPULATION_GROUP[1]][timestep]  # no any death
        new_infections = []
        expected_new_infections = []
        new_infections.append(new_infection_baseline)
        expected_new_infections.append(new_infection_baseline)
        actual_tb_effects = []
        actual_tb_effects.append(tb_effect_baseline)
        for i in range(1, len(Interventions)):  # no need to test the 1st outbreak
            timestep += Timesteps_Between_Repetitions
            new_infection = report_data_obj[KEY_NEW_INFECTIONS_GROUP[1]][timestep]
            tb_effect = tb_effects[i]
            # because expected_new_infection / (1.0 - tb_effect) = new_infection_baseline / (1.0- tb_effect_baseline), so
            expected_new_infection = (1.0 - tb_effect) * new_infection_baseline / (1.0 - tb_effect_baseline)
            # 2% of the population as tolerance; exact match required when expected is 0.
            tolerance = 0.0 if expected_new_infection == 0.0 else 2e-2 * statistical_population
            actual_tb_effect = 1.0 - (new_infection * (1.0 - tb_effect_baseline) / new_infection_baseline)
            if math.fabs(new_infection - expected_new_infection) > tolerance:
                success = False
                outfile.write(
                    "BAD: At time step {0}, outbreak {1}, {2} reported new infections, expected {3}.\n"
                    .format(timestep, Interventions[i], new_infection, expected_new_infection))
                outfile.write(
                    "actual TransmissionBlocking effect is {0}, expected {1}.\n"
                    .format(actual_tb_effect, tb_effect))
            new_infections.append(new_infection)
            expected_new_infections.append(expected_new_infection)
            actual_tb_effects.append(actual_tb_effect)
        sft.plot_data(new_infections, expected_new_infections,
                      label1="Actual", label2="Expected",
                      xlabel="outbreak", ylabel="new infection",
                      title="Actual new infection vs. expected new infection",
                      category='New_infections', show=True)
        if debug:
            # Dump raw series for offline inspection.
            with open("New_infections.txt", "w") as file:
                for i in range(len(new_infections)):
                    file.write("{0}, {1}.\n".format(new_infections[i], expected_new_infections[i]))
            with open("Effects.txt", "w") as file:
                for i in range(len(actual_tb_effects)):
                    file.write("{0}, {1}.\n".format(actual_tb_effects[i], tb_effects[i]))
        outfile.write(sft.format_success_msg(success))
    if debug:
        print("SUMMARY: Success={0}\n".format(success))
    return success
def create_report_file(param_obj, campaign_obj, demographics_obj, report_data_obj, report_name, debug):
    """Validate importation-pressure new infections and population growth.

    Groups per-timestep new-infection counts by campaign rate, checks the final
    statistical population against initial_population + sum(rate * duration)
    (10% tolerance), and KS-tests each rate group as Poisson draws. Writes a
    report, dumps the grouped counts to new_infections_parsed.json, and
    returns success.

    NOTE(review): this is a near-duplicate (Python-3 print style) of the
    following function; reconstructed from whitespace-mangled source.
    """
    with open(report_name, "w") as outfile:
        success = True
        total_timesteps = param_obj[KEY_TOTAL_TIMESTEPS]
        start_timestep = param_obj[KEY_START_TIME]
        initial_population = demographics_obj[KEY_INITIAL_POPULATION]
        rates = campaign_obj[KEY_CAMPAIGN_DIP]
        durations = campaign_obj[KEY_CAMPAIGN_DURATIONS]
        if not report_data_obj:  # todo: maybe use try
            success = False
            outfile.write("BAD: There is no data in the InsetChart report")
        else:
            new_infections = report_data_obj[KEY_NEW_INFECTIONS]
            statistical_population = report_data_obj[KEY_STATISTICAL_POPULATION]
            length = len(rates)
            start_duration = start_timestep
            new_infections_dict = {}
            calculate_new_population = initial_population
            # Bucket new-infection counts by campaign rate; each rate runs for
            # its configured duration, truncated at the simulation end.
            for i in range(length):
                rate = rates[i]
                duration = durations[i]
                calculate_new_population = rate * duration + calculate_new_population
                end_duration = duration + start_duration
                if rate not in new_infections_dict:
                    new_infections_dict[rate] = []
                for j in range(start_duration + 1, end_duration + 1):
                    if j < total_timesteps + start_timestep:
                        new_infections_dict[rate].append(new_infections[j])
                        j += 1  # NOTE(review): redundant — range() controls j
                    else:
                        break
                if end_duration > total_timesteps + start_timestep:
                    # Campaign extends past the simulation: remove the overshoot.
                    calculate_new_population -= rate * (end_duration - total_timesteps - start_timestep)
                    break
                start_duration = end_duration
            # Any trailing timesteps after the last campaign get rate 0.
            if end_duration < total_timesteps + start_timestep:
                rate = 0.0
                if rate not in new_infections_dict:
                    new_infections_dict[rate] = []
                for j in range(end_duration + 1, len(new_infections)):
                    new_infections_dict[rate].append(new_infections[j])
            with open("new_infections_parsed.json", "w") as file:
                json.dump(new_infections_dict, file, indent=4)
            # test statistical population channel
            diff_population = math.fabs(calculate_new_population - statistical_population[-1])
            if debug:
                print(
                    "calculated population is {0}, statistical population " \
                    "from InsetChart is {1}.".format(calculate_new_population,
                                                     statistical_population[-1]))
            # 10% of the expected net growth as tolerance.
            error_tolerance = math.fabs(calculate_new_population - initial_population) * 0.1
            if debug:
                print("diff_population is {0}, error_tolerance is {1}".format(
                    diff_population, error_tolerance))
            if diff_population > error_tolerance:
                success = False
                outfile.write(
                    "BAD: statistical population is {0}, expected about {1}.\n"
                    .format(statistical_population[-1], calculate_new_population))
            # test poisson distribution for new infections
            for rate in new_infections_dict:
                dist = new_infections_dict[rate]
                title = "rate = " + str(rate)
                result = sft.test_poisson(dist, rate, route=title,
                                          report_file=outfile,
                                          normal_approximation=False)
                # print result, rate, len(dist)
                if not result:
                    success = False
                    outfile.write("BAD: ks poisson test for {0} is {1}.\n".format(title, result))
                numpy_distro = np.random.poisson(rate, len(dist))
                sft.plot_data(dist, numpy_distro,
                              title="new infections for {}".format(title),
                              label1="new infection from model, {}".format(title),
                              label2="Poisson distro from numpy",
                              xlabel="data points", ylabel="new infection",
                              category="plot_data_{0}".format(title), show=True)
                sft.plot_probability(dist, numpy_distro,
                                     title="probability mass function for {}".format(title),
                                     label1="new infection probability from model",
                                     label2="new infection probability from numpy distro",
                                     category="plot_probability_{0}".format(title), show=True)
        outfile.write(sft.format_success_msg(success))
    if debug:
        print("SUMMARY: Success={0}\n".format(success))
    return success
def create_report_file(param_obj, campaign_obj, demographics_obj, report_data_obj, report_name, debug):
    """Validate importation-pressure new infections and population growth.

    Groups per-timestep new-infection counts by campaign rate, checks the final
    statistical population against initial_population + sum(rate * duration)
    (10% tolerance), and KS-tests each rate group as Poisson draws. Writes a
    report, dumps the grouped counts to new_infections_parsed.json, and
    returns success.

    Fix: this variant used Python-2-only ``print`` statements; converted to
    ``print(...)`` calls for py3 compatibility and consistency with the
    sibling implementation above. Logic is otherwise unchanged.
    """
    with open(report_name, "w") as outfile:
        success = True
        total_timesteps = param_obj[KEY_TOTAL_TIMESTEPS]
        start_timestep = param_obj[KEY_START_TIME]
        initial_population = demographics_obj[KEY_INITIAL_POPULATION]
        rates = campaign_obj[KEY_CAMPAIGN_DIP]
        durations = campaign_obj[KEY_CAMPAIGN_DURATIONS]
        if not report_data_obj:  # todo: maybe use try
            success = False
            outfile.write("BAD: There is no data in the InsetChart report")
        else:
            new_infections = report_data_obj[KEY_NEW_INFECTIONS]
            statistical_population = report_data_obj[KEY_STATISTICAL_POPULATION]
            length = len(rates)
            start_duration = start_timestep
            new_infections_dict = {}
            calculate_new_population = initial_population
            # Bucket new-infection counts by campaign rate; each rate runs for
            # its configured duration, truncated at the simulation end.
            for i in range(length):
                rate = rates[i]
                duration = durations[i]
                calculate_new_population = rate * duration + calculate_new_population
                end_duration = duration + start_duration
                if rate not in new_infections_dict:
                    new_infections_dict[rate] = []
                for j in range(start_duration + 1, end_duration + 1):
                    if j < total_timesteps + start_timestep:
                        new_infections_dict[rate].append(new_infections[j])
                        j += 1
                    else:
                        break
                if end_duration > total_timesteps + start_timestep:
                    # Campaign extends past the simulation: remove the overshoot.
                    calculate_new_population -= rate * (end_duration - total_timesteps - start_timestep)
                    break
                start_duration = end_duration
            # Any trailing timesteps after the last campaign get rate 0.
            if end_duration < total_timesteps + start_timestep:
                rate = 0.0
                if rate not in new_infections_dict:
                    new_infections_dict[rate] = []
                for j in range(end_duration + 1, len(new_infections)):
                    new_infections_dict[rate].append(new_infections[j])
            with open("new_infections_parsed.json", "w") as file:
                json.dump(new_infections_dict, file, indent=4)
            # test statistical population channel
            diff_population = math.fabs(calculate_new_population - statistical_population[-1])
            if debug:
                print("calculated population is {0}, statistical population "
                      "from InsetChart is {1}.".format(calculate_new_population,
                                                      statistical_population[-1]))
            # 10% of the expected net growth as tolerance.
            error_tolerance = math.fabs(calculate_new_population - initial_population) * 0.1
            if debug:
                print("diff_population is {0}, error_tolerance is {1}".format(
                    diff_population, error_tolerance))
            if diff_population > error_tolerance:
                success = False
                outfile.write("BAD: statistical population is {0}, expected about {1}.\n".format(
                    statistical_population[-1], calculate_new_population))
            # test poisson distribution for new infections
            for rate in new_infections_dict:
                dist = new_infections_dict[rate]
                title = "rate = " + str(rate)
                result = sft.test_poisson(dist, rate, route=title, report_file=outfile,
                                          normal_approximation=False)
                # print result, rate, len(dist)
                if not result:
                    success = False
                    outfile.write("BAD: ks poisson test for {0} is {1}.\n".format(title, result))
                numpy_distro = np.random.poisson(rate, len(dist))
                sft.plot_data(dist, sorted(numpy_distro),
                              title="new infections for {}".format(title),
                              label1="new infection from model, {}".format(title),
                              label2="Poisson distro from numpy",
                              xlabel="data points", ylabel="new infection",
                              category="plot_data_{0}".format(title), show=True)
                sft.plot_probability(dist, numpy_distro,
                                     title="probability mass function for {}".format(title),
                                     label1="new infection probability from model",
                                     label2="new infection probability from numpy distro",
                                     category="plot_probability_{0}".format(title), show=True)
        outfile.write(sft.format_success_msg(success))
    if debug:
        print("SUMMARY: Success={0}\n".format(success))
    return success
def create_report_file(data):
    """Validate TB latency timers around ART start/stop events.

    Parses log lines to collect (a) incubation timers drawn while on ART
    ("reconstitute" lines) and (b) latency timer updates triggered by ART
    events, split into StartedART vs StoppedART populations. Checks that
    StoppedArt latencies do NOT look exponential with the on-ART rate, that
    few of them are suspiciously short, and that the on-ART timers DO match
    the expected exponential distribution. Writes the report and plots.

    data layout: [report_name, log_lines, tb_cd4_activation_vector].
    """
    report_name = data[0]
    lines = data[1]
    tb_cd4_activation_vector = data[2]  # this test assumes the vector is constant
    # StartedArt distribution is currently based on 0.01, while StoppedArt is on mostly 0.0000001, so we expect
    # much longer latency durations for the StoppedArt data, using big arbitrary number that is noticeably
    # bigger than what StartedArt distribution would extremely likely give us
    big_magic_number = 2000
    stopped_art_latency_data = []
    started_art_latency_data = []
    tb_on_art_latency_data = []
    # Maps individual id -> last ART event name seen, consumed on timer update.
    art_events_dict = {}
    success = True
    with open(report_name, "w") as outfile:
        if not lines:
            outfile.write("BAD: No relevant test data found.\n")
            success = False
        for line in lines:
            if "has event" in line:
                ind_id = int(dtk_sft.get_val("Individual ", line))
                art_status = line.split(" ")[9].strip(".")  # get_val only gets digits
                art_events_dict[ind_id] = art_status
            if "Incubation_timer calculated as" in line:
                ind_id = int(dtk_sft.get_val("Individual ", line))
                infection_timer = float(dtk_sft.get_val("calculated as ", line))
                reconstitute = int(dtk_sft.get_val("reconstitute=", line))
                if reconstitute:  # ignore people who are not reconstituting.
                    tb_on_art_latency_data.append(infection_timer)
            if "LifeCourseLatencyTimerUpdate" in line:
                ind_id = int(dtk_sft.get_val("Individual ", line))
                new_incubation_timer = float(dtk_sft.get_val("timer ", line))
                if ind_id in art_events_dict.keys():
                    if art_events_dict.get(ind_id) == "StartedART":
                        # we ignore this for this test, people are already on art when they get TB
                        started_art_latency_data.append(new_incubation_timer)
                    else:
                        stopped_art_latency_data.append(new_incubation_timer)
                    art_events_dict.pop(ind_id)
                else:
                    success = False
                    outfile.write("BAD: No art-related event found in the logs for this timer update for Individual {},"
                                  " at time {}.\n".format(ind_id, int(dtk_sft.get_val("time= ", line))))
        # we want the stopped art latency data to NOT match the started art latency data
        # and we expect the stopped art latency data to be long period times as made by our cd4_Activation_vector
        # NOTE(review): no report_file is passed here (unlike the call below) — presumably
        # intentional to suppress the KS-test output; confirm.
        if dtk_sft.test_exponential(stopped_art_latency_data, tb_cd4_activation_vector[2], integers=True,
                                    roundup=True, round_nearest=False):
            outfile.write("BAD: The StoppedArt latency data distribution matches the initial latency data"
                          " distribution, but shouldn't.\n")
            success = False
        small_duration_count = 0
        for duration in stopped_art_latency_data:
            if duration < big_magic_number:
                small_duration_count += 1
        # NOTE(review): raises ZeroDivisionError if stopped_art_latency_data is empty.
        proportion_small = small_duration_count / float(len(stopped_art_latency_data))
        if proportion_small > 0.006:
            outfile.write("BAD: More than 0.006 of our durations are suspiciously small, it is {}. "
                          "Please Investigate.\n".format(proportion_small))
            success = False
        if not dtk_sft.test_exponential(tb_on_art_latency_data, tb_cd4_activation_vector[2], outfile,
                                        integers=False, roundup=False, round_nearest=False):
            # this is testing the internal timer which is float type so 'integers=False'
            success = False
            outfile.write("BAD: Initial TB infection (with HIV and ART) latency doesn't match expected distribution.")
        # NOTE(review): the extra 0 argument to format() is unused (single placeholder).
        outfile.write("Data points checked = {}."
                      "\n".format(len(tb_on_art_latency_data), 0))
        outfile.write("SUMMARY: Success={0}\n".format(success))
        # for graphing purposes only
        expected_tb_on_art_latency_data = np.random.exponential(1/tb_cd4_activation_vector[2],
                                                                len(tb_on_art_latency_data))
        dtk_sft.plot_data(sorted(tb_on_art_latency_data), sorted(expected_tb_on_art_latency_data),
                          label1="Actual", label2="Expected",
                          title="HIV+ART then TB latency data",
                          xlabel="Data Points", ylabel="Days",
                          category="tb_activation_and_cd4_hiv_art_tb_offart",
                          line = True, overlap=True)
def create_report_file(drug_start_timestep, disease_deaths, cum_deaths, deaths, infected_individuals, death_times, drug_mortality_rate_HIV, report_name ):
    """Validate HIV drug-related mortality and write a report.

    First checks InsetChart disease deaths against stdout cumulative deaths
    per timestep. Then, for a small rate (< 0.1), KS-tests the individual
    death times as exponential draws; for a large rate, chi-squared-tests the
    daily death counts after drug start against rate * infected_individuals.
    Returns the boolean test outcome.

    NOTE(review): the emitted messages contain the typo "reuslt" — left
    byte-identical here; fix alongside any downstream log consumers.
    """
    with open(report_name, "w") as outfile:
        success = True
        length = len(cum_deaths)
        if sum(disease_deaths)==0 or sum(cum_deaths)==0 or len(death_times)==0:
            success = False
            outfile.write(dtk_sft.no_test_data)
        # InsetChart and stdout must agree on cumulative disease deaths.
        for x in range(length):
            if disease_deaths[x] != cum_deaths[x]:
                success = False
                outfile.write("BAD: at timestep {0}, disease deaths is {1} in InsetChart.json and {2} in stdout.txt.\n".format(
                    x+1, disease_deaths[x], cum_deaths[x]))
        # ks exponential test doesn't work very well with large rate, use chi squared test instead
        # while rate is small ks test for exponential distribution is more sensitive to catch the difference
        if drug_mortality_rate_HIV < 0.1:
            outfile.write("Testing death times as draws from exponential distrib with rate {0}. "
                          "Dataset size = {1}.\n".format(drug_mortality_rate_HIV, len(death_times)))
            ks_result = dtk_sft.test_exponential(death_times, drug_mortality_rate_HIV, report_file = outfile,
                                                 integers=True, roundup=True, round_nearest=False)
            if not ks_result:
                success = False
                outfile.write("BAD: ks test reuslt is False.\n")
            # Numpy reference sample (rounded up, matching roundup=True) for plots.
            size = len(death_times)
            scale = 1.0 / drug_mortality_rate_HIV
            dist_exponential_np = numpy.random.exponential(scale, size)
            dist_exponential_np = [math.ceil(x) for x in dist_exponential_np]
            dtk_sft.plot_data_sorted(death_times, dist_exponential_np,
                                     label1="death times", label2="numpy data",
                                     title="death_times_actual_vs_numpy",
                                     xlabel="data points", ylabel="death times",
                                     category="death_times",
                                     show=True, line = True, overlap=True)
            dtk_sft.plot_cdf(death_times, dist_exponential_np,
                             label1="death times", label2="numpy data",
                             title="death_times_cdf",
                             xlabel="days", ylabel="probability",
                             category="death_times_cdf", show=True)
        else:
            outfile.write("Testing death count per day with rate {0}. \n".format(drug_mortality_rate_HIV))
            expected_mortality = []
            for t in range( len(deaths)):
                if t < drug_start_timestep + 1:
                    # No drug yet, so no drug-driven deaths are expected.
                    if deaths[t] > 0:
                        success = False
                        outfile.write("BAD: expected no disease death on drugs before day {0}, get {1} cases at timestep {2}.\n"
                                      "".format(drug_start_timestep + 1, deaths[t], t))
                elif infected_individuals[t] > 0:
                    expected_mortality.append(drug_mortality_rate_HIV * infected_individuals[t])
            expected_mortality.pop(0)  # the Infected is off by one day
            test_death_dates = deaths[drug_start_timestep + 1:drug_start_timestep + 1 + len(expected_mortality)]
            dtk_sft.plot_data(test_death_dates, expected_mortality,
                              label1="actual death", label2="expected death",
                              title="death per day",
                              xlabel="date after drug start day", ylabel="death per day",
                              category="death_counts",
                              show=True, line=True, overlap=True, sort=False)
            chi_result = dtk_sft.test_multinomial(dist=test_death_dates, proportions=expected_mortality,
                                                  report_file=outfile, prob_flag=False)
            if not chi_result:
                success = False
                outfile.write("BAD: Chi-squared test reuslt is False.\n")
        outfile.write(dtk_sft.format_success_msg(success))
        return success