Beispiel #1
0
def create_report_file(data):
    """Write the report for the ART-triggered latency timer update test.

    Args:
        data: indexable holding, in order, the report file path, the log
            lines to scan, and the TB CD4 activation vector.  Only element 2
            of the vector is used; this test assumes the vector is constant.

    The report is marked ``Success=False`` when there are no log lines, when
    the initially calculated incubation timers pass
    ``dtk_sft.test_exponential`` at that rate, or when the updated timers do
    not pass it.  Nothing is returned; the outcome is written to the report
    file and a comparison plot is produced through ``dtk_sft.plot_data``.
    """
    report_name, lines, tb_cd4_activation_vector = data[0], data[1], data[2]

    # Rounding options shared by both exponential checks below.
    rounding = {"integers": True, "roundup": True, "round_nearest": False}

    initial_timers = []
    updated_timers = []
    passed = True
    with open(report_name, "w") as outfile:
        if not lines:
            outfile.write("BAD: No relevant test data found.\n")
            passed = False

        # A single line is tested against both markers, as two independent checks.
        for log_line in lines:
            if "Incubation_timer calculated as" in log_line:
                initial_timers.append(
                    float(dtk_sft.get_val("as ", log_line)))
            if "LifeCourseLatencyTimerUpdate" in log_line:
                updated_timers.append(
                    float(dtk_sft.get_val("timer ", log_line)))

        rate = tb_cd4_activation_vector[2]

        # expecting the original distribution to NOT match the art-triggered update distribution
        initial_fits = dtk_sft.test_exponential(initial_timers, rate,
                                                **rounding)
        if initial_fits:
            outfile.write(
                "BAD: The updated latency data matches the original distribution.\n"
            )
            passed = False

        # Reference sample, used only for the plot at the end.
        expected_update_data = np.random.exponential(1 / rate,
                                                     len(updated_timers))

        updated_fits = dtk_sft.test_exponential(updated_timers, rate, outfile,
                                                **rounding)
        if not updated_fits:
            outfile.write(
                "BAD: The updated latency data does not match the expected distribution.\n"
            )
            passed = False

        outfile.write("Data points checked = {}.\n".format(
            len(updated_timers)))
        outfile.write("SUMMARY: Success={0}\n".format(passed))

        plot_options = {
            "label1": "Actual",
            "label2": "Expected",
            "title": "Latency Duration recalculated for ART",
            "xlabel": "Data Points",
            "ylabel": "Days",
            "category": "tb_activation_and_cd4_hiv_first_on_art",
            "line": True,
            "overlap": True,
        }
        dtk_sft.plot_data(sorted(updated_timers),
                          sorted(expected_update_data), **plot_options)
Beispiel #2
0
def create_report_file(clearance_times, param_obj, report_name, stdout_days, inset_days, debug):
    """Write the report for the TB drug cure-rate (HIV) clearance test.

    Args:
        clearance_times: durations checked against an exponential distribution.
        param_obj: mapping that provides ``"TB_Drug_Cure_Rate_HIV"``, the rate
            passed to ``dtk_sft.test_exponential``.
        report_name: path of the report file to write.
        stdout_days: per-day clear counts, indexed by day.  Presumably
            collected from the standard-out log; verify against the caller.
        inset_days: mapping of day -> clear count; every key is compared
            against ``stdout_days``.  Presumably read from the inset chart.
        debug: accepted for interface compatibility; not used here.

    Nothing is returned; the outcome is written to ``report_name``.
    """
    with open(report_name, "w") as outfile:
        outfile.write("Cleared infections: {0} \n".format(len(clearance_times)))

        success = dtk_sft.test_exponential(clearance_times,
                                           param_obj["TB_Drug_Cure_Rate_HIV"],
                                           outfile, integers=True, roundup=True, round_nearest=False)

        for day in sorted(inset_days):
            inset_count = inset_days[day]
            # A day with no stdout entry counts as zero clears; previously the
            # lookup raised and aborted the report before the SUMMARY line.
            try:
                stdout_count = stdout_days[day]
            except (KeyError, IndexError):
                stdout_count = 0
            if inset_count != stdout_count:
                outfile.write("BAD: day {0} std out has {1} clears, inset chart has {2}\n".format(day, stdout_count, inset_count))
                success = False
        outfile.write("SUMMARY: Success={0}\n".format(success))
Beispiel #3
0
def create_report_file(data):
    """Write the report for the off-ART TB/HIV coinfection mortality test.

    Args:
        data: indexable holding the report file path (``data[0]``), the log
            lines to scan (``data[1]``) and the off-ART coinfection mortality
            rate (``data[2]``).  Any further elements are ignored.

    For each individual, the time between the first
    "infectionstatechange TBActivation " line and the "died of CoInfection"
    line is collected and checked against an exponential distribution with
    the off-ART rate.  Nothing is returned; the outcome is written to the
    report file and a plot is produced through ``dtk_sft.plot_data``.
    """
    report_name = data[0]
    lines = data[1]
    coinfection_mortality_rate_off_art = data[2]

    died_of_coinfection = "died of CoInfection"
    state_active_symptomatic = "infectionstatechange TBActivation "
    time_to_death_data = []
    active_infections_dictionary = {}  # individual id -> activation time stamp
    success = True
    with open(report_name, "w") as outfile:
        if not lines:
            outfile.write("BAD: No relevant test data found.\n")
            success = False
        for line in lines:
            if died_of_coinfection in line:
                ind_id = int(dtk_sft.get_val("Individual ", line))
                time_stamp = int(dtk_sft.get_val("time= ", line))
                if ind_id in active_infections_dictionary:
                    time_to_death_data.append(time_stamp - active_infections_dictionary[ind_id])
                else:
                    success = False
                    outfile.write("BAD: Individual {} died of coinfection without going active, at time {}."
                                  "\n".format(ind_id, time_stamp))
            elif state_active_symptomatic in line:
                ind_id = int(dtk_sft.get_val("Individual ", line))
                start_time_stamp = int(dtk_sft.get_val("time= ", line))
                if ind_id in active_infections_dictionary:
                    # Informational only: the first activation time is kept.
                    # The space before "at time" was missing in the original
                    # literal, which rendered as "symptomaticat time".
                    outfile.write("Individual {} went active symptomatic while already being active symptomatic"
                                  " at time {}. \n".format(ind_id, start_time_stamp))
                else:
                    active_infections_dictionary[ind_id] = start_time_stamp
        # expected_data here only used for graphing purposes
        expected_data = [int(value) for value in
                         np.random.exponential(1 / coinfection_mortality_rate_off_art, len(time_to_death_data))]
        if not dtk_sft.test_exponential(time_to_death_data, coinfection_mortality_rate_off_art, outfile,
                                        integers=True, roundup=False, round_nearest=False):
            success = False
        outfile.write("Data points checked = {}.\n".format(len(time_to_death_data)))
        outfile.write("SUMMARY: Success={0}\n".format(success))

        dtk_sft.plot_data(sorted(time_to_death_data), sorted(expected_data), label1="Actual", label2="Expected",
                          title="Time from Smear Negative Off ART TBHIV to Death", xlabel="Data Points", ylabel="Days",
                          category="tbhiv_mortality_smear_negative_off_art", line=True, overlap=True)
Beispiel #4
0
def create_report_file(data):
    """Write the report for the "HIV + ART, then TB, then off ART" latency test.

    Args:
        data: indexable holding the report file path (``data[0]``), the log
            lines to scan (``data[1]``) and the TB CD4 activation vector
            (``data[2]``).  Only element 2 of the vector is used; this test
            assumes the vector is constant.

    Checks performed:
      * every "LifeCourseLatencyTimerUpdate" line must follow a "has event"
        line for the same individual;
      * StoppedART timer updates must NOT pass ``dtk_sft.test_exponential``
        at the vector rate, and at most 0.006 of them may be shorter than
        ``big_magic_number`` days;
      * the initial incubation timers of reconstituting individuals must pass
        ``dtk_sft.test_exponential`` at the vector rate.

    Nothing is returned; the outcome is written to the report file and a plot
    is produced through ``dtk_sft.plot_data``.
    """
    report_name = data[0]
    lines = data[1]
    tb_cd4_activation_vector = data[2]  # this test assumes the vector is constant

    # StartedArt distribution is currently based on 0.01, while StoppedArt is on mostly 0.0000001, so we expect
    # much longer latency durations for the StoppedArt data, using big arbitrary # that is noticeably bigger than
    # what StartedArt distribution would extremely likely give us
    big_magic_number = 2000
    stopped_art_latency_data = []
    started_art_latency_data = []  # collected but not checked in this test
    tb_on_art_latency_data = []
    art_events_dict = {}  # individual id -> most recent ART event name
    success = True
    with open(report_name, "w") as outfile:
        if not lines:
            outfile.write("BAD: No relevant test data found.\n")
            success = False
        for line in lines:
            if "has event" in line:
                ind_id = int(dtk_sft.get_val("Individual ", line))
                art_status = line.split(" ")[9].strip(".")  # get_val only gets digits
                art_events_dict[ind_id] = art_status
            if "Incubation_timer calculated as" in line:
                infection_timer = float(dtk_sft.get_val("calculated as ", line))
                reconstitute = int(dtk_sft.get_val("reconstitute=", line))
                if reconstitute:  # ignore people who are not reconstituting.
                    tb_on_art_latency_data.append(infection_timer)
            if "LifeCourseLatencyTimerUpdate" in line:
                ind_id = int(dtk_sft.get_val("Individual ", line))
                new_incubation_timer = float(dtk_sft.get_val("timer ", line))
                if ind_id in art_events_dict:
                    if art_events_dict[ind_id] == "StartedART":
                        # we ignore this for this test, people are already on art when they get TB
                        started_art_latency_data.append(new_incubation_timer)
                    else:
                        stopped_art_latency_data.append(new_incubation_timer)
                    # Each ART event accounts for exactly one timer update.
                    art_events_dict.pop(ind_id)
                else:
                    success = False
                    outfile.write("BAD: No art-related event found in the logs for this timer update for Individual {},"
                                  " at time {}.\n".format(ind_id, int(dtk_sft.get_val("time= ", line))))
        # we want the stopped art latency data to NOT match the started art latency data
        # and we expect the stopped art latency data to be long period times as made my our cd4_Activation_vector
        if dtk_sft.test_exponential(stopped_art_latency_data, tb_cd4_activation_vector[2], integers=True,
                                    roundup=True, round_nearest=False):
            outfile.write("BAD: The StoppedArt latency data distribution matches the initial latency data"
                          " distribution, but shouldn't.\n")
            success = False

        if stopped_art_latency_data:
            small_duration_count = sum(1 for duration in stopped_art_latency_data
                                       if duration < big_magic_number)
            proportion_small = small_duration_count / float(len(stopped_art_latency_data))
            if proportion_small > 0.006:
                outfile.write("BAD: More than 0.006 of our durations are suspiciously small, it is {}. "
                              "Please Investigate.\n".format(proportion_small))
                success = False
        else:
            # Without this guard the proportion divided by zero and the report
            # was left without a SUMMARY line.
            outfile.write("BAD: No StoppedART latency timer updates found, cannot check for suspiciously "
                          "small durations.\n")
            success = False

        if not dtk_sft.test_exponential(tb_on_art_latency_data, tb_cd4_activation_vector[2], outfile, integers=False,
                                        roundup=False, round_nearest=False):
            # this is testing the internal timer which is float type
            # so 'integers=False'
            success = False
            # Trailing newline added so the next report line is not glued onto this one.
            outfile.write("BAD: Initial TB infection (with HIV and ART) latency doesn't match expected distribution.\n")
        outfile.write("Data points checked = {}."
                      "\n".format(len(tb_on_art_latency_data)))
        outfile.write("SUMMARY: Success={0}\n".format(success))

        # for graphing purposes only
        expected_tb_on_art_latency_data = np.random.exponential(1 / tb_cd4_activation_vector[2],
                                                                len(tb_on_art_latency_data))

        dtk_sft.plot_data(sorted(tb_on_art_latency_data), sorted(expected_tb_on_art_latency_data), label1="Actual",
                          label2="Expected",
                          title="HIV+ART then TB latency data",
                          xlabel="Data Points", ylabel="Days",
                          category="tb_activation_and_cd4_hiv_art_tb_offart", line=True, overlap=True)
Beispiel #5
0
def create_report_file(param_obj, output_dict, report_name, debug):
    """Write the report for the latent-to-cleared duration test.

    Args:
        param_obj: mapping of config values, read through the ``KEY_*``
            constants (config name, fast progressor rate, latent cure rate,
            child/adult fast fractions, simulation duration).
        output_dict: mapping of individual id -> mapping of state key ->
            timestep at which that state was entered.
        report_name: path of the report file to write.
        debug: when truthy, the summary line is also printed.

    Returns:
        bool: True when every check passed, False otherwise.
    """
    with open(report_name, "w") as outfile:
        config_name = param_obj[KEY_CONFIG_NAME]
        outfile.write("Config_name = {}\n".format(config_name))
        success = True
        fast_progressor_rate = param_obj[KEY_FAST_PROGRESSOR_RATE]
        latent_cure_rate = param_obj[KEY_LATENT_CURE_RATE]
        child_fast_fraction = param_obj[KEY_CHILD_FRACTION]
        adult_fast_fraction = param_obj[KEY_ADULT_FRACTION]
        simulation_duration = param_obj[KEY_DURATION]
        if not len(output_dict):
            success = False
            outfile.write(dtk_sft.sft_no_test_data)

        outfile.write("checking test conditions: \n")
        # NOTE: only a zero/empty fraction is rejected here, although the
        # message states that 1 is expected.
        if not child_fast_fraction or not adult_fast_fraction:
            success = False
            outfile.write(
                "BAD: expected {0} and {1} = 1, got {2} and {3} from config.json. "
                "Please fix the test.\n".format(KEY_CHILD_FRACTION,
                                                KEY_ADULT_FRACTION,
                                                child_fast_fraction,
                                                adult_fast_fraction))
        # Sample 100 progression times; any draw shorter than the simulation
        # means the configured rate is too high for this test.
        dist_exponential_np_fast = np.random.exponential(
            1 / fast_progressor_rate, 100)
        if min(dist_exponential_np_fast) < simulation_duration:
            success = False
            outfile.write(
                "BAD: expected a small {0} to avoid moving individual to active disease state, got {1} from config.json. Please "
                "fix the test.\n".format(KEY_FAST_PROGRESSOR_RATE,
                                         fast_progressor_rate))
        outfile.write("conditional check result is {}.\n".format(success))

        actual_timer = []
        outfile.write(
            "collecting the actual timestep between latent and cleared:\n")
        for ind_id, states in output_dict.items():
            cleared_time = states.get(KEY_CLEARED)
            latent_time = states.get(KEY_LATENT)
            presymptomatic_time = states.get(KEY_PRESYMPTOMATIC)
            # Compare against None rather than truthiness so that an event
            # recorded at timestep 0 is not mistaken for a missing one.
            if latent_time is not None:
                if cleared_time is not None:  # some individual may not move to cleared state at the end of the simulation
                    actual_timer.append(cleared_time - latent_time)
                else:
                    outfile.write(
                        "Individual {0} moved to latent state at timestep {1} and is not cleared yet at the "
                        "end of simulation (duration = {2})."
                        "\n".format(ind_id, latent_time, simulation_duration))
            else:
                success = False
                outfile.write(
                    "BAD: individual {0} moved to cleared state at timerstep {1} before entering "
                    "latent state.\n".format(ind_id, cleared_time))
            if presymptomatic_time is not None:
                success = False
                outfile.write(
                    "BAD: individual {0} moved to presymptomatic at timestep {1}, expected no active disease"
                    " in this simulation, please double check the config.\n".
                    format(ind_id, presymptomatic_time))
        if not actual_timer:
            success = False
            outfile.write(
                "BAD: There is no latent to cleared transition in this test, please fix the test.\n"
            )

        outfile.write(
            "Running ks test for latent to cleared duration and numpy exponential distribution: \n"
        )
        size = len(actual_timer)
        scale = 1.0 / latent_cure_rate
        # Reference sample, used only for the plot.
        dist_exponential_np = np.random.exponential(scale, size)
        dtk_sft.plot_data_sorted(
            actual_timer,
            dist2=np.array(dist_exponential_np),
            label1="latent to cleared duration",
            label2="numpy exponential",
            title="exponential rate = {}".format(latent_cure_rate),
            xlabel="data point",
            ylabel="latent to cleared duration",
            category='latent_to_cleared_duration',
            show=True,
            line=False,
            overlap=True)
        result = dtk_sft.test_exponential(actual_timer,
                                          p1=latent_cure_rate,
                                          report_file=outfile,
                                          integers=True,
                                          roundup=True,
                                          round_nearest=False)
        outfile.write(
            "ks test result is {0}, exponential rate = {1}, # of data point = {2}.\n"
            .format(result, latent_cure_rate, size))
        if not result:
            success = False
            outfile.write(
                "BAD: test exponential for latent to cleared duration failed with {0} "
                "= {1}.\n".format(KEY_LATENT_CURE_RATE, latent_cure_rate))
        else:
            outfile.write(
                "GOOD: test exponential for latent to cleared duration passed with {0} "
                "= {1}.\n".format(KEY_LATENT_CURE_RATE, latent_cure_rate))

        outfile.write(dtk_sft.format_success_msg(success))
    if debug:
        print("SUMMARY: Success={0}\n".format(success))
    return success
Beispiel #6
0
def create_report_file(param_obj, output_dict, report_name, debug):
    """Write the report for the active-symptomatic-to-death duration test.

    Args:
        param_obj: mapping of config values, read through the ``KEY_*``
            constants (config name, death rate, simulation duration).
        output_dict: mapping of individual id -> mapping of state key ->
            recorded data.  The death entry is used directly as a timestep;
            the symptomatic entry is indexed and its element 0 is the timestep.
        report_name: path of the report file to write.
        debug: when truthy, the summary line is also printed.

    Returns:
        bool: True when every check passed, False otherwise.
    """
    with open(report_name, "w") as outfile:
        config_name = param_obj[KEY_CONFIG_NAME]
        outfile.write("Config_name = {}\n".format(config_name))
        success = True
        death_rate = param_obj[KEY_DEATH_RATE]
        simulation_duration = param_obj[KEY_DURATION]
        if not len(output_dict):
            success = False
            outfile.write(dtk_sft.sft_no_test_data)
        actual_timer = []
        outfile.write(
            "collecting the actual timestep between active and death:\n")
        for ind_id, states in output_dict.items():
            death_time = active_time = None
            if KEY_DEATH in states:
                death_time = states[KEY_DEATH]
            if KEY_SYMPTOMATIC in states:
                active_time = states[KEY_SYMPTOMATIC][0]
            # Compare against None rather than truthiness so that an event
            # recorded at timestep 0 is not mistaken for a missing one.
            if active_time is not None:
                if death_time is not None:  # some individual may not die yet at the end of the simulation
                    actual_timer.append(death_time - active_time)
                else:
                    outfile.write(
                        "Individual {0} moved to symptomatic active at timestep {1} and is not dead yet at "
                        "the end of simulation (duration = {2}).\n".format(
                            ind_id, active_time, simulation_duration))
            else:
                success = False
                outfile.write(
                    "BAD: individual {0} died before entering active symptomatic state.\n"
                    .format(ind_id))
        if not actual_timer:
            success = False
            outfile.write(
                "BAD: There is no death in this test, please fix the test.\n")

        outfile.write(
            "Running ks test for death time and numpy exponential distribution: \n"
        )
        size = len(actual_timer)
        scale = 1.0 / death_rate
        # Reference sample, used only for the plot.
        dist_exponential_np = np.random.exponential(scale, size)
        dtk_sft.plot_data_sorted(
            actual_timer,
            dist2=np.array(dist_exponential_np),
            label1="death timer",
            label2="numpy exponential",
            title="exponential rate = {}".format(death_rate),
            xlabel="data point",
            ylabel="death timer",
            category='Death_timer',
            show=True,
            line=False,
            overlap=True)
        result = dtk_sft.test_exponential(actual_timer,
                                          p1=death_rate,
                                          report_file=outfile,
                                          integers=True,
                                          roundup=True,
                                          round_nearest=False)
        outfile.write(
            "ks test result is {0}, exponential rate = {1}, # of data point = {2}.\n"
            .format(result, death_rate, size))
        if not result:
            success = False
            outfile.write(
                "BAD: test exponential for death timer failed with death rate = {}.\n"
                .format(death_rate))

        outfile.write(dtk_sft.format_success_msg(success))
    if debug:
        print("SUMMARY: Success={0}\n".format(success))
    return success
Beispiel #7
0
def create_report_file(param_obj, output_dict, report_name, debug):
    """Write the report for the PreSymptomatic-to-Cleared duration test.

    Args:
        param_obj: mapping of config values, read through the ``KEY_*``
            constants (config name, slow progressor rate, latent cure rate,
            presymptomatic cure rate, presymptomatic rate, base infectivity,
            simulation duration).
        output_dict: mapping of individual id -> mapping of state key ->
            indexable record.  Element 0 of a record is the timestep; for the
            cleared state, element 1 is the internal timer value.
        report_name: path of the report file to write.
        debug: when truthy, the summary line is also printed.

    Returns:
        bool: True when every check passed, False otherwise.
    """
    with open(report_name, "w") as outfile:
        config_name = param_obj[KEY_CONFIG_NAME]
        outfile.write("Config_name = {}\n".format(config_name))
        success = True
        slow_progressor_rate = param_obj[KEY_SLOW_PROGRESSOR_RATE]
        latent_cure_rate = param_obj[KEY_LATENT_CURE_RATE]
        presymptomatic_cure_rate = param_obj[KEY_PRESYMPTOMATIC_CURE_RATE]
        presymptomatic_rate = param_obj[KEY_PRESYMPTOMATIC_RATE]
        base_infectivity = param_obj[KEY_BASE_INFECTIVITY]
        simulation_duration = param_obj[KEY_DURATION]
        if not len(output_dict):
            success = False
            outfile.write(dtk_sft.sft_no_test_data)

        outfile.write("checking test conditions: \n")
        # Each precondition samples 100 exponential draws at the configured
        # rate; any draw shorter than the simulation means the rate is too
        # high for this test.
        dist_exponential_np_slow = np.random.exponential(
            1 / slow_progressor_rate, 100)
        if min(dist_exponential_np_slow) < simulation_duration:
            success = False
            outfile.write(
                "BAD: expected a small {0} to avoid moving individual to active disease state, got {1} from config.json. Please "
                "fix the test.\n".format(KEY_SLOW_PROGRESSOR_RATE,
                                         slow_progressor_rate))
        dist_exponential_np_latent_cure = np.random.exponential(
            1 / latent_cure_rate, 100)
        if min(dist_exponential_np_latent_cure) < simulation_duration:
            success = False
            outfile.write(
                "BAD: expected a small {0} to avoid Latent to Cleared state transition(all Latent state will progress to "
                "PreSymptomatic), got {1} from config.json. Please fix the test.\n"
                .format(KEY_LATENT_CURE_RATE, latent_cure_rate))
        dist_exponential_np_presymptomatic = np.random.exponential(
            1 / presymptomatic_rate, 100)
        if min(dist_exponential_np_presymptomatic) < simulation_duration:
            success = False
            outfile.write(
                "BAD: expected a small {0} to avoid PreSymptomatic to Symptomatic state transition(all PreSymptomatic "
                "state will progress to Cleared), got {1} from config.json. Please fix the test.\n"
                .format(KEY_PRESYMPTOMATIC_RATE, presymptomatic_rate))
        if base_infectivity:
            success = False
            # A space was missing between "fix" and "the test" in the original literal.
            outfile.write(
                "BAD: expected {0} = 0 to look only at progression, got {1} from config.json. Please fix "
                "the test.\n".format(KEY_BASE_INFECTIVITY, base_infectivity))
        outfile.write("conditional check result is {}.\n".format(success))

        actual_timer = []
        internal_timer = []
        outfile.write(
            "collecting the actual timestep between PreSymptomatic and Cleared:\n"
        )
        outfile.write(
            "checking if the internal timer matches the PreSymptomatic to Cleared duration:\n"
        )
        for ind_id, states in output_dict.items():
            cleared_time = presymptomatic_time = timer = None
            if KEY_CLEARED in states:
                cleared_time = states[KEY_CLEARED][0]
                timer = states[KEY_CLEARED][1]
                internal_timer.append(timer)
            if KEY_PRESYMPTOMATIC in states:
                presymptomatic_time = states[KEY_PRESYMPTOMATIC][0]
            # Compare against None rather than truthiness so that an event
            # recorded at timestep 0 is not mistaken for a missing one.
            if presymptomatic_time is not None:
                if cleared_time is not None:  # some individual may not move to cleared state at the end of the simulation
                    elapsed = cleared_time - presymptomatic_time
                    actual_timer.append(elapsed)
                    # The observed duration must equal the internal timer rounded up.
                    if elapsed != math.ceil(timer):
                        success = False
                        outfile.write(
                            "BAD: individual {0} has internal timer = {1} but the actual timer is {2} (enter "
                            "PreSymptomatic state at timestep {3}, moved to Cleared state at "
                            "timestep {4}).\n".format(
                                ind_id, timer, elapsed,
                                presymptomatic_time, cleared_time))
                else:
                    outfile.write(
                        "Individual {0} moved to PreSymptomatic state at timestep {1} and is not cleared yet at the "
                        "end of simulation (duration = {2})."
                        "\n".format(ind_id, presymptomatic_time,
                                    simulation_duration))
            else:
                success = False
                outfile.write(
                    "BAD: individual {0} moved to cleared state at timerstep {1} before entering "
                    "PreSymptomatic state.\n".format(ind_id, cleared_time))

        if not actual_timer:
            success = False
            outfile.write(
                "BAD: There is no PreSymptomatic to cleared transition in this test, please fix the test.\n"
            )

        outfile.write(
            "Running ks test for PreSymptomatic to cleared internal timer and numpy exponential distribution: \n"
        )
        size = len(internal_timer)
        scale = 1.0 / presymptomatic_cure_rate
        # Reference sample, used only for the plot.
        dist_exponential_np = np.random.exponential(scale, size)
        dtk_sft.plot_data_sorted(
            internal_timer,
            dist2=np.array(dist_exponential_np),
            label1="PreSymptomatic to cleared duration",
            label2="numpy exponential",
            title="exponential rate = {}".format(presymptomatic_cure_rate),
            xlabel="data point",
            ylabel="PreSymptomatic to cleared duration",
            category='PreSymptomatic_to_cleared_duration',
            show=True,
            line=True,
            overlap=True)
        # The internal timer is tested unrounded (integers=False).
        result = dtk_sft.test_exponential(internal_timer,
                                          p1=presymptomatic_cure_rate,
                                          report_file=outfile,
                                          integers=False,
                                          roundup=False,
                                          round_nearest=False)
        outfile.write(
            "ks test result is {0}, exponential rate = {1}, # of data point = {2}.\n"
            .format(result, presymptomatic_cure_rate, size))
        if not result:
            success = False
            outfile.write(
                "BAD: test exponential for PreSymptomatic to cleared duration failed with {0} "
                "= {1}.\n".format(KEY_PRESYMPTOMATIC_CURE_RATE,
                                  presymptomatic_cure_rate))
        else:
            outfile.write(
                "GOOD: test exponential for PreSymptomatic to cleared duration passed with {0} "
                "= {1}.\n".format(KEY_PRESYMPTOMATIC_CURE_RATE,
                                  presymptomatic_cure_rate))

        outfile.write(dtk_sft.format_success_msg(success))
    if debug:
        print("SUMMARY: Success={0}\n".format(success))
    return success
Beispiel #8
0
def create_report_file(data, debug=False):
    """Write the report for the "TB first, then HIV" recalculated latency test.

    Args:
        data: indexable holding, in order, the report file path, the log
            lines to scan, and the TB CD4 activation vector.  Only element 0
            of the vector is used; this test assumes the vector is constant.
        debug: when truthy, the per-individual durations are also dumped to
            ``DEBUG_duration_data.json``.

    For each individual, the duration is the time stamp of a
    "TBActivationPresymptomatic" line minus the time stamp of that
    individual's most recent "LifeCourseLatencyTimerUpdate" line.  The
    durations are checked with ``dtk_sft.test_exponential``.  Nothing is
    returned; the outcome is written to the report file and a plot is
    produced through ``dtk_sft.plot_data_sorted``.
    """
    report_name, lines, tb_cd4_activation_vector = data[0], data[1], data[2]

    timer_update_times = {}  # individual id -> time of latest timer update
    duration_data = {}  # individual id -> update-to-presymptomatic duration
    passed = True
    with open(report_name, "w") as outfile:
        if not lines:
            outfile.write("BAD: No relevant test data found.\n")
            passed = False

        for log_line in lines:
            if "LifeCourseLatencyTimerUpdate" in log_line:
                person = int(dtk_sft.get_val("Individual ", log_line))
                update_time = int(dtk_sft.get_val("time= ", log_line))
                if person in timer_update_times:
                    outfile.write(
                        "Individual {} incubation timer reset at time {}. Please check. "
                        "\n".format(person, update_time))
                # A repeated update replaces the earlier start time.
                timer_update_times[person] = update_time
                continue
            if "TBActivationPresymptomatic" not in log_line:
                continue

            person = int(dtk_sft.get_val("Individual ", log_line))
            activation_time = int(dtk_sft.get_val("time= ", log_line))
            if person in timer_update_times:
                duration_data[person] = (activation_time -
                                         timer_update_times[person])
            else:
                outfile.write(
                    "Individual {} went presymptomatic without incubation timer update at time {}. "
                    "Please check. \n".format(person, activation_time))

        if debug:
            with open("DEBUG_duration_data.json", "w") as debug_outfile:
                json.dump(duration_data, debug_outfile, indent=4)
        durations = list(duration_data.values())

        fits_exponential = dtk_sft.test_exponential(
            durations,
            tb_cd4_activation_vector[0],
            outfile,
            integers=True,
            roundup=True,
            round_nearest=False)
        if not fits_exponential:
            passed = False
        outfile.write("Data points checked = {}.\n".format(len(duration_data)))
        outfile.write("SUMMARY: Success={0}\n".format(passed))

        # only used for graphing purposes
        reference_sample = np.random.exponential(
            1 / tb_cd4_activation_vector[0], len(duration_data))
        expected_durations = [math.ceil(value) for value in reference_sample]
        dtk_sft.plot_data_sorted(
            durations,
            expected_durations,
            label1="Actual",
            label2="Expected",
            title="Recalculated Latency Duration TB then HIV(Sorted)",
            xlabel="Data Points",
            ylabel="Days",
            category="tb_activation_and_cd4_tb_first",
            line=True,
            overlap=True)
Beispiel #9
0
def create_report_file(data):
    """Write the report for the "HIV first, on ART, then off ART" latency test.

    Args:
        data: indexable holding the report file path (``data[0]``), the log
            lines to scan (``data[1]``) and the TB CD4 activation vector
            (``data[2]``).  This test assumes the vector is constant.

    Checks performed:
      * every "LifeCourseLatencyTimerUpdate" line must follow a "has event"
        line for the same individual;
      * StoppedART timer updates must NOT pass ``dtk_sft.test_exponential``
        at the vector rate;
      * at most ``max_small_proportion`` of the StoppedART timers may be
        shorter than ``big_magic_number`` days.

    Nothing is returned; the outcome is written to the report file and a plot
    is produced through ``dtk_sft.plot_data``.
    """
    report_name = data[0]
    lines = data[1]
    tb_cd4_activation_vector = data[
        2]  # this test assumes the vector is constant

    # StartedArt distribution is currently based on 0.01, while StoppedArt is on mostly 0.0000001, so we expect
    # much longer latency durations for the StoppedArt data, using big arbitrary # that is noticeably bigger than
    # what StartedArt distribution would extremely likely give us
    big_magic_number = 2000
    # Largest tolerated share of StoppedART timers below big_magic_number.
    max_small_proportion = 0.01
    stopped_art_latency_data = []
    started_art_latency_data = []  # collected but not checked in this test
    art_events_dict = {}  # individual id -> most recent ART event name
    success = True
    with open(report_name, "w") as outfile:
        if not lines:
            outfile.write("BAD: No relevant test data found.\n")
            success = False
        for line in lines:
            if "has event" in line:
                ind_id = int(dtk_sft.get_val("Individual ", line))
                art_status = line.split(" ")[9].strip(
                    ".")  # get_val only gets digits
                art_events_dict[ind_id] = art_status
            if "LifeCourseLatencyTimerUpdate" in line:
                ind_id = int(dtk_sft.get_val("Individual ", line))
                new_incubation_timer = float(dtk_sft.get_val("timer ", line))
                if ind_id in art_events_dict:
                    if art_events_dict[ind_id] == "StartedART":
                        started_art_latency_data.append(new_incubation_timer)
                    else:
                        stopped_art_latency_data.append(new_incubation_timer)
                    # Each ART event accounts for exactly one timer update.
                    art_events_dict.pop(ind_id)
                else:
                    success = False
                    outfile.write(
                        "BAD: No art-related event found in the logs for this timer update for Individual {},"
                        " at time {}.\n".format(
                            ind_id, int(dtk_sft.get_val("time= ", line))))
        # we want the stopped art latency data to NOT match the started art latency data
        # and we expect the stopped art latency data to be long period times as made my our cd4_Activation_vector
        if dtk_sft.test_exponential(stopped_art_latency_data,
                                    tb_cd4_activation_vector[2],
                                    integers=False,
                                    roundup=False,
                                    round_nearest=False):
            outfile.write(
                "BAD: The StoppedArt latency data distribution matches the StartedArt latency data"
                " distribution, but shouldn't.\n")
            success = False
        # Plot reference only.  It reads element 0 while the check above reads
        # element 2; they agree because the vector is assumed constant.
        expected_stopped_art_data = np.random.exponential(
            1 / tb_cd4_activation_vector[0], len(stopped_art_latency_data))

        if stopped_art_latency_data:
            small_duration_count = sum(
                1 for duration in stopped_art_latency_data
                if duration < big_magic_number)
            proportion_small = small_duration_count / float(
                len(stopped_art_latency_data))
            if proportion_small > max_small_proportion:
                # The message now reports the enforced threshold; it used to
                # say "0.5%" while the comparison used 0.01.
                outfile.write(
                    "BAD: More than {0:.1%} of our durations are suspiciously small, it is {1}. "
                    "Please Investigate.\n".format(max_small_proportion,
                                                   proportion_small))
                success = False
        else:
            # Without this guard the proportion divided by zero and the report
            # was left without a SUMMARY line.
            outfile.write(
                "BAD: No StoppedART latency timer updates found, cannot check for suspiciously "
                "small durations.\n")
            success = False
        outfile.write("Data points checked = {}.\n".format(
            len(stopped_art_latency_data)))
        outfile.write("SUMMARY: Success={0}\n".format(success))

        dtk_sft.plot_data(
            sorted(stopped_art_latency_data),
            sorted(expected_stopped_art_data),
            label1="Actual",
            label2="Expected",
            title=
            "StoppedART Latency data should have a similar shape/scale of duration but will not "
            "match",
            xlabel="Data Points",
            ylabel="Days",
            category="tb_activation_and_cd4_hiv_first_on_art_off_art",
            line=True,
            overlap=True)
Beispiel #10
0
def create_report_file(param_obj, output_dict, report_name, debug):
    """Write the report for the latent -> presymptomatic test driven by the fast progressor rate.

    The report is written to ``report_name`` and covers, in order:
      1. test preconditions read from ``param_obj`` (child fast fraction must be 0, and the
         slow progressor rate must be small enough compared to the simulation duration);
      2. for every individual, that the observed latent -> presymptomatic duration equals the
         logged internal timer rounded up;
      3. an exponential test (``dtk_sft.test_exponential``) of the internal timers against
         the fast progressor rate;
      4. a binomial 95% CI test (``dtk_sft.test_binomial_95ci``) of the fast progressor
         fraction, using adult fast fraction times the mean CD4 progression multiplier.

    Args:
        param_obj: mapping of configuration values, indexed by the module's ``KEY_*`` constants.
        output_dict: mapping of individual id -> record. A record may contain ``KEY_LATENT``
            (timestep of entering the latent state) and ``KEY_PRESYMPTOMATIC`` (an indexable
            whose item 0 is the timestep of entering presymptomatic and item 1 the timer).
        report_name: path of the report file; it is opened in write mode.
        debug: when truthy, individuals that never progressed are listed in the report and
            the summary line is printed to stdout.

    Returns:
        bool: True if every check passed, False otherwise.
    """
    with open(report_name, "w") as outfile:
        config_name = param_obj[KEY_CONFIG_NAME]
        outfile.write("Config_name = {}\n".format(config_name))
        success = True
        slow_progressor_rate = param_obj[KEY_SLOW_PROGRESSOR_RATE]
        fast_progressor_rate = param_obj[KEY_FAST_PROGRESSOR_RATE]
        child_fast_fraction = param_obj[KEY_CHILD_FRACTION]
        adult_fast_fraction = param_obj[KEY_ADULT_FRACTION]
        progression_multiplier = np.mean(param_obj[KEY_CD4_PROGRESSION_MULTIPLIER])
        simulation_duration = param_obj[KEY_DURATION]
        if not output_dict:
            success = False
            outfile.write(dtk_sft.sft_no_test_data)

        outfile.write("checking test conditions: \n")
        if child_fast_fraction:
            success = False
            outfile.write("BAD: expected {0} = 0, got {1} from config.json. "
                          "Please fix the test.\n".format(KEY_CHILD_FRACTION, child_fast_fraction))
        # Precondition: slow progressors should not progress within the simulation. This is
        # checked on a random sample of 100 draws, so the outcome is probabilistic.
        dist_exponential_np_slow = np.random.exponential(1/slow_progressor_rate, 100)
        if min(dist_exponential_np_slow) < simulation_duration:
            success = False
            outfile.write("BAD: expected a small {0} to distinguish fast and slow progress TB, got {1} from config.json. Please "
                          "fix the test.\n".format(KEY_SLOW_PROGRESSOR_RATE, slow_progressor_rate))
        outfile.write("conditional check result is {}.\n".format(success))

        actual_timer = []
        internal_timer = []
        slow_count = 0
        outfile.write("collecting the actual timestep between latent and presymptomatic:\n")
        outfile.write("checking if the internal timer matches the PreSymptomatic to Cleared duration:\n")

        for individual_id, record in output_dict.items():
            presymptomatic_time = timer = latent_time = None
            if KEY_PRESYMPTOMATIC in record:
                presymptomatic_time = record[KEY_PRESYMPTOMATIC][0]
                timer = record[KEY_PRESYMPTOMATIC][1]
                internal_timer.append(timer)
            if KEY_LATENT in record:
                latent_time = record[KEY_LATENT]
            # None is the "not observed" sentinel; compare against it explicitly so that a
            # transition logged at timestep 0 is not mistaken for a missing one.
            if latent_time is not None:
                if presymptomatic_time is not None:  # some individual may not move to presymptomatic state at the end of the simulation
                    elapsed = presymptomatic_time - latent_time
                    actual_timer.append(elapsed)
                    if elapsed != math.ceil(timer):
                        success = False
                        outfile.write("BAD: individual {0} has internal timer = {1} but the actual timer is {2} (enter "
                                      "latent state at timestep {3}, enter presymptomatic active state at "
                                      "timestep {4}).\n".format(individual_id, timer, elapsed,
                                                                latent_time, presymptomatic_time))
                else:
                    # Still latent at the end of the simulation: counted as a slow progressor.
                    slow_count += 1
                    if debug:
                        outfile.write("Individual {0} moved to latent state at timestep {1} and is not move to "
                                      "presymptomatic active yet at the end of simulation (duration = {2})."
                                      "\n".format(individual_id, latent_time, simulation_duration))
            else:
                success = False
                outfile.write("BAD: individual {0} moved to presymptomatic active state at timerstep {1} before entering "
                              "latent state.\n".format(individual_id, presymptomatic_time))
        if not actual_timer:
            success = False
            outfile.write("BAD: There is no latent to presymptomatic state transition in this test, please fix the test.\n")

        outfile.write("Running ks test for latent to presymptomatic internal timer and numpy exponential distribution: \n")
        size = len(internal_timer)
        scale = 1.0 / fast_progressor_rate
        # Reference sample used only for the plot; the statistical test is done by dtk_sft.
        dist_exponential_np = np.random.exponential(scale, size)
        dtk_sft.plot_data_sorted(internal_timer, dist2=np.array(dist_exponential_np), label1="internal timer",
                          label2="numpy exponential", title="exponential rate = {}".format(fast_progressor_rate),
                          xlabel="data point", ylabel="latent to presymptomatic internal timer",
                          category='latent_to_presymptomatic_internal_timer', show=True, line=False, overlap=True)
        result = dtk_sft.test_exponential(internal_timer, p1=fast_progressor_rate, report_file=outfile, integers=True, roundup=True,
                                          round_nearest=False)
        outfile.write("ks test result is {0}, exponential rate = {1}, # of data point = {2}.\n".format(result, fast_progressor_rate, size))
        if not result:
            success = False
            outfile.write("BAD: test exponential for latent to presymptomatic duration failed with fast_progressor_rate "
                          "= {}.\n".format(fast_progressor_rate))
        else:
            outfile.write(
                "GOOD: test exponential for latent to presymptomatic duration passed with fast_progressor_rate "
                "= {}.\n".format(fast_progressor_rate))

        outfile.write("running binomial test with 95% confidence for Fast_Progressor_Fraction_Adult:\n")
        # Fast progressors are the individuals that logged a presymptomatic timer; trials are
        # those plus the individuals that were still latent at the end of the simulation.
        result2 = dtk_sft.test_binomial_95ci(num_success=len(internal_timer), num_trials=len(internal_timer) + slow_count,
                                             prob=adult_fast_fraction * progression_multiplier, report_file=outfile,
                                             category="Fast_Progressor_Fraction_Adult")
        outfile.write("number of slow progressor is {0} and number of fast progressor is {1}.\n".format(slow_count, len(internal_timer)))
        if not result2:
            success = False
            outfile.write("BAD: binomial test for Fast_Progressor_Fraction_Adult = {0} and TB_CD4_Primary_Progression= {1} failed"
                          ".\n".format(adult_fast_fraction, progression_multiplier))
        else:
            outfile.write("GOOD: binomial test for Fast_Progressor_Fraction_Adult = {0} and TB_CD4_Primary_Progression= {1} passed"
                          ".\n".format(adult_fast_fraction, progression_multiplier))

        outfile.write(dtk_sft.format_success_msg(success))
    if debug:
        print( "SUMMARY: Success={0}\n".format(success) )
    return success
Beispiel #11
0
def create_report_file(param_obj, output_dict, report_name, debug):
    """Write the report for the latent -> presymptomatic test driven by the slow progressor rate.

    For every individual in ``output_dict`` the observed latent -> presymptomatic duration is
    compared with the logged internal timer rounded up. The collected internal timers are
    then plotted against a numpy exponential sample and tested with
    ``dtk_sft.test_exponential`` using the slow progressor rate.

    Args:
        param_obj: mapping of configuration values, indexed by the module's ``KEY_*`` constants.
        output_dict: mapping of individual id -> record. A record may contain ``KEY_LATENT``
            (timestep of entering the latent state) and ``KEY_PRESYMPTOMATIC`` (an indexable
            whose item 0 is the timestep of entering presymptomatic and item 1 the timer).
        report_name: path of the report file; it is opened in write mode.
        debug: when truthy, the summary line is also printed to stdout.

    Returns:
        bool: True if every check passed, False otherwise.
    """
    with open(report_name, "w") as outfile:
        config_name = param_obj[KEY_CONFIG_NAME]
        outfile.write("Config_name = {}\n".format(config_name))
        success = True
        slow_progressor_rate = param_obj[KEY_SLOW_PROGRESSOR_RATE]
        simulation_duration = param_obj[KEY_DURATION]
        if not output_dict:
            success = False
            outfile.write(dtk_sft.sft_no_test_data)
        actual_timer = []
        internal_timer = []
        outfile.write("collecting the actual timestep between latent and presymptomatic:\n")
        outfile.write("checking if the internal timer matches the PreSymptomatic to Cleared duration:\n")
        for individual_id, record in output_dict.items():
            presymptomatic_time = timer = latent_time = None
            if KEY_PRESYMPTOMATIC in record:
                presymptomatic_time = record[KEY_PRESYMPTOMATIC][0]
                timer = record[KEY_PRESYMPTOMATIC][1]
                internal_timer.append(timer)
            if KEY_LATENT in record:
                latent_time = record[KEY_LATENT]
            # None is the "not observed" sentinel; compare against it explicitly so that a
            # transition logged at timestep 0 is not mistaken for a missing one.
            if latent_time is not None:
                if presymptomatic_time is not None:  # some individual may not move to presymptomatic state at the end of the simulation
                    elapsed = presymptomatic_time - latent_time
                    actual_timer.append(elapsed)
                    if elapsed != math.ceil(timer):
                        success = False
                        outfile.write("BAD: individual {0} has internal timer = {1} but the actual timer is {2} (enter "
                                      "latent state at timestep {3}, enter presymptomatic active state at "
                                      "timestep {4}).\n".format(individual_id, timer, elapsed,
                                                                latent_time, presymptomatic_time))
                else:
                    outfile.write("Individual {0} moved to latent state at timestep {1} and is not move to "
                                  "presymptomatic active yet at the end of simulation (duration = {2})."
                                  "\n".format(individual_id, latent_time, simulation_duration))
            else:
                success = False
                outfile.write("BAD: individual {0} moved to presymptomatic active state at timerstep {1} before entering "
                              "latent state.\n".format(individual_id, presymptomatic_time))
        if not actual_timer:
            success = False
            outfile.write("BAD: There is no latent to presymptomatic state transition in this test, please fix the test.\n")

        outfile.write("Running ks test for latent to presymptomatic internal timer and numpy exponential distribution: \n")
        size = len(internal_timer)
        scale = 1.0 / slow_progressor_rate
        # Reference sample used only for the plot; the statistical test is done by dtk_sft.
        dist_exponential_np = np.random.exponential(scale, size)
        dtk_sft.plot_data_sorted(internal_timer, dist2=np.array(dist_exponential_np), label1="latent to presymptomatic internal timer",
                          label2="numpy exponential", title="exponential rate = {}".format(slow_progressor_rate),
                          xlabel="data point", ylabel="latent to presymptomatic internal timer",
                          category='latent_to_presymptomatic_internal_timer', show=True, line=False, overlap=True)
        result = dtk_sft.test_exponential(internal_timer, p1=slow_progressor_rate, report_file=outfile, integers=True, roundup=True,
                                          round_nearest=False)
        outfile.write("ks test result is {0}, exponential rate = {1}, # of data point = {2}.\n".format(result, slow_progressor_rate, size))
        if not result:
            success = False
            outfile.write("BAD: test exponential for latent to presymptomatic internal timer failed with slow_progressor_rate "
                          "= {}.\n".format(slow_progressor_rate))
        else:
            outfile.write("GOOD: test exponential for latent to presymptomatic internal timer passed with slow_progressor_rate "
                          "= {}.\n".format(slow_progressor_rate))

        outfile.write(dtk_sft.format_success_msg(success))
    if debug:
        print( "SUMMARY: Success={0}\n".format(success) )
    return success
Beispiel #12
0
def create_report_file(drug_start_timestep, inactivation_times, active_count, inactivations, drug_inactivation_rate, report_name, debug = False):
    """Write the drug inactivation report and return whether the test passed.

    Two strategies are used depending on ``drug_inactivation_rate``:
      * rate < 0.1: ``inactivation_times`` are tested as exponential draws with
        ``dtk_sft.test_exponential`` and plotted against a rounded-up numpy sample;
      * otherwise: the per-timestep counts in ``inactivations`` are compared with
        ``drug_inactivation_rate * active_count[t]`` through ``dtk_sft.test_multinomial``,
        after checking that nothing was inactivated before ``drug_start_timestep``.

    Args:
        drug_start_timestep: timestep index at which the drug starts.
        inactivation_times: collection of inactivation durations (low-rate branch).
        active_count: per-timestep counts, indexed like ``inactivations`` (high-rate branch).
        inactivations: per-timestep inactivation counts (high-rate branch).
        drug_inactivation_rate: rate under test.
        report_name: path of the report file; it is opened in write mode.
        debug: when truthy, the success message is also printed.

    Returns:
        The overall success flag.
    """
    with open(report_name, "w") as report:
        passed = True
        # ks exponential test doesn't work very well with large rate, use chi squared test instead.
        # while rate is small ks test for exponential distribution is more sensitive to catch the difference
        if not drug_inactivation_rate < 0.1:
            report.write("Testing inactivation count per day with rate {0}. \n".format( drug_inactivation_rate) )
            expected_per_day = []
            for step, observed in enumerate(inactivations):
                if step < drug_start_timestep:
                    # Nothing may be inactivated before the drug is distributed.
                    if observed > 0:
                        passed = False
                        report.write("BAD: expected no inactivations on drugs before day {0}, get {1} cases at timestep {2}.\n"
                                     "".format(drug_start_timestep , observed, step))
                elif active_count[step] > 0:
                    expected_per_day.append(drug_inactivation_rate * active_count[step])

            # Align observed and expected series: both are cut to the shorter length,
            # with the observed series starting the day after the drug start.
            first_tested = drug_start_timestep + 1
            if len(inactivations) > len(expected_per_day) + drug_start_timestep:
                observed_per_day = inactivations[first_tested:first_tested + len(expected_per_day)]
            else:
                observed_per_day = inactivations[first_tested:]
                expected_per_day = expected_per_day[:len(observed_per_day)]

            dtk_sft.plot_data(observed_per_day, expected_per_day,
                              label1="actual inactivation", label2="expected inactivation",
                              title="inactivation per day",
                              xlabel="date after drug start day", ylabel="inactivation per day",
                              category="inactivation_counts", show=True, line=True, overlap=True, sort=False)

            if not dtk_sft.test_multinomial(dist=observed_per_day, proportions=expected_per_day,
                                            report_file=report, prob_flag=False):
                passed = False
                report.write("BAD: Chi-squared test reuslt is False.\n")
        else:
            report.write( "Testing inactivation times as draws from exponential distrib with rate {0}. "
                          "Dataset size = {1}.\n".format( drug_inactivation_rate, len( inactivation_times ) ) )
            # In this branch the ks result alone decides the outcome.
            passed = dtk_sft.test_exponential( inactivation_times, drug_inactivation_rate, report, integers=True,
                                               roundup=True, round_nearest=False )
            if not passed:
                report.write("BAD: ks test for rate {} is False.\n".format(drug_inactivation_rate))

            # Reference sample for the plots, rounded up to whole numbers.
            raw_draws = numpy.random.exponential(1.0 / drug_inactivation_rate, len(inactivation_times))
            reference = [math.ceil(draw) for draw in raw_draws]
            series_labels = {"label1": "test times", "label2": "numpy data"}
            dtk_sft.plot_data_sorted(inactivation_times, reference,
                                     title="inactivation_times_actual_vs_numpy",
                                     xlabel="data points", ylabel="Inactivation times",
                                     category="inactivation_times", show=True, line=True, overlap=True,
                                     **series_labels)
            dtk_sft.plot_cdf(inactivation_times, reference,
                             title="inactivation_times_cdf",
                             xlabel="days", ylabel="probability",
                             category="inactivation_times_cdf", show=True,
                             **series_labels)
            dtk_sft.plot_probability(inactivation_times, reference,
                                     title="inactivation_times_pdf",
                                     xlabel="days", ylabel="probability",
                                     category="inactivation_times_pdf", show=True,
                                     **series_labels)

        report.write(dtk_sft.format_success_msg(passed))
        if debug:
            print(dtk_sft.format_success_msg(passed))
        return passed
Beispiel #13
0
def create_report_file(param_obj, output_dict, report_name, debug):
    """Write the report for the symptomatic active -> cleared (cure) timer test.

    For every individual in ``output_dict`` the observed duration between entering the
    symptomatic active state and being cleared is compared with the logged cure timer
    rounded up. The collected cure timers are then plotted against a numpy exponential
    sample and tested with ``dtk_sft.test_exponential`` using the configured cure rate.

    Args:
        param_obj: mapping of configuration values, indexed by the module's ``KEY_*`` constants.
        output_dict: mapping of individual id -> record. A record may contain
            ``KEY_SYMPTOMATIC`` (an indexable whose item 0 is the timestep of becoming
            symptomatic active) and ``KEY_CLEARED`` (an indexable whose item 0 is the
            timestep of being cleared and item 1 the cure timer).
        report_name: path of the report file; it is opened in write mode.
        debug: when truthy, the summary line is also printed to stdout.

    Returns:
        bool: True if every check passed, False otherwise.
    """
    with open(report_name, "w") as outfile:
        config_name = param_obj[KEY_CONFIG_NAME]
        outfile.write("Config_name = {}\n".format(config_name))
        success = True
        cure_rate = param_obj[KEY_CURE_RATE]
        simulation_duration = param_obj[KEY_DURATION]
        if not output_dict:
            success = False
            outfile.write(dtk_sft.sft_no_test_data)
        cure_timer = []
        actual_timer = []
        outfile.write(
            "Checking the timer and actual timestep between active and cure:\n"
        )
        outfile.write(
            "checking if the internal timer matches the PreSymptomatic to Cleared duration:\n"
        )

        for individual_id, record in output_dict.items():
            cure_time = timer = active_time = None
            if KEY_CLEARED in record:
                cure_time = record[KEY_CLEARED][0]
                timer = record[KEY_CLEARED][1]
                cure_timer.append(timer)
            if KEY_SYMPTOMATIC in record:
                active_time = record[KEY_SYMPTOMATIC][0]
            # None is the "not observed" sentinel; compare against it explicitly so that a
            # transition logged at timestep 0 is not mistaken for a missing one.
            if active_time is not None:
                if cure_time is not None:  # some individual may not be cleared at the end of the simulation
                    elapsed = cure_time - active_time
                    actual_timer.append(elapsed)
                    if elapsed != math.ceil(timer):
                        success = False
                        outfile.write(
                            "BAD: individual {0} has cure timer = {1} but the actual cure time is {2} (enter "
                            "symptomatic active state at timestep {3}, cleared at timestep {4}).\n"
                            .format(individual_id, timer, elapsed,
                                    active_time, cure_time))
                else:
                    outfile.write(
                        "Individual {0} moved to symptomatic active at timestep {1} and is not cleared yet at "
                        "the end of simulation (duration = {2}).\n".format(
                            individual_id, active_time, simulation_duration))
            else:
                success = False
                outfile.write(
                    "BAD: individual {0} is cleared before entering active symptomatic state.\n"
                    .format(individual_id))
        if not actual_timer:
            success = False
            outfile.write(
                "BAD: There is no recovered individual in this test, please fix the test.\n"
            )
        outfile.write(
            "Result is {0}. # of recovered individual = {1}\n".format(
                success, len(actual_timer)))

        outfile.write(
            "Running ks test for timer and numpy exponential distribution: \n")
        size = len(cure_timer)
        scale = 1.0 / cure_rate
        # Reference sample used only for the plot; the statistical test is done by dtk_sft.
        dist_exponential_np = np.random.exponential(scale, size)
        dtk_sft.plot_data_sorted(
            cure_timer,
            dist2=np.array(dist_exponential_np),
            label1="cure timer",
            label2="numpy exponential",
            title="exponential rate = {}".format(cure_rate),
            xlabel="data point",
            ylabel="cure timer",
            category='Cure_timer',
            show=True,
            line=False,
            overlap=True)
        result = dtk_sft.test_exponential(cure_timer,
                                          p1=cure_rate,
                                          report_file=outfile,
                                          integers=False,
                                          roundup=False,
                                          round_nearest=False)
        outfile.write(
            "ks test result is {0}, exponential rate = {1}, # of data point = {2}.\n"
            .format(result, cure_rate, size))
        if not result:
            success = False
            outfile.write(
                "BAD: test exponential for cure timer failed with cure rate = {}.\n"
                .format(cure_rate))

        outfile.write(dtk_sft.format_success_msg(success))
    if debug:
        print("SUMMARY: Success={0}\n".format(success))
    return success
Beispiel #14
0
def create_report_file(drug_start_timestep, disease_deaths, cum_deaths, deaths, infected_individuals, death_times, drug_mortality_rate_HIV, report_name ):
    """Write the HIV drug mortality report and return whether the test passed.

    Steps:
      1. flag missing data when ``disease_deaths`` or ``cum_deaths`` sum to zero or
         ``death_times`` is empty;
      2. check ``disease_deaths`` against ``cum_deaths`` entry by entry (the report labels
         them as coming from InsetChart.json and stdout.txt respectively);
      3. rate < 0.1: test ``death_times`` as exponential draws with
         ``dtk_sft.test_exponential`` and plot them against a rounded-up numpy sample;
         otherwise: compare per-timestep ``deaths`` with
         ``drug_mortality_rate_HIV * infected_individuals[t]`` via ``dtk_sft.test_multinomial``.

    Args:
        drug_start_timestep: timestep index at which the drug starts.
        disease_deaths: per-timestep cumulative deaths, first source.
        cum_deaths: per-timestep cumulative deaths, second source; its length drives the comparison.
        deaths: per-timestep death counts (high-rate branch).
        infected_individuals: per-timestep infected counts, indexed like ``deaths``.
        death_times: collection of death durations (low-rate branch).
        drug_mortality_rate_HIV: rate under test.
        report_name: path of the report file; it is opened in write mode.

    Returns:
        bool: True if every check passed, False otherwise.
    """
    with open(report_name, "w") as report:
        passed = True
        missing_data = sum(disease_deaths)==0 or sum(cum_deaths)==0 or len(death_times)==0
        if missing_data:
            passed = False
            # NOTE(review): confirm that dtk_sft exposes `no_test_data` under this name.
            report.write(dtk_sft.no_test_data)

        for index in range(len(cum_deaths)):
            chart_value = disease_deaths[index]
            stdout_value = cum_deaths[index]
            if chart_value != stdout_value:
                passed = False
                report.write("BAD: at timestep {0}, disease deaths is {1} in InsetChart.json and {2} in stdout.txt.\n".format(index+1, chart_value, stdout_value))

        # ks exponential test doesn't work very well with large rate, use chi squared test instead
        # while rate is small ks test for exponential distribution is more sensitive to catch the difference
        if not drug_mortality_rate_HIV < 0.1:
            report.write("Testing death count per day with rate {0}. \n".format(drug_mortality_rate_HIV))
            first_drug_day = drug_start_timestep + 1
            expected_per_day = []
            for step, observed in enumerate(deaths):
                if step < first_drug_day:
                    # No disease death is expected before the drug takes effect.
                    if observed > 0:
                        passed = False
                        report.write("BAD: expected no disease death on drugs before day {0}, get {1} cases at timestep {2}.\n"
                                     "".format(first_drug_day, observed, step))
                elif infected_individuals[step] > 0:
                    expected_per_day.append(drug_mortality_rate_HIV * infected_individuals[step])
            # the Infected is off by one day; note pop(0) raises IndexError when the list is empty
            expected_per_day.pop(0)
            observed_per_day = deaths[first_drug_day:first_drug_day + len(expected_per_day)]
            dtk_sft.plot_data(observed_per_day, expected_per_day,
                              label1="actual death", label2="expected death",
                              title="death per day",
                              xlabel="date after drug start day", ylabel="death per day",
                              category="death_counts", show=True, line=True, overlap=True, sort=False)

            if not dtk_sft.test_multinomial(dist=observed_per_day, proportions=expected_per_day,
                                            report_file=report, prob_flag=False):
                passed = False
                report.write("BAD: Chi-squared test reuslt is False.\n")
        else:
            report.write("Testing death times as draws from exponential distrib with rate {0}. "
                         "Dataset size = {1}.\n".format(drug_mortality_rate_HIV, len(death_times)))
            if not dtk_sft.test_exponential( death_times, drug_mortality_rate_HIV, report_file = report,
                                             integers=True, roundup=True, round_nearest=False ):
                passed = False
                report.write("BAD: ks test reuslt is False.\n")

            # Reference sample for the plots, rounded up to whole numbers.
            raw_draws = numpy.random.exponential(1.0 / drug_mortality_rate_HIV, len(death_times))
            reference = [math.ceil(draw) for draw in raw_draws]
            series_labels = {"label1": "death times", "label2": "numpy data"}
            dtk_sft.plot_data_sorted(death_times, reference,
                                     title="death_times_actual_vs_numpy",
                                     xlabel="data points", ylabel="death times",
                                     category="death_times", show=True, line=True, overlap=True,
                                     **series_labels)
            dtk_sft.plot_cdf(death_times, reference,
                             title="death_times_cdf",
                             xlabel="days", ylabel="probability",
                             category="death_times_cdf", show=True,
                             **series_labels)

        report.write(dtk_sft.format_success_msg(passed))
        return passed