예제 #1
0
def setup_data_sets(seed):
    """Create the antibiotics database and return it with its data sets.

    The number of covariates is read from the module-level ``n_x``; the
    antibiotic limit is fixed to 50. The training data is passed through
    ``split_patients`` before being returned, the test data is returned
    as delivered by the database.

    Args:
        seed: Seed forwarded to ``AntibioticsDatabase``.

    Returns:
        A tuple ``(dist, training_data, test_data)``.
    """
    t_begin = time.time()
    print("Generating training and test data")

    dist = AntibioticsDatabase(n_x=n_x, antibiotic_limit=50, seed=seed)
    raw_training, test_data = dist.get_data()
    training_data = split_patients(raw_training)

    elapsed = time.time() - t_begin
    print("Generating data took {:.3f} seconds".format(elapsed))
    return dist, training_data, test_data
예제 #2
0
 def __init__(self, seed=None):
     """Build the doctor approximator and cache its patient statistics.

     An ``AntibioticsDatabase`` with 6 covariates and an antibiotic limit
     of 50 supplies the training data, which is run through
     ``split_patients`` and handed to ``DoctorApproximator`` together
     with the database dimensions.

     Args:
         seed: Seed forwarded to the parent initializer and to the
             database. Defaults to ``None``.
     """
     super().__init__(seed=seed)
     database = AntibioticsDatabase(n_x=6, antibiotic_limit=50, seed=seed)
     # Only the training part is needed here; the test part is discarded.
     training_data, _ = database.get_data()
     patient_data = split_patients(training_data)
     self.doctor_approximator = DoctorApproximator(
         database.n_x, database.n_a, database.n_y, patient_data)
     self.statistics = self.doctor_approximator.get_patient_statistics()
def plot_time_vs_effect(values, times, settings):
    """Plot mean efficacy against mean search time, one curve per algorithm.

    ``values`` and ``times`` are summed over their first axis and divided
    by the number of data sets reported by ``settings.load_settings()``.
    The resulting means are indexed as ``[delta, algorithm]``.
    The figure is written to
    ``saved_values/<file_name_prefix>_time_vs_effect4.pdf``.

    Args:
        values: Per-data-set efficacy values (assumed to be shaped
            ``(n_data_sets, n_deltas, n_algorithms)`` -- TODO confirm).
        times: Per-data-set search times, same layout as ``values``.
        settings: Object providing ``load_settings()`` and
            ``setup_algorithms``.
    """
    plot_colors = ['k', 'r', 'b', 'g', 'm', 'c', 'y']
    plot_markers = ['s', 'v', 'P', '1', '2', '3', '4']
    line_style = '-'

    _, n_data_sets, n_deltas, file_name_prefix = settings.load_settings()

    # The algorithms are only instantiated to learn their count and labels.
    dist = AntibioticsDatabase(AntibioticsDeltaSweepSettings.n_x,
                               50,
                               seed=10342)
    training_data, _ = dist.get_data()
    algs = settings.setup_algorithms(split_patients(training_data), dist, 0)
    n_algorithms = len(algs)

    values_mean = np.sum(values, 0) / n_data_sets
    times_mean = np.sum(times, 0) / n_data_sets

    # Row 0 holds the mean times, row 1 the mean values of each algorithm.
    zipped_mean = np.zeros((n_algorithms, 2, n_deltas))
    for i_alg in range(n_algorithms):
        zipped_mean[i_alg, 0] = times_mean[:, i_alg]
        zipped_mean[i_alg, 1] = values_mean[:, i_alg]

    fig, ax1 = plt.subplots(figsize=(6, 4))
    plt.rcParams["font.family"] = "serif"
    for i_alg, (alg_times, alg_values) in enumerate(zipped_mean):
        fmt = plot_colors[i_alg] + plot_markers[i_alg] + line_style
        ax1.plot(alg_times,
                 alg_values,
                 fmt,
                 label='{}'.format(algs[i_alg].label))
    ax1.invert_xaxis()
    ax1.legend()
    plt.xlabel("Mean time")
    plt.ylabel("Efficacy")
    ax1.grid(True)
    plt.savefig("saved_values/" + file_name_prefix + "_time_vs_effect4.pdf")
예제 #4
0
from Database.sql_cocktail_statistics import get_antibiotcsevents
from Database.antibioticsdatabase import AntibioticsDatabase
import matplotlib.pyplot as plt
import numpy as np

# Fetch all antibiotics events; each row is read as
# (hadm_id, label, start_time, ...).
database = AntibioticsDatabase()
database.cur.execute(get_antibiotcsevents)
data = database.cur.fetchall()

# Group the events per hospital admission, keeping the order of the rows.
datapoints = {}
prev_hadm_id = 0  # not read in the visible code; kept for any later use
for chartevent in data:
    hadm_id = chartevent[0]
    label = chartevent[1]
    start_time = chartevent[2]
    datapoints.setdefault(hadm_id, []).append([label, start_time])

# Minutes between consecutive events of the same admission whenever the
# label changes from one event to the next.
times = []
for events in datapoints.values():
    for (prev_label, prev_time), (label, start_time) in zip(events, events[1:]):
        if label != prev_label:
            # total_seconds() covers the whole interval; the `.seconds`
            # attribute is only the seconds component and silently drops
            # whole days (and is misleading for negative differences).
            times.append((start_time - prev_time).total_seconds() / 60)
예제 #5
0
    res.append(max_mean_treatment_effects / n_test_samples)
    res.append(mean_times / n_test_samples)

    return res


if __name__ == '__main__':
    # Script entry point: load the settings, determine the number of
    # algorithms, and allocate the result buffers and the worker pool.
    settings = get_settings()

    # Settings
    plot_var = False
    starting_seed, n_data_sets, delta, file_name_prefix = settings.load_settings(
    )

    # Quick hack to get n_algorithms: a throwaway database with a fixed seed
    # is created and the algorithms are set up once, only to count them.
    tmp_dist = AntibioticsDatabase(AntibioticsSettings.n_x, 50, seed=10342)
    training_data, test_data = tmp_dist.get_data()
    # NOTE(review): this self-assignment is a no-op. Other code in this
    # source applies split_patients() to the training data at the equivalent
    # step -- confirm whether that was intended here as well.
    training_data = training_data
    n_x = tmp_dist.n_x
    n_a = tmp_dist.n_a
    n_y = tmp_dist.n_y
    algs = settings.setup_algorithms(tmp_dist, training_data, n_x, n_a, n_y,
                                     delta)
    n_algorithms = len(algs)

    # Result buffers: one slot per (data set, algorithm, treatment index)
    # for the values and one per (data set, algorithm) for the times.
    values = np.zeros((n_data_sets, n_algorithms, n_a))
    times = np.zeros((n_data_sets, n_algorithms))

    main_start = time.time()
    # One worker process per data set.
    pool = Pool(processes=n_data_sets)
    # Presumably filled by code beyond this excerpt -- verify.
    results = []
def plot_data(values, times, settings, plot_var=False):
    """Plot the mean treatment effect per tried treatment and the search time.

    Two figures are produced. The first shows, per algorithm, the mean
    treatment effect against the number of tried treatments, with a
    vertical marker at the algorithm's mean search time, and is saved to
    ``saved_values/<file_name_prefix>_plotNew.pdf``. The second is a bar
    chart of the mean number of treatments tried per policy and is shown
    with ``plt.show()``.

    The last two algorithms (and their mean values/times) are swapped
    before plotting, and the labels of ``algs[-3]`` and ``algs[-2]`` are
    overwritten.

    Args:
        values: Per-data-set treatment effects (assumed to be shaped
            ``(n_data_sets, n_algorithms, n_a)`` -- TODO confirm).
        times: Per-data-set search times (assumed to be shaped
            ``(n_data_sets, n_algorithms)`` -- TODO confirm).
        settings: Object providing ``load_settings()`` and
            ``setup_algorithms``.
        plot_var: Currently unused.
    """
    plot_colors = ['k', 'r', 'b', 'g', 'm', 'c', 'y']
    plot_markers = ['s', 'v', 'P', '1', '2', '3', '4']
    # NOTE(review): `i ^ 2` is bitwise XOR, not a power. It is kept as is
    # because changing it would alter the dash patterns of existing plots.
    plot_lines = ['-', '--', ':', '-.'] + [(i, (1, 4, i, i ^ 2)) for i in range(0, 4)]

    # Extract settings
    setup_algorithms = settings.setup_algorithms
    _, n_data_sets, delta, file_name_prefix = settings.load_settings()
    dist = AntibioticsDatabase(AntibioticsSettings.n_x, 50, seed=10342)
    training_data, test_data = dist.get_data()
    training_data = split_patients(training_data)
    n_a = dist.n_a
    algs = setup_algorithms(dist, training_data, dist.n_x, n_a, dist.n_y, delta)
    n_algorithms = len(algs)

    values_mean = np.sum(values, 0) / n_data_sets
    times_mean = np.sum(times, 0) / n_data_sets

    # Swap the last two algorithms together with their statistics. Integer
    # array indexing copies the right-hand side, so no temporary is needed.
    algs[-1], algs[-2] = algs[-2], algs[-1]
    values_mean[[-1, -2]] = values_mean[[-2, -1]]
    times_mean[[-1, -2]] = times_mean[[-2, -1]]

    x = np.arange(0, n_a)
    x_ticks = list(np.arange(1, n_a + 1))
    plt.figure()
    plt.title(r'Treatment effect. $\delta$: {}'.format(delta))
    plt.ylabel('Mean treatment effect')
    plt.xlabel('Number of tried treatments')
    average_max_treatment_effect = sum(max(sample[-1]) for sample in test_data) / len(test_data)
    mean_lines = np.linspace(0, 1, n_algorithms)
    algs[-3].label = "NDP_F"
    algs[-2].label = "Emulated doctor"
    for i_alg, alg in enumerate(algs):
        # Wrap around all three style lists (the line styles already did),
        # so more algorithms than styles no longer raises IndexError.
        color = plot_colors[i_alg % len(plot_colors)]
        marker = plot_markers[i_alg % len(plot_markers)]
        line_style = plot_lines[i_alg % len(plot_lines)]
        if alg.name != "Doctor":
            plt.plot(x, values_mean[i_alg], marker + color,
                     linestyle=line_style, label=alg.label)
            mean_time = times_mean[i_alg] - 1
            # mean_lines has n_algorithms entries; clamp the upper index so
            # a non-"Doctor" algorithm in last position does not overflow.
            upper = mean_lines[min(i_alg + 1, n_algorithms - 1)]
            plt.axvline(mean_time, ymin=mean_lines[i_alg], ymax=upper,
                        color=color)
            plt.axvline(mean_time, ymin=0, ymax=1, color=color, alpha=0.1)
        else:
            # The "Doctor" is drawn as a single large marker at x = 0.
            plt.plot(0, values_mean[i_alg][0], marker + color,
                     markersize=20, linestyle=line_style,
                     linewidth=4, label=alg.label)
    plt.rcParams["font.family"] = "serif"
    plt.grid(True)
    plt.xticks(x, x_ticks)
    plt.plot(x, np.ones(len(x)) * average_max_treatment_effect, linestyle=plot_lines[-1], label='MAX_POSS_AVG')

    plt.legend(loc='lower right')
    plt.savefig("saved_values/" + file_name_prefix + "_plotNew.pdf")

    # Plot mean number of treatments tried
    plt.figure()
    plt.title('Search time')
    plt.ylabel('Mean number of treatments tried')
    plt.xlabel('Policy')
    # Break multi-word policy names over several lines to keep bars readable.
    x_bars = [alg.name.replace(" ", '\n') for alg in algs]
    rects = plt.bar(x_bars, times_mean)
    for rect in rects:
        h = rect.get_height()
        plt.text(rect.get_x() + rect.get_width() / 2., 0.90 * h, "%f" % h, ha="center", va="bottom")
    plt.show()
예제 #7
0
        plot_mean_treatment_effect, plot_treatment_efficiency,
        plot_delta_efficiency, plot_search_time, plot_strictly_better
    ]
    main_start = time.time()

    # Generate the data
    # dist = DiscreteDistribution(n_z, n_x, n_a, n_y, seed=seed, outcome_sensitivity_x_z=1)
    #dist = DiscreteDistributionWithSmoothOutcomes(n_z, n_x, n_a, n_y, seed=seed, outcome_sensitivity_x_z=1)
    # dist = DiscreteDistributionWithInformation(n_z, n_x, n_a, n_y, seed=seed)
    '''
    dist.print_moderator_statistics()
    dist.print_covariate_statistics()
    dist.print_treatment_statistics()
    dist.print_detailed_treatment_statistics()
    '''
    dist = AntibioticsDatabase(n_x=1, antibiotic_limit=50, seed=seed)
    '''
    dist = NewDistribution(seed=seed)
    #dist = NewDistributionSlightlyRandom(seed=seed)
    n_x = 1
    n_a = 3
    n_y = 3
    '''
    '''
    dist = FredrikDistribution()
    n_x = 1
    n_a = 3
    n_y = 2
    '''

    if type(dist) != AntibioticsDatabase:
def plot_sweep_delta(values, times, settings, plot_var=False, split_plot=True):
    """Plot mean treatment effect and mean search time against delta.

    Means are computed over the first axis of ``values`` and ``times``;
    the sample variance (denominator ``n_data_sets - 1``) is computed per
    delta and algorithm. The figure is written to
    ``saved_values/<file_name_prefix>_plot2.png``.

    Args:
        values: Per-data-set efficacy, indexed as
            ``values[data_set][delta][algorithm]``.
        times: Per-data-set search time, indexed like ``values``.
        settings: Object providing ``load_settings()`` and
            ``setup_algorithms``.
        plot_var: If true, shade the band mean +/- variance around each
            curve.
        split_plot: If true, draw effect and time in two stacked subplots;
            otherwise share one plot through a twin y-axis.
    """
    plot_colors = ['k', 'r', 'b', 'g', 'm', 'c', 'y']
    plot_markers = ['s', 'v', 'P', '1', '2', '3', '4']
    plot_lines = ['-', '--', ':', '-.', '-', '--', ':']

    # Extract settings
    _, n_data_sets, n_deltas, file_name_prefix = settings.load_settings()
    dist = AntibioticsDatabase(AntibioticsDeltaSweepSettings.n_x,
                               50,
                               seed=10342)
    training_data, _ = dist.get_data()
    algs = settings.setup_algorithms(split_patients(training_data), dist, 0)
    n_algorithms = len(algs)
    deltas = np.linspace(0.0, 1.0, n_deltas)

    values_mean = np.sum(values, 0) / n_data_sets
    times_mean = np.sum(times, 0) / n_data_sets

    # Sample variance per (delta, algorithm); the squared deviations are
    # added in data-set order.
    values_var = np.zeros((n_deltas, n_algorithms))
    times_var = np.zeros((n_deltas, n_algorithms))
    for i_delta in range(n_deltas):
        for i_alg in range(n_algorithms):
            v_mean = values_mean[i_delta][i_alg]
            t_mean = times_mean[i_delta][i_alg]
            values_var[i_delta][i_alg] = sum(
                (v_mean - values[k][i_delta][i_alg])**2
                for k in range(n_data_sets)) / (n_data_sets - 1)
            times_var[i_delta][i_alg] = sum(
                (t_mean - times[k][i_delta][i_alg])**2
                for k in range(n_data_sets)) / (n_data_sets - 1)

    # Plot mean treatment effect vs delta
    if split_plot:
        fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(6, 10))
    else:
        fig, ax1 = plt.subplots(figsize=(6, 4))
        ax2 = ax1.twinx()
    ax1.set_title(r'Mean treatment effect/mean search time vs $\delta$')
    ax1.set_xlabel(r'$\delta$')
    ax2.set_xlabel(r'$\delta$')
    ax1.set_ylabel('Efficacy')
    ax2.set_ylabel('Mean search time')

    for i_alg in range(n_algorithms):
        color = plot_colors[i_alg]
        color_marker = color + plot_markers[i_alg]
        alg_label = algs[i_alg].label
        alg_values = values_mean[:, i_alg]
        alg_times = times_mean[:, i_alg]

        # Solid line for the effect, dashed line for the search time.
        ax1.plot(deltas,
                 alg_values,
                 color_marker + plot_lines[0],
                 label='{} {}'.format(alg_label, 'effect'),
                 markevery=3)
        ax2.plot(deltas,
                 alg_times,
                 color_marker + plot_lines[1],
                 label='{} {}'.format(alg_label, 'time'),
                 markevery=3)
        if plot_var:
            ax1.fill_between(deltas,
                             alg_values - values_var[:, i_alg],
                             alg_values + values_var[:, i_alg],
                             facecolor=color,
                             alpha=0.3)
            ax2.fill_between(deltas,
                             alg_times - times_var[:, i_alg],
                             alg_times + times_var[:, i_alg],
                             facecolor=color,
                             alpha=0.3)

    ax1.grid(True)
    ax2.grid(True)
    plt.rcParams["font.family"] = "serif"
    for axis, location in ((ax1, 'upper right'), (ax2, 'lower left')):
        handles, labels = axis.get_legend_handles_labels()
        axis.legend(handles, labels, loc=location)
    plt.savefig("saved_values/" + file_name_prefix + "_plot2.png")