def plot_time_vs_effect(values, times, settings):
    """Plot mean efficacy against mean search time, one curve per algorithm.

    The means are taken over the first axis of ``values`` and ``times``
    (sum divided by ``n_data_sets``); each algorithm is then drawn as a
    curve of mean value (y) over mean time (x). The x axis is inverted
    and the figure is written to
    ``saved_values/<file_name_prefix>_time_vs_effect.pdf``.

    Args:
        values: Array-like indexed as [data set, delta, algorithm]
            holding the measured values.
        times: Array-like with the same layout as ``values`` holding the
            measured search times.
        settings: Object exposing ``load_settings()`` (returning a
            10-tuple whose second item is the number of data sets and
            whose last item is the file name prefix) and
            ``setup_algorithms(data, dist, delta)``.
    """
    plot_colors = ['k', 'r', 'b', 'g', 'm', 'c', 'y']
    plot_markers = ['s', 'v', 'P', '1', '2', '3', '4']
    plot_lines = ['-', '--', ':', '-.']

    # Only the number of data sets and the file prefix are used here.
    (_starting_seed, n_data_sets, _n_deltas, _n_z, _n_x, _n_a, _n_y,
     _n_training_samples, _n_test_samples,
     file_name_prefix) = settings.load_settings()

    # The algorithms are instantiated on a tiny throwaway data set; only
    # their count and their labels are used below.
    tmp_dist = DiscreteDistributionWithSmoothOutcomes(3, 1, 5, 3)
    algs = settings.setup_algorithms(
        split_patients(generate_data(tmp_dist, 10)), tmp_dist, 0.1)
    n_algorithms = len(algs)

    values_mean = np.sum(values, 0) / n_data_sets
    times_mean = np.sum(times, 0) / n_data_sets

    fig, ax1 = plt.subplots(figsize=(6, 4))
    plt.rcParams["font.family"] = "serif"
    for i_alg in range(n_algorithms):
        # Cycle through the style lists so that having more algorithms
        # than line styles (or colours/markers) cannot raise IndexError.
        style = (plot_colors[i_alg % len(plot_colors)]
                 + plot_markers[i_alg % len(plot_markers)]
                 + plot_lines[i_alg % len(plot_lines)])
        ax1.plot(times_mean[:, i_alg], values_mean[:, i_alg], style,
                 label='{}'.format(algs[i_alg].label), markevery=3)
    ax1.invert_xaxis()
    ax1.legend()
    plt.xlabel("Mean time")
    plt.ylabel("Efficacy")
    ax1.grid(True)
    plt.savefig("saved_values/" + file_name_prefix + "_time_vs_effect.pdf")
def setup_data_sets(n_z, n_x, n_a, n_y, n_training_samples, n_test_samples, seed):
    """Create a seeded distribution and draw training and test data from it.

    Prints a progress message before generating and the elapsed
    wall-clock time afterwards.

    Args:
        n_z, n_x, n_a, n_y: Passed positionally to
            ``DiscreteDistributionWithSmoothOutcomes``.
        n_training_samples: Sample count passed to ``generate_data``.
        n_test_samples: Sample count passed to ``generate_test_data``.
        seed: Forwarded to the distribution as its ``seed`` keyword.

    Returns:
        Tuple ``(dist, training_data, test_data)`` where
        ``training_data`` is the output of ``split_patients`` applied to
        the generated training samples.
    """
    t_begin = time.time()
    print("Generating training and test data")

    dist = DiscreteDistributionWithSmoothOutcomes(
        n_z, n_x, n_a, n_y, seed=seed)

    # Training data is generated first, then the test data, both from
    # the same distribution object.
    raw_training = generate_data(dist, n_training_samples)
    training_data = split_patients(raw_training)
    test_data = generate_test_data(dist, n_test_samples)

    elapsed = time.time() - t_begin
    print("Generating data took {:.3f} seconds".format(elapsed))
    return dist, training_data, test_data
def plot_sweep_data(values, times, settings, plot_var=False, split_plot=True):
    """Plot mean value and mean search time against training set size.

    Means are taken over the first axis of ``values`` and ``times``
    (sum divided by ``n_data_sets``). The x axis holds
    ``n_data_set_sizes`` geometrically spaced sizes between 10 and
    ``n_training_samples_max`` and is drawn on a log scale. The figure
    is saved to ``saved_values/<file_name_prefix>_plot.pdf``.

    Args:
        values: Array-like indexed as [data set, data set size,
            algorithm] holding the measured values.
        times: Array-like with the same layout holding search times.
        settings: Object exposing ``load_settings()`` (returning an
            11-tuple) and ``setup_algorithms(data, dist, delta)``.
        plot_var: If true, shade a band of mean +/- the unbiased sample
            variance (note: variance, not standard deviation) around
            each curve.
        split_plot: If true, draw values and times in two stacked
            subplots; otherwise draw both in one plot using a twin
            y axis.
    """
    plot_colors = ['k', 'r', 'b', 'g', 'm', 'c', 'y']
    plot_markers = ['s', 'v', 'P', '1', '2', '3', '4']
    plot_lines = ['-', '--', ':', '-.', '-', '--']

    (_starting_seed, n_data_sets, delta, n_data_set_sizes, _n_z, _n_x, _n_a,
     _n_y, n_training_samples_max, _n_test_samples,
     file_name_prefix) = settings.load_settings()

    # The algorithms are instantiated on a tiny throwaway data set; only
    # their count and their labels are used below.
    tmp_dist = DiscreteDistributionWithSmoothOutcomes(3, 1, 5, 3)
    algs = settings.setup_algorithms(
        split_patients(generate_data(tmp_dist, 10)), tmp_dist, 0.1)
    n_algorithms = len(algs)

    n_training_samples_array = np.geomspace(
        10, n_training_samples_max, n_data_set_sizes).astype(int)

    values = np.asarray(values)
    times = np.asarray(times)
    values_mean = np.sum(values, 0) / n_data_sets
    times_mean = np.sum(times, 0) / n_data_sets

    # Unbiased sample variance across data sets. With a single data set
    # the n - 1 denominator is zero, so fall back to a zero-width band
    # instead of dividing by zero.
    if n_data_sets > 1:
        values_var = np.sum(
            (values[:n_data_sets] - values_mean) ** 2,
            axis=0) / (n_data_sets - 1)
        times_var = np.sum(
            (times[:n_data_sets] - times_mean) ** 2,
            axis=0) / (n_data_sets - 1)
    else:
        values_var = np.zeros_like(values_mean, dtype=float)
        times_var = np.zeros_like(times_mean, dtype=float)

    # Plot mean treatment value and mean search time vs data set size
    if not split_plot:
        fig, ax1 = plt.subplots(figsize=(6, 4))
        ax2 = ax1.twinx()
    else:
        fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(6, 10))
    ax1.set_title(
        'Mean treatment value/Mean search time vs data set size (delta: {})'.
        format(delta))
    ax1.set_xlabel('Data set size')
    ax2.set_xlabel('Data set size')
    ax1.set_ylabel('Mean treatment value')
    ax2.set_ylabel('Mean search time')

    for i_alg in range(n_algorithms):
        # Cycle through the style lists so that having more algorithms
        # than styles cannot raise IndexError.
        color = plot_colors[i_alg % len(plot_colors)]
        style = (color
                 + plot_markers[i_alg % len(plot_markers)]
                 + plot_lines[i_alg % len(plot_lines)])
        ax1.plot(n_training_samples_array, values_mean[:, i_alg], style,
                 label='{} {}'.format(algs[i_alg].label, 'effect'),
                 markevery=3)
        ax2.plot(n_training_samples_array, times_mean[:, i_alg], style,
                 label='{} {}'.format(algs[i_alg].label, 'time'),
                 markevery=3)
        if plot_var:
            ax1.fill_between(
                n_training_samples_array,
                values_mean[:, i_alg] - values_var[:, i_alg],
                values_mean[:, i_alg] + values_var[:, i_alg],
                facecolor=color, alpha=0.3)
            ax2.fill_between(
                n_training_samples_array,
                times_mean[:, i_alg] - times_var[:, i_alg],
                times_mean[:, i_alg] + times_var[:, i_alg],
                facecolor=color, alpha=0.3)

    ax1.grid(True)
    ax2.grid(True)
    plt.rcParams["font.family"] = "serif"
    lines1, labels1 = ax1.get_legend_handles_labels()
    lines2, labels2 = ax2.get_legend_handles_labels()
    ax1.legend(lines1, labels1, loc='lower right')
    ax2.legend(lines2, labels2, loc='upper right')
    ax1.set_xscale('log')
    ax2.set_xscale('log')
    plt.savefig("saved_values/" + file_name_prefix + "_plot.pdf")
def plot_sweep_delta(values, times, settings, plot_var=False, split_plot=True):
    """Plot mean value and mean search time against delta in two figures.

    Means are taken over the first axis of ``values`` and ``times``
    (sum divided by ``n_data_sets``). The x axis holds ``n_deltas``
    evenly spaced points in [0, 1]. Two PDFs are written:
    ``saved_values/<file_name_prefix>_effect_plot.pdf`` and
    ``saved_values/<file_name_prefix>_time_plot.pdf``.

    Args:
        values: Array-like indexed as [data set, delta, algorithm]
            holding the measured values.
        times: Array-like with the same layout holding search times.
        settings: Object exposing ``load_settings()`` (returning a
            10-tuple) and ``setup_algorithms(data, dist, delta)``.
        plot_var: If true, shade a band of mean +/- the unbiased sample
            variance (note: variance, not standard deviation) around
            each curve.
        split_plot: Accepted for signature compatibility; this function
            does not read it and always produces two separate figures.
    """
    plot_colors = ['k', 'r', 'b', 'g', 'm', 'c', 'y']
    plot_markers = ['s', 'v', 'P', '1', '2', '3', '4']
    plot_lines = ['-', '--', ':', '-.', '-', '--', ':']

    # Extract settings
    (_starting_seed, n_data_sets, n_deltas, _n_z, _n_x, _n_a, _n_y,
     _n_training_samples, _n_test_samples,
     file_name_prefix) = settings.load_settings()

    # The algorithms are instantiated on a tiny throwaway data set; only
    # their count and their labels are used below.
    tmp_dist = DiscreteDistributionWithSmoothOutcomes(3, 5, 5, 3)
    algs = settings.setup_algorithms(
        split_patients(generate_data(tmp_dist, 10)), tmp_dist, 0.1)
    n_algorithms = len(algs)

    deltas = np.linspace(0.0, 1.0, n_deltas)

    values = np.asarray(values)
    times = np.asarray(times)
    values_mean = np.sum(values, 0) / n_data_sets
    times_mean = np.sum(times, 0) / n_data_sets

    # Unbiased sample variance across data sets. With a single data set
    # the n - 1 denominator is zero, so fall back to a zero-width band
    # instead of dividing by zero.
    if n_data_sets > 1:
        values_var = np.sum(
            (values[:n_data_sets] - values_mean) ** 2,
            axis=0) / (n_data_sets - 1)
        times_var = np.sum(
            (times[:n_data_sets] - times_mean) ** 2,
            axis=0) / (n_data_sets - 1)
    else:
        values_var = np.zeros_like(values_mean, dtype=float)
        times_var = np.zeros_like(times_mean, dtype=float)

    def line_style(i_alg):
        # Cycle through the style lists so that having more algorithms
        # than styles cannot raise IndexError.
        return (plot_colors[i_alg % len(plot_colors)]
                + plot_markers[i_alg % len(plot_markers)]
                + plot_lines[i_alg % len(plot_lines)])

    # Plot mean treatment effect vs delta
    fig, ax1 = plt.subplots(1, 1, figsize=(6, 5))
    plt.rcParams["font.family"] = "serif"
    ax1.set_title(r'Mean treatment effect/mean search time vs $\delta$')
    ax1.set_xlabel(r'$\delta$')
    ax1.set_ylabel('Efficacy')
    for i_alg in range(n_algorithms):
        ax1.plot(deltas, values_mean[:, i_alg], line_style(i_alg),
                 label='{} {}'.format(algs[i_alg].label, 'effect'),
                 markevery=3)
        if plot_var:
            ax1.fill_between(
                deltas,
                values_mean[:, i_alg] - values_var[:, i_alg],
                values_mean[:, i_alg] + values_var[:, i_alg],
                facecolor=plot_colors[i_alg % len(plot_colors)], alpha=0.3)
    ax1.grid(True)
    lines1, labels1 = ax1.get_legend_handles_labels()
    ax1.legend(lines1, labels1, loc='upper right')
    # Saved before the second figure is created, while this figure is
    # still pyplot's current figure.
    plt.savefig("saved_values/" + file_name_prefix + "_effect_plot.pdf")

    # Plot mean search time vs delta
    fig, ax2 = plt.subplots(1, 1, figsize=(6, 5))
    ax2.set_xlabel(r'$\delta$')
    ax2.set_ylabel('Mean search time')
    for i_alg in range(n_algorithms):
        ax2.plot(deltas, times_mean[:, i_alg], line_style(i_alg),
                 label='{} {}'.format(algs[i_alg].label, 'time'),
                 markevery=3)
        if plot_var:
            ax2.fill_between(
                deltas,
                times_mean[:, i_alg] - times_var[:, i_alg],
                times_mean[:, i_alg] + times_var[:, i_alg],
                facecolor=plot_colors[i_alg % len(plot_colors)], alpha=0.3)
    ax2.grid(True)
    lines2, labels2 = ax2.get_legend_handles_labels()
    ax2.legend(lines2, labels2, loc='lower left')
    plt.savefig("saved_values/" + file_name_prefix + "_time_plot.pdf")
n_z = 3 n_x = 1 n_a = 5 n_y = 3 n_training_samples = 500000 delta = 0.3 dist = DiscreteDistributionWithSmoothOutcomes(n_z, n_x, n_a, n_y, seed=seed, outcome_sensitivity_x_z=1) dist.print_treatment_statistics() dist.print_detailed_treatment_statistics() split_training_data = split_patients(generate_data(dist, n_training_samples)) sa = StatisticalApproximator(n_x, n_a, n_y, split_training_data, smoothing_mode='gaussian') ta = ExactApproximator(dist) print("Init constraints") csa = Constraint(n_x, n_a, n_y, approximator=sa, delta=delta) cta = Constraint(n_x, n_a, n_y, approximator=ta, delta=delta) cdp = ConstrainedDynamicProgramming(n_x, n_a, n_y, split_training_data,