def run_algorithms(problem, num_samples, algorithms, algorithm_labels,
                   repeats=1, call=True, verbose=False):
    '''
    Makes sure enough stored runs exist for every algorithm.

    For each algorithm the pickled results in
    ``<cwd>/data/<dm.get_filename(algorithm)>`` are loaded and scanned
    until `repeats` entries with at least `num_samples` samples have been
    found. If the file is missing, or too few usable entries are stored,
    the algorithm is run and the file is loaded again.

    Arguments
    ---------
    problem : instance of ABC_Problem
        Not used by this function.
    num_samples : int
        The minimum number of samples a stored run must contain.
    algorithms : list
        List of algorithms to check. Each needs a `run(num_samples)`
        method; presumably `run` stores its results in the data file,
        since the file is reloaded right after calling it (TODO confirm).
    algorithm_labels : list of strings
        Not used by this function.

    Optional Arguments
    ------------------
    repeats : int
        Number of usable runs required per algorithm.
    call : bool
        If True will do additional runs of the algorithms to obtain
        enough data. Default True.
        NOTE(review): when the data file does not exist at all the
        algorithm is run regardless of this flag.
    verbose : bool
        If True prints the index of every data entry that is inspected,
        default False.

    Raises
    ------
    Exception
        If there are not enough usable entries and `call` is False, or
        if running the algorithm did not add any entries to the file.
    '''
    for algorithm in algorithms:
        filename = dm.get_filename(algorithm)
        path = os.path.join(os.getcwd(), 'data', filename)

        try:
            # Try to read from the database
            with open(path, 'rb') as f:
                alg_data = pickle.load(f)
        except IOError:
            # The file doesn't exist: run the experiments
            for _ in range(repeats):
                algorithm.run(num_samples)

            # And load the data after
            with open(path, 'rb') as f:
                alg_data = pickle.load(f)

        data_index = 0   # position in the stored list of runs
        array_index = 0  # number of usable runs found so far
        while array_index < repeats:
            if verbose:
                print('data entry %d' % data_index)

            if data_index >= len(alg_data.list_of_samples):
                if not call:
                    raise Exception('Not enough data points')

                stored_before = len(alg_data.list_of_samples)

                # Run the required additional experiments
                for _ in range(repeats - array_index):
                    algorithm.run(num_samples)

                # Reload the algorithm data
                with open(path, 'rb') as f:
                    alg_data = pickle.load(f)

                # Without new entries this loop would never terminate.
                if len(alg_data.list_of_samples) <= stored_before:
                    raise Exception(
                        'Not enough data points: running the algorithm '
                        'added no new data entries')
                continue

            samples = alg_data.list_of_samples[data_index]
            data_index += 1

            # Runs that are too short are skipped, not counted.
            if len(samples) >= num_samples:
                array_index += 1
def plot_distances(problem, num_samples, algorithms, algorithm_labels,
                   repeats=1, call=True, verbose=False, which='Both'):
    '''
    Retrieves results from data files and plots the results.

    If there are not enough data points for either the number of repeats
    or number of samples, additional data points are added (the
    algorithms are run).

    Four figures are created:

    1. number of samples against the total variation distance,
    2. number of simulation calls against the total variation distance,
    3. number of samples against the number of simulation calls,
    4. number of simulation calls against the NMSE.

    Every curve is the mean over the repeats, with a shaded band of two
    standard deviations on either side.

    Arguments
    ---------
    problem : instance of ABC_Problem
        The problem we're trying to solve.
    num_samples : int
        The number of samples to draw.
    algorithms : list
        List of algorithms to plot the curves of.
    algorithm_labels : list of strings
        Labels for the legend of the plot.

    Optional Arguments
    ------------------
    repeats : int
        Number of times runs are repeated.
    call : bool
        If True will do additional runs of the algorithms to obtain
        enough data. Default True.
        NOTE(review): when the data file does not exist at all the
        algorithm is run regardless of this flag.
    verbose : bool
        If True prints the index of every data entry that is inspected,
        default False.
    which : string
        Currently not used: all four figures are always created.

    Returns
    -------
    ax1, ax2, ax3, ax4 : tuple
        The axis handles of the four created figures.

    Raises
    ------
    Exception
        If there are not enough usable entries and `call` is False, or
        if running the algorithm did not add any entries to the file.
    '''
    # NOTE(review): this module contains a second, later definition of
    # plot_distances; the later one is the one bound at import time.
    color_cycle = ['r', '0', 'b', 'g']
    axis_labels = [
        (r'Number of samples', r'Total Variation Distance'),
        (r'Number of simulation calls', r'Total Variation Distance'),
        (r'Number of samples', r'Number of simulation calls'),
        (r'Number of simulation calls', r'NMSE'),
    ]

    axes = []
    for fig_number, (xlabel, ylabel) in enumerate(axis_labels, 1):
        ax = plt.figure(fig_number).add_subplot(111)
        # set_color_cycle was superseded by set_prop_cycle; use whichever
        # the installed matplotlib provides.
        if hasattr(ax, 'set_prop_cycle'):
            ax.set_prop_cycle('color', color_cycle)
        else:
            ax.set_color_cycle(color_cycle)
        ax.set_xlabel(xlabel)
        ax.set_ylabel(ylabel)
        axes.append(ax)
    ax1, ax2, ax3, ax4 = axes

    for i, algorithm in enumerate(algorithms):
        # One column per repeat, one row per sample.
        dist = np.zeros((num_samples, repeats))
        sim_calls = np.zeros((num_samples, repeats))
        nmse = np.zeros((num_samples, repeats))

        filename = dm.get_filename(algorithm)
        path = os.path.join(os.getcwd(), 'data', filename)

        try:
            # Try to read from the database
            with open(path, 'rb') as f:
                alg_data = pickle.load(f)
        except IOError:
            # The file doesn't exist: run the experiments
            for _ in range(repeats):
                algorithm.run(num_samples)

            # And load the data after
            with open(path, 'rb') as f:
                alg_data = pickle.load(f)

        data_index = 0   # position in the stored list of runs
        array_index = 0  # next column to fill
        while array_index < repeats:
            if verbose:
                print('data entry %d' % data_index)

            if data_index >= len(alg_data.list_of_samples):
                if not call:
                    raise Exception('Not enough data points')

                stored_before = len(alg_data.list_of_samples)

                # Run the required additional experiments
                for _ in range(repeats - array_index):
                    algorithm.run(num_samples)

                # Reload the algorithm data
                with open(path, 'rb') as f:
                    alg_data = pickle.load(f)

                # Without new entries this loop would never terminate.
                if len(alg_data.list_of_samples) <= stored_before:
                    raise Exception(
                        'Not enough data points: running the algorithm '
                        'added no new data entries')
                continue

            samples = alg_data.list_of_samples[data_index]
            if len(samples) < num_samples:
                # Run is too short to use: skip it.
                data_index += 1
                continue

            samples = samples[:num_samples]
            sim_calls[:, array_index] = \
                alg_data.list_of_sim_calls[data_index][:num_samples]
            dist[:, array_index] = variation_distance(samples, problem)
            nmse[:, array_index] = NMSE_convergence(problem, samples)

            array_index += 1
            data_index += 1

        label = algorithm_labels[i]

        avg_dist = np.mean(dist, 1)
        std_dist = np.std(dist, 1)

        # Simulation calls are accumulated over the samples of a run.
        total_sim_calls = np.cumsum(sim_calls, 0)
        avg_sim_calls = np.mean(total_sim_calls, 1)
        std_sim_calls = np.std(total_sim_calls, 1)

        avg_nmse = np.mean(nmse, 1)
        std_nmse = np.std(nmse, 1)

        # Logarithmically spaced sample indices for the distance plots.
        indices = np.floor(
            np.logspace(0, np.log10(num_samples))).astype(int)
        indices[0] = 0
        indices[-1] = num_samples - 1
        # For num_samples == 1 every logspace value floors to 1, which
        # would index past the end of the arrays.
        indices = np.minimum(indices, num_samples - 1)

        dist_mean = avg_dist[indices]
        dist_band = 2 * std_dist[indices]

        line, = ax1.plot(indices, dist_mean, label=label)
        ax1.fill_between(
            indices,
            dist_mean - dist_band,
            dist_mean + dist_band,
            color=line.get_color(), alpha=0.25)

        line, = ax2.plot(avg_sim_calls[indices], dist_mean, label=label)
        ax2.fill_between(
            avg_sim_calls[indices],
            dist_mean - dist_band,
            dist_mean + dist_band,
            color=line.get_color(), alpha=0.25)

        line, = ax3.plot(range(num_samples), avg_sim_calls, label=label)
        ax3.fill_between(
            np.array(range(num_samples), ndmin=1),
            # Keep the lower edge at 1 or above for the log-scaled y axis.
            np.clip(avg_sim_calls - 2 * std_sim_calls, 1, np.inf),
            avg_sim_calls + 2 * std_sim_calls,
            color=line.get_color(), alpha=0.25)

        line, = ax4.plot(avg_sim_calls, avg_nmse, label=label)
        ax4.fill_between(
            avg_sim_calls,
            avg_nmse - 2 * std_nmse,
            avg_nmse + 2 * std_nmse,
            color=line.get_color(), alpha=0.25)

    ax1.set_xscale('log')
    ax1.grid(True)
    ax1.legend(loc='best')

    ax2.set_xscale('log')
    ax2.grid(True)
    ax2.legend(loc='best')

    ax3.set_xscale('log')
    try:
        ax3.set_yscale('log', nonposy='clip')
    except TypeError:
        # Newer matplotlib releases renamed the keyword to `nonpositive`.
        ax3.set_yscale('log', nonpositive='clip')
    ax3.grid(True)
    ax3.legend(loc='best')

    ax4.set_xscale('log')
    ax4.set_yscale('log')
    ax4.grid(True)
    ax4.legend(loc='best')

    return ax1, ax2, ax3, ax4
def plot_distances(problem, num_samples, algorithms, algorithm_labels,
                   repeats=1, call=True, verbose=False, which='Both'):
    '''
    Retrieves results from data files and plots the results.

    If there are not enough data points for either the number of repeats
    or number of samples, additional data points are added (the
    algorithms are run).

    Four figures are created:

    1. number of samples against the total variation distance,
    2. number of simulation calls against the total variation distance,
    3. number of samples against the number of simulation calls,
    4. number of simulation calls against the NMSE.

    Every curve is the mean over the repeats, with a shaded band of two
    standard deviations on either side.

    Arguments
    ---------
    problem : instance of ABC_Problem
        The problem we're trying to solve.
    num_samples : int
        The number of samples to draw.
    algorithms : list
        List of algorithms to plot the curves of.
    algorithm_labels : list of strings
        Labels for the legend of the plot.

    Optional Arguments
    ------------------
    repeats : int
        Number of times runs are repeated.
    call : bool
        If True will do additional runs of the algorithms to obtain
        enough data. Default True.
        NOTE(review): when the data file does not exist at all the
        algorithm is run regardless of this flag.
    verbose : bool
        If True prints the index of every data entry that is inspected,
        default False.
    which : string
        Currently not used: all four figures are always created.

    Returns
    -------
    ax1, ax2, ax3, ax4 : tuple
        The axis handles of the four created figures.

    Raises
    ------
    Exception
        If there are not enough usable entries and `call` is False, or
        if running the algorithm did not add any entries to the file.
    '''
    # NOTE(review): this module contains an earlier definition of
    # plot_distances as well; this later one replaces it at import time.
    color_cycle = ['r', '0', 'b', 'g']
    axis_labels = [
        (r'Number of samples', r'Total Variation Distance'),
        (r'Number of simulation calls', r'Total Variation Distance'),
        (r'Number of samples', r'Number of simulation calls'),
        (r'Number of simulation calls', r'NMSE'),
    ]

    axes = []
    for fig_number, (xlabel, ylabel) in enumerate(axis_labels, 1):
        ax = plt.figure(fig_number).add_subplot(111)
        # set_color_cycle was superseded by set_prop_cycle; use whichever
        # the installed matplotlib provides.
        if hasattr(ax, 'set_prop_cycle'):
            ax.set_prop_cycle('color', color_cycle)
        else:
            ax.set_color_cycle(color_cycle)
        ax.set_xlabel(xlabel)
        ax.set_ylabel(ylabel)
        axes.append(ax)
    ax1, ax2, ax3, ax4 = axes

    for i, algorithm in enumerate(algorithms):
        # One column per repeat, one row per sample.
        dist = np.zeros((num_samples, repeats))
        sim_calls = np.zeros((num_samples, repeats))
        nmse = np.zeros((num_samples, repeats))

        filename = dm.get_filename(algorithm)
        path = os.path.join(os.getcwd(), 'data', filename)

        try:
            # Try to read from the database
            with open(path, 'rb') as f:
                alg_data = pickle.load(f)
        except IOError:
            # The file doesn't exist: run the experiments
            for _ in range(repeats):
                algorithm.run(num_samples)

            # And load the data after
            with open(path, 'rb') as f:
                alg_data = pickle.load(f)

        data_index = 0   # position in the stored list of runs
        array_index = 0  # next column to fill
        while array_index < repeats:
            if verbose:
                print('data entry %d' % data_index)

            if data_index >= len(alg_data.list_of_samples):
                if not call:
                    raise Exception('Not enough data points')

                stored_before = len(alg_data.list_of_samples)

                # Run the required additional experiments
                for _ in range(repeats - array_index):
                    algorithm.run(num_samples)

                # Reload the algorithm data
                with open(path, 'rb') as f:
                    alg_data = pickle.load(f)

                # Without new entries this loop would never terminate.
                if len(alg_data.list_of_samples) <= stored_before:
                    raise Exception(
                        'Not enough data points: running the algorithm '
                        'added no new data entries')
                continue

            samples = alg_data.list_of_samples[data_index]
            if len(samples) < num_samples:
                # Run is too short to use: skip it.
                data_index += 1
                continue

            samples = samples[:num_samples]
            sim_calls[:, array_index] = \
                alg_data.list_of_sim_calls[data_index][:num_samples]
            dist[:, array_index] = variation_distance(samples, problem)
            nmse[:, array_index] = NMSE_convergence(problem, samples)

            array_index += 1
            data_index += 1

        label = algorithm_labels[i]

        avg_dist = np.mean(dist, 1)
        std_dist = np.std(dist, 1)

        # Simulation calls are accumulated over the samples of a run.
        total_sim_calls = np.cumsum(sim_calls, 0)
        avg_sim_calls = np.mean(total_sim_calls, 1)
        std_sim_calls = np.std(total_sim_calls, 1)

        avg_nmse = np.mean(nmse, 1)
        std_nmse = np.std(nmse, 1)

        # Logarithmically spaced sample indices for the distance plots.
        indices = np.floor(
            np.logspace(0, np.log10(num_samples))).astype(int)
        indices[0] = 0
        indices[-1] = num_samples - 1
        # For num_samples == 1 every logspace value floors to 1, which
        # would index past the end of the arrays.
        indices = np.minimum(indices, num_samples - 1)

        dist_mean = avg_dist[indices]
        dist_band = 2 * std_dist[indices]

        line, = ax1.plot(indices, dist_mean, label=label)
        ax1.fill_between(
            indices,
            dist_mean - dist_band,
            dist_mean + dist_band,
            color=line.get_color(), alpha=0.25)

        line, = ax2.plot(avg_sim_calls[indices], dist_mean, label=label)
        ax2.fill_between(
            avg_sim_calls[indices],
            dist_mean - dist_band,
            dist_mean + dist_band,
            color=line.get_color(), alpha=0.25)

        line, = ax3.plot(range(num_samples), avg_sim_calls, label=label)
        ax3.fill_between(
            np.array(range(num_samples), ndmin=1),
            # Keep the lower edge at 1 or above for the log-scaled y axis.
            np.clip(avg_sim_calls - 2 * std_sim_calls, 1, np.inf),
            avg_sim_calls + 2 * std_sim_calls,
            color=line.get_color(), alpha=0.25)

        line, = ax4.plot(avg_sim_calls, avg_nmse, label=label)
        ax4.fill_between(
            avg_sim_calls,
            avg_nmse - 2 * std_nmse,
            avg_nmse + 2 * std_nmse,
            color=line.get_color(), alpha=0.25)

    ax1.set_xscale('log')
    ax1.grid(True)
    ax1.legend(loc='best')

    ax2.set_xscale('log')
    ax2.grid(True)
    ax2.legend(loc='best')

    ax3.set_xscale('log')
    try:
        ax3.set_yscale('log', nonposy='clip')
    except TypeError:
        # Newer matplotlib releases renamed the keyword to `nonpositive`.
        ax3.set_yscale('log', nonpositive='clip')
    ax3.grid(True)
    ax3.legend(loc='best')

    ax4.set_xscale('log')
    ax4.set_yscale('log')
    ax4.grid(True)
    ax4.legend(loc='best')

    return ax1, ax2, ax3, ax4