def boxplot_multiple_configurations_rewards_timesteps_last_episodes(
        algor, param, values_of_param, last_20_rewards, last_20_timesteps):
    """
    Draw and save two boxplot figures: one for rewards and one for time steps.

    Each box collects every sample of one configuration rather than a single
    mean value (per the original author's note: last 20 episodes of 5 runs,
    i.e. 100 points per box, laid out as episodes x runs).

    Args:
        algor: algorithm name, appended to the figure titles.
        param: parameter name, appended to the figure titles.
        values_of_param: tick labels, one per box.
        last_20_rewards: data handed to ``ax.boxplot`` for the reward figure.
        last_20_timesteps: data handed to ``ax.boxplot`` for the time steps
            figure.
    """

    def draw_and_save(samples, y_label, title_prefix, file_stem):
        # One figure per metric; the file name has no directory prefix.
        fig, ax = plt.subplots()
        ax.boxplot(samples)
        ax.set_xticklabels(values_of_param)
        ax.set_ylabel(y_label)
        ax.set_title(title_prefix + param + ' for algo ' + algor)
        fig.tight_layout()
        plt.savefig(file_stem + get_extension())

    draw_and_save(
        last_20_rewards,
        'Avg reward',
        'Avg reward in 5 runs of last 20 episodes per config of ',
        'boxplot_param_reward_last_20',
    )
    draw_and_save(
        last_20_timesteps,
        'Avg time steps',
        'Avg time steps in 5 runs of last 20 episodes per config of ',
        'boxplot_param_timestep_last_20',
    )
def plot_multiple_algos_avg_rewards_timesteps_bars(algos, avg_rew, avg_steps,
                                                   path):
    """
    Save and show two bar charts (average reward, average time steps) for
    one single path.

    Args:
        algos: algorithm identifiers; each is converted to a display label
            with ``print_cute_algo_name``.
        avg_rew: bar heights for the reward chart, one per algorithm.
        avg_steps: bar heights for the time steps chart, one per algorithm.
        path: forwarded to ``build_output_dir_from_path`` to obtain the
            output directory prefix.
    """
    target_output_dir = build_output_dir_from_path(output_dir, path)

    def render_bars(heights, y_label, file_stem, zero_line):
        fig, ax = plt.subplots()
        return fig, ax, heights, y_label, file_stem, zero_line

    # Reward chart: labels are built after the figure exists.
    fig, ax, heights, y_label, file_stem, zero_line = render_bars(
        avg_rew, 'Avg reward for episode', 'avg_rewards_for_algos', True)
    cols_labels = [print_cute_algo_name(name) for name in algos]
    ax.bar(cols_labels, heights, align='center')
    ax.set_ylabel(y_label)
    if zero_line:
        # Thin horizontal reference line at zero.
        plt.axhline(0, color='black', lw=.3)
    fig.tight_layout()
    plt.savefig(target_output_dir + file_stem + get_extension())
    plt.show()

    # Time steps chart: same labels, no zero line.
    fig, ax, heights, y_label, file_stem, zero_line = render_bars(
        avg_steps, 'Avg time steps for episode', 'avg_steps_for_algos', False)
    ax.bar(cols_labels, heights, align='center')
    ax.set_ylabel(y_label)
    fig.tight_layout()
    plt.savefig(target_output_dir + file_stem + get_extension())
    plt.show()
def plot_training_time_traffic(path=None):
    """
    Generate boxplots for training time and traffic values saved into csv
    files.

    For every algorithm a file named ``<input_dir><starter>_<algo>.csv`` is
    read, where ``starter`` is ``"0"`` when ``path`` is None and
    ``"path<path>"`` otherwise. The first row is skipped as a header; column 1
    is parsed as a float (time) and column 2 as an int (traffic).

    Args:
        path: optional path identifier; also forwarded to
            ``build_output_dir_from_path`` to obtain the output directory.

    Raises:
        SystemExit: with code 1 when ``input_dir`` does not exist.
    """
    algos = ["sarsa", "sarsa_lambda", "qlearning", "qlearning_lambda"]
    # One sample list per algorithm, kept in the same order as ``algos``.
    times = [[] for _ in algos]
    traffic = [[] for _ in algos]

    starter = "0" if path is None else "path" + str(path)
    # target_output_dir is current directory if path is not defined
    # (used for data collected before tuning)
    target_output_dir = build_output_dir_from_path(output_dir, path)

    if not pathlib.Path(input_dir).exists():
        print("Input directory does not exist.")
        # Same exception the interactive ``exit`` helper raises, without
        # depending on the ``site`` module having injected that helper.
        raise SystemExit(1)

    # read csv files generated by get_training_time_traffic.py
    for index, fa in enumerate(algos):
        csv_name = input_dir + starter + "_" + fa + ".csv"
        # newline='' is what the csv module asks for when reading files.
        with open(csv_name, 'r', newline='') as csv_file:
            reader = csv.reader(csv_file, delimiter=',')
            next(reader, None)  # skip the header row
            for row in reader:
                if not row:
                    # Blank lines come back as empty lists; ignore them.
                    continue
                times[index].append(float(row[1]))
                traffic[index].append(int(row[2]))

    fig, ax = matplotlib.pyplot.subplots()
    ax.boxplot(times)
    ax.set_xticklabels([print_cute_algo_name(name) for name in algos])
    ax.set_ylabel('Time (s)')
    matplotlib.pyplot.xticks(rotation=45)
    matplotlib.pyplot.grid(True, color='gray', linestyle='dashed')
    fig.tight_layout()
    matplotlib.pyplot.savefig(target_output_dir + 'training_times'
                              + get_extension())
    matplotlib.pyplot.show()

    fig, ax = matplotlib.pyplot.subplots()
    ax.boxplot(traffic)
    ax.set_xticklabels(["SARSA", "SARSA(λ)", "Q-learning", "Q(λ)"])
    matplotlib.pyplot.xticks(rotation=45)
    ax.set_ylabel('Number of commands sent')
    matplotlib.pyplot.grid(True, color='gray', linestyle='dashed')
    fig.tight_layout()
    matplotlib.pyplot.savefig(target_output_dir + 'training_traffic'
                              + get_extension())
    matplotlib.pyplot.show()
def plot_multiple_configuration_avg_rewards_timesteps_bars(
        algo, param, param_values, avg_rew, avg_steps):
    """
    Save and show bar charts of the global average reward and time steps for
    several configurations of a single parameter.

    Args:
        algo: algorithm name, forwarded to ``build_output_dir_for_params``.
        param: parameter name; used in the output file names and converted
            with ``return_greek_letter`` for the bar labels.
        param_values: parameter values as strings, one per bar.
        avg_rew: bar heights for the reward chart.
        avg_steps: bar heights for the time steps chart.
    """
    complete_target_dir = build_output_dir_for_params(target_dir, param, algo)

    def make_labels():
        labels = []
        for position, raw_value in enumerate(param_values):
            # Only the first value of "lambda" keeps its leading zeros;
            # every other value has them stripped.
            untouched = position == 0 and param == "lambda"
            labels.append(
                return_greek_letter(param) + "="
                + (raw_value if untouched else raw_value.lstrip('0')))
        return labels

    # Reward chart
    fig, ax = plt.subplots()
    param_labels = make_labels()
    ax.bar(param_labels, avg_rew, align='center')
    ax.set_ylabel('Avg reward for episode')
    plt.axhline(0, color='black', lw=.3)
    fig.tight_layout()
    plt.savefig(complete_target_dir + 'avg_rewards_for_' + param
                + get_extension())
    plt.show()

    # Time steps chart (labels are rebuilt, as in the reward chart)
    fig, ax = plt.subplots()
    param_labels = make_labels()
    ax.bar(param_labels, avg_steps, align='center')
    ax.set_ylabel('Avg time steps for episode')
    fig.tight_layout()
    plt.savefig(complete_target_dir + 'avg_steps_for_' + param
                + get_extension())
    plt.show()
def plot_cdf_reward_multiple_algo(algorithms_target, episodes_target, avg_rew,
                                  path):
    """
    Save and show the CDF of the average reward, one step curve per algorithm.

    Args:
        algorithms_target: algorithm identifiers; used for the legend through
            ``print_cute_algo_name``.
        episodes_target: accepted but not read by this function.
        avg_rew: one sequence of reward values per algorithm, indexed in the
            same order as ``algorithms_target``.
        path: forwarded to ``build_output_dir_from_path`` to obtain the
            output directory prefix.
    """
    target_output_dir = build_output_dir_from_path(output_dir, path)
    fig, ax = plt.subplots()

    # Cumulative, normalised step histogram == empirical CDF.
    cdf_options = dict(density=True, cumulative=True, bins=2000,
                       histtype='step', alpha=0.8)
    for position, algo_name in enumerate(algorithms_target):
        ordered_rewards = np.sort(avg_rew[position])
        plt.hist(ordered_rewards,
                 label=print_cute_algo_name(algo_name),
                 **cdf_options)

    fix_hist_step_vertical_line_at_end(ax)
    plt.xlabel('Reward')
    plt.ylabel('CDF (Episode)')
    plt.legend(loc='lower right', prop=fontP, ncol=n_cols)
    plt.grid(True, color='gray', linestyle='dashed')
    plt.tight_layout()
    plt.ylim(0, 1.0)
    output_file = (target_output_dir + 'cdf_rewards_multiple_algo'
                   + get_extension())
    plt.savefig(output_file)
    plt.show()
def retrieve_reward_per_request_single_run(date_to_retrieve,
                                           show_intermediate_graphs=False,
                                           color_index=0,
                                           algorithm="sarsa"):
    """
    Retrieve the reward per each time step (or command sent by the RL
    algorithm) from the log file of 1 single execution.

    Args:
        date_to_retrieve: identifier passed to ``read_all_info_from_log``.
        show_intermediate_graphs: when true, also plot, save and show the
            cumulative reward over the commands.
        color_index: index into the local 4-colour palette for that plot.
        algorithm: name used as legend label and in the saved file name.

    Returns:
        Tuple ``(commands, cum_rewards, len(commands))``.
    """
    # Four values are unpacked; only commands and cum_rewards are used here.
    _episodes, commands, _reward, cum_rewards = read_all_info_from_log(
        date_to_retrieve)

    if show_intermediate_graphs:
        colors = ["#EB1E35", "#E37600", "#054AA6", "#038C02"]
        pl.plot(commands, cum_rewards, label=algorithm,
                color=colors[color_index])
        # Raw strings: "\m" is not a valid escape sequence in a normal
        # literal and triggers a warning on recent Python versions.
        pl.xlabel(r"Number of sent commands $\mathregular{n_a}$")
        pl.ylabel(r"Cumulative reward $\mathregular{C(n_a)}$")
        pl.legend(loc='upper right')
        pl.grid(True)
        # Lay out before saving so the saved file matches what is shown,
        # as the other plotting functions in this file do.
        pl.tight_layout()
        plt.savefig('commands_plot_' + algorithm + get_extension())
        plt.show()

    return commands, cum_rewards, len(commands)
def plot_multiple_algo_moving_avg(algorithms_target, episodes_target,
                                  moving_average_rewards_target,
                                  moving_average_timesteps_target, path=None):
    """
    Generate plots having the moving average reward and timesteps values for
    all RL algorithms.

    Two figures are saved and shown: ``mavg_reward_plot`` and
    ``mavg_timesteps_plot``. Each moving-average series is drawn against the
    trailing episodes of the same length, so a series shorter than its
    episode list is aligned to the end of the episode axis.

    Args:
        algorithms_target: algorithm identifiers (legend labels are produced
            by ``print_cute_algo_name``).
        episodes_target: one episode sequence per algorithm.
        moving_average_rewards_target: one reward series per algorithm.
        moving_average_timesteps_target: one time steps series per algorithm.
        path: forwarded to ``build_output_dir_from_path``.
    """
    target_output_dir = build_output_dir_from_path(output_dir, path)

    def _plot_series(series_per_algo, y_label, legend_loc, file_stem):
        for position, algo_name in enumerate(algorithms_target):
            episodes = episodes_target[position]
            series = series_per_algo[position]
            # Skip the leading episodes that have no moving-average value.
            first_episode = (np.array(episodes).shape[0]
                             - np.array(series).shape[0])
            pl.plot(episodes[first_episode:], series,
                    label=print_cute_algo_name(algo_name))
        # Raw strings keep "\mathregular" intact without relying on an
        # invalid "\m" escape sequence.
        pl.xlabel(r"Episode $\mathregular{E}$")
        pl.ylabel(y_label)
        pl.legend(loc=legend_loc, prop=fontP, ncol=n_cols)
        pl.grid(True, color='gray', linestyle='dashed')
        pl.tight_layout()
        plt.savefig(target_output_dir + file_stem + get_extension())
        plt.show()

    _plot_series(moving_average_rewards_target,
                 r"Total reward $\mathregular{R(E)}$",
                 'lower right', 'mavg_reward_plot')
    _plot_series(moving_average_timesteps_target,
                 r"Number of time steps $\mathregular{T(E)}$",
                 'upper right', 'mavg_timesteps_plot')
def compute_avg_reward_per_request_multiple_runs(
        dates, algo, show_intermediate_graphs=False):
    """
    Compute the average cumulative reward per command over multiple
    executions.

    Multiple executions of the same algorithm are likely to have different
    numbers of commands sent/timesteps, so the average is only computed over
    the first ``min_length`` commands, where ``min_length`` is the smallest
    length reported by any run.

    Args:
        dates: one identifier per run, each passed to
            ``retrieve_reward_per_request_single_run``.
        algo: algorithm name, used in legend labels and the saved file name.
        show_intermediate_graphs: when true, plot every run and save/show the
            figure as ``all_commands_<algo>``.

    Returns:
        Tuple ``(avg_cum_reward, avg_commands)``: the per-command averages and
        the matching command indices ``0 .. min_length - 1``. Both lists are
        empty when ``dates`` is empty.
    """
    cum_rewards = []
    lengths = []
    for run_index, dat in enumerate(dates):
        com, cr, cl = retrieve_reward_per_request_single_run(dat)
        cum_rewards.append(cr)
        lengths.append(cl)
        if show_intermediate_graphs:
            # Label with the loop position: looking the date up again would
            # give duplicated dates the same run number.
            pl.plot(com, cr, label=algo + "-run" + str(run_index))

    min_length = min(lengths, default=0)
    run_count = float(len(cum_rewards))

    avg_cum_reward = []
    avg_commands = []
    for i in range(min_length):
        # Explicit left-to-right accumulation keeps the float results
        # identical to a plain running sum.
        total_sum = 0.0
        for run_rewards in cum_rewards:
            total_sum += run_rewards[i]
        avg_cum_reward.append(total_sum / run_count)
        avg_commands.append(i)

    if show_intermediate_graphs:
        # Raw strings: "\m" is not a valid escape in a normal literal.
        pl.xlabel(r"Number of sent commands $\mathregular{n_a}$")
        pl.ylabel(r"Cumulative reward $\mathregular{C(n_a)}$")
        pl.legend(loc='upper right')
        pl.grid(True)
        pl.tight_layout()
        plt.savefig('all_commands_' + algo + get_extension())
        plt.show()

    return avg_cum_reward, avg_commands
def plot_cum_reward_per_command_multiple_algos_for_specified_path(
        rewards, commands, algorithms, path):
    """
    Generate a plot with the cumulative reward averaged over multiple
    executions, for all algorithms used for 1 single path.

    The figure is saved as ``all_commands_all_algos`` in the directory
    returned by ``build_output_dir_from_path`` and then shown.

    Args:
        rewards: one cumulative-reward series per algorithm.
        commands: one command-index series per algorithm (x axis).
        algorithms: algorithm identifiers, in the same order as ``rewards``
            and ``commands``.
        path: forwarded to ``build_output_dir_from_path``.
    """
    target_output_dir = build_output_dir_from_path(output_dir, path)

    for index, al in enumerate(algorithms):
        pl.plot(commands[index], rewards[index],
                label=print_cute_algo_name(al))

    # Raw strings keep "\mathregular" without an invalid "\m" escape.
    pl.xlabel(r'Number of sent commands $\mathregular{n_a}$')
    pl.ylabel(r'Cumulative reward $\mathregular{C(n_a)}$')
    pl.legend(loc='upper left', prop=fontP, ncol=n_cols)
    pl.grid(True, color='gray', linestyle='dashed')
    pl.tight_layout()
    plt.savefig(target_output_dir + 'all_commands_all_algos'
                + get_extension())
    plt.show()
def compute_avg_reward_single_algo_multiple_runs(date_array, algorithm=None):
    """
    Read the per-episode average reward of every run from its output file,
    show the CDF of each run, then average the runs episode by episode.

    The averaged curve is saved as ``avg_reward_plot_multiple_runs`` (no
    directory prefix) and the figure is closed without being shown.

    Args:
        date_array: one identifier per run, each passed to
            ``read_avg_reward_from_output_file``.
        algorithm: algorithm name; must be one of "sarsa", "sarsa_lambda",
            "qlearning", "qlearning_lambda", otherwise the lookup below
            raises ValueError.

    Returns:
        Tuple ``(algorithm, x, y_avg, y_per_run)``: the x values of the first
        run, the averaged rewards, and the list of per-run reward series.
    """
    episodes_per_run = []
    rewards_per_run = []
    for run_date in date_array:
        run_episodes, run_rewards = read_avg_reward_from_output_file(
            algorithm, run_date)
        episodes_per_run.append(run_episodes)
        rewards_per_run.append(run_rewards)

    # One cumulative step histogram (empirical CDF) per run.
    fig, ax = plt.subplots()
    for run_number, run_rewards in enumerate(rewards_per_run):
        plt.hist(np.sort(run_rewards),
                 density=True,
                 cumulative=True,
                 label='CDF-run ' + str(run_number),
                 bins=2000,
                 histtype='step',
                 alpha=0.8)
    fix_hist_step_vertical_line_at_end(ax)
    plt.xlabel('Reward')
    plt.ylabel('CDF (Episode)')
    plt.legend(loc='lower right', prop=fontP, ncol=n_cols)
    plt.ylim(0, 1.0)
    plt.grid(True, color='gray', linestyle='dashed')
    plt.tight_layout()
    plt.show()

    # Average over the runs, using the first run's length as reference.
    reference_episodes = episodes_per_run[0]
    run_total = len(date_array)
    averaged_rewards = []
    for episode_pos in range(len(reference_episodes)):
        running_sum = 0
        for run_pos in range(run_total):
            running_sum += rewards_per_run[run_pos][episode_pos]
        averaged_rewards.append(running_sum / float(run_total))

    averaged_frame = pd.DataFrame({
        'x': reference_episodes,
        'y1': averaged_rewards
    })

    # The position itself is not used; the lookup rejects unknown names.
    ["sarsa", "sarsa_lambda", "qlearning", "qlearning_lambda"].index(algorithm)

    legend_text = "avg over " + str(run_total) + " run"
    pl.plot(averaged_frame['x'], averaged_frame['y1'], label=legend_text)
    pl.xlabel('Episodes')
    pl.ylabel('Avg reward obtained per episode')
    pl.legend(loc='lower right', prop=fontP, ncol=n_cols)
    pl.grid(True, color='gray', linestyle='dashed')
    pl.tight_layout()
    plt.savefig('avg_reward_plot_multiple_runs' + get_extension())
    plt.close()

    return algorithm, reference_episodes, averaged_rewards, rewards_per_run
def plot_single_algo_multiple_runs(date_array, algorithm=None, path=None,
                                   partial=None):
    """
    Generate plots with the reward of a single execution over episodes, the
    average reward and the moving average reward computed over multiple
    executions of the same RL algorithm (same for time steps).

    Two figures are saved and shown: ``all_reward_plot_<algorithm>`` and
    ``all_timesteps_plot_<algorithm>``. The "1 run" curve is the last run
    read from ``date_array``. The moving average uses a window of 10
    episodes.

    Args:
        date_array: one identifier per run, each passed to
            ``read_reward_timesteps_from_output_file``.
        algorithm: algorithm name, used to read the data and in file names.
        path: forwarded to ``build_output_dir_from_path``.
        partial: forwarded to ``build_output_dir_from_path`` and to
            ``read_reward_timesteps_from_output_file``.

    Returns:
        Tuple ``(algorithm, x, yMA, yMA_timesteps)``: the x values of the
        last run read and the two moving-average arrays.
    """
    target_output_dir = build_output_dir_from_path(output_dir, path, partial)
    window_size = 10

    x_all = []
    y_all_reward = []
    y_all_cum_reward = []
    y_all_timesteps = []
    x, y_reward, y_cum_reward, y_timesteps = [], [], [], []

    # retrieve data for all dates; the last run stays bound to x / y_*
    for dat in date_array:
        x, y_reward, y_cum_reward, y_timesteps = (
            read_reward_timesteps_from_output_file(algorithm, dat, partial))
        x_all.append(x)
        y_all_reward.append(y_reward)
        y_all_cum_reward.append(y_cum_reward)
        y_all_timesteps.append(y_timesteps)

    # compute average over multiple runs
    y_final_reward, y_final_cum_reward, y_final_timesteps = (
        compute_avg_over_multiple_runs(len(x_all[0]), len(date_array),
                                       y_all_reward, y_all_cum_reward,
                                       y_all_timesteps))

    df_single_run = pd.DataFrame({
        'x': x,
        'y1': y_reward,
        'y2': y_timesteps,
        'y3': y_cum_reward
    })
    df_final_avg_over_n_runs = pd.DataFrame({
        'x': x_all[0],
        'y1': y_final_reward,
        'y2': y_final_timesteps,
        'y3': y_final_cum_reward
    })

    # uniform weights for the smoothed moving average
    weights = np.repeat(1.0, window_size) / window_size
    run_count_text = str(len(date_array))

    def _plot_metric(column, moving_avg, y_label, legend_loc, file_stem):
        """Plot single run, average and moving average of one column."""
        pl.plot(df_single_run['x'], df_single_run[column], ':',
                label='1 run', color="grey")
        pl.plot(df_final_avg_over_n_runs['x'],
                df_final_avg_over_n_runs[column], 'k',
                label=run_count_text + " runs avg")
        # The moving average is shorter than the episode axis; align it to
        # the trailing episodes.
        first_episode = (np.array(df_final_avg_over_n_runs['x']).shape[0]
                         - moving_avg.shape[0])
        pl.plot(df_final_avg_over_n_runs['x'][first_episode:], moving_avg,
                'r', label=run_count_text + ' runs moving avg')
        # Raw strings keep "\mathregular" without an invalid "\m" escape.
        pl.xlabel(r"Episode $\mathregular{E}$")
        pl.ylabel(y_label)
        pl.legend(loc=legend_loc, prop=fontP, ncol=n_cols)
        pl.grid(True, color='gray', linestyle='dashed')
        pl.tight_layout()
        plt.savefig(target_output_dir + file_stem + algorithm
                    + get_extension())
        plt.show()

    yMA = np.convolve(df_final_avg_over_n_runs['y1'], weights, 'valid')
    _plot_metric('y1', yMA, r"Total reward $\mathregular{R(E)}$",
                 'lower right', 'all_reward_plot_')

    yMA_timesteps = np.convolve(df_final_avg_over_n_runs['y2'], weights,
                                'valid')
    _plot_metric('y2', yMA_timesteps,
                 r"Number of time steps $\mathregular{T(E)}$",
                 'upper right', 'all_timesteps_plot_')

    return algorithm, x, yMA, yMA_timesteps
def plot_multiple_configuration_moving_avg(algorithm, param,
                                           param_values_target,
                                           episodes_target,
                                           moving_average_rewards_target,
                                           moving_average_timesteps_target):
    """
    Generate plots with the moving average of reward and timesteps for
    multiple executions and configurations of a single parameter.

    Two figures are saved and shown: ``mavg_reward_params`` and
    ``mavg_timesteps_params``. Each series is drawn against the trailing
    episodes of the same length.

    Args:
        algorithm: algorithm name, forwarded to
            ``build_output_dir_for_params``.
        param: parameter name; converted with ``return_greek_letter`` for the
            legend.
        param_values_target: parameter values as strings, one per curve.
        episodes_target: one episode sequence per configuration.
        moving_average_rewards_target: one reward series per configuration.
        moving_average_timesteps_target: one time steps series per
            configuration.
    """
    complete_target_dir = build_output_dir_for_params(target_dir, param,
                                                      algorithm)

    def _legend_value(position):
        value = param_values_target[position]
        if position == 0 and param == "lambda":
            # lambda can be 0, so the 0 is kept in the legend of the plot
            return value
        # remove 0 from legend, keep only decimals
        return value.lstrip('0')

    def _plot_series(series_per_config, y_label, legend_loc, file_stem):
        for position in range(len(param_values_target)):
            episodes = episodes_target[position]
            series = series_per_config[position]
            # Skip the leading episodes that have no moving-average value.
            first_episode = (np.array(episodes).shape[0]
                             - np.array(series).shape[0])
            pl.plot(
                episodes[first_episode:],
                series,
                label=(return_greek_letter(param) + r'$=$'
                       + _legend_value(position)),
            )
        # Raw strings keep "\mathregular" without an invalid "\m" escape.
        pl.xlabel(r"Episode $\mathregular{E}$")
        pl.ylabel(y_label)
        pl.legend(loc=legend_loc, prop=fontP, ncol=n_cols)
        pl.grid(True, color='gray', linestyle='dashed')
        pl.tight_layout()
        plt.savefig(complete_target_dir + file_stem + get_extension())
        plt.show()

    _plot_series(moving_average_rewards_target,
                 r"Total reward $\mathregular{R(E)}$",
                 'lower right', 'mavg_reward_params')
    _plot_series(moving_average_timesteps_target,
                 r"Number of time steps $\mathregular{T(E)}$",
                 'upper right', 'mavg_timesteps_params')