def plot_opinions_on_methods(expe):
    """Bar chart of the subjects' answers about the two control methods.

    Relies on the pre-computed ``expe.opinions`` pandas dataframe and saves
    the figure to "Opinions_on_methods.pdf". Layout depends on the
    module-level ``use_nime20_notations`` flag (compact paper figure vs.
    full titled figure).
    """
    if use_nime20_notations:
        fig = plt.figure(figsize=(5.5, 1.7))
    else:
        fig = plt.figure(figsize=(7, 3))
    ax = fig.add_subplot(111)
    # We rely on a pre-computed pandas dataframe for this
    expe.opinions.plot.bar(rot=0, ax=ax)
    if use_nime20_notations:
        ax.set(ylabel='Number of\nindividuals', xlabel='Characteristic asked')
    else:
        ax.set(title='Answers to the questions: which method was the [...]?',
               ylabel='Amount of subjects',
               xlabel='Characteristic asked')
    # legend needs more space
    max_displayed_y = int(math.floor(expe.opinions.max().max() * 1.2))
    if use_nime20_notations:
        max_displayed_y = expe.opinions.max().max() + 1.0
        # Manual x ticks, optimized display for paper
        ax.legend(loc='upper center', bbox_to_anchor=(1.30, 1.0))
        ax.set_xticklabels(
            ["fastest", "most\nprecise", "most\nintuitive", "preferred"])
    else:
        ax.legend(loc='upper center', bbox_to_anchor=(0.38, 1.00))
    ax.set_ylim([0, max_displayed_y])
    fig.tight_layout()
    # Fix: save 'fig' directly instead of plt.gcf() — consistent with the
    # other plotting helpers, and safe if another figure becomes current.
    figurefiles.save_in_figures_folder(fig, "Opinions_on_methods.pdf")
def plot_age_and_sex(expe):
    """Scatter plot of subjects' ages, one row per sex category.

    Prints a summary line to stdout and saves the figure to
    "Age_and_sex.pdf".
    """
    def _ages_of(sex):
        # Ages of the subjects having the given sex category
        return [s.age for s in expe.subjects if s.sex == sex]

    all_ages = [s.age for s in expe.subjects]
    female_ages = _ages_of(edp.SexType.FEMALE)
    male_ages = _ages_of(edp.SexType.MALE)
    other_ages = _ages_of(edp.SexType.NON_BINARY)
    print("Subjects: {} female, {} male, {} other, from {} to {} years old".
          format(len(female_ages), len(male_ages), len(other_ages),
                 min(all_ages), max(all_ages)))
    # TODO display male/female as swarm plot (to show age duplicates)
    fig, ax = plt.subplots(1, 1)
    # One scatter call per category (order fixes the default color cycle)
    for ages, sex_value in ((female_ages, edp.SexType.FEMALE),
                            (male_ages, edp.SexType.MALE),
                            (other_ages, edp.SexType.NON_BINARY)):
        plt.scatter(ages, np.full(len(ages), sex_value))
    ax.set(title="Age and sex of the {} subjects".format(len(expe.subjects)),
           xlabel="Age",
           ylabel="Sex")
    ax.set_ylim(-0.5, 2.5)
    ax.yaxis.set(ticks=range(0, 3),
                 ticklabels=["non-binary", "male", "female"])
    plt.grid(linestyle="--", alpha=0.5)
    fig.tight_layout()
    # save before show (because show empties the figure's internal data....)
    figurefiles.save_in_figures_folder(fig, "Age_and_sex.pdf")
def all_perfs_histogram(expe,
                        perf_eval_type=perfeval.EvalType.ADJUSTED,
                        display_KS=False):
    """ Shows all performances sorted in 2 groups (sliders and interp.), and
    displays the p-value of the Kolmogorov-Smirnov test """
    # TODO change the KS-test which might not be the more adapted (-> switch to rank test with ordered values)
    histogram_bins = np.linspace(0.0, 1.0, 20)
    kde_bw = 0.05  # KDE bandwidth shared by both groups
    if use_nime20_notations:
        fig = plt.figure(figsize=(4.5, 2.0))
    else:
        fig = plt.figure(figsize=(7, 3))
    ax = fig.add_subplot(111)
    # Column 0: sliders scores; column 1: interpolation scores
    adjusted_s_2d = np.array(expe.get_all_actual_s_2d(perf_eval_type))
    # Fix: return values were stored in unused locals (distplot0/distplot1)
    sns.distplot(adjusted_s_2d[:, 0],
                 bins=histogram_bins,
                 kde=True,
                 kde_kws={"bw": kde_bw},
                 ax=ax,
                 label='Sliders')
    sns.distplot(adjusted_s_2d[:, 1],
                 bins=histogram_bins,
                 kde=True,
                 kde_kws={"bw": kde_bw},
                 ax=ax,
                 label='Interpolation')
    ax.set_xlim(0.0, 1.0)
    if use_nime20_notations:
        ax.legend(loc='best')  #, bbox_to_anchor=(0.50, 0.7))
        ax.set(xlabel="Performance scores",
               ylabel="Scaled counts,\nestimated PDF")
        ax.set_ylim(0.0, 3.7)
    else:
        ax.legend(loc='best')
        ax.set(title="Performances of all subjects (eval. function {})".format(
            perfeval.get_perf_eval_name(perf_eval_type)),
               xlabel=r"Performance score $S$",
               ylabel="Scaled counts and estimated PDF")
    # Kolmogorov-Smirnov test using scipy stats. The null hypothesis is 'the 2 samples are drawn from
    # the same distribution'. Null hypothesis can be rejected if p-value is small.
    # Obvious results.... p-value is around 10^-19
    # TODO change to a signed-rank test (but values ordering must be ensured)
    if display_KS:
        [ks_stat, p_value] = stats.ks_2samp(adjusted_s_2d[:, 0],
                                            adjusted_s_2d[:, 1],
                                            alternative='two-sided')
        ax.text(x=0.1,
                y=0.1,
                s='KS-stat={:.2f}, p-value={:.2f}'.format(ks_stat, p_value),
                bbox=dict(boxstyle="round", fc="w"))
    fig.tight_layout()
    figurefiles.save_in_figures_folder(
        fig, "Perfs_histogram_eval{}.pdf".format(perf_eval_type.value))
def compare_adjusted(self,
                     adj_types=(EvalType.ADJUSTED, EvalType.FOCUS_ON_TIME,
                                EvalType.FOCUS_ON_ERROR)):
    """Side-by-side comparison of several performance evaluation functions.

    For each evaluation type, plots the 3D evaluation surface (left column)
    and the histogram/KDE of the resulting scores (right column), then saves
    the figure to a PDF named after the compared types.

    Parameters
    ----------
    adj_types : sequence of EvalType
        Evaluation functions to compare, one row of subplots each.
        Fix: the default used to be a mutable list (shared between calls);
        it is now an immutable tuple with the same contents.
    """
    fig = plt.figure(
        figsize=(9.5, 9))  # can't change the projection of an existing axes
    for i in range(len(adj_types)):
        # Left column: 3D surface of the evaluation function
        ax_adj = fig.add_subplot(len(adj_types), 2, 1 + 2 * i,
                                 projection='3d')
        # CODE EXAMPLE: source = https://hub.packtpub.com/creating-2d-3d-plots-using-matplotlib/
        surf = ax_adj.plot_surface(self.e_grid,
                                   self.t_grid,
                                   adjusted_eval(
                                       self.e_grid,
                                       self.t_grid,
                                       self.allowed_time,
                                       adjustment_type=adj_types[i]),
                                   linewidth=0,
                                   cmap=cm.plasma)
        self._configure_perf_surface_axes(ax_adj)
        # (removed dead code: a local 'name' string was built but never used)
        ax_adj.set(title='Perf. evaluation function {}'.format(
            get_perf_eval_name(adj_types[i])),
                   xlabel='Norm. sum of errors $E$')
        fig.colorbar(surf, aspect=18)
        # Right column: histogram + KDE of the actual scores
        ax_adj_hist = fig.add_subplot(len(adj_types), 2, 2 + 2 * i)
        adjusted_s = self.expe.get_all_actual_s_1d(
            adjustment_type=adj_types[i])
        sns.distplot(adjusted_s,
                     bins=self.histogram_bins,
                     kde=True,
                     kde_kws={"bw": self.kde_bw},
                     ax=ax_adj_hist)
        ax_adj_hist.axvline(np.mean(adjusted_s), color='r', linestyle='--')
        plt.legend(['mean'])
        self._configure_perf_hist_kde_axes(ax_adj_hist, adj_types[i])
    plt.tight_layout()
    fig.subplots_adjust(left=0.05)
    figurefiles.save_in_figures_folder(
        fig, "Perf_adjusted_comparison_{}_{}_{}.pdf".format(
            adj_types[0], adj_types[1], adj_types[2]))
def plot_adjusted_perf_only(self):
    """ Evaluation function display for Nime20 paper """
    figure = plt.figure(figsize=(5, 2.5))
    axes3d = figure.add_subplot(111, projection='3d')
    # Surface height: the ADJUSTED evaluation function over the (E, t) grid
    z_values = adjusted_eval(self.e_grid,
                             self.t_grid,
                             self.allowed_time,
                             adjustment_type=EvalType.ADJUSTED)
    surface = axes3d.plot_surface(self.e_grid,
                                  self.t_grid,
                                  z_values,
                                  linewidth=0,
                                  cmap=cm.plasma)
    # many configs will be overriden just after
    self._configure_perf_surface_axes(axes3d)
    axes3d.set(xlabel='$E$, normalized sum of errors',
               ylabel='$D$, search duration [s]',
               zlabel=r'$S$, performance')
    axes3d.set_xlim(0.0, 0.8)
    axes3d.set_xticks(np.linspace(0.0, 0.8, 5))
    figure.colorbar(surface, aspect=18, pad=0.1)
    plt.tight_layout()
    figure.subplots_adjust(left=0.07, bottom=0.115, right=1.04)
    figurefiles.save_in_figures_folder(figure, "Perf_eval_adjusted.pdf")
def fit_perf_vs_expertise(expe, perf_eval_type, show_fit_analysis=False):
    """Polynomial fits of per-subject mean performance vs. expertise level.

    Draws seaborn regression plots (sliders: degree 2, interpolation:
    degree 1), saves the figure, and prints KS-tests comparing average
    scores between expertise levels for each method.

    expe: experiment object (subjects with mean_s_ingame, expertise_level).
    perf_eval_type: evaluation function used to compute mean scores.
    show_fit_analysis: if True, also runs analyse_goodness_of_fit on
        manual numpy polyfits (seaborn gives no numerical outputs).
    """
    assert len(expe.subjects[0].mean_s_ingame
               ) == 2, 'Works for 2 methods only (sliders vs. interp)'
    # Degrees of polynomial regressions
    faders_reg_degree = 2  # best seems to be 2 (R2 and RMSE)  TODO re-check if 1 is enough
    interp_reg_degree = 1  # TODO re-check if 2 would fit better (with more data)
    expertise_levels = np.asarray(
        [subject.expertise_level for subject in expe.subjects], dtype=int)
    # vstack of row arrays: row 0 = sliders, row 1 = interpolation
    mean_s = np.vstack((np.asarray([
        subject.get_mean_s_adjusted(perf_eval_type)[0]
        for subject in expe.subjects
    ]),
                        np.asarray([
                            subject.get_mean_s_adjusted(perf_eval_type)[1]
                            for subject in expe.subjects
                        ])))
    # manual polyfits, because seaborn does not (and will not...) give numerical outputs (only graphs)
    if show_fit_analysis:
        reg0 = np.polyfit(expertise_levels, mean_s[0, :], faders_reg_degree)
        reg1 = np.polyfit(expertise_levels, mean_s[1, :], interp_reg_degree)
        reg_p = [np.poly1d(reg0), np.poly1d(reg1)]
        for i in range(2):
            plot_name = ('Sliders' if i == 0 else 'Interp')
            plot_name = plot_name + '_eval' + str(perf_eval_type.value)
            analyse_goodness_of_fit(expertise_levels, mean_s[i, :], reg_p[i],
                                    plot_name)
    # Seaborn fit graph (underlying functions: np.polyfit)
    if use_nime20_notations:
        fig = plt.figure(figsize=(5.5, 3))
    else:
        fig = plt.figure(figsize=(6, 4))
    ax = fig.add_subplot()
    if use_nime20_notations:
        ax.set_xlabel("Estimated expertise level", fontdict={'fontsize': 12})
        ax.set_ylabel("Average performances", fontdict={'fontsize': 12})
    else:
        ax.set(
            title="Average performance of subjects, related to their expertise",
            xlabel="Estimated expertise level",
            ylabel="Average performance score")
    # Fix: regplot return values were stored in unused locals (regplot0/1)
    sns.regplot(x=expertise_levels,
                y=mean_s[0, :],
                order=faders_reg_degree,
                label="Sliders",
                marker='x',
                scatter_kws={'alpha': 0.9})
    sns.regplot(x=expertise_levels,
                y=mean_s[1, :],
                order=interp_reg_degree,
                label="Interpolation",
                marker='+',
                scatter_kws={'alpha': 0.6})
    if not use_nime20_notations:
        ax.set_ylim([0, 1])
    else:
        ax.set_ylim([0.2, 0.7])
    ax.set_xlim([min(expertise_levels) - 0.5, max(expertise_levels) + 0.5])
    ax.set_xticks(range(min(expertise_levels), max(expertise_levels) + 1))
    ax.grid(axis='y')
    ax.legend(loc='best')
    if not use_nime20_notations:
        ax.text(x=0.8,
                y=0.1,
                s='Perf. eval. function: {}'.format(
                    perfeval.get_perf_eval_name(perf_eval_type)),
                bbox=dict(boxstyle="round", fc="w"))
    fig.tight_layout()
    # Fix: use the enum's .value in the file name (as every sibling save call
    # does) instead of formatting the enum itself ("EvalType.ADJUSTED")
    figurefiles.save_in_figures_folder(
        fig, "Perf_vs_expertise_eval{}.pdf".format(perf_eval_type.value))
    # Finally : KS-test on sliders and interp average perfs
    # However, the KS-test is not really relevant on such small samples...
    for jbis in (0, 1):
        perfs_by_expertise = list(
        )  # list of arrays. Index 0 is expertise 1, ... etc.
        display_str = "{} - Number of subjects per expertise level (from {} to {}): "\
            .format(edp.MethodType(jbis).name.lower(), min(expertise_levels),
                    max(expertise_levels))
        for ii in range(max(expertise_levels) - min(expertise_levels) + 1):
            average_perfs = [
                subject.get_mean_s_adjusted(perf_eval_type)[jbis]
                for subject in expe.subjects
                if subject.expertise_level == (ii + min(expertise_levels))
            ]
            perfs_by_expertise.append(np.asarray(average_perfs))
            display_str = display_str + " n={} ".format(len(average_perfs))
        print(display_str)
        # Actual KS-tests (triangular pattern: 1vs(2,3,4), 2vs(3,4), 3vs4....)
        display_str = "KS-test, for average scores (sorted by expertise level): "
        for ii in range(max(expertise_levels) - min(expertise_levels) + 1):
            for iii in range(ii + 1,
                             max(expertise_levels) - min(expertise_levels) + 1):
                [KS_stat, p_value] = stats.ks_2samp(perfs_by_expertise[ii],
                                                    perfs_by_expertise[iii])
                display_str = display_str + " {}vs{}: p-value={} ".format(
                    ii + min(expertise_levels), iii + min(expertise_levels),
                    p_value)
        print(display_str)
def plot_all_perfs_histograms_by_synth(expe,
                                       perf_eval_type=perfeval.EvalType.
                                       ADJUSTED,
                                       display_tests=False):
    """One KDE/rug subplot per (non-trial) synth: sliders vs. interp scores.

    expe: experiment object providing global_params and get_all_valid_s().
    perf_eval_type: evaluation function used to compute the scores.
    display_tests: if True, also prints Shapiro-Wilk normality tests and a
        Wilcoxon signed-rank test comparing the two methods for each synth.
    """
    all_s = expe.get_all_valid_s(perf_eval_type)
    n_cols = 4
    n_rows = math.ceil(
        float(expe.global_params.synths_count -
              expe.global_params.synths_trial_count) / n_cols)
    fig = plt.figure(figsize=(13, 8))
    for j in range(expe.global_params.synths_trial_count,
                   expe.global_params.synths_count):
        sliders_s = np.array(all_s[j][0])
        interp_s = np.array(all_s[j][1])
        synth_index = j - expe.global_params.synths_trial_count
        # Fix: removed unused col/row locals (add_subplot takes a flat index)
        ax = fig.add_subplot(n_rows, n_cols, synth_index + 1)
        ax.set(title="Synth ID={}".format(synth_index),
               xlabel="Performance score S",
               ylabel="Observations, estimated PDF")
        sns.distplot(sliders_s, rug=True, hist=False)
        sns.distplot(interp_s, rug=True, hist=False)
        ax.set_xlim([0.0, 1.0])
        if display_tests:
            # - - - Normality tests (not much power for small sample sizes) - - -
            # Fix: removed dead 'test_normality = True' flag and its
            # always-true conditional
            normality_string = "Synth {} normality test p-values: ".format(
                synth_index)
            is_normal = [False, False]
            for k in range(2):
                # Null hypothesis: samples come from a normal distribution
                # D'agostino and pearson (scipy stats default) always says yes... (small sample size)
                # Shapiro-Wilk:
                [stat_value, p_value] = stats.shapiro(all_s[j][k])
                normality_string = normality_string + " {:.3f}".format(
                    p_value)
                is_normal[k] = (
                    p_value > 0.05
                )  # 5% normality test... MIGHT be normal if p_value > 0.05 (not sure)
                normality_string = normality_string + (
                    "(yes) " if is_normal[k] else "(no) ")
            print(normality_string)
            # - - - Wilcoxon signed-rank test, non-parametric, for related pair samples - - -
            # (replaces Mann-Whitney U test, non parametric, OK for small samples)
            print("Synth {}:".format(j -
                                     expe.global_params.synths_trial_count))
            [w_stat, p_value
             ] = stats.wilcoxon(x=sliders_s,
                                y=interp_s)  # implementation requires n>20
            print(
                " Wilcoxon signed-rank test: stat={:0.2f}, p-value={:0.4f} {}"
                .format(w_stat, p_value,
                        ("(different) "
                         if p_value < 0.05 else "(maybe identical)")))
            # U should be compared to U_critical
            #[u_stat, u_p_value] = stats.mannwhitneyu(sliders_s, interp_s, alternative="two-sided")
            #print(" (M-W U: U-stat={:0.2f}, p-value={:0.4f})".format(u_stat, u_p_value))
    fig.tight_layout()
    figurefiles.save_in_figures_folder(
        fig, "Perfs_hist_per_synth_{}-{}.pdf".format(
            perf_eval_type.value, perf_eval_type.name.lower()))
def plot_all_perfs_per_synth(expe,
                             plottype='box',
                             perf_eval_type=perfeval.EvalType.ADJUSTED):
    """Box or violin plot of all subjects' scores, per synth and per method.

    Each synth contributes two adjacent plots (sliders then interpolation),
    with a thin vertical separator line before each pair. Saves the figure
    as "Perfs_per_synth_<value>-<name>.pdf".

    expe: experiment object providing global_params and get_all_valid_s().
    plottype: 'box' or 'violin'; anything else raises ValueError.
    perf_eval_type: evaluation function used to compute the scores.
    """
    assert expe.global_params.search_types_count == 2, 'This display allows slider/interp search types only'
    if plottype != 'box' and plottype != 'violin':
        raise ValueError('Only \'violin\' plot and \'box\' plot are available')
    all_s = expe.get_all_valid_s(perf_eval_type)
    if use_nime20_notations:
        fig = plt.figure(figsize=(9, 2))
    else:
        fig = plt.figure(figsize=(7, 3))
    ax = fig.add_subplot(111)
    if use_nime20_notations:
        ax.set(title="", xlabel="Synthesizer ID", ylabel="Performances")
    else:
        ax.set(
            title="Performances of all subjects, per synth (eval. function {})"
            .format(perfeval.get_perf_eval_name(perf_eval_type)),
            xlabel="Synth ID ",
            ylabel="Performance $S$")
    # box plot of all S perfs data, with empty space after each synth
    # (range counts 2 plots per synth, skipping the trial synths)
    synths_range = range(expe.global_params.synths_trial_count * 2,
                         expe.global_params.synths_count * 2)
    cur_x_tick = 0
    x_ticks = []
    x_ticks_labels = []
    bps = []  # for box plots
    vls = []  # for violin plots
    for i in synths_range:
        # Every 3rd x position is an empty spacer with a separator line
        if (cur_x_tick % 3 == 0):  # space
            ax.axvline(x=cur_x_tick,
                       ymin=0.0,
                       ymax=1.0,
                       color='black',
                       linewidth=0.5)
            x_ticks.append(cur_x_tick)
            x_ticks_labels.append(' ')
            cur_x_tick += 1
        # actual boxplot at every iteration
        synth_index = int(math.floor(float(i) / 2.0))
        synth_id = synth_index - expe.global_params.synths_trial_count
        # Even i: sliders (color C0); odd i: interpolation (color C1)
        if (i % 2) == 0:
            box_color = 'C0'
            if use_nime20_notations:
                x_ticks_labels.append('{}-S'.format(synth_id))
            else:
                x_ticks_labels.append('{} (sliders)'.format(synth_id))
        else:  # separating line after each synth
            box_color = 'C1'
            if use_nime20_notations:
                x_ticks_labels.append('{}-I'.format(synth_id))
            else:
                x_ticks_labels.append('{} (interp.)'.format(synth_id))
        if plottype == 'box':
            # artist costomization from https://matplotlib.org/3.1.0/gallery/statistics/boxplot.html
            median_props = dict(linestyle='-', linewidth=2.0, color='k')
            # Means deleted for NIME20
            # mean_point_props = dict(marker='D', markeredgecolor='black', markerfacecolor='r', markersize=4)
            bps.append(
                ax.boxplot(all_s[synth_index][i % 2],
                           positions=[cur_x_tick],
                           sym='{}.'.format(box_color),
                           widths=[0.6],
                           medianprops=median_props))  #, showmeans=True))  # meanprops=mean_point_props))
            plt.setp(bps[-1]['boxes'], color=box_color)
            plt.setp(bps[-1]['whiskers'], color=box_color)
            plt.setp(bps[-1]['fliers'], color=box_color)
        elif plottype == 'violin':
            vls.append(
                ax.violinplot(all_s[synth_index][i % 2],
                              positions=[cur_x_tick]))
        x_ticks.append(cur_x_tick)
        cur_x_tick += 1
    # NOTE: deliberately dead branch ('and False') — legend kept for reference
    if not use_nime20_notations and False:  # legends disabled for nime20 (and now: always disabled)
        if plottype == 'box':
            ax.legend(
                [bps[0]['boxes'][0], bps[1]['boxes'][0], bps[0]['medians'][0]
                 ],  #bps[0]['means'][0]],
                ['Sliders method', 'Interp. method', 'medians'
                 ],  #'means $\\overline{s_j}$'],
                loc='center left',
                bbox_to_anchor=(1.0, 0.5))
        elif plottype == 'violin':
            pass  # not enough at the moment to really use a violin plot...
    ax.set_ylim([0, 1])
    ax.set_xlim([0, cur_x_tick])
    ax.set_xticks(x_ticks)
    x_labels_fontsize = 10 if use_nime20_notations else 8
    ax.set_xticklabels(x_ticks_labels,
                       rotation=90,
                       fontdict={'fontsize': x_labels_fontsize})
    fig.tight_layout()
    figurefiles.save_in_figures_folder(
        fig, "Perfs_per_synth_{}-{}.pdf".format(perf_eval_type.value,
                                                perf_eval_type.name.lower()))