from collections import OrderedDict
from math import log

import numpy as np
from matplotlib import pyplot

# 'bounds' and 'd_functions' are local modules of this repository; the bare
# names used below (compute_general_transductive_gibbs_bound and
# compute_transductive_complexity_term) are assumed to live in 'bounds'.
import bounds
import d_functions
from bounds import (compute_general_transductive_gibbs_bound,
                    compute_transductive_complexity_term)


def compute_multiple_bounds(m, N, risk, KLQP, disagreement=None, delta=0.05):
    """Compute bounds on the full sample and print their values.

    m            : training set size (number of labeled examples)
    N            : full sample size (labeled + unlabeled examples)
    risk         : Gibbs risk observed on the training set
    KLQP         : Kullback-Leibler divergence between prior and posterior
    disagreement : expected disagreement on the full sample (optional)
    delta        : confidence parameter (default=0.05)
    """
    print('*** Parameters ***')
    print('m = %d\nN = %d\nrisk = %f\nKLQP = %f\ndelta = %f' % (m, N, risk, KLQP, delta))
    # 'disagreement' may be None, so it cannot be formatted with %f unconditionally.
    print('disagreement = %s\n' % ('None' if disagreement is None else '%f' % disagreement))

    print('*** Bounds on the risk of the Gibbs Classifier ***')
    thm5_KL = bounds.compute_general_transductive_gibbs_bound(
        d_functions.kl_divergence, risk, m, N, KLQP, delta)
    print('Theorem 5-KL  : %f' % thm5_KL)

    thm5_Dstar = bounds.compute_general_transductive_gibbs_bound(
        d_functions.new_transductive_divergence, risk, m, N, KLQP, delta)
    print('Theorem 5-D*  : %f' % thm5_Dstar)

    cor7a = bounds.compute_corollary_7a_gibbs_bound(risk, m, N, KLQP, delta)
    print('Corollary 7(a): %f' % cor7a)

    cor7b = bounds.compute_corollary_7b_gibbs_bound(risk, m, N, KLQP, delta)
    print('Corollary 7(b): %f' % cor7b)

    derbeko = bounds.compute_derbeko_2007_gibbs_bound(risk, m, N, KLQP, delta)
    print('Derbeko (2007): %f' % derbeko)

    # The two quantities below bound the risk of the majority vote classifier,
    # not the Gibbs classifier: both twice-the-Gibbs-risk and the C-bound
    # apply to the majority vote.
    print('\n*** Bounds on the risk of the Majority Vote Classifier (based on Theorem 5-D*) ***')
    twice = 2 * thm5_Dstar
    print('Twice the Gibbs: %f' % twice)

    if disagreement is None:
        print('Via the C-Bound: [disagreement required]')
    else:
        c_bound = compute_c_bound(thm5_Dstar, disagreement)
        print('Via the C-Bound: %f' % c_bound)
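
# 'compute_c_bound' is called above but not defined in this file. Below is a
# minimal sketch, assuming the standard form of the C-bound (Lacasse et al.,
# 2006), which upper-bounds the majority vote risk from a bound r on the Gibbs
# risk and the expected disagreement d:
#
#     C-bound = 1 - (1 - 2*r)^2 / (1 - 2*d),   valid for r < 1/2 and d < 1/2.
#
# The repository's actual implementation may differ (e.g., it may also use a
# lower bound on the disagreement).
def compute_c_bound(gibbs_risk_bound, disagreement):
    if gibbs_risk_bound >= 0.5 or disagreement >= 0.5:
        return 1.  # the C-bound is vacuous in this regime
    return 1. - (1. - 2. * gibbs_risk_bound) ** 2 / (1. - 2. * disagreement)
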
def generate_bound_figures(N_list, ratios_list, risk, KLQP, delta=0.05):
    """Illustrate the bound calculations.

    N_list      : list of full sample sizes
    ratios_list : list of m/N ratios (where m is the number of labeled examples)
    risk        : Gibbs risk observed on the training set
    KLQP        : Kullback-Leibler divergence between prior and posterior
    delta       : confidence parameter (default=0.05)
    """
    divergences_dict = OrderedDict()
    divergences_dict["Kullback-Leibler"] = d_functions.kl_divergence
    divergences_dict["D*-function"] = d_functions.new_transductive_divergence
    divergences_dict["Quadratic Distance"] = d_functions.quadratic_distance
    divergences_dict["Variation Distance"] = d_functions.variation_distance
    divergences_dict["Triangular Discrimination"] = d_functions.triangular_discrimination

    n_rows = len(ratios_list)
    n_cols = len(divergences_dict)

    x_values = np.arange(0., 1.0, .005)
    pyplot.subplots_adjust(wspace=0.1, hspace=0.1)

    STATS_dict = dict()
    for i, divergence_name in enumerate(divergences_dict.keys()):
        print('*** D-function: ' + divergence_name + ' ***')
        for j, ratio in enumerate(ratios_list):
            ax = pyplot.subplot(n_rows, n_cols, j * n_cols + i + 1)

            # Compute and draw d-function values (blue curves)
            divergence = divergences_dict[divergence_name]
            divergence_values = [divergence(risk, x, ratio) for x in x_values]
            pyplot.plot(x_values, divergence_values, linewidth=2)

            # Compute and draw bound values (horizontal lines) for each value of N
            for N in N_list:
                m = int(N * ratio)  # the number of labeled examples must be an integer
                complexity_term = compute_transductive_complexity_term(divergence, m, N)
                bound = compute_general_transductive_gibbs_bound(
                    divergence, risk, m, N, KLQP, delta=delta,
                    complexity_term=complexity_term)
                rhs = (KLQP + log(complexity_term / delta)) / m
                print('m=%d N=%d bound=%0.3f' % (m, N, bound))

                # The x-values -1 and 2 fall outside the [0, 1] axis limits, so the
                # only visible marker sits at x=bound on the horizontal line y=rhs.
                handle = pyplot.plot([-1., bound, 2.], 3 * [rhs], 'o--',
                                     label='%0.3f' % bound)[0]
                STATS_dict[(i, N, ratio)] = (bound, rhs, handle)

            # Compute and draw risk limits (vertical dotted lines)
            risk_sup = 1. - ratio * (1. - risk)
            risk_inf = ratio * risk
            pyplot.plot(2 * [risk_sup], [0., 1.], 'k:')
            pyplot.plot(2 * [risk_inf], [0., 1.], 'k:')

            # Set plot properties
            pyplot.legend(loc=2)
            pyplot.xlim(0., 1.)
            pyplot.ylim(0., .5 if ratio > .4 else 1.)

            if j == n_rows - 1:
                pyplot.xlabel(divergence_name)
                ax.tick_params(axis='x', labelsize=13)
            else:
                pyplot.setp(ax.get_xticklabels(), visible=False)

            if i == 0:
                pyplot.ylabel("m/N = %0.1f" % ratio)
                ax.tick_params(axis='y', labelsize=13)
            else:
                pyplot.setp(ax.get_yticklabels(), visible=False)

    # Highlight the lowest bound for each (m, N) pair
    for N in N_list:
        for j, ratio in enumerate(ratios_list):
            best_bound = 1e6
            best_i = -1
            for i, _ in enumerate(divergences_dict.keys()):
                bound, _, handle = STATS_dict[(i, N, ratio)]
                if bound < best_bound:
                    best_bound, best_handle, best_i = bound, handle, i

            best_handle.set_marker('*')
            best_handle.set_markersize(14.)
            best_handle.set_markeredgewidth(0.)

            pyplot.subplot(n_rows, n_cols, j * n_cols + best_i + 1)
            pyplot.legend(loc=2)

    pyplot.show()
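
# Illustrative entry point: a minimal usage example. The parameter values
# below are arbitrary (they do not come from any experiment in this
# repository); adjust them to your own setting.
if __name__ == '__main__':
    compute_multiple_bounds(m=200, N=400, risk=0.1, KLQP=5.0,
                            disagreement=0.15, delta=0.05)
    generate_bound_figures(N_list=[200, 400, 800], ratios_list=[0.1, 0.3, 0.5],
                           risk=0.1, KLQP=5.0, delta=0.05)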