# Sweep the dataset size P and measure mode-wise generalization errors.
s = [i for i in spectrum if i > 0]
P = 50
P_teach = 300
P_vals = np.logspace(0.25, 3, num=15).astype('int')
num_repeats = 50

all_errs = np.zeros((len(P_vals), kmax))
all_mc = np.zeros(len(P_vals))
std_errs = np.zeros((len(P_vals), kmax))
std_MC = np.zeros(len(P_vals))

for i in range(len(P_vals)):
    P = P_vals[i]
    all_errs[i, :], all_mc[i], std_errs[i, :], std_MC[i] = generalization(
        P, P, spectrum, kmax, d, num_repeats, lamb=lamb)

# Theory prediction for the mode errors from the approximate learning-curve solver.
sol, p = approx_learning_curves.simulate_uc(spectrum, degens, lamb=lamb)

plt.rcParams.update({'font.size': 12})
kplot = [0, 1, 2, 4, 6]
colors = ['b', 'r', 'g', 'm', 'c']

# Save the empirical results to CSV.
all_errsdf = pd.DataFrame(all_errs)
std_errsdf = pd.DataFrame(std_errs)
mc_df = pd.DataFrame(all_mc)
std_mc_df = pd.DataFrame(std_MC)
all_errsdf.to_csv('results/all_errs_lamb%lf_d%d.csv' % (lamb, d))
std_errsdf.to_csv('results/std_errs_lamb%lf_d%d.csv' % (lamb, d))
mc_df.to_csv('results/mc_errs_lamb%lf_d%d.csv' % (lamb, d))
std_mc_df.to_csv('results/std_mc_errs_lamb%lf_d%d.csv' % (lamb, d))
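# --- Hedged sketch (not part of the original script): one way to overlay the empirical
# mode errors gathered above on the theory curves from simulate_uc. It assumes
# all_errs[i, k] is the error of mode k at sample size P_vals[i] and that sol[:, k] is the
# corresponding theory error at sizes p; both assumptions follow the shapes used above
# but are not confirmed by the original code.
for idx, k in enumerate(kplot):
    plt.loglog(P_vals, all_errs[:, k], 'o', color=colors[idx], label='k=%d' % k)
    plt.loglog(p, sol[:, k], '--', color=colors[idx])
plt.xlabel(r'$P$', fontsize=20)
plt.ylabel(r'$E_k$', fontsize=20)
plt.legend()
plt.tight_layout()
plt.show()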
print(spectrum[0:10])
print(spectrum_NNGP[0:10])
plt.loglog(np.linspace(1, len(spectrum), len(spectrum)), spectrum / spectrum[0], label='NTK')
plt.loglog(np.linspace(1, len(spectrum), len(spectrum)), spectrum_NNGP / spectrum_NNGP[0], label='NNGP')
plt.xlabel(r'$k$', fontsize=20)
plt.ylabel(r'$\lambda_k$', fontsize=20)
plt.legend()
plt.tight_layout()
plt.show()

sol, p = approx_learning_curves.simulate_uc(spectrum, degens, lamb=lamb)
sol_NNGP, p_NNGP = approx_learning_curves.simulate_uc(spectrum_NNGP, degens, lamb=lamb)
for k in range(3):
    plt.loglog(p, sol[:, k] / sol[0, k], color='black')
    plt.loglog(p, sol_NNGP[:, k] / sol_NNGP[0, k], '-.', color='red')
plt.show()

s = [i for i in spectrum if i > 0]
P = 50
P_teach = 300
P_vals = np.logspace(0.25, 3.0, num=15).astype('int')
num_repeats = 35
    elif i == 1:
        spec_emp[i] = ak2[i] + bk2[0] + bk2[i+1] * (i+1) / (2*i+d) + (i+d-3) / (2*i+d-4) * bk2[i-1]
    else:
        spec_emp[i] = ak2[i] + bk2[i+1] * (i+1) / (2*i+d) + bk2[i-1] * (i+d-3) / (2*i+d-4)

print(spec_emp)

# Compare the spectrum estimated on the sphere with the Gaussian-measure NTK spectrum.
plt.loglog(spec_emp, 'o', label='sample on sphere')
plt.loglog(theory_spectrum, 'o', label='sample gaussian (NTK)')
plt.legend()
plt.xlabel('k')
plt.ylabel(r'$\lambda_k$')
plt.savefig('gauss_vs_sphere2layer.pdf')
plt.show()

# Theory mode errors for both spectra, converted to per-mode generalization errors
# by dividing by lambda_k^2 and weighting by the teacher power ak2[k].
theory_g_sqr, p = approx_learning_curves.simulate_uc(spec_emp, degens, lamb=1e-10)
theory_NTK_g_sqr, p = approx_learning_curves.simulate_uc(theory_spectrum, degens, lamb=1e-10)
theory_gen = np.zeros(theory_g_sqr.shape)
theory_NTK = np.zeros(theory_NTK_g_sqr.shape)
for k in range(kmax):
    if theory_spectrum[k] != 0:
        theory_NTK[:, k] = theory_NTK_g_sqr[:, k] / theory_spectrum[k]**2 * ak2[k]
    if spec_emp[k] != 0:
        theory_gen[:, k] = theory_g_sqr[:, k] / spec_emp[k]**2 * ak2[k]

colors = ['b', 'r', 'g', 'm', 'c']
kplot = [0, 1, 2, 4, 6]
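# --- Hedged sketch (not from the original snippet): comparing the theory mode errors
# derived from the sphere-sampled spectrum (theory_gen) with those from the Gaussian
# NTK spectrum (theory_NTK), each normalized by its first value as in the plots above.
# The exact figure produced in the original code may differ.
for idx, k in enumerate(kplot):
    plt.loglog(p, theory_gen[:, k] / theory_gen[0, k], color=colors[idx], label='k=%d' % k)
    plt.loglog(p, theory_NTK[:, k] / theory_NTK[0, k], '-.', color=colors[idx])
plt.xlabel(r'$p$', fontsize=20)
plt.ylabel(r'$E_k(p)/E_k(0)$', fontsize=20)
plt.legend()
plt.show()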
print(train_errs)

# Load the kernel-regression errors saved by the experiment scripts.
kernel_errs = pd.read_csv(kernel_name + '.csv').to_numpy()
kernel_std = pd.read_csv(kernel_std_name + '.csv').to_numpy()
print(mode_errs.shape)

pvals = [5, 10, 20, 50, 100, 250, 500, 1000]
kmax = 20
d = 30
depth = 4

# Theory curves for the depth-4 NTK spectrum.
coeffs = compute_NTK_spectrum.get_effective_spectrum([depth - 1], kmax, d, ker='NTK')[0, :]
degens = np.array([gegenbauer.degeneracy(d, k) for k in range(kmax)])
theory_curves, p = approx_learning_curves.simulate_uc(
    coeffs, degens, lamb=0, num_pts=2000, max_p=np.log10(np.amax(pvals)))

plt.rcParams.update({'font.size': 12})
kvals = [1, 2, 4]
print(train_errs.shape)

# Training error over SGD iterations for each target mode k.
for i in range(train_errs.shape[0]):
    plt.semilogy(train_errs[i, 1:train_errs.shape[1]], label='k=%d' % (kvals[i]))
plt.legend()
plt.xlabel('SGD iteration', fontsize=20)
plt.ylabel(r'$E_{tr}$', fontsize=20)
plt.tight_layout()
plt.savefig(train_name + '.pdf')
plt.show()
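# --- Hedged sketch (assumed shapes, not from the original script): plotting the loaded
# kernel-regression errors with their error bars against sample size. It assumes
# kernel_errs[i, j] and kernel_std[i, j] correspond to target mode kvals[i] at sample
# size pvals[j]; the original CSV layout is not shown here, so this is illustrative only.
for i, k in enumerate(kvals):
    plt.errorbar(pvals, kernel_errs[i, :len(pvals)], yerr=kernel_std[i, :len(pvals)],
                 fmt='o', label='k=%d' % k)
plt.xscale('log')
plt.yscale('log')
plt.xlabel(r'$P$', fontsize=20)
plt.ylabel(r'$E_g$', fontsize=20)
plt.legend()
plt.tight_layout()
plt.show()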
# Calculate the spectrum of the teacher.
spectrum = gegenbauer.calculate_activation_coeffs(kmax, d)**2
degens = np.array([gegenbauer.degeneracy(d, k) for k in range(kmax)])

# TODO: fix get_effective_spectrum for higher d.
theory_spectrum = compute_NTK_spectrum.get_effective_spectrum([1], kmax, d, ker='NTK')[0, :]
theory_spectrum_hermite = compute_NTK_spectrum.get_effective_spectrum_hermite([2], kmax, d, ker='NTK')[0, :]
theory_spectrum_NNGP = compute_NTK_spectrum.get_effective_spectrum([1], kmax, d, ker='NNGP')[0, :]

# Theory mode errors for the NTK, NNGP, and Hermite spectra.
theory_g_sqr, p = approx_learning_curves.simulate_uc(theory_spectrum, degens, lamb=1e-10)
theory_g_sqr_NNGP, p = approx_learning_curves.simulate_uc(theory_spectrum_NNGP, degens, lamb=1e-10)
theory_g_sqr_hermite, p = approx_learning_curves.simulate_uc(theory_spectrum_hermite, degens, lamb=1e-8)

theory_gen = np.zeros(theory_g_sqr.shape)
theory_gen_NNGP = np.zeros(theory_g_sqr.shape)
theory_gen_hermite = np.zeros(theory_g_sqr.shape)

# Convert the solver output to per-mode generalization errors: divide by lambda_k^2
# and weight by the teacher power in mode k.
for k in range(kmax):
    if spectrum[k] != 0:
        theory_gen[:, k] = theory_g_sqr[:, k] / theory_spectrum[k]**2 * spectrum[k]