def plot_search_surface(known_nd_data, known_target_points, given_data_dims,
                        opt_model_params=None):
    """
    Draw the interpolated error surface together with the sampled points.

    The surface is evaluated on the grid returned by
    `compute_interpolation_grid` and drawn on plottool figure 2 (cleared
    first). The known samples are overlaid as yellow markers. When
    `opt_model_params` is given, the curve `compute_K(x, opt_model_params)`
    is drawn in orange along the given data dimension and the y / z limits
    are widened to contain it.

    Args:
        known_nd_data: array of sampled parameter points; columns 0 and 1 are
            used as the x / y coordinates of the scatter plot.
        known_target_points: array of error values, one per sampled point.
        given_data_dims: sequence holding exactly one column index; that
            column becomes the x range of the plot.
        opt_model_params: optional parameters forwarded to `compute_K`.

    Returns:
        the 3d axes returned by `pt.plot_surface3d`.

    Raises:
        AssertionError: if `given_data_dims` does not have exactly one entry
            (raised after the surface has already been drawn).
    """
    import plottool as pt
    pt.figure(2, doclf=True)

    # Evaluate the interpolant on a uniform grid.
    # NOTE(review): `0 * 5` evaluates to 0 -- presumably a disabled padding
    # factor; confirm against compute_interpolation_grid.
    grid_points, grid_shape = compute_interpolation_grid(known_nd_data, 0 * 5)
    grid_error = interpolate_error(known_nd_data, known_target_points,
                                   grid_points)

    grid_x = grid_points.T[0].reshape(grid_shape)
    grid_y = grid_points.T[1].reshape(grid_shape)
    grid_z = grid_error.reshape(grid_shape)
    surface_kw = dict(
        xlabel='nDaids',
        ylabel='K',
        zlabel='error',
        rstride=1,
        cstride=1,
        cmap=pt.plt.get_cmap('jet'),
        wire=True,
    )
    ax = pt.plot_surface3d(grid_x, grid_y, grid_z, **surface_kw)

    # Overlay the points that were actually sampled
    known_columns = known_nd_data.T
    ax.scatter(known_columns[0], known_nd_data.T[1], known_target_points,
               s=100, c=pt.YELLOW)

    assert len(given_data_dims) == 1, 'can only plot 1 given data dim'
    xdim = given_data_dims[0]
    # The y axis is the column following the given one (wrapping around)
    ydim = (xdim + 1) % (len(known_nd_data.T))

    lower = known_nd_data.min(axis=0)
    upper = known_nd_data.max(axis=0)
    xmin = lower[xdim]
    xmax = upper[xdim]
    ymin = lower[ydim]
    ymax = upper[ydim]
    zmin = known_target_points.min()
    zmax = known_target_points.max()

    if opt_model_params is not None:
        # Trace the learned model across the x range
        curve_x = np.linspace(xmin, xmax)
        curve_y = compute_K(curve_x, opt_model_params)
        curve_xy = np.array((curve_x, curve_y)).T
        curve_z = interpolate_error(known_nd_data, known_target_points,
                                    curve_xy)
        ax.plot(curve_x, curve_y, curve_z, c=pt.ORANGE)
        # Widen the y / z limits so the whole curve stays visible
        ymax = max(ymax, curve_y.max())
        ymin = min(ymin, curve_y.min())
        zmin = min(zmin, curve_z.min())
        zmax = max(zmax, curve_z.max())
        ax.scatter(curve_x, curve_y, curve_z, s=100, c=pt.ORANGE)

    ax.set_aspect('auto')
    ax.set_xlim(xmin, xmax)
    ax.set_ylim(ymin, ymax)
    ax.set_zlim(zmin, zmax)

    # Show the error axis with two decimals
    import matplotlib.ticker as mtick
    ax.zaxis.set_major_formatter(mtick.FormatStrFormatter('%.2f'))
    return ax
def iters_until_threshold():
    """
    How many iterations of ewma until you hit the poisson / binomial threshold

    This establishes a principled way to choose the threshold for the refresh
    criterion in my thesis. There are parameters --- moving parts --- that we
    need to work with: `a` the patience, `s` the span, and `mu` our ewma.

    `s` is a span parameter indicating how far we look back.

    `mu` is the average number of label-changing reviews in roughly the last
    `s` manual decisions.

    These numbers are used to estimate the probability that any of the next
    `a` manual decisions will be label-changing. When that probability falls
    below a threshold we terminate. The goal is to choose `a`, `s`, and the
    threshold `t`, such that the probability will fall below the threshold
    after a maximum of `a` consecutive non-label-changing reviews. IE we want
    to tie the patience parameter (how far we look ahead) to how far we
    actually are willing to go.

    Side effects:
        Prints the symbolic expressions and a few numeric evaluations, opens
        plottool figures 1-8, and writes these files to the working
        directory: a-s-t-poisson3d.png, a-s-t-binom3d.png,
        a-vs-s-poisson.png, a-vs-s-binom.png, a-vs-thresh-poisson.png,
        a-vs-thresh-binom.png, s-vs-thresh-poisson.png,
        s-vs-thresh-binom.png.

    Returns:
        None
    """
    import numpy as np
    import utool as ut
    import sympy as sym
    import plottool as pt

    i = sym.symbols('i', integer=True, nonnegative=True, finite=True)
    s = sym.symbols('s', integer=True, nonnegative=True, finite=True)
    thresh = sym.symbols('tau', real=True, nonnegative=True, finite=True)
    alpha = sym.symbols('alpha', real=True, nonnegative=True, finite=True)
    c_alpha = sym.symbols('c_alpha', real=True, nonnegative=True, finite=True)
    # patience
    a = sym.symbols('a', real=True, nonnegative=True, finite=True)

    available_subs = {
        a: 20,
        s: a,
        alpha: 2 / (s + 1),
        c_alpha: (1 - alpha),
    }

    def dosubs(expr, d=None):
        """ substitute `d` into `expr` repeatedly until nothing changes """
        if d is None:
            d = available_subs
        while True:
            expr1 = expr.subs(d)
            if expr == expr1:
                return expr1
            expr = expr1

    def eval_float(expr, subs):
        """ numeric value of `expr` after substituting `subs` """
        return float(expr.subs(subs).evalf())

    def save_figure(fig, fpath, size_inches):
        """ resize `fig` and write it to `fpath` cropped to its axes """
        fig.set_size_inches(*size_inches)
        fig.savefig(fpath, dpi=300,
                    bbox_inches=pt.extract_axes_extents(fig, combine=True))

    # mu is either the support for the poisson distribution
    # or it is the p in the binomial distribution.
    # It is updated at timestep i based on ewma, assuming each incoming
    # response is 0 (the review is never label-changing). Starting from
    # mu_0 = 1 this gives mu_1 = c_alpha * mu_0, mu_2 = c_alpha * mu_1, ...
    mu_i = c_alpha ** i

    # Estimate probability that any event will happen in the next `a` reviews
    # at time `i`.
    poisson_i = 1 - sym.exp(-mu_i * a)
    binom_i = 1 - (1 - mu_i) ** a

    # Expand probabilities to be a function of i, s, and a
    part = {k: v for k, v in available_subs.items() if k not in {a, s}}
    mu_i = dosubs(mu_i, d=part)
    poisson_i = dosubs(poisson_i, d=part)
    binom_i = dosubs(binom_i, d=part)

    # Numeric trajectories under the default substitutions (a=20, s=a)
    i_vals = np.arange(0, 100)

    def trajectory(expr):
        """ values of `expr` for each i in `i_vals` with defaults applied """
        concrete = dosubs(expr)  # loop invariant: only `i` stays free
        return np.array([eval_float(concrete, {i: i_}) for i_ in i_vals])

    mu_vals = trajectory(mu_i)
    binom_vals = trajectory(binom_i)
    poisson_vals = trajectory(poisson_i)

    # Find how many iters it actually takes my expt to terminate
    thesis_draft_thresh = np.exp(-2)
    for name, vals in [('mu', mu_vals), ('binom', binom_vals),
                       ('poisson', poisson_vals)]:
        below = np.where(vals < thesis_draft_thresh)[0]
        first = int(below[0]) if len(below) else None
        print('{} first drops below {:.4f} at i={}'.format(
            name, thesis_draft_thresh, first))

    sym.pprint(sym.simplify(mu_i))
    sym.pprint(sym.simplify(binom_i))
    sym.pprint(sym.simplify(poisson_i))

    # Find the thresholds that force termination after `a` reviews have passed
    # do this by setting i=a
    poisson_thresh = poisson_i.subs({i: a})
    binom_thresh = binom_i.subs({i: a})

    print('Poisson thresh')
    print(sym.latex(sym.Eq(thresh, poisson_thresh)))
    print(sym.latex(sym.Eq(thresh, sym.simplify(poisson_thresh))))

    print('poisson thresh at a=115, s=30: {}'.format(
        poisson_thresh.subs({a: 115, s: 30}).evalf()))

    sym.pprint(sym.Eq(thresh, poisson_thresh))
    sym.pprint(sym.Eq(thresh, sym.simplify(poisson_thresh)))

    print('Binomial thresh')
    sym.pprint(sym.simplify(binom_thresh))

    sym.pprint(sym.simplify(poisson_thresh.subs({s: a})))

    def taud(coeff):
        """ fraction of a full turn -> degrees """
        return coeff * 360

    # ---- threshold as a surface over (s, a)
    S, A = np.meshgrid(np.arange(1, 150, 1), np.arange(0, 150, 1))
    SA_coords = list(zip(S.ravel(), A.ravel()))

    def plot_thresh_surface(expr, label, fnum):
        """ wireframe of `expr` over the (s, a) grid, saved as a png """
        zdata = np.array([
            eval_float(expr, {a: aval, s: sval})
            for sval, aval in ut.ProgIter(SA_coords)
        ]).reshape(A.shape)
        fig = pt.figure(fnum=fnum, doclf=True)
        pt.gca().set_axis_off()
        pt.plot_surface3d(S, A, zdata, xlabel='s', ylabel='a',
                          rstride=3, cstride=3,
                          zlabel=label, mode='wire', contour=True,
                          title=label + '3d')
        pt.gca().set_zlim(0, 1)
        pt.gca().view_init(elev=taud(1 / 16), azim=taud(5 / 8))
        save_figure(fig, 'a-s-t-{}3d.png'.format(label), (10, 6))

    plot_thresh_surface(poisson_thresh, 'poisson', fnum=1)
    plot_thresh_surface(binom_thresh, 'binom', fnum=2)

    # Find point on the surface that achieves a reasonable threshold

    # Sympy can't solve this
    # sym.solve(sym.Eq(binom_thresh.subs({s: 50}), .05))
    # sym.solve(sym.Eq(poisson_thresh.subs({s: 50}), .05))
    # Find a numerical solution
    def solve_numeric(expr, target, solve_for, fixed=None, method=None,
                      bounds=None):
        """
        Minimize the squared difference between `expr` and `target`.

        Args:
            expr (Expr): symbolic expression
            target (float): numeric value
            solve_for (sympy.Symbol): The symbol you care about
            fixed (dict): fixed values of the other symbols
            method (str): forwarded to scipy.optimize.minimize
            bounds: forwarded to scipy.optimize.minimize

        Returns:
            the scipy optimization result (the solution is `result.x[0]`)

        Example:
            solve_numeric(poisson_thresh, .05, a, {s: 30}, method=None)
            solve_numeric(poisson_thresh, .05, a, {s: 30}, method='Nelder-Mead')
            solve_numeric(poisson_thresh, .05, a, {s: 30}, method='BFGS')
        """
        import scipy.optimize
        if fixed is None:
            fixed = {}
        # Find the symbol you want to solve for
        want_symbols = expr.free_symbols - set(fixed.keys())
        # TODO: can probably extend this to multiple params
        assert len(want_symbols) == 1, 'specify all but one var'
        assert solve_for == list(want_symbols)[0]
        fixed_expr = expr.subs(fixed)

        def func(a1):
            # The optimizer hands over a length-1 array; give sympy a scalar
            a1 = float(np.ravel(a1)[0])
            expr_value = float(fixed_expr.subs({solve_for: a1}).evalf())
            return (expr_value - target) ** 2

        # Start the search from the first fixed value (or 0 without any)
        x0 = list(fixed.values())[0] if fixed else 0
        result = scipy.optimize.minimize(func, x0=x0, method=method,
                                         bounds=bounds)
        if not result.success:
            print('\n')
            print(result)
            print('\n')
        return result

    # ---- numeric measurements of the iso-threshold lines
    thresh_vals = [.001, .01, .05, .1, .135]
    svals = np.arange(1, 100)

    def solve_patience_curves(expr, label):
        """ for each target threshold solve `a` as a function of `s` """
        plots = {}
        for target in ut.ProgIter(thresh_vals, bs=False, freq=1):
            avals = [
                solve_numeric(expr, target, a, {s: sval},
                              method='Nelder-Mead').x[0]
                for sval in ut.ProgIter(svals, label, freq=1)
            ]
            plots[target] = (svals, avals)
        return plots

    def plot_patience_curves(plots, label, fnum):
        """ one a-vs-s line per target threshold, saved as a png """
        fig = pt.figure(fnum=fnum)
        for target, dat in plots.items():
            pt.plt.plot(*dat, label='prob={}'.format(target))
        pt.gca().set_xlabel('s')
        pt.gca().set_ylabel('a')
        pt.legend()
        pt.gca().set_title(label)
        save_figure(fig, 'a-vs-s-{}.png'.format(label), (5, 3))

    target_poisson_plots = solve_patience_curves(poisson_thresh, 'poisson')
    plot_patience_curves(target_poisson_plots, 'poisson', fnum=3)

    target_binom_plots = solve_patience_curves(binom_thresh, 'binom')
    plot_patience_curves(target_binom_plots, 'binom', fnum=4)

    # ---- 1d slices through the threshold surface
    def plot_slices(expr, fnum, fixed_sym, fixed_vals, swept_sym, swept_vals,
                    ylabel, fpath):
        """ one curve of `expr` over `swept_sym` per value of `fixed_sym` """
        fig = pt.figure(fnum=fnum, doclf=True)
        for fixed_val in fixed_vals:
            pp = expr.subs({fixed_sym: fixed_val})
            pp_vals = np.array(
                [eval_float(pp, {swept_sym: val}) for val in swept_vals])
            pt.plot(swept_vals, pp_vals,
                    label='%s=%r' % (fixed_sym.name, fixed_val))
        pt.legend()
        pt.gca().set_xlabel(swept_sym.name)
        pt.gca().set_ylabel(ylabel)
        save_figure(fig, fpath, (5, 3))

    slice_vals = [1, 2, 3, 10, 20, 30, 40, 50]

    plot_slices(poisson_thresh, 5, s, slice_vals, a, np.arange(0, 200),
                'poisson prob after a reviews', 'a-vs-thresh-poisson.png')
    plot_slices(binom_thresh, 6, s, slice_vals, a, np.arange(0, 200),
                'binom prob after a reviews', 'a-vs-thresh-binom.png')

    # Distinct figure numbers so these do not clobber figure 5 above
    plot_slices(poisson_thresh, 7, a, slice_vals, s, np.arange(1, 200),
                'poisson prob', 's-vs-thresh-poisson.png')
    plot_slices(binom_thresh, 8, a, slice_vals, s, np.arange(1, 200),
                'binom prob', 's-vs-thresh-binom.png')

    # ---------------------
    # Print out a table
    print('mu_i at s=75, a=75: {}'.format(
        mu_i.subs({s: 75, a: 75}).evalf()))
    print('poisson thresh at s=75, a=75: {}'.format(
        poisson_thresh.subs({s: 75, a: 75}).evalf()))

    sval = 50

    def print_table(expr, plots):
        """ extrapolate `a` at `sval` from the median slope of each line """
        for target, dat in plots.items():
            slope = np.median(np.diff(dat[1]))
            aval = int(np.ceil(sval * slope))
            thresh_val = eval_float(expr, {s: sval, a: aval})
            print('aval={}, sval={}, thresh={}, target={}'.format(
                aval, sval, thresh_val, target))

    print('Poisson table')
    print_table(poisson_thresh, target_poisson_plots)
    print('Binomial table')
    print_table(binom_thresh, target_binom_plots)
def plot_search_surface(known_nd_data, known_target_points, given_data_dims,
                        opt_model_params=None):
    """
    Draw the interpolated error surface together with the sampled points.

    The surface is evaluated on the grid returned by
    `compute_interpolation_grid` and drawn on plottool figure 2 (cleared
    first). The known samples are overlaid as yellow markers. When
    `opt_model_params` is given, the curve `compute_K(x, opt_model_params)`
    is drawn in orange along the given data dimension and the y / z limits
    are widened to contain it.

    Args:
        known_nd_data: array of sampled parameter points; columns 0 and 1 are
            used as the x / y coordinates of the scatter plot.
        known_target_points: array of error values, one per sampled point.
        given_data_dims: sequence holding exactly one column index; that
            column becomes the x range of the plot.
        opt_model_params: optional parameters forwarded to `compute_K`.

    Returns:
        the 3d axes returned by `pt.plot_surface3d`.

    Raises:
        AssertionError: if `given_data_dims` does not have exactly one entry
            (raised after the surface has already been drawn).
    """
    import plottool as pt
    pt.figure(2, doclf=True)

    # Evaluate the interpolant on a uniform grid.
    # NOTE(review): `0 * 5` evaluates to 0 -- presumably a disabled padding
    # factor; confirm against compute_interpolation_grid.
    grid_points, grid_shape = compute_interpolation_grid(known_nd_data, 0 * 5)
    grid_error = interpolate_error(known_nd_data, known_target_points,
                                   grid_points)

    grid_x = grid_points.T[0].reshape(grid_shape)
    grid_y = grid_points.T[1].reshape(grid_shape)
    grid_z = grid_error.reshape(grid_shape)
    surface_kw = dict(
        xlabel='nDaids',
        ylabel='K',
        zlabel='error',
        rstride=1,
        cstride=1,
        cmap=pt.plt.get_cmap('jet'),
        wire=True,
    )
    ax = pt.plot_surface3d(grid_x, grid_y, grid_z, **surface_kw)

    # Overlay the points that were actually sampled
    known_columns = known_nd_data.T
    ax.scatter(known_columns[0], known_nd_data.T[1], known_target_points,
               s=100, c=pt.YELLOW)

    assert len(given_data_dims) == 1, 'can only plot 1 given data dim'
    xdim = given_data_dims[0]
    # The y axis is the column following the given one (wrapping around)
    ydim = (xdim + 1) % (len(known_nd_data.T))

    lower = known_nd_data.min(axis=0)
    upper = known_nd_data.max(axis=0)
    xmin = lower[xdim]
    xmax = upper[xdim]
    ymin = lower[ydim]
    ymax = upper[ydim]
    zmin = known_target_points.min()
    zmax = known_target_points.max()

    if opt_model_params is not None:
        # Trace the learned model across the x range
        curve_x = np.linspace(xmin, xmax)
        curve_y = compute_K(curve_x, opt_model_params)
        curve_xy = np.array((curve_x, curve_y)).T
        curve_z = interpolate_error(known_nd_data, known_target_points,
                                    curve_xy)
        ax.plot(curve_x, curve_y, curve_z, c=pt.ORANGE)
        # Widen the y / z limits so the whole curve stays visible
        ymax = max(ymax, curve_y.max())
        ymin = min(ymin, curve_y.min())
        zmin = min(zmin, curve_z.min())
        zmax = max(zmax, curve_z.max())
        ax.scatter(curve_x, curve_y, curve_z, s=100, c=pt.ORANGE)

    ax.set_aspect('auto')
    ax.set_xlim(xmin, xmax)
    ax.set_ylim(ymin, ymax)
    ax.set_zlim(zmin, zmax)

    # Show the error axis with two decimals
    import matplotlib.ticker as mtick
    ax.zaxis.set_major_formatter(mtick.FormatStrFormatter('%.2f'))
    return ax