def optimize_auto_init(p, dat, J, **ops):
    """
    Choose starting values automatically, then run optimize_locs_widths().

    The J initial test locations come from util.fit_gaussian_draw() applied
    to the data (fixed seed 829), and the initial Gaussian width is picked
    by GaussFSSD.grid_search_gwidth() from five candidates: the squared
    median distance multiplied by 2**-3, ..., 2**3.

    - p: forwarded unchanged to the grid search and to the optimizer
    - dat: object whose data() method returns the sample
    - J: number of test locations. Must be > 0.
    - ops: keyword arguments forwarded to optimize_locs_widths()

    Return (optimized locations, optimized Gaussian width, optimization info)
    """
    assert J > 0

    sample = dat.data()

    # The squared median distance anchors the candidate widths.
    median_sq = util.meddistance(sample, 1000)**2

    # This kernel object is never used afterwards. It is still constructed
    # so that whatever the constructor does (e.g. argument checks) still
    # happens -- NOTE(review): confirm it can be dropped.
    k = kernel.KGauss(median_sq * 2)

    # Initial locations: fit a Gaussian to the data and draw J points.
    init_locs = util.fit_gaussian_draw(sample, J, seed=829, reg=1e-6)

    # Candidate widths on a log2 grid around the squared median distance.
    n_candidates = 5
    scale_factors = np.power(2.0, np.linspace(-3, 3, n_candidates))
    candidate_widths = np.hstack((median_sq * scale_factors))

    best_index, _ = GaussFSSD.grid_search_gwidth(
        p, dat, init_locs, candidate_widths)
    gwidth = candidate_widths[best_index]

    assert util.is_real_num(gwidth), \
        'gwidth not real. Was %s' % str(gwidth)
    assert gwidth > 0, 'gwidth not positive. Was %.3g' % gwidth
    logging.info('After grid search, gwidth=%.3g' % gwidth)

    # Joint refinement of locations and width, starting from the values
    # found above. The result tuple is returned as is.
    return GaussFSSD.optimize_locs_widths(p, dat, gwidth, init_locs, **ops)
def job_fssdJ1q_opt(p, data_source, tr, te, r, J=1, null_sim=None):
    """
    FSSD test with a Gaussian kernel: optimize the parameters on tr, then
    run the test on te.

    - p: forwarded to the grid search, the optimizer and gof.FSSD
    - data_source: accepted but not used in this function
    - tr: training data; tr.data() is used for the initialization
    - te: data given to perform_test()
    - r: integer used for seeding (seed=r for the default null simulator,
        seed=r+1 for drawing the initial locations)
    - J: number of test locations
    - null_sim: null simulator. If None, gof.FSSDH0SimCovObs with
        n_simulate=2000 is used.

    The significance level `alpha` is not a parameter; it is read from the
    enclosing scope (its definition is not visible in this part of the file).

    Return a dictionary with keys 'test_result', 'time_secs', 'goftest'
    and 'opt_info'.
    """
    if null_sim is None:
        null_sim = gof.FSSDH0SimCovObs(n_simulate=2000, seed=r)

    train_sample = tr.data()

    # Settings handed to the optimizer.
    ops = {
        'reg': 1e-2,
        'max_iter': 40,
        'tol_fun': 1e-4,
        'disp': True,
        'locs_bounds_frac': 10.0,
        'gwidth_lb': 1e-1,
        'gwidth_ub': 1e4,
    }

    # Initialization, optimization and the test itself are all timed.
    with util.ContextTimer() as t:
        median_sq = util.meddistance(train_sample, 1000)**2

        # Never used afterwards; still constructed so that whatever the
        # constructor does keeps happening -- NOTE(review): confirm it can
        # be dropped.
        k = kernel.KGauss(median_sq * 2)

        # Initial locations: fit a Gaussian to the data and draw J points.
        init_locs = util.fit_gaussian_draw(
            train_sample, J, seed=r + 1, reg=1e-6)

        # Grid search over five widths on a log2 grid around median_sq.
        n_candidates = 5
        scale_factors = np.power(2.0, np.linspace(-3, 3, n_candidates))
        candidate_widths = np.hstack((median_sq * scale_factors))
        best_index, _ = gof.GaussFSSD.grid_search_gwidth(
            p, tr, init_locs, candidate_widths)
        gwidth = candidate_widths[best_index]

        assert util.is_real_num(gwidth), \
            'gwidth not real. Was %s' % str(gwidth)
        assert gwidth > 0, 'gwidth not positive. Was %.3g' % gwidth
        logging.info('After grid search, gwidth=%.3g' % gwidth)

        V_opt, gwidth_opt, info = gof.GaussFSSD.optimize_locs_widths(
            p, tr, gwidth, init_locs, **ops)

        # Build the test from the optimized parameters and run it on te.
        fssd_opt = gof.FSSD(
            p, kernel.KGauss(gwidth_opt), V_opt,
            null_sim=null_sim, alpha=alpha)
        fssd_opt_result = fssd_opt.perform_test(te)

    return {
        'test_result': fssd_opt_result,
        'time_secs': t.secs,
        'goftest': fssd_opt,
        'opt_info': info,
    }
def optimize_locs_params(
        p,
        dat,
        b0,
        c0,
        test_locs0,
        reg=1e-2,
        max_iter=100,
        tol_fun=1e-5,
        disp=False,
        locs_bounds_frac=100,
        b_lb=-20.0,
        b_ub=-1e-4,
        c_lb=1e-6,
        c_ub=1e3,
):
    """
    Optimize the test locations and the two parameters (b and c) of the
    IMQ kernel by maximizing the test power criterion.

         k(x,y) = (c^2 + ||x-y||^2)^b
            where c > 0 and b < 0.

    dat should not be the same data as used in the actual test (i.e., it
    should be a held-out set).

    - p: UnnormalizedDensity specifying the problem.
    - dat: a Data object (training set)
    - b0: initial parameter value for b (in the kernel). Must be < 0.
    - c0: initial parameter value for c (in the kernel)
    - test_locs0: Jxd numpy array. Initial V.
    - reg: reg to add to the mean/sqrt(variance) criterion to become
        mean/sqrt(variance + reg)
    - max_iter: maximum number of L-BFGS-B iterations
    - tol_fun: termination tolerance of the objective value
    - disp: True to print convergence messages
    - locs_bounds_frac: When making box bounds for the test_locs, extend
        the box defined by coordinate-wise min-max by std of each
        coordinate multiplied by this number.
    - b_lb: absolute lower bound on b. b is always < 0.
    - b_ub: absolute upper bound on b. Must be < 0.
    - c_lb: absolute lower bound on c. c is always > 0.
    - c_ub: absolute upper bound on c

    Return (V test_locs, b, c, optimization info log). The info log is the
    optimizer result converted to a dict, with an extra 'time_secs' entry.
    """
    # In the optimization, b is parameterized by sqrt(-b); squaring and
    # negating recovers b and keeps it negative. c is optimized directly;
    # it is kept positive by the box bound [c_lb, c_ub].
    J = test_locs0.shape[0]
    X = dat.data()
    _, d = X.shape

    def obj(sqrt_neg_b, c, V):
        # Negated because the optimizer minimizes.
        b = -sqrt_neg_b**2
        return -IMQFSSD.power_criterion(p, dat, b, c, V, reg=reg)

    def flatten(sqrt_neg_b, c, V):
        # Pack all parameters into one 1-d vector of length J*d + 2.
        return np.hstack((sqrt_neg_b, c, V.reshape(-1)))

    def unflatten(x):
        # Inverse of flatten().
        sqrt_neg_b = x[0]
        c = x[1]
        V = np.reshape(x[2:], (J, d))
        return sqrt_neg_b, c, V

    def flat_obj(x):
        sqrt_neg_b, c, V = unflatten(x)
        return obj(sqrt_neg_b, c, V)

    # Initial point (in the reparameterized space)
    x0 = flatten(np.sqrt(-b0), c0, test_locs0)

    # Make a box to bound test locations
    X_std = np.std(X, axis=0)
    # X_min: length-d array
    X_min = np.min(X, axis=0)
    X_max = np.max(X, axis=0)
    # V_lb: J x d
    V_lb = np.tile(X_min - locs_bounds_frac * X_std, (J, 1))
    V_ub = np.tile(X_max + locs_bounds_frac * X_std, (J, 1))

    # Bound the reparameterized values (not the original ones).
    # With b2 := sqrt(-b), lb <= b <= ub < 0 means
    #   sqrt(-ub) <= b2 <= sqrt(-lb).
    # Note the swapped positions of ub and lb.
    x0_lb = np.hstack((np.sqrt(-b_ub), c_lb, np.reshape(V_lb, -1)))
    x0_ub = np.hstack((np.sqrt(-b_lb), c_ub, np.reshape(V_ub, -1)))
    # Materialize the pairs: on Python 3, zip() gives a one-shot iterator
    # with no len(), whereas the optimizer expects a sequence of
    # (min, max) pairs.
    x0_bounds = list(zip(x0_lb, x0_ub))

    # optimize. Time the optimization as well.
    # https://docs.scipy.org/doc/scipy/reference/optimize.minimize-lbfgsb.html
    grad_obj = autograd.elementwise_grad(flat_obj)
    with util.ContextTimer() as timer:
        opt_result = scipy.optimize.minimize(
            flat_obj,
            x0,
            method='L-BFGS-B',
            bounds=x0_bounds,
            tol=tol_fun,
            options={
                'maxiter': max_iter,
                'ftol': tol_fun,
                'disp': disp,
                'gtol': 1.0e-06,
            },
            jac=grad_obj,
        )

    opt_result = dict(opt_result)
    opt_result['time_secs'] = timer.secs
    x_opt = opt_result['x']
    sqrt_neg_b, c, V_opt = unflatten(x_opt)
    b = -sqrt_neg_b**2
    assert util.is_real_num(b), 'b is not real. Was {}'.format(b)
    assert b < 0
    assert util.is_real_num(c), 'c is not real. Was {}'.format(c)
    assert c > 0

    return V_opt, b, c, opt_result
def optimize_locs_widths(
        p,
        dat,
        gwidth0,
        test_locs0,
        reg=1e-2,
        max_iter=100,
        tol_fun=1e-5,
        disp=False,
        locs_bounds_frac=100,
        gwidth_lb=None,
        gwidth_ub=None,
        use_2terms=False,
):
    """
    Optimize the test locations and the Gaussian kernel width by
    maximizing a test power criterion. dat should not be the same data as
    used in the actual test (i.e., it should be a held-out set).

    - p: forwarded to GaussFSSD.power_criterion()
    - dat: a Data object
    - gwidth0: initial value of the Gaussian width^2
    - test_locs0: Jxd numpy array. Initial V.
    - reg: reg to add to the mean/sqrt(variance) criterion to become
        mean/sqrt(variance + reg)
    - max_iter: maximum number of L-BFGS-B iterations
    - tol_fun: termination tolerance of the objective value
    - disp: True to print convergence messages
    - locs_bounds_frac: When making box bounds for the test_locs, extend
        the box defined by coordinate-wise min-max by std of each
        coordinate multiplied by this number.
    - gwidth_lb: absolute lower bound on the Gaussian width^2. If None,
        max(1e-2 * med2, 1e-3) is used, where med2 is the squared median
        distance of the data (util.meddistance with subsample=1000).
    - gwidth_ub: absolute upper bound on the Gaussian width^2. If None,
        min(1e2 * med2, 1e5) is used.
    - use_2terms: If True, then besides the signal-to-noise ratio
        criterion, the objective function will also include the first
        term that is dropped.

    Return (V test_locs, gaussian width, optimization info log). The info
    log is the optimizer result converted to a dict, with an extra
    'time_secs' entry.
    """
    J = test_locs0.shape[0]
    X = dat.data()
    _, d = X.shape

    # Parameterize the Gaussian width with its square root (then square
    # later) to automatically enforce the positivity.
    def obj(sqrt_gwidth, V):
        # Negated because the optimizer minimizes.
        return -GaussFSSD.power_criterion(
            p, dat, sqrt_gwidth**2, V, reg=reg, use_2terms=use_2terms)

    def flatten(sqrt_gwidth, V):
        # Pack all parameters into one 1-d vector of length J*d + 1.
        return np.hstack((sqrt_gwidth, V.reshape(-1)))

    def unflatten(x):
        # Inverse of flatten().
        sqrt_gwidth = x[0]
        V = np.reshape(x[1:], (J, d))
        return sqrt_gwidth, V

    def flat_obj(x):
        sqrt_gwidth, V = unflatten(x)
        return obj(sqrt_gwidth, V)

    # Initial point (in the reparameterized space)
    x0 = flatten(np.sqrt(gwidth0), test_locs0)

    # Make sure that the optimized gwidth is not too small or too large.
    fac_min = 1e-2
    fac_max = 1e2
    med2 = util.meddistance(X, subsample=1000)**2
    if gwidth_lb is None:
        gwidth_lb = max(fac_min * med2, 1e-3)
    if gwidth_ub is None:
        gwidth_ub = min(fac_max * med2, 1e5)

    # Make a box to bound test locations
    X_std = np.std(X, axis=0)
    # X_min: length-d array
    X_min = np.min(X, axis=0)
    X_max = np.max(X, axis=0)
    # V_lb: J x d
    V_lb = np.tile(X_min - locs_bounds_frac * X_std, (J, 1))
    V_ub = np.tile(X_max + locs_bounds_frac * X_std, (J, 1))

    # Take square roots of the width bounds because we parameterize with
    # the square root.
    x0_lb = np.hstack((np.sqrt(gwidth_lb), np.reshape(V_lb, -1)))
    x0_ub = np.hstack((np.sqrt(gwidth_ub), np.reshape(V_ub, -1)))
    # Materialize the pairs: on Python 3, zip() gives a one-shot iterator
    # with no len(), whereas the optimizer expects a sequence of
    # (min, max) pairs.
    x0_bounds = list(zip(x0_lb, x0_ub))

    # optimize. Time the optimization as well.
    # https://docs.scipy.org/doc/scipy/reference/optimize.minimize-lbfgsb.html
    grad_obj = autograd.elementwise_grad(flat_obj)
    with util.ContextTimer() as timer:
        opt_result = scipy.optimize.minimize(
            flat_obj,
            x0,
            method='L-BFGS-B',
            bounds=x0_bounds,
            tol=tol_fun,
            options={
                'maxiter': max_iter,
                'ftol': tol_fun,
                'disp': disp,
                'gtol': 1.0e-06,
            },
            jac=grad_obj,
        )

    opt_result = dict(opt_result)
    opt_result['time_secs'] = timer.secs
    x_opt = opt_result['x']
    sq_gw_opt, V_opt = unflatten(x_opt)
    # Square to go back from the reparameterized value to width^2.
    gw_opt = sq_gw_opt**2

    assert util.is_real_num(gw_opt), \
        'gw_opt is not real. Was %s' % str(gw_opt)

    return V_opt, gw_opt, opt_result