def get_dxopt_delta_p(lin_solver, df_dx, d_dp_df_dx, d_dx_df_dx, A, b, xopt, p, delta_p_direction):
    # f(x, p) should be convex
    x_len = A.shape[1]

    # get tight constraints
    A_tight, b_tight = get_tight_constraints(A, b, xopt)
    num_tight = A_tight.shape[0]

    # get d
    p_dim = len(delta_p_direction.shape)
    delta_p_direction_broadcasted = np.tile(delta_p_direction,
                                            tuple([x_len] + [1 for i in range(p_dim)]))
    d_top = -np.sum(d_dp_df_dx(p, xopt) * delta_p_direction_broadcasted,
                    axis=tuple(range(1, 1 + p_dim)))
    d_bottom = np.zeros(num_tight)
    d = np.hstack((d_top, d_bottom))

    # get C
    C = np.vstack((np.hstack((d_dx_df_dx(xopt, p), -A_tight.T)),
                   np.hstack((A_tight, np.zeros((num_tight, num_tight))))))

    # get deriv
    deriv = lin_solver(C, d)
    # print('solver error:', np.linalg.norm(np.dot(C, deriv) - d))
    return deriv
def vjp_all(g):
    vjp_y = g[-1, :]
    vjp_t0 = 0
    time_vjp_list = []
    vjp_args = np.zeros(np.size(flat_args))

    for i in range(T - 1, 0, -1):
        # Compute effect of moving measurement time.
        vjp_cur_t = np.dot(func(yt[i, :], t[i], *func_args), g[i, :])
        time_vjp_list.append(vjp_cur_t)
        vjp_t0 = vjp_t0 - vjp_cur_t

        # Run augmented system backwards to the previous observation.
        aug_y0 = np.hstack((yt[i, :], vjp_y, vjp_t0, vjp_args))
        aug_ans = odeint(augmented_dynamics, aug_y0,
                         np.array([t[i], t[i - 1]]), tuple((flat_args,)), **kwargs)
        _, vjp_y, vjp_t0, vjp_args = unpack(aug_ans[1])

        # Add gradient from current output.
        vjp_y = vjp_y + g[i - 1, :]

    time_vjp_list.append(vjp_t0)
    vjp_times = np.hstack(time_vjp_list)[::-1]

    return None, vjp_y, vjp_times, unflatten(vjp_args)
def location_mixture_logpdf(samps, locations, location_weights, distr_at_origin,
                            contr_var=False, variant=1):
    diff = samps - locations[:, np.newaxis, :]
    lpdfs = distr_at_origin.logpdf(
        diff.reshape([np.prod(diff.shape[:2]), diff.shape[-1]])).reshape(diff.shape[:2])
    logprop_weights = log(location_weights / location_weights.sum())[:, np.newaxis]
    if not contr_var:
        return logsumexp(lpdfs + logprop_weights, 0)
    else:
        time0 = lpdfs + logprop_weights + log(len(location_weights))
        if variant == 1:
            time1 = np.hstack([time0[:, 1:], time0[:, :1]])
            cov = np.mean(time0**2 - time0 * time1)
            var = np.mean((time0 - time1)**2)
            lpdfs = lpdfs - cov / var * (time0 - time1)
            return logsumexp(lpdfs - log(len(location_weights)), 0)
        elif variant == 2:
            cvar = (time0[:, :, np.newaxis]
                    - np.dstack([np.hstack([time0[:, 1:], time0[:, :1]]),
                                 np.hstack([time0[:, -1:], time0[:, :-1]])]))

            # self-covariance matrix of control variates
            K_cvar = np.diag(np.mean(cvar**2, (0, 1)))
            # add off diagonal
            K_cvar = K_cvar + (1. - np.eye(2)) * np.mean(cvar[:, :, 0] * cvar[:, :, 1])

            # covariance of control variates with random variable
            cov = np.mean(time0[:, :, np.newaxis] * cvar, 0).mean(0)

            optimal_comb = np.linalg.inv(K_cvar) @ cov
            lpdfs = lpdfs - cvar @ optimal_comb
            return logsumexp(lpdfs - log(len(location_weights)), 0)
def get_dL_dp_thru_xopt(lin_solver, df_dx, d_dp_df_dx, d_dx_df_dx, dL_dxopt,
                        A, b, xopt, p, L_args=None, f_args=None):
    # assumes L(x_opt), x_opt = argmin_x f(x, p) subject to Ax <= b
    # L_args is for arguments to L besides x_opt

    # first, get dL/dx_opt to calculate the gradient at x_opt
    if L_args is None:
        dL_dxopt_anal_val1 = dL_dxopt(xopt)
    else:
        dL_dxopt_anal_val1 = dL_dxopt(xopt, L_args)

    # get tight constraints
    A_tight, b_tight = get_tight_constraints(A, b, xopt)
    num_tight = A_tight.shape[0]

    # make C matrix
    if f_args is None:
        C_corner = d_dx_df_dx(xopt, p)
    else:
        C_corner = d_dx_df_dx(xopt, p, f_args)
    C = np.vstack((np.hstack((C_corner, -A_tight.T)),
                   np.hstack((A_tight, np.zeros((num_tight, num_tight))))))

    # make d vector
    d = np.hstack((dL_dxopt_anal_val1, np.zeros(num_tight)))

    # solve Cv = d for v
    v = lin_solver(C, d)
    print('solver error:', np.linalg.norm(np.dot(C, v) - d))

    # make D
    if f_args is None:
        d_dp_df_dx_anal_val1 = d_dp_df_dx(p, xopt)
    else:
        d_dp_df_dx_anal_val1 = d_dp_df_dx(p, xopt, f_args)
    D = np.vstack((-d_dp_df_dx_anal_val1, np.zeros((num_tight,) + p.shape)))

    return np.sum(D.T * v[tuple([np.newaxis for i in range(len(p.shape))]) + (slice(None),)],
                  axis=-1).T
def add_data(self, S, F=None):
    """
    Add a data set to the list of observations.
    First, filter the data with the impulse response basis,
    then instantiate a set of parents for this data set.

    :param S: a TxK matrix of event counts for each time bin and each process.
    """
    assert isinstance(S, np.ndarray) and S.ndim == 2 and S.shape[1] == self.K \
           and np.amin(S) >= 0 and np.issubdtype(S.dtype, np.integer), \
           "Data must be a TxK array of event counts"

    T = S.shape[0]

    if F is None:
        # Filter the data into a TxKxB array
        Ftens = self.basis.convolve_with_basis(S)

        # Flatten this into a T x (KxB) matrix
        # [F00, F01, F02, F10, F11, ... F(K-1)0, F(K-1)(B-1)]
        F = Ftens.reshape((T, self.K * self.B))
        assert np.allclose(F[:, 0], Ftens[:, 0, 0])
        if self.B > 1:
            assert np.allclose(F[:, 1], Ftens[:, 0, 1])
        if self.K > 1:
            assert np.allclose(F[:, self.B], Ftens[:, 1, 0])

        # Prepend a column of ones
        F = np.hstack((np.ones((T, 1)), F))

    for k, node in enumerate(self.nodes):
        node.add_data(F, S[:, k])
def MLE_EP(self, random_init):
    w_init = RS.normal(0, 1, (self.dimx, self.dimz))
    if random_init is False:
        # use the true W as the starting point instead of a random draw
        w_init = self.W

    print("initialisation of W:")
    print(w_init)
    print("")

    w = self.marginal_likelihood(w_init)
    print("True W")
    print(self.W)
    print("MLE W")
    print(w)

    mus = np.array([])
    for i in range(self.n):
        mu = self.get_mu(self.observed[i], w)
        mus = np.hstack((mus, mu))
    mus = mus.reshape((self.n, 2))

    sig = np.dot(self.W.transpose(), self.W)
    sig = sig / self.sigx
    sig = np.linalg.inv(sig)
    return mus, sig
def add_data(self, F, S):
    T = F.shape[0]
    assert S.shape == (T,) and np.issubdtype(S.dtype, np.integer)

    if F.shape[1] == self.K * self.B:
        # Prepend a column of ones for the bias term
        F = np.hstack((np.ones((T, 1)), F))
    else:
        assert F.shape[1] == 1 + self.K * self.B

    self.data_list.append((F, S))
def compute_ba_J(cams, X, w, obs, feats):
    p = obs.shape[0]

    reproj_err_d = []
    for i in range(p):
        params = np.hstack((cams[obs[i, 0]], X[obs[i, 1]], w[i]))
        reproj_err_d.append(compute_reproj_err_d(params, feats[i]))

    w_err_d = []
    for curr_w in w:
        w_err_d.append(compute_w_err_d(curr_w))

    return (reproj_err_d, w_err_d)
def dw(px, py, d, r, T):
    R = so3.exp(r)
    x0 = np.array([px, py, 1])
    X = np.dot(R, d * x0) + T

    # derivative of projection w = xy/z, as a matrix
    # dw/dk = dw/dX * dX/dk
    dwdX = np.array([
        [X[2], 0, -X[0]],
        [0, X[2], -X[1]]]) / (X[2] * X[2])

    dXdR = so3.diff(r, R, d * x0)
    dXdT = np.eye(3)
    dXdd = np.dot(R, x0).reshape((3, 1))

    return np.dot(dwdX, np.hstack((dXdR, dXdT, dXdd)))
def get_KMM_ineq_constraints(num_train, B_max, eps):
    G_gt_0 = -np.eye(num_train)
    h_gt_0 = np.zeros(num_train)
    G_lt_B_max = np.eye(num_train)
    h_lt_B_max = np.ones(num_train) * B_max
    G_B_sum_lt = np.ones(num_train, dtype=float)
    h_B_sum_lt = (1 + eps) * float(num_train) * np.ones(1)
    G_B_sum_gt = -np.ones(num_train, dtype=float)
    h_B_sum_gt = -(1 - eps) * float(num_train) * np.ones(1)
    G = np.vstack((G_gt_0, G_lt_B_max, G_B_sum_lt, G_B_sum_gt))
    h = np.hstack((h_gt_0, h_lt_B_max, h_B_sum_lt, h_B_sum_gt))
    return G, h
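# A minimal, hedged usage sketch (added illustration; num_train, B_max and eps
# below are made-up values): G stacks the elementwise lower/upper bounds and the
# two sum constraints, so the inequality system G w <= h has 2 * num_train + 2 rows.
if __name__ == '__main__':
    G, h = get_KMM_ineq_constraints(num_train=100, B_max=1000.0, eps=0.01)
    assert G.shape == (2 * 100 + 2, 100)
    assert h.shape == (2 * 100 + 2,)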
def job_met_gwgrid(sample_source, tr, te, r, J):
    """MeanEmbeddingTest. Optimize only the Gaussian width with grid search.
    Fix the test locations."""
    # optimize on the training set
    T_randn = tst.MeanEmbeddingTest.init_locs_2randn(tr, J, seed=r + 92856)
    med = util.meddistance(tr.stack_xy(), 1000)
    list_gwidth = np.hstack(((med**2) * (2.0**np.linspace(-5, 5, 40))))
    list_gwidth.sort()
    besti, powers = tst.MeanEmbeddingTest.grid_search_gwidth(tr, T_randn, list_gwidth, alpha)

    best_width2 = list_gwidth[besti]
    met_grid = tst.MeanEmbeddingTest(T_randn, best_width2, alpha)
    return met_grid.perform_test(te)
def lbfgsUpdate(y, s, corrections, debug, old_dirs, old_stps, Hdiag):
    """
    This function implements the update formula of L-BFGS
    """
    ys = np.dot(y, s)
    if ys > 1e-10:
        numCorrections = old_dirs.shape[1]
        if numCorrections < corrections:
            # full update
            new_dirs = np.hstack((old_dirs, s.reshape(s.size, 1)))
            new_stps = np.hstack((old_stps, y.reshape(y.size, 1)))
        else:
            # limited-memory update: drop the oldest correction pair
            new_dirs = np.hstack((old_dirs[:, 1:corrections], s.reshape(s.size, 1)))
            new_stps = np.hstack((old_stps[:, 1:corrections], y.reshape(y.size, 1)))
        new_Hdiag = ys / np.dot(y, y)
    else:
        if debug:
            print("Skipping update")
        (new_dirs, new_stps, new_Hdiag) = (old_dirs, old_stps, Hdiag)
    return (new_dirs, new_stps, new_Hdiag)
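# A minimal, hedged usage sketch (added illustration; the vectors below are
# made-up values): maintain the L-BFGS correction pairs for a 5-dimensional
# problem, starting from empty histories.
if __name__ == '__main__':
    n = 5
    old_dirs = np.zeros((n, 0))
    old_stps = np.zeros((n, 0))
    Hdiag = 1.0
    s = np.ones(n) * 0.1  # parameter change x_new - x_old
    y = np.ones(n) * 0.2  # gradient change g_new - g_old
    old_dirs, old_stps, Hdiag = lbfgsUpdate(y, s, corrections=10, debug=False,
                                            old_dirs=old_dirs, old_stps=old_stps,
                                            Hdiag=Hdiag)
    assert old_dirs.shape == (n, 1) and old_stps.shape == (n, 1)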
def _ntied_transmat_prior(self, transmat_val):  # TODO: document choices
    transmat = np.empty((0, self.n_components))
    for r in range(self.n_unique):
        row = np.empty((self.n_chain, 0))
        for c in range(self.n_unique):
            if r == c:
                subm = np.array(sp.diags([transmat_val[r, c], 1.0], [0, 1],
                                         shape=(self.n_chain, self.n_chain)).todense())
            else:
                lower_left = np.zeros((self.n_chain, self.n_chain))
                lower_left[self.n_tied, 0] = 1.0
                subm = np.kron(transmat_val[r, c], lower_left)
            row = np.hstack((row, subm))
        transmat = np.vstack((transmat, row))
    return transmat
def MLE_EP(self, random_init):
    w_init = RS.normal(0, 1, (self.dimx, self.dimz))
    if random_init is False:
        w_init = self.W

    w = self.marginal_likelihood(w_init)

    mus = np.array([])
    for i in range(self.n):
        mu = self.get_mu(self.observed[i], w)
        mus = np.hstack((mus, mu))
    mus = mus.reshape((self.n, 2))

    sig = np.dot(self.W.transpose(), self.W)
    sig = sig / self.sigx
    sig = np.linalg.inv(sig)
    return mus, sig
def job_scf_gwgrid(sample_source, tr, te, r, J):
    rand_state = np.random.get_state()
    np.random.seed(r + 92856)

    d = tr.dim()
    T_randn = np.random.randn(J, d)
    np.random.set_state(rand_state)

    # grid search to determine the initial gwidth
    mean_sd = tr.mean_std()
    scales = 2.0**np.linspace(-4, 4, 20)
    list_gwidth = np.hstack((mean_sd * scales * (d**0.5), 2**np.linspace(-10, 10, 20)))
    list_gwidth.sort()
    besti, powers = tst.SmoothCFTest.grid_search_gwidth(tr, T_randn, list_gwidth, alpha)

    # initialize with the best width from the grid search
    best_width = list_gwidth[besti]
    scf_gwgrid = tst.SmoothCFTest(T_randn, best_width, alpha)
    return scf_gwgrid.perform_test(te)
def job_quad_mmd(sample_source, tr, te, r):
    """Quadratic MMD with grid search to choose the best Gaussian width."""
    # If n is too large, pairwise median computation can cause a memory error.
    with util.ContextTimer() as t:
        med = util.meddistance(tr.stack_xy(), 1000)
        list_gwidth = np.hstack(((med**2) * (2.0**np.linspace(-4, 4, 30))))
        list_gwidth.sort()
        list_kernels = [kernel.KGauss(gw2) for gw2 in list_gwidth]

        # grid search to choose the best Gaussian width
        besti, powers = tst.QuadMMDTest.grid_search_kernel(tr, list_kernels, alpha)
        # perform test
        best_ker = list_kernels[besti]
        mmd_test = tst.QuadMMDTest(best_ker, n_permute=400, alpha=alpha)
        test_result = mmd_test.perform_test(te)
    return {
        # 'test_method': mmd_test,
        'test_result': test_result,
        'time_secs': t.secs}
def job_met_gwgrid(prob_label, tr, te, r, ni, n):
    """MeanEmbeddingTest. Optimize only the Gaussian width with grid search.
    Fix the test locations."""
    with util.ContextTimer() as t:
        # optimize on the training set
        T_randn = tst.MeanEmbeddingTest.init_locs_2randn(tr, J, seed=r + 92856)
        med = util.meddistance(tr.stack_xy(), 1000)
        list_gwidth = np.hstack(((med**2) * (2.0**np.linspace(-5, 5, 40))))
        list_gwidth.sort()
        besti, powers = tst.MeanEmbeddingTest.grid_search_gwidth(tr, T_randn, list_gwidth, alpha)

        best_width2 = list_gwidth[besti]
        met_grid = tst.MeanEmbeddingTest(T_randn, best_width2, alpha)
        met_grid_result = met_grid.perform_test(te)
    return {
        # 'test_method': met_grid,
        'test_result': met_grid_result,
        'time_secs': t.secs}
def _ntied_transmat(self, transmat_val):  # TODO: document choices
    #                           +-----------------+
    #                           |a|1|0|0|0|0|0|0|0|
    #                           +-----------------+
    #                           |0|a|1|0|0|0|0|0|0|
    #                           +-----------------+
    # +---+---+---+             |0|0|a|b|0|0|c|0|0|
    # | a | b | c |             +-----------------+
    # +-----------+             |0|0|0|e|1|0|0|0|0|
    # | d | e | f |  +---->     +-----------------+
    # +-----------+             |0|0|0|0|e|1|0|0|0|
    # | g | h | i |             +-----------------+
    # +---+---+---+             |d|0|0|0|0|e|f|0|0|
    #                           +-----------------+
    #                           |0|0|0|0|0|0|i|1|0|
    #                           +-----------------+
    #                           |0|0|0|0|0|0|0|i|1|
    #                           +-----------------+
    #                           |g|0|0|h|0|0|0|0|i|
    #                           +-----------------+
    # for a model with n_unique = 3 and n_tied = 2
    transmat = np.empty((0, self.n_components))
    for r in range(self.n_unique):
        row = np.empty((self.n_chain, 0))
        for c in range(self.n_unique):
            if r == c:
                subm = np.array(sp.diags([transmat_val[r, c],
                                          1 - transmat_val[r, c]], [0, 1],
                                         shape=(self.n_chain, self.n_chain)).todense())
            else:
                lower_left = np.zeros((self.n_chain, self.n_chain))
                lower_left[self.n_tied, 0] = 1.0
                subm = np.kron(transmat_val[r, c], lower_left)
            row = np.hstack((row, subm))
        transmat = np.vstack((transmat, row))
    return transmat
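# A standalone, hedged sketch (added illustration, not part of the original
# class) of the block structure drawn above, using made-up transition values:
# n_unique = 3 and n_tied = 2 give chains of length n_chain = 3 and a
# (n_unique * n_chain) x (n_unique * n_chain) = 9 x 9 transition matrix.
if __name__ == '__main__':
    import numpy as np
    import scipy.sparse as sp

    n_unique, n_tied = 3, 2
    n_chain = n_tied + 1
    transmat_val = np.full((n_unique, n_unique), 1.0 / n_unique)

    rows = []
    for r in range(n_unique):
        blocks = []
        for c in range(n_unique):
            if r == c:
                # stay in the chain with prob a, advance to the next tied state with 1 - a
                blocks.append(np.array(sp.diags(
                    [transmat_val[r, c], 1 - transmat_val[r, c]], [0, 1],
                    shape=(n_chain, n_chain)).todense()))
            else:
                # a chain can only be left from its last tied state
                lower_left = np.zeros((n_chain, n_chain))
                lower_left[n_tied, 0] = 1.0
                blocks.append(transmat_val[r, c] * lower_left)
        rows.append(np.hstack(blocks))
    transmat = np.vstack(rows)
    assert transmat.shape == (n_unique * n_chain, n_unique * n_chain)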
def job_scf_gwgrid(sample_source, tr, te, r):
    rand_state = np.random.get_state()
    np.random.seed(r + 92856)

    with util.ContextTimer() as t:
        d = tr.dim()
        T_randn = np.random.randn(J, d)
        np.random.set_state(rand_state)

        # grid search to determine the initial gwidth
        mean_sd = tr.mean_std()
        scales = 2.0**np.linspace(-4, 4, 20)
        list_gwidth = np.hstack((mean_sd * scales * (d**0.5), 2**np.linspace(-8, 8, 20)))
        list_gwidth.sort()
        besti, powers = tst.SmoothCFTest.grid_search_gwidth(tr, T_randn, list_gwidth, alpha)

        # initialize with the best width from the grid search
        best_width = list_gwidth[besti]
        scf_gwgrid = tst.SmoothCFTest(T_randn, best_width, alpha)
        test_result = scf_gwgrid.perform_test(te)

    result = {'test_method': scf_gwgrid,
              'test_result': test_result,
              'time_secs': t.secs}
    return result
def get_n_pqrsources(prob_label): """ Return (n, [ (param, P, Q, ds) for ...] = a sample size and a list of tuples, - n: a sample size recommended for the problem - param: a parameter being varied - P: a kmod.model.Model representing the model P (may depend on param) - Q: a kmod.model.Model representing the model Q (may depend on param) - ds: a DataSource. The DataSource generates sample from R (may depend on param). * (P, Q, ds) together specity a three-sample (or model comparison) problem. """ prob2tuples = { # p,q,r all standard normal in 1d. Mean shift problem. Unit variance. 'stdnorm_shift_d1': ( 300, [ ( mp, # p model.ComposedModel( p=density.IsotropicNormal(np.array([mp]), 1.0)), # q = N(0.5, 1). p is intially closer to r. Then moves further away. model.ComposedModel( p=density.IsotropicNormal(np.array([0.5]), 1.0)), # data generating distribution r = N(0, 1) data.DSIsotropicNormal(np.array([0.0]), 1.0), ) for mp in [0.4, 0.45, 0.55, 0.6, 0.7] ]), # p,q,r all standard normal in 10d. Mean shift problem. Unit variance. 'stdnorm_shift_d10': ( 300, [ ( mp, # p model.ComposedModel( p=density.IsotropicNormal(np.hstack((mp, [0.0] * 9)), 1.0) ), # q = N([0.5, ..], 1). p is intially closer to r. Then moves further away. model.ComposedModel( p=density.IsotropicNormal(np.hstack((0.5, [0.0] * 9)), 1.0) ), # data generating distribution r = N(0, 1) data.DSIsotropicNormal(np.zeros(10), 1.0), ) for mp in [0.4, 0.45, 0.55, 0.6, 0.7] ]), # p,q,r all standard normal in 10d. Mean shift problem. Unit variance. 'stdnorm_shift_d20': ( 500, [ ( mp, # p model.ComposedModel( p=density.IsotropicNormal(np.hstack((mp, [0.0] * 19)), 1.0) ), # q = N([0.5, ..], 1). p is intially closer to r. Then moves further away. model.ComposedModel( p=density.IsotropicNormal(np.hstack((0.5, [0.0] * 19)), 1.0) ), # data generating distribution r = N(0, 1) data.DSIsotropicNormal(np.zeros(20), 1.0), ) for mp in [0.4, 0.45, 0.55, 0.6, 0.7] ]), # A Gaussian-Bernoulli RBM problem. 'gbrbm_dx30_dh10': (800, [(purturb_p, ) + pqr_gbrbm_perturb(purturb_p, 0.3, dx=30, dh=10) for purturb_p in [0.1, 0.2, 0.4, 0.5]]), # A Gaussian-Bernoulli RBM problem. 'gbrbm_dx20_dh5': (2000, [(purturb_p, ) + pqr_gbrbm_perturb(purturb_p, 0.3, dx=20, dh=5) for purturb_p in [0.2, 0.25, 0.35, 0.4, 0.5, 0.6, 0.7]]), # A Gaussian-Bernoulli RBM problem. 'gbrbm_dx10_dh5': (600, [(purturb_p, ) + pqr_gbrbm_perturb(purturb_p, 0.3, dx=10, dh=5) for purturb_p in [0.1, 0.2, 0.25, 0.35, 0.4, 0.5, 0.6]]), } # end of prob2tuples if prob_label not in prob2tuples: raise ValueError('Unknown problem label. Need to be one of %s' % str(list(prob2tuples.keys()))) return prob2tuples[prob_label]
def generate_X_from_given_noise_and_given_z(genX_params, num_samples, noiseX, fake_Z, rs):
    noise_and_z = np.hstack([fake_Z, noiseX])
    assert noise_and_z.shape[0] == num_samples
    samples = neural_net_predict_genX(genX_params, noise_and_z)
    return samples
state = env.reset()
done = False

fixed_point = anp.array([0., np.pi, 0., 0., 0.])

start = time.time()
f0, A, B = env._linearize(fixed_point)

# create cost matrices for LQR
Q = np.array([
    [1, 0, 0, 0],
    [0, 1, 0, 0],
    [0, 0, 10, 0],
    [0, 0, 0, 100],
])
R = np.array([[0.00001]])

# Compute gain matrix K
K = lqr(A, B, Q, R)

while not done:
    env.render()
    action = -np.matmul(K, state - fixed_point[:-1])
    action = action[0]
    state, reward, done, info = env.step(action)
    f0, A, B = env._linearize(anp.hstack((state, action)))
def optimize_locs_widths( p, dat, gwidth0, test_locs0, reg=1e-2, max_iter=100, tol_fun=1e-5, disp=False, locs_bounds_frac=100, gwidth_lb=None, gwidth_ub=None, use_2terms=False, ): """ Optimize the test locations and the Gaussian kernel width by maximizing a test power criterion. data should not be the same data as used in the actual test (i.e., should be a held-out set). This function is deterministic. - data: a Data object - test_locs0: Jxd numpy array. Initial V. - reg: reg to add to the mean/sqrt(variance) criterion to become mean/sqrt(variance + reg) - gwidth0: initial value of the Gaussian width^2 - max_iter: #gradient descent iterations - tol_fun: termination tolerance of the objective value - disp: True to print convergence messages - locs_bounds_frac: When making box bounds for the test_locs, extend the box defined by coordinate-wise min-max by std of each coordinate multiplied by this number. - gwidth_lb: absolute lower bound on the Gaussian width^2 - gwidth_ub: absolute upper bound on the Gaussian width^2 - use_2terms: If True, then besides the signal-to-noise ratio criterion, the objective function will also include the first term that is dropped. #- If the lb, ub bounds are None, use fraction of the median heuristics # to automatically set the bounds. Return (V test_locs, gaussian width, optimization info log) """ J = test_locs0.shape[0] X = dat.data() n, d = X.shape # Parameterize the Gaussian width with its square root (then square later) # to automatically enforce the positivity. def obj(sqrt_gwidth, V): return -GaussFSSD.power_criterion( p, dat, sqrt_gwidth**2, V, reg=reg, use_2terms=use_2terms) flatten = lambda gwidth, V: np.hstack((gwidth, V.reshape(-1))) def unflatten(x): sqrt_gwidth = x[0] V = np.reshape(x[1:], (J, d)) return sqrt_gwidth, V def flat_obj(x): sqrt_gwidth, V = unflatten(x) return obj(sqrt_gwidth, V) # gradient # grad_obj = autograd.elementwise_grad(flat_obj) # Initial point x0 = flatten(np.sqrt(gwidth0), test_locs0) # make sure that the optimized gwidth is not too small or too large. fac_min = 1e-2 fac_max = 1e2 med2 = util.meddistance(X, subsample=1000)**2 if gwidth_lb is None: gwidth_lb = max(fac_min * med2, 1e-3) if gwidth_ub is None: gwidth_ub = min(fac_max * med2, 1e5) # Make a box to bound test locations X_std = np.std(X, axis=0) # X_min: length-d array X_min = np.min(X, axis=0) X_max = np.max(X, axis=0) # V_lb: J x d V_lb = np.tile(X_min - locs_bounds_frac * X_std, (J, 1)) V_ub = np.tile(X_max + locs_bounds_frac * X_std, (J, 1)) # (J*d+1) x 2. Take square root because we parameterize with the square # root x0_lb = np.hstack((np.sqrt(gwidth_lb), np.reshape(V_lb, -1))) x0_ub = np.hstack((np.sqrt(gwidth_ub), np.reshape(V_ub, -1))) x0_bounds = list(zip(x0_lb, x0_ub)) # optimize. Time the optimization as well. # https://docs.scipy.org/doc/scipy/reference/optimize.minimize-lbfgsb.html grad_obj = autograd.elementwise_grad(flat_obj) with util.ContextTimer() as timer: opt_result = scipy.optimize.minimize( flat_obj, x0, method='L-BFGS-B', bounds=x0_bounds, tol=tol_fun, options={ 'maxiter': max_iter, 'ftol': tol_fun, 'disp': disp, 'gtol': 1.0e-07, }, jac=grad_obj, ) opt_result = dict(opt_result) opt_result['time_secs'] = timer.secs x_opt = opt_result['x'] sq_gw_opt, V_opt = unflatten(x_opt) gw_opt = sq_gw_opt**2 assert util.is_real_num( gw_opt), 'gw_opt is not real. Was %s' % str(gw_opt) return V_opt, gw_opt, opt_result
def train(self, InpsAndTargsFunc, algorithm, monitor_training=0, **kwargs): '''Use this method to train the RNN using one of several training algorithms! Inputs: InpsAndTargsFunc: This should be a FUNCTION that randomly produces a training input and a target function Those should have individual inputs/targets as columns and be the first two outputs of this function. Should also take a 'dt' argument. algorithm: Which training algorithm would you like to use? Options are: 'full-FORCE': as seen in DePasquale 2017 'grad': gradient-based training using autograd (adam optimizer) monitor_training: Collect useful statistics and show at the end **kwargs: use to pass things to the InpsAndTargsFunc function Outputs: Nothing explicitly, but the weights of self.rnn_par are optimized to map the inputs to the targets ''' kwargs['algorithm'] = algorithm if algorithm == 'full-FORCE': '''Use this to train the network according to the full-FORCE algorithm, described in DePasquale 2017 This function uses a recursive least-squares algorithm to optimize the network. Note that after each batch, the function shows an example output as well as recurrent unit activity. Parameters: In self.p, the parameters starting with ff_ control this function. *****NOTE***** The function InpsAndTargsFunc must have a third output of "hints" for training. If you don't want to use hints, replace with a vector of zeros (Nx1) ''' ####################### TEMPORARY ####################### TEMPORARY thetas = [] ####################### TEMPORARY ####################### TEMPORARY # First, initialize some parameters p = self.p self.initialize_act() N = p['network_size'] self.rnn_par['rec_weights'] = np.zeros((N, N)) self.rnn_par['out_weights'] = np.zeros( (self.rnn_par['out_weights'].shape)) # Need to initialize a target-generating network, used for computing error: # First, take some example inputs, targets, and hints to get the right shape D_inps_and_targs = InpsAndTargsFunc(dt=p['dt'], **kwargs) D_num_inputs = D_inps_and_targs['inps'].shape[1] D_num_targs = D_inps_and_targs['targs'].shape[1] D_num_hints = D_inps_and_targs['hints'].shape[1] D_num_total_inps = D_num_inputs + D_num_targs + D_num_hints # Then instantiate the network and pull out some relevant weights DRNN = RNN(hyperparameters=self.p, num_inputs=D_num_total_inps, num_outputs=1) w_targ = np.transpose( DRNN.rnn_par['inp_weights'][D_num_inputs:(D_num_inputs + D_num_targs), :]) w_hint = np.transpose( DRNN.rnn_par['inp_weights'][(D_num_inputs + D_num_targs):D_num_total_inps, :]) Jd = np.transpose(DRNN.rnn_par['rec_weights']) ################### Monitor training with these variables: J_err_ratio = [] J_err_mag = [] J_norm = [] w_err_ratio = [] w_err_mag = [] w_norm = [] ################### # Let the networks settle from the initial conditions print('Initializing', end="") for i in range(p['ff_init_trials']): print('.', end="") inps_and_targs = InpsAndTargsFunc(dt=p['dt'], **kwargs) inp = inps_and_targs['inps'] targ = inps_and_targs['targs'] hints = inps_and_targs['hints'] D_total_inp = np.hstack((inp, targ, hints)) DRNN.run(D_total_inp) self.run(inp) print('') # Now begin training print('Training network...') # Initialize the inverse correlation matrix P = np.eye(N) / p['ff_alpha'] for batch in range(p['ff_num_batches']): print( 'Batch %g of %g, %g trials: ' % (batch + 1, p['ff_num_batches'], p['ff_trials_per_batch']), end="") for trial in range(p['ff_trials_per_batch']): if np.mod(trial, 50) == 0: print('') print('.', end="") # Create input, target, and hints. 
Combine for the driven network inps_and_targs = InpsAndTargsFunc( dt=p['dt'], **kwargs) # Get relevant time series inp = inps_and_targs['inps'] targ = inps_and_targs['targs'] hints = inps_and_targs['hints'] D_total_inp = np.hstack((inp, targ, hints)) # For recording: dx = [] # Driven network activity x = [] # RNN activity z = [] # RNN output for t in range(len(inp)): # Run both RNNs forward and get the activity. Record activity for potential plotting dx_t = DRNN.run(D_total_inp[t:(t + 1), :], record_flag=1)[1][:, 0:5] z_t, x_t = self.run(inp[t:(t + 1), :], record_flag=1) dx.append(np.squeeze(np.tanh(dx_t) + np.arange(5) * 2)) z.append(np.squeeze(z_t)) x.append( np.squeeze( np.tanh(x_t[:, 0:5]) + np.arange(5) * 2)) if npr.rand() < (1 / p['ff_steps_per_update']): # Extract relevant values r = np.transpose(np.tanh(self.act)) rd = np.transpose(np.tanh(DRNN.act)) J = np.transpose(self.rnn_par['rec_weights']) w = np.transpose(self.rnn_par['out_weights']) # Compute errors J_err = (np.dot(J, r) - np.dot(Jd, rd) - np.dot(w_targ, targ[t:(t + 1), :].T) - np.dot(w_hint, hints[t:(t + 1), :].T)) w_err = np.dot(w, r) - targ[t:(t + 1), :].T # Compute the gain (k) and running estimate of the inverse correlation matrix Pr = np.dot(P, r) k = np.transpose(Pr) / ( 1 + np.dot(np.transpose(r), Pr)) P = P - np.dot(Pr, k) # Update weights w = w - np.dot(w_err, k) J = J - np.dot(J_err, k) self.rnn_par['rec_weights'] = np.transpose(J) self.rnn_par['out_weights'] = np.transpose(w) if monitor_training == 1: J_err_plus = ( np.dot(J, r) - np.dot(Jd, rd) - np.dot(w_targ, targ[t:(t + 1), :].T) - np.dot(w_hint, hints[t:(t + 1), :].T)) J_err_ratio = np.hstack( (J_err_ratio, np.squeeze(np.mean(J_err_plus / J_err)))) J_err_mag = np.hstack( (J_err_mag, np.squeeze(np.linalg.norm(J_err)))) J_norm = np.hstack( (J_norm, np.squeeze(np.linalg.norm(J)))) w_err_plus = np.dot(w, r) - targ[t:(t + 1), :].T w_err_ratio = np.hstack( (w_err_ratio, np.squeeze(w_err_plus / w_err))) w_err_mag = np.hstack( (w_err_mag, np.squeeze(np.linalg.norm(w_err)))) w_norm = np.hstack( (w_norm, np.squeeze(np.linalg.norm(w)))) ####################### TEMPORARY ####################### TEMPORARY thetas.append(inps_and_targs['theta']) ####################### TEMPORARY ####################### TEMPORARY ########## Batch callback print('') # New line after each batch # Convert lists to arrays dx = np.array(dx) x = np.array(x) z = np.array(z) if batch == 0: # Set up plots training_fig = plt.figure() ax_unit = training_fig.add_subplot(2, 1, 1) ax_out = training_fig.add_subplot(2, 1, 2) tvec = np.expand_dims(np.arange(0, len(inp)) * p['dt'], axis=1) # Create output and target lines lines_targ_out = plt.Line2D(np.repeat(tvec, targ.shape[1], axis=1).T, targ.T, linestyle='--', color='r') lines_out = plt.Line2D(np.repeat(tvec, targ.shape[1], axis=1).T, z.T, color='b') ax_out.add_line(lines_targ_out) ax_out.add_line(lines_out) # Create recurrent unit and DRNN target lines lines_targ_unit = {} lines_unit = {} for i in range(5): lines_targ_unit['%g' % i] = plt.Line2D(tvec, dx[:, i], linestyle='--', color='r') lines_unit['%g' % i] = plt.Line2D(tvec, x[:, i], color='b') ax_unit.add_line(lines_targ_unit['%g' % i]) ax_unit.add_line(lines_unit['%g' % i]) # Set up the axes ax_out.set_xlim([0, p['dt'] * len(inp)]) ax_unit.set_xlim([0, p['dt'] * len(inp)]) ax_out.set_ylim([-1.2, 1.2]) ax_unit.set_ylim([-2, 10]) ax_out.set_title('Output') ax_unit.set_title('Recurrent units, batch %g' % (batch + 1)) # Labels ax_out.set_xlabel('Time (s)') ax_out.legend([lines_targ_out, lines_out], 
['Target', 'RNN'], loc=1) else: # Update the plot tvec = np.expand_dims(np.arange(0, len(inp)) * p['dt'], axis=1) ax_out.set_xlim([0, p['dt'] * len(inp)]) ax_unit.set_xlim([0, p['dt'] * len(inp)]) ax_unit.set_title('Recurrent units, batch %g' % (batch + 1)) lines_targ_out.set_xdata( np.repeat(tvec, targ.shape[1], axis=1).T) lines_targ_out.set_ydata(targ.T) lines_out.set_xdata( np.repeat(tvec, targ.shape[1], axis=1).T) lines_out.set_ydata(z.T) for i in range(5): lines_targ_unit['%g' % i].set_xdata(tvec) lines_targ_unit['%g' % i].set_ydata(dx[:, i]) lines_unit['%g' % i].set_xdata(tvec) lines_unit['%g' % i].set_ydata(x[:, i]) training_fig.canvas.draw() if monitor_training == 1: # Now for some visualization to see how things went: stats_fig = plt.figure(figsize=(8, 10)) plt.subplot(3, 2, 1) plt.title('Recurrent learning error ratio') plt.plot(J_err_ratio) plt.subplot(3, 2, 3) plt.title('Recurrent error magnitude') plt.plot(J_err_mag) plt.subplot(3, 2, 5) plt.title('Recurrent weights norm') plt.plot(J_norm) plt.subplot(3, 2, 2) plt.plot(w_err_ratio) plt.title('Output learning error ratio') plt.subplot(3, 2, 4) plt.plot(w_err_mag) plt.title('Output error magnitude') plt.subplot(3, 2, 6) plt.plot(w_norm) plt.title('Output weights norm') stats_fig.canvas.draw() print('Done training!') ####################### TEMPORARY ####################### TEMPORARY return thetas ####################### TEMPORARY ####################### TEMPORARY elif algorithm == 'grad': ''' Use this setting to train with a gradient-based optimization (in this case, the adam optimizer). Parameters: In self.p, the parameters starting with grad_ control this function. ''' # First, define the training loss function def training_loss(x, iteration, myparams=kwargs, showplot=0): error = 0 self.rnn_par = x batch_size = self.p['grad_batch_size'] for i in range(batch_size): self.initialize_act() inps_and_targs = InpsAndTargsFunc(dt=self.p['dt'], **myparams) inputs = inps_and_targs['inps'] target = inps_and_targs['targs'] targ_idx = inps_and_targs['targ_idx'] outputs = self.run(inputs)[0] error += np.sum( (outputs[targ_idx, :] - target[targ_idx, :])** 2) / target[targ_idx, 0].size error = error / batch_size if showplot: fig = plt.gcf() fig.add_subplot(1, 2, 2) plt.cla() plt.plot(target, 'r--') plt.plot(outputs, 'b') fig.canvas.draw() return error # Function from David Sussillo that allows for better monitoring and interfacing def myadam(grad, init_params, callback=None, num_iters=100, step_sizes=0.001, b1=0.9, b2=0.999, eps=10**-8, gnorm_max=np.inf, last_m=None, last_v=None, last_i=0, lossfun=[], printstuff=0): """Adam as described in http://arxiv.org/pdf/1412.6980.pdf. It's basically RMSprop with momentum and some correction terms.""" flattened_grad, unflatten, x = autograd.util.flatten_func( grad, init_params) if type(step_sizes) == float or type(step_sizes) == int: step_sizes = step_sizes * np.ones(num_iters) else: assert len(step_sizes) == num_iters m = np.zeros(len(x)) if last_m is None else last_m v = np.zeros(len(x)) if last_v is None else last_v for i in range(num_iters): g = flattened_grad(x, i) gnorm = np.linalg.norm(g) if gnorm > gnorm_max: if printstuff: print(" Gradient norm was: %0.4f" % gnorm) g = g * gnorm_max / gnorm gnorm = np.linalg.norm(g) if printstuff: print(" Gradient norm: %0.4f" % gnorm) print(" Step size: %0.4f" % step_sizes[i]) if callback: callback(unflatten(x), i, unflatten(g), lossfun=lossfun) m = (1 - b1) * g + b1 * m # First moment estimate. v = (1 - b2) * (g**2) + b2 * v # Second moment estimate. 
mhat = m / (1 - b1**(i + last_i + 1)) # Bias correction. vhat = v / (1 - b2**(i + last_i + 1)) x = x - step_sizes[i] * mhat / (np.sqrt(vhat) + eps) return unflatten(x), (m, v, i + last_i) def callback(weights, iteration, gradient, total_loss=[], lossfun=[]): loss = (lossfun(weights, 0, showplot=1)) total_loss.append(loss) if iteration > 0: fig = plt.gcf() fig.add_subplot(1, 2, 1) plt.semilogy([iteration - 1, iteration], [total_loss[-2], total_loss[-1]], 'b-') fig.canvas.draw() plt.title('Iteration %d' % (iteration + 1)) plt.ylabel('Training loss') plt.xlabel('Iteration') def make_step_sizes(): init_stepsize = self.p['grad_init_stepsize'] decay_factor = self.p['grad_stepsize_decay'] num_iters = self.p['grad_num_iters'] step_sizes = init_stepsize * decay_factor**np.ones( (num_iters)) #np.arange(num_iters) return step_sizes plt.figure() x = self.rnn_par loss_grad = grad(training_loss) x_fin = myadam(loss_grad, x, callback=callback, num_iters=self.p['grad_num_iters'], step_sizes=make_step_sizes(), lossfun=training_loss, gnorm_max=self.p['grad_norm_clip'])[0]
action = np.zeros((dm_act, nb_steps))
init_action = np.zeros((dm_act, horizon))

state[:, 0] = env.reset()
for t in range(nb_steps):
    solver = iLQR(env, init_state=state[:, t], init_action=None, nb_steps=horizon)
    trace = solver.run(nb_iter=10, verbose=False)

    _nominal_action = solver.uref
    action[:, t] = _nominal_action[:, 0]

    state[:, t + 1], _, _, _ = env.step(action[:, t])
    init_action = np.hstack((_nominal_action[:, 1:], np.zeros((dm_act, 1))))

    print('Time Step:', t, 'Cost:', trace[-1])

import matplotlib.pyplot as plt

plt.figure()

plt.subplot(7, 1, 1)
plt.plot(state[0, :], '-b')

plt.subplot(7, 1, 2)
plt.plot(state[1, :], '-b')

plt.subplot(7, 1, 3)
plt.plot(state[2, :], '-b')

plt.subplot(7, 1, 4)
plt.plot(state[3, :], '-r')
def m_step(self, expectations, datas, inputs, masks, tags, **kwargs): from sklearn.linear_model import LinearRegression D, M = self.D, self.M for d in range(self.D): # Collect data for this dimension xs, ys, weights = [], [], [] for (Ez, _, _), data, input, mask in zip(expectations, datas, inputs, masks): # Only use data if it is complete if np.all(mask[:, d]): xs.append( np.hstack([ data[self.lags - l - 1:-l - 1, d:d + 1] for l in range(self.lags) ] + [ input[self.lags:, :M], np.ones((data.shape[0] - self.lags, 1)) ])) ys.append(data[self.lags:, d]) weights.append(Ez[self.lags:]) xs = np.concatenate(xs) ys = np.concatenate(ys) weights = np.concatenate(weights) # If there was no data for this dimension then skip it if len(xs) == 0: self.As[:, d, :] = 0 self.Vs[:, d, :] = 0 self.bs[:, d] = 0 continue # Otherwise, fit a weighted linear regression for each discrete state for k in range(self.K): # Check for zero weights (singular matrix) if np.sum(weights[:, k]) < self.lags + M + 1: self.As[k, d] = 1.0 self.Vs[k, d] = 0 self.bs[k, d] = 0 self.inv_sigmas[k, d] = 0 continue # Solve for the most likely A,V,b (no prior) Jk = np.sum(weights[:, k][:, None, None] * xs[:, :, None] * xs[:, None, :], axis=0) hk = np.sum(weights[:, k][:, None] * xs * ys[:, None], axis=0) muk = np.linalg.solve(Jk, hk) self.As[k, d] = muk[:self.lags] self.Vs[k, d] = muk[self.lags:self.lags + M] self.bs[k, d] = muk[-1] # Update the variances yhats = xs.dot( np.concatenate( (self.As[k, d], self.Vs[k, d], [self.bs[k, d]]))) sqerr = (ys - yhats)**2 sigma = np.average(sqerr, weights=weights[:, k], axis=0) + 1e-16 self.inv_sigmas[k, d] = np.log(sigma)
def _m_step_ar(self, expectations, datas, inputs, masks, tags, num_em_iters): K, D, M, lags = self.K, self.D, self.M, self.lags # Collect data for this dimension xs, ys, Ezs = [], [], [] for (Ez, _, _), data, input, mask, tag in zip(expectations, datas, inputs, masks, tags): # Only use data if it is complete if not np.all(mask): raise Exception( "Encountered missing data in AutoRegressiveObservations!") xs.append( np.hstack( [data[self.lags - l - 1:-l - 1] for l in range(self.lags)] + [ input[self.lags:, :self.M], np.ones((data.shape[0] - self.lags, 1)) ])) ys.append(data[self.lags:]) Ezs.append(Ez[self.lags:]) for itr in range(num_em_iters): # Compute expected precision for each data point given current parameters taus = [] for x, y in zip(xs, ys): # mus = self._compute_mus(data, input, mask, tag) # sigmas = self._compute_sigmas(data, input, mask, tag) Afull = np.concatenate((self.As, self.Vs, self.bs[:, :, None]), axis=2) mus = np.matmul(Afull[None, :, :, :], x[:, None, :, None])[:, :, :, 0] sigmas = np.exp(self.inv_sigmas) # nu: (K,) mus: (T, K, D) sigmas: (K, D) y: (T, D) -> tau: (T, K, D) alpha = np.exp(self.inv_nus[:, None]) / 2 + 1 / 2 beta = np.exp(self.inv_nus[:, None]) / 2 + 1 / 2 * ( y[:, None, :] - mus)**2 / sigmas taus.append(alpha / beta) # Fit the weighted linear regressions for each K and D J = np.tile( np.eye(D * lags + M + 1)[None, None, :, :], (K, D, 1, 1)) h = np.zeros(( K, D, D * lags + M + 1, )) for x, y, Ez, tau in zip(xs, ys, Ezs, taus): robust_ar_statistics(Ez, tau, x, y, J, h) mus = np.linalg.solve(J, h) self.As = mus[:, :, :D * lags] self.Vs = mus[:, :, D * lags:D * lags + M] self.bs = mus[:, :, -1] # Fit the variance sqerr = 0 weight = 0 for x, y, Ez, tau in zip(xs, ys, Ezs, taus): yhat = np.matmul(x[None, :, :], np.swapaxes(mus, -1, -2)) sqerr += np.einsum('tk, tkd, ktd -> kd', Ez, tau, (y - yhat)**2) weight += np.sum(Ez, axis=0) self.inv_sigmas = np.log(sqerr / weight[:, None] + 1e-16)
def draw(self): # Draw the airplane in a new window. # Using PyVista Polydata format vertices = np.empty((0, 3)) faces = np.empty((0)) for wing in self.wings: wing_vertices = np.empty((0, 3)) wing_tri_faces = np.empty((0, 4)) wing_quad_faces = np.empty((0, 5)) for i in range(len(wing.xsecs) - 1): is_last_section = i == len(wing.xsecs) - 2 le_start = wing.xsecs[i].xyz_le + wing.xyz_le te_start = wing.xsecs[i].xyz_te() + wing.xyz_le wing_vertices = np.vstack((wing_vertices, le_start, te_start)) wing_quad_faces = np.vstack( (wing_quad_faces, np.expand_dims( np.array( [4, 2 * i + 0, 2 * i + 1, 2 * i + 3, 2 * i + 2]), 0))) if is_last_section: le_end = wing.xsecs[i + 1].xyz_le + wing.xyz_le te_end = wing.xsecs[i + 1].xyz_te() + wing.xyz_le wing_vertices = np.vstack((wing_vertices, le_end, te_end)) vertices_starting_index = len(vertices) wing_quad_faces_reformatted = np.ndarray.copy(wing_quad_faces) wing_quad_faces_reformatted[:, 1:] = wing_quad_faces[:, 1:] + vertices_starting_index wing_quad_faces_reformatted = np.reshape( wing_quad_faces_reformatted, (-1), order='C') vertices = np.vstack((vertices, wing_vertices)) faces = np.hstack((faces, wing_quad_faces_reformatted)) if wing.symmetric: vertices_starting_index = len(vertices) wing_vertices = reflect_over_XZ_plane(wing_vertices) wing_quad_faces_reformatted = np.ndarray.copy(wing_quad_faces) wing_quad_faces_reformatted[:, 1:] = wing_quad_faces[:, 1:] + vertices_starting_index wing_quad_faces_reformatted = np.reshape( wing_quad_faces_reformatted, (-1), order='C') vertices = np.vstack((vertices, wing_vertices)) faces = np.hstack((faces, wing_quad_faces_reformatted)) plotter = pv.Plotter() wing_surfaces = pv.PolyData(vertices, faces) plotter.add_mesh(wing_surfaces, color='#7EFC8F', show_edges=True, smooth_shading=True) xyz_ref = pv.PolyData(self.xyz_ref) plotter.add_points(xyz_ref, color='#50C7C7', point_size=10) plotter.show_grid(color='#444444') plotter.set_background(color="black") plotter.show(cpos=(-1, -1, 1), full_screen=False)
def get_pqsource(prob_label): """ Return (p, ds), a tuple of - p: a Density representing the distribution p - ds: a DataSource, each corresponding to one parameter setting. The DataSource generates sample from q. """ prob2tuples = { # H0 is true. vary d. P = Q = N(0, I) "sg5": ( density.IsotropicNormal(np.zeros(5), 1), data.DSIsotropicNormal(np.zeros(5), 1), ), # P = N(0, I), Q = N( (0.2,..0), I) "gmd5": ( density.IsotropicNormal(np.zeros(5), 1), data.DSIsotropicNormal(np.hstack((0.2, np.zeros(4))), 1), ), "gmd1": ( density.IsotropicNormal(np.zeros(1), 1), data.DSIsotropicNormal(np.ones(1) * 0.2, 1), ), # P = N(0, I), Q = N( (1,..0), I) "gmd100": ( density.IsotropicNormal(np.zeros(100), 1), data.DSIsotropicNormal(np.hstack((1, np.zeros(99))), 1), ), # Gaussian variance difference problem. Only the variance # of the first dimenion differs. d varies. "gvd5": ( density.Normal(np.zeros(5), np.eye(5)), data.DSNormal(np.zeros(5), np.diag(np.hstack((2, np.ones(4))))), ), "gvd10": ( density.Normal(np.zeros(10), np.eye(10)), data.DSNormal(np.zeros(10), np.diag(np.hstack((2, np.ones(9))))), ), # Gaussian Bernoulli RBM. dx=50, dh=10. H0 is true "gbrbm_dx50_dh10_v0": gaussbern_rbm_tuple(0, dx=50, dh=10, n=sample_size), # Gaussian Bernoulli RBM. dx=5, dh=3. H0 is true "gbrbm_dx5_dh3_v0": gaussbern_rbm_tuple(0, dx=5, dh=3, n=sample_size), # Gaussian Bernoulli RBM. dx=50, dh=10. "gbrbm_dx50_dh10_v1em3": gaussbern_rbm_tuple(1e-3, dx=50, dh=10, n=sample_size), # Gaussian Bernoulli RBM. dx=5, dh=3. Perturb with noise = 1e-2. "gbrbm_dx5_dh3_v5em3": gaussbern_rbm_tuple(5e-3, dx=5, dh=3, n=sample_size), # Gaussian mixture of two components. Uniform mixture weights. # p = 0.5*N(0, 1) + 0.5*N(3, 0.01) # q = 0.5*N(-3, 0.01) + 0.5*N(0, 1) "gmm_d1": ( density.IsoGaussianMixture(np.array([[0], [3.0]]), np.array([1, 0.01])), data.DSIsoGaussianMixture(np.array([[-3.0], [0]]), np.array([0.01, 1])), ), # p = N(0, 1) # q = 0.1*N([-10, 0,..0], 0.001) + 0.9*N([0,0,..0], 1) "g_vs_gmm_d5": ( density.IsotropicNormal(np.zeros(5), 1), data.DSIsoGaussianMixture( np.vstack((np.hstack((0.0, np.zeros(4))), np.zeros(5))), np.array([0.0001, 1]), pmix=[0.1, 0.9], ), ), "g_vs_gmm_d2": ( density.IsotropicNormal(np.zeros(2), 1), data.DSIsoGaussianMixture( np.vstack((np.hstack((0.0, np.zeros(1))), np.zeros(2))), np.array([0.01, 1]), pmix=[0.1, 0.9], ), ), "g_vs_gmm_d1": ( density.IsotropicNormal(np.zeros(1), 1), data.DSIsoGaussianMixture(np.array([[0.0], [0]]), np.array([0.01, 1]), pmix=[0.1, 0.9]), ), } if prob_label not in prob2tuples: raise ValueError("Unknown problem label. Need to be one of %s" % str(prob2tuples.keys())) return prob2tuples[prob_label]
def minConf_PQN(funObj, x, funProj, options=None): """ The problems are of the form min funObj(x) s.t. x in C The projected quasi-Newton sub-problems are solved using the spectral projected gradient algorithm Parameters ---------- funObj: function to minimize, return objective value as the first argument and gradient as the second argument funProj: function that returns projection of x onto C options: 1) verbose: level of verbosity (0: no output, 1: final, 2: iter (default), 3: debug) 2) optTol: tolerance used to check for optimality (default: 1e-5) 3) progTol: tolerance used to check for progress (default: 1e-9) 4) maxIter: maximum number of calls to funObj (default: 500) 5) maxProject: maximum number of calls to funProj (default: 100000) 6) numDiff: compute derivatives numerically (0: use user-supplied derivatives (default), 1: use finite differences, 2: use complex differentials) 7) suffDec: sufficient decrease parameter in Armijo condition (default: 1e-4) 8) corrections: number of lbfgs corrections to store (default: 10) 9) adjustStep: use quadratic initialization of line search (default: 0) 10) bbInit: initialize sub-problem with Barzilai-Borwein step (default: 0) 11) SPGoptTol: optimality tolerance for SPG direction finding (default: 1e-6) 12) SPGiters: maximum number of iterations for SPG direction finding (default: 10) Returns ------- x: optimal parameter values f: optimal objective value funEvals: number of function evaluations """ # number of variables/parameters nVars = len(x) # set default optimization settings options_default = {'verbose':2, 'numDiff':0, 'optTol':1e-5, 'progTol':1e-9, \ 'maxIter':500, 'maxProject':100000, 'suffDec':1e-4, \ 'corrections':10, 'adjustStep':0, 'bbInit':0, 'SPGoptTol':1e-6,\ 'SPGprogTol':1e-10, 'SPGiters':10, 'SPGtestOpt':0} options = setDefaultOptions(options, options_default) if options['verbose'] == 3: print 'Running PQN...' print 'Number of L-BFGS Corrections to store: ' + \ str(options['corrections']) print 'Spectral initialization of SPG: ' + str(options['bbInit']) print 'Maximum number of SPG iterations: ' + str(options['SPGiters']) print 'SPG optimality tolerance: ' + str(options['SPGoptTol']) print 'SPG progress tolerance: ' + str(options['SPGprogTol']) print 'PQN optimality tolerance: ' + str(options['optTol']) print 'PQN progress tolerance: ' + str(options['progTol']) print 'Quadratic initialization of line search: ' + \ str(options['adjustStep']) print 'Maximum number of function evaluations: ' + \ str(options['maxIter']) print 'Maximum number of projections: ' + str(options['maxProject']) if options['verbose'] >= 2: print '{:10s}'.format('Iteration') + \ '{:10s}'.format('FunEvals') + \ '{:10s}'.format('Projections') + \ '{:15s}'.format('StepLength') + \ '{:15s}'.format('FunctionVal') + \ '{:15s}'.format('OptCond') funEvalMultiplier = 1 # project initial parameter vector # translate this function (Done!) x = funProj(x) projects = 1 # evaluate initial parameters # translate this function (Done!) [f, g] = funObj(x) funEvals = 1 # check optimality of initial point projects = projects + 1 if np.max(np.abs(funProj(x-g)-x)) < options['optTol']: if options['verbose'] >= 1: print "First-Order Optimality Conditions Below optTol at Initial Point" return (x, f, funEvals) i = 1 while funEvals <= options['maxIter']: # compute step direction # this is for initialization if i == 1: p = funProj(x-g) projects = projects + 1 S = np.zeros((nVars, 0)) Y = np.zeros((nVars, 0)) Hdiag = 1 else: y = g - g_old s = x - x_old # translate this function (Done!) 
[S, Y, Hdiag] = lbfgsUpdate(y, s, options['corrections'], \ options['verbose']==3, S, Y, Hdiag) # make compact representation k = Y.shape[1] L = np.zeros((k,k)) for j in range(k): L[j+1:,j] = np.dot(np.transpose(S[:,j+1:]), Y[:,j]) N = np.hstack((S/Hdiag, Y.reshape(Y.shape[0], Y.size/Y.shape[0]))) M1 = np.hstack((np.dot(S.T,S)/Hdiag, L)) M2 = np.hstack((L.T, -np.diag(np.diag(np.dot(S.T,Y))))) M = np.vstack((M1, M2)) # translate this function (Done!) HvFunc = lambda v: v/Hdiag - np.dot(N,np.linalg.solve(M,np.dot(N.T,v))) if options['bbInit'] == True: # use Barzilai-Borwein step to initialize sub-problem alpha = np.dot(s,s)/np.dot(s,y) if alpha <= 1e-10 or alpha > 1e10: alpha = min(1., 1./np.sum(np.abs(g))) # solve sub-problem xSubInit = x - alpha*g feasibleInit = 0 else: xSubInit = x feasibleInit = 1 # solve Sub-problem # translate this function (Done!) [p, subProjects] = solveSubProblem(x, g, HvFunc, funProj, \ options['SPGoptTol'], options['SPGprogTol'], \ options['SPGiters'], options['SPGtestOpt'], feasibleInit,\ xSubInit) projects = projects + subProjects d = p - x g_old = g x_old = x # check the progress can be made along the direction gtd = np.dot(g,d) if gtd > -options['progTol']: if options['verbose'] >= 1: print "Directional Derivative below progTol" break # select initial guess to step length if i == 1 or options['adjustStep'] == 0: t = 1. else: t = min(1., 2.*(f-f_old)/gtd) # bound step length on first iteration if i == 1: t = min(1., 1./np.sum(np.abs(g))) # evluate the objective and gradient at the initial step if t == 1: x_new = p else: x_new = x + t*d [f_new, g_new] = funObj(x_new) funEvals = funEvals + 1 # backtracking line search f_old = f # translate isLegal (Done!) while f_new > f + options['suffDec']*np.dot(g,x_new-x) or \ not isLegal(f_new): temp = t # backtrack to next trial value if not isLegal(f_new) or not isLegal(g_new): if options['verbose'] == 3: print "Halving step size" t = t/2. else: if options['verbose'] == 3: print "Cubic backtracking" # translate polyinterp (Done!) 
t = polyinterp(np.array([[0.,f,gtd],\ [t,f_new,np.dot(g_new,d)]]))[0] # adjust if change is too small/large if t < temp*1e-3: if options['verbose'] == 3: print "Interpolated value too small, Adjusting" t = temp*1e-3 elif t > temp*0.6: if options['verbose'] == 3: print "Interpolated value too large, Adjusting" t = temp*0.6 # check whether step has become too small if np.sum(np.abs(t*d)) < options['progTol'] or t == 0: if options['verbose'] == 3: print "Line search failed" t = 0 f_new = f g_new = g break # evaluate new point f_prev = f_new t_prev = temp x_new = x + t*d [f_new, g_new] = funObj(x_new) funEvals = funEvals + 1 # take step x = x_new f = f_new g = g_new optCond = np.max(np.abs(funProj(x-g)-x)) projects = projects + 1 # output log if options['verbose'] >= 2: print '{:10d}'.format(i) + \ '{:10d}'.format(funEvals*funEvalMultiplier) + \ '{:10d}'.format(projects) + \ '{:15.5e}'.format(t) + \ '{:15.5e}'.format(f) + \ '{:15.5e}'.format(optCond) # check optimality if optCond < options['optTol']: print "First-order optimality conditions below optTol" break if np.max(np.abs(t*d)) < options['progTol']: if options['verbose'] >= 1: print "Step size below progTol" break if np.abs(f-f_old) < options['progTol']: if options['verbose'] >= 1: print "Function value changing by less than progTol" break if funEvals > options['maxIter']: if options['verbose'] >= 1: print "Function evaluation exceeds maxIter" break if projects > options['maxProject']: if options['verbose'] >= 1: print "Number of projections exceeds maxProject" break i = i + 1 return (x, f, funEvals)
def genConstraints(prng, label, alpha, beta, num_ML, num_CL, start_expert=0,
                   flag_same=False):
    """
    This function generates pairwise constraints (ML/CL) using ground-truth
    cluster labels and noise parameters

    Parameters
    ----------
    label: shape(n_sample, )
        cluster label of all the samples
    alpha: shape(n_expert, )
        sensitivity parameters of experts
    beta: shape(n_expert, )
        specificity parameters of experts
    num_ML: int
    num_CL: int
    flag_same: True if different experts provide constraints for the same set
        of sample pairs, False if different experts provide constraints for
        different sets of sample pairs

    Returns
    -------
    S: shape(n_con, 4)
        The first column -> expert id
        The second and third column -> (row, column) indices of two samples
        The fourth column -> constraint values (1 for ML and 0 for CL)
    """
    n_sample = len(label)
    tp = np.tile(label, (n_sample, 1))
    label_mat = (tp == tp.T).astype(int)

    ML_set = []
    CL_set = []
    # get indices of upper-triangle matrix: n_sample * (n_sample - 1) / 2 pairs
    [row, col] = np.triu_indices(n_sample, k=1)
    for idx in range(len(row)):
        if label_mat[row[idx], col[idx]] == 1:
            ML_set.append([row[idx], col[idx]])
        elif label_mat[row[idx], col[idx]] == 0:
            CL_set.append([row[idx], col[idx]])
        else:
            print("Invalid matrix entry values")
    ML_set = np.array(ML_set)
    CL_set = np.array(CL_set)
    assert num_ML < ML_set.shape[0]
    assert num_CL < CL_set.shape[0]

    # generate noisy constraints for each expert
    assert len(alpha) == len(beta)
    n_expert = len(alpha)

    # initialize the constraint matrix
    S = np.zeros((0, 4))
    if flag_same == True:
        # different experts provide constraints for the same set of sample pairs
        idx_ML = prng.choice(ML_set.shape[0], num_ML, replace=False)
        idx_CL = prng.choice(CL_set.shape[0], num_CL, replace=False)
        ML = ML_set[idx_ML, :]
        CL = CL_set[idx_CL, :]
        for m in range(n_expert):
            val_ML = prng.binomial(1, alpha[m], num_ML)
            val_CL = prng.binomial(1, 1 - beta[m], num_CL)
            Sm_ML = np.hstack((np.ones((num_ML, 1)) * (m + start_expert), ML,
                               val_ML.reshape(val_ML.size, 1)))
            Sm_CL = np.hstack((np.ones((num_CL, 1)) * (m + start_expert), CL,
                               val_CL.reshape(val_CL.size, 1)))
            S = np.vstack((S, Sm_ML, Sm_CL)).astype(int)
    else:
        # different experts provide constraints for different sets of sample pairs
        for m in range(n_expert):
            prng = np.random.RandomState(1000 + m)
            idx_ML = prng.choice(ML_set.shape[0], num_ML, replace=False)
            idx_CL = prng.choice(CL_set.shape[0], num_CL, replace=False)
            ML = ML_set[idx_ML, :]
            CL = CL_set[idx_CL, :]
            val_ML = prng.binomial(1, alpha[m], num_ML)
            val_CL = prng.binomial(1, 1 - beta[m], num_CL)
            Sm_ML = np.hstack((np.ones((num_ML, 1)) * (m + start_expert), ML,
                               val_ML.reshape(val_ML.size, 1)))
            Sm_CL = np.hstack((np.ones((num_CL, 1)) * (m + start_expert), CL,
                               val_CL.reshape(val_CL.size, 1)))
            S = np.vstack((S, Sm_ML, Sm_CL)).astype(int)
    return S
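# A minimal, hedged usage sketch (added illustration; labels and noise levels
# are made up): two experts with given sensitivity/specificity labelling 20 ML
# and 20 CL pairs drawn from a two-cluster ground truth.
if __name__ == '__main__':
    prng = np.random.RandomState(0)
    label = np.repeat([0, 1], 20)       # 40 samples, 2 clusters
    alpha = np.array([0.9, 0.8])        # per-expert sensitivity
    beta = np.array([0.9, 0.85])        # per-expert specificity
    S = genConstraints(prng, label, alpha, beta, num_ML=20, num_CL=20,
                       flag_same=True)
    assert S.shape == (2 * (20 + 20), 4)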
def calculate_A_matrix_autograd(x1, y1, x2, y2, x3, y3, x4, y4, x5, y5, P, normalize=False): """ Calculate the A matrix for the DLT algorithm: A.H = 0 all coordinates are in object plane """ X1 = np.array([[x1], [y1], [0.], [1.]]).reshape(4, 1) X2 = np.array([[x2], [y2], [0.], [1.]]).reshape(4, 1) X3 = np.array([[x3], [y3], [0.], [1.]]).reshape(4, 1) X4 = np.array([[x4], [y4], [0.], [1.]]).reshape(4, 1) X5 = np.array([[x5], [y5], [0.], [1.]]).reshape(4, 1) U1 = np.array(np.dot(P, X1)).reshape(3, 1) U2 = np.array(np.dot(P, X2)).reshape(3, 1) U3 = np.array(np.dot(P, X3)).reshape(3, 1) U4 = np.array(np.dot(P, X4)).reshape(3, 1) U5 = np.array(np.dot(P, X5)).reshape(3, 1) object_pts = np.hstack([X1, X2, X3, X4, X5]) image_pts = np.hstack([U1, U2, U3, U4, U5]) if normalize: object_pts_norm, T1 = normalise_points(object_pts) image_pts_norm, T2 = normalise_points(image_pts) else: object_pts_norm = object_pts[[0, 1, 3], :] image_pts_norm = image_pts x1 = object_pts_norm[0, 0] / object_pts_norm[2, 0] y1 = object_pts_norm[1, 0] / object_pts_norm[2, 0] x2 = object_pts_norm[0, 1] / object_pts_norm[2, 1] y2 = object_pts_norm[1, 1] / object_pts_norm[2, 1] x3 = object_pts_norm[0, 2] / object_pts_norm[2, 2] y3 = object_pts_norm[1, 2] / object_pts_norm[2, 2] x4 = object_pts_norm[0, 3] / object_pts_norm[2, 3] y4 = object_pts_norm[1, 3] / object_pts_norm[2, 3] x5 = object_pts_norm[0, 4] / object_pts_norm[2, 4] y5 = object_pts_norm[1, 4] / object_pts_norm[2, 4] u1 = image_pts_norm[0, 0] / image_pts_norm[2, 0] v1 = image_pts_norm[1, 0] / image_pts_norm[2, 0] u2 = image_pts_norm[0, 1] / image_pts_norm[2, 1] v2 = image_pts_norm[1, 1] / image_pts_norm[2, 1] u3 = image_pts_norm[0, 2] / image_pts_norm[2, 2] v3 = image_pts_norm[1, 2] / image_pts_norm[2, 2] u4 = image_pts_norm[0, 3] / image_pts_norm[2, 3] v4 = image_pts_norm[1, 3] / image_pts_norm[2, 3] u5 = image_pts_norm[0, 4] / image_pts_norm[2, 4] v5 = image_pts_norm[1, 4] / image_pts_norm[2, 4] A = np.array([ [0, 0, 0, -x1, -y1, -1, v1 * x1, v1 * y1, v1], [x1, y1, 1, 0, 0, 0, -u1 * x1, -u1 * y1, -u1], [0, 0, 0, -x2, -y2, -1, v2 * x2, v2 * y2, v2], [x2, y2, 1, 0, 0, 0, -u2 * x2, -u2 * y2, -u2], [0, 0, 0, -x3, -y3, -1, v3 * x3, v3 * y3, v3], [x3, y3, 1, 0, 0, 0, -u3 * x3, -u3 * y3, -u3], [0, 0, 0, -x4, -y4, -1, v4 * x4, v4 * y4, v4], [x4, y4, 1, 0, 0, 0, -u4 * x4, -u4 * y4, -u4], [0, 0, 0, -x5, -y5, -1, v5 * x5, v5 * y5, v5], [x5, y5, 1, 0, 0, 0, -u5 * x5, -u5 * y5, -u5], ]) return A
def fit(self, X, y): self.X_train_ = X self.y_train_ = y if self.x_covariance is None: self.x_covariance = 0.0 * self.X_train_.shape[1] if np.ndim(self.x_covariance) == 1: self.x_covariance = np.array([self.x_covariance]) # initial hyper-parameters theta0 = self.init_theta() # Calculate the initial weights self.derivative = np.ones(self.X_train_.shape) print(self.derivative[:10, :10]) # minimize the objective function optima = [ minimize(value_and_grad(self.log_marginal_likelihood), theta0, jac=True, method='L-BFGS-B') ] fig, ax = plt.subplots() ax.scatter(self.X_train_, self.derivative) if self.n_ters is not None: for iteration in range(self.n_ters): print(theta0) # Find the minimum iparams = minimize(value_and_grad( self.log_marginal_likelihood), theta0, jac=True, method='L-BFGS-B') print(iparams) # extract best values signal_variance, noise_likelihood, length_scale = \ self._get_kernel_params(iparams.x) # Recalculate the derivative K = self.rbf_covariance( self.X_train_, length_scale=np.exp(length_scale), signal_variance=np.exp(signal_variance)) K += np.exp(noise_likelihood) * np.eye(K.shape[0]) L = np.linalg.cholesky(K + self.jitter * np.eye(K.shape[0])) iweights = np.linalg.solve(L.T, np.linalg.solve(L, self.y_train_)) self.derivative = self.weights_grad(self.X_train_, iweights, np.exp(length_scale), np.exp(signal_variance)) print(self.derivative[:10, :10]) ax.scatter(self.X_train_, self.derivative) # make a new theta theta0 = np.hstack( [signal_variance, noise_likelihood, length_scale]) plt.show() print() print(optima) lml_values = list(map(itemgetter(1), optima)) best_params = optima[np.argmin(lml_values)][0] print(best_params) # Gather hyper parameters signal_variance, noise_likelihood, length_scale = \ self._get_kernel_params(best_params) self.signal_variance = np.exp(signal_variance) self.noise_likelihood = np.exp(noise_likelihood) self.length_scale = np.exp(length_scale) # Calculate the weights K = self.rbf_covariance(X, length_scale=self.length_scale, signal_variance=self.signal_variance) K += self.noise_likelihood * np.eye(K.shape[0]) L = np.linalg.cholesky(K + self.jitter * np.eye(K.shape[0])) weights = np.linalg.solve(L.T, np.linalg.solve(L, y)) self.weights = weights self.L = L self.K = K L_inv = solve_triangular(self.L.T, np.eye(self.L.shape[0])) self.K_inv = np.dot(L_inv, L_inv.T) return self
def shift_Xy_to_matrices(shift_X, shift_y=None, weights=False):
    flatten = lambda ls: [l_i for l in ls for l_i in l]
    source_X_l = []
    target_X_l = []
    source_y_l = []
    try:
        for (source_X_elt, target_X_elt, source_y_elt) in zip(*list(zip(*shift_X))[0:3]):
            if len(target_X_elt) > 0:
                target_X_l.append(target_X_elt)
                source_X_l.append(source_X_elt)
                source_y_l.append(source_y_elt)
    except:
        pdb.set_trace()
    if shift_y is None:
        if not weights:
            return (np.array(source_X_l),
                    np.vstack(tuple(flatten(target_X_l))),
                    np.array(source_y_l))
        else:
            assert len(next(iter(shift_X))) == 4
            return (np.array(source_X_l),
                    np.vstack(tuple(flatten(target_X_l))),
                    np.array(source_y_l),
                    np.array([shift_X_elt[-1] for shift_X_elt in shift_X]))
    else:
        target_y_l = []
        for (source_X_elt, target_X_elt, source_y_elt), target_y_elt in zip(
                zip(*list(zip(*shift_X))[0:3]), shift_y):
            if len(target_X_elt) > 0:
                target_y_l.append(target_y_elt)
        if not weights:
            return (np.array(source_X_l),
                    np.vstack(tuple(flatten(target_X_l))),
                    np.array(source_y_l),
                    np.hstack(tuple(flatten(target_y_l))))
        else:
            return (np.array(source_X_l),
                    np.vstack(tuple(flatten(target_X_l))),
                    np.array(source_y_l),
                    np.hstack(tuple(flatten(target_y_l))),
                    np.array([shift_X_elt[-1] for shift_X_elt in shift_X]))
def Burst_sol(t): x_true = burst_soln(t, burst["n"]) dxdt_true = dburst_soln(t, burst["n"]) x = np.hstack((x_true.reshape(t.size, 1), dxdt_true.reshape(t.size, 1))) return x
# Set up figure.
fig = plt.figure(figsize=(8, 8), facecolor='white')
ax = fig.add_subplot(111, frameon=False)
plt.ion()
plt.show(block=False)

for step in range(num_steps):
    # Grab a random datum
    datum_id = npr.randint(0, num_datums)

    # Assess expected reward across all possible actions (loop over context + action vectors)
    rewards = []
    contexts = np.zeros((num_actions, F))
    for aa in range(num_actions):
        contexts[aa, :] = np.hstack((x[datum_id, :], [aa]))
        outputs = generate_nn_output(variational_params,
                                     np.expand_dims(contexts[aa, :], 0),
                                     num_weights, num_samples)
        rewards.append(np.mean(outputs))

    # Check which is greater and choose that: [1,0] = eat | [0,1] = do not eat.
    # If argmax returns 0, then we eat; otherwise we don't.
    action_chosen = np.argmax(rewards)
    agent_reward, oracle_reward = reward_function(action_chosen, y[datum_id])

    # Calculate the cumulative regret
    cumulative_regret += oracle_reward - agent_reward

    # Store the experience of that reward as a training/data pair
def ord_params_GLLVM(y_ord, nj_ord, lambda_ord_old, ps_y, pzl1_ys, zl1_s, AT,\ tol = 1E-5, maxstep = 100): ''' Determine the GLLVM coefficients related to ordinal coefficients by optimizing each column coefficients separately. y_ord (numobs x nb_ord nd-array): The ordinal data nj_ord (list of int): The number of modalities for each ord variable lambda_ord_old (list of nb_ord_j x (nj_ord + r1) elements): The ordinal coefficients of the previous iteration ps_y ((numobs, S) nd-array): p(s | y) for all s in Omega pzl1_ys (nd-array): p(z1 | y, s) zl1_s ((M1, r1, s1) nd-array): z1 | s AT ((r1 x r1) nd-array): Var(z1)^{-1/2} tol (int): Control when to stop the optimisation process maxstep (int): The maximum number of optimization step. ---------------------------------------------------------------------- returns (list of nb_ord_j x (nj_ord + r1) elements): The new ordinal coefficients ''' #**************************** # Ordinal link parameters #**************************** r0 = zl1_s.shape[1] S0 = zl1_s.shape[2] nb_ord = len(nj_ord) new_lambda_ord = [] for j in range(nb_ord): enc = OneHotEncoder(categories='auto') y_oh = enc.fit_transform(y_ord[:,j][..., n_axis]).toarray() # Define the constraints such that the threshold coefficients are ordered nb_constraints = nj_ord[j] - 2 nb_params = nj_ord[j] + r0 - 1 lcs = np.full(nb_constraints, -1) lcs = np.diag(lcs, 1) np.fill_diagonal(lcs, 1) lcs = np.hstack([lcs[:nb_constraints, :], \ np.zeros([nb_constraints, nb_params - (nb_constraints + 1)])]) linear_constraint = LinearConstraint(lcs, np.full(nb_constraints, -np.inf), \ np.full(nb_constraints, 0), keep_feasible = True) opt = minimize(ord_loglik_j, lambda_ord_old[j] ,\ args = (y_oh, zl1_s, S0, ps_y, pzl1_ys, nj_ord[j]), tol = tol, method='trust-constr', jac = ord_grad_j, \ constraints = linear_constraint, hess = '2-point',\ options = {'maxiter': maxstep}) res = opt.x if not(opt.success): # If the program fail, keep the old estimate as value print(opt) res = lambda_ord_old[j] warnings.warn('One of the ordinal optimisations has failed', RuntimeWarning) # Ensure identifiability for Lambda_j new_lambda_ord_j = (res[-r0: ].reshape(1, r0) @ AT[0]).flatten() new_lambda_ord_j = np.hstack([deepcopy(res[: nj_ord[j] - 1]), new_lambda_ord_j]) new_lambda_ord.append(new_lambda_ord_j) return new_lambda_ord
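To make the ordering constraint above concrete, here is a small standalone construction of the lcs matrix for a hypothetical ordinal variable with nj = 5 modalities and r0 = 2 latent dimensions (sizes chosen only for illustration); each row encodes lambda_k - lambda_{k+1} <= 0, so the threshold coefficients must be non-decreasing while the trailing r0 slope coefficients stay unconstrained.

import numpy as np

nj, r0 = 5, 2                        # hypothetical sizes, not taken from the model above
nb_constraints = nj - 2              # one constraint per pair of adjacent thresholds
nb_params = nj + r0 - 1              # (nj - 1) thresholds + r0 slope coefficients

lcs = np.full(nb_constraints, -1)
lcs = np.diag(lcs, 1)                # -1 on the first superdiagonal
np.fill_diagonal(lcs, 1)             # +1 on the main diagonal
lcs = np.hstack([lcs[:nb_constraints, :],
                 np.zeros([nb_constraints, nb_params - (nb_constraints + 1)])])

print(lcs)
# each row reads lambda_k - lambda_{k+1} <= 0 once paired with the (-inf, 0] bounds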
def plot_prob_stat_above_thresh(ex, fname, h1_true, func_xvalues, xlabel, func_title=None):
    """
    Plot the empirical probability that the statistic is above the threshold.
    This can be interpreted as type-I error (when H0 is true) or test power
    (when H1 is true). The plot is against the specified x-axis.

    - ex: experiment number
    - fname: file name of the aggregated result
    - h1_true: True if H1 is true
    - func_xvalues: function taking the results dictionary and returning the
        values to be used for the x-axis.
    - xlabel: label of the x-axis.
    - func_title: a function: results dictionary -> title of the plot

    Return loaded results
    """
    results = glo.ex_load_result(ex, fname)

    f_pval = lambda job_result: job_result['test_result']['h0_rejected']
    #f_pval = lambda job_result: job_result['h0_rejected']
    vf_pval = np.vectorize(f_pval)
    pvals = vf_pval(results['test_results'])
    repeats, _, n_methods = results['test_results'].shape
    mean_rejs = np.mean(pvals, axis=0)
    #std_pvals = np.std(pvals, axis=0)
    #std_pvals = np.sqrt(mean_rejs*(1.0-mean_rejs))

    xvalues = func_xvalues(results)

    #ns = np.array(results[xkey])
    #te_proportion = 1.0 - results['tr_proportion']
    #test_sizes = ns*te_proportion
    line_styles = exglo.func_plot_fmt_map()
    method_labels = exglo.get_func2label_map()

    func_names = [f.__name__ for f in results['method_job_funcs']]
    for i in range(n_methods):
        te_proportion = 1.0 - results['tr_proportion']
        fmt = line_styles[func_names[i]]
        #plt.errorbar(ns*te_proportion, mean_rejs[:, i], std_pvals[:, i])
        method_label = method_labels[func_names[i]]
        plt.plot(xvalues, mean_rejs[:, i], fmt, label=method_label)
    '''
    else:
        # h0 is true
        z = stats.norm.isf( (1-confidence)/2.0)
        for i in range(n_methods):
            phat = mean_rejs[:, i]
            conf_iv = z*(phat*(1-phat)/repeats)**0.5
            #plt.errorbar(test_sizes, phat, conf_iv, fmt=line_styles[i], label=method_labels[i])
            plt.plot(test_sizes, mean_rejs[:, i], line_styles[i], label=method_labels[i])
    '''

    ylabel = 'Test power' if h1_true else 'Type-I error'
    plt.ylabel(ylabel)
    plt.xlabel(xlabel)
    plt.xticks(np.hstack((xvalues)))

    alpha = results['alpha']
    """
    if not h1_true:
        # plot Wald interval if H0 is true
        # https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval
        z = stats.norm.isf( (1-confidence)/2.0)
        gap = z*(alpha*(1-alpha)/repeats)**0.5
        lb = alpha-gap
        ub = alpha+gap
        plt.plot(test_sizes, np.repeat(lb, len(test_sizes)), '--', linewidth=2,
                 label='99%-Conf', color='k')
        plt.plot(test_sizes, np.repeat(ub, len(test_sizes)), '--', linewidth=2, color='k')
        plt.ylim([lb-0.005, ub+0.005])
    """
    plt.legend(loc='best')
    title = '%s. %d trials. $\\alpha$ = %.2g.' % (results['prob_label'], repeats,
            alpha) if func_title is None else func_title(results)
    plt.title(title)
    #plt.grid()
    return results
def augmented_dynamics(augmented_state, t, flat_args): # Orginal system augmented with vjp_y, vjp_t and vjp_args. y, vjp_y, _, _ = unpack(augmented_state) vjp_all, dy_dt = make_vjp(flat_func, argnum=(0, 1, 2))(y, t, flat_args) vjp_y, vjp_t, vjp_args = vjp_all(-vjp_y) return np.hstack((dy_dt, vjp_y, vjp_t, vjp_args))
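The augmented state used here is a flat concatenation of (y, adjoint of y, adjoint of t, adjoint of the flattened args), matching the hstack built in vjp_all. A minimal unpack consistent with that layout might look as follows; D and n_args are made-up sizes standing in for the real problem dimensions.

import numpy as np

D, n_args = 3, 4   # hypothetical state and parameter sizes

def unpack(aug):
    # aug = [ y (D) | vjp_y (D) | vjp_t (1) | vjp_args (n_args) ]
    y = aug[:D]
    vjp_y = aug[D:2 * D]
    vjp_t = aug[2 * D]
    vjp_args = aug[2 * D + 1:]
    return y, vjp_y, vjp_t, vjp_args

aug = np.arange(2 * D + 1 + n_args, dtype=float)
print(unpack(aug))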
def concat_and_multiply(weights, *args): cat_state = np.hstack(args + (np.ones((args[0].shape[0], 1)),)) return np.dot(cat_state, weights)
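concat_and_multiply is an affine map that appends a column of ones to supply the bias row of the weight matrix. A quick standalone check with made-up shapes:

import numpy as np

batch, d_in, d_hidden = 4, 3, 5
inputs = np.random.randn(batch, d_in)
hiddens = np.random.randn(batch, d_hidden)
# weight rows = d_in + d_hidden + 1; the extra row is the bias
weights = np.random.randn(d_in + d_hidden + 1, d_hidden)

out = concat_and_multiply(weights, inputs, hiddens)
print(out.shape)   # (4, 5)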
def populate_coordinates(self): # Populates a variable called self.coordinates with the coordinates of the airfoil. name = self.name.lower().strip() # If it's a NACA 4-series airfoil, try to generate it if "naca" in name: nacanumber = name.split("naca")[1] if nacanumber.isdigit(): if len(nacanumber) == 4: # Parse max_camber = int(nacanumber[0]) * 0.01 camber_loc = int(nacanumber[1]) * 0.1 thickness = int(nacanumber[2:]) * 0.01 # Set number of points per side n_points_per_side = 100 # Referencing https://en.wikipedia.org/wiki/NACA_airfoil#Equation_for_a_cambered_4-digit_NACA_airfoil # from here on out # Make uncambered coordinates x_t = cosspace(n_points=n_points_per_side ) # Generate some cosine-spaced points y_t = 5 * thickness * ( +0.2969 * np.power(x_t, 0.5) - 0.1260 * x_t - 0.3516 * np.power(x_t, 2) + 0.2843 * np.power(x_t, 3) - 0.1015 * np.power( x_t, 4) # 0.1015 is original, #0.1036 for sharp TE ) if camber_loc == 0: camber_loc = 0.5 # prevents divide by zero errors for things like naca0012's. # Get camber y_c_piece1 = max_camber / camber_loc**2 * ( 2 * camber_loc * x_t[x_t <= camber_loc] - x_t[x_t <= camber_loc]**2) y_c_piece2 = max_camber / (1 - camber_loc)**2 * ( (1 - 2 * camber_loc) + 2 * camber_loc * x_t[x_t > camber_loc] - x_t[x_t > camber_loc]**2) y_c = np.hstack((y_c_piece1, y_c_piece2)) # Get camber slope dycdx_piece1 = 2 * max_camber / camber_loc**2 * ( camber_loc - x_t[x_t <= camber_loc]) dycdx_piece2 = 2 * max_camber / (1 - camber_loc)**2 * ( camber_loc - x_t[x_t > camber_loc]) dycdx = np.hstack((dycdx_piece1, dycdx_piece2)) theta = np.arctan(dycdx) # Combine everything x_U = x_t - y_t * np.sin(theta) x_L = x_t + y_t * np.sin(theta) y_U = y_c + y_t * np.cos(theta) y_L = y_c - y_t * np.cos(theta) # Flip upper surface so it's back to front x_U, y_U = np.flipud(x_U), np.flipud(y_U) # Trim 1 point from lower surface so there's no overlap x_L, y_L = x_L[1:], y_L[1:] x = np.hstack((x_U, x_L)) y = np.hstack((y_U, y_L)) coordinates = np.column_stack((x, y)) self.coordinates = coordinates return else: print( "Unfortunately, only 4-series NACA airfoils can be generated at this time." ) # Try to read from airfoil database try: import importlib.resources from . import airfoils raw_text = importlib.resources.read_text(airfoils, name + '.dat') trimmed_text = raw_text[raw_text.find('\n'):] coordinates1D = np.fromstring( trimmed_text, sep='\n') # returns the coordinates in a 1D array assert len( coordinates1D ) % 2 == 0, 'File was found in airfoil database, but it could not be read correctly!' # Should be even coordinates = np.reshape(coordinates1D, (-1, 2)) self.coordinates = coordinates return except FileNotFoundError: print("File was not found in airfoil database!")
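populate_coordinates depends on a cosspace helper that returns cosine-spaced points, clustering them near the leading and trailing edges where curvature is highest. A minimal version of such a helper (an assumption for illustration, not necessarily the project's own implementation):

import numpy as np

def cosspace(minimum=0.0, maximum=1.0, n_points=50):
    # Cosine-spaced points on [minimum, maximum], denser near both endpoints.
    mean = (maximum + minimum) / 2
    amp = (maximum - minimum) / 2
    return mean + amp * np.cos(np.linspace(np.pi, 0, n_points))

print(cosspace(n_points=5))   # [0.0, 0.146..., 0.5, 0.853..., 1.0]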
def genConstraints(prng, label, alpha, beta, num_ML, num_CL, start_expert=0,
                   flag_same=False):
    """
    This function generates pairwise constraints (ML/CL) using ground-truth
    cluster labels and noise parameters.

    Parameters
    ----------
    label: shape(n_sample, )
        cluster label of all the samples
    alpha: shape(n_expert, )
        sensitivity parameters of experts
    beta: shape(n_expert, )
        specificity parameters of experts
    num_ML: int
        number of must-link constraints drawn per expert
    num_CL: int
        number of cannot-link constraints drawn per expert
    flag_same: True if different experts provide constraints for the same set
        of sample pairs, False if different experts provide constraints for
        different sets of sample pairs

    Returns
    -------
    S: shape(n_con, 4)
        The first column -> expert id
        The second and third column -> (row, column) indices of two samples
        The fourth column -> constraint values (1 for ML and 0 for CL)
    """
    n_sample = len(label)
    tp = np.tile(label, (n_sample, 1))
    label_mat = (tp == tp.T).astype(int)
    ML_set = []
    CL_set = []
    # get indices of upper-triangle matrix
    [row, col] = np.triu_indices(n_sample, k=1)  # n_sample * (n_sample-1)/2
    for idx in range(len(row)):
        if label_mat[row[idx], col[idx]] == 1:
            ML_set.append([row[idx], col[idx]])
        elif label_mat[row[idx], col[idx]] == 0:
            CL_set.append([row[idx], col[idx]])
        else:
            print("Invalid matrix entry values")
    ML_set = np.array(ML_set)
    CL_set = np.array(CL_set)
    assert num_ML < ML_set.shape[0]
    assert num_CL < CL_set.shape[0]

    # generate noisy constraints for each expert
    assert len(alpha) == len(beta)
    n_expert = len(alpha)
    # initialize the constraint matrix
    S = np.zeros((0, 4))
    # different experts provide constraints for the same set of sample pairs
    if flag_same == True:
        idx_ML = prng.choice(ML_set.shape[0], num_ML, replace=False)
        idx_CL = prng.choice(CL_set.shape[0], num_CL, replace=False)
        ML = ML_set[idx_ML, :]
        CL = CL_set[idx_CL, :]
        for m in range(n_expert):
            val_ML = prng.binomial(1, alpha[m], num_ML)
            val_CL = prng.binomial(1, 1 - beta[m], num_CL)
            Sm_ML = np.hstack((np.ones((num_ML, 1)) * (m + start_expert), ML,
                               val_ML.reshape(val_ML.size, 1)))
            Sm_CL = np.hstack((np.ones((num_CL, 1)) * (m + start_expert), CL,
                               val_CL.reshape(val_CL.size, 1)))
            S = np.vstack((S, Sm_ML, Sm_CL)).astype(int)
    # different experts provide constraints for different sets of sample pairs
    else:
        for m in range(n_expert):
            prng = np.random.RandomState(1000 + m)
            idx_ML = prng.choice(ML_set.shape[0], num_ML, replace=False)
            idx_CL = prng.choice(CL_set.shape[0], num_CL, replace=False)
            ML = ML_set[idx_ML, :]
            CL = CL_set[idx_CL, :]
            val_ML = prng.binomial(1, alpha[m], num_ML)
            val_CL = prng.binomial(1, 1 - beta[m], num_CL)
            Sm_ML = np.hstack((np.ones((num_ML, 1)) * (m + start_expert), ML,
                               val_ML.reshape(val_ML.size, 1)))
            Sm_CL = np.hstack((np.ones((num_CL, 1)) * (m + start_expert), CL,
                               val_CL.reshape(val_CL.size, 1)))
            S = np.vstack((S, Sm_ML, Sm_CL)).astype(int)
    return S
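A small usage sketch for genConstraints with made-up cluster labels and two simulated experts; all numbers are illustrative only.

import numpy as np

prng = np.random.RandomState(0)
label = np.array([0, 0, 0, 1, 1, 1, 2, 2])   # toy ground-truth clusters
alpha = np.array([0.9, 0.7])                 # expert sensitivities
beta = np.array([0.9, 0.8])                  # expert specificities

S = genConstraints(prng, label, alpha, beta, num_ML=5, num_CL=5, flag_same=True)
print(S.shape)   # (n_expert * (num_ML + num_CL), 4)
print(S[:5])     # rows of [expert_id, i, j, constraint_value]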
def optimize_locs_params( p, dat, b0, c0, test_locs0, reg=1e-2, max_iter=100, tol_fun=1e-5, disp=False, locs_bounds_frac=100, b_lb=-20.0, b_ub=-1e-4, c_lb=1e-6, c_ub=1e3, ): """ Optimize the test locations and the the two parameters (b and c) of the IMQ kernel by maximizing the test power criterion. k(x,y) = (c^2 + ||x-y||^2)^b where c > 0 and b < 0. data should not be the same data as used in the actual test (i.e., should be a held-out set). This function is deterministic. - p: UnnormalizedDensity specifying the problem. - b0: initial parameter value for b (in the kernel) - c0: initial parameter value for c (in the kernel) - dat: a Data object (training set) - test_locs0: Jxd numpy array. Initial V. - reg: reg to add to the mean/sqrt(variance) criterion to become mean/sqrt(variance + reg) - max_iter: #gradient descent iterations - tol_fun: termination tolerance of the objective value - disp: True to print convergence messages - locs_bounds_frac: When making box bounds for the test_locs, extend the box defined by coordinate-wise min-max by std of each coordinate multiplied by this number. - b_lb: absolute lower bound on b. b is always < 0. - b_ub: absolute upper bound on b - c_lb: absolute lower bound on c. c is always > 0. - c_ub: absolute upper bound on c #- If the lb, ub bounds are None Return (V test_locs, b, c, optimization info log) """ """ In the optimization, we will parameterize b with its square root. Square back and negate to form b. c is not parameterized in any special way since it enters to the kernel with c^2. Absolute value of c will be taken to make sure it is positive. """ J = test_locs0.shape[0] X = dat.data() n, d = X.shape def obj(sqrt_neg_b, c, V): b = -sqrt_neg_b**2 return -IMQFSSD.power_criterion(p, dat, b, c, V, reg=reg) flatten = lambda sqrt_neg_b, c, V: np.hstack( (sqrt_neg_b, c, V.reshape(-1))) def unflatten(x): sqrt_neg_b = x[0] c = x[1] V = np.reshape(x[2:], (J, d)) return sqrt_neg_b, c, V def flat_obj(x): sqrt_neg_b, c, V = unflatten(x) return obj(sqrt_neg_b, c, V) # gradient # grad_obj = autograd.elementwise_grad(flat_obj) # Initial point b02 = np.sqrt(-b0) x0 = flatten(b02, c0, test_locs0) # Make a box to bound test locations X_std = np.std(X, axis=0) # X_min: length-d array X_min = np.min(X, axis=0) X_max = np.max(X, axis=0) # V_lb: J x d V_lb = np.tile(X_min - locs_bounds_frac * X_std, (J, 1)) V_ub = np.tile(X_max + locs_bounds_frac * X_std, (J, 1)) # (J*d+2) x 2. Make sure to bound the reparamterized values (not the original) """ For b, b2 := sqrt(-b) lb <= b <= ub < 0 means sqrt(-ub) <= b2 <= sqrt(-lb) Note the positions of ub, lb. """ x0_lb = np.hstack((np.sqrt(-b_ub), c_lb, np.reshape(V_lb, -1))) x0_ub = np.hstack((np.sqrt(-b_lb), c_ub, np.reshape(V_ub, -1))) x0_bounds = list(zip(x0_lb, x0_ub)) # optimize. Time the optimization as well. # https://docs.scipy.org/doc/scipy/reference/optimize.minimize-lbfgsb.html grad_obj = autograd.elementwise_grad(flat_obj) with util.ContextTimer() as timer: opt_result = scipy.optimize.minimize( flat_obj, x0, method='L-BFGS-B', bounds=x0_bounds, tol=tol_fun, options={ 'maxiter': max_iter, 'ftol': tol_fun, 'disp': disp, 'gtol': 1.0e-06, }, jac=grad_obj, ) opt_result = dict(opt_result) opt_result['time_secs'] = timer.secs x_opt = opt_result['x'] sqrt_neg_b, c, V_opt = unflatten(x_opt) b = -sqrt_neg_b**2 assert util.is_real_num(b), 'b is not real. Was {}'.format(b) assert b < 0 assert util.is_real_num(c), 'c is not real. Was {}'.format(c) assert c > 0 return V_opt, b, c, opt_result
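The optimization above works on sqrt(-b) so that the IMQ exponent stays negative without an explicit sign constraint. A tiny numeric check of the forward/backward mapping and of how the (b_lb, b_ub) box becomes bounds on sqrt(-b):

import numpy as np

b_lb, b_ub = -20.0, -1e-4        # bounds on b itself (b < 0)
b0 = -0.5                        # some initial exponent
sqrt_neg_b = np.sqrt(-b0)        # the quantity actually optimized
b_back = -sqrt_neg_b**2          # map back; recovers b0
assert np.isclose(b_back, b0)

# lb <= b <= ub < 0  becomes  sqrt(-ub) <= sqrt(-b) <= sqrt(-lb)
lo, hi = np.sqrt(-b_ub), np.sqrt(-b_lb)
assert lo <= sqrt_neg_b <= hi
print(lo, sqrt_neg_b, hi)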
def loss(self, W): _W = np.hstack([-W, W]) logits = np.dot(_W.T, self.X) return math.softmax_cross_entropy(logits, self.Y)
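math.softmax_cross_entropy here is whatever helper the surrounding module provides; as a rough stand-in (an assumption about the expected classes-by-examples layout, not the module's actual implementation), a minimal numpy version could be:

import numpy as np

def softmax_cross_entropy(logits, targets):
    # logits, targets: (num_classes, num_examples); targets are one-hot columns.
    logits = logits - logits.max(axis=0, keepdims=True)   # numerical stabilization
    log_probs = logits - np.log(np.exp(logits).sum(axis=0, keepdims=True))
    return -np.mean(np.sum(targets * log_probs, axis=0))

logits = np.array([[2.0, 0.5], [0.1, 1.5]])
targets = np.array([[1.0, 0.0], [0.0, 1.0]])
print(softmax_cross_entropy(logits, targets))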
# x_means = np.mean(x, axis=0)
# x_stds = np.std(x, axis=0)
#
#
# def normalize(data, data_mean, data_std):
#     normalized_data = (data - data_mean) / data_std
#     return normalized_data
#
#
# x_orig = copy.deepcopy(x)
# x_norm = normalize(x, x_means, x_stds)

# add ones to the data
one = np.ones(len(x))
one = one.reshape((-1, 1))
x_orig = np.hstack((one, x))
# x_norm = np.hstack((one, x_norm))
print("x_orig.T", x_orig.T)
# print(x_norm.shape[0])
# print(x_norm.shape[1])


# # Perceptron cost function
# def perceptron(w):
#     cost = np.sum(np.maximum(0, -y * np.dot(x_norm, w)))
#     return cost / float(np.size(y))


# Perceptron cost function
def perceptron0(w):
    cost = np.sum(np.maximum(0, -y0 * np.dot(x_orig, w)))
    return cost / float(np.size(y0))
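Since the rest of the document leans on autograd for gradients, one way to sanity-check perceptron0 is to differentiate it directly; the toy data below stand in for x_orig and y0 above.

import autograd.numpy as np
from autograd import grad

# toy stand-ins for x_orig (with bias column) and y0
x_orig = np.hstack((np.ones((5, 1)), np.random.randn(5, 2)))
y0 = np.array([1., -1., 1., 1., -1.])

def perceptron0_check(w):
    return np.sum(np.maximum(0, -y0 * np.dot(x_orig, w))) / float(np.size(y0))

w = np.array([0.1, -0.2, 0.3])
print(perceptron0_check(w), grad(perceptron0_check)(w))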
def controllability_matrix(A, B): M = [B] for i in range(A.shape[0] - 1): M.append(np.dot(A, M[-1])) return np.hstack(M)
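A quick usage check of controllability_matrix on a double-integrator pair (A, B); the system is controllable exactly when the stacked matrix has full row rank.

import numpy as np

A = np.array([[0., 1.],
              [0., 0.]])
B = np.array([[0.],
              [1.]])

M = controllability_matrix(A, B)                 # [B, AB]
print(M)                                         # [[0, 1], [1, 0]]
print(np.linalg.matrix_rank(M) == A.shape[0])    # True -> controllable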
ippe_rmat_error_list2 = [] epnp_tvec_error_list = [] epnp_rmat_error_list = [] pnp_tvec_error_list = [] pnp_rmat_error_list = [] new_objectPoints = objectPoints_des new_imagePoints = np.array(cam.project(new_objectPoints, False)) homography_iters = 1 for i in range(100): # Xo = np.copy(new_objectPoints[[0,1,3],:]) #without the z coordinate (plane) # Xi = np.copy(new_imagePoints) # Aideal = ef.calculate_A_matrix(Xo, Xi) objectPoints_list.append(new_objectPoints) objectPoints_historic = np.hstack( [objectPoints_historic, new_objectPoints]) imagePoints_list.append(new_imagePoints) #Calculate the pose using solvepnp ITERATIVE new_imagePoints_noisy = cam.addnoise_imagePoints(new_imagePoints, mean=0, sd=2) pnp_tvec, pnp_rmat = pose_pnp(new_objectPoints, new_imagePoints_noisy, cam.K, False, cv2.SOLVEPNP_ITERATIVE, False) pnpCam = cam.clone_withPose(pnp_tvec, pnp_rmat) x1, y1, x2, y2, x3, y3, x4, y4 = gd.extract_objectpoints_vars( new_objectPoints) repro = gd.repro_error_autograd(x1, y1, x2, y2, x3, y3, x4, y4, pnpCam.P, new_imagePoints_noisy) print "--------------------------------" print "Repro Error: ", repro
def stack_params(u, V, R): u = u.reshape(-1) V = V.reshape(-1) R = R.reshape(-1) params = np.hstack((np.hstack((u, V)), R)) return params
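stack_params flattens (u, V, R) into a single vector for an optimizer, but the inverse needs the shapes, which this snippet does not record. The unstack below therefore hard-codes hypothetical dimensions purely for illustration.

import numpy as np

d, k = 3, 2    # hypothetical sizes: u is (d,), V is (d, k), R is (k, k)

def unstack_params(params, d=d, k=k):
    u = params[:d]
    V = params[d:d + d * k].reshape(d, k)
    R = params[d + d * k:].reshape(k, k)
    return u, V, R

u, V, R = np.zeros(d), np.ones((d, k)), np.eye(k)
params = stack_params(u, V, R)
u2, V2, R2 = unstack_params(params)
assert np.allclose(V, V2) and np.allclose(R, R2)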
#ipdb.set_trace() #x_new = np.random.random((2,1))*15+np.array([[-5],[0]]) #expected_improvement(x_new, model_gp, y) kg_list = [] for i_rep in range(100): next_point = sample_next_hyperparameter(expected_improvement, model_gp, y, bounds=bounds, n_restarts=25) #ipdb.set_trace() next_loss = target_function(next_point[:, np.newaxis]) #ipdb.set_trace() kg_list.append(calculate_kg(next_point, model_gp, x, y, bounds)) y = np.hstack((y, next_loss)) x = np.hstack((x, next_point[:, np.newaxis])) model_gp.fit(x.transpose(), y) ipdb.set_trace() real_loss = [model_gp.predict(params.reshape(1, -1)) for params in param_grid] # The maximum is at: print param_grid[np.array(real_loss).argmin(), :] C, G = np.meshgrid(lambdas, gammas) plt.figure() cp = plt.contourf(C, G, np.array(real_loss).reshape(C.shape)) plt.scatter(x[0, :], x[1, :]) plt.colorbar(cp) plt.savefig('surface_gp_end.png')