def initParam(prior, X, N, D, G, M, K, dir_param, prng):
    """ initialize variational parameters with prior parameters """
    [tpM, tpG, lb, ub] = [np.ones(M), np.ones(G), 10., 10.]
    tpR = prng.rand(2*M)
    [tau_a1, tau_a2, tau_b1, tau_b2, tau_v1, tau_v2] = \
        [lb+(ub-lb)*tpR[0 : M], tpM,
         lb+(ub-lb)*tpR[M : 2*M], tpM,
         tpG, tpG]
    mu_w = prng.randn(G, D, K)/np.sqrt(D)
    sigma_w = np.ones(G*D*K) * 1e-3
    mu_b = prng.randn(G, K)/np.sqrt(D)
    sigma_b = np.ones(G*K) * 1e-3
    phi = np.reshape(prng.dirichlet(np.ones(G)*dir_param, M), M*G)
    mu_w = np.reshape(mu_w, G*D*K)
    mu_b = np.reshape(mu_b, G*K)
    param_init = np.concatenate((tau_a1, tau_a2, tau_b1, tau_b2, phi, tau_v1,
                                 tau_v2, mu_w, sigma_w, mu_b, sigma_b))
    return param_init
def ye_limit(x, trackwidth):
    k = TrackCurvature(x)
    N = len(k)
    lowlimit = -trackwidth/2 * np.ones(N)
    highlimit = trackwidth/2 * np.ones(N)
    # use a 5% margin so we can't actually hit the center of curvature
    lowlimit[k < 0] = np.maximum(0.95/k[k < 0], -trackwidth/2)
    highlimit[k > 0] = np.minimum(0.95/k[k > 0], trackwidth/2)
    return lowlimit, highlimit
def init_params(D, rs=npr.RandomState(0), **kwargs):
    init_mean = -1 * np.ones(D) + rs.randn(D) * .1
    init_log_std = -5 * np.ones(D) + rs.randn(D) * .1
    # u, w, b
    norm_flow_params = [[rs.randn(D, 1), rs.randn(D, 1), rs.randn(1)]
                        for x in range(k)]
    return [init_mean, init_log_std, norm_flow_params]
def initParams(num):
    mat = np.random.randn(m, m)
    return dict({num + 'z': np.reshape(np.linspace(0.0, 1.0, num=m), (m, 1)),
                 num + 'u_mean': np.random.randn(m, 1),
                 num + 'u_cov_fac': mat @ mat.T,
                 num + 'h_mean': np.random.randn(n, 1),
                 num + 'h_cov_fac': np.random.randn(n, 1),
                 num + 'kernel_noise': np.ones((1, 1)),
                 num + 'kernel_lenscale': np.ones((1, 1)),
                 num + 'function_noise': np.ones((1, 1))})
def init_params(D, rs=npr.RandomState(0), **kwargs):
    init_mean = -1 * np.ones(D) + rs.randn(D) * .1
    init_log_std = -5 * np.ones(D) + rs.randn(D) * .1
    # gauss_params = np.concatenate([init_mean, init_log_std])
    u = rs.randn(D, 1)
    w = rs.randn(D, 1)
    b = rs.randn(1)
    return [init_mean, init_log_std, u, w, b]
def compute_stats(Ex, ExxT, ExnxT, inhomog):
    T = Ex.shape[-1]
    E_init_stats = ExxT[:, :, 0], Ex[:, 0], 1., 1.
    E_pair_stats = np.transpose(ExxT, (2, 0, 1))[:-1], \
        ExnxT.T, np.transpose(ExxT, (2, 0, 1))[1:], np.ones(T-1)
    E_node_stats = np.diagonal(ExxT.T, axis1=-1, axis2=-2), Ex.T, np.ones(T)
    if not inhomog:
        E_pair_stats = map(lambda x: np.sum(x, axis=0), E_pair_stats)
    return E_init_stats, E_pair_stats, E_node_stats
def sinkhorn(w1, w2, M, reg, k):
    """Sinkhorn algorithm with a fixed number of iterations (autograd)"""
    K = np.exp(-M / reg)
    ui = np.ones((M.shape[0],))
    vi = np.ones((M.shape[1],))
    for i in range(k):
        vi = w2 / (np.dot(K.T, ui))
        ui = w1 / (np.dot(K, vi))
    G = ui.reshape((M.shape[0], 1)) * K * vi.reshape((1, M.shape[1]))
    return G
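# Hedged usage sketch (added illustration, not from the original source):
# run the sinkhorn routine above on a tiny cost matrix with uniform
# marginals; the resulting plan G has total mass ~1 and its row/column
# sums approach w1 and w2.
import autograd.numpy as np

w1 = np.ones(3) / 3.0                       # source histogram
w2 = np.ones(4) / 4.0                       # target histogram
M = np.abs(np.arange(3)[:, None] - np.arange(4)[None, :]).astype(float)
G = sinkhorn(w1, w2, M, reg=0.1, k=100)     # approximate transport plan
print(G.sum())                              # ~1.0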
def get_Aopt(inX, iny):
    X_train, y_train, X_test, y_test = ascdata.split_train_test(inX, iny)
    X_train = np.concatenate((X_train, np.ones((X_train.shape[0], 1))), 1)
    X_test = np.concatenate((X_test, np.ones((X_test.shape[0], 1))), 1)
    X_train_less, s_train = ascdata.split_X_s(X_train)
    X_test_less, s_test = ascdata.split_X_s(X_test)
    s_train_phi = ascdata.generate_phi(s_train, d, A_phi, b_phi)
    s_test_phi = ascdata.generate_phi(s_test, d, A_phi, b_phi)

    nfeatures = X_train.shape[1] - 1
    # Dimensions of phi(s)
    nfeatures_phi = d
    invT2 = 10

    def logprob(inA, inX, iny, ins_phi):
        RMS = 0
        for i in range(len(iny)):
            wi = np.dot(inA, inX[i])
            RMS_current = (iny[i] - np.dot(wi, ins_phi[i]))**2
            RMS += RMS_current
        return -RMS

    objective = lambda inA, t: -logprob(inA, X_train_less, y_train, s_train_phi)

    LLHs = []
    LLH_xs = []

    def callback(params, t, g):
        LLH = -objective(params, t)
        LLHs.append(LLH)
        LLH_xs.append(t)
        print("Iteration {} log likelihood {}".format(t, LLH))

    init_A = 0.00000000001 * (np.ones((nfeatures_phi, nfeatures)))
    # init_A = [[ -3.05236728e-04, -9.50015728e-04, -3.80139503e-04,  1.44010470e-04, -3.05236728e-04,
    #            -4.96117987e-04, -1.02736409e-04, -1.86416292e-04, -9.52628589e-04, -1.55023279e-03,
    #             1.44717581e-04,  1.00000000e-11, -9.50028200e-04, -4.96117987e-04,  1.00000000e-11,
    #            -3.05236728e-04,  1.77416412e-06, -8.16665436e-06,  3.12622951e-05, -8.25700143e-04,
    #             1.44627987e-04,  1.90211243e-05, -8.28273186e-04, -9.41349990e-04, -4.56671031e-04,
    #             9.79097070e-03, -6.41866046e-04, -7.79274856e-05,  1.44539330e-04, -3.05236728e-04,
    #            -5.99188450e-04, -7.29470175e-04, -6.69558174e-04, -9.50028200e-04]]
    init_A = np.array(init_A)

    print("Optimizing network parameters...")
    optimized_params = adam(grad(objective), init_A,
                            step_size=0.01, num_iters=1000, callback=callback)
    Aopt = optimized_params
    print("Aopt = ", Aopt)
    return Aopt, X_train_less, y_train, s_train, X_test_less, y_test, s_test, LLHs, LLH_xs
def get_KMM_ineq_constraints(num_train, B_max, eps):
    G_gt_0 = -np.eye(num_train)
    h_gt_0 = np.zeros(num_train)
    G_lt_B_max = np.eye(num_train)
    h_lt_B_max = np.ones(num_train) * B_max
    G_B_sum_lt = np.ones(num_train, dtype=float)
    h_B_sum_lt = (1+eps) * float(num_train) * np.ones(1)
    G_B_sum_gt = -np.ones(num_train, dtype=float)
    h_B_sum_gt = -(1-eps) * float(num_train) * np.ones(1)
    G = np.vstack((G_gt_0, G_lt_B_max, G_B_sum_lt, G_B_sum_gt))
    h = np.hstack((h_gt_0, h_lt_B_max, h_B_sum_lt, h_B_sum_gt))
    return G, h
def update_K(self, k):
    self.K = k
    # reinitialize K related parameters
    self.w = 1e-3 * np.ones((self.K + 1, self.K))
    if not self.self_connect:
        for i in range(self.K):
            self.w[i + 1, i] = 1e-32
    if self.extra_w:
        tmp = 1e-10 * np.ones((self.K, self.K))
        if not self.self_connect:
            for i in range(self.K):
                tmp[i, i] = 1e-32
        self.w = np.concatenate([self.w, tmp], axis=0)
def optimize_and_lls(optfun):
    num_iters = 200
    elbos = []

    def callback(params, t, g):
        elbo_val = -objective(params, t)
        elbos.append(elbo_val)
        if t % 50 == 0:
            print("Iteration {} lower bound {}".format(t, elbo_val))

    init_mean = -1 * np.ones(D)
    init_log_std = -5 * np.ones(D)
    init_var_params = np.concatenate([init_mean, init_log_std])
    variational_params = optfun(num_iters, init_var_params, callback)
    return np.array(elbos)
def KL_two_gaussians(params):
    mu = params[0:(len(params)-2)//2]
    Sigma = np.exp(params[(len(params)-2)//2:-2])
    d = len(mu)
    muPrior = np.zeros(d)
    sigmaPrior = np.ones(d)*50
    return np.sum(np.log(sigmaPrior/Sigma)
                  + (Sigma**2 + (mu-muPrior)**2)/(2*(sigmaPrior**2)) - 0.5)
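# Hedged sanity check (added illustration, not from the original source):
# with mu = 0 and Sigma = 50 the variational factor matches the N(0, 50^2)
# prior used above, so the KL divergence should come out (approximately) zero.
# The two trailing entries of `params` are ignored, matching the slicing above.
import autograd.numpy as np

d = 3
params = np.concatenate([np.zeros(d), np.log(50.0) * np.ones(d), np.zeros(2)])
print(KL_two_gaussians(params))  # ~0.0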
def init_params(D, rs=npr.RandomState(0), **kwargs):
    init_mean = -1 * np.ones(D) + rs.randn(D) * .1
    init_log_std = -5 * np.ones(D) + rs.randn(D) * .1
    # gauss_params = np.concatenate([init_mean, init_log_std])
    # u = rs.randn(D,1)
    # w = rs.randn(D,1)
    # b = rs.randn(1)
    norm_flow_params = [[rs.randn(D, 1), rs.randn(D, 1), rs.randn(1)]
                        for x in range(k)]
    # norm_flow_params = np.array(norm_flow_params)
    # print(norm_flow_params.shape)
    return [init_mean, init_log_std, norm_flow_params]
def add_data(self, S, F=None):
    """
    Add a data set to the list of observations.
    First, filter the data with the impulse response basis,
    then instantiate a set of parents for this data set.

    :param S: a TxK matrix of event counts for each time bin
              and each process.
    """
    assert isinstance(S, np.ndarray) and S.ndim == 2 and S.shape[1] == self.K \
        and np.amin(S) >= 0 and S.dtype == np.int, \
        "Data must be a TxK array of event counts"

    T = S.shape[0]

    if F is None:
        # Filter the data into a TxKxB array
        Ftens = self.basis.convolve_with_basis(S)

        # Flatten this into a T x (KxB) matrix
        # [F00, F01, F02, F10, F11, ... F(K-1)0, F(K-1)(B-1)]
        F = Ftens.reshape((T, self.K * self.B))
        assert np.allclose(F[:, 0], Ftens[:, 0, 0])
        if self.B > 1:
            assert np.allclose(F[:, 1], Ftens[:, 0, 1])
        if self.K > 1:
            assert np.allclose(F[:, self.B], Ftens[:, 1, 0])

        # Prepend a column of ones
        F = np.hstack((np.ones((T, 1)), F))

    for k, node in enumerate(self.nodes):
        node.add_data(F, S[:, k])
def expectedstats(natparam):
    neghalfJ, h, _, _ = unpack_dense(natparam)
    J = -2*neghalfJ
    Ex = np.linalg.solve(J, h)
    ExxT = np.linalg.inv(J) + Ex[..., None] * Ex[..., None, :]
    En = np.ones(J.shape[0]) if J.ndim == 3 else 1.
    return pack_dense(ExxT, Ex, En, En)
def main():
    (parser, loss) = KLD(50)
    print(parser)
    print(parser.idxs_and_shapes)
    datum = {}
    datum['mu1'] = np.zeros(50)
    datum['mu2'] = np.ones(50)
    datum['sig1'] = 5
    datum['sig2'] = 6
    trial_vecs = []
    for _ in range(5):
        trial_vecs.append(np.random.rand(50))
    value_and_grad_fun = value_and_grad(pairwise_distance)
    value, grad = value_and_grad_fun(trial_vecs)
    print(trial_vecs)
    weights = parser.stack(datum)
    value_and_grad_fun = value_and_grad(loss)
    value, grad = value_and_grad_fun(weights)
    print(value)
    weights = weights - 10e-4*grad
    value, grad = value_and_grad_fun(weights)
    print(value)
def prediction(params, X):
    if len(X.shape) == 1:
        # Add column of 1s (we assume params[0] is bias term)
        N = X.shape[0]
        X = np.c_[np.ones(N), X]
    yhat = np.dot(X, params)
    return yhat
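# Illustrative call (added sketch, not from the original source): for 1-D
# inputs a bias column is prepended, so params = [intercept, slope].
import numpy as np

params = np.array([1.0, 2.0])                          # intercept 1, slope 2
print(prediction(params, np.array([0.0, 1.0, 2.0])))   # [1. 3. 5.]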
def test_getter():
    def fun(input_tuple):
        A = np.sum(input_tuple[0])
        B = np.sum(input_tuple[1])
        C = np.sum(input_tuple[1])
        return A + B + C

    d_fun = grad(fun)
    input_tuple = (npr.randn(5, 6), npr.randn(4, 3), npr.randn(2, 4))
    result = d_fun(input_tuple)
    assert np.allclose(result[0], np.ones((5, 6)))
    assert np.allclose(result[1], 2 * np.ones((4, 3)))
    assert np.allclose(result[2], np.zeros((2, 4)))
def test_getter():
    def fun(input_dict):
        A = np.sum(input_dict['item_1'])
        B = np.sum(input_dict['item_2'])
        C = np.sum(input_dict['item_2'])
        return A + B + C

    d_fun = grad(fun)
    input_dict = {'item_1': npr.randn(5, 6),
                  'item_2': npr.randn(4, 3),
                  'item_X': npr.randn(2, 4)}
    result = d_fun(input_dict)
    assert np.allclose(result['item_1'], np.ones((5, 6)))
    assert np.allclose(result['item_2'], 2 * np.ones((4, 3)))
    assert np.allclose(result['item_X'], np.zeros((2, 4)))
def test_getter():
    def fun(input_list):
        A = np.sum(input_list[0])
        B = np.sum(input_list[1])
        C = np.sum(input_list[1])
        return A + B + C

    d_fun = grad(fun)
    input_list = [npr.randn(5, 6), npr.randn(4, 3), npr.randn(2, 4)]
    result = d_fun(input_list)
    print(result)
    assert np.allclose(result[0], np.ones((5, 6)))
    assert np.allclose(result[1], 2 * np.ones((4, 3)))
    assert np.allclose(result[2], np.zeros((2, 4)))
def x_with_bias(x):
    """Append a column of ones (the bias feature) to the input X.

    x.shape = (batch_size, input_vector_length)
    => x_with_bias(x).shape = (batch_size, input_vector_length + 1)
    """
    batch_size = x.shape[0]
    return np.concatenate((x, np.ones([batch_size, 1])), axis=1)
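# Minimal usage sketch (added illustration, not from the original source):
import numpy as np

x = np.zeros((2, 3))
print(x_with_bias(x).shape)  # (2, 4); the last column is the all-ones bias feature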
def PlanePrior(siz):
    D = np.ones(siz)
    D[:siz[0]//2, :] *= 20
    for i in range(siz[0]//2, siz[0]):
        y = (i - siz[0]//2 + 1)
        z = 20.0 / y
        D[i, :] = z
    return D
def PhotometricError(iref, inew, R, T, points, D): # points is a tuple ([y], [x]); convert to homogeneous siz = iref.shape npoints = len(points[0]) f = siz[1] # focal length, FIXME Xref = np.vstack(((points[1] - siz[1]*0.5) / f, # x (siz[0]*0.5 - points[0]) / f, # y (left->right hand) np.ones(npoints))) # z = 1 # this is confusingly written -- i am broadcasting the translation T to # every column, but numpy broadcasting only works if it's rows, hence all # the transposes # print D * Xref Xnew = (np.dot(so3.exp(R), (D * Xref)).T + T).T # print Xnew # right -> left hand projection proj = Xnew[0:2] / Xnew[2] p = (-proj[1]*f + siz[0]*0.5, proj[0]*f + siz[1]*0.5) margin = 10 # int(siz[0] / 5) inwindow_mask = ((p[0] >= margin) & (p[0] < siz[0]-margin-1) & (p[1] >= margin) & (p[1] < siz[1]-margin-1)) npts_inw = sum(inwindow_mask) if npts_inw < 10: return 1e6, np.zeros(6 + npoints) # todo: filter points which are now out of the window oldpointidxs = (points[0][inwindow_mask], points[1][inwindow_mask]) newpointidxs = (p[0][inwindow_mask], p[1][inwindow_mask]) origpointidxs = np.nonzero(inwindow_mask)[0] E = InterpolatedValues(inew, newpointidxs) - iref[oldpointidxs] # dE/dk -> # d/dk r_p^2 = d/dk (Inew(w(r, T, D, p)) - Iref(p))^2 # = -2r_p dInew/dp dp/dw dw/dX dX/dk # = -2r_p * g(w(r, T, D, p)) * dw(r, T, D, p) # intensity gradients for each point Ig = InterpolatedGradients(inew, newpointidxs) # TODO: use tensors for this # gradients for R, T, and D gradient = np.zeros(6 + npoints) for i in range(npts_inw): # print 'newidx (y,x) = ', newpointidxs[0][i], newpointidxs[1][i] # Jacobian of w oi = origpointidxs[i] Jw = dw(Xref[0][oi], Xref[1][oi], D[oi], R, T) # scale back up into pixel space, right->left hand coords to get # Jacobian of p Jp = f * np.vstack((-Jw[1], Jw[0])) # print origpointidxs[i], 'Xref', Xref[:, i], 'Ig', Ig[:, i], \ # 'dwdRz', Jw[:, 2], 'dpdRz', Jp[:, 2] # full Jacobian = 2*E + Ig * Jp J = np.sign(E[i]) * np.dot(Ig[:, i], Jp) # print '2 E[i]', 2*E[i], 'Ig*Jp', np.dot(Ig[:, i], Jp) gradient[:6] += J[:6] # print J[:6] gradient[6+origpointidxs[i]] += J[6] print R, T, np.sum(np.abs(E)), npts_inw # return ((0.2*(npoints - npts_inw) + np.dot(E, E)), gradient) return np.sum(np.abs(E)) / (npts_inw), gradient / (npts_inw)
def expectedstats(natparam):
    J, h = natparam[:2]
    J = -2*J
    Ex = np.linalg.solve(J, h)
    ExxT = np.linalg.inv(J) + Ex[..., None] * Ex[..., None, :]
    En = np.ones(J.shape[0]) if J.ndim == 3 else 1.
    return ExxT, Ex, En, En
def evaluate_prior(all_params):
    # clean up code so we don't compute matrices twice
    layer_params, x0, y0 = unpack_all_params(all_params)
    log_prior = 0
    for layer in range(n_layers):
        # import pdb; pdb.set_trace()
        mean, cov_params, noise_scale = unpack_kernel_params(layer_params[layer])
        cov_y_y = covariance_function(cov_params, x0[layer], x0[layer]) \
            + noise_scale * np.eye(len(y0[layer]))
        log_prior += mvn.logpdf(y0[layer], np.ones(len(cov_y_y))*mean,
                                cov_y_y + np.eye(len(cov_y_y))*10)
    return log_prior
def main(): np.random.seed(1) xtrain, ytrain, params_true = make_data_linreg_1d() N = xtrain.shape[0] Xtrain = np.c_[np.ones(N), xtrain] # add column of 1s w_ols, loss_ols = LinregModel.ols_fit(Xtrain, ytrain) expt_configs = make_expt_config(N) nexpts = len(expt_configs) print nexpts nrows, ncols = nsubplots(nexpts) #nrows, ncols = 4, 2 loss_trace_fig = plt.figure("loss trace fig") param_trace_fig = plt.figure("param trace fig") folder = 'figures' for expt_num, config in enumerate(expt_configs): logger = sgd.SGDLogger(print_freq=10) np.random.seed(1) batchifier = sgd.MiniBatcher(Xtrain, ytrain, config['batch_size']) initial_params = np.zeros(2) lr_fun = lambda(iter): sgd.get_learning_rate_exp_decay(iter, config['init_lr'], config['lr_decay']) ttl = config_to_str(config) print '\nstarting experiment {}'.format(ttl) print config obj_fun = LinregModel.objective #grad_fun = LinregModel.gradient grad_fun = autograd.grad(obj_fun) result = sgd.sgd_minimize(initial_params, obj_fun, grad_fun, batchifier, config['n_steps'], lr_fun, config['momentum'], callback=logger.update) print result plotnum = expt_num + 1 ax = loss_trace_fig.add_subplot(nrows, ncols, plotnum) plot_loss_trace(logger.obj_trace, loss_ols, ax) ax.set_title(ttl) ax = param_trace_fig.add_subplot(nrows, ncols, plotnum) loss_fun = lambda w0, w1: LinregModel.objective([w0, w1], xtrain, ytrain) plot_error_surface(loss_fun, params_true, ax) plot_param_trace(logger.param_trace, ax) ax.set_title(ttl) plt.figure("loss trace fig") fname = os.path.join(folder, 'linreg_1d_sgd_loss_trace.png') plt.savefig(fname) plt.figure("param trace fig") fname = os.path.join(folder, 'linreg_1d_sgd_param_trace.png') plt.savefig(fname) plt.show()
def rmsprop(grad, x, callback=None, num_iters=100,
            step_size=0.1, gamma=0.9, eps=10**-8):
    """Root mean squared prop: See Adagrad paper for details."""
    avg_sq_grad = np.ones(len(x))
    for i in range(num_iters):
        g = grad(x, i)
        if callback:
            callback(x, i, g)
        avg_sq_grad = avg_sq_grad * gamma + g**2 * (1 - gamma)
        x -= step_size * g/(np.sqrt(avg_sq_grad) + eps)
    return x
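# Hedged usage sketch (added illustration, not from the original source):
# minimise a simple quadratic with the rmsprop routine above. The gradient
# callable must accept (x, iteration); the iterate lands within roughly one
# step size of the optimum.
import autograd.numpy as np

target = np.array([3.0, -2.0])
grad_fn = lambda x, i: 2.0 * (x - target)      # gradient of ||x - target||^2
x_opt = rmsprop(grad_fn, np.zeros(2), num_iters=500, step_size=0.05)
print(x_opt)                                   # close to [3, -2]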
def _em_fit(self, init, free_vars_shape, fixed_vars, is_fixed_vars, priors, max_nt=50, min_nt=0, gtol=1e-3, verbose=False, **kwargs): # Check any arguments to be passed to the M-step optimisatio function optim_opts = kwargs.pop('optim_opts', {}) # unpack the inital values g, beta, mu_ivp = _var_mixer(init, free_vars_shape, fixed_vars, is_fixed_vars) alpha = 1000. # do some reshaping beta = beta.reshape((self.dim.R+1, self.dim.D)) mu_ivp = mu_ivp.reshape((len(self._ifix), len(self.Y_train_), self.dim.K)) # initalise pi uniformly pi = np.ones(len(self._ifix)) / len(self._ifix) # get the initial responsibilities r = self._get_responsibilities(pi, g, beta, mu_ivp, alpha) free_vars = init.copy() for nt in range(max_nt): free_vars = self._M_step(free_vars, r, alpha, free_vars_shape, fixed_vars, is_fixed_vars, priors, optim_opts=optim_opts, **kwargs) g_, beta, mu_ivp = _var_mixer(free_vars, free_vars_shape, fixed_vars, is_fixed_vars) beta = beta.reshape((self.dim.R+1, self.dim.D)) mu_ivp = mu_ivp.reshape((len(self._ifix), len(self.Y_train_), self.dim.K)) pi = self._update_pi(r) # check for convergence dg = np.linalg.norm(g_ - g) if verbose: print('iter {}. Delta g: {}'.format(nt+1, dg)) if dg <= gtol and nt >= min_nt: break else: g = g_ # E-step r = self._get_responsibilities(pi, g, beta, mu_ivp, alpha) self.g_ = g_ self.beta_ = beta self.mu_ivp_ = mu_ivp
def init_pgm_param(K, N, alpha, niw_conc=10., random_scale=0.):
    def init_niw_natparam(N):
        nu, S, m, kappa = N+niw_conc, (N+niw_conc)*np.eye(N), np.zeros(N), niw_conc
        m = m + random_scale * npr.randn(*m.shape)
        return niw.standard_to_natural(S, m, kappa, nu)

    dirichlet_natparam = alpha * (npr.rand(K) if random_scale else np.ones(K))
    niw_natparam = np.stack([init_niw_natparam(N) for _ in range(K)])
    return dirichlet_natparam, niw_natparam
def test_cast_to_int():
    inds = np.ones(5)[:, None]

    def fun(W):
        W = np.concatenate((W, inds), axis=1)
        W = W[:, :-1]
        return W[np.int64(W[:, -1])].sum()

    W = np.random.randn(5, 10)
    check_grads(fun, W)
def draw_panel(self,ax,title,**kwargs): # set viewing limits on contour plot xvals = [self.w_hist[s][0] for s in range(len(self.w_hist))] xvals.append(self.w_init[0]) yvals = [self.w_hist[s][1] for s in range(len(self.w_hist))] yvals.append(self.w_init[1]) xmax = max(xvals) xmin = min(xvals) xgap = (xmax - xmin)*0.1 ymax = max(yvals) ymin = min(yvals) ygap = (ymax - ymin)*0.1 xmin -= xgap xmax += xgap ymin -= ygap ymax += ygap if 'xmin' in kwargs: xmin = kwargs['xmin'] if 'xmax' in kwargs: xmax = kwargs['xmax'] if 'ymin' in kwargs: ymin = kwargs['ymin'] if 'ymax' in kwargs: ymax = kwargs['ymax'] axes = False if 'axes' in kwargs: axes = kwargs['ymax'] pts = False if 'pts' in kwargs: pts = kwargs['pts'] pts = False if 'pts' in kwargs: pts = kwargs['pts'] linewidth = 2.5 if 'linewidth' in kwargs: linewidth = kwargs['linewidth'] #### define input space for function and evaluate #### w1 = np.linspace(xmin,xmax,400) w2 = np.linspace(ymin,ymax,400) w1_vals, w2_vals = np.meshgrid(w1,w2) w1_vals.shape = (len(w1)**2,1) w2_vals.shape = (len(w2)**2,1) h = np.concatenate((w1_vals,w2_vals),axis=1) func_vals = np.asarray([self.g(s) for s in h]) w1_vals.shape = (len(w1),len(w1)) w2_vals.shape = (len(w2),len(w2)) func_vals.shape = (len(w1),len(w2)) ### make contour right plot - as well as horizontal and vertical axes ### # set level ridges num_contours = kwargs['num_contours'] levelmin = min(func_vals.flatten()) levelmax = max(func_vals.flatten()) cutoff = 0.5 cutoff = (levelmax - levelmin)*cutoff numper = 3 levels1 = np.linspace(cutoff,levelmax,numper) num_contours -= numper levels2 = np.linspace(levelmin,cutoff,min(num_contours,numper)) levels = np.unique(np.append(levels1,levels2)) num_contours -= numper while num_contours > 0: cutoff = levels[1] levels2 = np.linspace(levelmin,cutoff,min(num_contours,numper)) levels = np.unique(np.append(levels2,levels)) num_contours -= numper a = ax.contour(w1_vals, w2_vals, func_vals,levels = levels,colors = 'k') ax.contourf(w1_vals, w2_vals, func_vals,levels = levels,cmap = 'Blues') if axes == True: ax.axhline(linestyle = '--', color = 'k',linewidth = 1) ax.axvline(linestyle = '--', color = 'k',linewidth = 1) # colors for points s = np.linspace(0,1,len(self.w_hist[:round(len(self.w_hist)/2)])) s.shape = (len(s),1) t = np.ones(len(self.w_hist[round(len(self.w_hist)/2):])) t.shape = (len(t),1) s = np.vstack((s,t)) colorspec = [] colorspec = np.concatenate((s,np.flipud(s)),1) colorspec = np.concatenate((colorspec,np.zeros((len(s),1))),1) ### plot function decrease plot in right panel for j in range(len(self.w_hist)): w_val = self.w_hist[j] g_val = self.g(w_val) # plot in left panel if pts == 'True': ax.scatter(w_val[0],w_val[1],s = 30,c = colorspec[j],edgecolor = 'k',linewidth = 1.5*math.sqrt((1/(float(j) + 1))),zorder = 3) # plot connector between points for visualization purposes if j > 0: w_old = self.w_hist[j-1] w_new = self.w_hist[j] ax.plot([w_old[0],w_new[0]],[w_old[1],w_new[1]],color = colorspec[j],linewidth = linewidth,alpha = 1,zorder = 2) # plot approx ax.plot([w_old[0],w_new[0]],[w_old[1],w_new[1]],color = 'k',linewidth = linewidth + 0.4,alpha = 1,zorder = 1) # plot approx # clean panel ax.set_title(title,fontsize = 12) ax.set_xlabel('$w_1$',fontsize = 12) ax.set_ylabel('$w_2$',fontsize = 12,rotation = 0) ax.axhline(y=0, color='k',zorder = 0,linewidth = 0.5) ax.axvline(x=0, color='k',zorder = 0,linewidth = 0.5) ax.set_xlim([xmin,xmax]) ax.set_ylim([ymin,ymax])
    return const - 0.5 * np.dot(np.dot(z.T, pinv), z)


if __name__ == '__main__':
    t0 = time.time()
    rs = np.random.RandomState(0)
    num_samples = 500
    num_steps = 32
    num_sampler_optimization_steps = 400
    sampler_learn_rate = 0.01
    D = 2
    init_mean = np.zeros(D)
    init_log_stddevs = np.log(0.1*np.ones(D))
    init_output_weights = 0.1*rs.randn(num_steps, D)
    init_transform_weights = 0.1*rs.randn(num_steps, D)
    init_biases = 0.1*rs.randn(num_steps)

    logprob_mvn = build_logprob_mvn(mean=np.array([0.2, 0.4]),
                                    cov=np.array([[1.0, 0.9], [0.9, 1.0]]))
    flow_sample, parser = build_flow_sampler_with_inputs(D, num_steps)
    parser.add_shape('mean', D)
    parser.add_shape('log_stddev', D)

    sampler_params = np.zeros(len(parser))
    parser.put(sampler_params, 'mean', init_mean)
    parser.put(sampler_params, 'log_stddev', init_log_stddevs)
    parser.put(sampler_params, 'output weights', init_output_weights)
    parser.put(sampler_params, 'transform weights', init_transform_weights)
    parser.put(sampler_params, 'biases', init_biases)
def single_input_plot(self, g, weight_histories, cost_histories, **kwargs): # adjust viewing range wmin = -3.1 wmax = 3.1 if 'wmin' in kwargs: wmin = kwargs['wmin'] if 'wmax' in kwargs: wmax = kwargs['wmax'] onerun_perplot = False if 'onerun_perplot' in kwargs: onerun_perplot = kwargs['onerun_perplot'] ### initialize figure fig = plt.figure(figsize=(9, 4)) artist = fig # remove whitespace from figure #fig.subplots_adjust(left=0, right=1, bottom=0, top=1) # remove whitespace #fig.subplots_adjust(wspace=0.01,hspace=0.01) # create subplot with 2 panels, plot input function in center plot gs = gridspec.GridSpec(1, 2, width_ratios=[1, 1]) ax1 = plt.subplot(gs[0]) ax2 = plt.subplot(gs[1]) ### plot function in both panels w_plot = np.linspace(wmin, wmax, 500) g_plot = g(w_plot) gmin = np.min(g_plot) gmax = np.max(g_plot) g_range = gmax - gmin ggap = g_range * 0.1 gmin -= ggap gmax += ggap # plot function, axes lines ax1.plot(w_plot, g_plot, color='k', zorder=2) # plot function ax1.axhline(y=0, color='k', zorder=1, linewidth=0.25) ax1.axvline(x=0, color='k', zorder=1, linewidth=0.25) ax1.set_xlabel(r'$w$', fontsize=13) ax1.set_ylabel(r'$g(w)$', fontsize=13, rotation=0, labelpad=25) ax1.set_xlim(wmin, wmax) ax1.set_ylim(gmin, gmax) ax2.plot(w_plot, g_plot, color='k', zorder=2) # plot function ax2.axhline(y=0, color='k', zorder=1, linewidth=0.25) ax2.axvline(x=0, color='k', zorder=1, linewidth=0.25) ax2.set_xlabel(r'$w$', fontsize=13) ax2.set_ylabel(r'$g(w)$', fontsize=13, rotation=0, labelpad=25) ax2.set_xlim(wmin, wmax) ax2.set_ylim(gmin, gmax) #### loop over histories and plot each for j in range(len(weight_histories)): w_hist = weight_histories[j] c_hist = cost_histories[j] # colors for points --> green as the algorithm begins, yellow as it converges, red at final point s = np.linspace(0, 1, len(w_hist[:round(len(w_hist) / 2)])) s.shape = (len(s), 1) t = np.ones(len(w_hist[round(len(w_hist) / 2):])) t.shape = (len(t), 1) s = np.vstack((s, t)) self.colorspec = [] self.colorspec = np.concatenate((s, np.flipud(s)), 1) self.colorspec = np.concatenate( (self.colorspec, np.zeros((len(s), 1))), 1) ### plot all history points ax = ax2 if onerun_perplot == True: if j == 0: ax = ax1 if j == 1: ax = ax2 for k in range(len(w_hist)): # pick out current weight and function value from history, then plot w_val = w_hist[k] g_val = c_hist[k] ax.scatter(w_val, g_val, s=90, c=self.colorspec[k], edgecolor='k', linewidth=0.5 * ((1 / (float(k) + 1)))**(0.4), zorder=3, marker='X') # evaluation on function ax.scatter(w_val, 0, s=90, facecolor=self.colorspec[k], edgecolor='k', linewidth=0.5 * ((1 / (float(k) + 1)))**(0.4), zorder=3)
return probe if __name__ == '__main__': # grid_delta = np.load('adhesin/phantom/grid_delta.npy') # grid_beta = np.load('adhesin/phantom/grid_beta.npy') # grid_delta = np.load('cone_256_foam/phantom/grid_delta.npy') # grid_beta = np.load('cone_256_foam/phantom/grid_beta.npy') grid_delta = dxchange.read_tiff( 'cone_256_foam/test0/intermediate/current.tiff') grid_beta = np.load('cone_256_foam/phantom/grid_beta.npy') grid_delta = np.reshape(grid_delta, [1, *grid_delta.shape]) grid_beta = np.reshape(grid_beta, [1, *grid_beta.shape]) probe_real = np.ones([*grid_delta.shape[1:3]]) probe_imag = np.zeros([*grid_delta.shape[1:3]]) # f = open('test/conv_ir_report.csv', 'a') # f.write('kernel_size,time\n') wavefield, probe_array, t = multislice_propagate_cnn(grid_delta, grid_beta, probe_real, probe_imag, 5000, [1e-7] * 3, kernel_size=17, free_prop_cm=None, debug=True) dxchange.write_tiff(np.array(probe_array),
import numpy as np   # np and sp are used below but were missing from this snippet's imports
import scipy as sp
from numpy import exp, log, sqrt
from scipy.misc import logsumexp
from autograd import grad, hessian
import distributions as dist
#import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt

x = np.linspace(-2, 8, 1000)
sp.random.seed(2)

targd = dist.mixt(1, [
    dist.mvnorm(np.ones(1), np.ones(1)),
    dist.mvnorm(np.ones(1) + 3.8, np.ones(1))
], [0.7, 0.3])
q0 = dist.mvnorm(np.ones(1) + 3, np.ones(1) * 2)
samps = q0.rvs(20)
lw = targd.logpdf(samps).flatten() - q0.logpdf(samps)
lw = lw - logsumexp(lw)
q1 = dist.mixt(1, [dist.mvnorm(mu, np.ones(1)) for mu in samps],
               lw.flatten(), comp_w_in_logspace=True)

fig, ax = plt.subplots(figsize=(6, 3))
ax.plot(x, exp(targd.logpdf(x)), label='target density', linewidth=2)
ax.plot(x, exp(q0.logpdf(x)), '-.', label='q0', linewidth=2)
ax.plot(x, exp(q1.logpdf(x)), '--', label='q1', linewidth=2)
ax.legend(loc='best')
ax.set_xticks([])
def sample(self, T, input=None, tag=None, prefix=None, with_noise=True): N = self.N K = self.K D = (self.D, ) if isinstance(self.D, int) else self.D M = (self.M, ) if isinstance(self.M, int) else self.M assert isinstance(D, tuple) assert isinstance(M, tuple) # If prefix is given, pad the output with it if prefix is None: pad = 1 z = np.zeros(T + 1, dtype=int) x = np.zeros((T + 1, ) + D) # input = np.zeros((T+1,) + M) if input is None else input input = np.zeros((T + 1, ) + M) if input is None else np.concatenate( (np.zeros((1, ) + M), input)) xmask = np.ones((T + 1, ) + D, dtype=bool) # Sample the first state from the initial distribution pi0 = self.init_state_distn.initial_state_distn z[0] = npr.choice(self.K, p=pi0) x[0] = self.dynamics.sample_x(z[0], x[:0], tag=tag, with_noise=with_noise) else: zhist, xhist, yhist = prefix pad = len(zhist) assert zhist.dtype == int and zhist.min() >= 0 and zhist.max() < K assert xhist.shape == (pad, D) assert yhist.shape == (pad, N) z = np.concatenate((zhist, np.zeros(T, dtype=int))) x = np.concatenate((xhist, np.zeros((T, ) + D))) # input = np.zeros((T+pad,) + M) if input is None else input input = np.zeros((T + pad, ) + M) if input is None else np.concatenate( (np.zeros((pad, ) + M), input)) xmask = np.ones((T + pad, ) + D, dtype=bool) # Sample z and x for t in range(pad, T + pad): Pt = np.exp( self.transitions.log_transition_matrices(x[t - 1:t + 1], input[t - 1:t + 1], mask=xmask[t - 1:t + 1], tag=tag))[0] z[t] = npr.choice(self.K, p=Pt[z[t - 1]]) x[t] = self.dynamics.sample_x(z[t], x[:t], input=input[t], tag=tag, with_noise=with_noise) # Sample observations given latent states # TODO: sample in the loop above? y = self.emissions.sample(z, x, input=input, tag=tag) return z[pad:], x[pad:], y[pad:]
def __init__(self, N, K, D, M=0, single_subspace=True, **kwargs):
    super(_StudentsTEmissionsMixin, self).__init__(N, K, D, M,
                                                   single_subspace=single_subspace,
                                                   **kwargs)
    self.inv_etas = -4 + npr.randn(1, N) if single_subspace else npr.randn(K, N)
    self.inv_nus = np.log(4) * np.ones((1, N)) if single_subspace else np.log(4) * np.ones((K, N))
def fit( self, df, duration_col=None, event_col=None, show_progress=False, timeline=None, weights_col=None, robust=False, initial_point=None, ): """ Fit the accelerated failure time model to a dataset. Parameters ---------- df: DataFrame a Pandas DataFrame with necessary columns `duration_col` and `event_col` (see below), covariates columns, and special columns (weights). `duration_col` refers to the lifetimes of the subjects. `event_col` refers to whether the 'death' events was observed: 1 if observed, 0 else (censored). duration_col: string the name of the column in DataFrame that contains the subjects' lifetimes. event_col: string, optional the name of the column in DataFrame that contains the subjects' death observation. If left as None, assume all individuals are uncensored. show_progress: boolean, optional (default=False) since the fitter is iterative, show convergence diagnostics. Useful if convergence is failing. timeline: array, optional Specify a timeline that will be used for plotting and prediction weights_col: string the column in df that specifies weights per observation. robust: boolean, optional (default=False) Compute the robust errors using the Huber sandwich estimator. initial_point: (d,) numpy array, optional initialize the starting point of the iterative algorithm. Default is the zero vector. Returns ------- self: self with additional new properties: ``print_summary``, ``params_``, ``confidence_intervals_`` and more Examples -------- >>> N, d = 80000, 2 >>> # some numbers take from http://statwonk.com/parametric-survival.html >>> breakpoints = (1, 31, 34, 62, 65) >>> betas = np.array( >>> [ >>> [1.0, -0.2, np.log(15)], >>> [5.0, -0.4, np.log(333)], >>> [9.0, -0.6, np.log(18)], >>> [5.0, -0.8, np.log(500)], >>> [2.0, -1.0, np.log(20)], >>> [1.0, -1.2, np.log(500)], >>> ] >>> ) >>> X = 0.1 * np.random.exponential(size=(N, d)) >>> X = np.c_[X, np.ones(N)] >>> T = np.empty(N) >>> for i in range(N): >>> lambdas = np.exp(-betas.dot(X[i, :])) >>> T[i] = piecewise_exponential_survival_data(1, breakpoints, lambdas)[0] >>> T_censor = np.minimum( >>> T.mean() * np.random.exponential(size=N), 110 >>> ) # 110 is the end of observation, eg. current time. 
>>> df = pd.DataFrame(X[:, :-1], columns=["var1", "var2"]) >>> df["T"] = np.round(np.maximum(np.minimum(T, T_censor), 0.1), 1) >>> df["E"] = T <= T_censor >>> pew = PiecewiseExponentialRegressionFitter(breakpoints=breakpoints, penalizer=0.0001).fit(df, "T", "E") >>> pew.print_summary() >>> pew.plot() """ if duration_col is None: raise TypeError("duration_col cannot be None.") self._time_fit_was_called = datetime.utcnow().strftime( "%Y-%m-%d %H:%M:%S") + " UTC" self.duration_col = duration_col self.event_col = event_col self.weights_col = weights_col self._n_examples = df.shape[0] self.timeline = timeline self.robust = robust df = df.copy() T = pass_for_numeric_dtypes_or_raise_array( df.pop(duration_col)).astype(float) E = (pass_for_numeric_dtypes_or_raise_array(df.pop(self.event_col)) if (self.event_col is not None) else pd.Series(np.ones( self._n_examples, dtype=bool), index=df.index, name="E")) weights = (pass_for_numeric_dtypes_or_raise_array( df.pop(self.weights_col)).astype(float) if (self.weights_col is not None) else pd.Series( np.ones(self._n_examples, dtype=float), index=df.index, name="weights")) # check to make sure their weights are okay if self.weights_col: if (weights.astype(int) != weights).any() and not self.robust: warnings.warn( dedent( """It appears your weights are not integers, possibly propensity or sampling scores then? It's important to know that the naive variance estimates of the coefficients are biased. Instead a) set `robust=True` in the call to `fit`, or b) use Monte Carlo to estimate the variances. See paper "Variance estimation when using inverse probability of treatment weighting (IPTW) with survival analysis""" ), StatisticalWarning, ) if (weights <= 0).any(): raise ValueError( "values in weight column %s must be positive." % self.weights_col) df = df.astype(float) self._check_values(df, T, E, self.event_col) E = E.astype(bool) self.durations = T.copy() self.event_observed = E.copy() self.weights = weights.copy() if np.any(self.durations <= 0): raise ValueError( "This model does not allow for non-positive durations. Suggestion: add a small positive value to zero elements." ) if self.fit_intercept: assert "_intercept" not in df df["_intercept"] = 1.0 self._LOOKUP_SLICE = self._create_slicer(len(df.columns)) _norm_std = df.std(0) self._norm_mean = df.mean(0) # if we included an intercept, we need to fix not divide by zero. if self.fit_intercept: _norm_std["_intercept"] = 1.0 else: _norm_std[_norm_std < 1e-8] = 1.0 _index = pd.MultiIndex.from_tuples( sum([[(name, c) for c in df.columns] for name in self._fitted_parameter_names], [])) self._norm_std = pd.Series(np.concatenate([_norm_std.values] * self.n_breakpoints), index=_index) _params, self._log_likelihood, self._hessian_ = self._fit_model( T.values, E.values, weights.values, normalize(df, 0, _norm_std).values, show_progress=show_progress, initial_point=initial_point, ) self.params_ = _params / self._norm_std self.variance_matrix_ = self._compute_variance_matrix() self.standard_errors_ = self._compute_standard_errors( T.values, E.values, weights.values, df.values) self.confidence_intervals_ = self._compute_confidence_intervals() self._predicted_cumulative_hazard_ = self.predict_cumulative_hazard( df, times=[np.percentile(T, 75)]).T return self
def fun(x):
    return np.tensordot(x * np.ones((2, 2)), x * np.ones((2, 2)), 2)
print("restoring pickled initialization weights") unpickled = np.load(picklefilename) init_weights = unpickled['init_weights'] else: # Build initialization objective objective, init_gradient = build_init_objective( L2_VAR_2, NUM_TRAIN, train_images, train_labels, C, D, L) # Build callback for ADAM optimizer def init_callback(params, t, g): lik = -objective(params, t) print("Initialization iteration {} log-likelihood {}".format( t, lik)) # initialize weights pre_init_weights = np.ones(L) # optimize weights print("Initializing weights...") init_weights = adam(init_gradient, pre_init_weights, step_size=0.1, num_iters=INIT_ITERS, callback=init_callback) # pickle processed data in /cache (if doesn't already exist) if not os.path.exists('cache'): print('creating cache folder') os.makedirs('cache') if not os.path.isfile(picklefilename): print('saving pickled regression initalization data')
# Make an LDS with somewhat interesting dynamics parameters
true_lds = LDS(N, D, emissions="calcium",
               emission_kwargs={"bin_size": bin_size, "link": link_func})
A0 = .99 * random_rotation(D, theta=np.pi/40)
# S = (1 + 3 * npr.rand(D))
S = np.arange(1, D+1)
R = np.linalg.svd(npr.randn(D, D))[0] * S
A = R.dot(A0).dot(np.linalg.inv(R))
b = np.zeros(D)
true_lds.dynamics.As[0] = A
true_lds.dynamics.bs[0] = b
true_lds.dynamics.Sigmas = true_lds.dynamics.Sigmas / np.max(true_lds.dynamics.Sigmas[0]) * 0.5
# true_lds.dynamics.Sigmas = true_lds.dynamics.Sigmas
# true_lds.emissions.ds[0] = np.clip(npr.randn(N), -10, 0.1)
true_lds.emissions.ds[0] = 10.0 + 3.0 * npr.randn(N)
true_lds.emissions.As[0] = np.clip(0.85 + 0.05 * npr.randn(N), 0.8, 0.95)
true_lds.emissions.betas[0] = 1.0 * np.ones(N)
# set noise on correct scale
true_lds.emissions.inv_etas[0] = np.log(1e-2 * np.ones(N))

x, y = true_lds.sample(T)
smooth_y = true_lds.smooth(x, y)

plt.ion()
plt.figure()
plt.plot(x)

plt.figure()
for n in range(N):
    plt.plot(y[:, n] + 10 * n, '-k')
    # plt.plot(smooth_y[:, n] + 4 * n, 'r--')
num_samples = 50
num_langevin_steps = 15
num_sampler_optimization_steps = 100
sampler_learn_rate = 0.001
images_per_row = 10
layer_sizes = [784, 200, 100, 10]
L2_reg = 1.0
D = 784

init_init_stddev_scale = 0.2
init_langevin_stepsize = 0.0001
init_langevin_noise_size = 0.000001
init_gradient_power = 0.95
init_log_gradient_scales = np.log(np.ones((1, D)))

prior_relax = 0.05
prior_downscale = 0.1

# train_mnist_model()   # Comment after running once.
with open('mnist_models.pkl') as f:
    trained_weights, all_mean, all_cov = pickle.load(f)

# Regularize all_cov
all_cov = prior_downscale * (all_cov + prior_relax * np.eye(D))

N_weights, predict_fun, loss_fun, frac_err, nn_loglik = make_nn_funs(
    layer_sizes, L2_reg)
from numpy import exp, log, sqrt from scipy.misc import logsumexp from autograd import grad, hessian import distributions as dist #import seaborn as sns import matplotlib as mpl import matplotlib.pyplot as plt x = np.linspace(-2, 8, 1000) sp.random.seed(5) targd = dist.mixt(1, [ dist.mvnorm(np.ones(1) + 1, np.ones(1)), dist.mvnorm(np.ones(1) + 3.8, np.ones(1)) ], [0.8, 0.2]) qrw = dist.mvnorm(np.zeros(1), np.ones(1) * 0.4) qind = dist.mvnorm(np.ones(1) * 2, np.ones(1) * 0.2) fig, ax = plt.subplots(figsize=(4, 2)) ax.plot(x, exp(targd.logpdf(x)) / exp(targd.logpdf(x)).max(), label=r'$\pi$', linewidth=2) for i in range(3): current = targd.rvs().flatten() ax.plot(x, exp(qrw.logpdf(x - current)) / exp(qrw.logpdf(x - current)).max() /
def read_image_data_knn():
    print('Reading image data ...')
    train_x = np.load('../../Data/data_train_knn.npy')
    train_y = np.load('../../Data/train_labels_knn.npy')
    test_x = np.load('../../Data/data_test.npy')
    return (train_x, train_y, test_x)


############################################################################

train_x, train_y, test_x = read_image_data()
print('Train=', train_x.shape)
print('Test=', test_x.shape)

# Create dummy test output values to compute accuracy
test_y = np.ones(test_x.shape[0])
predicted_y = np.random.randint(0, 4, test_x.shape[0])
print('DUMMY Accuracy=%0.4f' % accuracy_score(test_y, predicted_y, normalize=True))

# Select the section to run by changing the associated boolean to True
# Code for 2b
run2 = False
# Code for 3b
run3 = False
# Code for 4a
run4 = False
# Code for 5a
run5a = True
# Code for 5b
run5b = True
def initialize_variational_params(self, data, input, mask, tag):
    data = interpolate_data(data, mask)
    q_mu = data.copy()
    q_sigma_inv = -4 * np.ones((data.shape[0], self.D))
    return q_mu, q_sigma_inv
def m_step(self, expectations, datas, inputs, masks, tags, **kwargs):
    expected_joints = sum([np.sum(Ezzp1, axis=0) for _, Ezzp1, _ in expectations]) + 1e-16
    expected_joints += self.kappa * np.eye(self.K) \
        + (self.alpha-1) * np.ones((self.K, self.K))
    P = (expected_joints / expected_joints.sum(axis=1, keepdims=True)) + 1e-16
    assert np.all(P >= 0), "mode is well defined only when transition matrix entries are non-negative! Check alpha >= 1"
    self.log_Ps = np.log(P)
def sample(self, T, prefix=None, input=None, tag=None, with_noise=True): """ Sample synthetic data from the model. Optionally, condition on a given prefix (preceding discrete states and data). Parameters ---------- T : int number of time steps to sample prefix : (zpre, xpre) Optional prefix of discrete states (zpre) and continuous states (xpre) zpre must be an array of integers taking values 0...num_states-1. xpre must be an array of the same length that has preceding observations. input : (T, input_dim) array_like Optional inputs to specify for sampling tag : object Optional tag indicating which "type" of sampled data with_noise : bool Whether or not to sample data with noise. Returns ------- z_sample : array_like of type int Sequence of sampled discrete states x_sample : (T x observation_dim) array_like Array of sampled data """ K = self.K D = (self.D, ) if isinstance(self.D, int) else self.D M = (self.M, ) if isinstance(self.M, int) else self.M assert isinstance(D, tuple) assert isinstance(M, tuple) assert T > 0 # Check the inputs if input is not None: assert input.shape == (T, ) + M # Get the type of the observations dummy_data = self.observations.sample_x(0, np.empty(0, ) + D) dtype = dummy_data.dtype # Initialize the data array if prefix is None: # No prefix is given. Sample the initial state as the prefix. pad = 1 z = np.zeros(T, dtype=int) data = np.zeros((T, ) + D, dtype=dtype) input = np.zeros((T, ) + M) if input is None else input mask = np.ones((T, ) + D, dtype=bool) # Sample the first state from the initial distribution pi0 = self.init_state_distn.initial_state_distn z[0] = npr.choice(self.K, p=pi0) data[0] = self.observations.sample_x(z[0], data[:0], input=input[0], with_noise=with_noise) # We only need to sample T-1 datapoints now T = T - 1 else: # Check that the prefix is of the right type zpre, xpre = prefix pad = len(zpre) assert zpre.dtype == int and zpre.min() >= 0 and zpre.max() < K assert xpre.shape == (pad, ) + D # Construct the states, data, inputs, and mask arrays z = np.concatenate((zpre, np.zeros(T, dtype=int))) data = np.concatenate((xpre, np.zeros((T, ) + D, dtype))) input = np.zeros((T + pad, ) + M) if input is None else np.concatenate( (np.zeros((pad, ) + M), input)) mask = np.ones((T + pad, ) + D, dtype=bool) # Convert the discrete states to the range (1, ..., K_total) m = self.state_map K_total = len(m) _, starts = np.unique(m, return_index=True) z = starts[z] # Fill in the rest of the data for t in range(pad, pad + T): Pt = self.transitions.transition_matrices(data[t - 1:t + 1], input[t - 1:t + 1], mask=mask[t - 1:t + 1], tag=tag)[0] z[t] = npr.choice(K_total, p=Pt[z[t - 1]]) data[t] = self.observations.sample_x(m[z[t]], data[:t], input=input[t], tag=tag, with_noise=with_noise) # Collapse the states z = m[z] # Return the whole data if no prefix is given. # Otherwise, just return the simulated part. if prefix is None: return z, data else: return z[pad:], data[pad:]
def main(): ### Setup # autoencoder, encoder, decoder = load_autoencoder('models/12 17PM November 08 2017.h5')#models/2.7e-06.h5') # def encode(q): # return encoder.predict(numpy.array([q]))[0].astype(numpy.float64) # def decode(z): # return decoder.predict(numpy.array([z]))[0].astype(numpy.float64) encode, decode = train_model() # Constants d = 2 # dimensions I = numpy.identity(d) B = numpy.concatenate((I, -I), axis=1) # Difference matrix # Simulation Parameters spring_const = 10.0 # Technically could vary per spring h = 0.005 mass = 0.05 # Initial conditions starting_stretch = 1 #0.6 starting_points = numpy.array([ [0, 1], [0, 0], [1, 1], [1, 0], [2, 1], [2, 0], [3, 1], [3, 0], [4, 1], [4, 0], ]) * 0.1 + 0.3 n_points = len(starting_points) # Num points q_initial = starting_points.flatten() z_initial = encode(q_initial) print("HIOIIIIII") print(q_initial) print(z_initial) print(decode(z_initial)) pinned_points = numpy.array([0, 1]) q_mask = numpy.ones(n_points * d, dtype=bool) q_mask[numpy.concatenate([pinned_points * d + i for i in range(d)])] = False springs = [ (0, 2), (0, 3), (2, 3), (1, 2), (1, 3), (2, 4), (2, 3), (2, 5), (3, 5), (3, 4), (4, 5), (4, 6), (4, 7), (5, 6), (5, 7), (6, 7), (6, 8), (6, 9), (7, 8), (7, 9), (8, 9), ] n_springs = len(springs) P_matrices = construct_P_matrices(springs, n_points, d) all_spring_offsets = (B @ (P_matrices @ q_initial).T).T rest_lens = numpy.linalg.norm(all_spring_offsets, axis=1) * starting_stretch mass_matrix = numpy.identity(len(q_initial)) * mass # Mass matrix external_forces = numpy.array([0, -9.8] * n_points) def kinetic_energy(q_k, q_k1): """ Profile this to see if using numpy.dot is different from numpy.matmul (@)""" d_q = q_k1 - q_k energy = 1.0 / (2 * h**2) * d_q.T @ mass_matrix @ d_q return energy def potential_energy(q_k, q_k1): q_tilde = 0.5 * (q_k + q_k1) # sum = 0.0 # for i in range(len(rest_lens)): # TODO I might be able to do this simply with @ (see all_spring_offsets) # P_i = P_matrices[i] # l_i = rest_lens[i] # d_q_tilde_i_sq = q_tilde.T @ P_i.T @ B.T @ B @ P_i @ q_tilde # sum += (1.0 - 1.0 / l_i * numpy.sqrt(d_q_tilde_i_sq)) ** 2 # Optimized but ugly version sum = numpy.sum((1.0 - (1.0 / rest_lens) * numpy.sqrt( numpy.einsum('ij,ij->i', q_tilde.T @ P_matrices.transpose( (0, 2, 1)) @ B.T, (B @ P_matrices @ q_tilde))))**2) return 0.5 * spring_const * sum def discrete_lagrangian(q_k, q_k1): return kinetic_energy(q_k, q_k1) - potential_energy(q_k, q_k1) D1_Ld = autograd.grad(discrete_lagrangian, 0) # (q_t, q_t+1) -> R^N*d D2_Ld = autograd.grad(discrete_lagrangian, 1) # (q_t-1, q_t) -> R^N*d # Want D1_Ld + D2_Ld = 0 # Do root finding def DEL(new_q, cur_q, prev_q): # SUPER hacky way of adding constrained points for i in pinned_points: new_q = numpy.insert(new_q, i * d, q_initial[i * d]) new_q = numpy.insert(new_q, i * d + 1, q_initial[i * d + 1]) res = D1_Ld(cur_q, new_q) + D2_Ld( prev_q, cur_q) + mass_matrix @ external_forces # SUPER hacky way of adding constrained points return res[q_mask] jac_DEL = autograd.jacobian(DEL, 0) def latent_DEL(new_z, cur_q, prev_q): new_q = decode(new_z) res = D1_Ld(cur_q, new_q) + D2_Ld( prev_q, cur_q) + mass_matrix @ external_forces return res def latent_DEL_objective(new_z, cur_q, prev_q): res = latent_DEL(new_z, cur_q, prev_q) return res.T @ res ### Simulation q_history = [] save_freq = 1 current_frame = 0 output_path = 'pca_ae_configurations.pickle' prev_q = q_initial cur_q = q_initial prev_z = z_initial cur_z = z_initial while True: #sol = optimize.root(latent_DEL, cur_z, method='broyden1', 
args=(cur_q, prev_q))#, jac=jac_DEL) # Note numerical jacobian seems much faster sol = optimize.minimize(latent_DEL_objective, cur_z, args=(cur_q, prev_q), method='L-BFGS-B', options={ 'gtol': 1e-6, 'eps': 1e-06, 'disp': False }) prev_z = cur_z cur_z = sol.x prev_q = cur_q cur_q = decode(cur_z) render(cur_q * 10, springs, save_frames=True) if save_freq > 0: current_frame += 1 q_history.append(cur_q) if current_frame % save_freq == 0: with open(output_path, 'wb') as f: pickle.dump(q_history, f)
def activations(weights, *args):
    cat_state = np.concatenate(args + (np.ones((args[0].shape[0], 1)),), axis=1)
    return np.dot(cat_state, weights)
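# Small usage sketch (added illustration, not from the original source):
# the state pieces are concatenated, a bias column of ones is appended,
# and a single linear map is applied.
import autograd.numpy as np

state = np.zeros((4, 3))                  # batch of 4, state dim 3
weights = np.ones((4, 2))                 # (3 state dims + 1 bias) x 2 outputs
print(activations(weights, state).shape)  # (4, 2)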
import autograd.numpy as np
from qutip import qeye, sigmax, sigmay, tensor, fock
from tested.getCtrl import getCtrl, pltCtrl
from tested.getFid import getFid
from tested.minimize import minimize

a0 = np.ones([2, 6, 3])
T = np.pi
sx = np.array(sigmax().full())
sy = np.array(sigmay().full())
U0 = np.array(qeye(2).full())
Ugoal = sx
Hcs = [sx, sy]


def Hat(a, t):
    c = getCtrl(a, t, T)
    return sum([c[i] * Hcs[i] for i in range(len(Hcs))])


def goalFunc(U):
    return 1 - getFid(U, Ugoal)


a = minimize(a0, U0, Hat, T, goalFunc)
pltCtrl(a, T)
        (r[..., np.newaxis], g[..., np.newaxis], b[..., np.newaxis]), axis=2))
    ax.set_xticks([])
    ax.set_yticks([])
    plt.draw()
    if render:
        plt.savefig('step{0:03d}.png'.format(t), bbox_inches='tight')
    plt.pause(0.001)


if __name__ == '__main__':
    simulation_timesteps = 20

    print("Loading initial and target states...")
    init_vx = np.ones((rows, cols))
    init_vy = np.zeros((rows, cols))

    # Initialize the occlusion to be a block.
    init_occlusion = -np.ones((rows, cols))
    init_occlusion[15:25, 15:25] = 0.0
    init_occlusion = init_occlusion.ravel()

    def drag(vx):
        return np.mean(init_vx - vx)

    def lift(vy):
        return np.mean(vy - init_vy)

    def objective(params):
        cur_occlusion = np.reshape(params, (rows, cols))
plt.tight_layout()

if save_figures:
    plt.savefig("hmm_2.pdf")


# # Fit an HMM to this synthetic data

# In[7]:

N_iters = 50
hmm = ssm.HMM(K, D, observations="gaussian")
hmm_lls = hmm.fit(y, method="em", num_em_iters=N_iters)

plt.plot(hmm_lls, label="EM")
plt.plot([0, N_iters], true_ll * np.ones(2), ':k', label="True")
plt.xlabel("EM Iteration")
plt.ylabel("Log Probability")
plt.legend(loc="lower right")


# In[8]:

# Find a permutation of the states that best matches the true and inferred states
hmm.permute(find_permutation(z, hmm.most_likely_states(y)))


# In[11]:

# Plot the true and inferred discrete states
hmm_z = hmm.most_likely_states(y)

plt.figure(figsize=(8, 4))
def log_marginal_likelihood(params, x, y):
    mean, cov_params, noise_scale = unpack_kernel_params(params)
    cov_y_y = cov_func(cov_params, x, x) + noise_scale * np.eye(len(y))
    prior_mean = mean * np.ones(len(y))
    return mvn.logpdf(y, prior_mean, cov_y_y)
def plot_fit_and_feature_space(self,w,model,feat,**kwargs): # construct figure fig = plt.figure(figsize=(9,4)) # create subplot with 2 panels gs = gridspec.GridSpec(1, 2, width_ratios=[1,1]) ax1 = plt.subplot(gs[0]); ax2 = plt.subplot(gs[1]); view = [20,20] if 'view' in kwargs: view = kwargs['view'] ##### plot left panel in original space #### # scatter points xmin,xmax,ymin,ymax = self.scatter_pts_2d(self.x,ax1) # clean up panel ax1.set_xlim([xmin,xmax]) ax1.set_ylim([ymin,ymax]) # label axes ax1.set_xlabel(r'$x$', fontsize = 16) ax1.set_ylabel(r'$y$', rotation = 0,fontsize = 16,labelpad = 10) # create fit s = np.linspace(xmin,xmax,300)[np.newaxis,:] normalizer = lambda a: a if 'normalizer' in kwargs: normalizer = kwargs['normalizer'] t = np.tanh(model(normalizer(s),w)) ax1.plot(s.flatten(),t.flatten(),linewidth = 2,c = 'lime') #### plot fit in transformed feature space ##### # check if feature transform has internal parameters x_transformed = 0 sig = signature(feat) if len(sig.parameters) == 2: if np.shape(w)[1] == 1: x_transformed = feat(normalizer(self.x),w) else: x_transformed = feat(normalizer(self.x),w[0]) else: x_transformed = feat(normalizer(self.x)) # two dimensional transformed feature space if x_transformed.shape[0] == 1: s = np.linspace(xmin,xmax,300)[np.newaxis,:] # scatter points xmin,xmax,ymin,ymax = self.scatter_pts_2d(x_transformed,ax2) # produce plot s2 = copy.deepcopy(s) if len(sig.parameters) == 2: if np.shape(w)[1] == 1: s2 = feat(normalizer(s),w) else: s2 = feat(normalizer(s),w[0]) else: s2 = feat(normalizer(s)) t = np.tanh(model(normalizer(s),w)) ax2.plot(s2.flatten(),t.flatten(),linewidth = 2,c = 'lime') # label axes ax2.set_xlabel(r'$f\left(x,\mathbf{w}^{\star}\right)$', fontsize = 16) ax2.set_ylabel(r'$y$', rotation = 0,fontsize = 16,labelpad = 10) # three dimensional transformed feature space if x_transformed.shape[0] == 2: # create panel ax2 = plt.subplot(gs[1],projection = '3d'); s = np.linspace(xmin,xmax,100)[np.newaxis,:] # plot data in 3d xmin,xmax,xmin1,xmax1,ymin,ymax = self.scatter_3d_points(x_transformed,ax2) # create and plot fit s2 = copy.deepcopy(s) if len(sig.parameters) == 2: s2 = feat(normalizer(s),w[0]) else: s2 = feat(normalizer(s)) # reshape for plotting a = s2[0,:] b = s2[1,:] a = np.linspace(xmin,xmax,100) b = np.linspace(xmin1,xmax1,100) a,b = np.meshgrid(a,b) # get firstem a.shape = (1,np.size(s)**2) f1 = feat(normalizer(a))[0,:] # secondm b.shape = (1,np.size(s)**2) f2 = feat(normalizer(b))[1,:] # tack a 1 onto the top of each input point all at once c = np.vstack((a,b)) o = np.ones((1,np.shape(c)[1])) c = np.vstack((o,c)) r = np.tanh(np.dot(c.T,w)) # various a.shape = (np.size(s),np.size(s)) b.shape = (np.size(s),np.size(s)) r.shape = (np.size(s),np.size(s)) ax2.plot_surface(a,b,r,alpha = 0.1,color = 'lime',rstride=15, cstride=15,linewidth=0.5,edgecolor = 'k') ax2.set_xlim([np.min(a),np.max(a)]) ax2.set_ylim([np.min(b),np.max(b)]) ''' a,b = np.meshgrid(t1,t2) a.shape = (1,np.size(s)**2) b.shape = (1,np.size(s)**2) ''' ''' c = np.vstack((a,b)) o = np.ones((1,np.shape(c)[1])) c = np.vstack((o,c)) # tack a 1 onto the top of each input point all at once r = (np.dot(c.T,w)) a.shape = (np.size(s),np.size(s)) b.shape = (np.size(s),np.size(s)) r.shape = (np.size(s),np.size(s)) ax2.plot_surface(a,b,r,alpha = 0.1,color = 'lime',rstride=15, cstride=15,linewidth=0.5,edgecolor = 'k') ''' # label axes #self.move_axis_left(ax2) ax2.set_xlabel(r'$f_1(x)$', fontsize = 12,labelpad = 5) ax2.set_ylabel(r'$f_2(x)$', rotation = 0,fontsize = 12,labelpad = 5) 
        ax2.set_zlabel(r'$y$', rotation=0, fontsize=12, labelpad=0)
        self.move_axis_left(ax2)
        ax2.xaxis.set_major_formatter(FormatStrFormatter('%.1f'))
        ax2.yaxis.set_major_formatter(FormatStrFormatter('%.1f'))
        ax2.view_init(view[0], view[1])
def test_lds_log_probability_perf(T=1000, D=10, N_iter=10): """ Compare performance of banded method vs message passing in pylds. """ print("Comparing methods for T={} D={}".format(T, D)) from pylds.lds_messages_interface import kalman_info_filter, kalman_filter # Convert LDS parameters into info form for pylds As, bs, Qi_sqrts, ms, Ri_sqrts = make_lds_parameters(T, D) Qis = np.matmul(Qi_sqrts, np.swapaxes(Qi_sqrts, -1, -2)) Ris = np.matmul(Ri_sqrts, np.swapaxes(Ri_sqrts, -1, -2)) x = npr.randn(T, D) print("Timing banded method") start = time.time() for itr in range(N_iter): lds_log_probability(x, As, bs, Qi_sqrts, ms, Ri_sqrts) stop = time.time() print("Time per iter: {:.4f}".format((stop - start) / N_iter)) # Compare to Kalman Filter mu_init = np.zeros(D) sigma_init = np.eye(D) Bs = np.ones((D, 1)) sigma_states = np.linalg.inv(Qis) Cs = np.eye(D) Ds = np.zeros((D, 1)) sigma_obs = np.linalg.inv(Ris) inputs = bs data = ms print("Timing PyLDS message passing (kalman_filter)") start = time.time() for itr in range(N_iter): kalman_filter(mu_init, sigma_init, np.concatenate([As, np.eye(D)[None, :, :]]), Bs, np.concatenate([sigma_states, np.eye(D)[None, :, :]]), Cs, Ds, sigma_obs, inputs, data) stop = time.time() print("Time per iter: {:.4f}".format((stop - start) / N_iter)) # Info form comparison J_init = np.zeros((D, D)) h_init = np.zeros(D) log_Z_init = 0 J_diag, J_lower_diag, h = convert_lds_to_block_tridiag( As, bs, Qi_sqrts, ms, Ri_sqrts) J_pair_21 = J_lower_diag J_pair_22 = J_diag[1:] J_pair_11 = J_diag[:-1] J_pair_11[1:] = 0 h_pair_2 = h[1:] h_pair_1 = h[:-1] h_pair_1[1:] = 0 log_Z_pair = 0 J_node = np.zeros((T, D, D)) h_node = np.zeros((T, D)) log_Z_node = 0 print("Timing PyLDS message passing (kalman_info_filter)") start = time.time() for itr in range(N_iter): kalman_info_filter(J_init, h_init, log_Z_init, J_pair_11, J_pair_21, J_pair_22, h_pair_1, h_pair_2, log_Z_pair, J_node, h_node, log_Z_node) stop = time.time() print("Time per iter: {:.4f}".format((stop - start) / N_iter))
def fprop(params): """ Forward pass of the NTM. """ W = params # aliasing for brevity xs, hs, ys, ps, ts, os = {}, {}, {}, {}, {}, {} def l(): """ Silly utility function that should be called in init. """ return [{} for _ in xrange(self.heads)] rs = l() k_rs, beta_rs, g_rs, s_rs, gamma_rs = l(),l(),l(),l(),l() k_ws, beta_ws, g_ws, s_ws, gamma_ws = l(),l(),l(),l(),l() adds, erases = l(),l() w_ws, w_rs = l(),l() # read weights and write weights for idx in range(self.heads): rs[idx][-1] = self.W['rsInit' + str(idx)] # stores values read from memory w_ws[idx][-1] = softmax(self.W['w_wsInit' + str(idx)]) w_rs[idx][-1] = softmax(self.W['w_rsInit' + str(idx)]) mems = {} # the state of the memory at every timestep mems[-1] = self.W['memsInit'] loss = 0 for t in xrange(len(inputs)): xs[t] = np.reshape(np.array(inputs[t]),inputs[t].shape[::-1]) rsum = 0 for idx in range(self.heads): rsum = rsum + np.dot(W['rh' + str(idx)], np.reshape(rs[idx][t-1],(self.M,1))) hs[t] = np.tanh(np.dot(W['xh'], xs[t]) + rsum + W['bh']) os[t] = np.tanh(np.dot(W['ho'], hs[t]) + W['bo']) for idx in range(self.heads): # parameters to the read head k_rs[idx][t] = np.tanh(np.dot(W['ok_r' + str(idx)],os[t]) + W['bk_r' + str(idx)]) beta_rs[idx][t] = softplus(np.dot(W['obeta_r' + str(idx)],os[t]) + W['bbeta_r' + str(idx)]) g_rs[idx][t] = sigmoid(np.dot(W['og_r' + str(idx)],os[t]) + W['bg_r' + str(idx)]) s_rs[idx][t] = softmax(np.dot(W['os_r' + str(idx)],os[t]) + W['bs_r' + str(idx)]) gamma_rs[idx][t] = 1 + sigmoid(np.dot(W['ogamma_r' + str(idx)], os[t]) + W['bgamma_r' + str(idx)]) # parameters to the write head k_ws[idx][t] = np.tanh(np.dot(W['ok_w' + str(idx)],os[t]) + W['bk_w' + str(idx)]) beta_ws[idx][t] = softplus(np.dot(W['obeta_w' + str(idx)], os[t]) + W['bbeta_w' + str(idx)]) g_ws[idx][t] = sigmoid(np.dot(W['og_w' + str(idx)],os[t]) + W['bg_w' + str(idx)]) s_ws[idx][t] = softmax(np.dot(W['os_w' + str(idx)],os[t]) + W['bs_w' + str(idx)]) gamma_ws[idx][t] = 1 + sigmoid(np.dot(W['ogamma_w' + str(idx)], os[t]) + W['bgamma_w' + str(idx)]) # the erase and add vectors # these are also parameters to the write head # but they describe "what" is to be written rather than "where" adds[idx][t] = np.tanh(np.dot(W['oadds' + str(idx)], os[t]) + W['badds' + str(idx)]) erases[idx][t] = sigmoid(np.dot(W['oerases' + str(idx)], os[t]) + W['erases' + str(idx)]) w_ws[idx][t] = addressing.create_weights( k_ws[idx][t] , beta_ws[idx][t] , g_ws[idx][t] , s_ws[idx][t] , gamma_ws[idx][t] , w_ws[idx][t-1] , mems[t-1]) w_rs[idx][t] = addressing.create_weights( k_rs[idx][t] , beta_rs[idx][t] , g_rs[idx][t] , s_rs[idx][t] , gamma_rs[idx][t] , w_rs[idx][t-1] , mems[t-1]) ys[t] = np.dot(W['oy'], os[t]) + W['by'] ps[t] = sigmoid(ys[t]) one = np.ones(ps[t].shape) ts[t] = np.reshape(np.array(targets[t]),(self.out_size,1)) epsilon = 2**-23 # to prevent log(0) a = np.multiply(ts[t] , np.log2(ps[t] + epsilon)) b = np.multiply(one - ts[t], np.log2(one-ps[t] + epsilon)) loss = loss - (a + b) for idx in range(self.heads): # read from the memory rs[idx][t] = memory.read(mems[t-1],w_rs[idx][t]) # write into the memory mems[t] = memory.write(mems[t-1],w_ws[idx][t],erases[idx][t],adds[idx][t]) self.stats = [loss, ps, w_rs, w_ws, adds, erases] return np.sum(loss)
true_rarhmm = rARHMM(nb_states=3, dm_obs=2, trans_type='neural')

# trajectory lengths
T = [1250, 1150, 1025]

true_z, x = true_rarhmm.sample(horizon=T)
true_ll = true_rarhmm.log_norm(x)

rarhmm = rARHMM(nb_states=3, dm_obs=2, trans_type='neural')
rarhmm.initialize(x)
lls = rarhmm.em(x, nb_iter=100, prec=0., verbose=True)
print("true_ll=", true_ll, "hmm_ll=", lls[-1])

plt.figure(figsize=(5, 5))
plt.plot(np.ones(len(lls)) * true_ll, '-r')
plt.plot(lls)
plt.show()

_, rarhmm_z = rarhmm.viterbi(x)
_seq = npr.choice(len(x))
rarhmm.permute(permutation(true_z[_seq], rarhmm_z[_seq], K1=3, K2=3))
_, rarhmm_z = rarhmm.viterbi(x[_seq])

plt.figure(figsize=(8, 4))
plt.subplot(211)
plt.imshow(true_z[_seq][None, :], aspect="auto", cmap=cmap,
           vmin=0, vmax=len(colors) - 1)
plt.xlim(0, len(x[_seq]))
plt.ylabel("$z_{\\mathrm{true}}$")
plt.yticks([])
def init_var_params(layer_sizes, scale=-5, scale_mean=1):
    _, num_weights = shapes_and_num(layer_sizes)
    return rs.randn(num_weights) * scale_mean, np.ones(num_weights) * scale  # mean, log_std
def _add_intercept(self, X):
    if self.fit_intercept:
        return np.c_[np.ones(len(X)), X]
# wDc = np.array([1.0]) wDc = np.array([-5.5]) w = np.concatenate((wDc, wfilt, wfilt2)) # Generate simulated dataset # Xmat = np.hstack((np.ones((T,1)), npr.randn(T, D_in))) Cov = np.array([[1.0, 0.4], [0.4, 1.0]]) L = np.linalg.cholesky(Cov) Xstim = (L @ npr.randn(2, T * 2)).T # split in halfs from scipy.linalg import hankel Xmat1 = hankel(Xstim[:, 0], Xstim[:, 0][-19:]) Xmat2 = hankel(Xstim[:, 1], Xstim[:, 1][-19:]) Xmat = np.hstack((np.ones((T * 2, 1)), Xmat1, Xmat2)) # import ssm # from ssm import LDS # from ssm.util import random_rotation # true_lds = LDS(2, D_in, emissions="gaussian") # A0 = .95 * random_rotation(D_in, theta=np.pi/40) # S = np.arange(1, D_in+1) # R = np.linalg.svd(npr.randn(D_in, D_in))[0] * S # A = R.dot(A0).dot(np.linalg.inv(R)) # b = np.zeros(D_in) # true_lds.dynamics.As[0] = A # true_lds.dynamics.bs[0] = b # true_lds.dynamics.Sigmas = true_lds.dynamics.Sigmas / np.max(true_lds.dynamics.Sigmas[0]) * 0.5 # x, y = true_lds.sample(T) # x = x / np.max(x) * 5.0