def _get_responsibilities(self, pi, g, beta, mu_ivp, alpha):
    """ Gets the posterior responsibilities for each comp. of the mixture.
    """
    probs = [[] for _ in range(len(self.N_data))]
    for i, ifx in enumerate(self._ifix):
        zM = self._forward(g, beta, mu_ivp[i], ifx)
        for q, yq in enumerate(self.Y_train_):
            logprob = norm.logpdf(
                yq, zM[self.data_inds[q], :, q],
                scale=1 / np.sqrt(alpha))
            # sum over the dimension component
            logprob = logprob.sum(-1)
            if len(probs[q]) == 0:
                probs[q] = logprob
            else:
                probs[q] = np.column_stack((probs[q], logprob))

    probs = [lp - pi for lp in probs]
    # subtract the maximum for exponential normalization
    probs = [p - np.atleast_1d(p.max(axis=-1))[:, None] for p in probs]
    probs = [np.exp(p) / np.exp(p).sum(-1)[:, None] for p in probs]
    return probs
def make_pixel_grid(self):
    """ makes a stack of points corresponding to each point in a pixel grid
    with input shape
    """
    y_grid = np.arange(self.nelec.shape[0], dtype=np.float64) + 1
    x_grid = np.arange(self.nelec.shape[1], dtype=np.float64) + 1
    xx, yy = np.meshgrid(x_grid, y_grid, indexing='xy')
    # whenever we flatten and reshape use C ordering...
    return np.column_stack((xx.ravel(order='C'), yy.ravel(order='C')))
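# A minimal sketch (added, not from the source) of the meshgrid ->
# column_stack pattern used above: for a 2 x 3 image it yields one (x, y)
# row per pixel, in C (row-major) order.
import numpy as np

xx, yy = np.meshgrid(np.arange(3, dtype=np.float64) + 1,
                     np.arange(2, dtype=np.float64) + 1, indexing='xy')
grid = np.column_stack((xx.ravel(order='C'), yy.ravel(order='C')))
# grid == [[1, 1], [2, 1], [3, 1],
#          [1, 2], [2, 2], [3, 2]]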
def constructL(d, icf):
    # Autograd does not support indexed assignment to arrays A[0,0] = x
    constructL.Lparamidx = d

    def make_L_col(i):
        nelems = d - i - 1
        col = np.concatenate((
            np.zeros(i + 1),
            icf[constructL.Lparamidx:(constructL.Lparamidx + nelems)]))
        constructL.Lparamidx += nelems
        return col

    columns = [make_L_col(i) for i in range(d)]
    return np.column_stack(columns)
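# A small worked example (added): with d = 3, the first d entries of icf are
# skipped (constructL starts reading at index d; presumably they parameterize
# the diagonal elsewhere -- an assumption), and the remaining d*(d-1)/2
# entries fill the strictly lower triangle column by column.
import numpy as np
icf = np.arange(6.0)      # [0, 1, 2 | 3, 4, 5]
L = constructL(3, icf)
# L == [[0., 0., 0.],
#       [3., 0., 0.],
#       [4., 5., 0.]]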
def create_pixel_grid(image, loc):
    v_s = image.equa2pixel(loc)
    bound = image.R
    minx_b, maxx_b = max(0, int(v_s[0] - bound)), \
                     min(int(v_s[0] + bound + 1), image.nelec.shape[1])
    miny_b, maxy_b = max(0, int(v_s[1] - bound)), \
                     min(int(v_s[1] + bound + 1), image.nelec.shape[0])
    y_grid = np.arange(miny_b, maxy_b, dtype=np.float64)
    x_grid = np.arange(minx_b, maxx_b, dtype=np.float64)
    xx, yy = np.meshgrid(x_grid, y_grid, indexing='xy')
    pixel_grid = np.column_stack((xx.ravel(order='C'), yy.ravel(order='C')))
    return xx.astype(int), yy.astype(int), pixel_grid
def resample_photons(self, srcs, verbose=False):
    """resample photons - store source-specific images"""
    # first, clear out old sample images
    for src in srcs:
        src.clear_sample_images()

    # generate per-source sample image patch for each fits image in
    # this field.  keep track of photons due to noise
    noise_sums = {}
    for band, img in self.img_dict.items():
        if verbose:
            print(" ... resampling band %s " % band)
        samp_imgs, noise_sum = \
            cel_mcmc.sample_source_photons_single_image_cython(
                img, [s.params for s in srcs]
            )

        # tell each source to keep track of its source-specific sampled
        # images (and the image it was stripped out of)
        for src, samp_img in zip(srcs, samp_imgs):
            if samp_img is not None:
                # cache pixel grid for each sample image
                y_grid = np.arange(samp_img.y0, samp_img.y1, dtype=np.float64)
                x_grid = np.arange(samp_img.x0, samp_img.x1, dtype=np.float64)
                xx, yy = np.meshgrid(x_grid, y_grid, indexing='xy')
                pixel_grid = np.column_stack((xx.ravel(order='C'),
                                              yy.ravel(order='C')))
                src.sample_image_list.append((samp_img, img, pixel_grid))

        # keep track of noise sums
        noise_sums[band] = noise_sum

    # resample noise parameter in each fits image
    for band, img in self.img_dict.items():
        a_n = self.a_0 + noise_sums[band]
        b_n = self.b_0 + img.nelec.size
        eps_tmp = img.epsilon
        img.epsilon = np.random.gamma(a_n, 1. / b_n)
def fit_linear_regression(Xs, ys, weights=None,
                          fit_intercept=True,
                          prior_mean=0, prior_variance=1,
                          nu0=1, Psi0=1):
    """
    Fit a linear regression y_i ~ N(Wx_i + b, diag(S)) for W, b, S.

    :param Xs: array or list of arrays
    :param ys: array or list of arrays
    :param fit_intercept: if False drop b
    """
    Xs = Xs if isinstance(Xs, (list, tuple)) else [Xs]
    ys = ys if isinstance(ys, (list, tuple)) else [ys]
    assert len(Xs) == len(ys)

    p, d = Xs[0].shape[1], ys[0].shape[1]
    assert all([X.shape[1] == p for X in Xs])
    assert all([y.shape[1] == d for y in ys])
    assert all([X.shape[0] == y.shape[0] for X, y in zip(Xs, ys)])

    # Broadcast the scalar prior parameters
    # (note: np.ones, not np.zeros, so a nonzero prior_mean is kept)
    prior_mean = prior_mean * np.ones((d, p))
    prior_variance = prior_variance * np.eye(p)

    # Check the weights.  Default to all ones.
    if weights is not None:
        weights = weights if isinstance(weights, (list, tuple)) else [weights]
    else:
        weights = [np.ones(X.shape[0]) for X in Xs]

    # Add weak prior on intercept
    if fit_intercept:
        prior_mean = np.column_stack((prior_mean, np.zeros(d)))
        prior_variance = block_diag(prior_variance, np.eye(1))

    # Compute the posterior
    J = np.linalg.inv(prior_variance)
    h = np.dot(J, prior_mean.T)

    for X, y, weight in zip(Xs, ys, weights):
        X = np.column_stack((X, np.ones(X.shape[0]))) if fit_intercept else X
        J += np.dot(X.T * weight, X)
        h += np.dot(X.T * weight, y)

    # Solve for the MAP estimate
    W = np.linalg.solve(J, h).T
    if fit_intercept:
        W, b = W[:, :-1], W[:, -1]
    else:
        b = 0

    # Compute the residual and the posterior variance
    nu = nu0
    Psi = Psi0 * np.eye(d)
    for X, y, weight in zip(Xs, ys, weights):
        yhat = np.dot(X, W.T) + b
        resid = y - yhat
        nu += np.sum(weight)
        tmp1 = np.einsum('t,ti,tj->ij', weight, resid, resid)
        tmp2 = np.sum(weight[:, None, None] * resid[:, :, None]
                      * resid[:, None, :], axis=0)
        assert np.allclose(tmp1, tmp2)
        Psi += tmp1

    # Get MAP estimate of posterior covariance
    Sigma = Psi / (nu + d + 1)
    if fit_intercept:
        return W, b, Sigma
    else:
        return W, Sigma
def fit_linear_regression(Xs, ys, weights=None,
                          mu0=0, sigmasq0=1,
                          alpha0=1, beta0=1,
                          fit_intercept=True):
    """
    Fit a linear regression y_i ~ N(Wx_i + b, diag(S)) for W, b, S.

    :param Xs: array or list of arrays
    :param ys: array or list of arrays
    :param fit_intercept: if False drop b
    """
    Xs = Xs if isinstance(Xs, (list, tuple)) else [Xs]
    ys = ys if isinstance(ys, (list, tuple)) else [ys]
    assert len(Xs) == len(ys)

    D = Xs[0].shape[1]
    P = ys[0].shape[1]
    assert all([X.shape[1] == D for X in Xs])
    assert all([y.shape[1] == P for y in ys])
    assert all([X.shape[0] == y.shape[0] for X, y in zip(Xs, ys)])

    # Broadcast the scalar prior parameters
    # (note: np.ones, not np.zeros, so a nonzero mu0 is kept)
    mu0 = mu0 * np.ones((P, D))
    sigmasq0 = sigmasq0 * np.eye(D)

    # Check the weights.  Default to all ones.
    if weights is not None:
        weights = weights if isinstance(weights, (list, tuple)) else [weights]
    else:
        weights = [np.ones(X.shape[0]) for X in Xs]

    # Add weak prior on intercept
    if fit_intercept:
        mu0 = np.column_stack((mu0, np.zeros(P)))
        sigmasq0 = block_diag(sigmasq0, np.eye(1))

    # Compute the posterior
    J = np.linalg.inv(sigmasq0)
    h = np.dot(J, mu0.T)

    for X, y, weight in zip(Xs, ys, weights):
        X = np.column_stack((X, np.ones(X.shape[0]))) if fit_intercept else X
        J += np.dot(X.T * weight, X)
        h += np.dot(X.T * weight, y)

    # Solve for the MAP estimate
    W = np.linalg.solve(J, h).T
    if fit_intercept:
        W, b = W[:, :-1], W[:, -1]
    else:
        b = 0

    # Compute the residual and the posterior variance
    alpha = alpha0
    beta = beta0 * np.ones(P)
    for X, y, weight in zip(Xs, ys, weights):
        yhat = np.dot(X, W.T) + b
        resid = y - yhat
        alpha += 0.5 * np.sum(weight)
        beta += 0.5 * np.sum(weight[:, None] * resid**2, axis=0)

    # Get MAP estimate (posterior mode) of the variance
    sigmasq = beta / (alpha + 1e-16)
    if fit_intercept:
        return W, b, sigmasq
    else:
        return W, sigmasq
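# A usage sketch (added, not from the source): recover a known linear map
# from noisy data with the diagonal-variance variant above.  Assumes numpy
# (as np) and scipy.linalg.block_diag are imported, as the function requires.
np.random.seed(0)
W_true, b_true = np.array([[2.0, -1.0]]), np.array([0.5])
X = np.random.randn(500, 2)
y = X.dot(W_true.T) + b_true + 0.1 * np.random.randn(500, 1)
W_hat, b_hat, sigmasq_hat = fit_linear_regression(X, y)
assert np.allclose(W_hat, W_true, atol=0.1)   # close, up to sampling noise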
def curve(problem, n_points=200):
    X = anp.linspace(problem.xl[0], problem.xu[0], n_points)[:, None]
    F = problem.evaluate(X)
    return anp.column_stack([X, F])
Xtest = np.random.normal(0, 3, size=(Ntest, D))
ytest = f(Xtest)[:, None] + np.random.normal(
    0, np.sqrt(sigma2), size=(Ntest, 1))

############################################################
# GPy
############################################################

# Grid for prediction
Q = 12
xs = np.linspace(-10, 10, Q)
ys = np.linspace(-10, 10, Q)
zs = np.linspace(-10, 10, Q)
ws = np.linspace(-10, 10, Q)
Xp, Yp, Zp, Wp = np.meshgrid(xs, ys, zs, ws)
XYZ = np.column_stack((Xp.ravel(), Yp.ravel(), Zp.ravel(), Wp.ravel()))
fp = f(XYZ)

# Build kernel
lengthscale = 10.
variance = 10.
rbf = GPy.kern.RBF(input_dim=D, lengthscale=lengthscale, variance=variance)

# fit initial model
gpy_model = GPy.models.GPRegression(X=X, Y=y[:, None], kernel=rbf,
                                    noise_var=sigma2)

# make predictions
mu_gpy, var_gpy = gpy_model.predict_noiseless(Xnew=XYZ)
def _evaluate(self, x, out, *args, **kwargs):
    f1 = x[:, 0]
    g = 1 + 9.0 / (self.n_var - 1) * anp.sum(x[:, 1:], axis=1)
    f2 = g * (1 - anp.power((f1 / g), 0.5))
    out["F"] = anp.column_stack([f1, f2])
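# A quick smoke test (added): with x[:, 1:] = 0 we get g = 1, so the
# evaluated points must lie on the analytic ZDT1 front f2 = 1 - sqrt(f1).
# The method only touches self.n_var, so a stub object suffices here.
import autograd.numpy as anp

class _Stub:
    n_var = 30

x = anp.zeros((11, _Stub.n_var))
x[:, 0] = anp.linspace(0, 1, 11)
out = {}
_evaluate(_Stub(), x, out)
f1, f2 = out["F"][:, 0], out["F"][:, 1]
assert anp.allclose(f2, 1 - anp.sqrt(f1))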
def _evaluate(self, x, out, *args, **kwargs):
    l = []
    for i in range(x.shape[1] - 1):
        l.append(100 * anp.square(x[:, i + 1] - anp.square(x[:, i]))
                 + anp.square(1 - x[:, i]))
    out["F"] = anp.sum(anp.column_stack(l), axis=1)
def _evaluate(self, x, out, *args, **kwargs):
    # Decision variables
    Q_rt1 = x[0]    # quantity of the retailer in the forward cycle
    Q_rt2 = x[1]    # quantity of the retailer in the forward cycle
    Q_rt3 = x[2]    # quantity of the retailer in the forward cycle
    Q_d1 = x[3]     # quantity of the distributor
    Q_d2 = x[4]     # quantity of the distributor
    Q_d3 = x[5]     # quantity of the distributor
    Q_m1 = x[6]     # quantity of the manufacturer
    Q_m2 = x[7]     # quantity of the manufacturer
    Q_m3 = x[8]     # quantity of the manufacturer
    Q_s1 = x[9]     # quantity of supplied parts
    Q_s2 = x[10]    # quantity of supplied parts
    Q_s3 = x[11]    # quantity of supplied parts

    # Cycle times of the supply chain
    t_r = U_Demand / x[0]         # cycle time of the retailer
    t_d = n_d * t_r               # cycle time of the distributor
    t_m = n_m * n_d * t_r         # cycle time of the manufacturer
    t_s = n_s * n_m * n_d * t_r   # cycle time of the supplier
    t_tp = t_s                    # cycle time of the third party

    # Social factors (job indices)
    S_jfS = 30    # fixed jobs at the supplier (assumed: 30 fixed employees)
    S_jfM = 30    # fixed jobs at the manufacturer (assumed: 30 fixed employees)
    S_jfD = 30    # fixed jobs at the distributor (assumed: 30 fixed employees)
    S_jfRT = 30   # fixed jobs at the retailer (assumed: 30 fixed employees)
    S_jfTP = 20   # fixed jobs at the third-party recovery (assumed: 20 fixed employees)
    S_jvS = 270   # variable jobs at the supplier (270 workers per facility)
    S_jvM = 270   # variable jobs at the manufacturer (270 workers per facility)
    S_jvD = 270   # variable jobs at the distributor (270 workers per facility)
    S_jvRT = 270  # variable jobs at the retailer (270 workers per facility)
    S_jvTP = 100  # variable jobs at the third-party recovery (100 workers per facility)
    S_u = 20      # employee satisfaction factor for refurbished parts (third-party disassembler)
    S_rt = 30     # customer satisfaction factor for refurbished parts

    # Lost days at work
    S_ds = 5      # lost days from injuries or work damage at the supplier, per month
    S_dm = 5      # lost days from injuries or work damage at the manufacturer
    S_dd = 5      # lost days from injuries or work damage at the distributor
    S_drt = 5     # lost days from injuries or work damage at the retailer
    S_dtp = 5     # lost days from injuries or work damage at the third party

    # Environmental aspect of the supply chain
    # (emissions calculated from the carbon footprint)
    E_q = 10      # emission factor of the production line
    E_tp = 10     # emission from waste removal
    # Transportation emission costs
    E_ts = 20     # transportation emission of the supplier
    E_tm = 20     # transportation emission of the manufacturer
    E_td = 20     # transportation emission of the distributor
    E_trt = 20    # transportation emission of the retailer
    E_ttp = 20    # transportation emission of the third party

    # Supplier costs.  The original loops recomputed the same sum on every
    # iteration, so each reduces to a single vectorized sum over i = 1..n_s.
    i_ss = np.arange(1, n_s + 1, 1)
    tc_s4 = np.sum((i_ss / n_s) * Q_s1 * t_s)
    tc_s5 = np.sum((i_ss / n_s) * Q_s2 * t_s)
    tc_s6 = np.sum((i_ss / n_s) * Q_s3 * t_s)
    TC_s1 = (S_s * (1 / (n_s * t_s))) \
        + (((H_s + TS_s) / (n_s * t_s)) * tc_s4)   # supplier cost, component 1
    TC_s2 = (S_s * (1 / (n_s * t_s))) \
        + (((H_s + TS_s) / (n_s * t_s)) * tc_s5)   # supplier cost, component 2
    TC_s3 = (S_s * (1 / (n_s * t_s))) \
        + (((H_s + TS_s) / (n_s * t_s)) * tc_s6)   # supplier cost, component 3

    # Manufacturer costs
    i_mm = np.arange(1, n_m + 1, 1)
    tc_m1 = np.arange(1, n_m, 1)
    tc_m2 = np.sum((1 - (i_mm / n_m)) * (Q_m1 + Q_TP))
    tc_m3 = tc_m2
    tc_s7 = np.arange(1, n_s, 1)
    tc_m = sum(tc_m1)
    tc_s8 = sum(tc_s7)
    TC_m = (H_m * ((0.5 * (Q_m1 ** 2) * (1 / P_m1))
                   + (tc_m * (Q_m1 * t_m * (1 / (n_m ** 2)))))) \
        + ((S_m1 + TS_m) * (1 / t_m)) + ((S_ms + TS_tp) * (1 / t_s)) \
        + (H_ms * (1 / t_s) * ((((Q_s1 + Q_TP) * Q_m1) / P_m1)
                               + (tc_s8 * ((Q_s1 + Q_TP) / n_s)
                                  * (t_m - (Q_m1 / P_m1)))))
    TC_m2 = (H_m * ((0.5 * (Q_m2 ** 2) * (1 / P_m2))
                    + (tc_m * (Q_m2 * t_m * (1 / (n_m ** 2)))))) \
        + ((S_m2 + TS_m) * (1 / t_m)) + ((S_ms + TS_tp) * (1 / t_s)) \
        + (H_ms * (1 / t_s) * ((((Q_s2 + Q_TP) * Q_m2) / P_m2)
                               + (tc_s8 * ((Q_s2 + Q_TP) / n_s)
                                  * (t_m - (Q_m2 / P_m2)))))
    TC_m3 = (H_m * ((0.5 * (Q_m3 ** 2) * (1 / P_m3))
                    + (tc_m * (Q_m3 * t_m * (1 / (n_m ** 2)))))) \
        + ((S_m3 + TS_m) * (1 / t_m)) + ((S_ms + TS_tp) * (1 / t_s)) \
        + (H_ms * (1 / t_s) * ((((Q_s3 + Q_TP) * Q_m3) / P_m3)
                               + (tc_s8 * ((Q_s3 + Q_TP) / n_s)
                                  * (t_m - (Q_m3 / P_m3)))))

    # Distributor costs
    i_dd = np.arange(1, n_d + 1, 1)
    tc_d1 = np.sum((i_dd / n_d) * Q_d1)       # distributor cost, product 1
    tc_d2 = np.sum((i_dd / n_d) * Q_d2)       # distributor cost, product 2
    tc_d3 = np.sum((i_dd[-1] / n_d) * Q_d3)   # distributor cost, product 3
    # (the original loop used the scalar loop index here, so only the last
    #  term survives; that behavior is preserved)
    tc_d_f = tc_d1 + tc_d2 + tc_d3
    TC_d = (H_dr * (tc_d_f / n_d)) \
        + ((S_d + TS_d) * (1 / t_d))   # total cost of the distributor

    # Total cost of the retailer
    TC_rt = (H_rt * (Q_rt1 / 2)) + ((S_r + TS_rt) * (1 / t_r))    # product 1
    TC_rt2 = (H_rt * (Q_rt2 / 2)) + ((S_r + TS_rt) * (1 / t_r))   # product 2
    TC_rt3 = (H_rt * (Q_rt3 / 2)) + ((S_r + TS_rt) * (1 / t_r))   # product 3

    # Total cost of third-party recovery
    TC_tp = ((H_tp / 2) * Q_TP) + ((S_tp + TS_tp) * (1 / t_tp))

    # Economic aspect
    EQO = TC_s1 + TC_s2 + TC_s3 + TC_m + TC_m2 + TC_m3 + TC_d + TC_rt \
        + TC_rt2 + TC_rt3 + TC_tp

    # Social aspect
    LSC = (S_jfS + S_jfM + S_jfD + S_jfRT + S_jfTP) \
        + ((S_jvS * Q_s1) + (S_jvD * Q_d1) + (S_jvM * Q_m1)
           + (S_jvRT * Q_rt1) + (S_jvTP * Q_TP)) \
        + (S_u * U_Demand) + (S_rt * Q_rt1) - (S_ds * Q_s1) \
        + (S_dd * Q_d1) + (S_dm * Q_m1) + (S_drt * Q_rt1) \
        + (S_dtp * Q_TP)

    # Environmental aspect
    ESC = (E_q * (Q_s1 + Q_d1 + Q_m1 + Q_rt1)) \
        + (E_ts * (1 / t_s)) + (E_td * (1 / t_d)) \
        + (E_tm * (1 / t_m)) + (E_trt * (1 / t_r)) \
        + (E_ts * (1 / t_tp)) + (E_tp * Q_TP)

    w1 = 1
    w2 = 1
    w3 = 1
    f1 = EQO * w1
    f2 = LSC * w2
    f3 = ESC * w3

    g1 = -x[0] + U_Demand
    g2 = -x[1] + U_Demand
    g3 = -x[2] + U_Demand
    g4 = -f1 - f2 - f3
    g5 = -(x[9] + Q_TP) + (n_s * x[6])
    g6 = -(x[10] + Q_TP) + (n_s * x[7])
    g7 = -(x[11] + Q_TP) + (n_s * x[8])
    g8 = (n_m * x[3]) - x[6]
    g9 = (n_m * x[4]) - x[7]
    g10 = (n_m * x[5]) - x[8]
    g11 = -x[3] + (n_d * x[0])
    g12 = -x[4] + (n_d * x[1])
    g13 = -x[5] + (n_d * x[2])
    g14 = -x[0]
    g15 = -x[1]
    g16 = -x[2]
    g17 = -f1

    out["F"] = anp.column_stack([f1, f2, f3])
    out["G"] = anp.column_stack([g1, g2, g3, g4, g5, g6, g7, g8, g9, g10,
                                 g11, g12, g13, g14, g15, g16, g17])
def build_thmat(mu_n):
    mumat = np.row_stack([mus[:n, :], mu_n, mus[n + 1:, :]])
    return np.column_stack([mumat, lns])
def lbs(lns2):
    thmat = np.column_stack([mus, lns2])
    return lower_bound_MoG(thmat, s2min=s2min)
def getHOD_general(self, u, theta, pos, vel, acc, jerk, snap, t):
    z = np.array((-np.sin(theta), np.cos(theta)))
    zp = np.array((-np.cos(theta), -np.sin(theta)))
    zpp = np.array((np.sin(theta), -np.cos(theta)))
    """ state is [ x z xdot zdot ]
        input is [ u theta ]
        state_input is [ x z xdot zdot u theta ]
        x is state in quad.py
        u is u torque
    """
    # TODO Learner does not use theta vel and torque for now...
    x_t = np.array((pos[0], pos[1], theta, vel[0], vel[1], 0))
    u_t = np.array((u, 0))
    state_input = np.array((pos[0], pos[1], vel[0], vel[1], u, theta))

    dstate = np.hstack((vel, acc))
    ddstate = np.hstack((acc, jerk))

    dfdx = np.column_stack((z, u * zp))
    dfdt = -jerk
    if self.deriv_correct:
        dfm_dstate, dfm_dutheta = self.learner.get_derivs_state_input(
            state_input)
        dfdx += dfm_dutheta
        dfdt += dfm_dstate.dot(dstate)
        dfdt += self.learner.get_deriv_time(t)

    assert np.linalg.matrix_rank(dfdx) == 2
    xdot = np.linalg.solve(dfdx, -dfdt)

    d2fdx2 = np.array(
        (((0, -np.cos(theta)), (-np.cos(theta), u * np.sin(theta))),
         ((0, -np.sin(theta)), (-np.sin(theta), -u * np.cos(theta)))))
    d2fdt2 = -snap
    if self.deriv_correct and self.correct_snap:
        d2fm_dstate_input2 = self.learner.get_dderiv_state_input(
            state_input)
        d2fm_dstate2 = d2fm_dstate_input2[:, :4, :4]
        d2fm_dinput2 = d2fm_dstate_input2[:, 4:, 4:]
        d2fdx2 += d2fm_dinput2
        d2fdt2 += dfm_dstate.dot(ddstate) + np.tensordot(
            d2fm_dstate2, dstate, axes=1).dot(dstate)
        d2fdt2 += self.learner.get_dderiv_time(t)

    xddot = np.linalg.solve(
        dfdx, -d2fdt2 - np.tensordot(d2fdx2, xdot, axes=1).dot(xdot))
    return xdot[1], xddot[1]
    D = Dpp - 1

    # unpack mean and variance parameters
    mu = theta[:, :D]
    s2 = np.exp(theta[:, -1]) + s2min
    chols = np.array([np.eye(D) * np.sqrt(s) for s in s2])
    #lndets = D*np.log(s2)
    return mog.mog_samples(nsamps, mu, chols, pis=np.ones(N) / float(N))


if __name__ == "__main__":

    # test lower bound MoG
    N, D = 10, 2
    mu = np.random.randn(N, D)
    lns = np.random.randn(N)
    theta = np.column_stack([mu, lns])
    print(lower_bound_MoG(theta))

    # generate samples, and compute Monte Carlo entropy --- makes sure the
    # lower bound is reasonable
    import pyprind
    Ntrials = 50
    gaps = np.zeros(Ntrials)
    for i in pyprind.prog_bar(range(Ntrials)):
        N = np.random.randint(20) + 2
        D = np.random.randint(20) + 2
        mu = np.random.randn(N, D)

        # test lower bound MoG
        lns = np.random.randn(N)
        theta = np.column_stack([mu, lns])
A = R.dot(A0).dot(np.linalg.inv(R))
b = npr.randn(D)
true_lds.dynamics.As[0] = A
true_lds.dynamics.bs[0] = b
_, x, y = true_lds.sample(T)


# In[4]:

# Plot the dynamics vector field
xmins = x.min(axis=0)
xmaxs = x.max(axis=0)
npts = 20
true_lds.dynamics.As[0] = A
XX, YY = np.meshgrid(np.linspace(xmins[0], xmaxs[0], npts),
                     np.linspace(xmins[1], xmaxs[1], npts))
XY = np.column_stack((XX.ravel(), YY.ravel(), np.zeros((npts**2, D - 2))))
dx = XY.dot(A.T) + b - XY

plt.figure(figsize=(6, 6))
plt.quiver(XX, YY, dx[:, 0], dx[:, 1], color=colors[0])
plt.plot(x[:, 0], x[:, 1], '-k', lw=3)
plt.xlabel("$x_1$")
plt.ylabel("$x_2$")
plt.title("Simulated Latent States")
plt.tight_layout()

if save_figures:
    plt.savefig("lds_1.pdf")


# In[5]:
# --- save posterior parameters
# #############################################
if args.npvi:
    init_with_mfvi = True
    if init_with_mfvi:
        mfvi_lam = mfvi_init()

        # initialize theta
        theta_mfvi = np.atleast_2d(
            np.concatenate([mfvi_lam[:D], [2 * mfvi_lam[D:].mean()]]))
        mu0 = vi.bbvi_npvi.mogsamples(args.ncomp, theta_mfvi)

        # create npvi object
        theta0 = np.column_stack(
            [mu0, np.ones(args.ncomp) * theta_mfvi[0, -1]])
    else:
        theta0 = np.column_stack(
            [10 * np.random.randn(args.ncomp, D),
             -2 * np.ones(args.ncomp)])

    # create initial theta and sample
    npvi = vi.NPVI(lnpdf, D=D)
    mu, s2, elbo_vals, theta = npvi.run(theta0.copy(), niter=1000,
                                        verbose=False, path=args.output)
    print(elbo_vals)

    # save output here
    npvi_outfile = os.path.join(args.output,
                                "npvi_%d-comp.npz" % args.ncomp)
def construct_cv_grads(vbobj, lam, eps, elbo_gsamps=None, method="hessian"):
    """ main method to construct reduced variance reparameterization
    gradients, using a variety of methods.

    Methods:
      - "mc"           : full monte carlo estimator
      - "hessian"      : uses full hessian information
      - "hessian_diag" : uses only hessian diag information
      - "hvp_with_loo_diag_approx"   : uses other samples to approximate
                                       a per-sample diagonal expectation
      - "hvp_with_loo_direct_approx" : leave-one-out approximation of the
                                       dLds mean directly
      - "hvp_with_mc_variance"       : hessian-vector-product control
                                       variate for the mean block only
    """
    # unpack variational parameters
    assert eps.ndim == 2, "epsilon needs to be nsamps x D"
    ns, D = eps.shape
    m_lam, s_lam = lam[:D], np.exp(lam[D:])

    # generate samples if necessary
    if elbo_gsamps is None:
        elbo_gsamps = elbo_grad_samps_mat(vbobj, lam, eps)

    if method == "mc":
        # full monte carlo --- this is a no-op
        return elbo_gsamps

    elif method == "hessian":
        """ full hessian approximation """
        # full hessian, including diagonal
        gmu = vbobj.glnpdf(m_lam)
        H = vbobj.hlnpdf(m_lam)
        Hdiag = np.diag(H)

        # construct normal approx samples of data term
        dLdz = gmu + np.dot(H, (s_lam*eps).T).T
        #dLds = (dLdz*eps + 1/s_lam[None,:]) * s_lam
        dLds = dLdz*eps*s_lam + 1.
        elbo_gsamps_tilde = np.column_stack([dLdz, dLds])

        # characterize the mean of the dLds component (and z comp)
        dLds_mu = (Hdiag*s_lam + 1/s_lam) * s_lam
        gsamps_tilde_mean = np.concatenate([gmu, dLds_mu])

        # subtract mean to compute control variate
        elbo_gsamps_cv = elbo_gsamps - \
            (elbo_gsamps_tilde - gsamps_tilde_mean)
        return elbo_gsamps_cv

    elif method == "hessian_diag":
        """ use only hessian diagonal for RV model """
        gmu = vbobj.glnpdf(m_lam)
        H = vbobj.hlnpdf(m_lam)
        Hdiag = np.diag(H)

        # construct normal approx samples of data term
        dLdz = gmu + Hdiag * s_lam * eps
        dLds = (dLdz*eps + 1/s_lam[None,:]) * s_lam
        elbo_gsamps_tilde = np.column_stack([dLdz, dLds])

        # construct mean
        dLds_mu = (Hdiag*s_lam + 1/s_lam) * s_lam
        gsamps_tilde_mean = np.concatenate([gmu, dLds_mu])
        elbo_gsamps_cv = elbo_gsamps - \
            (elbo_gsamps_tilde - gsamps_tilde_mean)
        return elbo_gsamps_cv

    elif method == "hvp_with_loo_diag_approx":
        """ use other samples to estimate a per-sample diagonal expectation """
        assert ns > 1, "loo approximations require more than 1 sample"

        # compute hessian vector products and save them for both parts
        #hvps = np.array([vbobj.hvplnpdf(m_lam, s_lam*e) for e in eps])
        hvp_lam = vbobj.hvplnpdf_maker(m_lam)
        hvps = np.array([hvp_lam(s_lam*e) for e in eps])
        gmu = vbobj.glnpdf(m_lam)

        # construct normal approx samples of data term
        dLdz = gmu + hvps
        #dLds = (dLdz*eps + 1/s_lam[None,:]) * s_lam
        dLds = dLdz * (eps*s_lam) + 1

        # compute leave-one-out approximate diagonal (per-sample mean of dLds)
        Hdiag_sum = np.sum(eps*hvps, axis=0)
        Hdiag_s = (Hdiag_sum[None,:] - eps*hvps) / float(ns-1)
        dLds_mu = (Hdiag_s + 1/s_lam[None,:]) * s_lam

        # compute gsamps_cv - mean(gsamps_cv), and finally the var reduced
        #elbo_gsamps_tilde_centered = \
        #    np.column_stack([ hvps, dLds - dLds_mu ])
        #elbo_gsamps_cv = elbo_gsamps - elbo_gsamps_tilde_centered
        #return elbo_gsamps_cv
        elbo_gsamps[:,:D] -= hvps
        elbo_gsamps[:,D:] -= (dLds - dLds_mu)
        return elbo_gsamps

    elif method == "hvp_with_loo_direct_approx":
        # compute hessian vector products and save them for both parts
        assert ns > 1, "loo approximations require more than 1 sample"
        gmu = vbobj.glnpdf(m_lam)
        hvps = np.array([vbobj.hvplnpdf(m_lam, s_lam*e) for e in eps])

        # construct normal approx samples of data term
        dLdz = gmu + hvps
        dLds = (dLdz*eps + 1/s_lam[None,:]) * s_lam
        elbo_gsamps_tilde = np.column_stack([dLdz, dLds])

        # compute leave-one-out approximate diagonal (per-sample mean of dLds)
        dLds_sum = np.sum(dLds, axis=0)
        dLds_mu = (dLds_sum[None,:] - dLds) / float(ns-1)

        # compute gsamps_cv - mean(gsamps_cv), and finally the var reduced
        elbo_gsamps_tilde_centered = \
            np.column_stack([ dLdz - gmu, dLds - dLds_mu ])
        elbo_gsamps_cv = elbo_gsamps - elbo_gsamps_tilde_centered
        return elbo_gsamps_cv

    elif method == "hvp_with_mc_variance":
        hvp_lam = vbobj.hvplnpdf_maker(m_lam)
        hvps = np.array([hvp_lam(s_lam*e) for e in eps])
        elbo_gsamps[:,:D] -= hvps
        return elbo_gsamps

    # not implemented
    raise NotImplementedError("%s not implemented" % method)
def construct_C(lam):
    Cs = []
    for r, sl in enumerate(slices):
        Cr = np.concatenate([np.zeros(r + 1), lam[sl]])
        Cs.append(Cr)
    return np.column_stack(Cs)
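# A hedged example (added): `slices` and `lam` live in the enclosing scope
# in the source.  column_stack needs equal-length columns, so the slices
# must shrink as r grows, e.g. for a 3 x 2 strictly-lower-triangular stack:
import numpy as np
slices = [slice(0, 2), slice(2, 3)]
lam = np.array([1.0, 2.0, 3.0])
# construct_C(lam) == [[0., 0.],
#                      [1., 0.],
#                      [2., 3.]]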
def run(self, theta, niter=10, tol=.0001, verbose=False):
    """ Runs NPV for `niter` iterations;
    mimics npv_run.m from Sam Gershman's original matlab code.

    USAGE: [F mu s2] = npv_run(nlogpdf, theta, [nIter])

    INPUTS:
      theta - [N x D+1] initial parameter settings, where
              N is the number of components,
              D is the number of latent variables in the model,
              and the last column contains the log bandwidths (variances)
      nIter (optional) - maximum number of iterations (default: 10)
      tol (optional)   - change in the evidence lower bound (ELBO)
                         required for convergence (default: 0.0001)

    OUTPUTS:
      F  - [nIter x 1] approximate ELBO value at each iteration
      mu - [N x D] component means
      s2 - [N x 1] component bandwidths
    """
    N, Dpp = theta.shape
    D = Dpp - 1

    # set LBFGS optim arguments
    disp = 10 if verbose else None
    opts = {'disp': disp, 'maxiter': 5000,
            'gtol': 1e-7, 'ftol': 1e-7}  #, 'factr': 1e2}
    elbo_vals = np.zeros(niter)
    for ii in range(niter):
        elbo_vals[ii] = self.mc_elbo(theta)
        print("iteration %d (elbo = %2.4f)" % (ii, elbo_vals[ii]))

        # first-order approximation (L1): optimize mu, one component at a time
        print(" ... optimizing mus ")
        for n in range(N):
            print(" ... %d / %d " % (n, N))
            fun, gfun = self.make_elbo1_funs(theta, n)
            res = minimize(fun, x0=theta[n, :D], jac=gfun,
                           method='L-BFGS-B', options=opts)
            theta[n, :D] = res.x
        #print(theta[:, :D])
        #print(" ... elbo: ", self.mc_elbo(theta))

        # second-order approximation (L2): optimize s2
        print(" ... optimizing sigmas")
        mu = theta[:, :D]
        h = np.zeros(N)
        for n in range(N):
            # compute Hessian trace using finite differencing or autograd
            h[n] = np.sum(np.diag(hessian(self.lnpdf)(mu[n])))

        fun, gfun = self.make_elbo2_funs(theta, h)
        res = minimize(fun, x0=theta[:, -1], jac=gfun,
                       method='L-BFGS-B', options=opts)
        theta = np.column_stack([mu, res.x])

        # calculate the approximate ELBO (L2)
        # TODO check for convergence, e.g.
        #if (ii > 1) and (np.abs(elbo_vals[ii] - elbo_vals[ii-1]) < tol):
        #    break

    # unpack params and return
    mu = theta[:, :D]
    s2 = np.exp(theta[:, -1]) + self.s2min
    return mu, s2, elbo_vals, theta
def fit_scalar_glm(Xs, ys,
                   model="bernoulli",
                   mean_function="logistic",
                   model_hypers={},
                   fit_intercept=True,
                   weights=None,
                   X_variances=None,
                   prior=None,
                   proximal_point=None,
                   threshold=1e-6,
                   step_size=1,
                   max_iter=50,
                   verbose=False):
    """
    Fit a GLM with vector inputs X and scalar outputs y.  The user provides
    the inputs, outputs, the model type (i.e. the conditional distribution
    of the data), and the mean function that maps linear weighted inputs
    to the expected value of the output.

    The following models are supported:
        - Gaussian
        - Bernoulli
        - Poisson
        - Negative binomial (fixed r)

    Arguments
    ---------

    Xs: array of shape (n, p) or list of arrays with shapes
        [(n_1, p), (n_2, p), ..., (n_M, p)] containing covariates
        for the GLM.

    ys: array of shape (n,) or list of arrays with shapes
        [(n_1,), (n_2,), ..., (n_M,)] containing the scalar outputs
        of the GLM.

    model: string specifying the conditional distribution of the data.
        Currently supported values are:
        - "gaussian"
        - "bernoulli"
        - "poisson"
        - "negative binomial"

    mean_function: string or lambda function specifying the mapping from
        the projected data to the mean of the output.  Currently supported
        values are:
        - "identity"
        - "logistic"
        - "exp"
        - "softplus"
        It is up to the user to make sure that the chosen mean function
        has the correct range for the corresponding model.  For example,
        model="bernoulli" and mean_function="exp" will fail.

    model_hypers: dictionary of hyperparameters for the model.  For example,
        the negative binomial requires an extra hyperparameter for the
        "number of failures".  For valid values of the `model_hypers`,
        see ssm.regression.model_kwarg_descriptions.

    fit_intercept: bool specifying whether or not to fit an intercept term.
        If True, the output will include the weights (an array of length p)
        and a scalar intercept value.

    weights: array of shape (n,) or list of arrays with shapes
        [(n_1,), (n_2,), ..., (n_M,)] containing non-negative weights
        associated with each data point.  For example, these are used when
        fitting mixtures of GLMs with the EM algorithm.

    X_variances: array of shape (n, p, p) or list of arrays with shapes
        [(n_1, p, p), (n_2, p, p), ..., (n_M, p, p)] containing the
        covariance of the given covariates.  These are used when the data
        itself is uncertain, but where we have distributions q(X) and q(y)
        on the inputs and outputs, respectively.  (We assume X and y are
        independent.)  In this case, Xs and ys are treated as the marginal
        means E[X] and E[y] respectively.  To fit the GLM, we also need the
        marginal covariances of the inputs.  These are specified here as an
        array of covariance matrices, or as a list of arrays of covariance
        matrices, one for each data point.

    prior: tuple of (mean, variance) of a Gaussian prior on the weights of
        the GLM.  The mean must be a scalar or an array of shape (p,) if
        fit_intercept is False or (p+1,) otherwise.  If scalar, it is
        multiplied by a vector of ones.  The variance can be a positive
        scalar or a (p, p) or (p+1, p+1) matrix, depending again on whether
        fit_intercept is True.

    proximal_point: tuple of (array, positive scalar) for the proximal point
        algorithm.  The array must be of shape (p,) if fit_intercept is
        False or (p+1,) otherwise.  It specifies the current value of the
        parameters that we should not deviate too far from.  The positive
        scalar specifies the inverse strength of this regularization.  As
        this value goes to zero, the fitted value must be exactly the
        proximal point given in the array.  Effectively, these specify
        another Gaussian prior, which will be multiplied with the prior
        above.

    threshold: positive scalar value specifying the mean absolute deviation
        in weights required for convergence.

    step_size: scalar value in (0, 1] specifying the linear combination of
        the next weights and current weights.  A step size of 1 means that
        each iteration goes all the way to the mode of the quadratic
        approximation.

    max_iter: int, maximum number of iterations of the Newton-Raphson
        algorithm.

    verbose: bool, whether or not to print diagnostic messages.
    """
    Xs = Xs if isinstance(Xs, (list, tuple)) else [Xs]
    ys = ys if isinstance(ys, (list, tuple)) else [ys]
    assert len(Xs) == len(ys)

    p = Xs[0].shape[1]
    assert all([y.ndim == 1 for y in ys])
    assert all([X.shape[1] == p for X in Xs])
    assert all([y.shape[0] == X.shape[0] for X, y in zip(Xs, ys)])

    # Check the weights.  Default to all ones.
    if weights is not None:
        weights = weights if isinstance(weights, (list, tuple)) else [weights]
        assert all([weight.shape == (X.shape[0],)
                    for X, weight in zip(Xs, weights)])
    else:
        weights = [np.ones(X.shape[0]) for X in Xs]

    # If the inputs are uncertain, the user may specify the marginal variance
    # of the data points.  These must be an array of (p, p) covariance matrices.
    if X_variances is not None:
        X_variances = X_variances if isinstance(X_variances, (list, tuple)) \
            else [X_variances]
        assert all([X_var.shape == (X.shape[0], p, p)
                    for X, X_var in zip(Xs, X_variances)])
    else:
        X_variances = [np.zeros((X.shape[0], p, p)) for X in Xs]

    # Add a column to X if fitting the intercept as well
    # Note: this could be memory intensive, but the code is a lot simpler.
    if fit_intercept:
        Xs = [np.column_stack((X, np.ones(X.shape[0]))) for X in Xs]
        new_X_variances = [np.zeros((X.shape[0], p+1, p+1)) for X in Xs]
        for X_var, new_X_var in zip(X_variances, new_X_variances):
            new_X_var[:, :p, :p] = X_var
        X_variances = new_X_variances
        p += 1

    # Check the model specification
    model = model.lower()
    assert model in ("gaussian", "bernoulli", "poisson", "negative_binomial")

    # Initialize the prior
    if prior is None:
        prior_mean = np.zeros(p)
        prior_precision = np.zeros((p, p))
    else:
        assert isinstance(prior, (tuple, list)) and len(prior) == 2
        prior_mean, prior_variance = prior
        if np.isscalar(prior_mean):
            prior_mean = prior_mean * np.ones(p)
        else:
            assert prior_mean.shape == (p,)

        if np.isscalar(prior_variance):
            assert prior_variance > 0
            prior_precision = 1 / prior_variance * np.eye(p)
        else:
            assert prior_variance.shape == (p, p)
            prior_precision = np.linalg.inv(prior_variance)

    # Incorporate the proximal point into the prior, if specified.
    if proximal_point is not None:
        # Make sure the point and the regularization strength are both specified.
        assert isinstance(proximal_point, (tuple, list)) \
            and len(proximal_point) == 2
        point, alpha = proximal_point
        assert point.shape == (p,)
        assert np.isscalar(alpha) and alpha > 0

        # Combine the proximal point regularizer with the Gaussian prior.
        new_precision = prior_precision + 1 / alpha * np.eye(p)
        prior_mean = np.linalg.solve(
            new_precision,
            np.dot(prior_precision, prior_mean) + point / alpha)
        prior_precision = new_precision

    # Get the partition function (A) and mean function (f).
    # These determine the mapping from inputs to natural parameters (g).
    A = lambda eta: partition_functions[model](eta, **model_hypers)
    f = mean_functions[mean_function] \
        if isinstance(mean_function, str) else mean_function
    g = lambda u: canonical_link_functions[model](f(u), **model_hypers)

    # Compute necessary derivatives for IRLS.
    # When y is a scalar, these are all R^1 -> R^1 scalar functions.
    df = elementwise_grad(f)
    dg = elementwise_grad(g)
    d2g = elementwise_grad(dg)
    dA = elementwise_grad(A)
    d2A = elementwise_grad(dA)

    # Initialize the weights, theta
    theta = np.zeros(p)
    dtheta = np.inf
    converged = False
    for itr in range(max_iter):
        if verbose:
            print("Iteration ", itr, "delta theta: ", dtheta)

        # Check convergence
        converged = dtheta < threshold
        if converged:
            print("Converged in ", itr, " iterations.")
            break

        # Compute the negative Hessian (J) and the gradient (h) of the objective
        J = prior_precision.copy()
        h = -np.dot(prior_precision, (theta - prior_mean))

        for X, y, weight, X_var in zip(Xs, ys, weights, X_variances):

            # Project inputs with current parameters and get predicted values
            u = np.dot(X, theta)
            yhat = f(u)

            # Compute the weights G and R
            G = dg(u)
            R = d2g(u) * (yhat - y) + G**2 * d2A(g(u))

            # Linearize the gradient for uncertain data
            H = G * (y - yhat)
            # dH = d2g(u) * (y - yhat) - dg(u) * df(u)
            dH = G * (y - yhat) - G**2 * d2A(g(u))  # nearly the same as R!

            # Update the negative Hessian
            weighted_X = X * R[:, None] * weight[:, None]
            J += np.dot(weighted_X.T, X)
            J += np.einsum('npq,n->pq', X_var, R)

            # Update the gradient
            h += np.dot(weighted_X.T, H / R)
            h += np.einsum('npq,n,q->p', X_var, dH, theta)

        # Solve for the Newton update
        # (current parameters + negative Hessian^{-1} gradient)
        next_theta = theta + np.linalg.solve(J, h)

        # Check for convergence
        dtheta = np.mean(abs(next_theta - theta))
        theta = (1 - step_size) * theta + step_size * next_theta

    # Output warning if terminated without convergence
    if not converged:
        warn("Newton's method failed to converge in {} iterations."
             .format(max_iter))

    # Return the weights and intercept if necessary
    if fit_intercept:
        return theta[:-1], theta[-1]
    else:
        return theta
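# A usage sketch (added, not from the source): logistic regression via the
# Newton/IRLS loop above.  Assumes the module-level `partition_functions`,
# `mean_functions`, and `canonical_link_functions` dictionaries referenced
# by fit_scalar_glm are available, as in the enclosing module.
np.random.seed(0)
w_true, b_true = np.array([1.5, -2.0]), 0.3
X = np.random.randn(1000, 2)
p_true = 1 / (1 + np.exp(-(X.dot(w_true) + b_true)))
y = (np.random.rand(1000) < p_true).astype(float)
w_hat, b_hat = fit_scalar_glm(X, y, model="bernoulli",
                              mean_function="logistic")
# w_hat and b_hat should land near (w_true, b_true), up to sampling noise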
def fit_mixture_jointly(num_comps, lnpdf, D,
                        num_iters=1000, step_size=.2,
                        num_samps_per_component=100,
                        fix_samples=True,
                        init_comp_list=None,
                        ax=None, xlim=None, ylim=None):
    # define the mixture elbo as a function of only mixing weights.
    # to do this, we take L samples from each component, and note that
    # the ELBO decomposes into the sum of expectations wrt each component
    #   ELBO(rho) = Eq[lnpi(x) - ln q(x; rho)]
    #             = sum_c rho_c \int q_c(x; rho) [lnpi(x) - ln q(x; rho)]
    C = num_comps
    L = num_samps_per_component

    from autil.util.misc import WeightsParser
    parser = WeightsParser()
    parser.add_shape("ln_weights", (C - 1,))
    parser.add_shape("means", (C, D))
    parser.add_shape("lnstds", (C, D))

    init_rhos = simplex_to_unconstrained(np.ones(C) * (1. / C))
    init_vars = -2 * np.ones((C, D))
    init_means = .001 * np.random.randn(C, D)
    if init_comp_list is not None:
        assert len(init_comp_list) == C
        pis = np.array([c[0] for c in init_comp_list])
        init_rhos = simplex_to_unconstrained(pis)
        init_means = np.row_stack([c[1][:D] for c in init_comp_list])
        init_vars = np.row_stack([c[1][D:] for c in init_comp_list])

    init_params = np.zeros(parser.num_weights)
    init_params = parser.set(init_params, "ln_weights", init_rhos)
    init_params = parser.set(init_params, "means", init_means)
    init_params = parser.set(init_params, "lnstds", init_vars)

    def joint_elbo(params, i, eps_tens=None):
        # sample from each cluster's normal --- transform into
        if eps_tens is None:
            eps_tens = np.random.randn(C, L, D)
        lnstds = parser.get(params, "lnstds")
        means = parser.get(params, "means")
        Csamps = eps_tens * np.exp(lnstds)[:, None, :] + means[:, None, :]

        # make q ln pdf for params
        icovs = np.array([np.diag(np.exp(-2 * lnstds[c]))
                          for c in range(lnstds.shape[0])])
        dets = np.exp(np.sum(2 * lnstds, 1))
        lnws = parser.get(params, "ln_weights")
        pis = unconstrained_to_simplex(lnws)
        qlogprob = lambda x: mog.mog_logprob(x, means, icovs, dets, pis)

        # compute E_q_c[ ln q(x) ] for each component
        lnq_terms = np.reshape(qlogprob(np.reshape(Csamps, (-1, D))), (C, L))
        lnq_means = np.mean(lnq_terms, 1)

        # compute E[pi(x)] for each component
        pi_lls = np.array([lnpdf(c, 0) for c in Csamps])
        pi_lls_mean = np.mean(pi_lls, 1)
        return np.sum(pis * (pi_lls_mean - lnq_means))

    # first fit a single gaussian using BBVI
    def callback(params, i, g):
        if i % 2 == 0:
            print("weight opt iter %d, lower bound %2.4f" %
                  (i, joint_elbo(params, i)))
            #print("  weights = ", unconstrained_to_simplex(params))
            #print("  gmag, grad = ", np.sqrt(np.sum(g**2)), g)
            if ax is not None:
                import matplotlib.pyplot as plt
                plt.ion()
                import seaborn as sns
                sns.set_style('white')
                import autil.util.plots as pu
                ax.cla()
                # background isocontours (target) + foreground isocontours (approx)
                pu.plot_isocontours(ax, lambda x: np.exp(lnpdf(x, i)),
                                    xlim=xlim, ylim=ylim, fill=True)
                pis = unconstrained_to_simplex(params)
                pu.plot_isocontours(ax, lambda x: np.exp(qlogprob(x, pis)),
                                    xlim=xlim, ylim=ylim, colors='darkred')
                plt.draw()
                plt.pause(1. / 30.)

    def break_cond(x, i, g):
        gmag = np.sqrt(np.sum(g**2))
        #if gmag < 1e-4:
        #    return True
        return False

    if fix_samples:
        eps_tens = np.random.randn(C, L, D)
        var_obj = lambda x, t: -1. * joint_elbo(x, t, eps_tens=eps_tens)
    else:
        var_obj = lambda x, t: -1. * joint_elbo(x, t)

    # optimize component
    var_obj_grad = grad(var_obj)
    #fit_params = adam(var_obj_grad, init_params, num_iters=num_iters,
    #                  step_size=step_size, callback=callback,
    #                  break_cond=break_cond)
    fit_params = sgd(var_obj_grad, init_params, num_iters=num_iters,
                     step_size=step_size, callback=callback,
                     break_cond=break_cond, mass=.01)

    # unpack new var params --- compute normalized rho's
    pis_new = unconstrained_to_simplex(parser.get(fit_params, "ln_weights"))
    means_new = parser.get(fit_params, "means")
    stds_new = parser.get(fit_params, "lnstds")
    lams_new = np.column_stack([means_new, stds_new])
    comp_list_new = [(p, l) for p, l in zip(pis_new, lams_new)]
    return comp_list_new
def _evaluate(self, x, out, *args, **kwargs):
    x1, x2, x3, x4, x5 = anp.split(x, 5, axis=1)
    x1 = self.x1[x1]
    x2 = self.x2[x2]
    x3 = self.x3[x3]
    x4 = self.x4[x4]
    x5 = self.x5[x5]

    pi = anp.pi
    mu = 0.5
    s = 1.5
    M_f = 3            # Nm
    ri_max = 80        # mm
    t_max = 3          # mm
    n = 250            # rpm
    w = pi * n / 30    # rad/s
    R_sr = (2/3) * ((x2**3 - x1**3) / (x2**2 - x1**2))   # mm
    p_max = 1          # MPa
    T_max = 15         # s
    I_z = 55           # kg*m^2
    ro_min = 90        # mm
    F_max = 1000       # N
    A = pi * (x2**2 - x1**2)   # mm^2
    deltaR = 20        # mm
    rho = 0.0000078    # kg/mm^3
    delta = 0.5        # mm
    ro_max = 110       # mm
    Z_max = 9
    p_rz = x4 / A      # N/mm^2
    L_max = 30         # mm
    Vsr_max = 10       # m/s
    M_s = 40           # Nm
    ri_min = 60        # mm
    t_min = 1.5        # mm

    M_h = (2/3) * mu * x4 * x5 * ((x2**3 - x1**3) / (x2**2 - x1**2))   # N*mm
    Vsr = (pi * R_sr * n) / 30   # mm/s
    T = (I_z * w) / (M_h / 1000 + M_f)

    g1 = (x2 - x1 - deltaR) * -1
    g2 = (L_max - (x5 + 1) * (x3 + delta)) * -1
    g3 = (p_max - p_rz) * -1
    g4 = (p_max * Vsr_max * 1000 - p_rz * Vsr) * -1
    g5 = (Vsr_max * 1000 - Vsr) * -1
    g6 = (M_h / 1000 - (s * M_s)) * -1
    g7 = T * -1
    g8 = (T_max - T) * -1

    # unused bound-style constraints, kept as in the original
    _g9 = -x1 + ri_min
    _g10 = x1 - ri_max
    _g11 = -x2 + ro_min
    _g12 = x2 - ro_max
    _g13 = -x3 + t_min
    _g14 = x3 - t_max
    _g15 = -x4
    _g16 = x4 - F_max
    _g17 = -x5 + 2
    _g18 = x5 - Z_max

    f1 = pi * (x2**2 - x1**2) * x3 * (x5 + 1) * rho
    f2 = T

    out["F"] = anp.column_stack([f1, f2])
    out["G"] = anp.column_stack([g1, g2, g3, g4, g5, g6, g7, g8])
def fit_linear_regression(Xs, ys, weights=None,
                          mu0=0, sigmasq0=1,
                          nu0=1, Psi0=1,
                          fit_intercept=True):
    """
    Fit a linear regression y_i ~ N(Wx_i + b, diag(S)) for W, b, S.

    :param Xs: array or list of arrays
    :param ys: array or list of arrays
    :param fit_intercept: if False drop b
    """
    Xs = Xs if isinstance(Xs, (list, tuple)) else [Xs]
    ys = ys if isinstance(ys, (list, tuple)) else [ys]
    assert len(Xs) == len(ys)

    D = Xs[0].shape[1]
    P = ys[0].shape[1]
    assert all([X.shape[1] == D for X in Xs])
    assert all([y.shape[1] == P for y in ys])
    assert all([X.shape[0] == y.shape[0] for X, y in zip(Xs, ys)])

    # Broadcast the scalar prior parameters
    # (note: np.ones, not np.zeros, so a nonzero mu0 is kept)
    mu0 = mu0 * np.ones((P, D))
    sigmasq0 = sigmasq0 * np.eye(D)

    # Check the weights.  Default to all ones.
    if weights is not None:
        weights = weights if isinstance(weights, (list, tuple)) else [weights]
    else:
        weights = [np.ones(X.shape[0]) for X in Xs]

    # Add weak prior on intercept
    if fit_intercept:
        mu0 = np.column_stack((mu0, np.zeros(P)))
        sigmasq0 = block_diag(sigmasq0, np.eye(1))

    # Compute the posterior
    J = np.linalg.inv(sigmasq0)
    h = np.dot(J, mu0.T)

    for X, y, weight in zip(Xs, ys, weights):
        X = np.column_stack((X, np.ones(X.shape[0]))) if fit_intercept else X
        J += np.dot(X.T * weight, X)
        h += np.dot(X.T * weight, y)

    # Solve for the MAP estimate
    W = np.linalg.solve(J, h).T
    if fit_intercept:
        W, b = W[:, :-1], W[:, -1]
    else:
        b = 0

    # Compute the residual and the posterior variance
    nu = nu0
    Psi = Psi0 * np.eye(P)
    for X, y, weight in zip(Xs, ys, weights):
        yhat = np.dot(X, W.T) + b
        resid = y - yhat
        nu += np.sum(weight)
        tmp1 = np.einsum('t,ti,tj->ij', weight, resid, resid)
        tmp2 = np.sum(weight[:, None, None] * resid[:, :, None]
                      * resid[:, None, :], axis=0)
        assert np.allclose(tmp1, tmp2)
        Psi += tmp1

    # Get MAP estimate of posterior covariance
    Sigma = Psi / (nu + P + 1)
    if fit_intercept:
        return W, b, Sigma
    else:
        return W, Sigma
                          num_iters=1000, print_intvl=10)

# Find the permutation that matches the true and inferred states
slds.permute(find_permutation(z, slds.most_likely_states(slds_x, y)))
slds_z = slds.most_likely_states(slds_x, y)

# Smooth the observations
slds_y = slds.smooth(slds_x, y)

# Plot the true and inferred states
plt.figure(figsize=(8, 6))
xlim = (0, 200)

plt.subplot(311)
plt.imshow(np.column_stack((z, slds_z)).T, aspect="auto")
plt.yticks([0, 1], ["$z_{{\\mathrm{{true}}}}$", "$z_{{\\mathrm{{inf}}}}$"])
plt.xlim(xlim)

plt.subplot(312)
plt.plot(x, '-k')
plt.plot(slds_x, ':')
plt.ylabel("$x$")
plt.xlim(xlim)

plt.subplot(313)
plt.plot(y + 4 * np.arange(N), '-k')
# plt.plot(slds_y + 4 * np.arange(N), ':')
plt.ylabel("$y$")
plt.xlabel("time")
plt.xlim(xlim)
def gen_point_source_psf_image(
        u,                    # source location in equatorial coordinates
        image,                # FitsImage object
        xlim=None,            # compute model image only on the patch
        ylim=None,            #   defined by xlim/ylim
        check_overlap=True,   # speedup: check overlap before computing
        return_patch=True,    # return the small patch as opposed to the
                              #   large patch (memory/speed purposes)
        psf_grid=None,        # cached PSF grid to be filled out
        pixel_grid=None       # Nx2 matrix of discrete pixel values to
                              #   evaluate the mog at
        ):
    """ generates a PSF image (assigns density values to pixels) """
    # compute pixel space location of source: the X,Y = Width, Height
    # pixel coordinate corresponding to u, i.e. v_{n,s}
    v_s = image.equa2pixel(u)
    does_not_overlap = check_overlap and \
        (v_s[0] < -50 or v_s[0] > 2 * image.nelec.shape[0] or
         v_s[1] < -50 or v_s[1] > 2 * image.nelec.shape[1])
    if does_not_overlap:
        return None, None, None

    # create sub-image - make sure it doesn't go outside of field pixels
    if xlim is None and ylim is None:
        bound = image.R
        minx_b, maxx_b = max(0, int(v_s[0] - bound)), \
                         min(int(v_s[0] + bound + 1), image.nelec.shape[1])
        miny_b, maxy_b = max(0, int(v_s[1] - bound)), \
                         min(int(v_s[1] + bound + 1), image.nelec.shape[0])
        y_grid = np.arange(miny_b, maxy_b, dtype=np.float64)
        x_grid = np.arange(minx_b, maxx_b, dtype=np.float64)
        xx, yy = np.meshgrid(x_grid, y_grid, indexing='xy')
        pixel_grid = np.column_stack((xx.ravel(order='C'),
                                      yy.ravel(order='C')))
    else:
        miny_b, maxy_b = ylim
        minx_b, maxx_b = xlim
        if pixel_grid is None:
            y_grid = np.arange(miny_b, maxy_b, dtype=np.float64)
            x_grid = np.arange(minx_b, maxx_b, dtype=np.float64)
            xx, yy = np.meshgrid(x_grid, y_grid, indexing='xy')
            pixel_grid = np.column_stack((xx.ravel(order='C'),
                                          yy.ravel(order='C')))
    grid_shape = (maxy_b - miny_b, maxx_b - minx_b)
    #psf_grid_small = gmm_like_2d(x    = pixel_grid,
    #                             ws   = image.weights,
    #                             mus  = image.means + v_s,
    #                             sigs = image.covars)
    psf_grid_small = np.exp(
        mog_funs.mog_loglike(pixel_grid,
                             means=image.means + v_s,
                             icovs=image.invcovars,
                             dets=np.exp(image.logdets),
                             pis=image.weights))

    # return the small patch and its bounding box in the bigger fits_image
    if return_patch:
        return psf_grid_small.reshape(grid_shape, order='C'), \
               (miny_b, maxy_b), (minx_b, maxx_b)

    # instantiate a PSF grid
    if psf_grid is None:
        psf_grid = np.zeros(image.nelec.shape, dtype=np.float64)

    # create full field grid
    psf_grid[miny_b:maxy_b, minx_b:maxx_b] = \
        psf_grid_small.reshape(grid_shape, order='C')
    return psf_grid, (0, psf_grid.shape[0]), (0, psf_grid.shape[1])
def forward(self, x, input, tag):
    inputs = np.column_stack((x, input))
    for W, b in zip(self.weights, self.biases):
        outputs = np.dot(inputs, W) + b
        inputs = np.tanh(outputs)
    return outputs[:, None, :]
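# A shape sketch (added): the recurrence above pushes the column-stacked
# (state, input) matrix through a tanh MLP; the final layer's linear output
# is returned.  Hypothetical sizes below; forward() itself returns
# outputs[:, None, :].
import numpy as np
import numpy.random as npr
T, D_x, D_u, H, D_out = 5, 2, 1, 8, 3
npr.seed(0)
weights = [npr.randn(D_x + D_u, H), npr.randn(H, D_out)]
biases = [np.zeros(H), np.zeros(D_out)]
inputs = np.column_stack((npr.randn(T, D_x), npr.randn(T, D_u)))
for W, b in zip(weights, biases):
    outputs = np.dot(inputs, W) + b
    inputs = np.tanh(outputs)
print(outputs.shape)   # (5, 3)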
def _calc_pareto_front(self, n_pareto_points=100):
    x = 1 + anp.linspace(0, 1, n_pareto_points) * 30
    pf = anp.column_stack([x, (self.m - 1) / x])
    if self.normalize:
        pf = normalize(pf)
    return pf
def _evaluate(self, x, out, *args, **kwargs):
    f1 = 10 + (x[:, 0] - 2)**2 + x[:, 1]**4 + 5 * x[:, 2]
    f2 = x[:, 0]**2 + (x[:, 1] - 1)**2 + 4 * (x[:, 2] - 2)**3
    f3 = 2 * (x[:, 0] + 2) + (x[:, 1] - 2)**3 + (x[:, 2] - 1)**2
    out["F"] = anp.column_stack([f1, f2, f3])
def rdc(x, y, f=np.sin, k=20, s=1 / 6., n=1):
    """
    Computes the Randomized Dependence Coefficient

    x, y: numpy arrays 1-D or 2-D
          If 1-D, size (samples,)
          If 2-D, size (samples, variables)
    f:    function to use for random projection
    k:    number of random projections to use
    s:    scale parameter
    n:    number of times to compute the RDC and
          return the median (for stability)

    According to the paper, the coefficient should be
    relatively insensitive to the settings of the f, k,
    and s parameters.

    Source: https://github.com/garydoranjr/rdc
    """
    #x = x.reshape((len(x)))
    #y = y.reshape((len(y)))
    if n > 1:
        values = []
        for i in range(n):
            try:
                values.append(rdc(x, y, f, k, s, 1))
            except np.linalg.linalg.LinAlgError:
                pass
        return np.median(values)

    if len(x.shape) == 1:
        x = x.reshape((-1, 1))
    if len(y.shape) == 1:
        y = y.reshape((-1, 1))

    # Copula transformation
    cx = np.column_stack([rankdata(xc, method='ordinal')
                          for xc in x.T]) / float(x.size)
    cy = np.column_stack([rankdata(yc, method='ordinal')
                          for yc in y.T]) / float(y.size)

    # Add a vector of ones so that w.x + b is just a dot product
    O = np.ones(cx.shape[0])
    X = np.column_stack([cx, O])
    Y = np.column_stack([cy, O])

    # Random linear projections
    Rx = (s / X.shape[1]) * np.random.randn(X.shape[1], k)
    Ry = (s / Y.shape[1]) * np.random.randn(Y.shape[1], k)
    X = np.dot(X, Rx)
    Y = np.dot(Y, Ry)

    # Apply non-linear function to random projections
    fX = f(X)
    fY = f(Y)

    # Compute full covariance matrix
    C = np.cov(np.hstack([fX, fY]).T)

    # Due to numerical issues, if k is too large,
    # then rank(fX) < k or rank(fY) < k, so we need
    # to find the largest k such that the eigenvalues
    # (canonical correlations) are real-valued
    k0 = k
    lb = 1
    ub = k
    while True:
        # Compute canonical correlations
        Cxx = C[:k, :k]
        Cyy = C[k0:k0 + k, k0:k0 + k]
        Cxy = C[:k, k0:k0 + k]
        Cyx = C[k0:k0 + k, :k]
        eigs = np.linalg.eigvals(
            np.dot(np.dot(np.linalg.pinv(Cxx), Cxy),
                   np.dot(np.linalg.pinv(Cyy), Cyx)))

        # Binary search if k is too large
        if not (np.all(np.isreal(eigs)) and
                0 <= np.min(eigs) and np.max(eigs) <= 1):
            ub -= 1
            k = (ub + lb) // 2
            continue
        if lb == ub:
            break
        lb = k
        if ub == lb + 1:
            k = ub
        else:
            k = (ub + lb) // 2

    return np.sqrt(np.max(eigs))
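# A quick check (added): RDC should be near 1 for a strong nonlinear
# dependence and much smaller for independent draws.  Assumes `rankdata`
# is imported from scipy.stats, as rdc requires.
np.random.seed(0)
x = np.random.randn(1000)
y_dep = np.abs(x) + 0.1 * np.random.randn(1000)
y_ind = np.random.randn(1000)
print(rdc(x, y_dep))   # close to 1
print(rdc(x, y_ind))   # much closer to 0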
# Specify whether or not to save figures
save_figures = True


# In[2]:

# Set the parameters of the HMM
T = 200   # number of time bins
K = 5     # number of discrete states
D = 2     # data dimension

# Make an HMM
true_hmm = HMM(K, D, observations="gaussian")

# Manually tweak the means to make them farther apart
thetas = np.linspace(0, 2 * np.pi, K, endpoint=False)
true_hmm.observations.mus = 3 * np.column_stack(
    (np.cos(thetas), np.sin(thetas)))


# In[3]:

# Sample some data from the HMM
z, y = true_hmm.sample(T)
true_ll = true_hmm.log_probability(y)


# In[4]:

# Plot the observation distributions
lim = .85 * abs(y).max()
XX, YY = np.meshgrid(np.linspace(-lim, lim, 100),
                     np.linspace(-lim, lim, 100))
data = np.column_stack((XX.ravel(), YY.ravel()))
input = np.zeros((data.shape[0], 0))
mask = np.ones_like(data, dtype=bool)
def _evaluate(self, x, out, *args, **kwargs):
    f1 = -anp.sum(self.P * x, axis=1)
    f2 = anp.sum(x, axis=1)
    out["F"] = anp.column_stack([f1, f2])
    out["G"] = (anp.sum(self.W * x, axis=1) - self.C)
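# A tiny smoke test (added): 3-item bi-objective knapsack with hypothetical
# profits P, weights W, and capacity C; G > 0 flags an overweight selection.
import autograd.numpy as anp

class _Knapsack:
    P = anp.array([3., 1., 4.])
    W = anp.array([2., 1., 3.])
    C = 4.

x = anp.array([[0., 1., 1.], [1., 1., 1.]])
out = {}
_evaluate(_Knapsack(), x, out)
# out["F"] == [[-5., 2.], [-8., 3.]]; out["G"] == [0., 2.]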
for n in range(N):
    plt.subplot(N, 1, n + 1)
    for lmbda in rates:
        plt.plot(lmbda[:, n])
    plt.xlabel("time")
    plt.ylabel("$\\lambda_{}(t)$".format(n + 1))
plt.suptitle("Simulated firing rates")


# In[6]:

# Plot the nonlinear firing rate map for neuron 1
xmin, xmax = np.concatenate(xs).min(), np.concatenate(xs).max()
npts = 50
xx = np.linspace(xmin - 1, xmax + 1, npts)
XX, YY = np.meshgrid(xx, xx)
XY = np.column_stack((XX.ravel(), YY.ravel()))
tuning_curves = true_lds.smooth(XY, np.zeros((npts**2, N)))
assert np.all(tuning_curves > 0)


# In[7]:

vmax = 1.1 * tuning_curves.max()
plt.figure(figsize=(12, 12))
splt = 3
for i in range(splt):
    for j in range(splt):
        n = i * splt + j
        if n < N:
            ax = plt.subplot(splt, splt, n + 1)
            im = plt.imshow(tuning_curves[:, n].reshape((npts, npts)),
                            vmin=0,
def gen_galaxy_prof_psf_image(prof_type, R, u, img,
                              return_patch=True, xlim=None, ylim=None):
    """ generate the profile galaxy psf image given:
        - prof_type : either 'exp' or 'dev'
        - R         : the rotation of the ellipse (like a Cholesky
                      decomposition of a covariance matrix)
        - u         : center of the profile
    """
    assert prof_type in galaxy_prof_dict, "unknown galaxy profile type"
    v_s = img.equa2pixel(u)

    # convolve image PSF and galaxy profile (generate mixture components)
    weights, means, covars = \
        celeste_fast.gen_galaxy_prof_psf_mixture_params(
            W=np.dot(R, R.T),               # np.ndarray[FLOAT_t, ndim=2]
            v_s=v_s,                        # np.ndarray[FLOAT_t, ndim=1]
            image_ws=img.weights,           # np.ndarray[FLOAT_t, ndim=1]
            image_means=img.means,          # np.ndarray[FLOAT_t, ndim=2]
            image_covars=img.covars,        # np.ndarray[FLOAT_t, ndim=3]
            gal_prof_amp=galaxy_prof_dict[prof_type].amp,
            gal_prof_sigs=galaxy_prof_dict[prof_type].var[:, 0, 0],
        )

    ERROR = 0.00001
    bound = calc_bounding_radius(weights, means, covars, ERROR, center=v_s)
    if xlim is not None and ylim is not None:
        minx_b, maxx_b = xlim
        miny_b, maxy_b = ylim
    else:
        minx_b, maxx_b = max(0, int(v_s[0] - bound)), \
                         min(int(v_s[0] + bound + 1), img.nelec.shape[1])
        miny_b, maxy_b = max(0, int(v_s[1] - bound)), \
                         min(int(v_s[1] + bound + 1), img.nelec.shape[0])
    y_grid = np.arange(miny_b, maxy_b, dtype=np.float64)
    x_grid = np.arange(minx_b, maxx_b, dtype=np.float64)
    xx, yy = np.meshgrid(x_grid, y_grid, indexing='xy')
    sub_pix_grid = np.column_stack((xx.ravel(order='C'),
                                    yy.ravel(order='C')))
    psf_grid_small = gmm_like_2d(x=sub_pix_grid, ws=weights,
                                 mus=means, sigs=covars)
    if return_patch:
        return psf_grid_small.reshape(xx.shape, order='C'), \
               (miny_b, maxy_b), (minx_b, maxx_b)

    # create full field grid
    psf_grid = np.zeros(img.nelec.shape)
    psf_grid[miny_b:maxy_b, minx_b:maxx_b] = \
        psf_grid_small.reshape(xx.shape, order='C')
    return psf_grid, (0, psf_grid.shape[0]), (0, psf_grid.shape[1])
def rec2matrix(samps):
    return np.column_stack((
        logit(samps['theta']),
        logit(samps['phi'] / np.pi),
        np.log(samps['sigma']),
        samps['fluxes']))
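# A usage sketch (added): the field semantics here are assumptions --
# `logit` taken from scipy.special, `theta` in (0, 1), `phi` in (0, pi),
# positive `sigma`, and a 2-D `fluxes` block; each sample becomes one
# unconstrained row.
from scipy.special import logit
import numpy as np
samps = {'theta':  np.array([0.2, 0.5]),
         'phi':    np.array([0.3, 1.2]),
         'sigma':  np.array([1.0, 2.0]),
         'fluxes': np.column_stack((np.ones(2), 2 * np.ones(2)))}
print(rec2matrix(samps).shape)   # (2, 5): 1 + 1 + 1 + 2 columns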
def _evaluate(self, x, out, *args, **kwargs):
    l = []
    for i in range(x.shape[1] - 1):
        val = 100 * (x[:, i + 1] - x[:, i] ** 2) ** 2 + (1 - x[:, i]) ** 2
        l.append(val)
    out["F"] = anp.sum(anp.column_stack(l), axis=1)
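# A quick check (added): at the Rosenbrock optimum x* = (1, ..., 1) every
# term vanishes, so F must be exactly 0.  The method ignores `self`, so it
# can be smoke-tested as a plain function.
import autograd.numpy as anp
out = {}
_evaluate(None, anp.ones((1, 5)), out)
assert out["F"].item() == 0.0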