def training_core(c, xi, yin, lambdas, tol, tau, eta):
    # Implements the gradient-descent time-marching method for Total Variation learning.
    # lambdas: regularization parameter
    # c: radial basis function parameter
    # tau: step size of the gradient descent method
    # tol: tolerance for the stopping criterion
    dim1, dim2 = xi.shape
    w = np.random.random((dim1, 1))
    PSI = psi(xi, c, xi, w)
    w = np.linalg.inv(PSI.T.dot(PSI) + eta * np.identity(dim1)).dot(PSI.T.dot(yin))
    w = np.reshape(w, (dim1, 1))
    nr = 1
    i = 0
    while nr > tol:
        if i == 50:
            break
        i = i + 1
        PSI = psi(xi, c, xi, w)
        DUDT = dudtv(c, xi, w, yin, lambdas)
        residual = np.linalg.inv(PSI.T.dot(PSI) + eta * np.identity(dim1)).dot(PSI.T.dot(DUDT))
        w = w + tau * residual
        nr = np.linalg.norm(residual) / len(w)
        # print('iter= %3.0i, rel.residual= %1.2e' % (i, nr))
    yout = psi(xi, c, xi, w).dot(w).T
    inds = np.where(yout > 0)
    yout[inds] = 1
    inds = np.where(yout < 0)
    yout[inds] = -1
    return yout, w
def constraint(x):
    arr_Y_pos, arr_Y_neg, arr_a_pos, arr_a_neg = expand(x)
    arr_C_pos = list()
    arr_C_neg = list()

    def conj(z):
        # complex conjugate
        return np.real(z) - 1j * np.imag(z)

    for i in range(n_decomp):
        arr_C_pos.append(conj(arr_Y_pos[i].T) @ arr_Y_pos[i])
        arr_C_neg.append(conj(arr_Y_neg[i].T) @ arr_Y_neg[i])
    retvec = np.array([])
    # TP constraint
    for i in range(n_decomp):
        pt = anp_partial_trace(arr_C_pos[i], [2**n_qubits, 2**n_qubits], 1)
        vec = (pt - arr_a_pos[i] * np.identity(2**n_qubits)).flatten()
        retvec = np.hstack([retvec, vec])
        pt = anp_partial_trace(arr_C_neg[i], [2**n_qubits, 2**n_qubits], 1)
        vec = (pt - arr_a_neg[i] * np.identity(2**n_qubits)).flatten()
        retvec = np.hstack([retvec, vec])
    # equality constraint
    C_sum = np.zeros_like(target_choi)
    for i in range(n_decomp):
        C_sum += arr_C_pos[i] - arr_C_neg[i]
    vec = (C_sum - target_choi).flatten()
    retvec = np.hstack([retvec, vec])
    # separate complex and real part
    retvec = np.hstack([np.real(retvec), np.imag(retvec)])
    return retvec
def rho(parameters, matrix_data, Y_data, sample_indices, kernel_keyword="RBF", reg=0.000001):
    kernel = kernels_dic[kernel_keyword]
    kernel_matrix = kernel(matrix_data, matrix_data, parameters)
    pi = pi_matrix(sample_indices, (sample_indices.shape[0], matrix_data.shape[0]))
    sample_matrix = np.matmul(pi, np.matmul(kernel_matrix, np.transpose(pi)))
    Y_sample = Y_data[sample_indices]
    lambda_term = reg
    inverse_data = np.linalg.inv(kernel_matrix + lambda_term * np.identity(kernel_matrix.shape[0]))
    inverse_sample = np.linalg.inv(sample_matrix + lambda_term * np.identity(sample_matrix.shape[0]))
    top = np.matmul(Y_sample.T, np.matmul(inverse_sample, Y_sample))
    bottom = np.matmul(Y_data.T, np.matmul(inverse_data, Y_data))
    return 1 - top / bottom
def BFGS(f, grad_f, hess_0, x):
    dims = len(x)
    assert np.all(np.linalg.eig(hess_0)[0] > 0) and np.all(hess_0 == hess_0.T), \
        "Initial hessian must be SPD"
    # H holds the inverse-Hessian approximation; the update below is the
    # standard BFGS update for the inverse, so the step is p = -H @ grad
    # (the original recomputed p from the stale initial hessian every iteration)
    H = np.linalg.inv(hess_0)
    positions = [x]
    while np.linalg.norm(grad_f(x)) > 1e-7:
        p = -np.matmul(H, grad_f(x))
        # p /= np.linalg.norm(p)
        a = get_line_length(f, grad_f, x, p, a_max=10)
        x_next = x + a * p
        s = x_next - x
        y = grad_f(x_next) - grad_f(x)
        rho = 1 / np.matmul(y, s)
        # Note the second term is a scalar (rho) multiplied by a matrix
        t1 = np.identity(dims) - rho * np.matmul(s[:, np.newaxis], y[np.newaxis, :])
        t2 = np.identity(dims) - rho * np.matmul(y[:, np.newaxis], s[np.newaxis, :])
        t3 = rho * np.matmul(s[:, np.newaxis], s[np.newaxis, :])
        H = many_matmul(t1, H, t2) + t3
        x = x_next
        positions.append(x)
    return np.array(positions)
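# A hedged usage sketch (the quadratic below is illustrative, not from the
# original source): BFGS as defined above also needs a get_line_length line
# search and a many_matmul helper, which are given minimal stand-in
# implementations here so the example runs on its own.
import numpy as np
from functools import reduce

def many_matmul(*mats):
    # left-to-right matrix product
    return reduce(np.matmul, mats)

def get_line_length(f, grad_f, x, p, a_max=10):
    # crude backtracking line search satisfying the Armijo condition
    a = a_max
    while f(x + a * p) > f(x) + 1e-4 * a * np.dot(grad_f(x), p):
        a *= 0.5
    return a

A = np.array([[3.0, 1.0], [1.0, 2.0]])
f = lambda x: 0.5 * x @ A @ x
grad_f = lambda x: A @ x
path = BFGS(f, grad_f, np.identity(2), np.array([1.0, 1.0]))
print(path[-1])  # close to the minimizer [0, 0]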
def log_transition_matrices(self, data, input, mask, tag):
    T, D = data.shape
    # Previous state effect
    log_Ps = np.tile(self.log_Ps[None, :, :], (T - 1, 1, 1))
    # Input effect
    log_Ps = log_Ps + np.dot(input[1:], self.Ws.T)[:, None, :]
    # Past observations effect:
    # off-diagonal elements of transition matrix (state switches), from past observations
    log_Ps_offdiag = np.tile(np.dot(data[:-1], self.Rs.T)[:, None, :], (1, self.K, 1))
    mult_offdiag = 1 - np.tile(np.identity(self.K)[None, :, :], (log_Ps_offdiag.shape[0], 1, 1))
    # diagonal elements of transition matrix (stickiness), from past observations
    log_Ps_diag = np.tile(np.dot(data[:-1], self.Ss.T)[:, None, :], (1, self.K, 1))
    mult_diag = np.tile(np.identity(self.K)[None, :, :], (log_Ps_diag.shape[0], 1, 1))
    # accumulate all effects (a plain assignment here would discard the
    # previous-state and input effects computed above)
    log_Ps = log_Ps + log_Ps_diag * mult_diag  # diagonal (stickiness) from past observations
    log_Ps = log_Ps + np.identity(self.K) * self.s  # diagonal (stickiness) bias
    log_Ps = log_Ps + log_Ps_offdiag * mult_offdiag  # off-diagonal (state switching) from past observations
    log_Ps = log_Ps + (1 - np.identity(self.K)) * self.r  # off-diagonal (state switching) bias
    return log_Ps - logsumexp(log_Ps, axis=2, keepdims=True)  # normalize
def run_fold(X, y, Xt, yt):
    var1, var2, scale1, scale2 = optimimize_hyper(X, y, max_em_itt=20,
                                                  learning_rate=1e-3, verbose=True)
    K1 = var1 * Matern32(X, X, scale1)
    K2 = var2 * Matern32(X, X, scale2)
    print('\n')
    print('Var1: %3.2f' % var1)
    print('Var2: %3.2f' % var2)
    print('Scale1: %3.2f' % scale1)
    print('Scale2: %3.2f' % scale2)
    print('\n')
    print(100 * '-')
    print('Predicting...')
    print(100 * '-')
    mu1, Sigma1, mu2, Sigma2 = run_ep(X, y, K1, K2, max_itt=max_itt)
    Ktf1 = var1 * Matern32(Xt, X, scale1)
    Ktf2 = var2 * Matern32(Xt, X, scale2)
    Ktt1 = var1 * Matern32(Xt, Xt, scale1)
    Ktt2 = var2 * Matern32(Xt, Xt, scale2)
    # jittered Cholesky factors for stable solves
    L1 = np.linalg.cholesky(K1 + 1e-8 * np.identity(len(X)))
    L2 = np.linalg.cholesky(K2 + 1e-8 * np.identity(len(X)))
    g1 = np.linalg.solve(L1, Ktf1.T)
    g2 = np.linalg.solve(L2, Ktf2.T)
    h1 = np.linalg.solve(L1.T, g1)
    h2 = np.linalg.solve(L2.T, g2)
    ft1_mean = g1.T @ np.linalg.solve(L1, mu1)
    ft2_mean = g2.T @ np.linalg.solve(L2, mu2)
    ft1_cov = Ktt1 - g1.T @ g1 + h1.T @ Sigma1 @ h1
    ft2_cov = Ktt2 - g2.T @ g2 + h2.T @ Sigma2 @ h2
    print(100 * '-')
    print('Computing NLPD')
    print(100 * '-')
    # compute log predictive density for the test set
    nlpds = []
    for i in range(len(Xt)):
        Zn, m11, v1, m21, v2 = compute_moments_hsced([], yt[i],
                                                     ft1_mean[i], ft1_cov[i, i],
                                                     ft2_mean[i], ft2_cov[i, i],
                                                     num_points=num_points)
        nlpds.append(-np.log(Zn))
    return np.mean(nlpds)
def main():
    args = parse_args()
    seed = args.seed or np.random.randint(10000)
    print("Using seed: %r" % seed)
    np.random.seed(seed)
    d = 20
    r = 6
    T = 500
    n_pred = 250
    n_iter = 500
    var = 0.1
    data = generate_normal_data(nonlinearity, d=d, T=T, n_pred=n_pred, r=r, var=var)
    C0 = 0.1 * np.random.randn(d, r)
    theta0 = 0.1 * np.random.rand(r, 1)
    v0 = 0.1
    V0 = np.kron(v0, np.eye(r))
    mu0 = np.zeros([r, 1])
    P0 = np.zeros([r, r])
    Qs = {k: 0 * np.identity(r) for k in range(T + 1)}
    Rs = {k: np.identity(d) for k in range(T + 1)}
    psmf = PSMFIterSynthetic(theta0, C0, V0, mu0, P0, Qs, Rs, nonlinearity)
    psmf.run(
        data["y_train"],
        data["y_obs"],
        data["theta_true"],
        data["C_true"],
        data["x_true"],
        T,
        n_iter,
        n_pred,
        adam_gam=1e-3,
        live_plot=args.live_plot,
        verbose=args.verbose,
    )
    output_files = dict(
        fit=args.output_fit,
        bases=args.output_bases,
        cost_y=args.output_cost_y,
        cost_theta=args.output_cost_theta,
    )
    psmf.figures_save(
        data["y_obs"],
        n_pred,
        T,
        x_true=data["x_true"],
        output_files=output_files,
    )
    psmf.figures_close()
def entanglement_row():
    if full_connectivity:
        if n_qubits == 1:
            return np.identity(2)
        elif n_qubits == 2:
            return CX
        elif n_qubits == 3:
            return ICX @ CIX @ CXI
    else:
        if n_qubits == 1:
            return np.identity(2)
        elif n_qubits == 2:
            return CX
        elif n_qubits == 3:
            return ICX @ CXI
    # a bare `raise` here has no active exception to re-raise
    raise NotImplementedError("entanglement_row is only defined for n_qubits <= 3")
def construct_P_matrices(springs, n_points, d):
    # For each spring (i, j), build the selector matrix that picks the d
    # coordinates of its two endpoints out of the flat (d * n_points) position
    # vector; each block row is [0 ... 0 | I_d | 0 ... 0].
    return numpy.array([
        numpy.concatenate((
            numpy.concatenate((numpy.zeros((d, d * s_k[0])),
                               numpy.identity(d),
                               numpy.zeros((d, d * n_points - d * (s_k[0] + 1)))), axis=1),
            numpy.concatenate((numpy.zeros((d, d * s_k[1])),
                               numpy.identity(d),
                               numpy.zeros((d, d * n_points - d * (s_k[1] + 1)))), axis=1),
        )) for s_k in springs
    ])
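# A hedged usage sketch of construct_P_matrices (the points and spring below
# are illustrative): with three points in the plane (d = 2) and a single
# spring joining points 0 and 2, the P matrix selects the stacked endpoint
# coordinates from the flat position vector.
import numpy

springs = [(0, 2)]
P = construct_P_matrices(springs, n_points=3, d=2)  # shape (1, 4, 6)
q = numpy.array([0.0, 0.0, 1.0, 0.0, 1.0, 1.0])    # positions of points 0, 1, 2
print(P[0] @ q)  # -> [0. 0. 1. 1.]: coordinates of points 0 and 2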
def dPositionVectorde(self, x, mu):
    """derivatives of position vector wrt eccentricity vector typed"""
    TA = x[5]
    cTA = np.cos(TA)
    sTA = np.sin(TA)
    h = self.hVector(x)
    e = self.eVector(x, mu)
    hMag = self.hMag(x)
    eMag = self.eMag(x, mu)
    eUnit = e / eMag
    crossProduct = np.cross(h, e)
    crossProductMag = np.linalg.norm(crossProduct)
    crossProductUnit = crossProduct / crossProductMag
    factor = hMag**2 / mu
    term1 = (-cTA / (1. + eMag * cTA)**2) * np.outer(
        cTA * eUnit + sTA * crossProductUnit, eUnit)
    factor2 = 1.0 / (1.0 + eMag * cTA)
    term2 = factor2 * (
        (cTA / eMag) * (np.identity(3) - (1. / eMag**2) * np.outer(e, e))
        + (sTA / crossProductMag) * np.dot(
            np.identity(3) - (1. / crossProductMag**2) * np.outer(crossProduct, crossProduct),
            self.mathUtil.crossmat(h)))
    drde = factor * (term1 + term2)
    return drde
def dPositionVectordh(self, x, mu):
    """derivatives of position vector wrt angular momentum vector typed"""
    TA = x[5]
    cTA = np.cos(TA)
    sTA = np.sin(TA)
    h = self.hVector(x)
    e = self.eVector(x, mu)
    hMag = self.hMag(x)
    eMag = self.eMag(x, mu)
    eUnit = e / eMag
    crossProduct = np.cross(h, e)
    crossProductMag = np.linalg.norm(crossProduct)
    factor = 1.0 / (mu * (1.0 + eMag * cTA))
    term1 = cTA * np.outer(eUnit, 2.0 * h)
    term2 = (sTA / crossProductMag) * (
        2. * np.outer(crossProduct, h)
        + hMag**2 * np.dot(
            -np.identity(3) + (1. / crossProductMag**2) * np.outer(crossProduct, crossProduct),
            self.mathUtil.crossmat(e)))
    drdh = factor * (term1 + term2)
    return drdh
def get_marginal(self, u, V, R, x_test):
    '''current metric to test convergence: log-space predictive marginal likelihood'''
    I = self.sigx * np.identity(self.dimx)
    mu = np.zeros(self.dimx,)
    n_samples = 200
    ll = 0
    test_size = x_test.shape[0]
    for i in range(test_size):
        x = x_test[i]
        mc = 0
        for j in range(n_samples):
            w = self.sample_w(u, V)
            var = np.dot(w, np.transpose(w))
            var = np.add(var, I)
            px = gaussian.Gaussian_full(mu, var)
            px = px.eval(x)  # alternatively: eval_log_properly(x)
            mc = mc + px
        mc = mc / float(n_samples)
        mc = np.log(mc)
        ll += mc
    return ll / float(test_size)
def channel_trace(num_q, num_anc):
    # kraus ops: Id x <x|
    kraus_ops = list()
    for x in range(2**num_anc):
        xbra = np.eye(1, 2**num_anc, x)
        kraus_ops.append(np.kron(np.identity(2**num_q), xbra))
    return Kraus(kraus_ops)
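# A hedged sanity check (standalone, not from the original source): the Kraus
# operators of the partial-trace channel should satisfy the completeness
# relation sum_x K_x^dagger K_x = I on the full system-plus-ancilla space.
import numpy as np

num_q, num_anc = 2, 1
ops = [np.kron(np.identity(2**num_q), np.eye(1, 2**num_anc, x))
       for x in range(2**num_anc)]
completeness = sum(k.conj().T @ k for k in ops)
assert np.allclose(completeness, np.identity(2**(num_q + num_anc)))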
def dVelocityVectorde(self, x, mu):
    """derivatives of velocity vector wrt eccentricity vector typed"""
    TA = x[5]
    sTA = np.sin(TA)
    cTA = np.cos(TA)
    h = self.hVector(x)
    e = self.eVector(x, mu)
    hCross = self.mathUtil.crossmat(h)
    hMag = np.linalg.norm(h)
    eMag = np.linalg.norm(e)
    crossProduct = np.cross(h, e)
    crossProductMag = np.linalg.norm(crossProduct)
    term1 = sTA * (1.0 / eMag) * (np.identity(3) - (1.0 / eMag**2) * np.outer(e, e))
    term21 = np.outer((1.0 / crossProductMag) * crossProduct, (1.0 / eMag) * e)
    term22 = (eMag + cTA) * (1.0 / crossProductMag) * (
        hCross - (1.0 / crossProductMag**2) * np.dot(np.outer(crossProduct, crossProduct), hCross))
    term2 = -(term21 + term22)
    factor = -mu / hMag
    dvde = factor * (term1 + term2)
    return dvde
def P_i(T, a, idx):
    r"""
    Probability of absorption given an observed chain.

    We partition the transition matrix

    $$ T = \begin{pmatrix} Q & R \\ 0 & I \end{pmatrix} $$

    where $Q$ holds the non-absorbing transitions and $R$ the
    non-absorbing-to-absorbing transitions. The probability of being
    absorbed is then

    $$ P = (I - Q)^{-1} R $$

    Here we only want the probability of transitioning from the most
    recent state into the current absorbing state.
    """
    a_trans = np.array(a)[0:idx]  # visited
    a_absrb = np.array(a)[idx:]   # not visited
    Q = T[a_trans, :][:, a_trans]
    R = T[a_trans, :][:, a_absrb]
    I = np.identity(Q.shape[0])
    P = np.dot(np.linalg.pinv(I - Q), R)
    return P[-1, 0]  # ...from previous state (P[-1, :] by construction) into next
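# A hedged worked example (the chain below is illustrative): a three-state
# chain where states 0 and 1 are transient and state 2 is absorbing. With the
# observed chain a = [0, 1, 2] and idx = 2, Q is the 2x2 transient block, R the
# transient-to-absorbing column, and P_i returns the absorption probability
# from state 1, the most recently visited transient state.
import numpy as np

T = np.array([[0.5, 0.3, 0.2],
              [0.2, 0.5, 0.3],
              [0.0, 0.0, 1.0]])
p = P_i(T, [0, 1, 2], idx=2)
print(p)  # 1.0 here, since state 2 is the only absorbing state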
def __init__(self,
             state_dim: np.ndarray,
             target_state: np.ndarray = None,
             weights: np.ndarray = None,
             cost_width: np.ndarray = None):
    """
    Initialize saturated loss function.
    :param state_dim: state dimensionality
    :param target_state: target state which should be reached
    :param weights: weight matrix
    :param cost_width: TODO what is this
    """
    self.state_dim = state_dim
    # set target state to all zeros if not otherwise specified
    self.target_state = np.atleast_2d(
        np.zeros(self.state_dim) if target_state is None else target_state)
    # weight matrix
    self.weights = np.identity(self.state_dim) if weights is None else weights
    # -----------------------------------------------------
    # This is only useful if we have any penalties etc.
    self.cost_width = np.array([1]) if cost_width is None else cost_width
def __init__(self, simspark_ip='localhost', simspark_port=3100,
             teamname='DAInamite', player_id=0, sync_mode=True):
    super(ForwardKinematicsAgent, self).__init__(simspark_ip, simspark_port,
                                                 teamname, player_id, sync_mode)
    self.transforms = {n: identity(4) for n in self.joint_names}
    # chains defines the name of each chain and the joints it contains
    self.chains = {
        'Head': ['HeadYaw', 'HeadPitch'],
        'LArm': ['LShoulderPitch', 'LShoulderRoll', 'LElbowYaw', 'LElbowRoll'],
        'LLeg': ['LHipYawPitch', 'LHipRoll', 'LHipPitch',
                 'LKneePitch', 'LAnklePitch', 'LAnkleRoll'],
        'RLeg': ['RHipYawPitch', 'RHipRoll', 'RHipPitch',
                 'RKneePitch', 'RAnklePitch', 'RAnkleRoll'],
        'RArm': ['RShoulderPitch', 'RShoulderRoll', 'RElbowYaw', 'RElbowRoll'],
    }
def forward_kinematics_2(self, effector_name, thetas):
    # chain the local transforms of each joint to obtain the end-effector transform
    T = identity(4)
    for i, joint in enumerate(self.chains[effector_name]):
        angle = thetas[i]
        Tl = self.local_trans(joint, angle)
        T = dot(T, Tl)
    return T
def optimize_p0_stochastic_gradient_descent(xs, ns, K=100, init_Sigma=None,
                                            batch_SNP_size=1, batch_sim_size=None,
                                            p0s=None):
    if batch_sim_size is None:
        batch_sim_size = K
    x0s = xs[0, :]
    n0s = ns[0, :]
    if p0s is None:
        p0s = sim_p0s(x0s, n0s, K)
    n = xs.shape[0] - 1
    no_SNPs = xs.shape[1]
    if init_Sigma is None:
        # note: this expression simplifies to np.identity(n)
        init_Sigma = (np.identity(n) - 0.1) + 0.1
    y = matrix_to_vector(init_Sigma)
    lik = get_clean_p0_llik(xs, ns, p0s, sign=1.0)
    glik = grad(lik)
    liks, gliks = get_partial_lik0s(no_SNPs, batch_SNP_size, K, batch_sim_size,
                                    xs, ns, p0s, sign=1.0)
    return sgd(liks, gliks, y=y, big_eval=lik, evals=10000)
def learn_maxpl(imgs):
    """Learn the weights and bias for the Hopfield network by maximizing
    the pseudo log-likelihood."""
    img_size = np.prod(imgs[0].shape)
    fake_weights = np.random.normal(0, 0.1, (img_size, img_size))
    bias = np.random.normal(0, 0.1, (img_size))
    # mask that zeroes the diagonal (no self-connections)
    diag_mask = np.ones((img_size, img_size)) - np.identity(img_size)

    def objective(params, iter):
        fake_weights, bias = params
        # symmetrize and remove self-connections
        weights = np.multiply((fake_weights + fake_weights.T) / 2, diag_mask)
        pll = 0
        for i in range(len(imgs)):
            img = np.reshape(imgs[i], -1)
            activations = np.matmul(weights, img) + bias
            output = sigmoid(activations)
            eps = 1e-10
            img[img < 0] = 0
            pll += np.sum(np.multiply(img, np.log(output + eps))
                          + np.multiply(1 - img, np.log(1 - output + eps)))
        if iter % 100 == 0:
            print(-pll)
        return -pll

    g = grad(objective)
    fake_weights, bias = sgd(g, (fake_weights, bias), num_iters=300, step_size=0.001)
    weights = np.multiply((fake_weights + fake_weights.T) / 2, diag_mask)
    plt.imsave('weights_mpl.jpg', weights)
    return weights, bias
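# A hedged usage sketch (the training patterns are illustrative): learn_maxpl
# expects a list of +/-1 images and relies on the module's sigmoid helper,
# autograd's grad, and an sgd optimizer with the (grad, init, num_iters,
# step_size) signature used above.
import autograd.numpy as np

patterns = [np.sign(np.random.randn(5, 5)) for _ in range(3)]  # three random 5x5 +/-1 images
weights, bias = learn_maxpl(patterns)
print(weights.shape, bias.shape)  # (25, 25) (25,)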
def adaptive_hmc(U, K, grad_U, mass, inv_mass, iters, q_0, integrator):
    D = q_0.shape[1]
    q_hist = []
    q_hist.append(q_0.reshape(D,))
    accepted_num = 0
    cur_q = q_0.copy()
    for i in range(iters):
        nxt_q = integrator(U, K, grad_U, cur_q, mass, inv_mass, L=200, eps=0.05)
        if np.any(nxt_q != cur_q):
            accepted_num += 1
        q_hist.append(np.asarray(nxt_q.reshape(D,)))
        cur_q = nxt_q
        if i % 50 == 0:
            print("progressed {}%".format(i * 100 / iters))
        if i % 1000 == 0 and len(q_hist) > 200:
            # every 1000 iterations, we re-estimate the covariance of the estimated target
            mass = adaptive_metric(q_hist[-200:])
            inv_mass = np.linalg.inv(mass + np.identity(D) * 1e-5)
    print("The acceptance rate is {}".format(accepted_num / iters))
    # corrplot(np.asarray(q_hist))
    return q_hist
def _update_weights_output(self, lambda0):
    # Ridge Regression
    E_lambda0 = np.identity(self.num_reservoir_nodes) * lambda0
    inv_x = np.linalg.inv(self.log_reservoir_nodes.T @ self.log_reservoir_nodes + E_lambda0)
    # update weights of output layer
    self.weights_output = (inv_x @ self.log_reservoir_nodes.T) @ self.inputs
def update_posterior(nu, tau, K):
    # Gaussian posterior from site parameters (nu, tau) and prior covariance K,
    # via the B = I + sqrt(tau) K sqrt(tau) Cholesky factorization
    s_sqrt = np.diag(np.sqrt(tau))
    B = np.identity(len(nu)) + s_sqrt @ K @ s_sqrt
    L = np.linalg.cholesky(B)
    V = np.linalg.solve(L, s_sqrt @ K)
    Sigma = K - V.T @ V
    mu = Sigma @ nu
    return mu, Sigma, L
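# A hedged sanity check (kernel and site values are illustrative): for Gaussian
# sites with precisions tau and natural means nu, update_posterior should match
# the direct formula Sigma = (K^-1 + diag(tau))^-1, mu = Sigma @ nu.
import numpy as np

X = np.linspace(0, 1, 4)[:, None]
K = np.exp(-0.5 * (X - X.T)**2 / 0.3**2)  # RBF prior covariance
tau = np.full(4, 2.0)                      # site precisions
nu = np.array([0.5, -0.2, 0.1, 0.3])       # site natural means
mu, Sigma, L = update_posterior(nu, tau, K)
Sigma_direct = np.linalg.inv(np.linalg.inv(K + 1e-10 * np.eye(4)) + np.diag(tau))
assert np.allclose(Sigma, Sigma_direct, atol=1e-6)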
def f3_grad(x):
    B = np.array([[3, -1], [-1, 3]])
    a = np.array([[1], [0]])
    b = np.array([[0], [-1]])
    c1 = 1 - np.exp(-np.dot((x - a).transpose(), (x - a))) \
        - np.exp(-np.dot(np.dot((x - b).transpose(), B), (x - b))) \
        + 0.1 * np.log(np.linalg.det(0.01 * np.identity(2) + np.dot(x, x.transpose())))
    return c1
def test_matrix_functions(n):
    dim = 3 + int(4 * np.random.rand())
    print(dim)
    matrix = []
    for i in range(dim):
        row = []
        for j in range(dim):
            row.append(pe.pseudo_Obs(np.random.rand(), 0.2 + 0.1 * np.random.rand(), 'e1'))
        matrix.append(row)
    matrix = np.array(matrix) @ np.identity(dim)

    # Check inverse of matrix
    inv = pe.linalg.mat_mat_op(np.linalg.inv, matrix)
    check_inv = matrix @ inv
    for (i, j), entry in np.ndenumerate(check_inv):
        entry.gamma_method()
        if i == j:
            assert math.isclose(entry.value, 1.0, abs_tol=1e-9), \
                'value ' + str(i) + ',' + str(j) + ' ' + str(entry.value)
        else:
            assert math.isclose(entry.value, 0.0, abs_tol=1e-9), \
                'value ' + str(i) + ',' + str(j) + ' ' + str(entry.value)
        assert math.isclose(entry.dvalue, 0.0, abs_tol=1e-9), \
            'dvalue ' + str(i) + ',' + str(j) + ' ' + str(entry.dvalue)

    # Check Cholesky decomposition
    sym = np.dot(matrix, matrix.T)
    cholesky = pe.linalg.mat_mat_op(np.linalg.cholesky, sym)
    check = cholesky @ cholesky.T
    for (i, j), entry in np.ndenumerate(check):
        diff = entry - sym[i, j]
        diff.gamma_method()
        assert math.isclose(diff.value, 0.0, abs_tol=1e-9), 'value ' + str(i) + ',' + str(j)
        assert math.isclose(diff.dvalue, 0.0, abs_tol=1e-9), 'dvalue ' + str(i) + ',' + str(j)

    # Check eigh
    e, v = pe.linalg.eigh(sym)
    for i in range(dim):
        tmp = sym @ v[:, i] - v[:, i] * e[i]
        for j in range(dim):
            tmp[j].gamma_method()
            assert math.isclose(tmp[j].value, 0.0, abs_tol=1e-9), 'value ' + str(i) + ',' + str(j)
            assert math.isclose(tmp[j].dvalue, 0.0, abs_tol=1e-9), 'dvalue ' + str(i) + ',' + str(j)
def drfunc(self, theta):
    # returns a (kN + kn) x n x T x N derivative tensor
    kgain = self.helper_vars['kgain']
    ntrial = self.helper_vars['ntrial']
    nroi = self.helper_vars['nroi']
    rcurr = self.rfunc(theta)
    c, h = self.compute_ch(theta)  # k x n, k x N
    drdh = c[:, :, np.newaxis, np.newaxis] * rcurr[np.newaxis]  # k x n x T x N
    drdc = h[:, np.newaxis, np.newaxis, :] * rcurr[np.newaxis]  # k x n x T x N
    drdh = drdh[:, np.newaxis] * np.identity(ntrial)[
        np.newaxis, :, np.newaxis, np.newaxis, :]  # k x N x n x T x N
    drdc = drdc[:, np.newaxis] * np.identity(nroi)[
        np.newaxis, :, :, np.newaxis, np.newaxis]  # k x n x n x T x N
    drdh = drdh.reshape((drdh.shape[0] * drdh.shape[1],) + drdh.shape[2:])
    drdc = drdc.reshape((drdc.shape[0] * drdc.shape[1],) + drdc.shape[2:])
    deriv = np.concatenate((drdh, drdc), axis=0)  # (kN + kn) x n x T x N
    return deriv
def unit_vector_deriv(self, x):
    """Calculate derivative of unit vector x_unit w.r.t. its non-unit vector x."""
    dims = np.shape(x)
    n = np.amax(dims)
    xmag = self.column_vector_norm2(x)
    d_x_unit_d_x = (1. / xmag) * (np.identity(n) - (1. / xmag**2) * np.outer(x, x))
    return d_x_unit_d_x
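# A hedged numerical check (standalone, not from the original source): the
# Jacobian of x/||x|| is (I - x x^T / ||x||^2) / ||x||; compare it with central
# finite differences for a fixed 3-vector.
import numpy as np

x = np.array([1.0, -2.0, 0.5])
xmag = np.linalg.norm(x)
J = (1. / xmag) * (np.identity(3) - (1. / xmag**2) * np.outer(x, x))

eps = 1e-6
J_fd = np.zeros((3, 3))
for k in range(3):
    d = np.zeros(3)
    d[k] = eps
    J_fd[:, k] = ((x + d) / np.linalg.norm(x + d)
                  - (x - d) / np.linalg.norm(x - d)) / (2 * eps)
assert np.allclose(J, J_fd, atol=1e-8)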
def M(time, hstep):
    Id = np.identity(Ndim)
    Om = Omega(time, time + hstep, A, alpha, order)
    if order == 4:
        C_ = Om * (Id - (1 / 12) * (Om**2))
    elif order == 6:
        C_ = Om * (Id - (1 / 12) * (Om**2) * (1 - (1 / 10) * (Om**2)))
    M_ = np.linalg.inv(Id - 0.5 * C_) * (Id + 0.5 * C_)
    return M_
def get_other_params(q_init, x_fin, y_fin, z_fin):
    other_params = {}
    n = 50
    other_params['n'] = n
    # weights of the cost function
    other_params['rho_vel'] = 100.0
    other_params['rho_acc'] = 1000.0
    other_params['rho_b'] = 1000.0
    other_params['rho_jerk'] = 10000.0
    other_params['rho_orient'] = 1000.0
    other_params['rho_pos'] = 1000.0
    # joint limits of the Franka manipulator
    q_min = np.array([-165.0, -100.0, -165.0, -165.0, -165.0, -1.0, -165.0]) * np.pi / 180
    q_max = np.array([165.0, 101.0, 165.0, 1.0, 165.0, 214.0, 165.0]) * np.pi / 180
    other_params['q_min_traj'] = np.hstack((q_min[0] * np.ones(n), q_min[1] * np.ones(n),
                                            q_min[2] * np.ones(n), q_min[3] * np.ones(n),
                                            q_min[4] * np.ones(n), q_min[5] * np.ones(n),
                                            q_min[6] * np.ones(n)))
    other_params['q_max_traj'] = np.hstack((q_max[0] * np.ones(n), q_max[1] * np.ones(n),
                                            q_max[2] * np.ones(n), q_max[3] * np.ones(n),
                                            q_max[4] * np.ones(n), q_max[5] * np.ones(n),
                                            q_max[6] * np.ones(n)))
    # first-, second-, and third-order difference matrices
    A = np.identity(n)
    other_params['A_vel'] = np.diff(A, axis=0)
    other_params['A_acc'] = np.diff(other_params['A_vel'], axis=0)
    other_params['A_jerk'] = np.diff(other_params['A_acc'], axis=0)
    # desired axis-angle for the end effector
    roll_des = -87.2 * np.pi / 180
    pitch_des = -41.0 * np.pi / 180
    other_params['roll_des'] = roll_des
    other_params['pitch_des'] = pitch_des
    # mid-point index
    other_params['mid_index'] = 25
    other_params['q_1_init'] = q_init[0]
    other_params['q_2_init'] = q_init[1]
    other_params['q_3_init'] = q_init[2]
    other_params['q_4_init'] = q_init[3]
    other_params['q_5_init'] = q_init[4]
    other_params['q_6_init'] = q_init[5]
    other_params['q_7_init'] = q_init[6]
    other_params['x_fin'] = x_fin
    other_params['y_fin'] = y_fin
    other_params['z_fin'] = z_fin
    return other_params
def f3(x):
    a = np.array([[1], [0]])
    b = np.array([[0], [-1]])
    matrix = np.array([[3, -1], [-1, 3]])
    return 1 - (np.exp(-np.matmul(np.transpose(x - a), x - a))
                + np.exp(-np.matmul(np.matmul(np.transpose(x - b), matrix), x - b))
                - 0.1 * np.log(np.linalg.det(0.01 * np.identity(2)
                                             + np.matmul(x, np.transpose(x)))))
def ridgeData(X_train, Y_train, regularization_factor):
    # This wasn't tested for dim(X_train) != 2 or dim(Y_train) != 1
    N, D = X_train.shape
    assert Y_train.shape == (N,)
    # augment the design matrix with sqrt(lambda) * I and the targets with
    # zeros, so ordinary least squares on the augmented data solves the
    # ridge problem
    ridge_matrix = np.sqrt(regularization_factor) * np.identity(D)
    X_trainp = np.concatenate((X_train, ridge_matrix), 0)
    zeros = np.zeros([D for i in range(dim(Y_train))])
    assert dim(zeros) == dim(Y_train)
    Y_trainp = np.concatenate((Y_train, zeros), 0)
    return X_trainp, Y_trainp
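# A hedged sanity check (the data is illustrative, and dim is assumed to
# behave like np.ndim here): least squares on the augmented data should equal
# the closed-form ridge solution (X^T X + lambda I)^-1 X^T y.
import numpy as np

dim = np.ndim  # assumption: the helper used above acts like np.ndim
X = np.random.randn(10, 3)
y = np.random.randn(10)
lam = 0.5
Xp, yp = ridgeData(X, y, lam)
w_aug = np.linalg.lstsq(Xp, yp, rcond=None)[0]
w_ridge = np.linalg.solve(X.T @ X + lam * np.identity(3), X.T @ y)
assert np.allclose(w_aug, w_ridge)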
def evaluate_joint(self, x, z, w):
    '''evaluates the joint of x and z'''
    pz = gaussian.Gaussian_full(np.zeros(self.dimz), np.eye(self.dimz))
    pz = pz.eval_log(z.reshape((self.dimz,)))
    mu = np.dot(w, z).reshape((self.dimx,))
    px = gaussian.Gaussian_full(mu, self.sigx * np.identity(self.dimx))
    px = px.eval_log_properly(x)
    return px + pz
def KL_two_gaussians(params):
    d = np.shape(params)[0] - 1
    mu = params[0:d, 0]
    toSigma = params[0:d, 1:d + 1]
    # exponentiate the diagonal to keep it positive, then symmetrize
    intSigma = toSigma - np.diag(np.diag(toSigma)) + np.diag(np.exp(np.diag(toSigma)))
    Sigma = intSigma - np.tril(intSigma) + np.transpose(np.triu(intSigma))
    muPrior = np.zeros(d)
    sigmaPrior = np.identity(d)
    return 0.5 * (np.log(np.linalg.det(Sigma) / np.linalg.det(sigmaPrior)) - d
                  + np.trace(np.dot(np.linalg.inv(Sigma), sigmaPrior))
                  + np.dot(np.transpose(mu - muPrior),
                           np.dot(np.linalg.inv(Sigma), mu - muPrior)))
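# A hedged sanity check (parameterization per the function above): with mu = 0
# and a zero log-diagonal the constructed Sigma is the identity, so the KL
# divergence against the standard-normal prior should be exactly zero.
import numpy as np

d = 3
params = np.zeros((d + 1, d + 1))  # column 0: mu; columns 1..d: raw Sigma entries
print(KL_two_gaussians(params))   # -> 0.0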
def estimate_affine_transform(keypoints0, keypoints1):
    assert keypoints0.shape == keypoints1.shape
    # homogeneous coordinates: append a column of ones
    keypoints0 = np.column_stack((keypoints0, np.ones(keypoints0.shape[0])))
    # robustly fit each output coordinate with iteratively reweighted least squares
    params0 = irls.fit(keypoints0, keypoints1[:, 0])
    params1 = irls.fit(keypoints0, keypoints1[:, 1])
    M = np.identity(3)
    M[0] = params0
    M[1] = params1
    return AffineTransform(M)
def __init__(self, n, dimx, dimz):
    '''
    n: size of dataset
    dimx: dimensions of observed variables
    dimz: dimensions of local latent variables

    Generative procedure: the global parameter W and the latent
    variables are not visible.
    '''
    self.n = n
    self.sigx = 0.1
    self.W = np.random.normal(0, 1, size=(dimx, dimz))
    self.dimz = dimz
    self.dimx = dimx
    # data
    data = util.generate_data(n, self.W, self.sigx, dimx, dimz)
    self.observed = data[0]
    self.latent = data[1]
    # model parameters: mean and precision
    # SEP params
    f = dimx * dimz
    self.SEP_prior_mean = np.zeros(f).reshape((f, 1))
    self.SEP_prior_prec = np.identity(f)
    self.u = np.zeros(f).reshape((f,))
    self.V = (1e-4) * np.eye(f)
    self.R = np.random.randn(dimz, dimx)
    # the original hardcoded eye(3)/eye(2) here, which only works for
    # dimx = 3, dimz = 2; the shapes of W require dimx and dimz
    I = np.linalg.inv(self.sigx * np.eye(dimx))
    S = np.eye(dimz) + np.dot(np.dot(self.W.T, I), self.W)
    S = np.linalg.inv(S)
    self.S = S
def KL_via_sampling(params, eps):
    # also need to include lognormal as a replacement for gamma distribution
    # this is giving log of negatives
    d = np.shape(params)[0] - 1
    mu = params[0:d, 0]
    Sigma = params[0:d, 1:d + 1]
    di = np.diag_indices(d)
    Sigma[di] = np.exp(Sigma[di])
    muPrior = np.zeros(d)
    sigmaPrior = np.identity(d)
    E = 0
    for j in range(np.shape(eps)[0]):
        beta = mu + np.dot(Sigma, eps[j, :])
        E += np.log(normal_pdf(beta, mu, Sigma) / normal_pdf(beta, muPrior, sigmaPrior))
    # average over the Monte Carlo samples
    E = E / np.shape(eps)[0]
    return E
def generate_data(n, W, sigx, dimx, dimz):
    '''generates factor analysis data'''
    observed = np.zeros([n, dimx])
    latent = np.zeros([n, dimz])
    for i in range(n):
        # latent variable
        z = np.random.normal(0, 1, size=(dimz,))
        # observed
        mu = np.dot(W, z)
        cov = sigx * np.identity(dimx)
        x = np.random.multivariate_normal(mu, cov)
        observed[i] = x
        latent[i] = z
    return observed, latent
def true_marg(self, x_test):
    '''returns the true predictive marginal likelihood based on the
    generative model parameter W'''
    I = self.sigx * np.identity(self.dimx)
    mu = np.zeros(self.dimx,)
    test_size = x_test.shape[0]
    ll = 0
    for i in range(test_size):
        x = x_test[i]
        var = np.dot(self.W, self.W.T)
        var = np.add(var, I)
        px = gaussian.Gaussian_full(mu, var)
        px = px.eval(x)
        ll += np.log(px)
    return ll / float(test_size)
def marginal_likelihood(self, W0):
    # negative log marginal likelihood (up to a constant) of the factor
    # analysis model as a function of W, minimized by gradient descent
    a = self.sigx * np.identity(self.dimx)
    win = lambda w: np.dot(w, w.transpose()) + a
    const = lambda w: -(self.n / 2.0) * np.log(np.linalg.det(win(w)))
    pdin = lambda w: np.linalg.inv(win(w))
    pd = lambda w, i: np.dot(np.dot(self.observed[i].transpose(), pdin(w)), self.observed[i])
    final = lambda w: sum(pd(w, i) for i in range(self.n))
    evidence = lambda w: -const(w) + 0.5 * final(w)
    gradient = grad(evidence)
    ans, history = util.gradient_descent(evidence, W0)
    # plot learning curve
    plt.plot(history)
    plt.show()
    return ans
def __init__(self, n, dimx, dimz):
    '''
    n: size of dataset
    dimx: dimensions of observed variables
    dimz: dimensions of local latent variables

    Generative procedure: the global parameter W and the latent
    variables are not visible.
    '''
    self.n = n
    self.sigx = 0.1
    np.random.seed(1234)
    self.W = np.random.normal(0, 1, size=(dimx, dimz))
    self.dimz = dimz
    self.dimx = dimx
    # data
    data = util.generate_data(n, self.W, self.sigx, dimx, dimz)
    self.observed = data[0]
    self.latent = data[1]
    # model parameters: mean and precision
    # SEP params
    f = dimx * dimz
    self.SEP_prior_mean = np.zeros(f).reshape((f, 1))  # f x 1
    self.SEP_prior_prec = np.identity(f)  # f x f
    self.u = np.random.randn(f)
    self.V = 1e-4 * np.eye(f)
    # recognition model parameters
    self.R = np.random.randn(dimz, dimx)
    self.S = self.sigx * np.eye(dimz)
    # tail of run_nn; the function's earlier lines are not included in this snippet
    N_weights, pred_fun, loss_fun, frac_err = make_nn_funs(layer_sizes, L2_reg)
    f_out = open(filename, 'w')
    f_out.write(" Train err | Test err | Alpha\n")
    f_out.close()
    final_test_err = loss_fun(W, alpha, train_images, train_labels)
    print(N, final_test_err)
    return final_test_err

N_data, train_images, train_labels, test_images, test_labels = get_wine_data()

if __name__ == '__main__':
    # Initialize weights
    rs = npr.RandomState(11)
    param_scale = 0.1
    max_N = 7
    N = 4.5
    num_init_weights = 36 + 9 * (max_N - 1) + 3 * max_N
    W = np.ravel(np.identity(int(np.sqrt(num_init_weights)) + 1))
    run_nn_grad = grad(run_nn, 0)
    params = np.concatenate((np.array([N]), W))
    optimize.minimize(run_nn, params, jac=run_nn_grad, method='BFGS',
                      args=(12, 3), options={'disp': True})
print(grad_ml(length_scale))
print("Initial Parameters: ", init_params)
print("Optimized Parameters: ", cov_params.x)
opt_length_scale = np.exp(cov_params.x[0])

# Calculate the covariance matrices with the optimized length scale
# to "condition" on the observed data (top of page 16)
ok_xx = calcSigma(x, x, opt_length_scale)
ok_xxs = calcSigma(x, x_star, opt_length_scale)
ok_xsx = calcSigma(x_star, x, opt_length_scale)
ok_xsxs = calcSigma(x_star, x_star, opt_length_scale)

# Update the mean and covariance from equations 2.22-2.24
of_bar_star_mean = ok_xsx.dot(
    np.linalg.inv(ok_xx + (sigma_n**2) * np.identity(ok_xx.shape[0])).dot(y))
of_bar_star_cov = ok_xsxs - ok_xsx.dot(
    np.linalg.inv(ok_xx + (sigma_n**2) * np.identity(ok_xx.shape[0])).dot(ok_xxs))

# Redraw the sample functions
of_bar_star_sampled_values = sample_GP(n_samples * 10, n_pts, of_bar_star_mean, of_bar_star_cov)

# Get mean and spread of newly sampled values
ofunc_bar_mean, ofunc_bar_lower, ofunc_bar_upper = \
    calculate_func_mean_and_variance(of_bar_star_sampled_values)

# Plot the results
plt.figure()
plt.plot(x_star, of_bar_star_sampled_values[:, 0],
         x_star, of_bar_star_sampled_values[:, 1],
         x_star, of_bar_star_sampled_values[:, 2])
plt.fill_between(x_star.flatten(), ofunc_bar_lower, ofunc_bar_upper, color='0.15', alpha=0.25)
plt.xlabel('input, x')
plt.ylabel('output, f(x)')
plt.title("Prediction using noisy observations, with optimized length scale")
    # tail of the likelihood function; its signature is not included in this snippet
    return bernoulli(pi, yi)

def get_pi(beta, xi, alpha_i):
    xi = np.insert(xi, 0, 1)  # prepend the intercept
    return logistic(np.dot(beta, xi) + alpha_i)

def bernoulli(pi, yi):
    return (pi**yi) * ((1 - pi)**(1 - yi))

def logistic(x):
    return 1 / (1 + np.exp(-x))

if __name__ == '__main__':
    # create some data with beta = 2
    d = 3
    params = np.random.normal(0, 1, (d + 1, d + 1))
    cov = np.identity(d)
    params[0:d, 1:d + 1] = cov
    print(params)
    # generate_data(beta, tau, n, num_times)
    X, y = generate_data(np.array([0.5, 0.8, 1]), 1, 20, 4)  # 537
    # test likelihood for several beta values; beta = 2 should give high likelihood
    m = np.zeros((d + 1, d + 1))
    v = np.zeros((d + 1, d + 1))
    for i in range(10):
        params, m, v = iterate(params, y, X, i, m, v, 30)
    mu = params[0:d, 0]
    print(mu)
    # eps = np.random.rand(50)
    # print(lower_bound(params, y, X, eps))
def KL_two_gaussians(params):
    # KL divergence between a diagonal Gaussian (mean mu, log-variances in the
    # second half of params) and the standard-normal prior
    d = len(params) // 2
    mu = params[0:d]
    Sigma = np.diag(np.exp(params[d:]))
    muPrior = np.zeros(d)
    sigmaPrior = np.identity(d)
    return 0.5 * (np.log(np.linalg.det(Sigma) / np.linalg.det(sigmaPrior)) - d
                  + np.trace(np.dot(np.linalg.inv(Sigma), sigmaPrior))
                  + np.dot(np.transpose(mu - muPrior),
                           np.dot(np.linalg.inv(Sigma), mu - muPrior)))
def marg_likelihood(x, y, l):
    k_xx = calcSigma(x, x, l)
    K_y = k_xx + (noise_var**2) * np.identity(k_xx.shape[0])
    marg_data = 0.5 * np.dot(y.T, np.dot(np.linalg.inv(K_y), y)) \
        - 0.5 * np.log(np.linalg.det(np.linalg.inv(K_y))) \
        - (len(y) * 0.5) * np.log(2 * np.pi)
    return -1.0 * marg_data
init_params = 0.1 * rs.randn(num_params)
grad_ml = grad(g_ml)
cov_params = minimize(value_and_grad(g_ml), init_params, jac=True, method='CG')
print(marg_likelihood(x_train, y_train, length_scale))
print(grad_ml(length_scale))
print("Initial Parameters: ", init_params)
print("Optimized Parameters: ", cov_params.x)
opt_length_scale = np.exp(cov_params.x[0])

Omg = np.linalg.inv(K + ((noise_var / 2.)**2 * np.identity(n_train)))
Beta = np.dot(Omg, y_train).reshape((-1, 1))
post_mean = np.dot(ks.T, Beta)
post_var = kss - ks.T.dot(Omg.dot(ks))

# Sample from prior
prior_sampled_values = sample_GP(n_samples, n_pts, np.zeros(n_pts), kss)

# Sample from posterior predictive distribution
post_sampled_values = sample_GP(n_samples * 100, n_pts, post_mean.flatten(), post_var)

# Get mean and spread of newly sampled values
f_post_mean, f_post_lower, f_post_upper = calculate_func_mean_and_variance(post_sampled_values)