def spcol(knots, order, tau):
    """Build the collocation matrix of a B-spline basis.

    Minimal emulation of MATLAB's ``spcol``.

    Parameters:
        knots:
            rank-1 array, knot vector (with appropriately repeated endpoints;
            see `augknt`, `aptknt`)
        order:
            int, >= 0, order of spline
        tau:
            rank-1 array, collocation sites

    Returns:
        rank-2 array A such that

            A[i,j] = D**{m(i)} B_j(tau[i])

        where
            m(i) = multiplicity of site tau[i]
            D**k  = kth derivative (0 for function value itself)
    """
    multiplicities = knt2mlt(tau)
    basis = bspline.Bspline(knots, order)

    # A throwaway evaluation reveals both the number of basis functions
    # and the dtype the basis produces.
    probe = basis(0.)
    n_basis = len(probe)

    n_sites = tau.shape[0]
    colloc = np.empty((n_sites, n_basis), dtype=probe.dtype)
    for row, (site, mult) in enumerate(zip(tau, multiplicities)):
        # Differentiate the basis as many times as the site's multiplicity.
        derivative = basis.diff(order=mult)
        colloc[row, :] = derivative(site)
    return colloc
def bspline_fit(X, order, nknots):
    """Create a B-spline basis spanning the range of a single covariate.

    Parameters:
        X: array-like with ``squeeze``/``min``/``max``; after squeezing it
           must have at most one dimension
        order: spline order, forwarded to ``splinelab.augknt`` and
               ``bspline.Bspline``
        nknots: number of equally spaced knots between ``X.min()`` and
                ``X.max()`` (before endpoint padding)

    Returns:
        the ``bspline.Bspline`` basis object

    Raises:
        ValueError: if ``X`` still has more than one dimension after squeezing
    """
    covariate = X.squeeze()
    if len(covariate.shape) > 1:
        raise ValueError('Bspline method only works for a single covariate.')

    breakpoints = np.linspace(covariate.min(), covariate.max(), nknots)
    padded_knots = splinelab.augknt(breakpoints, order)
    return bspline.Bspline(padded_knots, order)
def create_bspline_basis(xmin, xmax, p=3, nknots=5):
    """
    Compute a B-spline basis set on equally spaced knots in [xmin, xmax].

    :param xmin: lower end of the knot range
    :param xmax: upper end of the knot range
    :param p: order of spline (3 = cubic)
    :param nknots: number of knots (endpoints only counted once)
    :return: the ``bspline.Bspline`` basis object
    """
    breakpoints = np.linspace(xmin, xmax, nknots)
    # augknt pads the knot vector with repeated endpoints
    padded_knots = splinelab.augknt(breakpoints, p)
    return bspline.Bspline(padded_knots, p)
def test():
    """Usage example and sanity checks for a cubic B-spline basis on [0, 1].

    Builds the basis, prints two collocation matrices, and asserts the
    expected number of basis functions and the partition-of-unity property.
    """
    # --- configuration ---
    spline_order = 3  # order of spline basis (as-is! 3 = cubic)
    n_knots = 5  # number of knots to generate (endpoints count only once)
    sites = [0.1, 0.33]  # collocation sites (i.e. where to evaluate)

    # --- usage example ---
    base_knots = np.linspace(0, 1, n_knots)  # no endpoint repeats yet
    padded_knots = splinelab.augknt(base_knots, spline_order)  # add endpoint repeats
    basis = bspline.Bspline(padded_knots, spline_order)

    values = basis.collmat(sites)  # function value at the sites
    second_derivs = basis.collmat(sites, deriv_order=2)  # second derivative at the sites
    print(values)
    print(second_derivs)

    # --- checks ---
    # number of basis functions
    interior_count = len(base_knots) - 2
    expected_count = interior_count + (spline_order + 1)
    actual_count = len(basis(0.))  # dummy evaluation, just to count
    assert actual_count == expected_count, "something went wrong, number of basis functions is incorrect"

    # partition-of-unity property of the spline basis
    row_sums = np.sum(values, axis=1)
    assert np.allclose(row_sums, 1.0), "something went wrong, the basis functions do not form a partition of unity"
def initialization(self, data, x):
    """
    Feed in data and initialize variables needed in DP solution.

    :param data: 2D array indexed as ``data[path, time]`` with at least
                 ``self.T + 1`` columns
    :param x: 2D array indexed as ``x[path, time]``; its values are expanded
              in a B-spline basis of order 3 with ``self.num_basis`` sites
    :return: Dictionary of variables.
    """
    last = self.T
    n_times = last + 1

    increments = data[:, 1:n_times] - 1 / self.gamma * data[:, 0:last]

    var_dict = {}
    var_dict['ds'] = increments
    var_dict['ds_hat'] = increments - np.mean(increments, axis=0)
    var_dict['pi'] = np.zeros((self.n_mc, n_times))
    for key in ('pi_hat', 'action', 'q', 'reward'):
        var_dict[key] = np.zeros_like(var_dict['pi'])

    # Terminal conditions at t = T.
    var_dict['pi'][:, last] = np.maximum(data[:, last] - self.K, 0)
    terminal_pi = var_dict['pi'][:, last]  # view onto the stored terminal column
    var_dict['pi_hat'][:, last] = terminal_pi - np.mean(terminal_pi)
    var_dict['action'][:, last] = 0
    var_dict['q'][:, last] = -terminal_pi - self.risk_lambda * np.var(terminal_pi)
    var_dict['reward'][:, last] = -self.risk_lambda * np.var(terminal_pi)

    # Basis on sites spread evenly over the observed range of x.
    lowest, highest = np.min(x), np.max(x)
    sites = np.linspace(lowest, highest, self.num_basis)
    knot_vector = splinelab.aptknt(tau=sites, order=3)
    basis = bspline.Bspline(knot_vector, order=3)

    var_dict['func_x'] = np.zeros((self.n_mc, n_times, self.num_basis))
    for t in range(n_times):
        column = x[:, t]
        var_dict['func_x'][:, t, :] = np.array([basis(value) for value in column])

    print('The shape of pi / action / q:', var_dict['pi'].shape)
    print('The shape of func_x:', var_dict['func_x'].shape)
    return var_dict
def gen_path(self):
    """Simulate price paths and precompute the B-spline feature tensor.

    Seeds the random generator with 42, fills ``self.s_values`` step by step,
    derives ``delta_S``, ``delta_S_hat`` and the state variable ``X``, sets
    the terminal values of ``pi``, ``pi_hat``, ``q`` and ``r``, and stores the
    basis expansion of ``X`` in ``self.data`` (time step x path x basis).
    """
    # Path Generator (Black Scholes )
    seed(42)
    for step in range(1, self.num_steps + 1):
        shocks = standard_normal(self.num_paths)
        log_growth = (self.mu - self.vol ** 2 / 2) * self.dt \
            + self.vol * np.sqrt(self.dt) * shocks
        self.s_values[:, step] = self.s_values[:, step - 1] * np.exp(log_growth)

    increments = (1 - self.tr_alpha) * self.s_values[:, 1:] \
        - 1 / self.gamma * self.s_values[:, :self.num_steps]
    self.delta_S = increments

    def demean(column):
        return column - np.mean(column)

    self.delta_S_hat = np.apply_along_axis(demean, axis=0, arr=increments)

    self.X = - (self.mu - 0.5 * self.vol ** 2) * np.arange(self.num_steps + 1) * self.dt \
        + np.log(self.s_values)
    X_min = np.min(np.min(self.X))
    X_max = np.max(np.max(self.X))
    print("Shape of X : {} \n Max : {} \n Min : {}".format(self.X.shape, X_max, X_min))

    # Terminal conditions (last time step).
    terminal_pi = np.maximum(self.s_values[:, -1] - self.K, 0)
    self.pi[:, -1] = terminal_pi
    self.pi_hat[:, -1] = self.pi[:, -1] - np.mean(self.pi[:, -1])
    self.q[:, -1] = -self.pi[:, -1] - self.risk_lambda * np.var(self.pi[:, -1])
    self.r[:, -1] = -self.risk_lambda * np.var(self.pi[:, -1])

    # Spline basis on evenly spaced collocation sites over the range of X.
    spline_order = 4
    n_sites = 12
    sites = np.linspace(X_min, X_max, n_sites)
    knot_vector = splinelab.aptknt(sites, spline_order)
    basis = bspline.Bspline(knot_vector, spline_order)

    n_features = n_sites
    self.data = np.zeros((self.num_steps + 1, self.num_paths, n_features))

    started = time.time()
    for step in np.arange(self.num_steps + 1):
        states = self.X[:, step]
        self.data[step, :, :] = np.array([basis(value) for value in states])
    finished = time.time()
    print("\nTime for basis expansion {}".format(finished - started))
def evalkernel(ell,RS,cgam,gpts,kerzeta):
    """Evaluate a kernel as a weighted sum of B-spline basis functions.

    For each of the ``gpts`` points ``kerzeta[n]`` the basis is evaluated,
    multiplied elementwise by the coefficients ``cgam`` and the first
    ``2*RS+1`` products are summed.

    Returns:
        rank-1 array of length ``gpts`` holding the kernel values
    """
    n_terms = 2*RS+1
    half_span = 0.5*(2*RS+ell)

    # Equally spaced B-spline breaks for a B-spline of order ell
    breaks = np.linspace(-half_span, half_span, 2*RS+ell+1)
    basis = bspline.Bspline(breaks, ell-1)

    fker = np.zeros((gpts))
    weighted = np.zeros((gpts, n_terms))
    for n in arange(gpts):
        # summing over zetas
        weighted[n][:] = basis(kerzeta[n])*cgam[:]
        fker[n] = sum(weighted[n][jj] for jj in arange(n_terms))
    return fker
bbox_to_anchor=(1, 1.016)) plt.tight_layout() if save_figs: plt.savefig('../images/ex_%02d_basis_funcs.png' % ex_number) plt.show() if __name__ == '__main__': # B-spline examples bsplines = examples_data.bspline() for i in range(len(bsplines)): p = bsplines[i].degree p_list = bsplines[i].points u_list = bsplines[i].knots u = np.linspace(u_list[0], u_list[-1], 100) spline = bspline.Bspline(p, p_list, u_list) points = spline.points(u) basis_funcs = bspline.get_basis_vector(u, u_list, p) plot_example(p, p_list, points, basis_funcs, u, i, save_figs=False) # NURBS examples nurbs = examples_data.nurbs() for i in range(len(nurbs)): p = nurbs[i].degree p_list = nurbs[i].points u_list = nurbs[i].knots w_list = nurbs[i].weights u = np.linspace(u_list[0], u_list[-1], 100) spline = bspline.Nurbs(p, p_list, u_list, w_list) points = spline.points(u) basis_funcs = bspline.get_basis_vector(u, u_list, p)
payoff = np.int32(ST >= K) else: payoff = np.int32(ST <= K) return payoff # ### Spline basis functions definition X_min = np.min(np.min(X)) X_max = np.max(np.max(X)) print('X.shape = ', X.shape) print('X_min, X_max = ', X_min, X_max) p = 4 # 3 <- cubic, 4 <- B-spline ncolloc = 12 tau = np.linspace(X_min, X_max, ncolloc) k = splinelab.aptknt(tau, p) basis = bspline.Bspline(k, p) f = plt.figure() print('Number of points k = ', len(k)) basis.plot() # ### Make data matrices with feature values # "Features" here are the values of basis functions at data points # The outputs are 3D arrays of dimensions num_tSteps x num_MC x num_basis num_t_steps = T + 1 num_basis = ncolloc data_mat_t = np.zeros((num_t_steps, N_MC, num_basis)) print('num_basis = ', num_basis) print('dim data_mat_t = ', data_mat_t.shape) # fill it, expand function in finite dimensional space # in neural network the basis is the neural network itself
nk_list = [(200, 5), (20, 50)] # delta_list = [0.0, 0.10, 0.15, 0.20, 0.25, 0.30] # effect size typ three alpha = 0.05 e = 0.5 # error variance ms = [1, 2, 3] # alpha spending functions lm = len(ms) lb, ub = -2, 2 order = 3 # order of spline (as-is; 3 = cubic) nknots = 4 # number of knots to generate knots = np.linspace(lb, ub, nknots) # create a knot vector without endpoint repeats knots = splinelab.augknt( knots, order) # add endpoint repeats as appropriate for spline order bases = bspline.Bspline(knots, order) # create spline basis of order p on knots k nfeatures = 3 p = nfeatures * (order + nknots - 1) + 1 rho = 0.5 cov_mat = (1 - rho) * np.eye(nfeatures) + rho * np.ones((nfeatures, nfeatures)) L = np.linalg.cholesky(cov_mat).T for (typ, (n, k), delta) in product(typ_list, nk_list, delta_list): name = 'result/HTE/' + 'typ' + str(typ) + 'n' + str(n) + 'k' + str( k) + 'delta' + str(delta) + '_nonlinear_adaptive_HTE.npz' # if os.path.exists(name): # continue
def main():
    """Fit a 2D tensor-product B-spline potential to measured partial derivatives.

    Loads ``umair_gal_denoised.mat``, resamples the data onto 401 points along
    the H axis, assembles the overdetermined linear system for the spline
    coefficients of a potential ``psi`` whose partial derivatives are fitted
    against ``Bx`` and ``lamxx``, solves it in the least-squares sense with the
    solver named by ``lsq_solver``, saves the result to ``tmp_s2d.mat`` and
    plots the fit against the data.

    Raises:
        ValueError: if ``lsq_solver`` is not one of the supported solver names.
    """
    ######################
    # Config
    ######################

    # Choose least-squares solver to use:
    #
    #    "dense"            LAPACK DGELSD, direct, good for small problems
    #    "sparse"           SciPy LSQR, iterative, asymptotically faster, good for large problems
    #    "optimize"         general nonlinear optimizer using Trust Region Reflective (trf) algorithm
    #    "qr"
    #    "cholesky"
    #    "sparse_qr"
    #    "sparse_qr_solve"
    lsq_solver = "sparse_qr_solve"

    ######################
    # Load multiscale data
    ######################

    print("Loading measurement data...")

    # Measurements are provided on a meshgrid over (Hx, sigxx).
    data2 = scipy.io.loadmat("umair_gal_denoised.mat")
    sigxx = -1e6 * np.array([
        0, 1, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80
    ][::-1], dtype=np.float64)
    assert sigxx.shape[0] == 18
    Hx = data2["Hval"][0, :]  # same for all sigma, just take the first row
    Bx = data2["Bval"].T
    lamxx = data2["LHval"].T * 1e-6
    Bx = Bx[::-1, :]
    lamxx = lamxx[::-1, :]

    # HACK, fix later (must decouple number of knots from number of data sites)
    ii = np.arange(Hx.shape[0])
    n_newi = 401
    newii = np.linspace(0, ii[-1], n_newi)
    nsigma = sigxx.shape[0]
    fH = scipy.interpolate.interp1d(ii, Hx)
    newB = np.empty((n_newi, Bx.shape[1]), dtype=np.float64)
    newlam = np.empty((n_newi, lamxx.shape[1]), dtype=np.float64)
    for j in range(nsigma):
        fB = scipy.interpolate.interp1d(ii, Bx[:, j])
        newB[:, j] = fB(newii)
        flam = scipy.interpolate.interp1d(ii, lamxx[:, j])
        newlam[:, j] = flam(newii)
    Hx = fH(newii)
    Bx = newB
    lamxx = newlam

    # Order of spline (as-is! 3 = cubic)
    ordr = 3

    # Auxiliary variables (H, sig_xx, sig_xy)
    Hscale = np.max(Hx)
    sscale = np.max(np.abs(sigxx))
    x = Hx / Hscale
    y = sigxx / sscale
    nx = x.shape[0]  # number of grid points, x axis
    ny = y.shape[0]  # number of grid points, y axis

    # Partial derivatives (B, lam_xx, lam_xy) from multiscale model
    #
    # In the magnetostriction components, the multiscale model produces nonzero lamxx at zero stress.
    # We normalize this away for purposes of performing the curve fit.
    #
    dpsi_dx = Bx * Hscale
    dpsi_dy = (lamxx - lamxx[0, :]) * sscale

    ######################
    # Set up splines
    ######################

    print("Setting up splines...")

    # The evaluation algorithm used in bspline.py uses half-open intervals  t_i <= x < t_{i+1}.
    #
    # This causes havoc for evaluation at the end of each interval, because it is actually the start
    # of the next interval.
    #
    # Especially, the end of the last interval is the start of the next (non-existent) interval.
    #
    # We work around this by using a small epsilon to avoid evaluation exactly at t_{i+1} (for the last interval).
    #
    def marginize_end(x):
        out = x.copy()
        out[-1] += 1e-10 * (x[-1] - x[0])
        return out

    # create knots and spline basis
    xknots = splinelab.aptknt(marginize_end(x), ordr)
    yknots = splinelab.aptknt(marginize_end(y), ordr)
    splx = bspline.Bspline(xknots, ordr)
    sply = bspline.Bspline(yknots, ordr)

    # get number of basis functions (perform dummy evaluation and count)
    nxb = len(splx(0.))
    nyb = len(sply(0.))

    # TODO Check if we need to convert input Bx and sigxx to u,v (what is actually stored in the data files?)

    # Create collocation matrices:
    #
    #   A[i,j] = d**deriv_order B_j(tau[i])
    #
    # where d denotes differentiation and B_j is the jth basis function.
    #
    # We place the collocation sites at the points where we have measurements.
    #
    Au = splx.collmat(x)
    Av = sply.collmat(y)
    Du = splx.collmat(x, deriv_order=1)
    Dv = sply.collmat(y, deriv_order=1)

    ######################
    # Assemble system
    ######################

    print("Assembling system...")

    # Assemble the equation system for fitting against data on the partial derivatives of psi.
    #
    # By writing psi in the spline basis,
    #
    #   psi_{ij}       = A^{u}_{ik} A^{v}_{jl} c_{kl}
    #
    # the quantities to be fitted, which are the partial derivatives of psi, become
    #
    #   B_{ij}         = D^{u}_{ik} A^{v}_{jl} c_{kl}
    #   lambda_{xx,ij} = A^{u}_{ik} D^{v}_{jl} c_{kl}
    #
    # Repeated indices are summed over.
    #
    # Column: kl converted to linear index (k = 0,1,...,nxb-1,  l = 0,1,...,nyb-1)
    # Row:    ij converted to linear index (i = 0,1,...,nx-1,   j = 0,1,...,ny-1)
    #
    # (Paavo's notes, Stresses4.pdf)

    nf = 2  # number of unknown fields
    nr = nx * ny  # equation system rows per unknown field
    A = np.empty((nf * nr, nxb * nyb), dtype=np.float64)  # global matrix
    b = np.empty((nf * nr), dtype=np.float64)  # global RHS

    I, J, IJ = util.index.genidx((nx, ny))
    K, L, KL = util.index.genidx((nxb, nyb))

    # Loop only over rows of the equation system; each row's columns are
    # filled in one vectorized assignment. The two rows per grid point are
    # interleaved: row nf*ij is the B_x equation, row nf*ij+1 is lambda_xx.
    for i, j, ij in zip(I, J, IJ):
        A[nf * ij, KL] = Du[i, K] * Av[j, L]
        A[nf * ij + 1, KL] = Au[i, K] * Dv[j, L]
    b[nf * IJ] = dpsi_dx[I, J]  # RHS for B_x
    b[nf * IJ + 1] = dpsi_dy[I, J]  # RHS for lambda_xx

    ######################
    # Solve
    ######################

    # Solve the optimal coefficients.

    # Note that we are constructing a potential function from partial derivatives only,
    # so the solution is unique only up to a global additive shift term.
    #
    # Under the hood, numpy.linalg.lstsq uses LAPACK DGELSD:
    #
    #   http://stackoverflow.com/questions/29372559/what-is-the-difference-between-numpy-linalg-lstsq-and-scipy-linalg-lstsq
    #
    # DGELSD accepts also rank-deficient input (rank(A) < min(nrows,ncols)), returning  arg min( ||x||_2 ) ,
    # so we don't need to do anything special to account for this.
    #
    # Same goes for the sparse LSQR.

    # equilibrate row and column norms
    #
    # See documentation of  scipy.sparse.linalg.lsqr,  it requires this to work properly.
    #
    # https://github.com/Technologicat/python-wlsqm
    #
    print("Equilibrating...")
    S = A.copy(order='F')  # the rescaler requires Fortran memory layout
    A = scipy.sparse.csr_matrix(A)  # save memory (dense "A" no longer needed)

    # scale rows corresponding to Bx
    #
    rs = np.ones_like(b)
    rs[nf * IJ] = 2
    S *= rs[:, np.newaxis]  # one broadcast multiply instead of a Python loop over rows
    cs = 1.

    b *= rs  # scale RHS accordingly

    def report_sparse(M):
        # Print shape, element count and fill ratio of a sparse matrix.
        n_elems = np.prod(M.shape)
        print(" matrix shape %s = %d elements; %d nonzeros (%g%%)" %
              (M.shape, n_elems, M.nnz, 100. * M.nnz / n_elems))

    def fix_first_coefficient():
        # S is rank-deficient by one, because we are solving a potential based on data
        # on its partial derivatives. Force S to have full rank by replacing the first
        # equation with  c[0] = 1.  Must be called while S is still a dense array.
        S[0, :] = 0.
        S[0, 0] = 1.
        b[0] = 1.
        rs[0] = 1.

    print("Solving with algorithm = '%s'..." % (lsq_solver))
    if lsq_solver == "dense":
        print(" matrix shape %s = %d elements" % (S.shape, np.prod(S.shape)))
        # Use the same ``np`` alias as the rest of this function.
        ret = np.linalg.lstsq(S, b)  # c,residuals,rank,singvals
        c = ret[0]

    elif lsq_solver == "sparse":
        S = scipy.sparse.coo_matrix(S)
        report_sparse(S)
        ret = scipy.sparse.linalg.lsqr(S, b)
        c, exit_reason, iters = ret[:3]
        # LSQR istop: 1 = approximate solution of Ax = b, 2 = least-squares solution found.
        # Both are successful terminations.
        if exit_reason not in (1, 2):
            print("WARNING: solver did not converge (exit_reason = %d)" % (exit_reason))
        print(" sparse solver iterations taken: %d" % (iters))

    elif lsq_solver == "optimize":
        # make sparse matrix (faster for dot products)
        S = scipy.sparse.coo_matrix(S)
        report_sparse(S)

        def fitting_error(c):
            return S.dot(c) - b

        ret = scipy.optimize.least_squares(fitting_error,
                                           np.ones(S.shape[1], dtype=np.float64),
                                           method="trf",
                                           loss="linear")
        c = ret.x
        if ret.status < 1:
            # status codes: https://docs.scipy.org/doc/scipy-0.18.1/reference/generated/scipy.optimize.least_squares.html
            print("WARNING: solver did not converge (status = %d)" % (ret.status))

    elif lsq_solver == "qr":
        print(" matrix shape %s = %d elements" % (S.shape, np.prod(S.shape)))
        # http://glowingpython.blogspot.fi/2012/03/solving-overdetermined-systems-with-qr.html
        Q, R = np.linalg.qr(S)  # qr decomposition of A
        Qb = (Q.T).dot(b)  # computing Q^T*b (project b onto the range of A)
        c = scipy.linalg.solve_triangular(R, Qb, check_finite=False)  # solving R*x = Q^T*b

    elif lsq_solver == "cholesky":
        fix_first_coefficient()
        S = scipy.sparse.csr_matrix(S)
        report_sparse(S)

        # Be sure to use the new sksparse from
        #
        #   https://github.com/scikit-sparse/scikit-sparse
        #
        # instead of the old scikits.sparse (which will fail with an error).
        #
        # Requires libsuitesparse-dev for CHOLMOD headers.
        #
        from sksparse.cholmod import cholesky_AAt

        # Notice that CHOLMOD computes AA' and we want M'M, so we must set A = M'!
        factor = cholesky_AAt(S.T)
        c = factor.solve_A(S.T * b)

    elif lsq_solver == "sparse_qr":
        # Without fixing one coefficient the linear solve step will fail,
        # because R will be exactly singular.
        fix_first_coefficient()
        S = scipy.sparse.coo_matrix(S)
        report_sparse(S)

        # pip install sparseqr
        # or https://github.com/yig/PySPQR
        #
        # Works like MATLAB's [Q,R,e] = qr(...):
        #
        # https://se.mathworks.com/help/matlab/ref/qr.html
        #
        # [Q,R,E] = qr(A) or [Q,R,E] = qr(A,'matrix') produces unitary Q, upper triangular R and a permutation matrix E
        # so that A*E = Q*R. The column permutation E is chosen to reduce fill-in in R.
        #
        # [Q,R,e] = qr(A,'vector') returns the permutation information as a vector instead of a matrix.
        # That is, e is a row vector such that A(:,e) = Q*R.
        #
        import sparseqr
        print(" performing sparse QR decomposition...")
        Q, R, E, _rank = sparseqr.qr(S)

        # produce reduced QR (for least-squares fitting)
        #
        #   - cut away bottom part of R (zeros!)
        #   - cut away the corresponding far-right part of Q
        #
        # see
        #    np.linalg.qr
        #    https://andreask.cs.illinois.edu/cs357-s15/public/demos/06-qr-applications/Solving%20Least-Squares%20Problems.html
        #
        print(" reducing matrices...")
        k = min(S.shape)
        R = R.tocsr()[:k, :]
        Q = Q.tocsc()[:, :k]

        print(" solving...")
        Qb = (Q.T).dot(b)
        permuted = scipy.sparse.linalg.spsolve(R, Qb)
        c = np.empty_like(permuted)
        c[E] = permuted[:]  # apply inverse permutation

    elif lsq_solver == "sparse_qr_solve":
        fix_first_coefficient()
        S = scipy.sparse.coo_matrix(S)
        report_sparse(S)

        import sparseqr
        c = sparseqr.solve(S, b)

    else:
        raise ValueError(
            "unknown solver '%s'; valid: 'dense', 'sparse', 'optimize', 'qr', "
            "'cholesky', 'sparse_qr', 'sparse_qr_solve'" % (lsq_solver))

    c *= cs  # undo column scaling in solution

    # now c contains the spline coefficients, c_{kl}, where kl has been raveled into a linear index.

    ######################
    # Save
    ######################

    filename = "tmp_s2d.mat"
    data = {
        "ordr": ordr,
        "xknots": xknots,
        "yknots": yknots,
        "c": c,
        "Hscale": Hscale,
        "sscale": sscale,
    }
    scipy.io.savemat(filename, data, format='5', oned_as='row')

    ######################
    # Plot
    ######################

    print("Visualizing...")

    # unpack results onto meshgrid
    #
    fitted = A.dot(c)  # function values corresponding to each row in the global equation system
    X, Y = np.meshgrid(Hx, sigxx, indexing='ij')  # indexed like X[i,j]  (i is x index, j is y index)
    Z_Bx = np.empty_like(X)
    Z_lamxx = np.empty_like(X)

    # De-interleave the two fields from the global row ordering.
    Z_Bx[I, J] = fitted[nf * IJ]
    Z_lamxx[I, J] = fitted[nf * IJ + 1]

    data_Bx = {
        "x": (X, r"$H_{x}$"),
        "y": (Y, r"$\sigma_{xx}$"),
        "z": (Z_Bx / Hscale, r"$B_{x}$")
    }

    data_lamxx = {
        "x": (X, r"$H_{x}$"),
        "y": (Y, r"$\sigma_{xx}$"),
        "z": (Z_lamxx / sscale, r"$\lambda_{xx}$")
    }

    def relerr(data, refdata):
        # Relative error in percent, measured in the 2-norm over all grid points.
        refdata_linview = refdata.reshape(-1)
        return 100. * np.linalg.norm(refdata_linview - data.reshape(-1)) / np.linalg.norm(refdata_linview)

    plt.figure(1)
    plt.clf()
    ax = util.plot.plot_wireframe(data_Bx, legend_label="Spline", figno=1)
    ax.plot_wireframe(X, Y, dpsi_dx / Hscale, label="Multiscale", color="r")
    plt.legend(loc="best")
    print("B_x relative error %g%%" % (relerr(Z_Bx, dpsi_dx)))

    plt.figure(2)
    plt.clf()
    ax = util.plot.plot_wireframe(data_lamxx, legend_label="Spline", figno=2)
    ax.plot_wireframe(X, Y, dpsi_dy / sscale, label="Multiscale", color="r")
    plt.legend(loc="best")
    print("lambda_{xx} relative error %g%%" % (relerr(Z_lamxx, dpsi_dy)))

    # match the grid point numbering used in MATLAB version of this script
    #
    def t(A):
        return np.transpose(A, [1, 0])

    dpsi_dx = t(dpsi_dx)
    Z_Bx = t(Z_Bx)
    dpsi_dy = t(dpsi_dy)
    Z_lamxx = t(Z_lamxx)

    plt.figure(3)
    plt.clf()
    ax = plt.subplot(1, 1, 1)
    ax.plot(dpsi_dx.reshape(-1) / Hscale, 'ro', markersize=2, label="Multiscale")
    ax.plot(Z_Bx.reshape(-1) / Hscale, 'ko', markersize=2, label="Spline")
    ax.set_xlabel("Grid point number")
    ax.set_ylabel(r"$B_{x}$")
    plt.legend(loc="best")

    plt.figure(4)
    plt.clf()
    ax = plt.subplot(1, 1, 1)
    ax.plot(dpsi_dy.reshape(-1) / sscale, 'ro', markersize=2, label="Multiscale")
    ax.plot(Z_lamxx.reshape(-1) / sscale, 'ko', markersize=2, label="Spline")
    ax.set_xlabel("Grid point number")
    ax.set_ylabel(r"$\lambda_{xx}$")
    plt.legend(loc="best")

    print("All done.")
def main():
    """Approximate each B-spline basis function by an Elmer-format cubic spline and plot both.

    Builds a cubic B-spline basis on 21 sites in [0, 1], picks 41 sampling
    sites per basis function (refined toward the function's peak), solves the
    Elmer spline coefficients from the samples, and plots the original basis
    function (dashed) against its approximation (solid).
    """
    # Original knots
    #
    lo = 0
    hi = 1
    base_sites = np.linspace(lo, hi, 21)
    spline_order = 3

    # We stretch the domain of the splines just a bit to avoid evaluating them
    # exactly at the right end, as the splines actually have support on the
    # half-open interval [lo, hi).
    #
    def marginize_end(x):
        stretched = x.copy()
        stretched[-1] += 1e-10 * (x[-1] - x[0])
        return stretched

    knot_vector = splinelab.aptknt(marginize_end(base_sites), spline_order)
    spl = bspline.Bspline(knot_vector, spline_order)

    # get number of basis functions (perform dummy evaluation and count)
    n_basis = len(spl(0.))

    # Sites used for creating the Elmer spline.
    #
    # We use different sites for different functions to represent them well
    # with a small number of points.
    #
    n_sites = 41
    sites = np.empty((n_basis, n_sites), dtype=np.float64)
    samples = np.empty((n_basis, n_sites), dtype=np.float64)

    # The first and last basis functions have no internal maximum.
    #
    sites[0, :] = quadspace(lo, hi, n_sites, refine='left')
    sites[-1, :] = quadspace(lo, hi, n_sites, refine='right')

    # For the other functions, create a spacing that has one point
    # exactly at the peak.
    #
    import scipy.optimize

    def make_f(j):
        # First derivative of the jth basis function, callable on arrays.
        derivative = spl.diff(order=1)

        def component(x):
            return derivative(x)[j]

        # horrible performance, this is just a Python loop
        return np.vectorize(component)

    for j in range(1, n_basis - 1):
        # The maximum of the jth basis function is known to be near knot j,
        # so we search from knot j-1 to knot j+1.
        #
        # The fitting is very sensitive to the placement of this point,
        # so attempt to solve down to the last ulp.
        #
        slope = make_f(j)
        peak = scipy.optimize.bisect(slope,
                                     base_sites[j - 1],
                                     base_sites[j + 1],
                                     xtol=ulp(base_sites[j]))

        # Use a quadratic spacing with more points near the peak.
        #
        left_part = quadspace(lo, peak, n_sites // 2 + 1, refine='right')
        right_part = quadspace(peak, hi, n_sites // 2 + 1, refine='left')
        merged = left_part.tolist()
        merged.extend(right_part[1:])  # discard the duplicate point at the peak
        sites[j, :] = merged

    # Evaluate each basis function at each of the sites chosen for it.
    #
    for j in range(n_basis):
        for i, site in enumerate(sites[j, :]):
            samples[j, i] = spl(site)[j]

    # Sites used for visualization of results (same for all functions).
    #
    n_vis = 10001
    vis_sites = np.linspace(lo, hi, n_vis)

    # Evaluate at visualization sites (for debug only)
    #
    vis_values = np.empty((n_basis, n_vis), dtype=np.float64)
    for i, site in enumerate(vis_sites):
        vis_values[:, i] = spl(site)

    # Create the fits and plot.

    # create a list of unique colors for plotting
    #
    # http://stackoverflow.com/questions/8389636/creating-over-20-unique-legend-colors-using-matplotlib
    #
    n_colors = n_basis
    colormap = plt.get_cmap('gist_rainbow')
    normalizer = matplotlib.colors.Normalize(vmin=0, vmax=n_colors - 1)
    mapper = matplotlib.cm.ScalarMappable(norm=normalizer, cmap=colormap)
    palette = [mapper.to_rgba(index) for index in range(n_colors)]

    plt.figure(1)
    plt.clf()
    for j in range(n_basis):
        # Create Elmer-format cubic spline approximation using the data at the sites.
        coeffs = elmerspline.solve_coeffs(sites[j, :], samples[j, :])

        # TODO: tabulate and save coeffs for use with Elmer

        # Plot the original basis function.
        plt.plot(vis_sites, vis_values[j, :], linestyle='dashed', color=palette[j])

        # Plot the approximation.
        approximation = elmerspline.evaluate_cubic_spline(sites[j, :], samples[j, :], coeffs, vis_sites)
        plt.plot(vis_sites, approximation, linestyle='solid', color=palette[j])
def fit_1d_spline(x, y, knots=None, nvis=10001):
    """Fit a cubic B-spline to 1D data in the least-squares sense.

    Parameters:
        x:
            rank-1 array, sites of the data points
        y:
            data values at the sites ``x``
        knots:
            optional preliminary knot sites, passed through ``splinelab.aptknt``;
            if None, 161 sites emphasizing both ends of [min(x), max(x)] are
            generated (good for BH curves)
        nvis:
            int, number of equally spaced points on [min(x), max(x)] at which
            the fitted spline is evaluated

    Returns:
        tuple ``(xx_spline, yy_spline)``: evaluation sites and fitted values
    """
    spline_order = 3
    minx = np.min(x)
    maxx = np.max(x)

    # Preliminary placement of knots.
    #
    # Bump the last site slightly. The spline is nonzero only on the *half-open* interval [x1, x2),
    # so the value of the spline interpolant exactly at the end of the span is always 0.
    #
    if knots is not None:
        kk = knots
    else:
        # if no custom knot vector, make one now (emphasize ends -- good for BH curves)
        kk = np.linspace(0, 1, 81)
        kk = kk**2
        kk = mirspace(kk)
        kk = minx + (1. + 1e-8)*(maxx - minx)*kk

    # NOTE(review): aptknt is assumed to apply to custom knots as well as the
    # generated ones -- confirm against the original indentation.
    kk = splinelab.aptknt(kk, order=spline_order)
    spl = bspline.Bspline(order=spline_order, knot_vector=kk)

    # Construct the overdetermined linear system for determining the optimal
    # spline coefficients. With a single unknown field, the global matrix is
    # just the collocation matrix at the data sites.
    nx = x.shape[0]
    A = np.array(spl.collmat(x), dtype=np.float64)  # global matrix (copy)
    b = np.empty((nx,), dtype=np.float64)  # global RHS
    b[:] = y

    # solve the overdetermined linear system (in the least-squares sense)

    # sparse solver (SciPy LSQR)
    S = scipy.sparse.coo_matrix(A)
    n_elems = np.prod(S.shape)
    print( " matrix shape %s = %d elements; %d nonzeros (%g%%)" % (S.shape, n_elems, S.nnz, 100. * S.nnz / n_elems ) )
    ret = scipy.sparse.linalg.lsqr( S, b )
    c, exit_reason = ret[:2]
    # LSQR istop: 1 = approximate solution of Ax = b, 2 = least-squares solution found.
    # Both are successful terminations, so only warn on anything else.
    if exit_reason not in (1, 2):
        print( "WARNING: solver did not converge (exit_reason = %d)" % (exit_reason) )

    # evaluate the computed optimal b-spline
    #
    xx_spline = np.linspace(minx, maxx, nvis)
    Avis = spl.collmat(xx_spline)
    yy_spline = np.sum( Avis*c, axis=-1 )

    return (xx_spline, yy_spline)
def QLBS_EPUT(S0, mu, sigma, r, M, T, risk_lambda, N_MC, delta_t, gamma, K,
              rand_seed):
    """Run the QLBS experiment for a European put: DP hedges plus Fitted Q Iteration.

    The function
      1. simulates ``N_MC`` stock price paths over ``T`` time steps,
      2. expands the state variable in a B-spline basis,
      3. computes hedges and portfolio values backward in time (DP approach),
      4. builds off-policy data by multiplying the DP hedges with uniform noise,
      5. fits the Q-function backward in time on that data (FQI).

    Arguments:
    S0 - initial stock price (column 0 of every simulated path)
    mu - drift used in the path simulation and in the state variable
    sigma - volatility used in the path simulation and in the state variable
    r - rate used in ``exp(r * delta_t)`` when forming the price increments
    M - not used by this function; kept so existing callers keep working
    T - number of time steps (frames have ``T + 1`` columns, 0..T)
    risk_lambda - risk aversion coefficient multiplying the variance terms
    N_MC - number of Monte Carlo paths (rows are labelled 1..N_MC)
    delta_t - length of one time step
    gamma - one time-step discount factor
    K - strike of the put
    rand_seed - seed passed to ``np.random.seed`` before the path simulation

    Return:
    - list ``[Q_star.iloc[:, 0], compTime]``: the time-0 column of the optimal
      Q-function (one value per path) and the wall-clock seconds spent on the
      off-policy data generation and the FQI loop.

    Side effects: reseeds the global NumPy RNG (with ``rand_seed``, later
    with 42), shows several matplotlib figures, and writes
    'Basis_functions.png', the ``np.save`` file 'data_mat_m=r_A_<N_MC>' and
    'QLBS_FQI_off_policy_summary_ATM_eta_<100*eta>.png' to the working directory.

    Note: ``risk_lambda`` and ``K`` are used as passed in. Earlier revisions
    overwrote them with the hard-coded values 0.001 and 100.
    """
    import bspline
    import bspline.splinelab as splinelab

    path_index = range(1, N_MC + 1)

    def _new_frame():
        # Float NaN frame with one row per path and one column per time step.
        # A float dtype from the start keeps np.log / np.linalg usable; a frame
        # built from an empty list would be object-dtype.
        return pd.DataFrame(np.nan, index=path_index, columns=range(T + 1))

    ###########################################################################
    # make a dataset
    np.random.seed(rand_seed)  # fix random seed

    # stock price
    S = _new_frame()
    S.loc[:, 0] = S0

    # standard normal random numbers
    RN = pd.DataFrame(np.random.randn(N_MC, T),
                      index=path_index,
                      columns=range(1, T + 1))

    for t in range(1, T + 1):
        S.loc[:, t] = S.loc[:, t - 1] * np.exp(
            (mu - 0.5 * sigma**2) * delta_t +
            sigma * np.sqrt(delta_t) * RN.loc[:, t])

    delta_S = S.loc[:, 1:T].values - np.exp(r * delta_t) * S.loc[:, 0:T - 1]
    delta_S_hat = delta_S.apply(lambda x: x - np.mean(x), axis=0)

    # state variable (delta_t here is due to the article's conventions)
    X = -(mu - 0.5 * sigma**2) * np.arange(T + 1) * delta_t + np.log(S)

    # plot (up to) 10 paths; the stride is at least 1 so small N_MC still works
    step_size = max(N_MC // 10, 1)
    idx_plot = np.arange(step_size, N_MC, step_size)

    plt.plot(S.T.iloc[:, idx_plot])
    plt.xlabel('Time Steps')
    plt.title('Stock Price Sample Paths')
    plt.ylabel('State Variable')
    plt.show()

    plt.plot(X.T.iloc[:, idx_plot])
    plt.xlabel('Time Steps')
    plt.ylabel('State Variable')
    plt.title('State Variable Sample Paths')
    plt.show()

    ###########################################################################
    def terminal_payoff(ST, K):
        """Payoff max(K - ST, 0) of a European put; works elementwise.

        ST - final stock price(s)
        K - strike
        """
        return np.maximum(K - ST, 0)

    ###########################################################################
    # Define spline basis functions
    X_min = X.values.min()
    X_max = X.values.max()

    p = 4  # order of spline, passed as-is to the bspline library
    ncolloc = 12
    # These are the sites to which we would like to interpolate
    tau = np.linspace(X_min, X_max, ncolloc)

    # k is a knot vector that adds endpoint repeats as appropriate for a
    # spline of order p. To get meaningful results, one should have
    # ncolloc >= p + 1.
    k = splinelab.aptknt(tau, p)

    # Spline basis of order p on knots k
    basis = bspline.Bspline(k, p)

    plt.figure()
    plt.title("Basis Functions to be Used For This Iteration")
    basis.plot()
    plt.savefig('Basis_functions.png', dpi=600)

    ###########################################################################
    # Make data matrices with feature values.
    # "Features" here are the values of basis functions at data points.
    # The output is a 3D array of dimensions num_tSteps x num_MC x num_basis.
    num_t_steps = T + 1
    num_basis = ncolloc

    data_mat_t = np.zeros((num_t_steps, N_MC, num_basis))
    X_values = X.values
    for i in range(num_t_steps):
        data_mat_t[i, :, :] = np.array([basis(el) for el in X_values[:, i]])

    # save these data matrices for future re-use
    np.save('data_mat_m=r_A_%d' % N_MC, data_mat_t)

    ###########################################################################
    # Dynamic Programming solution for QLBS

    # functions to compute optimal hedges
    def function_A_vec(t, delta_S_hat, data_mat, reg_param):
        """
        function_A_vec - compute the matrix A_{nm} from Eq. (52) (with a regularization!)
        Eq. (52) in QLBS Q-Learner in the Black-Scholes-Merton article

        Arguments:
        t - time index, a scalar, an index into time axis of data_mat
        delta_S_hat - pandas.DataFrame of de-meaned price increments, one column per time step
        data_mat - np.array of dimension (T + 1) x N_MC x num_basis
        reg_param - a scalar, regularization parameter

        Return:
        - np.array, i.e. matrix A_{nm} of dimension num_basis x num_basis
        """
        X_mat = data_mat[t, :, :]
        num_basis_funcs = X_mat.shape[1]
        hat_dS2 = (delta_S_hat.loc[:, t]**2).values.reshape(-1, 1)
        A_mat = np.dot(X_mat.T, X_mat * hat_dS2) + reg_param * np.eye(num_basis_funcs)
        return A_mat

    def function_B_vec(t,
                       Pi_hat,
                       delta_S_hat=delta_S_hat,
                       S=S,
                       data_mat=data_mat_t,
                       gamma=gamma,
                       risk_lambda=risk_lambda):
        """
        function_B_vec - compute vector B_{n} from Eq. (52) QLBS Q-Learner in the Black-Scholes-Merton article

        Only the pure risk hedge term is computed, so S, gamma and
        risk_lambda are accepted but not used.

        Arguments:
        t - time index, a scalar, an index into time axis of delta_S_hat
        Pi_hat - pandas.DataFrame of de-meaned portfolio values, one column per time step
        delta_S_hat - pandas.DataFrame of de-meaned price increments
        S - pandas.DataFrame of simulated stock prices (unused)
        data_mat - np.array of dimension (T + 1) x N_MC x num_basis
        gamma - one time-step discount factor (unused)
        risk_lambda - risk aversion coefficient (unused)

        Return:
        B_vec - np.array of length num_basis
        """
        tmp = Pi_hat.loc[:, t + 1] * delta_S_hat.loc[:, t]
        X_mat = data_mat[t, :, :]  # matrix of dimension N_MC x num_basis
        B_vec = np.dot(X_mat.T, tmp.values)
        return B_vec

    ###########################################################################
    # Compute optimal hedge and portfolio value

    # portfolio value
    Pi = _new_frame()
    Pi.iloc[:, -1] = terminal_payoff(S.iloc[:, -1], K)

    Pi_hat = _new_frame()
    Pi_hat.iloc[:, -1] = Pi.iloc[:, -1] - np.mean(Pi.iloc[:, -1])

    # optimal hedge
    a = _new_frame()
    a.iloc[:, -1] = 0

    reg_param = 1e-3
    for t in range(T - 1, -1, -1):
        A_mat = function_A_vec(t, delta_S_hat, data_mat_t, reg_param)
        B_vec = function_B_vec(t, Pi_hat, delta_S_hat, S, data_mat_t)

        # solve A phi = B directly instead of forming the inverse of A
        phi = np.linalg.solve(A_mat, B_vec)

        a.loc[:, t] = np.dot(data_mat_t[t, :, :], phi)
        Pi.loc[:, t] = gamma * (Pi.loc[:, t + 1] - a.loc[:, t] * delta_S.loc[:, t])
        Pi_hat.loc[:, t] = Pi.loc[:, t] - np.mean(Pi.loc[:, t])

    # plot 10 paths of the hedge and of the portfolio value
    plt.plot(a.T.iloc[:, idx_plot])
    plt.xlabel('Time Steps')
    plt.title('Optimal Hedge')
    plt.show()

    plt.plot(Pi.T.iloc[:, idx_plot])
    plt.xlabel('Time Steps')
    plt.title('Portfolio Value')
    plt.show()

    ###########################################################################
    # Build the off-policy data set for batch-mode Fitted Q-Iteration:
    # disturbed actions, the portfolio values they imply, and the rewards.
    starttime = time.time()

    eta = 0.5  # relative half-width of the multiplicative action noise
    reg_param = 1e-3
    np.random.seed(42)  # fix random seed

    # disturbed optimal actions to be computed
    a_op = _new_frame()
    a_op.iloc[:, -1] = 0

    # portfolio value
    Pi_op = _new_frame()
    Pi_op.iloc[:, -1] = terminal_payoff(S.iloc[:, -1], K)

    Pi_op_hat = _new_frame()
    Pi_op_hat.iloc[:, -1] = Pi_op.iloc[:, -1] - np.mean(Pi_op.iloc[:, -1])

    # reward function
    R_op = _new_frame()
    R_op.iloc[:, -1] = -risk_lambda * np.var(Pi_op.iloc[:, -1])

    # The backward loop
    for t in range(T - 1, -1, -1):
        # 1.-2. Take the DP hedge and disturb it by a random factor
        a_op.loc[:, t] = a.loc[:, t] * np.random.uniform(
            1 - eta, 1 + eta, size=a_op.shape[0])

        # 3. Compute portfolio values corresponding to observed actions
        Pi_op.loc[:, t] = gamma * (Pi_op.loc[:, t + 1] -
                                   a_op.loc[:, t] * delta_S.loc[:, t])
        Pi_op_hat.loc[:, t] = Pi_op.loc[:, t] - np.mean(Pi_op.loc[:, t])

        # 4. Compute rewards corresponding to observed actions
        R_op.loc[:, t] = (gamma * a_op.loc[:, t] * delta_S.loc[:, t] -
                          risk_lambda * np.var(Pi_op.loc[:, t]))

    # Plot 10 reward paths (time on the x axis, like the other path plots)
    plt.plot(R_op.T.iloc[:, idx_plot])
    plt.xlabel('Time Steps')
    plt.title('Reward Function')
    plt.show()

    ###########################################################################
    # Override on-policy data with off-policy data
    a = a_op.copy()  # disturbed actions
    Pi = Pi_op.copy()  # disturbed portfolio values
    Pi_hat = Pi_op_hat.copy()
    R = R_op.copy()

    # make matrix A_t of shape (3 x num_MC x num_steps)
    num_MC, num_TS = a.shape
    a_1_1 = a.values.reshape((1, num_MC, num_TS))
    a_1_2 = 0.5 * a_1_1**2
    ones_3d = np.ones((1, num_MC, num_TS))

    A_stack = np.vstack((ones_3d, a_1_1, a_1_2))

    data_mat_swap_idx = np.swapaxes(data_mat_t, 0, 2)

    # expand dimensions of matrices to multiply element-wise
    A_2 = np.expand_dims(A_stack, axis=1)  # (3, 1, num_MC, num_steps)
    data_mat_swap_idx = np.expand_dims(data_mat_swap_idx,
                                       axis=0)  # (1, num_basis, num_MC, num_steps)

    # this is an array of size 3 x num_basis x num_MC x num_steps
    Psi_mat = np.multiply(A_2, data_mat_swap_idx)

    # now concatenate columns along the first dimension
    Psi_mat = Psi_mat.reshape(-1, N_MC, T + 1, order='F')

    ###########################################################################
    # make matrix S_t
    Psi_1_aux = np.expand_dims(Psi_mat, axis=1)
    Psi_2_aux = np.expand_dims(Psi_mat, axis=0)

    S_t_mat = np.sum(np.multiply(Psi_1_aux, Psi_2_aux), axis=2)

    # clean up some space
    del Psi_1_aux, Psi_2_aux, data_mat_swap_idx, A_2

    ###########################################################################
    def function_S_vec(t, S_t_mat, reg_param):
        """
        function_S_vec - calculate S_{nm} matrix from Eq. (75) (with a regularization!)
        Eq. (75) in QLBS Q-Learner in the Black-Scholes-Merton article

        num_Qbasis = 3 x num_basis, 3 because of the basis expansion (1, a_t, 0.5 a_t^2)

        Arguments:
        t - time index, a scalar, an index into time axis of S_t_mat
        S_t_mat - np.array of dimension num_Qbasis x num_Qbasis x (T + 1)
        reg_param - regularization parameter, a scalar
        Return:
        S_mat_reg - num_Qbasis x num_Qbasis
        """
        num_Qbasis = S_t_mat.shape[0]
        S_mat_reg = S_t_mat[:, :, t] + reg_param * np.eye(num_Qbasis)
        return S_mat_reg

    def function_M_vec(t, Q_star, R, Psi_mat_t, gamma=gamma):
        """
        function_M_vec - calculate M_{nm} vector from Eq. (75)
        Eq. (75) in QLBS Q-Learner in the Black-Scholes-Merton article

        num_Qbasis = 3 x num_basis, 3 because of the basis expansion (1, a_t, 0.5 a_t^2)

        Arguments:
        t - time index, a scalar, an index into time axis of Q_star and R
        Q_star - pandas.DataFrame of Q-function values, one column per time step
        R - pandas.DataFrame of rewards, one column per time step
        Psi_mat_t - np.array of dimension num_Qbasis x N_MC
        gamma - one time-step discount factor
        Return:
        M_t - np.array of length num_Qbasis
        """
        target = R.loc[:, t] + gamma * Q_star.loc[:, t + 1]
        M_t = np.dot(Psi_mat_t, target.values)
        return M_t

    ###########################################################################
    # Fitted Q Iteration (FQI): call function_S_vec and function_M_vec for
    # t = T-1, ..., 0 to compute W_t and learn the Q-function backward
    # recursively, starting from the terminal condition set below.

    # implied Q-function by input data (using the first form in Eq.(68))
    Q_RL = _new_frame()
    Q_RL.iloc[:, -1] = -Pi.iloc[:, -1] - risk_lambda * np.var(Pi.iloc[:, -1])

    # optimal action
    a_opt = np.zeros((N_MC, T + 1))
    a_star = _new_frame()
    a_star.iloc[:, -1] = 0

    # optimal Q-function with optimal action
    Q_star = _new_frame()
    Q_star.iloc[:, -1] = Q_RL.iloc[:, -1]

    max_Q_star = np.zeros((N_MC, T + 1))
    max_Q_star[:, -1] = Q_RL.iloc[:, -1].values

    num_basis = data_mat_t.shape[2]

    reg_param = 1e-3

    # percentiles used to trim outliers of Q_RL
    low_percentile_Q_RL = 5
    up_percentile_Q_RL = 95

    # The backward loop
    for t in range(T - 1, -1, -1):
        # calculate vector W_t (a 1D array of dimension 3M)
        S_mat_reg = function_S_vec(t, S_t_mat, reg_param)
        M_t = function_M_vec(t, Q_star, R, Psi_mat[:, :, t], gamma)
        W_t = np.linalg.solve(S_mat_reg, M_t)

        # reshape to a matrix W_mat of shape 3 x M
        W_mat = W_t.reshape((3, num_basis), order='F')

        # make matrix Phi_mat of dimension M x N_MC
        Phi_mat = data_mat_t[t, :, :].T

        # rows of U_mat (3 x N_MC) are the vectors U_W^0, U_W^1, U_W^2
        U_mat = np.dot(W_mat, Phi_mat)
        U_W_0, U_W_1, U_W_2 = U_mat

        # IMPORTANT!!! Use hedges computed as in the DP approach:
        # in this way, errors of function approximation do not back-propagate.
        # This provides a stable solution, unlike taking the hedge from the
        # fitted Q-function, which leads to a diverging solution.
        A_mat = function_A_vec(t, delta_S_hat, data_mat_t, reg_param)
        B_vec = function_B_vec(t, Pi_hat, delta_S_hat, S, data_mat_t)
        phi = np.linalg.solve(A_mat, B_vec)

        a_opt[:, t] = np.dot(data_mat_t[t, :, :], phi)
        a_star.loc[:, t] = a_opt[:, t]

        max_Q_star[:, t] = (U_W_0 + a_opt[:, t] * U_W_1 +
                            0.5 * (a_opt[:, t]**2) * U_W_2)

        # update dataframes
        Q_star.iloc[:, t] = max_Q_star[:, t]

        # update the Q_RL solution given by a dot product of Psi_t and W_t
        Psi_t = Psi_mat[:, :, t].T  # dimension N_MC x 3M
        q_rl_t = np.dot(Psi_t, W_t)

        # trim outliers of Q_RL to the [5th, 95th] percentile band
        low_perc_Q_RL, up_perc_Q_RL = np.percentile(
            q_rl_t, [low_percentile_Q_RL, up_percentile_Q_RL])
        Q_RL.loc[:, t] = np.clip(q_rl_t, low_perc_Q_RL, up_perc_Q_RL)

    endtime = time.time()

    ###########################################################################
    # plot both simulations
    f, axarr = plt.subplots(3, 1)
    f.subplots_adjust(hspace=.5)
    f.set_figheight(8.0)
    f.set_figwidth(8.0)

    axarr[0].plot(a_star.T.iloc[:, idx_plot])
    axarr[0].set_xlabel('Time Steps')
    axarr[0].set_title(r'Optimal action $a_t^{\star}$')

    axarr[1].plot(Q_RL.T.iloc[:, idx_plot])
    axarr[1].set_xlabel('Time Steps')
    axarr[1].set_title(r'Q-function $Q_t^{\star} (X_t, a_t)$')

    axarr[2].plot(Q_star.T.iloc[:, idx_plot])
    axarr[2].set_xlabel('Time Steps')
    axarr[2].set_title(r'Optimal Q-function $Q_t^{\star} (X_t, a_t^{\star})$')

    # save before showing: once an interactive window has been shown and
    # closed, saving would write an empty figure
    f.savefig('QLBS_FQI_off_policy_summary_ATM_eta_%d.png' % (100 * eta),
              dpi=600)
    plt.show()

    # plot 1 path; at this point `a` holds the disturbed (off-policy) actions
    num_path = min(300, N_MC - 1)  # stay inside the simulated paths

    plt.plot(a.T.iloc[:, num_path], label="DP Action")
    plt.plot(a_star.T.iloc[:, num_path], label="RL Action")
    plt.legend()
    plt.xlabel('Time Steps')
    plt.title('Optimal Action Comparison Between DP and RL for a sample path')
    plt.show()

    compTime = endtime - starttime

    return ([Q_star.iloc[:, 0], compTime])
# I4 = fI4(Hx, Hy, Hz, sxx, syy, szz, sxy, syz, szx) I5 = fI5(Hx, Hy, Hz, sxx, syy, szz, sxy, syz, szx) #I6 = fI6(Hx, Hy, Hz, sxx, syy, szz, sxy, syz, szx) u = fu(Hx, Hy, Hz, sxx, syy, szz, sxy, syz, szx) / Hscale v = fv(Hx, Hy, Hz, sxx, syy, szz, sxy, syz, szx) / sscale #w = fw(Hx, Hy, Hz, sxx, syy, szz, sxy, syz, szx) / sscale nx = len(u) ny = len(v) assert nx == ny, "sequences (u,v) describing the input points must be of the same length" # Set up splines for evaluation # splx = bspline.Bspline(xknots, ordr) sply = bspline.Bspline(yknots, ordr) # get number of basis functions (perform dummy evaluation and count) nxb = len(splx(0.)) nyb = len(sply(0.)) # NOTE: we are evaluating the model on a sequence of points (u[j], v[j]), not on a meshgrid (outer(u,v)). # # However, the *spline basis* is a meshgrid in the sense that # it is the outer product of the x and y basis functions. # Au = splx.collmat(u) Av = sply.collmat(v) Du = splx.collmat(u, deriv_order=1) Dv = sply.collmat(v, deriv_order=1)
#!/usr/bin/python
"""Plot the B-spline basis for an order and knot list given on the command line."""
import sys

import bspline


def print_usage():
    """Print the command-line synopsis followed by an example invocation."""
    print('{} <output_file> <order> <knot_list>'.format(sys.argv[0]))
    print('Example: {} fig.ps 3 0 0 1 2 3 4 5'.format(sys.argv[0]))


try:
    filename = sys.argv[1]
    order = int(sys.argv[2])
    # A real list (not a lazy map object) so that len() works on Python 3 too.
    knots = [int(knot) for knot in sys.argv[3:]]
    # Explicit check instead of assert, which is stripped under "python -O".
    if len(knots) < order + 2:
        raise ValueError('need at least order + 2 knots')
except (IndexError, ValueError):
    # Missing or malformed arguments: show the usage text and stop.
    print_usage()
    # Exit status 0 is kept so existing callers see the same status as before.
    sys.exit(0)

basis = bspline.Bspline(knots, order)
basis.plot()
bspline.plt.savefig(filename)
def main():
    """Demo: draw a B-spline basis together with its first three derivatives.

    The four curves families (B, B', B'', B''') go into a 2x2 grid of
    subplots on figure 1; every basis function keeps the same color in all
    four panels, and the knot positions are marked with black crosses.
    """
    # ---------------------------------------------------------------- config

    # Order of the spline basis.
    p = 3

    # Knot vector, endpoints included. Each endpoint is listed once here,
    # whatever the value of p; augknt() below adds the repeats. Duplicate
    # interior knots are allowed, with the standard meaning for B-splines.
    knots = [0., 0.25, 0.5, 0.75, 1.]

    # Number of plotting points per subinterval [knots[i], knots[i+1]).
    # Zero-length intervals are not plotted.
    nt_per_interval = 101

    # ------------------------------------------------------------------ demo

    # The evaluation in bspline.py works on half-open intervals
    # t_i <= x < t_{i+1}, so the right end of each interval already belongs
    # to the next one, and the right end of the last interval evaluates to
    # zero. Stopping a small epsilon short of t_{i+1} avoids that.
    epsrel = 1e-10
    epsabs = epsrel * (knots[-1] - knots[0])

    original_knots = knots
    # add repeated endpoint knots for splines of order p
    knots = splinelab.augknt(knots, p)
    B = bspline.Bspline(knots, p)

    # One sample grid per interval, so that discontinuities are preserved
    # (useful especially for the highest-order nonzero derivative).
    xxs = []
    for left, right in zip(knots[:-1], knots[1:]):
        right_inside = right - epsabs
        # keep only intervals of positive length; this skips repeated knots
        if right_inside - left > 0.:
            xxs.append(np.linspace(left, right_inside, nt_per_interval))

    # line style shared by every plotted curve
    settings = {"linestyle": 'solid', "linewidth": 1.0}

    # One unique color per basis function, see
    # http://stackoverflow.com/questions/8389636/creating-over-20-unique-legend-colors-using-matplotlib
    # A dummy evaluation tells us how many basis functions there are.
    NUM_COLORS = nbasis = len(B(0.))
    cm = plt.get_cmap('gist_rainbow')
    cNorm = matplotlib.colors.Normalize(vmin=0, vmax=NUM_COLORS - 1)
    scalarMap = matplotlib.cm.ScalarMappable(norm=cNorm, cmap=cm)
    colors = [scalarMap.to_rgba(i) for i in range(NUM_COLORS)]

    labels = [
        r"$B$", r"$\mathrm{d}B\,/\,\mathrm{d}x$",
        r"$\mathrm{d}^2B\,/\,\mathrm{d}x^2$",
        r"$\mathrm{d}^3B\,/\,\mathrm{d}x^3$"
    ]

    # markers for the knot positions
    unique_knots_xx = np.unique(original_knots)
    unique_knots_yy = np.zeros_like(unique_knots_xx)

    # plot the basis functions B(x) and their first three derivatives
    plt.figure(1)
    plt.clf()
    for k, label in enumerate(labels):
        ax = plt.subplot(2, 2, k + 1)

        # left column gets its label on the left, right column on the right
        ax.yaxis.set_label_position("left" if k % 2 == 0 else "right")

        # kth derivative of the basis (order=0 is a passthrough)
        deriv = B.diff(order=k)
        for xx in xxs:
            # deriv(scalar) -> rank-1 array, one element per basis function
            yy = np.array([deriv(x) for x in xx])
            for i in range(nbasis):
                plt.plot(xx, yy[:, i], color=colors[i], **settings)
        plt.ylabel(label)

        # show knot positions
        plt.plot(unique_knots_xx, unique_knots_yy, "kx")

    plt.suptitle(r"$B$-spline basis functions, $p=%d$" % (p))
colid = np.arange(0, 1) for d in range(1, dimpoly + 1): Phips[:, colid] = np.vstack(Xs**d) colid += 1 # generative model b = [0.5, -0.1, 0.005] # true regression coefficients s2 = 0.05 # noise variance y = Phip.dot(b) + np.random.normal(0, s2, N) # cubic B-spline basis (used for regression) p = 3 # order of spline (3 = cubic) nknots = 5 # number of knots (endpoints only counted once) knots = np.linspace(0, 10, nknots) k = splinelab.augknt(knots, p) # pad the knot vector B = bspline.Bspline(k, p) Phi = np.array([B(i) for i in X]) Phis = np.array([B(i) for i in Xs]) hyp0 = np.zeros(2) #hyp0 = np.zeros(4) # use ARD B = BLR(hyp0, Phi, y) hyp = B.estimate(hyp0, Phi, y, optimizer='powell') yhat, s2 = B.predict(hyp, Phi, y, Phis) plt.fill_between(Xs, yhat - 1.96 * np.sqrt(s2), yhat + 1.96 * np.sqrt(s2), alpha=0.2) plt.scatter(X, y) plt.plot(Xs, yhat)
def __init__(self, knots=None, order=3, nrknots=None, min=None, max=None,
             xrange=None, copy=None, fixed=None, **kwargs):
    """
    Splines on a given set of knots and a given order.

    The number of parameters is ( length( knots ) + order - 1 )

    Parameters
    ----------
    knots : array_like
        an array of arbitrarily positioned knots
    order : int
        order of the spline. Default 3 (cubic splines)
    nrknots : int
        number of knots, equidistantly positioned over xrange or [min,max];
        ignored when `knots` is given
    min : float
        minimum of the knot range
    max : float
        maximum of the knot range
    xrange : array_like
        range of the xdata; its minimum and maximum replace `min` and `max`
    copy : BSplinesModel
        model to be copied; its knots and order replace `knots` and `order`
    fixed : dict
        If not None raise AttributeError.

    Raises
    ------
    ValueError : At least either (`knots`) or (`nrknots`, `min`, `max`) or
            (`nrknots`, `xrange`) must be provided to define a valid model.
    AttributeError : When fixed is not None

    Notes
    -----
    The BSplinesModel is only strictly valid inside the domain defined by the
    minmax of knots. It deteriorates quickly going outside the domain.

    """
    if fixed is not None:
        raise AttributeError("BSplinesModel cannot have fixed parameters")

    if copy is not None:
        knots = copy.knots
        order = copy.order

    if knots is not None:
        nrknots = len(knots)
    elif nrknots is None or (xrange is None and
                             (min is None or max is None)):
        # Without explicit knots a knot count AND a range are both needed.
        # Checking the range here gives the documented ValueError instead of
        # a TypeError from linspace(None, None, ...) further down.
        raise ValueError(
            "Need either knots or (nrknots,min,max) or (nrknots,xrange)")

    super(BSplinesModel, self).__init__(order + nrknots - 1,
                                        copy=copy,
                                        **kwargs)

    self.order = order
    if knots is None:
        if xrange is not None:
            min = numpy.min(xrange)
            max = numpy.max(xrange)
        # equidistant knots over [min, max]
        knots = numpy.linspace(min, max, nrknots, dtype=float)
    self.knots = knots
    # knot vector with the endpoint repeats needed for this spline order
    self.augknots = splinelab.augknt(knots, order)
    self.Bspline = bspline.Bspline(self.augknots, self.order, last=True)
    self.eps = 0