def gen_path(self):
    # Path generator (Black-Scholes / geometric Brownian motion).
    # (Assumes `from numpy.random import seed, standard_normal` plus the
    #  usual numpy / time / bspline / splinelab imports at module level.)
    seed(42)
    for i in range(1, self.num_steps + 1):
        std_norm = standard_normal(self.num_paths)
        exp_pow = (self.mu - self.vol ** 2 / 2) * self.dt \
                  + self.vol * np.sqrt(self.dt) * std_norm
        self.s_values[:, i] = self.s_values[:, i - 1] * np.exp(exp_pow)

    delta_S = (1 - self.tr_alpha) * self.s_values[:, 1:] \
              - 1 / self.gamma * self.s_values[:, :self.num_steps]
    self.delta_S = delta_S
    self.delta_S_hat = np.apply_along_axis(lambda x: x - np.mean(x),
                                           axis=0, arr=delta_S)

    # State variable X_t = -(mu - vol^2/2) * t + log(S_t)
    self.X = -(self.mu - 0.5 * self.vol ** 2) * np.arange(self.num_steps + 1) \
             * self.dt + np.log(self.s_values)
    X_min = np.min(self.X)
    X_max = np.max(self.X)
    print("Shape of X: {}\nMax: {}\nMin: {}".format(self.X.shape, X_max, X_min))

    # Terminal conditions: payoff, demeaned payoff, Q-function and reward.
    self.pi[:, -1] = np.maximum(self.s_values[:, -1] - self.K, 0)
    self.pi_hat[:, -1] = self.pi[:, -1] - np.mean(self.pi[:, -1])
    self.q[:, -1] = -self.pi[:, -1] - self.risk_lambda * np.var(self.pi[:, -1])
    self.r[:, -1] = -self.risk_lambda * np.var(self.pi[:, -1])

    # Spline basis for the state variable.
    p = 4         # order of the spline
    ncolloc = 12  # number of collocation sites
    tau = np.linspace(X_min, X_max, ncolloc)
    k = splinelab.aptknt(tau, p)
    basis = bspline.Bspline(k, p)
    num_basis = ncolloc

    # Basis expansion: data[t, path, n] = B_n(X_t) on each path.
    self.data = np.zeros((self.num_steps + 1, self.num_paths, num_basis))
    t0 = time.time()
    for ix in np.arange(self.num_steps + 1):
        x = self.X[:, ix]
        self.data[ix, :, :] = np.array([basis(el) for el in x])
    t1 = time.time()
    print("\nTime for basis expansion: {}".format(t1 - t0))
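# A minimal standalone sketch of the GBM update used in gen_path() above.
# The parameter values (mu, vol, dt, num_paths, S0 = 100) are illustrative
# only, not taken from any particular configuration of the class:
import numpy as np
from numpy.random import seed, standard_normal

seed(42)
mu, vol, dt = 0.05, 0.15, 1.0 / 24
num_paths, num_steps = 100, 24
s = np.full((num_paths, num_steps + 1), 100.0)  # every path starts at S0 = 100
for i in range(1, num_steps + 1):
    z = standard_normal(num_paths)
    # S_{t+1} = S_t * exp((mu - vol^2/2) dt + vol sqrt(dt) Z)
    s[:, i] = s[:, i - 1] * np.exp((mu - vol ** 2 / 2) * dt
                                   + vol * np.sqrt(dt) * z)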
def initialization(self, data, x):
    """
    Feed in data and initialize the variables needed in the DP solution.

    :return: Dictionary of variables.
    """
    var_dict = {}
    var_dict['ds'] = data[:, 1:self.T + 1] - 1 / self.gamma * data[:, 0:self.T]
    var_dict['ds_hat'] = var_dict['ds'] - np.mean(var_dict['ds'], axis=0)

    var_dict['pi'] = np.zeros((self.n_mc, self.T + 1))
    var_dict['pi_hat'] = np.zeros_like(var_dict['pi'])
    var_dict['action'] = np.zeros_like(var_dict['pi'])
    var_dict['q'] = np.zeros_like(var_dict['pi'])
    var_dict['reward'] = np.zeros_like(var_dict['pi'])

    # Terminal conditions.
    var_dict['pi'][:, self.T] = np.maximum(data[:, self.T] - self.K, 0)
    var_dict['pi_hat'][:, self.T] = var_dict['pi'][:, self.T] \
        - np.mean(var_dict['pi'][:, self.T])
    var_dict['action'][:, self.T] = 0
    var_dict['q'][:, self.T] = -var_dict['pi'][:, self.T] \
        - self.risk_lambda * np.var(var_dict['pi'][:, self.T])
    var_dict['reward'][:, self.T] = -self.risk_lambda * np.var(
        var_dict['pi'][:, self.T])

    # Spline basis expansion of the state variable.
    x_min, x_max = np.min(x), np.max(x)
    tau = np.linspace(x_min, x_max, self.num_basis)
    k = splinelab.aptknt(tau=tau, order=3)
    basis = bspline.Bspline(k, order=3)

    var_dict['func_x'] = np.zeros((self.n_mc, self.T + 1, self.num_basis))
    for t in range(self.T + 1):
        xt = x[:, t]
        var_dict['func_x'][:, t, :] = np.array(
            [basis(element) for element in xt])

    print('The shape of pi / action / q:', var_dict['pi'].shape)
    print('The shape of func_x:', var_dict['func_x'].shape)
    return var_dict
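# A quick shape check of the basis expansion used above: with aptknt(), the
# number of basis functions equals the number of collocation sites, which is
# why the code sets num_basis to the site count. The site values here are
# arbitrary, chosen just to exercise the call:
import numpy as np
import bspline
import bspline.splinelab as splinelab

tau = np.linspace(-0.5, 0.5, 12)   # 12 collocation sites
k = splinelab.aptknt(tau, 3)       # knot vector for an order-3 spline
basis = bspline.Bspline(k, 3)
print(len(basis(0.0)))             # -> 12 basis-function values per evaluation site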
def main():
    # Original knots
    #
    x1 = 0
    x2 = 1
    x_orig = np.linspace(x1, x2, 21)
    order = 3

    # We stretch the domain of the splines just a bit to avoid evaluating them
    # exactly at x2, as the splines actually have support on the half-open
    # interval [x1, x2).
    #
    def marginize_end(x):
        out = x.copy()
        out[-1] += 1e-10 * (x[-1] - x[0])
        return out

    knots = splinelab.aptknt(marginize_end(x_orig), order)
    spl = bspline.Bspline(knots, order)
    nb = len(spl(0.))  # get number of basis functions (perform a dummy evaluation and count)

    # Sites used for creating the Elmer spline.
    #
    # We use different sites for different functions to represent them well
    # with a small number of points.
    #
    nx = 41
    xx = np.empty((nb, nx), dtype=np.float64)
    ff = np.empty((nb, nx), dtype=np.float64)

    # The first and last basis functions have no internal maximum.
    #
    xx[0, :] = quadspace(x1, x2, nx, refine='left')
    xx[-1, :] = quadspace(x1, x2, nx, refine='right')

    # For the other functions, create a spacing that has one point
    # exactly at the peak.
    #
    import scipy.optimize

    def make_f(j):
        scalar_f = spl.diff(order=1)  # lambda x: ...
        scalar_fj = lambda x: scalar_f(x)[j]
        vector_fj = np.vectorize(scalar_fj)  # horrible performance; this is just a Python loop
        return vector_fj

    for j in range(1, nb - 1):
        # The maximum of the jth basis function is known to be near knot j,
        # so we search from knot j-1 to knot j+1.
        #
        # The fitting is very sensitive to the placement of this point,
        # so attempt to solve down to the last ulp.
        #
        fj = make_f(j)
        x0 = scipy.optimize.bisect(fj, x_orig[j - 1], x_orig[j + 1],
                                   xtol=ulp(x_orig[j]))

        # Use a quadratic spacing with more points near the peak.
        #
        xx_left = quadspace(x1, x0, nx // 2 + 1, refine='right')
        xx_right = quadspace(x0, x2, nx // 2 + 1, refine='left')
        tmp = xx_left.tolist()
        tmp.extend(xx_right[1:])  # discard the duplicate point at the peak
        xx[j, :] = tmp

    # Evaluate each basis function at each of the sites chosen for it.
    #
    for j in range(nb):
        for i, x in enumerate(xx[j, :]):
            ff[j, i] = spl(x)[j]

    # Sites used for visualization of the results (same for all functions).
    #
    nvis = 10001
    xxvis = np.linspace(x1, x2, nvis)

    # Evaluate at the visualization sites (for debug only).
    #
    ffvis = np.empty((nb, nvis), dtype=np.float64)
    for i, x in enumerate(xxvis):
        ffvis[:, i] = spl(x)

    # Create the fits and plot.

    # Create a list of unique colors for plotting:
    # http://stackoverflow.com/questions/8389636/creating-over-20-unique-legend-colors-using-matplotlib
    #
    NUM_COLORS = nb
    cm = plt.get_cmap('gist_rainbow')
    cNorm = matplotlib.colors.Normalize(vmin=0, vmax=NUM_COLORS - 1)
    scalarMap = matplotlib.cm.ScalarMappable(norm=cNorm, cmap=cm)
    colors = [scalarMap.to_rgba(i) for i in range(NUM_COLORS)]

    plt.figure(1)
    plt.clf()
    for j in range(nb):
        # Create an Elmer-format cubic spline approximation using the data at the sites.
        rr = elmerspline.solve_coeffs(xx[j, :], ff[j, :])

        # TODO: tabulate and save rr for use with Elmer

        # Plot the original basis function.
        plt.plot(xxvis, ffvis[j, :], linestyle='dashed', color=colors[j])

        # Plot the approximation.
        plt.plot(xxvis,
                 elmerspline.evaluate_cubic_spline(xx[j, :], ff[j, :], rr, xxvis),
                 linestyle='solid', color=colors[j])
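# The helpers quadspace() and ulp() used above come from the surrounding
# project and are not defined in this listing. A minimal sketch of plausible
# implementations, assuming quadspace() returns n sites on [a, b] clustered
# quadratically toward the chosen end, and ulp() returns the spacing between
# adjacent floats at x:
import numpy as np

def quadspace(a, b, n, refine='left'):
    """n sites on [a, b], quadratically refined toward one end (a sketch)."""
    u = np.linspace(0.0, 1.0, n)
    if refine == 'left':
        u = u ** 2                 # dense near a
    else:
        u = 1.0 - (1.0 - u) ** 2   # dense near b
    return a + (b - a) * u

def ulp(x):
    """Unit in the last place of x (distance to the next representable float)."""
    return np.spacing(np.abs(x))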
def main():
    ######################
    # Config
    ######################

    # Choose the least-squares solver to use:
    #
    # lsq_solver = "dense"     # LAPACK DGELSD, direct, good for small problems
    # lsq_solver = "sparse"    # SciPy LSQR, iterative, asymptotically faster, good for large problems
    # lsq_solver = "optimize"  # general nonlinear optimizer using the Trust Region Reflective (trf) algorithm
    # lsq_solver = "qr"
    # lsq_solver = "cholesky"
    # lsq_solver = "sparse_qr"
    lsq_solver = "sparse_qr_solve"

    ######################
    # Load multiscale data
    ######################

    print("Loading measurement data...")

    # Measurements are provided on a meshgrid over (Hx, sigxx).

    # data2.mat contains virtual measurements, generated from a multiscale model.
    # data2 = scipy.io.loadmat("data2.mat")
    # Hx    = np.squeeze(data2["Hx"])     # 1D array, (M,)
    # sigxx = np.squeeze(data2["sigxx"])  # 1D array, (N,)
    # Bx    = data2["Bx"]                 # 2D array, (M, N)
    # lamxx = data2["lamxx"]              # --"--
    ## lamyy = data2["lamyy"]             # --"--
    ## lamzz = data2["lamzz"]             # --"--

    data2 = scipy.io.loadmat("umair_gal_denoised.mat")
    sigxx = -1e6 * np.array([0, 1, 5, 10, 15, 20, 25, 30, 35, 40,
                             45, 50, 55, 60, 65, 70, 75, 80][::-1],
                            dtype=np.float64)
    assert sigxx.shape[0] == 18
    Hx = data2["Hval"][0, :]  # same for all sigma; just take the first row
    Bx = data2["Bval"].T
    lamxx = data2["LHval"].T * 1e-6
    Bx = Bx[::-1, :]
    lamxx = lamxx[::-1, :]

    # HACK, fix later (must decouple the number of knots from the number of data sites)
    ii = np.arange(Hx.shape[0])
    n_newi = 401
    newii = np.linspace(0, ii[-1], n_newi)
    nsigma = sigxx.shape[0]
    fH = scipy.interpolate.interp1d(ii, Hx)
    newB = np.empty((n_newi, Bx.shape[1]), dtype=np.float64)
    newlam = np.empty((n_newi, lamxx.shape[1]), dtype=np.float64)
    for j in range(nsigma):
        fB = scipy.interpolate.interp1d(ii, Bx[:, j])
        newB[:, j] = fB(newii)
        flam = scipy.interpolate.interp1d(ii, lamxx[:, j])
        newlam[:, j] = flam(newii)
    Hx = fH(newii)
    Bx = newB
    lamxx = newlam

    # Order of spline (as-is! 3 = cubic)
    ordr = 3

    # Auxiliary variables (H, sig_xx, sig_xy)
    Hscale = np.max(Hx)
    sscale = np.max(np.abs(sigxx))
    x = Hx / Hscale
    y = sigxx / sscale
    nx = x.shape[0]  # number of grid points, x axis
    ny = y.shape[0]  # number of grid points, y axis

    # Partial derivatives (B, lam_xx, lam_xy) from the multiscale model.
    #
    # In the magnetostriction components, the multiscale model produces
    # nonzero lamxx at zero stress. We normalize this away for purposes of
    # performing the curve fit.
    #
    dpsi_dx = Bx * Hscale
    dpsi_dy = (lamxx - lamxx[0, :]) * sscale

    ######################
    # Set up splines
    ######################

    print("Setting up splines...")

    # The evaluation algorithm used in bspline.py uses half-open intervals
    # t_i <= x < t_{i+1}.
    #
    # This causes havoc for evaluation at the end of each interval, because it
    # is actually the start of the next interval.
    #
    # Especially, the end of the last interval is the start of the next
    # (non-existent) interval.
    #
    # We work around this by using a small epsilon to avoid evaluation exactly
    # at t_{i+1} (for the last interval).
    #
    def marginize_end(x):
        out = x.copy()
        out[-1] += 1e-10 * (x[-1] - x[0])
        return out

    # Create knots and the spline basis.
    xknots = splinelab.aptknt(marginize_end(x), ordr)
    yknots = splinelab.aptknt(marginize_end(y), ordr)
    splx = bspline.Bspline(xknots, ordr)
    sply = bspline.Bspline(yknots, ordr)

    # Get the number of basis functions (perform a dummy evaluation and count).
    nxb = len(splx(0.))
    nyb = len(sply(0.))

    # TODO: Check if we need to convert input Bx and sigxx to u,v (what is actually stored in the data files?)
    # Create collocation matrices:
    #
    #   A[i,j] = d**deriv_order B_j(tau[i])
    #
    # where d denotes differentiation and B_j is the jth basis function.
    #
    # We place the collocation sites at the points where we have measurements.
    #
    Au = splx.collmat(x)
    Av = sply.collmat(y)
    Du = splx.collmat(x, deriv_order=1)
    Dv = sply.collmat(y, deriv_order=1)

    ######################
    # Assemble system
    ######################

    print("Assembling system...")

    # Assemble the equation system for fitting against data on the partial
    # derivatives of psi.
    #
    # By writing psi in the spline basis,
    #
    #   psi_{ij} = A^{u}_{ik} A^{v}_{jl} c_{kl}
    #
    # the quantities to be fitted, which are the partial derivatives of psi,
    # become
    #
    #   B_{ij}         = D^{u}_{ik} A^{v}_{jl} c_{kl}
    #   lambda_{xx,ij} = A^{u}_{ik} D^{v}_{jl} c_{kl}
    #
    # Repeated indices are summed over.
    #
    # Column: kl converted to a linear index (k = 0,1,...,nxb-1; l = 0,1,...,nyb-1)
    # Row:    ij converted to a linear index (i = 0,1,...,nx-1;  j = 0,1,...,ny-1)
    #
    # (Paavo's notes, Stresses4.pdf)

    nf = 2        # number of unknown fields
    nr = nx * ny  # equation system rows per unknown field
    A = np.empty((nf * nr, nxb * nyb), dtype=np.float64)  # global matrix
    b = np.empty((nf * nr), dtype=np.float64)             # global RHS

    # zero array element detection tolerance
    tol = 1e-6

    I, J, IJ = util.index.genidx((nx, ny))
    K, L, KL = util.index.genidx((nxb, nyb))

    # Loop only over the rows of the equation system.
    for i, j, ij in zip(I, J, IJ):
        A[nf * ij, KL] = Du[i, K] * Av[j, L]
        A[nf * ij + 1, KL] = Au[i, K] * Dv[j, L]

    b[nf * IJ] = dpsi_dx[I, J]      # RHS for B_x
    b[nf * IJ + 1] = dpsi_dy[I, J]  # RHS for lambda_xx

    # # The above is equivalent to this much slower version:
    # #
    # # equation system row
    # for j in range(ny):
    #     for i in range(nx):
    #         ij = np.ravel_multi_index( (i,j), (nx,ny) )
    #
    #         # equation system column
    #         for l in range(nyb):
    #             for k in range(nxb):
    #                 kl = np.ravel_multi_index( (k,l), (nxb,nyb) )
    #                 A[nf*ij,  kl] = Du[i,k] * Av[j,l]
    #                 A[nf*ij+1,kl] = Au[i,k] * Dv[j,l]
    #
    #         b[nf*ij]   = dpsi_dx[i,j] if abs(dpsi_dx[i,j]) > tol else 0.  # RHS for B_x
    #         b[nf*ij+1] = dpsi_dy[i,j] if abs(dpsi_dy[i,j]) > tol else 0.  # RHS for lambda_xx

    ######################
    # Solve
    ######################

    # Solve for the optimal coefficients.
    #
    # Note that we are constructing a potential function from partial
    # derivatives only, so the solution is unique only up to a global
    # additive shift term.
    #
    # Under the hood, numpy.linalg.lstsq uses LAPACK DGELSD:
    #
    #   http://stackoverflow.com/questions/29372559/what-is-the-difference-between-numpy-linalg-lstsq-and-scipy-linalg-lstsq
    #
    # DGELSD also accepts rank-deficient input (rank(A) < min(nrows,ncols)),
    # returning arg min( ||x||_2 ), so we don't need to do anything special
    # to account for this.
    #
    # The same goes for the sparse LSQR.

    # Equilibrate row and column norms.
    #
    # See the documentation of scipy.sparse.linalg.lsqr; it requires this to
    # work properly.
    #
    #   https://github.com/Technologicat/python-wlsqm
    #
    print("Equilibrating...")
    S = A.copy(order='F')           # the rescaler requires Fortran memory layout
    A = scipy.sparse.csr_matrix(A)  # save memory (dense "A" no longer needed)

    # eps = 7./3. - 4./3. - 1  # http://stackoverflow.com/questions/19141432/python-numpy-machine-epsilon
    # print( S.max() * max(S.shape) * eps )  # default zero singular value detection tolerance in np.linalg.matrix_rank()

    # import wlsqm.utils.lapackdrivers as wul
    # rs, cs = wul.do_rescale( S, wul.ScalingAlgo.ALGO_DGEEQU )

    # # row scaling only (for weighting)
    # with np.errstate(divide='ignore', invalid='ignore'):
    #     rs = np.where( np.abs(b) > tol, 1./b, 1. )
    # for i in range(S.shape[0]):
    #     S[i,:] *= rs[i]
    # cs = 1.

    # Scale the rows corresponding to Bx.
    #
    rs = np.ones_like(b)
    rs[nf * IJ] = 2
    for i in range(S.shape[0]):
        S[i, :] *= rs[i]
    cs = 1.

    # # It seems this is not needed in the 2D problem (fitting error is slightly smaller without it).
    # #
    # # Additional row scaling.
    # #
    # # This equilibrates equation weights, but deteriorates the condition number of the matrix.
    # #
    # # Note that in a least-squares problem the row weighting *does* matter, because it affects
    # # the fitting error contribution from the rows.
    # #
    # with np.errstate(divide='ignore', invalid='ignore'):
    #     rs2 = np.where( np.abs(b) > tol, 1./b, 1. )
    # for i in range(S.shape[0]):
    #     S[i,:] *= rs2[i]
    # rs *= rs2
    # a = np.abs(rs2)
    # print( np.min(a), np.mean(a), np.max(a) )

    # rs = np.asanyarray(rs)
    # cs = np.asanyarray(cs)
    # a = np.abs(rs)
    # print( np.min(a), np.mean(a), np.max(a) )

    b *= rs  # scale the RHS accordingly

    # colnorms = np.linalg.norm(S, ord=np.inf, axis=0)  # sum over rows    -> column norms
    # rownorms = np.linalg.norm(S, ord=np.inf, axis=1)  # sum over columns -> row norms
    # print( "    rescaled column norms min = %g, avg = %g, max = %g" % (np.min(colnorms), np.mean(colnorms), np.max(colnorms)) )
    # print( "    rescaled row norms    min = %g, avg = %g, max = %g" % (np.min(rownorms), np.mean(rownorms), np.max(rownorms)) )

    print("Solving with algorithm = '%s'..." % (lsq_solver))
    if lsq_solver == "dense":
        print("    matrix shape %s = %d elements" % (S.shape, np.prod(S.shape)))
        ret = np.linalg.lstsq(S, b)  # c, residuals, rank, singvals
        c = ret[0]

    elif lsq_solver == "sparse":
        S = scipy.sparse.coo_matrix(S)
        print("    matrix shape %s = %d elements; %d nonzeros (%g%%)"
              % (S.shape, np.prod(S.shape), S.nnz, 100. * S.nnz / np.prod(S.shape)))

        ret = scipy.sparse.linalg.lsqr(S, b)
        c, exit_reason, iters = ret[:3]
        if exit_reason != 2:  # 2 = least-squares solution found
            print("WARNING: solver did not converge (exit_reason = %d)" % (exit_reason))
        print("    sparse solver iterations taken: %d" % (iters))

    elif lsq_solver == "optimize":
        # make a sparse matrix (faster for dot products)
        S = scipy.sparse.coo_matrix(S)
        print("    matrix shape %s = %d elements; %d nonzeros (%g%%)"
              % (S.shape, np.prod(S.shape), S.nnz, 100. * S.nnz / np.prod(S.shape)))

        def fitting_error(c):
            return S.dot(c) - b

        ret = scipy.optimize.least_squares(fitting_error,
                                           np.ones(S.shape[1], dtype=np.float64),
                                           method="trf", loss="linear")
        c = ret.x
        if ret.status < 1:
            # status codes: https://docs.scipy.org/doc/scipy-0.18.1/reference/generated/scipy.optimize.least_squares.html
            print("WARNING: solver did not converge (status = %d)" % (ret.status))

    elif lsq_solver == "qr":
        print("    matrix shape %s = %d elements" % (S.shape, np.prod(S.shape)))
        # http://glowingpython.blogspot.fi/2012/03/solving-overdetermined-systems-with-qr.html
        Q, R = np.linalg.qr(S)  # QR decomposition of S
        Qb = (Q.T).dot(b)       # compute Q^T*b (project b onto the range of S)
        # c = np.linalg.solve(R, Qb)  # solve R*x = Q^T*b
        c = scipy.linalg.solve_triangular(R, Qb, check_finite=False)

    elif lsq_solver == "cholesky":
        # S is rank-deficient by one, because we are solving for a potential
        # based on data on its partial derivatives.
        #
        # Before solving, force S to have full rank by fixing one coefficient.
        #
        S[0, :] = 0.
        S[0, 0] = 1.
        b[0] = 1.
        rs[0] = 1.

        S = scipy.sparse.csr_matrix(S)
        print("    matrix shape %s = %d elements; %d nonzeros (%g%%)"
              % (S.shape, np.prod(S.shape), S.nnz, 100. * S.nnz / np.prod(S.shape)))

        # Be sure to use the new sksparse from
        #
        #   https://github.com/scikit-sparse/scikit-sparse
        #
        # instead of the old scikits.sparse (which will fail with an error).
        #
        # Requires libsuitesparse-dev for the CHOLMOD headers.
        #
        from sksparse.cholmod import cholesky_AAt

        # Notice that CHOLMOD computes AA' and we want M'M, so we must set A = M'!
        factor = cholesky_AAt(S.T)
        c = factor.solve_A(S.T * b)

    elif lsq_solver == "sparse_qr":
        # S is rank-deficient by one, because we are solving for a potential
        # based on data on its partial derivatives.
        #
        # Before solving, force S to have full rank by fixing one coefficient;
        # otherwise the linear solve step will fail because R will be exactly
        # singular.
        #
        S[0, :] = 0.
        S[0, 0] = 1.
        b[0] = 1.
        rs[0] = 1.

        S = scipy.sparse.coo_matrix(S)
        print("    matrix shape %s = %d elements; %d nonzeros (%g%%)"
              % (S.shape, np.prod(S.shape), S.nnz, 100. * S.nnz / np.prod(S.shape)))

        # pip install sparseqr    # or https://github.com/yig/PySPQR
        #
        # Works like MATLAB's [Q,R,e] = qr(...):
        #
        #   https://se.mathworks.com/help/matlab/ref/qr.html
        #
        # [Q,R,E] = qr(A) or [Q,R,E] = qr(A,'matrix') produces unitary Q,
        # upper triangular R and a permutation matrix E so that A*E = Q*R.
        # The column permutation E is chosen to reduce fill-in in R.
        #
        # [Q,R,e] = qr(A,'vector') returns the permutation information as a
        # vector instead of a matrix. That is, e is a row vector such that
        # A(:,e) = Q*R.
        #
        import sparseqr

        print("    performing sparse QR decomposition...")
        Q, R, E, rank = sparseqr.qr(S)

        # Produce the reduced QR (for least-squares fitting):
        #
        #  - cut away the bottom part of R (zeros!)
        #  - cut away the corresponding far-right part of Q
        #
        # See
        #   np.linalg.qr
        #   https://andreask.cs.illinois.edu/cs357-s15/public/demos/06-qr-applications/Solving%20Least-Squares%20Problems.html
        #
        # # inefficient way:
        # k = min(S.shape)
        # R = scipy.sparse.csr_matrix( R.A[:k,:] )
        # Q = scipy.sparse.csr_matrix( Q.A[:,:k] )

        print("    reducing matrices...")

        # somewhat more efficient way:
        k = min(S.shape)
        R = R.tocsr()[:k, :]
        Q = Q.tocsc()[:, :k]

        # # maybe somewhat efficient way: manipulate data vectors, create new coo matrix
        # #
        # # (incomplete, needs work; need to shift indices of rows/cols after the removed ones)
        # #
        # k = min(S.shape)
        # mask = np.nonzero( R.row < k )[0]
        # R = scipy.sparse.coo_matrix( ( R.data[mask], (R.row[mask], R.col[mask]) ), shape=(k,k) )
        # mask = np.nonzero( Q.col < k )[0]
        # Q = scipy.sparse.coo_matrix( ( Q.data[mask], (Q.row[mask], Q.col[mask]) ), shape=(k,k) )

        print("    solving...")
        Qb = (Q.T).dot(b)
        x = scipy.sparse.linalg.spsolve(R, Qb)
        c = np.empty_like(x)
        c[E] = x[:]  # apply the inverse permutation

    elif lsq_solver == "sparse_qr_solve":
        # Same rank-deficiency workaround as above.
        S[0, :] = 0.
        S[0, 0] = 1.
        b[0] = 1.
        rs[0] = 1.

        S = scipy.sparse.coo_matrix(S)
        print("    matrix shape %s = %d elements; %d nonzeros (%g%%)"
              % (S.shape, np.prod(S.shape), S.nnz, 100. * S.nnz / np.prod(S.shape)))

        import sparseqr
        c = sparseqr.solve(S, b)

    else:
        raise ValueError("unknown solver '%s'; valid: 'dense', 'sparse', "
                         "'optimize', 'qr', 'cholesky', 'sparse_qr', "
                         "'sparse_qr_solve'" % (lsq_solver))

    c *= cs  # undo the column scaling in the solution

    # Now c contains the spline coefficients c_{kl}, where kl has been raveled
    # into a linear index.
    ######################
    # Save
    ######################

    filename = "tmp_s2d.mat"
    L = locals()
    data = {key: L[key]
            for key in ["ordr", "xknots", "yknots", "c", "Hscale", "sscale"]}
    scipy.io.savemat(filename, data, format='5', oned_as='row')

    ######################
    # Plot
    ######################

    print("Visualizing...")

    # Unpack the results onto the meshgrid.
    #
    fitted = A.dot(c)  # function values corresponding to each row in the global equation system
    X, Y = np.meshgrid(Hx, sigxx, indexing='ij')  # indexed like X[i,j] (i is the x index, j is the y index)
    Z_Bx = np.empty_like(X)
    Z_lamxx = np.empty_like(X)

    Z_Bx[I, J] = fitted[nf * IJ]
    Z_lamxx[I, J] = fitted[nf * IJ + 1]

    # # The above is equivalent to:
    # for ij in range(nr):
    #     i,j = np.unravel_index( ij, (nx,ny) )
    #     Z_Bx[i,j]    = fitted[nf*ij]
    #     Z_lamxx[i,j] = fitted[nf*ij+1]

    data_Bx = {
        "x": (X, r"$H_{x}$"),
        "y": (Y, r"$\sigma_{xx}$"),
        "z": (Z_Bx / Hscale, r"$B_{x}$")
    }
    data_lamxx = {
        "x": (X, r"$H_{x}$"),
        "y": (Y, r"$\sigma_{xx}$"),
        "z": (Z_lamxx / sscale, r"$\lambda_{xx}$")
    }

    def relerr(data, refdata):
        refdata_linview = refdata.reshape(-1)
        return 100. * np.linalg.norm(refdata_linview - data.reshape(-1)) \
            / np.linalg.norm(refdata_linview)

    plt.figure(1)
    plt.clf()
    ax = util.plot.plot_wireframe(data_Bx, legend_label="Spline", figno=1)
    ax.plot_wireframe(X, Y, dpsi_dx / Hscale, label="Multiscale", color="r")
    plt.legend(loc="best")
    print("B_x relative error %g%%" % (relerr(Z_Bx, dpsi_dx)))

    plt.figure(2)
    plt.clf()
    ax = util.plot.plot_wireframe(data_lamxx, legend_label="Spline", figno=2)
    ax.plot_wireframe(X, Y, dpsi_dy / sscale, label="Multiscale", color="r")
    plt.legend(loc="best")
    print("lambda_xx relative error %g%%" % (relerr(Z_lamxx, dpsi_dy)))

    # Match the grid point numbering used in the MATLAB version of this script.
    #
    def t(A):
        return np.transpose(A, [1, 0])

    dpsi_dx = t(dpsi_dx)
    Z_Bx = t(Z_Bx)
    dpsi_dy = t(dpsi_dy)
    Z_lamxx = t(Z_lamxx)

    plt.figure(3)
    plt.clf()
    ax = plt.subplot(1, 1, 1)
    ax.plot(dpsi_dx.reshape(-1) / Hscale, 'ro', markersize='2', label="Multiscale")
    ax.plot(Z_Bx.reshape(-1) / Hscale, 'ko', markersize='2', label="Spline")
    ax.set_xlabel("Grid point number")
    ax.set_ylabel(r"$B_{x}$")
    plt.legend(loc="best")

    plt.figure(4)
    plt.clf()
    ax = plt.subplot(1, 1, 1)
    ax.plot(dpsi_dy.reshape(-1) / sscale, 'ro', markersize='2', label="Multiscale")
    ax.plot(Z_lamxx.reshape(-1) / sscale, 'ko', markersize='2', label="Spline")
    ax.set_xlabel("Grid point number")
    ax.set_ylabel(r"$\lambda_{xx}$")
    plt.legend(loc="best")

    print("All done.")
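# A sketch of how the saved fit could be reloaded and evaluated at an
# arbitrary (H, sigma) point. The file name and keys match the savemat() call
# above; the loadmat() unpacking (squeeze/int/float casts) and the C-order
# reshape of c are assumptions based on how c_{kl} is raveled during assembly.
import numpy as np
import scipy.io
import bspline

def load_psi_surface(filename="tmp_s2d.mat"):
    d = scipy.io.loadmat(filename)
    ordr = int(d["ordr"])
    xknots = np.squeeze(d["xknots"])
    yknots = np.squeeze(d["yknots"])
    c = np.squeeze(d["c"])
    Hscale = float(d["Hscale"])
    sscale = float(d["sscale"])
    splx = bspline.Bspline(xknots, ordr)
    sply = bspline.Bspline(yknots, ordr)
    nxb = len(splx(0.))
    nyb = len(sply(0.))
    C = c.reshape(nxb, nyb)  # c_{kl} back to matrix form (C-order ravel assumed)

    def psi(H, sig):
        # psi(u,v) = sum_{kl} B_k(u) B_l(v) c_{kl},
        # with u = H/Hscale, v = sig/sscale
        return splx(H / Hscale).dot(C).dot(sply(sig / sscale))
    return psi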
def fit_1d_spline(x, y, knots=None, nvis=10001):
    spline_order = 3

    minx = np.min(x)
    maxx = np.max(x)

    # Preliminary placement of the knots.
    #
    # Bump the last site slightly. The spline is nonzero only on the
    # *half-open* interval [x1, x2), so the value of the spline interpolant
    # exactly at the end of the span is always 0.
    #
    # kk = np.linspace(minx, maxx + 1e-8*(maxx-minx), 21)  # better to adjust number and spacing of knots (maybe quadratic toward ends?)
    if knots is not None:
        kk = knots
    else:
        # If no custom knot vector was given, make one now
        # (emphasize the ends -- good for BH curves).
        kk = np.linspace(0, 1, 81)
        kk = kk**2
        kk = mirspace(kk)
        kk = minx + (1. + 1e-8) * (maxx - minx) * kk

    kk = splinelab.aptknt(kk, order=spline_order)
    spl = bspline.Bspline(order=spline_order, knot_vector=kk)

    nx = x.shape[0]
    Au = spl.collmat(x)

    # Construct the overdetermined linear system for determining the optimal
    # spline coefficients.
    #
    nf = 1              # number of unknown fields
    nr = nx             # equation system rows per unknown field
    nxb = len(spl(0.))  # get number of basis functions (perform a dummy evaluation and count)
    A = np.empty((nf * nr, nxb), dtype=np.float64)  # global matrix
    b = np.empty((nf * nr), dtype=np.float64)       # global RHS

    # Loop only over the rows of the equation system.
    for i in range(nf * nr):
        A[nf * i, :] = Au[i, :]
    b[:] = y

    # Solve the overdetermined linear system (in the least-squares sense).

    # # dense solver (LAPACK DGELSD)
    # ret = np.linalg.lstsq(A, b)  # c, residuals, rank, singvals
    # c = ret[0]

    # sparse solver (SciPy LSQR)
    S = scipy.sparse.coo_matrix(A)
    print("    matrix shape %s = %d elements; %d nonzeros (%g%%)"
          % (S.shape, np.prod(S.shape), S.nnz, 100. * S.nnz / np.prod(S.shape)))
    ret = scipy.sparse.linalg.lsqr(S, b)
    # c, exit_reason, iters = ret[:3]
    c, exit_reason = ret[:2]
    if exit_reason != 2:  # 2 = least-squares solution found
        print("WARNING: solver did not converge (exit_reason = %d)" % (exit_reason))

    # Evaluate the computed optimal B-spline.
    #
    xx_spline = np.linspace(minx, maxx, nvis)
    Avis = spl.collmat(xx_spline)
    yy_spline = np.sum(Avis * c, axis=-1)

    return (xx_spline, yy_spline)
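# Hypothetical usage of fit_1d_spline() on noisy samples of a smooth curve.
# mirspace() in the default branch above is a project helper (presumably
# mirroring the quadratic spacing about the midpoint) that is not defined in
# this listing, so this sketch passes an explicit knot vector to sidestep it:
import numpy as np

x = np.linspace(0.0, 10.0, 200)
y = np.tanh(x) + 0.01 * np.random.randn(x.shape[0])
knots = np.linspace(0.0, 10.0 * (1. + 1e-8), 21)  # last site bumped past max(x)
xx, yy = fit_1d_spline(x, y, knots=knots)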
def Temp(self):
    # Load in all the defined properties.
    num_of_points = 15  # number of points used in the spline
    order = 5           # order of the spline
    plt_points = self.xgrid
    length = self.length
    start_time = self.start_time
    final_time = self.final_time
    time_step = self.time_step
    Left_BC_Type = self.Left_BC_Type
    Right_BC_Type = self.Right_BC_Type
    Left_BC = self.Left_BC
    Right_BC = self.Right_BC
    conductivity = self.conductivity
    rfct = self.heatCapacity
    source = self.source
    init = self.init
    rho = self.rho

    # Simulation Setup ====================================================

    # Conductivity derivative ---------------------------------------------
    # dk/dphi is needed later in the core in every loop; evaluate the
    # derivative of conductivity with respect to phi by a forward finite
    # difference.
    # ---------------------------------------------------------------------
    def diff_conductivity(phi):
        eps = 1e-9
        dc = (conductivity(phi + eps) - conductivity(phi)) / eps
        return dc

    # Capacity function ---------------------------------------------------
    # Evaluate the cubic polynomial coefficients of the passed-in function
    # via a system of linear equations (interpolation at phi = 1, 2, 3, 4).
    # ---------------------------------------------------------------------
    def capacity(r):
        A = np.array([[1, 1, 1, 1], [1, 2, 4, 8], [1, 3, 9, 27],
                      [1, 4, 16, 64]])
        B = np.array([r(1), r(2), r(3), r(4)])
        rcoeff = np.linalg.solve(A, B)
        return rcoeff

    # Define the time and space grids ---------------------------------------
    x = np.linspace(0, length, num_of_points)         # space grid
    t = np.arange(start_time, final_time, time_step)  # time grid

    # Define splines and differentiation matrices ---------------------------
    knot_vector = aptknt(x, order)       # create knot points (with ghost points)
    basis = Bspline(knot_vector, order)  # generate a vector of spline objects
    A0 = basis.collmat(x, deriv_order=0)   # matrix A0: 0th-order derivative in space
    AA0 = basis.collmat(x, deriv_order=0)  # a boundary-condition-free copy of A0
    AA0[-1, -1] = 1
    A1 = basis.collmat(x, deriv_order=1)  # matrix A1: 1st-order derivative in space
    A2 = basis.collmat(x, deriv_order=2)  # matrix A2: 2nd-order derivative in space

    # Prepare the "smooth plot matrix".
    xx = np.linspace(0, length, plt_points)  # grid to plot on
    C = basis.collmat(xx)  # smooth plot matrix (see the LaTeX notes)

    # Correct the last spline.
    A0[-1, -1] = 1
    C[-1, -1] = 1
    A1[-1] = -np.flip(A1[0], 0)  # put the first values of A1 into the last row

    # Prepare the inverse P to save time during the simulation --------------
    if Left_BC_Type == 1:
        A0[0] = A1[0]    # replace the first/last row by the derivative row;
    if Right_BC_Type == 1:
        A0[-1] = A1[-1]  # needed to implement a Neumann BC
    P = spl.inv(A0)

    # Modify the formulation to implement the boundary conditions -----------
    A0[0] = 0
    A1[0] = 0
    A2[0] = 0   # the first row is reserved for the boundary condition
    A0[-1] = 0
    A1[-1] = 0
    A2[-1] = 0  # the last row is reserved for the boundary condition

    # Time evolution matrix --------------------------------------------------
    M = np.dot(P, A0) + (time_step * np.dot(P, A2))  # only needed for the simple case; see core

    # Initial c = coefficients for the splines.
    if isinstance(init, (int, float)) or len(init(x)) < len(x):
        # Make the initial condition a function (in case the input is a number).
        dummy = init
        init = lambda x: dummy + 0 * x
    c = np.dot(spl.inv(AA0), init(x))

    # Prepare the boundary condition according to which function is given to the class.
    BC = np.zeros((len(x), len(t)))
    BC[0] = Left_BC(t)
    BC[-1] = Right_BC(t)

    # Prepare a matrix with the source data (space, time) to avoid calling
    # the function inside the loop; see core.
    if isinstance(source, (int, float)):
        # Make the source a function (in case the input is a number, i.e. a constant).
        dummy1 = source
        source = lambda x, t: dummy1 + 0 * x + 0 * t
    xmg, tmg = np.meshgrid(x, t)
    sourceM = source(xmg, tmg)
    sourceM[:, 0] = 0
    sourceM[:, -1] = 0  # set the first and last rows to 0 for the BC

    # Prepare an array to store the results.
    phi = np.zeros((len(t), len(xx)))
    # End of Simulation Setup =============================================

    # MAIN LOOP -----------------------------------------------------------
    # =====================================================================
    # Decide which case is relevant and solve the corresponding for-loops in
    # the core.py file. Depending on which _BC_Type (either Neumann or
    # Dirichlet) is passed, the respective boundary conditions are taken
    # into consideration.
    # =====================================================================
    if Left_BC_Type == 0 and Right_BC_Type == 0:  # Dirichlet on both sides
        print('Dirichlet condition on both sides')
        if isinstance(conductivity, (int, float)) and isinstance(rfct, (int, float)):
            # k(phi) = k0 and r(phi) = r0: conductivity and capacity are both constants.
            print('Constant conductivity and capacity and Dirichlet boundary conditions')
            print('No source and no density is taken under consideration')
            k0 = conductivity
            r0 = rfct
            phi = core.simple_DD(M, t, c, k0, P, BC, C, phi)
        if not isinstance(conductivity, (int, float)) and isinstance(rfct, (int, float)):
            # Conductivity k(phi) is generic; capacity r(phi) = r0 is constant.
            print(r'Generic $k(\phi)$ and capacity $r(\phi) = r0$')
            r0 = rfct
            phi = core.genK_phi_DD(t, c, A0, A1, A2, diff_conductivity,
                                   conductivity, sourceM, r0, time_step, P,
                                   C, BC, phi, rho, x)
        if not isinstance(conductivity, (int, float)) and not isinstance(rfct, (int, float)):
            # Conductivity k(phi) and capacity r(phi) are both generic.
            print(r'Generic conductivity $k(\phi)$ and capacity $r(\phi)$')
            r0, r1, r2, r3 = capacity(rfct)
            phi = core.genK_genR_DD(t, c, A0, AA0, A1, A2, diff_conductivity,
                                    conductivity, sourceM, r0, r1, r2, r3,
                                    time_step, P, C, BC, phi, rho, x)

    if Left_BC_Type == 1 and Right_BC_Type == 0:  # Neumann condition on the LHS
        print('Left side: Neumann; right side: Dirichlet BC')
        print(r'Generic conductivity $k(\phi)$ and capacity $r(\phi)$')
        side = 0
        r0, r1, r2, r3 = capacity(rfct)
        phi = core.genK_genR_ND(t, c, A0, AA0, A1, A2, diff_conductivity,
                                conductivity, sourceM, r0, r1, r2, r3,
                                time_step, P, C, BC, phi, side, rho, x)

    if Left_BC_Type == 0 and Right_BC_Type == 1:  # Neumann condition on the RHS
        print('Left side: Dirichlet; right side: Neumann boundary condition')
        print(r'Generic conductivity $k(\phi)$ and capacity $r(\phi)$')
        side = -1
        r0, r1, r2, r3 = capacity(rfct)
        phi = core.genK_genR_DN(t, c, A0, AA0, A1, A2, diff_conductivity,
                                conductivity, sourceM, r0, r1, r2, r3,
                                time_step, P, C, BC, phi, side, rho, x)

    if Left_BC_Type == 1 and Right_BC_Type == 1:  # Neumann condition on both sides
        print('Both sides Neumann boundary condition')
        print(r'Generic conductivity $k(\phi)$ and capacity $r(\phi)$')
        r0, r1, r2, r3 = capacity(rfct)
        phi = core.genK_genR_NN(t, c, A0, AA0, A1, A2, diff_conductivity,
                                conductivity, sourceM, r0, r1, r2, r3,
                                time_step, P, C, BC, phi, rho, x)

    return phi  # in every case phi gets returned by the core
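# A quick sanity check of the capacity() helper above: for a cubic
# r(phi) = r0 + r1*phi + r2*phi^2 + r3*phi^3, interpolating at phi = 1..4
# recovers the coefficients exactly (the coefficient values are illustrative):
import numpy as np

A = np.array([[1, 1, 1, 1], [1, 2, 4, 8], [1, 3, 9, 27], [1, 4, 16, 64]])
r = lambda phi: 2.0 + 0.5 * phi - 0.1 * phi**2 + 0.01 * phi**3
B = np.array([r(1), r(2), r(3), r(4)])
print(np.linalg.solve(A, B))  # -> approx [2.0, 0.5, -0.1, 0.01]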
def QLBS_EPUT(S0, mu, sigma, r, M, T, risk_lambda, N_MC, delta_t, gamma, K,
              rand_seed):
    ###########################################################################
    # Make a dataset
    ###########################################################################

    np.random.seed(rand_seed)  # fix random seed

    # stock price
    S = pd.DataFrame([], index=range(1, N_MC + 1), columns=range(T + 1))
    S.loc[:, 0] = S0

    # standard normal random numbers
    RN = pd.DataFrame(np.random.randn(N_MC, T), index=range(1, N_MC + 1),
                      columns=range(1, T + 1))

    for t in range(1, T + 1):
        S.loc[:, t] = S.loc[:, t - 1] * np.exp(
            (mu - 1 / 2 * sigma**2) * delta_t
            + sigma * np.sqrt(delta_t) * RN.loc[:, t])

    # delta_S_t = S_{t+1} - exp(r * delta_t) * S_t
    delta_S = S.loc[:, 1:T].values - np.exp(r * delta_t) * S.loc[:, 0:T - 1]
    delta_S_hat = delta_S.apply(lambda x: x - np.mean(x), axis=0)

    # state variable (the delta_t term here is due to their conventions)
    X = -(mu - 1 / 2 * sigma**2) * np.arange(T + 1) * delta_t + np.log(S)

    # plot 10 paths
    step_size = N_MC // 10
    idx_plot = np.arange(step_size, N_MC, step_size)
    plt.plot(S.T.iloc[:, idx_plot])
    plt.xlabel('Time Steps')
    plt.ylabel('Stock Price')
    plt.title('Stock Price Sample Paths')
    plt.show()

    plt.plot(X.T.iloc[:, idx_plot])
    plt.xlabel('Time Steps')
    plt.ylabel('State Variable')
    plt.title('State Variable Sample Paths')
    plt.show()

    ###########################################################################
    # Terminal payoff of a European put option
    ###########################################################################

    def terminal_payoff(ST, K):
        # ST: final stock price
        # K:  strike
        payoff = max(K - ST, 0)
        return payoff

    ###########################################################################
    # Define spline basis functions
    ###########################################################################

    import bspline
    import bspline.splinelab as splinelab

    X_min = np.min(np.min(X))
    X_max = np.max(np.max(X))

    p = 4  # order of spline (as-is; 3 = cubic, 4 = B-spline?)
    ncolloc = 12
    # These are the sites to which we would like to interpolate.
    tau = np.linspace(X_min, X_max, ncolloc)

    # k is a knot vector that adds endpoint repeats as appropriate for a
    # spline of order p. To get meaningful results, one should have
    # ncolloc >= p + 1.
    k = splinelab.aptknt(tau, p)

    # spline basis of order p on knots k
    basis = bspline.Bspline(k, p)

    f = plt.figure()
    # spline basis functions
    plt.title("Basis Functions to be Used For This Iteration")
    basis.plot()
    plt.savefig('Basis_functions.png', dpi=600)

    ###########################################################################
    # Make data matrices with feature values
    #
    # "Features" here are the values of basis functions at data points.
    # The outputs are 3D arrays of dimensions num_tSteps x num_MC x num_basis.
    ###########################################################################

    num_t_steps = T + 1
    num_basis = ncolloc  # len(k)

    data_mat_t = np.zeros((num_t_steps, N_MC, num_basis))

    # Fill it: expand the function in a finite-dimensional space.
    # (In a neural network, the basis is the neural network itself.)
    t_0 = time.time()
    for i in np.arange(num_t_steps):
        x = X.values[:, i]
        data_mat_t[i, :, :] = np.array([basis(el) for el in x])
    t_end = time.time()

    # save these data matrices for future re-use
    np.save('data_mat_m=r_A_%d' % N_MC, data_mat_t)

    ###########################################################################
    # Dynamic Programming solution for QLBS
    ###########################################################################

    risk_lambda = 0.001  # risk aversion (other values tried: 0.0001)
    K = 100              # strike

    # functions to compute the optimal hedges
    def function_A_vec(t, delta_S_hat, data_mat, reg_param):
        """
        function_A_vec - compute the matrix A_{nm} from Eq. (52)
        (with a regularization!) in the QLBS Q-Learner in the
        Black-Scholes-Merton article.

        Arguments:
        t           - time index, a scalar, an index into the time axis of data_mat
        delta_S_hat - pandas.DataFrame of dimension N_MC x T
        data_mat    - np.array of dimension T x N_MC x num_basis
        reg_param   - a scalar, regularization parameter

        Return:
        A_mat - np.array, i.e. matrix A_{nm} of dimension num_basis x num_basis
        """
        X_mat = data_mat[t, :, :]
        num_basis_funcs = X_mat.shape[1]
        this_dS = delta_S_hat.loc[:, t]
        hat_dS2 = (this_dS**2).values.reshape(-1, 1)
        A_mat = np.dot(X_mat.T, X_mat * hat_dS2) \
            + reg_param * np.eye(num_basis_funcs)
        return A_mat

    def function_B_vec(t, Pi_hat, delta_S_hat=delta_S_hat, S=S,
                       data_mat=data_mat_t, gamma=gamma,
                       risk_lambda=risk_lambda):
        """
        function_B_vec - compute the vector B_{n} from Eq. (52) in the QLBS
        Q-Learner in the Black-Scholes-Merton article.

        Arguments:
        t           - time index, a scalar, an index into the time axis of delta_S_hat
        Pi_hat      - pandas.DataFrame of dimension N_MC x T of portfolio values
        delta_S_hat - pandas.DataFrame of dimension N_MC x T
        S           - pandas.DataFrame of simulated stock prices
        data_mat    - np.array of dimension T x N_MC x num_basis
        gamma       - one-time-step discount factor exp(-r * delta_t)
        risk_lambda - risk aversion coefficient, a small positive number

        Return:
        B_vec - np.array of dimension num_basis x 1
        """
        # coef = 1.0/(2 * gamma * risk_lambda)
        # override it by zero to have a pure risk hedge
        coef = 0.
        # keep it
        tmp = Pi_hat.loc[:, t + 1] * delta_S_hat.loc[:, t]
        X_mat = data_mat[t, :, :]  # matrix of dimension N_MC x num_basis
        B_vec = np.dot(X_mat.T, tmp)
        return B_vec

    ###########################################################################
    # Compute optimal hedge and portfolio value
    ###########################################################################

    starttime = time.time()

    # portfolio value
    Pi = pd.DataFrame([], index=range(1, N_MC + 1), columns=range(T + 1))
    Pi.iloc[:, -1] = S.iloc[:, -1].apply(lambda x: terminal_payoff(x, K))
    Pi_hat = pd.DataFrame([], index=range(1, N_MC + 1), columns=range(T + 1))
    Pi_hat.iloc[:, -1] = Pi.iloc[:, -1] - np.mean(Pi.iloc[:, -1])

    # optimal hedge
    a = pd.DataFrame([], index=range(1, N_MC + 1), columns=range(T + 1))
    a.iloc[:, -1] = 0

    reg_param = 1e-3
    for t in range(T - 1, -1, -1):
        A_mat = function_A_vec(t, delta_S_hat, data_mat_t, reg_param)
        B_vec = function_B_vec(t, Pi_hat, delta_S_hat, S, data_mat_t)
        # print('t = A_mat.shape = B_vec.shape = ', t, A_mat.shape, B_vec.shape)
        phi = np.dot(np.linalg.inv(A_mat), B_vec)
        a.loc[:, t] = np.dot(data_mat_t[t, :, :], phi)
        Pi.loc[:, t] = gamma * (Pi.loc[:, t + 1] - a.loc[:, t] * delta_S.loc[:, t])
        Pi_hat.loc[:, t] = Pi.loc[:, t] - np.mean(Pi.loc[:, t])

    a = a.astype('float')
    Pi = Pi.astype('float')
    Pi_hat = Pi_hat.astype('float')
    endtime = time.time()

    # Plots of 10 optimal hedge a_t^* and portfolio value Pi_t paths are shown below.

    # plot 10 paths
    plt.plot(a.T.iloc[:, idx_plot])
    plt.xlabel('Time Steps')
    plt.title('Optimal Hedge')
    plt.show()

    plt.plot(Pi.T.iloc[:, idx_plot])
    plt.xlabel('Time Steps')
    plt.title('Portfolio Value')
    plt.show()

    ###########################################################################
    # Part 2: Compute the optimal Q-function with the DP approach
    ###########################################################################

    def function_C_vec(t, data_mat, reg_param):
        """
        function_C_vec - calculate the C_{nm} matrix (with a regularization!).

        Arguments:
        t         - time index, a scalar, an index into the time axis of data_mat
        data_mat  - np.array of values of basis functions, of dimension T x N_MC x num_basis
        reg_param - regularization parameter, a scalar

        Return:
        C_mat - np.array of dimension num_basis x num_basis
        """
        X_mat = data_mat[t, :, :]
        num_basis_funcs = X_mat.shape[1]
        C_mat = np.dot(X_mat.T, X_mat) + reg_param * np.eye(num_basis_funcs)
        return C_mat

    def function_D_vec(t, Q, R, data_mat, gamma=gamma):
        """
        function_D_vec - calculate the D_{n} vector (with a regularization!).

        Arguments:
        t        - time index, a scalar, an index into the time axis of data_mat
        Q        - pandas.DataFrame of Q-function values of dimension N_MC x T
        R        - pandas.DataFrame of rewards of dimension N_MC x T
        data_mat - np.array of values of basis functions, of dimension T x N_MC x num_basis
        gamma    - one-time-step discount factor exp(-r * delta_t)

        Return:
        D_vec - np.array of dimension num_basis x 1
        """
        X_mat = data_mat[t, :, :]
        D_vec = np.dot(X_mat.T, R.loc[:, t] + gamma * Q.loc[:, t + 1])
        return D_vec

    ###########################################################################
    # Implement batch-mode, off-policy, model-free Q-Learning by Fitted
    # Q-Iteration.
    # The only data available is given by a set of N_MC paths for the
    # underlying state variable X_t, hedge position a_t, instantaneous reward
    # R_t and the next-time value X_{t+1}.
    ###########################################################################

    starttime = time.time()

    eta = 0.5  # noise level (other values tried: 0.05, 0.1, 0.15, 0.25)
    reg_param = 1e-3
    np.random.seed(42)  # fix random seed

    # disturbed optimal actions to be computed
    a_op = pd.DataFrame([], index=range(1, N_MC + 1), columns=range(T + 1))
    a_op.iloc[:, -1] = 0

    # also make portfolios and rewards

    # portfolio value
    Pi_op = pd.DataFrame([], index=range(1, N_MC + 1), columns=range(T + 1))
    Pi_op.iloc[:, -1] = S.iloc[:, -1].apply(lambda x: terminal_payoff(x, K))
    Pi_op_hat = pd.DataFrame([], index=range(1, N_MC + 1), columns=range(T + 1))
    Pi_op_hat.iloc[:, -1] = Pi_op.iloc[:, -1] - np.mean(Pi_op.iloc[:, -1])

    # reward function
    R_op = pd.DataFrame([], index=range(1, N_MC + 1), columns=range(T + 1))
    R_op.iloc[:, -1] = -risk_lambda * np.var(Pi_op.iloc[:, -1])

    # the backward loop
    for t in range(T - 1, -1, -1):
        # 1. Compute the optimal policy, and write the result to a_op.
        a_op.loc[:, t] = a.loc[:, t]

        # 2. Now disturb these values by multiplicative random noise.
        a_op.loc[:, t] *= np.random.uniform(1 - eta, 1 + eta,
                                            size=a_op.shape[0])

        # 3. Compute the portfolio values corresponding to the observed actions.
        Pi_op.loc[:, t] = gamma * (Pi_op.loc[:, t + 1]
                                   - a_op.loc[:, t] * delta_S.loc[:, t])
        # (The original wrote this demeaned value into Pi_hat; Pi_op_hat
        #  appears to be the intended target and is used below.)
        Pi_op_hat.loc[:, t] = Pi_op.loc[:, t] - np.mean(Pi_op.loc[:, t])

        # 4. Compute the rewards corresponding to the observed actions.
        R_op.loc[:, t] = gamma * a_op.loc[:, t] * delta_S.loc[:, t] \
            - risk_lambda * np.var(Pi_op.loc[:, t])

    # plot 10 reward function paths
    plt.plot(R_op.T.iloc[:, idx_plot])
    plt.xlabel('Time Steps')
    plt.title('Reward Function')
    plt.show()

    ###########################################################################
    # Override the on-policy data with the off-policy data
    ###########################################################################

    a = copy.deepcopy(a_op)            # disturbed actions
    Pi = copy.deepcopy(Pi_op)          # disturbed portfolio values
    Pi_hat = copy.deepcopy(Pi_op_hat)  # disturbed demeaned portfolio values
    R = copy.deepcopy(R_op)

    # make matrix A_t of shape (3 x num_MC x num_steps)
    num_MC = a.shape[0]  # number of simulated paths
    num_TS = a.shape[1]  # number of time steps
    a_1_1 = a.values.reshape((1, num_MC, num_TS))
    a_1_2 = 0.5 * a_1_1**2
    ones_3d = np.ones((1, num_MC, num_TS))
    A_stack = np.vstack((ones_3d, a_1_1, a_1_2))

    data_mat_swap_idx = np.swapaxes(data_mat_t, 0, 2)

    # expand dimensions of the matrices to multiply element-wise
    A_2 = np.expand_dims(A_stack, axis=1)  # becomes (3, 1, num_MC, num_TS)
    data_mat_swap_idx = np.expand_dims(data_mat_swap_idx, axis=0)  # becomes (1, num_basis, num_MC, num_TS)
    Psi_mat = np.multiply(A_2, data_mat_swap_idx)
    # Psi_mat is a matrix of size 3 x num_basis x num_MC x num_steps

    # now concatenate the columns along the first dimension
    # Psi_mat = Psi_mat.reshape(-1, a.shape[0], a.shape[1], order='F')
    Psi_mat = Psi_mat.reshape(-1, N_MC, T + 1, order='F')

    ###########################################################################
    # Make matrix S_t
    ###########################################################################

    Psi_1_aux = np.expand_dims(Psi_mat, axis=1)
    Psi_2_aux = np.expand_dims(Psi_mat, axis=0)
    S_t_mat = np.sum(np.multiply(Psi_1_aux, Psi_2_aux), axis=2)

    # clean up some space
    del Psi_1_aux, Psi_2_aux, data_mat_swap_idx, A_2

    ###########################################################################

    def function_S_vec(t, S_t_mat, reg_param):
        """
        function_S_vec - calculate the S_{nm} matrix from Eq. (75)
        (with a regularization!) in the QLBS Q-Learner in the
        Black-Scholes-Merton article.

        num_Qbasis = 3 x num_basis, 3 because of the basis expansion
        (1, a_t, 0.5 a_t^2).

        Arguments:
        t         - time index, a scalar, an index into the time axis of S_t_mat
        S_t_mat   - np.array of dimension num_Qbasis x num_Qbasis x T
        reg_param - regularization parameter, a scalar

        Return:
        S_mat_reg - np.array of dimension num_Qbasis x num_Qbasis
        """
        num_Qbasis = S_t_mat.shape[0]
        S_mat_reg = S_t_mat[:, :, t] + reg_param * np.eye(num_Qbasis)
        return S_mat_reg

    def function_M_vec(t, Q_star, R, Psi_mat_t, gamma=gamma):
        """
        function_M_vec - calculate the M_{n} vector from Eq. (75)
        (with a regularization!) in the QLBS Q-Learner in the
        Black-Scholes-Merton article.

        num_Qbasis = 3 x num_basis, 3 because of the basis expansion
        (1, a_t, 0.5 a_t^2).

        Arguments:
        t         - time index, a scalar, an index into the time axis of S_t_mat
        Q_star    - pandas.DataFrame of Q-function values of dimension N_MC x T
        R         - pandas.DataFrame of rewards of dimension N_MC x T
        Psi_mat_t - np.array of dimension num_Qbasis x N_MC
        gamma     - one-time-step discount factor exp(-r * delta_t)

        Return:
        M_t - np.array of dimension num_Qbasis x 1
        """
        M_t = np.dot(Psi_mat_t, R.loc[:, t] + gamma * Q_star.loc[:, t + 1])
        return M_t

    ###########################################################################
    # Call function_S_vec and function_M_vec for t = T-1, ..., 0 together with
    # the vector Psi(X_t, a_t) to compute W_t and learn the Q-function
    #
    #   Q_t^*(X_t, a_t) = A_t^T U_W(t, X_t)
    #
    # implied by the input data, backward recursively with the terminal
    # condition
    #
    #   Q_T^*(X_T, a_T = 0) = -Pi_T(X_T) - lambda * Var[Pi_T(X_T)].
    #
    # Plots of 5 optimal-action a_t^*(X_t), optimal Q-function with optimal
    # action Q_t^*(X_t, a_t^*), and implied Q-function Q_t^*(X_t, a_t) paths
    # are shown below.
    ###########################################################################
    ###########################################################################
    # Fitted Q Iteration (FQI)
    ###########################################################################

    # implied Q-function from the input data (using the first form in Eq. (68))
    Q_RL = pd.DataFrame([], index=range(1, N_MC + 1), columns=range(T + 1))
    Q_RL.iloc[:, -1] = -Pi.iloc[:, -1] - risk_lambda * np.var(Pi.iloc[:, -1])

    # optimal action
    a_opt = np.zeros((N_MC, T + 1))
    a_star = pd.DataFrame([], index=range(1, N_MC + 1), columns=range(T + 1))
    a_star.iloc[:, -1] = 0

    # optimal Q-function with optimal action
    Q_star = pd.DataFrame([], index=range(1, N_MC + 1), columns=range(T + 1))
    Q_star.iloc[:, -1] = Q_RL.iloc[:, -1]

    # max_Q_star_next = Q_star.iloc[:,-1].values
    max_Q_star = np.zeros((N_MC, T + 1))
    max_Q_star[:, -1] = Q_RL.iloc[:, -1].values

    num_basis = data_mat_t.shape[2]

    reg_param = 1e-3
    hyper_param = 1e-1

    # the backward loop
    for t in range(T - 1, -1, -1):
        # calculate the vector W_t
        S_mat_reg = function_S_vec(t, S_t_mat, reg_param)
        M_t = function_M_vec(t, Q_star, R, Psi_mat[:, :, t], gamma)
        W_t = np.dot(np.linalg.inv(S_mat_reg), M_t)  # a 1D array of dimension 3M

        # reshape to the matrix W_mat
        W_mat = W_t.reshape((3, num_basis), order='F')  # shape 3 x M

        # make matrix Phi_mat
        Phi_mat = data_mat_t[t, :, :].T  # dimension M x N_MC

        # compute matrix U_mat of dimension 3 x N_MC
        U_mat = np.dot(W_mat, Phi_mat)

        # compute vectors U_W^0, U_W^1, U_W^2 as the rows of matrix U_mat
        U_W_0 = U_mat[0, :]
        U_W_1 = U_mat[1, :]
        U_W_2 = U_mat[2, :]

        # IMPORTANT!!! Instead of maximizing the fitted Q-function, use the
        # hedges computed as in the DP approach: in this way, errors of the
        # function approximation do not back-propagate. This provides a
        # stable solution, unlike the first method, which leads to a
        # diverging solution.
        A_mat = function_A_vec(t, delta_S_hat, data_mat_t, reg_param)
        B_vec = function_B_vec(t, Pi_hat, delta_S_hat, S, data_mat_t)
        # print('t = A_mat.shape = B_vec.shape = ', t, A_mat.shape, B_vec.shape)
        phi = np.dot(np.linalg.inv(A_mat), B_vec)
        a_opt[:, t] = np.dot(data_mat_t[t, :, :], phi)
        a_star.loc[:, t] = a_opt[:, t]

        max_Q_star[:, t] = U_W_0 + a_opt[:, t] * U_W_1 \
            + 0.5 * (a_opt[:, t]**2) * U_W_2
        Q_star.iloc[:, t] = max_Q_star[:, t]  # update the dataframe

        # update the Q_RL solution, given by a dot product of the two
        # matrices W_t and Psi_t
        Psi_t = Psi_mat[:, :, t].T  # dimension N_MC x 3M
        Q_RL.loc[:, t] = np.dot(Psi_t, W_t)

        # trim outliers for Q_RL
        up_percentile_Q_RL = 95
        low_percentile_Q_RL = 5
        low_perc_Q_RL, up_perc_Q_RL = np.percentile(
            Q_RL.loc[:, t], [low_percentile_Q_RL, up_percentile_Q_RL])
        # print('t = %s low_perc_Q_RL = %s up_perc_Q_RL = %s' % (t, low_perc_Q_RL, up_perc_Q_RL))

        # trim outliers in the values of Q_RL
        flag_lower = Q_RL.loc[:, t].values < low_perc_Q_RL
        flag_upper = Q_RL.loc[:, t].values > up_perc_Q_RL
        Q_RL.loc[flag_lower, t] = low_perc_Q_RL
        Q_RL.loc[flag_upper, t] = up_perc_Q_RL

    endtime = time.time()

    ###########################################################################
    # Plot both simulations
    ###########################################################################

    f, axarr = plt.subplots(3, 1)
    f.subplots_adjust(hspace=.5)
    f.set_figheight(8.0)
    f.set_figwidth(8.0)

    step_size = N_MC // 10
    idx_plot = np.arange(step_size, N_MC, step_size)

    axarr[0].plot(a_star.T.iloc[:, idx_plot])
    axarr[0].set_xlabel('Time Steps')
    axarr[0].set_title(r'Optimal action $a_t^{\star}$')

    axarr[1].plot(Q_RL.T.iloc[:, idx_plot])
    axarr[1].set_xlabel('Time Steps')
    axarr[1].set_title(r'Q-function $Q_t^{\star} (X_t, a_t)$')

    axarr[2].plot(Q_star.T.iloc[:, idx_plot])
    axarr[2].set_xlabel('Time Steps')
    axarr[2].set_title(r'Optimal Q-function $Q_t^{\star} (X_t, a_t^{\star})$')
    # Save before show(): saving afterwards would write out a blank canvas.
    plt.savefig('QLBS_FQI_off_policy_summary_ATM_eta_%d.png' % (100 * eta),
                dpi=600)
    plt.show()

    # Note that a (from the DP method) and a_star (from the RL method) are
    # now identical by construction.

    # plot 1 path
    num_path = 300  # 430 # 510
    plt.plot(a.T.iloc[:, num_path], label="DP Action")
    plt.plot(a_star.T.iloc[:, num_path], label="RL Action")
    plt.legend()
    plt.xlabel('Time Steps')
    plt.title('Optimal Action Comparison Between DP and RL for a Sample Path')
    plt.show()

    compTime = endtime - starttime
    return [Q_star.iloc[:, 0], compTime]
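# Hypothetical driver for QLBS_EPUT(). The parameter values below are
# illustrative, not taken from any particular experiment; the final print
# uses the QLBS convention that the option price is the negative of the
# optimal Q-function at t = 0:
import numpy as np

S0 = 100.0           # initial stock price
mu = 0.05            # drift
sigma = 0.15         # volatility
r = 0.03             # risk-free rate
M = 1.0              # maturity in years
T = 24               # number of time steps
risk_lambda = 0.001  # risk aversion
N_MC = 10000         # number of Monte Carlo paths
delta_t = M / T      # time step
gamma = np.exp(-r * delta_t)  # one-time-step discount factor
K = 100.0            # strike

Q0, elapsed = QLBS_EPUT(S0, mu, sigma, r, M, T, risk_lambda, N_MC,
                        delta_t, gamma, K, rand_seed=42)
print('QLBS put price estimate: %.4f (computed in %.1f s)'
      % (-np.mean(Q0), elapsed))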
    # (Fragment: the tail of a digital/binary payoff helper; ST, K and iscall
    #  are defined by the enclosing function.)
    if iscall == 1:
        payoff = np.int32(ST >= K)
    else:
        payoff = np.int32(ST <= K)
    return payoff

# ### Spline basis functions definition

X_min = np.min(np.min(X))
X_max = np.max(np.max(X))
print('X.shape = ', X.shape)
print('X_min, X_max = ', X_min, X_max)

p = 4  # 3 <- cubic, 4 <- B-spline
ncolloc = 12
tau = np.linspace(X_min, X_max, ncolloc)
k = splinelab.aptknt(tau, p)
basis = bspline.Bspline(k, p)

f = plt.figure()
print('Number of points k = ', len(k))
basis.plot()

# ### Make data matrices with feature values
#
# "Features" here are the values of basis functions at data points.
# The outputs are 3D arrays of dimensions num_tSteps x num_MC x num_basis.

num_t_steps = T + 1
num_basis = ncolloc

data_mat_t = np.zeros((num_t_steps, N_MC, num_basis))
print('num_basis = ', num_basis)
print('dim data_mat_t = ', data_mat_t.shape)

# fill it: expand the function in a finite-dimensional space
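# The fill loop itself is cut off in this fragment; based on the identical
# step inside QLBS_EPUT() above, it would look like this:
t_0 = time.time()
for i in np.arange(num_t_steps):
    x = X.values[:, i]
    data_mat_t[i, :, :] = np.array([basis(el) for el in x])
t_end = time.time()
print('Time for basis expansion: ', t_end - t_0, 'seconds')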