def readApprentice(fname):
    """
    Read an apprentice JSON file and return the approximation stored in it.

    The reader does not inspect the file to find out which kind of
    approximation it holds. Instead it first tries to construct a
    RationalApproximation from the file and, if that raises, falls back to
    a PolynomialApproximation.

    Parameters
    ----------
    fname : str
        Path of the JSON file to read.

    Returns
    -------
    apprentice.RationalApproximation or apprentice.PolynomialApproximation

    Raises
    ------
    FileNotFoundError
        If ``fname`` does not exist. (A subclass of Exception, so callers
        that catch Exception keep working.)
    """
    import os

    # Fail fast on a missing file before paying for the apprentice import.
    if not os.path.exists(fname):
        raise FileNotFoundError("File {} not found".format(fname))

    import apprentice

    try:
        return apprentice.RationalApproximation(fname=fname)
    except Exception:
        # Not a bare ``except``: KeyboardInterrupt/SystemExit must propagate.
        return apprentice.PolynomialApproximation(fname=fname)
def mkBestRA(X, Y, pnames, split=0.7, norm=2, m_max=5, n_max=None, f_plot=None):
    """
    Scan all admissible orders (m, n), pick the one with the smallest test
    norm and return an approximation of that order fitted to all of (X, Y).

    Parameters
    ----------
    X : 2D array, shape (N, dim)
        Parameter points.
    Y : array of length N
        Function values at X.
    pnames : passed through to the approximation constructors.
    split : float
        ceil(split * N) indices are drawn for training.
    norm : only forwarded to mkPlotNorm; raNorm is called with its default.
    m_max, n_max : int or None
        Upper limits on the numerator / denominator order. Each limit is
        applied independently; None disables it.
    f_plot : if not None, mkPlotNorm(d_norm, f_plot, norm) is called.

    Returns
    -------
    apprentice.PolynomialApproximation if the winning order has n == 0,
    otherwise apprentice.RationalApproximation.

    Notes
    -----
    NOTE(review): np.random.choice is called with its default arguments,
    i.e. it samples with replacement, so the training indices may contain
    duplicates. Kept as is so results for a given random state are unchanged.
    """
    import operator

    _N, _dim = X.shape

    i_train = sorted(
        list(np.random.choice(range(_N), int(np.ceil(split * _N)))))
    # Set lookup instead of a list scan keeps the split linear in N.
    train_lookup = set(i_train)
    i_test = [i for i in range(_N) if i not in train_lookup]

    N_train = len(i_train)

    orders = apprentice.tools.possibleOrders(N_train, _dim, mirror=True)
    # Apply both limits independently (previously m_max was ignored unless
    # n_max was given as well).
    if n_max is not None:
        orders = [o for o in orders if o[1] <= n_max]
    if m_max is not None:
        orders = [o for o in orders if o[0] <= m_max]

    d_RA = {}
    for o in orders:
        if o[1] == 0:
            d_RA[o] = apprentice.PolynomialApproximation(
                X[i_train], Y[i_train], order=o[0], pnames=pnames)
        else:
            d_RA[o] = apprentice.RationalApproximation(
                X[i_train], Y[i_train], order=o, pnames=pnames)

    d_norm = {o: raNorm(d_RA[o], X[i_test], Y[i_test]) for o in orders}
    sorted_norm = sorted(d_norm.items(), key=operator.itemgetter(1))

    if f_plot is not None:
        mkPlotNorm(d_norm, f_plot, norm)

    best_order, best_norm = sorted_norm[0]
    print("Winner: m={} n={} with L2={}".format(*best_order, best_norm))

    # Refit on the full data set with the same class used during the scan.
    if best_order[1] == 0:
        return apprentice.PolynomialApproximation(
            X, Y, order=best_order[0], pnames=pnames)
    return apprentice.RationalApproximation(
        X, Y, order=best_order, pnames=pnames)
def readApprox(fname, set_structures=True, usethese=None):
    """
    Read a JSON file holding one approximation dictionary per bin.

    Top-level keys starting with "__" are skipped. An entry that contains
    the key "n" is loaded as a RationalApproximation, any other entry as a
    PolynomialApproximation.

    Parameters
    ----------
    fname : str
        Path of the JSON file.
    set_structures : bool
        Forwarded to PolynomialApproximation only.
    usethese : container or None
        If given, only bin ids contained in it are loaded.

    Returns
    -------
    (binids, approximations) : tuple of two lists in the same order, the
    order being whatever apprentice.tools.sorted_nicely produces.
    """
    import json
    import apprentice

    with open(fname) as f:
        rd = json.load(f)

    # The module is imported as ``apprentice`` here; the former ``app.tools``
    # referred to a name that is not defined in this function.
    binids = apprentice.tools.sorted_nicely(rd.keys())
    binids = [x for x in binids if not x.startswith("__")]
    if usethese is not None:
        binids = [x for x in binids if x in usethese]

    APP = {}
    for b in binids:
        if "n" in rd[b]:
            # FIXME what about set_structures for rationals?
            APP[b] = apprentice.RationalApproximation(initDict=rd[b])
        else:
            APP[b] = apprentice.PolynomialApproximation(
                initDict=rd[b], set_structures=set_structures)

    return binids, [APP[b] for b in binids]
def runPAforsimcoeffs(X, Y, fndesc, m, n, ts, outfile):
    """
    Fit a polynomial with at least as many coefficients as a rational
    approximation of order (m, n) and dump it, with the fit time, to JSON.

    The polynomial degree is the smallest ``padeg`` for which
    numCoeffsPoly(dim, padeg) >= numCoeffsPoly(dim, m) + numCoeffsPoly(dim, n).
    The first ``trainingsize`` points of X / Y are used for the fit.

    Parameters
    ----------
    X, Y : arrays; the dimension is taken from X[0].shape[0].
    fndesc : unused in this function.
    m, n : int
        Orders of the rational approximation to match.
    ts : str
        Training scale: ".5x"/"0.5x", "1x", "2x" (multiples of the number
        of polynomial coefficients) or "Cp" (all points).
    outfile : str
        Path of the JSON file to write.

    Raises
    ------
    Exception
        If ``ts`` is unknown or there are fewer points than required.
    """
    import json

    dim = X[0].shape[0]
    totalcoeffsinRA = (apprentice.tools.numCoeffsPoly(dim, m) +
                       apprentice.tools.numCoeffsPoly(dim, n))

    padeg = 0
    pacoeffs = apprentice.tools.numCoeffsPoly(dim, padeg)
    while pacoeffs < totalcoeffsinRA:
        padeg += 1
        pacoeffs = apprentice.tools.numCoeffsPoly(dim, padeg)

    if ts in (".5x", "0.5x"):
        # Must be an int: it is used as the stop value of range() below.
        trainingsize = int(0.5 * pacoeffs)
    elif ts == "1x":
        trainingsize = pacoeffs
    elif ts == "2x":
        trainingsize = 2 * pacoeffs
    elif ts == "Cp":
        trainingsize = len(X)
    else:
        raise Exception("Training scale %s unknown" % (ts))

    if trainingsize > len(X):
        raise Exception(
            "Not enough data for padeg = %d and dim = %d. Require %d (%s) and only have %d"
            % (padeg, dim, trainingsize, ts, len(X)))

    train = range(trainingsize)

    start = timer()
    pa = apprentice.PolynomialApproximation(X[train], Y[train], order=padeg)
    end = timer()

    padict = pa.asDict
    padict["log"] = {"fittime": end - start}

    with open(outfile, "w") as f:
        json.dump(padict, f, indent=4, sort_keys=True)
def plotoptimaldegree(folder, testfile, desc, bottom_or_all, opt):
    """
    Compare stored rational and polynomial approximations on test data and
    write a compromise plot plus a JSON dump into ``folder``/plots.

    Rational approximations are read from ``folder``/out/*.json (files with
    n == 0 are skipped there), polynomial ones from ``folder``/outpa/*.json
    (skipping orders already seen). Coefficients with absolute value below
    1e-6 are set to zero before the approximation objects are built.

    Parameters
    ----------
    folder : str
        Directory with the "out" and "outpa" sub-directories; "plots" is
        created in it if missing.
    testfile : str
        Test data; read with apprentice.tools.readData, falling back to
        apprentice.tools.readH5 if that raises.
    desc : str
        Used in output file names and forwarded to mkPlotCompromise.
    bottom_or_all : "bottom" or "all"
        "all" tests on every point; "bottom" skips the leading
        ``trainingsize`` points, derived from the largest polynomial order
        and the training scale found in the last rational file read.
    opt : "opt1" ... "opt7"
        Selects which quantities are plotted against each other.

    Raises
    ------
    Exception
        For an unknown ``bottom_or_all``, ``opt`` or training scale, or if
        at most one test point remains. Both option arguments are checked
        before any file is touched.
    """
    import glob
    import json

    # Validate the option arguments up front so a typo does not cost a full
    # evaluation run or leave a freshly created plots directory behind.
    if bottom_or_all not in ("bottom", "all"):
        raise Exception(
            "bottom or all? Option ambiguous. Check spelling and/or usage")
    if opt not in ("opt1", "opt2", "opt3", "opt4", "opt5", "opt6", "opt7"):
        raise Exception("option ambiguous/not defined")

    filelistRA = sorted(glob.glob(folder + "/out/*.json"))
    filelistPA = sorted(glob.glob(folder + "/outpa/*.json"))

    if not os.path.exists(folder + "/plots"):
        os.mkdir(folder + '/plots')

    maxpap = 0
    dim = 0
    ts = ""
    orders = []
    APP = []
    nnzthreshold = 1e-6

    def _zero_small(coeffs):
        # In-place: replace coefficients below the threshold by exactly 0.
        for i, c in enumerate(coeffs):
            if abs(c) < nnzthreshold:
                coeffs[i] = 0.

    for fname in filelistRA:
        with open(fname, 'r') as fn:
            datastore = json.load(fn)
        m = datastore['m']
        n = datastore['n']
        ts = datastore['trainingscale']
        if n != 0:
            orders.append((m, n))
            _zero_small(datastore['pcoeff'])
            _zero_small(datastore['qcoeff'])
            APP.append(apprentice.RationalApproximationSIP(datastore))
        dim = datastore['dim']

    for fname in filelistPA:
        with open(fname, 'r') as fn:
            datastore = json.load(fn)
        m = datastore['m']
        if (m, 0) in orders:
            continue
        orders.append((m, 0))
        _zero_small(datastore['pcoeff'])
        APP.append(apprentice.PolynomialApproximation(initDict=datastore))
        if m > maxpap:
            maxpap = m

    try:
        X, Y = apprentice.tools.readData(testfile)
    except Exception:
        # Not a bare ``except``: let KeyboardInterrupt/SystemExit through.
        DATA = apprentice.tools.readH5(testfile, [0])
        X, Y = DATA[0]

    if bottom_or_all == "bottom":
        trainingsize = apprentice.tools.numCoeffsPoly(dim, maxpap)
        if ts in (".5x", "0.5x"):
            # Must be an int: it is used as a slice/range boundary.
            trainingsize = int(0.5 * trainingsize)
        elif ts == "1x":
            pass
        elif ts == "2x":
            trainingsize = 2 * trainingsize
        elif ts == "Cp":
            trainingsize = len(X)
        else:
            raise Exception("Training scale %s unknown" % (ts))
        X_test = X[trainingsize:]
        Y_test = Y[trainingsize:]
    else:
        X_test = X
        Y_test = Y

    if len(X_test) <= 1:
        raise Exception("Not enough testing data")

    L2 = [np.sqrt(raNorm(app, X_test, Y_test, 2)) for app in APP]
    NNZ = [apprentice.tools.numNonZeroCoeff(app, nnzthreshold) for app in APP]
    VAR = [l / m for l, m in zip(L2, NNZ)]

    # Total number of coefficients per order.
    NC = []
    for m, n in orders:
        if n == 0:
            NC.append(apprentice.tools.numCoeffsPoly(dim, m))
        else:
            NC.append(apprentice.tools.numCoeffsRapp(dim, (m, n)))

    jsdump = {'dim': dim}
    nopoints = len(Y_test)

    if opt == "opt1":
        Xcomp = NC
        Ycomp = [v / n for v, n in zip(VAR, NC)]
        Xdesc = "$N_\\mathrm{\\times N_\\mathrm{coeff}}$"
        Ydesc = "$\\frac{L_2^\\mathrm{test}}{N_\\mathrm{non-zero}}$"
        logx = True
        logy = True
    elif opt == "opt2":
        Xcomp = NNZ
        Ycomp = L2
        Xdesc = "$N_\\mathrm{non-zero}$"
        Ydesc = "$L_2^\\mathrm{test}$"
        logx = True
        logy = True
    elif opt == "opt3":
        Xcomp = [2 * i for i in NC]
        Ycomp = [nopoints * np.log(i / nopoints) for i in L2]
        Xdesc = "$2N_\\mathrm{coeff}$"
        Ydesc = "$nlog\\left(\\frac{L_2^\\mathrm{test}}{n}\\right)$"
        logx = False
        logy = False
    elif opt == "opt4":
        Xcomp = [i * np.log(nopoints) for i in NC]
        Ycomp = [nopoints * np.log(i / nopoints) for i in L2]
        Xdesc = "$N_\\mathrm{coeff}log(n)$"
        Ydesc = "$nlog\\left(\\frac{L_2^\\mathrm{test}}{n}\\right)$"
        logx = False
        logy = False
    elif opt == "opt5":
        Xcomp = NC
        Ycomp = [l * n / (n - m + 1) for l, m, n in zip(L2, NNZ, NC)]
        Xdesc = "$N_\\mathrm{coeff}$"
        Ydesc = "$\\frac{L_2^\\mathrm{test}\\times N_\\mathrm{coeff}}{N_\\mathrm{coeff} - N_\\mathrm{non-zero}+1}$"
        logx = False
        logy = True
    elif opt == "opt6":
        Xcomp = NC
        Ycomp = L2
        Xdesc = "$\\log_{10}(N_\\mathrm{coeff})$"
        Ydesc = "$\\log_{10}(L_2^\\mathrm{test})$"
        logx = True
        logy = True
    elif opt == "opt7":
        Xcomp = [2 * i for i in NC]
        Ycomp = L2
        Xdesc = "$N_\\mathrm{coeff}$"
        Ydesc = "$\\log_{10}(L_2^\\mathrm{test})$"
        logx = False
        logy = True
    else:
        # Unreachable after the up-front check; kept as a safety net.
        raise Exception("option ambiguous/not defined")

    outfilepareton = "%s/plots/Poptdeg_%s_pareton_%s.png" % (folder, desc, opt)
    jsdump = mkPlotCompromise(list(zip(Xcomp, Ycomp)),
                              desc,
                              outfilepareton,
                              orders,
                              ly=Ydesc,
                              lx=Xdesc,
                              logy=logy,
                              logx=logx,
                              normalize_data=False,
                              jsdump=jsdump)

    outfileparetojsdump = "%s/plots/Joptdeg_%s_jsdump_%s.json" % (folder, desc,
                                                                  opt)
    with open(outfileparetojsdump, "w") as f:
        json.dump(jsdump, f, indent=4, sort_keys=True)

    print("pareton written to %s" % (outfilepareton))
    print("paretojsdump written to %s" % (outfileparetojsdump))
def calcApprox(X,
               Y,
               order,
               pnames,
               mode="sip",
               onbtol=-1,
               debug=False,
               testforPoles=100,
               ftol=1e-9,
               itslsqp=200):
    """
    Build one approximation of the given order and report whether its
    denominator changes sign.

    Parameters
    ----------
    X, Y : training data, forwarded to the apprentice constructors.
    order : (M, N)
        Numerator / denominator order. N == 0 yields a
        PolynomialApproximation and ``mode`` is not consulted.
    pnames : forwarded to the constructors.
    mode : str
        "la"    -- RationalApproximation with strategy=2
        "onb"   -- RationalApproximationONB with tol=onbtol
        "sip"   -- RationalApproximationSLSQP
        "lasip" -- "la" first; if denomChangesSignMS flags it, redo with
                   RationalApproximationSLSQP
    onbtol, debug, ftol, itslsqp : forwarded to the respective constructor.
    testforPoles : second argument of every denomChangesSignMS call.

    Returns
    -------
    (approximation, hasPole)
        ``hasPole`` is False for polynomials, otherwise the first element of
        denomChangesSignMS(approximation, testforPoles). If a fit wrapped in
        a try block ("sip", "lasip") raises, the exception is printed and
        (None, True) is returned.

    Raises
    ------
    Exception
        If N != 0 and ``mode`` is not one of the modes listed above.
    """
    M, N = order

    # Reject an unknown mode before importing anything.
    if N != 0 and mode not in ("la", "onb", "sip", "lasip"):
        raise Exception(
            "Specified mode {} does not exist, choose la|onb|sip|lasip".format(
                mode))

    import apprentice as app

    if N == 0:
        return app.PolynomialApproximation(X, Y, order=M, pnames=pnames), False

    def _fit_slsqp():
        # Shared by the "sip" mode and the "lasip" fallback.
        return app.RationalApproximationSLSQP(X,
                                              Y,
                                              order=(M, N),
                                              pnames=pnames,
                                              debug=debug,
                                              ftol=ftol,
                                              itslsqp=itslsqp)

    if mode == "la":
        _app = app.RationalApproximation(X,
                                         Y,
                                         order=(M, N),
                                         pnames=pnames,
                                         strategy=2)
    elif mode == "onb":
        _app = app.RationalApproximationONB(X,
                                            Y,
                                            order=(M, N),
                                            pnames=pnames,
                                            tol=onbtol,
                                            debug=debug)
    elif mode == "sip":
        try:
            _app = _fit_slsqp()
        except Exception as e:
            print("Exception:", e)
            return None, True
    else:  # mode == "lasip"
        try:
            _app = app.RationalApproximation(X,
                                             Y,
                                             order=(M, N),
                                             pnames=pnames,
                                             strategy=2,
                                             debug=debug)
        except Exception as e:
            print("Exception:", e)
            return None, True
        # Use the caller's testforPoles here too (was a hard-coded 100,
        # identical to the default).
        if denomChangesSignMS(_app, testforPoles)[0]:
            try:
                _app = _fit_slsqp()
            except Exception as e:
                print("Exception:", e)
                return None, True

    hasPole = denomChangesSignMS(_app, testforPoles)[0]
    return _app, hasPole
def mkBestRACPL(X,
                Y,
                pnames=None,
                train_fact=2,
                split=0.6,
                norm=2,
                m_max=None,
                n_max=None,
                f_plot=None,
                seed=1234,
                allow_const=False,
                debug=0):
    """
    Scan admissible orders (m, n), rank them by (number of non-zero
    coefficients) * (L2 error on the full data set) and return the
    best-ranked approximation, refitted on all of (X, Y), whose denominator
    does not change sign.

    Parameters
    ----------
    X : 2D array, shape (N, dim)
    Y : array of length N
    pnames : forwarded to the approximation constructors.
    train_fact : number
        Each order is fitted on int(train_fact * n_coefficients) points
        drawn from the training indices. If train_fact > 1, orders needing
        more points than there are training indices are dropped.
    split : float
        ceil(split * N) indices are drawn as training indices.
    norm : unused in this function.
    m_max, n_max : int or None
        Optional upper limits on numerator / denominator order.
    f_plot : if truthy, mkPlotCompromise2 writes "NCVAR_<f_plot>".
    seed : passed to np.random.seed.
    allow_const : bool
        If False, the first entry of the sorted order list is dropped.
    debug : int
        Number of best-ranked candidates to print.

    Returns
    -------
    apprentice.PolynomialApproximation, apprentice.RationalApproximation,
    or None if every candidate was rejected.

    Side effects
    ------------
    Always calls mkPlotParetoSquare(..., "paretomn.pdf").

    Notes
    -----
    NOTE(review): np.random.choice is called with default arguments
    (sampling with replacement), so index sets may contain duplicates.
    """
    import time
    import apprentice

    _N, _dim = X.shape
    np.random.seed(seed)

    # Training indices (the complementary test indices were never used here;
    # the errors below are evaluated on the complete data set).
    i_train = sorted(
        list(np.random.choice(range(_N), int(np.ceil(split * _N)))))
    N_train = len(i_train)

    def _n_coeffs(o):
        # Number of coefficients of a polynomial (n == 0) or rational order.
        if o[1] == 0:
            return apprentice.tools.numCoeffsPoly(_dim, o[0])
        return apprentice.tools.numCoeffsRapp(_dim, o)

    orders = sorted(apprentice.tools.possibleOrders(N_train, _dim,
                                                    mirror=True))
    if not allow_const:
        orders = orders[1:]
    if n_max is not None:
        orders = [o for o in orders if o[1] <= n_max]
    if m_max is not None:
        orders = [o for o in orders if o[0] <= m_max]

    # Discard those orders where we do not have enough training points
    if train_fact > 1:
        orders = sorted(o for o in orders
                        if train_fact * _n_coeffs(o) <= N_train)

    APP = []
    t1 = time.time()
    for o in orders:
        m, n = o
        i_train_o = np.random.choice(i_train, int(train_fact * _n_coeffs(o)))
        if n == 0:
            APP.append(
                apprentice.PolynomialApproximation(X[i_train_o],
                                                   Y[i_train_o],
                                                   order=m,
                                                   pnames=pnames))
        else:
            APP.append(
                apprentice.RationalApproximation(X[i_train_o],
                                                 Y[i_train_o],
                                                 order=(m, n),
                                                 strategy=1,
                                                 pnames=pnames))
    t2 = time.time()
    print("Calculating {} approximations took {} seconds".format(
        len(orders), t2 - t1))

    L2 = [np.sqrt(raNorm(app, X, Y, 2)) for app in APP]
    Linf = [raNormInf(app, X, Y) for app in APP]
    NNZ = [apprentice.tools.numNonZeroCoeff(app, 1e-6) for app in APP]
    VAR = [l / m for l, m in zip(L2, NNZ)]

    ncM = [apprentice.tools.numCoeffsPoly(_dim, m) for m, _ in orders]
    ncN = [apprentice.tools.numCoeffsPoly(_dim, n) for _, n in orders]
    NC = [_n_coeffs(o) for o in orders]

    # currently, this zips the number of coefficients for P and Q, the L2
    # norm divided by the number of non-zero coefficients and, for
    # convenience, the orders of the polynomials
    D3D = np.array([(m, n, v, o[0], o[1])
                    for m, n, v, o in zip(ncM, ncN, VAR, orders)])
    mkPlotParetoSquare(D3D, "paretomn.pdf")

    CMP = [a * b for a, b in zip(NNZ, L2)]

    if f_plot:
        mkPlotCompromise2(
            list(zip(NC, VAR)),
            "NCVAR_{}".format(f_plot),
            orders,
            ly="$\\frac{L_2^\\mathrm{test}}{N_\\mathrm{non-zero}}$",
            lx="$N_\\mathrm{coeff}$",
            logy=True,
            logx=True,
            normalize_data=False)

    # Proactive memory cleanup
    del APP

    # Rank by index rather than by value lookup: CMP.index(value) returns
    # the same position for tied scores, so a tied candidate was never tried.
    ranking = sorted(range(len(CMP)), key=CMP.__getitem__)

    for i in ranking[0:debug]:
        print("{} -- L2: {:10.4e} | Loo: {:10.4e} | NNZ : {} | VVV : {:10.4e}".
              format(orders[i], L2[i], Linf[i], NNZ[i], VAR[i]))

    for i in ranking:
        oo = orders[i]
        # If it is a polynomial we are done --- return the approximation
        # that uses all data
        if oo[1] == 0:
            return apprentice.PolynomialApproximation(X,
                                                      Y,
                                                      order=oo[0],
                                                      pnames=pnames)
        APP_test = apprentice.RationalApproximation(X,
                                                    Y,
                                                    order=oo,
                                                    strategy=1,
                                                    pnames=pnames)
        bad = denomChangesSign(APP_test, APP_test._scaler.box,
                               APP_test._scaler.center)[0]
        if bad:
            print("Cannot use {} due to pole in denominator".format(oo))
        else:
            return APP_test
def mkBestRASIP(X,
                Y,
                pnames=None,
                train_fact=1,
                split=0.5,
                norm=2,
                m_max=None,
                n_max=None,
                f_plot=None,
                seed=1234,
                use_all=False):
    """
    Scan admissible orders (m, n) and return the approximation with the
    smallest L2 test error whose denominator does not change sign.

    For every order one approximation is fitted on
    int(train_fact * n_coefficients) points drawn from the training
    indices, and a second one on all training indices (its test error is
    only printed for comparison). Candidates are tried in order of
    increasing L2 test error; a polynomial (n == 0) is accepted at once, a
    rational one only if denomChangesSign does not flag it.

    Parameters
    ----------
    X : 2D array, shape (N, dim)
    Y : array of length N
    pnames : forwarded to the approximation constructors.
    train_fact : number
        See above; if > 1, orders needing more points than there are
        training indices are dropped.
    split : float
        ceil(split * N) indices are drawn as training indices; indices not
        drawn form the test set.
    norm, f_plot, use_all : unused in this function.
    m_max, n_max : int or None
        Optional upper limits on numerator / denominator order.
    seed : passed to np.random.seed.

    Returns
    -------
    One of the approximations fitted during the scan (trained on the
    sub-sample, not refitted), or None if every candidate was rejected.

    Raises
    ------
    ValueError
        If no candidate order is left after filtering.

    Notes
    -----
    NOTE(review): np.random.choice is called with default arguments
    (sampling with replacement), so index sets may contain duplicates.
    """
    import time
    import apprentice

    np.random.seed(seed)
    _N, _dim = X.shape

    # Split dataset in training and test sample
    i_train = sorted(
        list(np.random.choice(range(_N), int(np.ceil(split * _N)))))
    train_lookup = set(i_train)
    i_test = [i for i in range(_N) if i not in train_lookup]
    N_train = len(i_train)

    def _n_coeffs(o):
        # Number of coefficients of a polynomial (n == 0) or rational order.
        if o[1] == 0:
            return apprentice.tools.numCoeffsPoly(_dim, o[0])
        return apprentice.tools.numCoeffsRapp(_dim, o)

    orders = sorted(apprentice.tools.possibleOrders(N_train, _dim,
                                                    mirror=True))
    if n_max is not None:
        orders = [o for o in orders if o[1] <= n_max]
    if m_max is not None:
        orders = [o for o in orders if o[0] <= m_max]

    # Discard those orders where we do not have enough training points
    if train_fact > 1:
        orders = sorted(o for o in orders
                        if train_fact * _n_coeffs(o) <= N_train)

    if not orders:
        raise ValueError("No candidate orders left to test")

    APP, APPcpl = [], []
    t1 = time.time()
    for o in orders:
        m, n = o
        i_train_o = np.random.choice(i_train, int(train_fact * _n_coeffs(o)))
        if n == 0:
            APP.append(
                apprentice.PolynomialApproximation(X[i_train_o],
                                                   Y[i_train_o],
                                                   order=m,
                                                   pnames=pnames))
            APPcpl.append(
                apprentice.PolynomialApproximation(X[i_train],
                                                   Y[i_train],
                                                   order=m,
                                                   pnames=pnames))
        else:
            APP.append(
                apprentice.RationalApproximation(X[i_train_o],
                                                 Y[i_train_o],
                                                 order=(m, n),
                                                 strategy=1,
                                                 pnames=pnames))
            APPcpl.append(
                apprentice.RationalApproximation(X[i_train],
                                                 Y[i_train],
                                                 order=(m, n),
                                                 strategy=1,
                                                 pnames=pnames))
    t2 = time.time()
    print("Calculating {} approximations took {} seconds".format(
        len(orders), t2 - t1))

    X_test, Y_test = X[i_test], Y[i_test]
    L2 = [np.sqrt(raNorm(app, X_test, Y_test, 2)) for app in APP]
    L2cpl = [np.sqrt(raNorm(app, X_test, Y_test, 2)) for app in APPcpl]
    Linf = [raNormInf(app, X_test, Y_test) for app in APP]

    # Candidate indices by increasing L2. Ranking by index (stable sort)
    # instead of L2.index(value) keeps candidates with tied errors distinct.
    ranking = sorted(range(len(L2)), key=L2.__getitem__)
    winner = ranking[0]
    winnerinf = Linf.index(min(Linf))

    o_win = orders[winner]
    APP_win = APP[winner]

    print("Winnerinf: {} with Loo {}".format(orders[winnerinf], min(Linf)))
    for i in ranking[0:4]:
        print("{} -- L2: {:10.2e} || {:10.2e} -- Loo: {:10.2e}".format(
            orders[i], L2[i], L2cpl[i], Linf[i]))

    # If it is a polynomial we are done
    if o_win[1] == 0:
        return APP_win

    # Let's check for poles in the denominator
    isbad = denomChangesSign(APP_win, APP_win._scaler.box,
                             APP_win._scaler.center)[0]
    if not isbad:
        return APP_win

    print("Can't use this guy {} (L2: {:10.2e})".format(o_win, L2[winner]))
    for i in ranking[1:]:
        print("Testing {} (L2: {:10.2e})".format(orders[i], L2[i]))
        if orders[i][1] == 0:
            print("This is a polynomial,done")
            return APP[i]
        bad = denomChangesSign(APP[i], APP[i]._scaler.box,
                               APP[i]._scaler.center)[0]
        if bad:
            # This message was built but never printed before.
            print("Cannot use {} either".format(orders[i]))
        else:
            return APP[i]