def manageArguments(xdata, zdata, data, debug, kwargs):
    """
    Parse additional input options.

    The parsed keyword options are stored on the shared ``data`` and
    ``debug`` dictionaries (described in shared.py); multiple keys are
    used to make writing the .alm file easier.

    Args:
        xdata (numpy.array or list[real]): input (training) data
        zdata (numpy.array or list[real]): response (training) data
        data: shared alamo data options
        debug: additional options that will be applied to the .alm file
        kwargs: user-supplied keyword options
    """
    parseKwargs(data, debug, kwargs)

    # Check to see if a simwrapper should be built
    if debug['simwrap'] or 'simulator' in kwargs.keys():
        buildSimWrapper(data, debug)

    # Specific check to see if the labels of the response variables
    # should be used in the output dictionary.
    # This is important for systematic testing vs. single model input.
    # BUG FIX: the forced enable below was previously nested under
    # "if debug['outkeys']", which made it a no-op (it only set the flag
    # to True when it was already True). Multiple outputs must use
    # outkeys, so the check is now independent of the current setting.
    if data['opts']['noutputs'] > 1:
        # Must use outkeys for multiple outputs
        writethis('outkeys set to TRUE for multiple outputs')
        debug['outkeys'] = True

    # Construct xmin and xmax vectors based on training data if not provided
    if 'xmin' not in kwargs.keys():
        constructXBounds(xdata, zdata, data, debug)
def getValidationData(vargs, data, debug):
    """
    Structure validation data for the model and record its size.

    Modifies data['opts'] (sets 'nvaldata') and flips debug['validation'].

    Args:
        vargs: tuple (valxdata, valzdata) of validation data, or () if none
        data: shared alamo data options
        debug: additional options that will be applied to the .alm file

    Returns:
        (xvaldata, zvaldata) with zvaldata promoted to 2-D, or None when
        no validation data was supplied.
    """
    if vargs == ():
        return None

    debug['validation'] = True
    xvaldata, zvaldata = vargs[0], vargs[1]

    xshape = np.shape(xvaldata)
    data['opts']['nvaldata'] = xshape[0]

    # Promote a 1-D response vector to a single-column 2-D array
    if len(np.shape(zvaldata)) == 1:
        zvaldata = np.reshape(zvaldata, (data['opts']['nvaldata'], 1))

    # Validation inputs must have the same width as the training inputs
    if xshape[1] != data['opts']['ninputs']:
        writethis('Number of input variables inconsistent between x and xval')
        almerror('p2')

    # Validation responses must match (nvaldata, noutputs)
    zshape = np.shape(zvaldata)
    if zshape[0] != data['opts']['nvaldata'] or zshape[1] != data['opts']['noutputs']:
        writethis('Problem with zval')
        almerror('p2')

    return xvaldata, zvaldata
def expandOutput(xdata, zdata, vargs, data, debug):
    """
    Expand output to validation metrics and labels.

    Initializes the data['results'] containers that readTraceFile later
    fills in.

    Args:
        xdata (numpy.array or list[real]): input (training) data
        zdata (numpy.array or list[real]): response (training) data
        vargs: validation data sequence; empty when no validation data
        data: shared alamo data options
        debug: shared default options for the .alm file
    """
    if debug['expandoutput']:
        data['results']['xdata'] = xdata
        data['results']['zdata'] = zdata
        data['results']['xlabels'] = data['labs']['savexlabels']
        data['results']['zlabels'] = data['labs']['savezlabels']
    # Availability probe: the tokenized models produced later require sympy,
    # so warn as early as possible if it cannot be imported.
    try:
        import sympy  # noqa: F401
        from sympy.parsing.sympy_parser import parse_expr  # noqa: F401
        from sympy import symbols, lambdify  # noqa: F401
    except Exception:
        # BUG FIX: narrowed the bare "except:" and corrected the message
        # ('install' -> 'import', matching readTraceFile)
        writethis('Cannot import sympy')
    if not debug['expandoutput']:
        data['results']['model'] = {}
        data['results']['f(model)'] = {}
    else:
        # One ordered container per reported metric, keyed by output label
        for key in ['model', 'f(model)', 'ssr', 'R2', 'size', 'rmse',
                    'nbas', 'totaltime', 'olrtime', 'miptime', 'clrtime',
                    'othertime', 'version', 'status', 'madp', 'numolr',
                    'nummip', 'numclr', 'ninputs']:
            data['results'][key] = collections.OrderedDict()
        if len(vargs) > 0:
            data['results']['ssrval'] = collections.OrderedDict()
            data['results']['R2val'] = collections.OrderedDict()
            data['results']['rmseval'] = collections.OrderedDict()
            data['results']['madpval'] = collections.OrderedDict()
        if debug['loo']:
            # BUG FIX: was "data['results']['Q2'] = data['results']['Q2']",
            # which raises KeyError when Q2 has not been computed yet.
            # Preserve an existing Q2 (the leave-one-out loop computes it
            # before the final call) and otherwise create an empty container.
            data['results'].setdefault('Q2', collections.OrderedDict())
def constructXBounds(xdata, zdata, data, debug):
    """
    Construct the xmin/xmax bound strings for alamo when none are given.

    Scans the training inputs column by column and stores the
    space-separated minimum and maximum of each input in
    data['set4']['xmin'] / data['set4']['xmax'].

    Args:
        xdata (numpy.array or list[real]): input (training) data
        zdata (numpy.array or list[real]): response data (unused here)
        data: shared alamo data options
        debug: additional options applied to the .alm (supplies 'bignum')
    """
    writethis('min and max values of inputs are not provided, they will be calculated from the training data\n')
    mins = []
    maxs = []
    nrows = data['opts']['ndata']
    for col in range(data['opts']['ninputs']):
        column = [xdata[row][col] for row in range(nrows)]
        # Seed with +/- bignum so any real data point replaces the seed;
        # min/max with key=float keep the earliest of tied candidates,
        # matching the original strict-inequality scan.
        mins.append(str(min([debug['bignum']] + column, key=float)))
        maxs.append(str(max([-1 * debug['bignum']] + column, key=float)))
    # Each entry is followed by a single space, as the .alm writer expects
    data['set4']['xmin'] = ''.join(s + ' ' for s in mins)
    data['set4']['xmax'] = ''.join(s + ' ' for s in maxs)
def almplot(res, show=True):
    """
    Plot an ALAMO model and, when available, its confidence band.

    Args:
        res: results dictionary from doalamo; must contain 'model' and may
            contain 'conf_inv' (list of '+/-' confidence-interval strings)
        show (bool): call plt.show() after building the figure

    NOTE(review): the plotted expression is hard-coded as
    c0*t**1.2 - c1*t**2 - c2 over t in [0.08, 1.7] — this assumes a model
    with exactly three terms of that form; verify against the caller.
    """
    try:
        import matplotlib.pyplot as plt
        import numpy as np
        from alamopy.writethis import writethis
    except Exception:
        # NOTE(review): if these imports fail, writethis itself may be
        # unbound here, and plt is still used below — TODO confirm intent
        writethis("Cannot plot, possibly missing matplotlib package")
    # Normalize the model string: make every subtraction an explicit
    # "+ -" term, drop the left-hand side, then split into terms
    model = res['model'].replace(' - ', ' + -')
    model = model.split('=')[1]
    model = model.split(' + ')
    if model[0] == ' ':
        # if there are more than one terms, the first split is ' '
        model = model[1:]
    ndp = 100  # number of plotted sample points
    t = np.linspace(0.08, 1.7, ndp)
    out = np.zeros([3, ndp])   # row 0: model; rows 1-2: confidence bounds
    clo = np.zeros(ndp)        # per-term lower coefficient bound
    chi = np.zeros(ndp)        # per-term upper coefficient bound
    coeff = np.zeros(ndp)      # per-term coefficients (oversized to ndp)
    for i in range(len(model)):
        # each term looks like '<coefficient> * <basis>'
        coeff[i] = float(model[i].split(' * ')[0])
        if 'conf_inv' in res.keys():
            clo[i] = coeff[i] - float(res['conf_inv'][i].split('+/-')[1])
            chi[i] = coeff[i] + float(res['conf_inv'][i].split('+/-')[1])
    for i in range(ndp):
        out[0, i] = float(coeff[0]) * t[i]**1.2 \
            - float(coeff[1]) * t[i]**2 - float(coeff[2])
        if 'conf_inv' in res.keys():  # If confidence intervals exist
            out[1, i] = clo[0] * t[i]**1.2 - chi[1] * t[i]**2 - chi[2]
            out[2, i] = chi[0] * t[i]**1.2 - clo[1] * t[i]**2 - clo[2]
    plt.plot(t, out[0], 'b-')
    if 'conf_inv' in res.keys():
        plt.plot(t, out[1], 'r--', t, out[2], 'r--')
    if show:
        plt.show()
def allcard(xdata, zdata, xval, zval, **kwargs):
    """
    Enumerate all model cardinalities via ccmiqp (modeler=6 with
    maxterms=1..N) and use validation / cross-validation error to pick
    the best model size.

    Args:
        xdata, zdata: training inputs / responses
        xval, zval: validation inputs / responses
        kwargs: doalamo options; 'cvfold' selects the strategy
            ('loo', 'valset', or an integer number of folds)

    Returns:
        dict: doalamo results for the selected cardinality, with
        'totaltime' replaced by the wall time of the whole search.
    """
    from idaes.surrogate import alamopy
    # PYLINT-TODO-FIX alamopy.writethis.writethis doesn't seem to exist
    # pylint: disable=import-error
    from alamopy.writethis import writethis
    # pylint: enable=import-error
    import numpy as np
    import math
    import random
    from random import shuffle
    from scipy.linalg import lstsq
    import os
    import time
    import sys
    random.seed(100)
    trans = list([
        'linfcns', 'expfcns', 'logfcns', 'sinfcns', 'cosfcns',
        'monomialpower', 'multi2power', 'multi3power', 'ratiopower'
    ])
    # transforms whose count grows combinatorially with the inputs
    et = list(['multi2power', 'multi3power', 'ratiopower'])
    ins = list(['cvfold'])
    # BUG FIX: give cvfold a default so a missing 'cvfold' option fails
    # with a clear value error below instead of a NameError here
    cvfold = None
    for opt in ins:
        if (opt in kwargs.keys()):
            cvfold = kwargs['cvfold']
    # Count candidate basis functions to bound the cardinality search
    ntrans = 0
    ndata = np.shape(xdata)[0]
    ninputs = np.shape(xdata)[1]
    for t in trans:
        if t in kwargs.keys():
            # BUG FIX: was "type(kwargs[t]) == list()", which compares a
            # type to an empty list and is always False, so list-valued
            # options were always counted as a single transform
            if isinstance(kwargs[t], list):
                nt = len(kwargs[t])
            else:
                nt = 1
            if t not in et:
                ntrans = ntrans + ninputs * nt
            else:
                ntrans = ntrans + math.factorial(ninputs) * nt
        else:
            kwargs[t] = 0
    rmseold = 0.0
    startt = time.time()
    oldres = {}
    oldp = ()
    ntrans = min(ntrans, 1000)
    # split training and validation data before looping through cc
    tlist = list([])
    vlist = list([])
    if (cvfold == 'loo'):
        # leave-one-out: one singleton validation set per data point
        for i in range(ndata):
            vlist.append(np.asarray([i]))
            temp = [x for x in range(ndata) if x != i]
            tlist.append(np.asarray(temp))
    else:
        # BUG FIX: range objects are immutable in Python 3; materialize a
        # list so random.shuffle can permute it in place
        temp = list(range(ndata))
        shuffle(temp)
        if (cvfold == 'valset'):
            # single 70/30 split
            # NOTE(review): the [0.7*ndata:-1] slice drops the last shuffled
            # point from the validation set — TODO confirm intended
            tlist = np.asarray(temp)[0:int(0.7 * ndata)]
            vlist = np.asarray(temp)[int(0.7 * ndata):-1]
        else:
            # k-fold: complement of each validation fold is its train set
            vlist = np.array_split(np.asarray(temp), int(cvfold))
            tlist = [1] * int(cvfold)
            for v in range(len(vlist)):
                # BUG FIX: range(...) has no .remove in Python 3
                tlist[v] = list(range(ndata))
                for this in vlist[v]:
                    tlist[v].remove(this)
    for ccon in list(range(1, ntrans + 1)):
        # Fit a model restricted to exactly ccon terms
        if (cvfold != 'valset'):
            res = alamopy.doalamo(
                xdata, zdata, xval=xval, zval=zval,
                linfcns=kwargs['linfcns'], expfcns=kwargs['expfcns'],
                logfcns=kwargs['logfcns'], sinfcns=kwargs['sinfcns'],
                cosfcns=kwargs['cosfcns'],
                monomialpower=kwargs['monomialpower'],
                multi2power=kwargs['multi2power'],
                multi3power=kwargs['multi3power'],
                ratiopower=kwargs['ratiopower'],
                sigma=kwargs['sigma'],
                xlabels=kwargs['xlabels'], zlabels=kwargs['zlabels'],
                modeler=6, convpen=0, maxterms=ccon,
                almname=kwargs['almname'],
                expandoutput=kwargs['expandoutput'],
                xmax=kwargs['xmax'], xmin=kwargs['xmin'],
                savescratch=kwargs['savescratch'])
        else:
            res = alamopy.doalamo(
                xdata[tlist, :], zdata[tlist],
                xval=xdata[vlist, :], zval=zdata[vlist],
                linfcns=kwargs['linfcns'], expfcns=kwargs['expfcns'],
                logfcns=kwargs['logfcns'], sinfcns=kwargs['sinfcns'],
                cosfcns=kwargs['cosfcns'],
                monomialpower=kwargs['monomialpower'],
                multi2power=kwargs['multi2power'],
                multi3power=kwargs['multi3power'],
                ratiopower=kwargs['ratiopower'],
                sigma=kwargs['sigma'],
                xlabels=kwargs['xlabels'], zlabels=kwargs['zlabels'],
                modeler=6, convpen=0, maxterms=ccon,
                almname=kwargs['almname'],
                expandoutput=kwargs['expandoutput'],
                xmax=kwargs['xmax'], xmin=kwargs['xmin'],
                savescratch=kwargs['savescratch'])
        # Refresh the generated simulator modules for this cardinality
        for fn in ['cvalsim.py', 'cvalsim.pyc', 'almsim.py', 'almsim.pyc']:
            try:
                os.remove(fn)
            except Exception:
                pass
        os.system('cp ' + kwargs['almname'].split('.')[0] + 'cv.py cvalsim.py')
        os.system('cp ' + kwargs['almname'].split('.')[0] + 'alm.py almsim.py')
        if (ccon == 1):
            if (ndata < 10):
                # not enough data to do cross validation
                writethis('Not enough data to facilitate cross validation')
                endt = time.time()
                res['totaltime'] = endt - startt
                return res
        import almsim  # generated model module copied above
        xalm = alamopy.almfeatmat(xdata, ccon)
        # Now compute the cross-validation error
        mseval = 0.0
        rmse = {}
        if (cvfold == 'valset'):
            resid = np.sum((zdata[vlist] - almsim.f(xdata[vlist, :]))**2)
            mseval = resid / float(len(vlist))
            params = 'ALM params used for valset'
            rmse = {}
            rmse['val'] = res['rmseval']
            rmse['train'] = res['rmse']
        else:
            mseval = 0.0
            for tl, vl in zip(tlist, vlist):
                # refit the ccon-term feature matrix on each training fold
                xd, xmax, xmin = alamopy.mapminmax(xalm)
                zd, zmax, zmin = alamopy.mapminmax(zdata)
                fitres = lstsq(xd[tl], zd[tl])
                params = fitres[0]
                resid = zd[vl] - np.matmul(xd[vl, :], params[:])
                resid = alamopy.remapminmax(resid, zmax, zmin)
                resid = sum(resid**2)
                if (cvfold == 'loo'):
                    mseval = mseval + resid
                else:
                    mseval = mseval + resid / float(len(vl))
            mseval = mseval / float(len(vlist))
            rmse['val'] = np.sqrt(mseval)
            rmse['train'] = 'not'
        if (ccon > 1):
            if (float(rmse['val']) >= float(rmseold)):
                # Validation error stopped improving: report and return the
                # previous (smaller) model
                sys.stdout.write(' Problem name : ' + kwargs['almname'] + '\n')
                sys.stdout.write(' rMSEval : ' + str(rmse['val']) +
                                 ' MSEold : ' + str(rmseold) + '\n')
                if (cvfold == 'valset'):
                    sys.stdout.write(' Ntrain : ' + str(len(tlist)) +
                                     ' Nval : ' + str(len(vlist)) + '\n')
                else:
                    sys.stdout.write(' Ntrain : ' + str(len(tlist[0])) +
                                     ' Nval : ' + str(len(vlist[0])) + '\n')
                sys.stdout.write(' optimal model size is : ' +
                                 str(ccon - 1) + '\n')
                sys.stdout.write(' optimal coefficients are : ' +
                                 str(oldp) + '\n')
                os.remove(kwargs['almname'].split('.')[0] + 'alm.py')
                os.system('cp ' + 'oldalmsim.sv ' +
                          kwargs['almname'].split('.')[0] + 'alm.py')
                endt = time.time()
                oldres['totaltime'] = endt - startt
                return oldres
            elif (ccon == ntrans):
                # Exhausted the cardinality budget; keep the last model
                endt = time.time()
                sys.stdout.write('optimal model size is :' + str(ccon) + '\n')
                res['totaltime'] = endt - startt
                return res
            else:
                # NOTE(review): rmseold and oldp are not updated here, so
                # later cardinalities are compared against the size-1
                # model's RMSE — TODO confirm intended
                oldres = res
        else:
            rmseold = float(rmse['val'])
            oldres = res
            oldp = params
        # keep track of the alm model of the previous iteration
        try:
            os.remove('oldalmsim.sv')
        except Exception:
            pass
        os.system('mv almsim.py oldalmsim.sv')
def readTraceFile(vargs, data, debug):
    """
    Read the alamo trace file to pull in the model(s) and fit metrics.

    Populates data['results'] with the model string, a lambdified callable
    version of it, and the statistics reported on the trace line(s).
    Supports trace files that have been appended to multiple times by
    parsing from the LAST header line.

    Args:
        vargs: validation data; a non-empty sequence means a validation
            metrics line is also present in the trace file
        data: shared alamo data options (data['stropts']['tracefname']
            names the trace file)
        debug: shared default options for the .alm file; debug['mock']
            substitutes canned results when the file cannot be read
    """
    trace_file = data['stropts']['tracefname']
    # currentDirectory = os.getcwd()
    trace_str = trace_file  # currentDirectory + "/" + trace_file
    try:
        lf = open(trace_str).read()
    except IOError as err:
        if debug['mock']:
            # Mock mode: return canned results so callers can run without
            # an ALAMO executable or a real trace file
            data['results']['clrtime'] = '0'
            data['results']['size'] = '6'
            data['results']['numolr'] = '16960'
            data['results']['othertime'] = '0.8799995E-01'
            data['results']['olrtime'] = '0.10800002'
            data['results']['miptime'] = '0'
            data['results']['version'] = '2018.4.3'
            data['results']['status'] = '0'
            data['results']['R2'] = '1'
            data['results']['numclr'] = '0'
            data['results']['nummip'] = '0'
            data['results']['ssr'] = '0.169E-21'
            data['results']['pymodel'] = 'cam6alm'
            data['results']['totaltime'] = '0.1760001'
            data['results']['rmse'] = '0.255E-11'
            data['results']['madp'] = '0.814E-09'
            data['results']['model'] = \
                ' z1 = 3.9999999999884194856747 * x1^2 - 3.9999999999873385725380 * x2^2 - 2.0999999999876837186719 * x1^4 + 3.9999999999879496392907 * x2^4 + 0.33333333333014281141260 * x1^6 + 1.0000000000008837375276 * x1*x2'
            data['results']['nbas'] = '15'
            if debug['expandoutput']:
                data['results']['ssrval'] = 0
                data['results']['R2val'] = 0
                data['results']['rmseval'] = 0
                data['results']['madpval'] = 0
            return
        else:
            raise almerror.AlamoError('Cannot read from trace file "{}": {}'
                                      .format(trace_str, err))
    try:
        # import sympy
        from sympy.parsing.sympy_parser import parse_expr
        from sympy import symbols, lambdify
    except Exception:
        # NOTE(review): parse_expr/lambdify are used unconditionally below,
        # so a failed import will surface later as a NameError
        writethis('Cannot import sympy')
    lf2 = lf.split('\n')
    lf2_ind = 0
    # ENGLE Allows for multiple writings to trace.trc file 5/30/19
    dict_out_str = '#filename, NINPUTS, NOUTPUTS, INITIALPOINTS, OUTPUT, SET, INITIALIZER, SAMPLER, MODELER, BUILDER, GREEDYBUILD, BACKSTEPPER, GREEDYBACK, REGULARIZER, SOLVEMIP, SSEOLR, SSE, RMSE, R2, ModelSize, BIC, RIC, Cp, AICc, HQC, MSE, SSEp, MADp, OLRTime, numOLRs, OLRoneCalls, OLRoneFails, OLRgsiCalls, OLRgsiFails, OLRdgelCalls, OLRdgelFails, OLRclrCalls, OLRclrFails, OLRgmsCalls, OLRgmsFails, CLRTime, numCLRs, MIPTime, NumMIPs, LassoTime, Metric1Lasso, Metric2Lasso, LassoSuccess, LassoRed, nBasInitAct, nBas, SimTime, SimData, TotData, NdataConv, OtherTime, NumIters, IterConv, TimeConv, Step0Time, Step1Time, Step2Time, TotalTime, AlamoStatus, AlamoVersion, Model'
    # Index of the LAST header line, so repeated appends read newest results
    lf2_ind = len(lf2) - 1 - lf2[::-1].index(dict_out_str)
    tkeys = lf2[lf2_ind].split(',')
    # kl1: keys used in data['results']; kl2: matching trace-file columns
    kl1 = list(['ssr', 'rmse', 'R2', 'size', 'nbas', 'totaltime',
                'olrtime', 'miptime', 'clrtime', 'othertime', 'version',
                'status', 'madp', 'numolr', 'nummip', 'numclr', 'ninputs'])
    kl2 = list([' SSE', ' RMSE', ' R2', ' ModelSize', ' nBasInitAct',
                ' TotalTime', ' OLRTime', ' MIPTime', ' CLRTime',
                ' OtherTime', ' AlamoVersion', ' AlamoStatus', ' MADp',
                ' numOLRs', ' NumMIPs', ' numCLRs', ' NINPUTS'])
    # Construct results for training data (&val if provided)
    ln = 1
    wlparam = data['opts']['noutputs']
    # initialize multiple output expanded dictionary
    if debug['expandoutput']:
        data['results']['f(model)'] = {}
        data['results']['model'] = {}
        for i in range(len(kl1)):
            data['results'][kl1[i]] = {}
        if len(vargs) > 0:
            for i in ['ssrval', 'R2val', 'rmseval', 'madpval']:
                data['results'][i] = {}
    # With validation data every output has two trace lines (train + val)
    if(len(vargs) > 0):
        wlparam = 2 * wlparam
    else:
        wlparam = wlparam + 1
    while ln < wlparam:
        lf3 = lf2[lf2_ind + ln].split(',')
        # Reapply the saved labels for the output
        model = lf3[tkeys.index(' Model')]
        # for label in data['labs']['savexlabels']:
        for i in range(data['opts']['ninputs']):
            label = data['labs']['xlinks'][i][0]
            # Now is a convenient time to collect information that will be
            # used in the confidence interval analysis
            model = model.replace(str(label), str(data['labs']['xlinks'][i][1]))
        for i in range(data['opts']['noutputs']):
            label = data['labs']['zlinks'][i][0]
            model = model.replace(str(label), str(data['labs']['zlinks'][i][1]))
        # determine which output label to write
        # if debug['outkeys'] == True use olab as a key, if not don't
        if debug['outkeys']:
            olab = model.split('=')[0]
            olab = olab.replace(' ', '')
            data['results']['model'][olab] = model
            # Record tokenized model for each output
            data['results']['f(model)'][olab] = \
                lambdify([symbols(data['labs']['savexlabels'])],
                         parse_expr(model.split('=')[1].replace('^', '**')),
                         "numpy")
        else:
            data['results']['model'] = model
            data['results']['f(model)'] = \
                lambdify([symbols(data['labs']['savexlabels'])],
                         parse_expr(model.split('=')[1].replace('^', '**')),
                         "numpy")
        if debug['expandoutput']:
            if debug['outkeys']:
                for i in range(len(kl1)):
                    data['results'][kl1[i]][olab] = lf3[tkeys.index(kl2[i])]
                # Check for validation set (second line after the header)
                if len(vargs) > 0:
                    lf3 = lf2[lf2_ind + 2].split(',')
                    data['results']['ssrval'][olab] = lf3[tkeys.index(' SSE')]
                    data['results']['R2val'][olab] = lf3[tkeys.index(' R2')]
                    data['results']['rmseval'][olab] = lf3[tkeys.index(' RMSE')]
                    data['results']['madpval'][olab] = lf3[tkeys.index(' MADp')]
            else:
                for i in range(len(kl1)):
                    data['results'][kl1[i]] = lf3[tkeys.index(kl2[i])]
                # Check for validation set (second line after the header)
                if len(vargs) > 0:
                    lf3 = lf2[lf2_ind + 2].split(',')
                    data['results']['ssrval'] = lf3[tkeys.index(' SSE')]
                    data['results']['R2val'] = lf3[tkeys.index(' R2')]
                    data['results']['rmseval'] = lf3[tkeys.index(' RMSE')]
                    data['results']['madpval'] = lf3[tkeys.index(' MADp')]
        else:
            # Without expanded output only the SSE column is recorded
            if debug['outkeys']:
                data['results']['ssr'][olab] = lf3[tkeys.index(kl2[0])]
            else:
                data['results']['ssr'] = lf3[tkeys.index(kl2[0])]
        ln = ln + 1
def alamo(xdata, zdata, **kwargs):
    """
    [almmodel] = doalamo(xdata, zdata, xvaldata, zvaldata, addopt=vals)

    Args:
        xdata (numpy.array or list[real])
        zdata (numpy.array or list[real])
        kwargs: Additional options may be specified and will be applied
            to the .alm
            - example - monomialpower=(1,2,3,4)
            - xlabels : labels given to input variables
            - zlabels : labels given to outputs
            - xval : validation data for alamo
            - zval : response validation data for alamo
            - modeler : modeler value used in alamo
            - solvemip : force alamo to solve mip if gams is available
            - linfcns : 0-1 option to include linear transformations
            - expfcns : 0-1 option to include exponential transformations
            - logfcns : 0-1 option to include logarithmic transformations
            - sinfcns : 0-1 option to include sine transformations
            - cosfcns : 0-1 option to include cosine transformations
            - monomialpower : list of monomial powers
            - multi2power : list of binomial powers
            - multi3power : list of trinomials
            - ratiopower : list of ratio powers
            - screener : screening method
            - almname : specify a name for the .alm file
            - savescratch : saves .alm and .lst
            - savetrace : saves trace file
            - expandoutput : add a key to the output dictionary for the
              output (must be on for inputs(outputs?#Engle)>1)
            - almopt : direct text appending; almopt=<file> will append a
              file to the end of the .alm and can be used to facilitate
              direct access to the .alm (no current checks)
            - loo : leave one out evaluation
            - lmo : leave many out evaluation

    Returns:
        dict: An ALAMO model with the following keys
            - 'model' : algebraic form of model
            - 'f(model)' : a callable lambda function
              Syntax depends on expandoutput =>
              almmodel['f(model)']['out'](inputs,sep,by,comma)
              almmodel['f(model)'](inputs,sep,by,comma)
            - 'ssr' : SSE on training set provided
            - 'R2' : R2 on training set provided
            - 'ssrval' : SSE on testing set if provided
            - 'R2val' : R2 on testing set if provided
    """
    data, debug = alamopy.data, alamopy.debug
    # patched together validation data check
    if 'xval' in kwargs.keys():
        vargs = (kwargs['xval'], kwargs['zval'])
    else:
        vargs = ()
    xdata, zdata, xvaldata, zvaldata = \
        setupData(data, debug, xdata, zdata, vargs, kwargs)
    manageArguments(xdata, zdata, data, debug, kwargs)
    data['results'] = {}
    writeCustomALAMOOptions(kwargs)  # New Custom Options MENGLE
    # Cross Validation
    if debug['loo']:
        # Leave-one-out: refit once per data point, validating on the
        # held-out point; q2 is a dict keyed by output label when outkeys
        # is in effect, otherwise a flat list
        q2 = []
        if debug['outkeys'] and debug['expandoutput']:
            q2 = {}
        # size = len(xdata) - 1
        data['opts']['ndata'] = data['opts']['ndata'] - 1
        # Save caller state so it can be restored after the CV loop
        kwargValidation = debug['validation']
        kwargSaveTrace = debug['savetrace']
        if kwargValidation:
            kwargNvaldata = data['opts']['nvaldata']
        data['opts']['nvaldata'] = 1
        debug['validation'] = True
        debug['savetrace'] = False
        for i in range(0, len(xdata)):
            # Training set = all points except i; validation set = point i
            cvxdata = [x for y, x in enumerate(xdata) if y != i]
            cvzdata = [x for y, x in enumerate(zdata) if y != i]
            alamopy.almwriter(data, debug,
                              (cvxdata, cvzdata, [xdata[i][:]], [zdata[i][:]]),
                              kwargs)
            # Calling ALAMO
            if not debug['mock']:
                os.system(debug['almloc'] + " " +
                          str(data['stropts']['almname']) + " > logscratch")
            data['results'] = {}
            readTraceFile([xdata[i][:], zdata[i][:]], data, debug)
            # Accumulate the held-out R2 for this fold
            if debug['outkeys'] and debug['expandoutput']:
                for k in data['results']['R2'].keys():
                    if k not in q2.keys():
                        q2[k] = [float(data['results']['R2val'][k])]
                    else:
                        q2sub = q2[k]
                        q2sub.append(float(data['results']['R2val'][k]))
                        q2[k] = q2sub
            else:
                q2.append(float(data['results']['R2val']))
            cleanFiles(data, debug)
        # Q2 = mean held-out R2 over all folds
        if debug['outkeys'] and debug['expandoutput']:
            data['results']['Q2'] = {}
            for k in q2.keys():
                Q2 = np.mean(q2[k])
                data['results']['Q2'][k] = Q2
                print("%s: Running cross validation LOO, evaluated Q2:%f" % (k, Q2))
        else:
            Q2 = np.mean(q2)
            data['results']['Q2'] = Q2
            print("Running cross validation LOO, evaluated Q2:%f" % Q2)
        # Restore caller state
        del data['opts']['nvaldata']
        debug['validation'] = kwargValidation
        debug['savetrace'] = kwargSaveTrace
        if kwargValidation:
            data['opts']['nvaldata'] = kwargNvaldata
        data['opts']['ndata'] = data['opts']['ndata'] + 1
    elif debug['lmo'] > 0:
        # Leave-many-out: k-fold cross validation with debug['lmo'] folds
        q2 = []
        if debug['outkeys'] and debug['expandoutput']:
            q2 = {}
        # Save caller state so it can be restored after the CV loop
        kwargNdata = data['opts']['ndata']
        kwargValidation = debug['validation']
        kwargSaveTrace = debug['savetrace']
        if kwargValidation:
            kwargNvaldata = data['opts']['nvaldata']
        debug['validation'] = True
        debug['savetrace'] = False
        numOfFolds = debug['lmo']
        print(xdata)
        if numOfFolds > len(xdata):
            raise Exception('Number of Cross validation folds exceeds the number of data points')
        # size = len(xdata)
        sizeOfFolds = int(len(xdata) / numOfFolds)
        # The first r folds absorb the remainder points via remS/remE
        r = len(xdata) % numOfFolds
        remS = 0
        remE = 1
        for i in range(numOfFolds):
            if i < r + 1:
                remS = i
                remE = i + 1
            # Validation fold = the i-th contiguous slice
            cvvalxdata = xdata[remS + sizeOfFolds * i: sizeOfFolds * (i + 1) + remE]
            cvvalzdata = zdata[remS + sizeOfFolds * i: sizeOfFolds * (i + 1) + remE]
            # Training set = everything outside the validation fold
            if i == 0:
                cvxdata = xdata[sizeOfFolds * (i + 1) + remE:]
                cvzdata = zdata[sizeOfFolds * (i + 1) + remE:]
            else:
                cvxdata = np.concatenate([xdata[0:remS + sizeOfFolds * i],
                                          xdata[sizeOfFolds * (i + 1) + remE:]])
                cvzdata = np.concatenate([zdata[0:remS + sizeOfFolds * i],
                                          zdata[sizeOfFolds * (i + 1) + remE:]])
            data['opts']['nvaldata'] = len(cvvalxdata)
            data['opts']['ndata'] = len(cvxdata)
            alamopy.almwriter(data, debug,
                              (cvxdata, cvzdata, cvvalxdata, cvvalzdata),
                              kwargs)
            # Calling ALAMO
            if not debug['mock']:
                os.system(debug['almloc'] + " " +
                          str(data['stropts']['almname']) + " > logscratch")
            data['results'] = {}
            expandOutput(xdata, zdata, [cvvalxdata, cvvalzdata], data, debug)
            readTraceFile([cvvalxdata, cvvalzdata], data, debug)
            # Accumulate the held-out R2 for this fold
            if debug['outkeys'] and debug['expandoutput']:
                for k in data['results']['R2'].keys():
                    if k not in q2.keys():
                        q2[k] = [float(data['results']['R2val'][k])]
                    else:
                        q2sub = q2[k]
                        q2sub.append(float(data['results']['R2val'][k]))
                        q2[k] = q2sub
            else:
                q2.append(float(data['results']['R2val']))
            cleanFiles(data, debug)
        # Q2 = mean held-out R2 over all folds
        if debug['outkeys'] and debug['expandoutput']:
            data['results']['Q2'] = {}
            for k in q2.keys():
                Q2 = np.mean(q2[k])
                data['results']['Q2'][k] = Q2
                print("%s: Running cross validation LMO, evaluated Q2:%f" % (k, Q2))
        else:
            Q2 = np.mean(q2)
            data['results']['Q2'] = Q2
            print("Running cross validation LMO, evaluated Q2:%f" % Q2)
        # Restore caller state
        del data['opts']['nvaldata']
        debug['validation'] = kwargValidation
        debug['savetrace'] = kwargSaveTrace
        if kwargValidation:
            data['opts']['nvaldata'] = kwargNvaldata
        data['opts']['ndata'] = kwargNdata
    # Write alamo file
    if debug['validation']:
        alamopy.almwriter(data, debug, (xdata, zdata, xvaldata, zvaldata),
                          kwargs)
    else:
        alamopy.almwriter(data, debug, (xdata, zdata), kwargs)
    # Call alamo from the terminal
    if not debug['mock']:
        if debug['showalm']:
            os.system(debug['almloc'] + " " + str(data['stropts']['almname']))
        else:
            writethis('Calling ALAMO now:\n')
            os.system(debug['almloc'] + " " +
                      str(data['stropts']['almname']) + " > logscratch")
    # Check to see if additional data was sampled and add it
    if 'sampler' in kwargs.keys():
        xdata, zdata = checkForSampledData(data, debug)
    # calculate additional statistics
    expandOutput(xdata, zdata, vargs, data, debug)
    # Open the trace file and pull appropriate results
    readTraceFile(vargs, data, debug)
    # write python file of regressed model
    alamopy.almpywriter(data)
    if debug['cvfun']:
        alamopy.almcvwriter(data)
    # add <>alm.py to results dict
    data['results']['pymodel'] = data['stropts']['almname'].split('.')[0] + 'alm'
    # Report predictive quality: compare R2 (fit) against Q2 (cross-val)
    if debug['loo'] or debug['lmo'] > 0:
        Q2, R2, diff = 0, 0, 0
        if debug['outkeys']:
            for k in data['results']['R2'].keys():
                R2 = data['results']['R2'][k]
                if k in data['results']['Q2'].keys():
                    Q2 = data['results']['Q2'][k]
                diff = float(R2) - float(Q2)
                if Q2 < 0.5:
                    print("%s: Q2 suggests this is not a predictive model, Q2: %f, R2: %s" % (k, Q2, R2))
                elif diff < 0.3:
                    print('%s: The difference of R2-Q2 is %f. This is an acceptable difference for predictability, Q2: %f, R2: %s' % (k, diff, Q2, R2))
                else:
                    print('%s: The difference of R2-Q2 is %f. The surrogate model is not able to predict the data reliably, Q2: %f, R2: %s' % (k, diff, Q2, R2))
        else:
            R2 = data['results']['R2']
            Q2 = data['results']['Q2']
            diff = float(R2) - float(Q2)
            if Q2 < 0.5:
                print("Q2 suggests this is not a predictive model, Q2: %f, R2: %s" % (Q2, R2))
            elif diff < 0.3:
                print('The difference of R2-Q2 is %f. This is an acceptable difference for predictability, Q2: %f, R2: %s' % (diff, Q2, R2))
            else:
                print('The difference of R2-Q2 is %f. The surrogate model is not able to predict the data reliably, Q2: %f, R2: %s' % (diff, Q2, R2))
    cleanFiles(data, debug, pywrite=True, **kwargs)
    return data['results']
def readTraceFile(vargs, data, debug):
    """
    Read the alamo trace file to pull in the model(s) and fit metrics.

    Populates data['results'] with the model string, a lambdified callable
    version of it, and the statistics reported on the trace line(s).

    NOTE(review): this variant duplicates the readTraceFile defined earlier
    but always parses from the FIRST line of the trace file (it assumes the
    file was written exactly once) — confirm which definition is intended
    to win at import time.

    Args:
        vargs: validation data; a non-empty sequence means a validation
            metrics line is also present in the trace file
        data: shared alamo data options (data['stropts']['tracefname']
            names the trace file)
        debug: shared default options for the .alm file; debug['mock']
            substitutes canned results when the file cannot be read
    """
    trace_file = data['stropts']['tracefname']
    try:
        lf = open(trace_file).read()
    except IOError as err:
        if debug['mock']:
            # Mock mode: return canned results so callers can run without
            # an ALAMO executable or a real trace file
            data['results']['clrtime'] = '0'
            data['results']['size']='6'
            data['results']['numolr']='16960'
            data['results']['othertime'] = '0.8799995E-01'
            data['results']['olrtime'] = '0.10800002'
            data['results']['miptime']='0'
            data['results']['version']='2018.4.3'
            data['results']['status']='0'
            data['results']['R2']='1'
            data['results']['numclr']='0'
            data['results']['nummip']='0'
            data['results']['ssr']='0.169E-21'
            data['results']['pymodel']='cam6alm'
            data['results']['totaltime']='0.1760001'
            data['results']['rmse']='0.255E-11'
            data['results']['madp']='0.814E-09'
            data['results']['model']=' z1 = 3.9999999999884194856747 * x1^2 - 3.9999999999873385725380 * x2^2 - 2.0999999999876837186719 * x1^4 + 3.9999999999879496392907 * x2^4 + 0.33333333333014281141260 * x1^6 + 1.0000000000008837375276 * x1*x2'
            data['results']['nbas']='15'
            if debug['expandoutput']:
                data['results']['ssrval']=0
                data['results']['R2val']=0
                data['results']['rmseval']=0
                data['results']['madpval']=0
            return
        else:
            raise almerror.AlamoError('Cannot read from trace file "{}": {}'
                                      .format(trace_file, err))
    try:
        import sympy
        from sympy.parsing.sympy_parser import parse_expr
        from sympy import symbols, lambdify
    except:
        # NOTE(review): bare except, and the message says 'install' although
        # this is an import failure; parse_expr/lambdify are used
        # unconditionally below, so a failed import surfaces as a NameError
        writethis('Cannot install sympy')
    lf2 = lf.split('\n')
    # Line 0 of the trace file is the header of column names
    tkeys=lf2[0].split(',')
    # kl1: keys used in data['results']; kl2: matching trace-file columns
    kl1=list(['ssr','rmse','R2','size','nbas','totaltime','olrtime','miptime','clrtime','othertime','version','status','madp','numolr','nummip','numclr','ninputs'])
    kl2=list([' SSE',' RMSE',' R2',' ModelSize',' nBasInitAct',' TotalTime', ' OLRTime',' MIPTime',' CLRTime',' OtherTime', ' AlamoVersion',' AlamoStatus',' MADp',' numOLRs',' NumMIPs',' numCLRs',' NINPUTS'])
    # Construct results for training data (&val if provided)
    ln=1
    wlparam=data['opts']['noutputs']
    # With validation data every output has two trace lines (train + val)
    if (len(vargs) > 0):
        wlparam=2*wlparam
    else:
        wlparam=wlparam+1
    while ln < wlparam:
        lf3=lf2[ln].split(',')
        # Reapply the saved labels for the output
        model=lf3[tkeys.index(' Model')]
        # for label in data['labs']['savexlabels']:
        for i in range(data['opts']['ninputs']):
            label=data['labs']['xlinks'][i][0]
            # Now is a convenient time to collect information that will be
            # used in the confidence interval analysis
            model=model.replace(str(label),str(data['labs']['xlinks'][i][1]))
        for i in range(data['opts']['noutputs']):
            label=data['labs']['zlinks'][i][0]
            model=model.replace(str(label),str(data['labs']['zlinks'][i][1]))
        # determine which output label to write
        # if debug['outkeys'] == True use olab as a key, if not don't
        if debug['outkeys']:
            olab = model.split('=')[0]
            olab=olab.replace(' ','')
            data['results']['model'][olab]=model
            # Record tokenized model for each output
            data['results']['f(model)'][olab] = lambdify([symbols(data['labs']['savexlabels'])], parse_expr(model.split('=')[1].replace('^','**')), "numpy")
        else:
            data['results']['model']=model
            data['results']['f(model)']=lambdify([symbols(data['labs']['savexlabels'])], parse_expr(model.split('=')[1].replace('^','**')), "numpy")
        if debug['expandoutput']:
            if debug['outkeys']:
                for i in range(len(kl1)):
                    data['results'][kl1[i]][olab]=lf3[tkeys.index(kl2[i])]
                # Check for validation set (line 2 of the trace file)
                if len(vargs)>0:
                    lf3=lf2[2].split(',')
                    data['results']['ssrval'][olab]=lf3[tkeys.index(' SSE')]
                    data['results']['R2val'][olab]=lf3[tkeys.index(' R2')]
                    data['results']['rmseval'][olab]=lf3[tkeys.index(' RMSE')]
                    data['results']['madpval'][olab]=lf3[tkeys.index(' MADp')]
            else:
                for i in range(len(kl1)):
                    data['results'][kl1[i]]=lf3[tkeys.index(kl2[i])]
                # Check for validation set (line 2 of the trace file)
                if len(vargs)>0:
                    lf3=lf2[2].split(',')
                    data['results']['ssrval']=lf3[tkeys.index(' SSE')]
                    data['results']['R2val']=lf3[tkeys.index(' R2')]
                    data['results']['rmseval']=lf3[tkeys.index(' RMSE')]
                    data['results']['madpval']=lf3[tkeys.index(' MADp')]
        else:
            # Without expanded output only the SSE column is recorded
            if debug['outkeys']:
                data['results']['ssr'][olab]=lf3[tkeys.index(kl2[0])]
            else:
                data['results']['ssr']=lf3[tkeys.index(kl2[0])]
        ln=ln+1