Beispiel #1
0
def manageArguments(xdata, zdata, data, debug, kwargs):
    """ Apply the caller's keyword options to the shared alamo state

      The options are first handed to parseKwargs together with the shared
      data and debug dictionaries (described in shared.py). Afterwards a
      simulator wrapper is built when requested, a notice about 'outkeys'
      is written for multi-output problems, and the input bounds are
      derived from the training data when no 'xmin' option was given.

      Args:
        xdata (numpy.array or list[real])
        zdata (numpy.array or list[real])
        data:  shared alamo data options
        debug: Additional options may be specified and will be applied
                to the .alm
        kwargs: dictionary of the keyword options supplied by the caller
    """
    parseKwargs(data, debug, kwargs)

    # A simulator wrapper is built when flagged in debug or when the
    # caller passed a 'simulator' option
    needs_simwrapper = debug['simwrap'] or 'simulator' in kwargs
    if needs_simwrapper:
        buildSimWrapper(data, debug)

    # Response labels are used as keys of the output dictionary for
    # problems with several outputs.
    # NOTE(review): this only fires when 'outkeys' is already truthy, so
    # the assignment merely normalises the flag to True. Confirm whether
    # the check was meant to be `not debug['outkeys']`.
    if debug['outkeys'] and data['opts']['noutputs'] > 1:
        writethis('outkeys set to TRUE for multiple outputs')
        debug['outkeys'] = True

    # Without a user supplied 'xmin' the bounds come from the training data
    if 'xmin' not in kwargs:
        constructXBounds(xdata, zdata, data, debug)
Beispiel #2
0
def getValidationData(vargs, data, debug):
    """ Unpack and shape-check the validation set. Modifies data['opts']

        As side effects debug['validation'] is switched on and
        data['opts']['nvaldata'] is set to the number of validation rows.
        A one-dimensional zval is reshaped into a single column.

        Args:
        vargs: validation data (valxdata, valzdata); an empty tuple means
               that no validation data was supplied
        data:  shared alamo data options
        debug: Additional options may be specified and will be applied
                to the .alm

        Returns:
        (xvaldata, zvaldata) when validation data is present, None
        otherwise
    """
    # Nothing to do without validation data
    if not (vargs != ()):
        return None

    debug['validation'] = True
    xvaldata, zvaldata = vargs[0], vargs[1]

    x_shape = np.shape(xvaldata)
    opts = data['opts']
    opts['nvaldata'] = x_shape[0]

    # A flat response vector is treated as one output column
    z_is_flat = len(np.shape(zvaldata)) == 1
    if z_is_flat:
        zvaldata = np.reshape(zvaldata, (opts['nvaldata'], 1))

    # Validation inputs must have as many columns as the training inputs
    if x_shape[1] != opts['ninputs']:
        writethis('Number of input variables inconsistent between x and xval')
        almerror('p2')

    # Validation outputs need one row per validation point and one column
    # per output
    z_shape = np.shape(zvaldata)
    if not (z_shape[0] == opts['nvaldata'] and z_shape[1] == opts['noutputs']):
        writethis('Problem with zval')
        almerror('p2')

    return xvaldata, zvaldata
Beispiel #3
0
def expandOutput(xdata, zdata, vargs, data, debug):
    """ Prepare data['results'] for the model, metrics and labels

        With debug['expandoutput'] set, the training data and the saved
        labels are stored in data['results'] and an empty OrderedDict is
        created for the model and for every metric (plus the validation
        metrics when vargs is not empty). Otherwise only empty 'model' and
        'f(model)' dictionaries are created.

        An existing data['results']['Q2'] entry is left untouched.

        Args:
          data/debug: shared default options for .alm file
          xdata (numpy.array or list[real])
          zdata (numpy.array or list[real])
          vargs:  Validation data

    """
    # Availability probe only: nothing from sympy is used in this function,
    # the user is just told when the package cannot be imported.
    try:
        import sympy  # noqa: F401
        import sympy.parsing.sympy_parser  # noqa: F401
    except ImportError:
        writethis('Cannot import sympy')

    results = data['results']

    if not debug['expandoutput']:
        results['model'] = {}
        results['f(model)'] = {}
        return

    results['xdata'] = xdata
    results['zdata'] = zdata
    results['xlabels'] = data['labs']['savexlabels']
    results['zlabels'] = data['labs']['savezlabels']

    # One ordered container per reported quantity
    metric_keys = (
        'model', 'f(model)', 'ssr', 'R2', 'size', 'rmse', 'nbas',
        'totaltime', 'olrtime', 'miptime', 'clrtime', 'othertime',
        'version', 'status', 'madp', 'numolr', 'nummip', 'numclr',
        'ninputs',
    )
    for key in metric_keys:
        results[key] = collections.OrderedDict()

    # Validation metrics are only prepared when validation data was given
    if len(vargs) > 0:
        for key in ('ssrval', 'R2val', 'rmseval', 'madpval'):
            results[key] = collections.OrderedDict()
Beispiel #4
0
def constructXBounds(xdata, zdata, data, debug):
    """ Derive the xmin / xmax option strings from the training inputs

      For every input column the smallest and largest training value are
      collected and written, space separated, to data['set4']['xmin'] and
      data['set4']['xmax']. The search starts from +/- debug['bignum'], so
      a column is only bounded by its data when the data lie strictly
      inside that range.

      Args:
        xdata (numpy.array or list[real])
        zdata (numpy.array or list[real]): not used here
        data:  shared alamo data options
        debug: Additional options may be specified and will be applied
                to the .alm
    """
    writethis('min and max values of inputs are not provided, they will be calculated from the training data\n')

    lows = []
    highs = []
    for col in range(data['opts']['ninputs']):
        column = [xdata[row][col] for row in range(data['opts']['ndata'])]
        # The sentinel goes first: min/max keep the earliest of equal
        # candidates, so a data value must be strictly smaller/larger
        # than the sentinel (and than earlier values) to replace it
        lows.append(min([debug['bignum']] + column, key=float))
        highs.append(max([-1 * debug['bignum']] + column, key=float))

    # Every entry is followed by a blank, including the last one
    data['set4']['xmax'] = ''.join(str(value) + ' ' for value in highs)
    data['set4']['xmin'] = ''.join(str(value) + ' ' for value in lows)
Beispiel #5
0
def almplot(res, show=True):
    """Plot a fitted model curve and, when available, its confidence band.

    The model string in res['model'] is split into terms on ' + ' and the
    leading number of every term is read as a coefficient. The plotted
    curve is coeff[0] * t**1.2 - coeff[1] * t**2 - coeff[2] on 100 points
    of t between 0.08 and 1.7.
    NOTE(review): this functional form is hard-coded and does not follow
    the basis functions named in the model string; presumably it matches
    one specific example, so confirm before reusing.

    Args:
        res: result dictionary with a 'model' entry and optionally a
            'conf_inv' entry whose items look like '<name>+/-<half width>'
        show: call plt.show() after drawing when True

    Returns:
        None. When the plotting packages cannot be imported a message is
        written and nothing is drawn.
    """
    # Imported outside the guarded block so that the handler below can
    # always call it (inside the try it would be unbound whenever the
    # matplotlib import fails first).
    from alamopy.writethis import writethis
    try:
        import matplotlib.pyplot as plt
        import numpy as np
    except Exception:
        # Kept broad: matplotlib can fail with backend errors as well
        writethis("Cannot plot, possibly missing matplotlib package")
        return

    # Turn subtractions into additions of negative terms, drop the
    # left-hand side and split into the individual terms
    terms = res['model'].replace(' - ', ' + -').split('=')[1].split(' + ')
    if terms[0] == ' ':  # with more than one term the first split is ' '
        terms = terms[1:]

    has_conf = 'conf_inv' in res
    ndp = 100
    t = np.linspace(0.08, 1.7, ndp)
    out = np.zeros([3, ndp])
    clo = np.zeros(ndp)
    chi = np.zeros(ndp)
    coeff = np.zeros(ndp)

    for i, term in enumerate(terms):
        coeff[i] = float(term.split(' * ')[0])
        if has_conf:
            half_width = float(res['conf_inv'][i].split('+/-')[1])
            clo[i] = coeff[i] - half_width
            chi[i] = coeff[i] + half_width

    out[0] = coeff[0] * t**1.2 - coeff[1] * t**2 - coeff[2]
    if has_conf:
        # The subtracted terms use the opposite bound so that out[1] is
        # the lower and out[2] the upper envelope
        out[1] = clo[0] * t**1.2 - chi[1] * t**2 - chi[2]
        out[2] = chi[0] * t**1.2 - clo[1] * t**2 - clo[2]

    plt.plot(t, out[0], 'b-')
    if has_conf:
        plt.plot(t, out[1], 'r--', t, out[2], 'r--')
    if show:
        plt.show()
Beispiel #6
0
def allcard(xdata, zdata, xval, zval, **kwargs):
    """Search over ALAMO model sizes using a validation error.

    For maxterms = 1, 2, ... a model is built with alamopy.doalamo
    (modeler=6, convpen=0) and a validation RMSE is computed for it. The
    search stops at the first size whose RMSE is not smaller than that of
    the previous size and returns the previous result; if the largest
    size is reached it returns that result.

    Args:
        xdata, zdata: training inputs and outputs; they are indexed with
            index arrays, so numpy arrays are expected
        xval, zval: validation data forwarded to doalamo unless
            cvfold == 'valset', in which case a 70/30 split of the
            training data is used instead
        **kwargs: must contain 'cvfold' ('loo', 'valset' or a number of
            folds) as well as 'sigma', 'xlabels', 'zlabels', 'almname',
            'expandoutput', 'xmax', 'xmin' and 'savescratch', which are
            forwarded to doalamo. The transformation options ('linfcns',
            'expfcns', ..., 'ratiopower') default to 0 when absent.

    Returns:
        The doalamo result dictionary of the selected model with a
        'totaltime' entry added, or None when no model size was tried.

    Raises:
        ValueError: when 'cvfold' is not supplied.
    """
    # enumerate all model cardinalities via ccmiqp and
    # use validation/cross-validation to determine
    from idaes.surrogate import alamopy
    # PYLINT-TODO-FIX alamopy.writethis.writethis doesn't seem to exist
    # pylint: disable=import-error
    from alamopy.writethis import writethis
    # pylint: enable=import-error
    import contextlib
    import importlib
    import math
    import os
    import random
    import shutil
    import sys
    import time

    import numpy as np
    from scipy.linalg import lstsq

    def copy_best_effort(src, dst):
        # Like the shell `cp` used before: a missing source is not fatal
        with contextlib.suppress(OSError):
            shutil.copyfile(src, dst)

    random.seed(100)

    if 'cvfold' not in kwargs:
        raise ValueError("allcard requires the 'cvfold' option "
                         "('loo', 'valset' or a number of folds)")
    cvfold = kwargs['cvfold']

    trans = ('linfcns', 'expfcns', 'logfcns', 'sinfcns', 'cosfcns',
             'monomialpower', 'multi2power', 'multi3power', 'ratiopower')
    et = ('multi2power', 'multi3power', 'ratiopower')
    ndata = np.shape(xdata)[0]
    ninputs = np.shape(xdata)[1]

    # Upper bound on the number of candidate terms
    ntrans = 0
    for t in trans:
        if t in kwargs:
            nt = len(kwargs[t]) if isinstance(kwargs[t], (list, tuple)) else 1
            if t not in et:
                ntrans = ntrans + ninputs * nt
            else:
                ntrans = ntrans + math.factorial(ninputs) * nt
        else:
            kwargs[t] = 0
    ntrans = min(ntrans, 1000)

    rmseold = 0.0
    startt = time.time()
    oldres = {}
    oldp = ()

    # split training and validation data before looping through cc
    tlist = []
    vlist = []
    if cvfold == 'loo':
        for i in range(ndata):
            vlist.append(np.asarray([i]))
            tlist.append(np.asarray([x for x in range(ndata) if x != i]))
    else:
        # shuffle needs a mutable sequence, a bare range will not do
        order = list(range(ndata))
        random.shuffle(order)
        order = np.asarray(order)
        if cvfold == 'valset':
            cut = int(0.7 * ndata)
            tlist = order[:cut]
            # everything after the cut validates (the last sample too)
            vlist = order[cut:]
        else:
            vlist = np.array_split(order, int(cvfold))
            for fold in vlist:
                held_out = set(fold.tolist())
                tlist.append([i for i in range(ndata) if i not in held_out])

    forwarded = ('linfcns', 'expfcns', 'logfcns', 'sinfcns', 'cosfcns',
                 'monomialpower', 'multi2power', 'multi3power', 'ratiopower',
                 'sigma', 'xlabels', 'zlabels', 'almname', 'expandoutput',
                 'xmax', 'xmin', 'savescratch')

    for ccon in range(1, ntrans + 1):
        options = {name: kwargs[name] for name in forwarded}
        options.update(modeler=6, convpen=0, maxterms=ccon)
        stem = kwargs['almname'].split('.')[0]

        if cvfold != 'valset':
            res = alamopy.doalamo(xdata, zdata, xval=xval, zval=zval,
                                  **options)
        else:
            res = alamopy.doalamo(xdata[tlist, :], zdata[tlist],
                                  xval=xdata[vlist, :], zval=zdata[vlist],
                                  **options)

        # Replace the generic simulator modules by the ones just written
        for fn in ('cvalsim.py', 'cvalsim.pyc', 'almsim.py', 'almsim.pyc'):
            with contextlib.suppress(OSError):
                os.remove(fn)
        copy_best_effort(stem + 'cv.py', 'cvalsim.py')
        copy_best_effort(stem + 'alm.py', 'almsim.py')

        if ccon == 1 and ndata < 10:
            # not enough data to do cross validation
            writethis('Not enough data to facilitate cross validation')
            res['totaltime'] = time.time() - startt
            return res

        # almsim.py was just replaced on disk; a plain import would hand
        # back the module cached from the previous model size
        importlib.invalidate_caches()
        if 'almsim' in sys.modules:
            almsim = importlib.reload(sys.modules['almsim'])
        else:
            import almsim
        xalm = alamopy.almfeatmat(xdata, ccon)

        # Validation / cross-validation error of this model size
        rmse = {}
        if cvfold == 'valset':
            resid = np.sum((zdata[vlist] - almsim.f(xdata[vlist, :]))**2)
            mseval = resid / float(len(vlist))
            params = 'ALM params used for valset'
            rmse['val'] = res['rmseval']
            rmse['train'] = res['rmse']
        else:
            mseval = 0.0
            for tl, vl in zip(tlist, vlist):
                xd, xmax, xmin = alamopy.mapminmax(xalm)
                zd, zmax, zmin = alamopy.mapminmax(zdata)
                fitres = lstsq(xd[tl], zd[tl])
                params = fitres[0]
                resid = zd[vl] - np.matmul(xd[vl, :], params[:])
                resid = alamopy.remapminmax(resid, zmax, zmin)
                resid = sum(resid**2)
                if cvfold == 'loo':
                    mseval = mseval + resid
                else:
                    mseval = mseval + resid / float(len(vl))
            mseval = mseval / float(len(vlist))
            rmse['val'] = np.sqrt(mseval)
            rmse['train'] = 'not'

        if ccon > 1:
            if float(rmse['val']) >= float(rmseold):
                # No improvement: the previous size is reported and its
                # model file restored
                sys.stdout.write('              Problem name   : ' +
                                 kwargs['almname'] + '\n')
                sys.stdout.write('  rMSEval : ' + str(rmse['val']) +
                                 '    MSEold : ' + str(rmseold) + '\n')
                if cvfold == 'valset':
                    sys.stdout.write('   Ntrain : ' + str(len(tlist)) +
                                     '    Nval : ' + str(len(vlist)) + '\n')
                else:
                    sys.stdout.write('   Ntrain : ' + str(len(tlist[0])) +
                                     '    Nval : ' + str(len(vlist[0])) + '\n')
                sys.stdout.write('       optimal model size is : ' +
                                 str(ccon - 1) + '\n')
                sys.stdout.write('    optimal coefficients are : ' +
                                 str(oldp) + '\n')
                copy_best_effort('oldalmsim.sv', stem + 'alm.py')
                oldres['totaltime'] = time.time() - startt
                return oldres
            if ccon == ntrans:
                sys.stdout.write('optimal model size is :' + str(ccon) + '\n')
                res['totaltime'] = time.time() - startt
                return res

        # This size becomes the reference for the next, larger one
        rmseold = float(rmse['val'])
        oldres = res
        oldp = params

        # keep track of alm model of old iteration
        with contextlib.suppress(OSError):
            os.replace('almsim.py', 'oldalmsim.sv')

    # Only reached when fewer than two sizes were tried
    if oldres:
        oldres['totaltime'] = time.time() - startt
        return oldres
    return None
Beispiel #7
0
def readTraceFile(vargs, data, debug):
    """ Read the alamo trace file to read in the model and metrics

      The last header line found in the trace file is located and the
      rows following it are parsed. For every parsed row the saved
      variable labels are substituted back into the model string, a
      callable is built from it with sympy, and the results are stored in
      data['results'] (keyed by output label when debug['outkeys'] is
      set). With debug['expandoutput'] all metrics are stored, otherwise
      only 'ssr'.

      When the file cannot be read and debug['mock'] is set, fixed
      placeholder results are stored instead.

      Args:
          data/debug: shared default options for .alm file
          vargs:  Validation data

      Raises:
          almerror.AlamoError: the trace file cannot be read and
              debug['mock'] is not set
          ValueError: the expected header line is not in the trace file
    """

    trace_file = data['stropts']['tracefname']
    try:
        # Context manager so the handle is released right after reading
        with open(trace_file) as trace_handle:
            lf = trace_handle.read()
    except IOError as err:
        if debug['mock']:
            data['results']['clrtime'] = '0'
            data['results']['size'] = '6'
            data['results']['numolr'] = '16960'
            data['results']['othertime'] = '0.8799995E-01'
            data['results']['olrtime'] = '0.10800002'
            data['results']['miptime'] = '0'
            data['results']['version'] = '2018.4.3'
            data['results']['status'] = '0'
            data['results']['R2'] = '1'
            data['results']['numclr'] = '0'
            data['results']['nummip'] = '0'
            data['results']['ssr'] = '0.169E-21'
            data['results']['pymodel'] = 'cam6alm'
            data['results']['totaltime'] = '0.1760001'
            data['results']['rmse'] = '0.255E-11'
            data['results']['madp'] = '0.814E-09'
            data['results']['model'] = \
                '  z1 = 3.9999999999884194856747 * x1^2 \
                 - 3.9999999999873385725380 * x2^2 - 2.0999999999876837186719 \
                 * x1^4 + 3.9999999999879496392907 * x2^4 + 0.33333333333014281141260 \
                 * x1^6 + 1.0000000000008837375276 * x1*x2'
            data['results']['nbas'] = '15'

            if debug['expandoutput']:
                data['results']['ssrval'] = 0
                data['results']['R2val'] = 0
                data['results']['rmseval'] = 0
                data['results']['madpval'] = 0
            return
        else:
            raise almerror.AlamoError('Cannot read from trace file "{}": {}'
                                      .format(trace_file, err))

    try:
        from sympy.parsing.sympy_parser import parse_expr
        from sympy import symbols, lambdify
    except Exception:
        writethis('Cannot import sympy')

    def build_callable(model_text):
        # Right-hand side of the model as a numpy-backed function of the
        # saved input labels
        return lambdify([symbols(data['labs']['savexlabels'])],
                        parse_expr(model_text.split('=')[1].replace('^', '**')),
                        "numpy")

    lf2 = lf.split('\n')

    dict_out_str = '#filename, NINPUTS, NOUTPUTS, INITIALPOINTS, OUTPUT, SET, INITIALIZER, SAMPLER, MODELER, BUILDER, GREEDYBUILD, BACKSTEPPER, GREEDYBACK, REGULARIZER, SOLVEMIP, SSEOLR, SSE, RMSE, R2, ModelSize, BIC, RIC, Cp, AICc, HQC, MSE, SSEp, MADp, OLRTime, numOLRs, OLRoneCalls, OLRoneFails, OLRgsiCalls, OLRgsiFails, OLRdgelCalls, OLRdgelFails, OLRclrCalls, OLRclrFails, OLRgmsCalls, OLRgmsFails, CLRTime, numCLRs, MIPTime, NumMIPs, LassoTime, Metric1Lasso, Metric2Lasso, LassoSuccess, LassoRed, nBasInitAct, nBas, SimTime, SimData, TotData, NdataConv, OtherTime, NumIters, IterConv, TimeConv, Step0Time, Step1Time, Step2Time, TotalTime, AlamoStatus, AlamoVersion, Model'

    # The trace file may hold several runs; use the last header written
    lf2_ind = len(lf2) - 1 - lf2[::-1].index(dict_out_str)
    tkeys = lf2[lf2_ind].split(',')

    # Result keys and the trace columns they are read from
    kl1 = ['ssr', 'rmse', 'R2', 'size', 'nbas', 'totaltime',
           'olrtime', 'miptime', 'clrtime', 'othertime', 'version',
           'status', 'madp', 'numolr', 'nummip',
           'numclr', 'ninputs']
    kl2 = [' SSE', ' RMSE', ' R2', ' ModelSize', ' nBasInitAct',
           ' TotalTime', ' OLRTime', ' MIPTime', ' CLRTime',
           ' OtherTime', ' AlamoVersion', ' AlamoStatus', ' MADp',
           ' numOLRs', ' NumMIPs', ' numCLRs', ' NINPUTS']
    metric_columns = list(zip(kl1, kl2))
    validation_columns = [('ssrval', ' SSE'), ('R2val', ' R2'),
                          ('rmseval', ' RMSE'), ('madpval', ' MADp')]
    has_validation = len(vargs) > 0

    # initialize multiple output expanded dictionary
    if debug['expandoutput']:
        data['results']['f(model)'] = {}
        data['results']['model'] = {}
        for res_key in kl1:
            data['results'][res_key] = {}
        if has_validation:
            for res_key, _ in validation_columns:
                data['results'][res_key] = {}

    # Number of rows after the header that are parsed (exclusive bound)
    wlparam = data['opts']['noutputs']
    if has_validation:
        wlparam = 2 * wlparam
    else:
        wlparam = wlparam + 1

    xlinks = data['labs']['xlinks']
    zlinks = data['labs']['zlinks']
    for ln in range(1, wlparam):
        lf3 = lf2[lf2_ind + ln].split(',')

        # Reapply the saved labels for the inputs and outputs
        model = lf3[tkeys.index(' Model')]
        for i in range(data['opts']['ninputs']):
            model = model.replace(str(xlinks[i][0]), str(xlinks[i][1]))
        for i in range(data['opts']['noutputs']):
            model = model.replace(str(zlinks[i][0]), str(zlinks[i][1]))

        # With outkeys the output label (left-hand side) is used as key
        if debug['outkeys']:
            olab = model.split('=')[0].replace(' ', '')
            data['results']['model'][olab] = model
            data['results']['f(model)'][olab] = build_callable(model)
        else:
            data['results']['model'] = model
            data['results']['f(model)'] = build_callable(model)

        if debug['expandoutput']:
            for res_key, column in metric_columns:
                value = lf3[tkeys.index(column)]
                if debug['outkeys']:
                    data['results'][res_key][olab] = value
                else:
                    data['results'][res_key] = value
            # Check for validation set
            if has_validation:
                # NOTE(review): the validation metrics are always taken
                # from the second row after the header, whatever row is
                # being parsed - confirm for more than one output
                lf3 = lf2[lf2_ind + 2].split(',')
                for res_key, column in validation_columns:
                    value = lf3[tkeys.index(column)]
                    if debug['outkeys']:
                        data['results'][res_key][olab] = value
                    else:
                        data['results'][res_key] = value
        else:
            if debug['outkeys']:
                # 'ssr' is not pre-created in this mode, so create it on
                # first use instead of failing with a KeyError
                data['results'].setdefault('ssr', {})[olab] = \
                    lf3[tkeys.index(kl2[0])]
            else:
                data['results']['ssr'] = lf3[tkeys.index(kl2[0])]
Beispiel #8
0
def _alamo_call_quiet(data, debug):
    """Run the ALAMO executable on the current .alm file, output to logscratch.

    Nothing is executed when debug['mock'] is set.
    """
    if not debug['mock']:
        os.system(debug['almloc'] + " "
                  + str(data['stropts']['almname']) + " > logscratch")


def _alamo_record_fold_r2(q2, results, keyed):
    """Append the validation R2 of one cross-validation fold to q2.

    q2 is a dict of lists per output label when keyed, else a plain list.
    """
    if keyed:
        for k in results['R2'].keys():
            q2.setdefault(k, []).append(float(results['R2val'][k]))
    else:
        q2.append(float(results['R2val']))


def _alamo_store_q2(q2, results, keyed, scheme):
    """Average the per-fold R2 values into results['Q2'] and print them."""
    if keyed:
        results['Q2'] = {}
        for k in q2.keys():
            Q2 = np.mean(q2[k])
            results['Q2'][k] = Q2
            print("%s: Running cross validation %s, evaluated Q2:%f"
                  % (k, scheme, Q2))
    else:
        Q2 = np.mean(q2)
        results['Q2'] = Q2
        print("Running cross validation %s, evaluated Q2:%f" % (scheme, Q2))


def _alamo_report_predictivity(Q2, R2, prefix=''):
    """Print how the cross-validated Q2 compares with the training R2."""
    diff = float(R2) - float(Q2)
    if Q2 < 0.5:
        message = ("Q2 suggests this is not a predictive model, "
                   "Q2: %f, R2: %s" % (Q2, R2))
    elif diff < 0.3:
        message = ('The difference of R2-Q2 is  %f. This is an acceptable '
                   'difference for predictability, Q2: %f, R2: %s'
                   % (diff, Q2, R2))
    else:
        message = ('The difference of R2-Q2 is %f. The surrogate model is '
                   'not able to predict the data reliably, Q2: %f, R2: %s'
                   % (diff, Q2, R2))
    print(prefix + message)


def alamo(xdata, zdata, **kwargs):
    """ [almmodel] = doalamo(xdata,zdata, xvaldata, zvaldata,addopt=vals)

    Writes the .alm file, calls ALAMO, reads the trace file back and
    returns the results. With the 'loo' or 'lmo' option a cross
    validation is run first and its mean validation R2 is reported as Q2.

    Args:
        xdata (numpy.array or list[real])
        zdata (numpy.array or list[real])
        kwargs: Additional options may be specified and will be applied
                to the .alm
          -  example -  monomialpower=(1,2,3,4)
          -  xlabels       : labels given to input variables
          -  zlabels       : labels given to outputs
          -  xval          : validation data for alamo
          -  zval          : response validation data for alamo
          -  modeler       : modeler value used in alamo
          -  solvemip      : force alamo to solve mip if gams is available
          -  linfcns       : 0-1 option to include linear transformations
          -  expfcns       : 0-1 option to include exponential transformations
          -  logfcns       : 0-1 option to include logarithmic transformations
          -  sinfcns       : 0-1 option to include sine transformations
          -  cosfcns       : 0-1 option to include cosine transformations
          -  monomialpower : list of monomial powers
          -  multi2power   : list of binomial powers
          -  multi3power   : list of trinomials
          -  ratiopower    : list of ratio powers
          -  screener      : screening method
          -  almname       : specify a name for the .alm file
          -  savescratch   : saves .alm and .lst
          -  savetrace     : saves trace file
          -  expandoutput  : add a key to the output dictionary for the output
                             (must be on for inputs(outputs?#Engle)>1)
          -  almopt        : direct text appending
                             the option almopt=<file> will append a file to the
                             end of the .alm and can be used to facilitate
                             direct access to the .alm (no current checks)
          -  loo           : leave one out evaluation
          -  lmo           : leave many out evaluation
    Returns:
        dict: An ALAMO model with the following keys
          -  'model'    : algebraic form of model
          -  'f(model)' : a callable lambda function
          -   Syntax depends on expandoutput
               syntax => almmodel['f(model)']['out'](inputs,sep,by,comma)
                         almmodel['f(model)'](inputs,sep,by,comma)
          -  'ssr'      : SSE on training set provided
          -  'R2'       : R2 on training set provided
          -  'ssrval'   : SSE on testing set if provided
          -  'R2val'    : R2 on testing set if provided
    Raises:
        Exception: 'lmo' asks for more folds than there are data points
    """
    data, debug = alamopy.data, alamopy.debug

    # patched together validation data check
    if 'xval' in kwargs.keys():
        vargs = (kwargs['xval'], kwargs['zval'])
    else:
        vargs = ()

    xdata, zdata, xvaldata, zvaldata = \
        setupData(data, debug, xdata, zdata, vargs, kwargs)
    manageArguments(xdata, zdata, data, debug, kwargs)

    data['results'] = {}

    writeCustomALAMOOptions(kwargs)  # New Custom Options MENGLE

    # Cross Validation
    if debug['loo']:
        # Per-output bookkeeping is only possible with labelled, expanded
        # results
        keyed = debug['outkeys'] and debug['expandoutput']
        q2 = {} if keyed else []

        # Temporarily describe a problem with one point held out; the
        # original settings are restored after the loop
        data['opts']['ndata'] = data['opts']['ndata'] - 1
        kwargValidation = debug['validation']
        kwargSaveTrace = debug['savetrace']
        if kwargValidation:
            kwargNvaldata = data['opts']['nvaldata']

        data['opts']['nvaldata'] = 1
        debug['validation'] = True
        debug['savetrace'] = False
        for i in range(0, len(xdata)):
            cvxdata = [x for y, x in enumerate(xdata) if y != i]
            cvzdata = [x for y, x in enumerate(zdata) if y != i]
            alamopy.almwriter(data, debug,
                              (cvxdata, cvzdata, [xdata[i][:]], [zdata[i][:]]),
                              kwargs)

            # Calling ALAMO
            _alamo_call_quiet(data, debug)

            data['results'] = {}
            readTraceFile([xdata[i][:], zdata[i][:]], data, debug)

            _alamo_record_fold_r2(q2, data['results'], keyed)
            cleanFiles(data, debug)

        _alamo_store_q2(q2, data['results'], keyed, 'LOO')

        del data['opts']['nvaldata']
        debug['validation'] = kwargValidation
        debug['savetrace'] = kwargSaveTrace
        if kwargValidation:
            data['opts']['nvaldata'] = kwargNvaldata
        data['opts']['ndata'] = data['opts']['ndata'] + 1
    elif debug['lmo'] > 0:
        keyed = debug['outkeys'] and debug['expandoutput']
        q2 = {} if keyed else []

        kwargNdata = data['opts']['ndata']
        kwargValidation = debug['validation']
        kwargSaveTrace = debug['savetrace']
        if kwargValidation:
            kwargNvaldata = data['opts']['nvaldata']

        debug['validation'] = True
        debug['savetrace'] = False
        numOfFolds = debug['lmo']
        if numOfFolds > len(xdata):
            raise Exception('Number of Cross validation '
                            'folds exceeds the number of data points')

        sizeOfFolds = len(xdata) // numOfFolds
        r = len(xdata) % numOfFolds

        for i in range(numOfFolds):
            # Contiguous, non-overlapping folds: the first r folds take
            # one extra point so that every sample is validated once
            start = sizeOfFolds * i + min(i, r)
            stop = sizeOfFolds * (i + 1) + min(i + 1, r)
            cvvalxdata = xdata[start:stop]
            cvvalzdata = zdata[start:stop]
            if i == 0:
                cvxdata = xdata[stop:]
                cvzdata = zdata[stop:]
            else:
                cvxdata = np.concatenate([xdata[0:start], xdata[stop:]])
                cvzdata = np.concatenate([zdata[0:start], zdata[stop:]])

            data['opts']['nvaldata'] = len(cvvalxdata)
            data['opts']['ndata'] = len(cvxdata)

            alamopy.almwriter(data, debug,
                              (cvxdata, cvzdata, cvvalxdata, cvvalzdata),
                              kwargs)

            # Calling ALAMO
            _alamo_call_quiet(data, debug)

            data['results'] = {}
            expandOutput(xdata, zdata, [cvvalxdata, cvvalzdata], data, debug)
            readTraceFile([cvvalxdata, cvvalzdata], data, debug)

            _alamo_record_fold_r2(q2, data['results'], keyed)
            cleanFiles(data, debug)

        _alamo_store_q2(q2, data['results'], keyed, 'LMO')

        del data['opts']['nvaldata']
        debug['validation'] = kwargValidation
        debug['savetrace'] = kwargSaveTrace
        if kwargValidation:
            data['opts']['nvaldata'] = kwargNvaldata
        data['opts']['ndata'] = kwargNdata

    # Write alamo file
    if debug['validation']:
        alamopy.almwriter(data, debug, (xdata, zdata, xvaldata, zvaldata), kwargs)
    else:
        alamopy.almwriter(data, debug, (xdata, zdata), kwargs)

    # Call alamo from the terminal
    if not debug['mock']:
        if debug['showalm']:
            os.system(debug['almloc'] + " " + str(data['stropts']['almname']))
        else:
            writethis('Calling ALAMO now:\n')
            os.system(debug['almloc'] + " " + str(data['stropts']['almname'])
                      + " > logscratch")

    # Check to see if additional data was sampled and add it
    if 'sampler' in kwargs.keys():
        xdata, zdata = checkForSampledData(data, debug)

    # calculate additional statistics
    expandOutput(xdata, zdata, vargs, data, debug)

    # Open the trace file and pull appropriate results
    readTraceFile(vargs, data, debug)

    # write python file of regressed model
    alamopy.almpywriter(data)
    if debug['cvfun']:
        alamopy.almcvwriter(data)

    # add <>alm.py to results dict
    data['results']['pymodel'] = data['stropts']['almname'].split('.')[0] + 'alm'

    # Compare the cross-validated Q2 with the R2 of the final fit
    if debug['loo'] or debug['lmo'] > 0:
        if debug['outkeys']:
            for k in data['results']['R2'].keys():
                R2 = data['results']['R2'][k]
                if k in data['results']['Q2'].keys():
                    _alamo_report_predictivity(data['results']['Q2'][k], R2,
                                               "%s: " % k)
        else:
            R2 = data['results']['R2']
            _alamo_report_predictivity(data['results']['Q2'], R2)

    cleanFiles(data, debug, pywrite=True, **kwargs)

    return data['results']
Beispiel #9
0
def readTraceFile(vargs, data, debug):
    """ Read the alamo trace file to read in the model and metrics

      The file named by data['stropts']['tracefname'] is read as
      comma-separated text: the first line supplies the column names and
      the following lines supply one row of values each. The model string
      and the selected metrics are written into data['results'] in place.

      Args:
          vargs:  Validation data; only its length is inspected here
          data:   shared default options for .alm file; data['results']
                  is modified in place
          debug:  shared default options for .alm file; the 'mock',
                  'outkeys' and 'expandoutput' entries are read

      Returns:
          None. When the trace file cannot be read and debug['mock'] is
          set, canned results are stored and the function returns early.

      Raises:
          almerror.AlamoError: the trace file cannot be read and
              debug['mock'] is not set
    """

    trace_file = data['stropts']['tracefname']
    try:
        # Context manager so the handle is closed even when read() fails
        with open(trace_file) as trace:
            lf = trace.read()
    except IOError as err:
        if not debug['mock']:
            raise almerror.AlamoError('Cannot read from trace file "{}": {}'
                                      .format(trace_file, err))

        # Mock mode: no trace file is available, so store canned results
        mock_results = (
            ('clrtime', '0'),
            ('size', '6'),
            ('numolr', '16960'),
            ('othertime', '0.8799995E-01'),
            ('olrtime', '0.10800002'),
            ('miptime', '0'),
            ('version', '2018.4.3'),
            ('status', '0'),
            ('R2', '1'),
            ('numclr', '0'),
            ('nummip', '0'),
            ('ssr', '0.169E-21'),
            ('pymodel', 'cam6alm'),
            ('totaltime', '0.1760001'),
            ('rmse', '0.255E-11'),
            ('madp', '0.814E-09'),
            ('model', '  z1 = 3.9999999999884194856747 * x1^2 - 3.9999999999873385725380 * x2^2 - 2.0999999999876837186719 * x1^4 + 3.9999999999879496392907 * x2^4 + 0.33333333333014281141260 * x1^6 + 1.0000000000008837375276 * x1*x2'),
            ('nbas', '15'),
        )
        for key, value in mock_results:
            data['results'][key] = value

        if debug['expandoutput']:
            for key in ('ssrval', 'R2val', 'rmseval', 'madpval'):
                data['results'][key] = 0
        return

    try:
        from sympy.parsing.sympy_parser import parse_expr
        from sympy import symbols, lambdify
    except ImportError:
        # Only a missing sympy is tolerated here; the lambdify calls below
        # will still fail with a NameError if the import did not succeed.
        writethis('Cannot install sympy')

    lf2 = lf.split('\n')
    tkeys = lf2[0].split(',')

    # (results key, trace-file column name) pairs; the column names carry
    # the leading space left over from splitting the header on ','
    metric_columns = (
        ('ssr', ' SSE'),
        ('rmse', ' RMSE'),
        ('R2', ' R2'),
        ('size', ' ModelSize'),
        ('nbas', ' nBasInitAct'),
        ('totaltime', ' TotalTime'),
        ('olrtime', ' OLRTime'),
        ('miptime', ' MIPTime'),
        ('clrtime', ' CLRTime'),
        ('othertime', ' OtherTime'),
        ('version', ' AlamoVersion'),
        ('status', ' AlamoStatus'),
        ('madp', ' MADp'),
        ('numolr', ' numOLRs'),
        ('nummip', ' NumMIPs'),
        ('numclr', ' numCLRs'),
        ('ninputs', ' NINPUTS'),
    )
    validation_columns = (
        ('ssrval', ' SSE'),
        ('R2val', ' R2'),
        ('rmseval', ' RMSE'),
        ('madpval', ' MADp'),
    )

    def store(key, label, value):
        # With outkeys each result is nested under the output label,
        # otherwise it is written directly under the result key.
        if debug['outkeys']:
            data['results'][key][label] = value
        else:
            data['results'][key] = value

    # Construct results for training data (&val if provided)
    # NOTE(review): the upper bound is 2*noutputs with validation data and
    # noutputs+1 without; confirm this matches the trace-file row layout.
    wlparam = data['opts']['noutputs']
    if len(vargs) > 0:
        wlparam = 2 * wlparam
    else:
        wlparam = wlparam + 1

    for ln in range(1, wlparam):
        row = lf2[ln].split(',')

        # Reapply the saved labels for the inputs and outputs
        model = row[tkeys.index(' Model')]
        for i in range(data['opts']['ninputs']):
            link = data['labs']['xlinks'][i]
            model = model.replace(str(link[0]), str(link[1]))
        for i in range(data['opts']['noutputs']):
            link = data['labs']['zlinks'][i]
            model = model.replace(str(link[0]), str(link[1]))

        # The output label (left-hand side of the model) is only needed
        # as a key when debug['outkeys'] is set
        olab = None
        if debug['outkeys']:
            olab = model.split('=')[0].replace(' ', '')

        store('model', olab, model)

        # Record a callable version of the right-hand side of the model
        rhs = model.split('=')[1].replace('^', '**')
        store('f(model)', olab,
              lambdify([symbols(data['labs']['savexlabels'])],
                       parse_expr(rhs), "numpy"))

        if debug['expandoutput']:
            for key, column in metric_columns:
                store(key, olab, row[tkeys.index(column)])
            # Check for validation set
            if len(vargs) > 0:
                # NOTE(review): the validation row is always read from
                # line index 2, whichever output is being processed;
                # confirm for runs with more than one output.
                valrow = lf2[2].split(',')
                for key, column in validation_columns:
                    store(key, olab, valrow[tkeys.index(column)])
        else:
            store('ssr', olab, row[tkeys.index(' SSE')])