Esempio n. 1
0
def PandasDataFrametoDF(df):
    '''Convert Pandas DataFrame to DF struct.

    The first column of df is always treated as the time (or index)
    vector and is stored as a list of strings. Every remaining column is
    stored as a ( column name, list of values ) pair.

    Parameters:
        df : Pandas DataFrame with the time/index vector in the first
             column followed by at least one data column.

    Returns:
        pyBindEDM.DF with timeName, time and dataList assigned.

    Raises:
        RuntimeError : df is None, or df has fewer than 2 columns.
    '''

    if df is None:
        raise RuntimeError("PandasDataFrametoDF() empty DataFrame")

    # Here is a fundamental problem/incompatibility between cppEDM and pyEDM
    # cppEDM DataFrame stores time vector as strings, and data as valarray
    # time values are not strictly required DataFrame( noTime = true )
    # Here, we don't have a way to know if the Pandas dataframe passed in
    # will have a time vector or not... So... We will just require/assume
    # that the first column is ALWAYS a time or index vector.
    # Since the first column is consumed as time, at least one more column
    # is needed. Without this check a frame with no columns fails below
    # with an IndexError and a single-column frame yields a DF with no data.
    if df.shape[1] < 2:
        raise RuntimeError("PandasDataFrametoDF() DataFrame must have"
                           " at least 2 columns. First column is time.")

    timeName = df.columns[0]
    time = [str(x) for x in df.get(timeName)]  # list of strings

    # Add time series data, Skipping the first column!!!
    dataList = [(column, df.get(column).tolist())
                for column in df.columns[1:]]

    # cppEDM DF struct
    DF = pyBindEDM.DF()
    DF.timeName = timeName
    DF.time = time
    DF.dataList = dataList

    return DF
Esempio n. 2
0
def Embed(pathIn="./",
          dataFile="",
          dataFrame=None,
          E=0,
          tau=1,
          columns="",
          verbose=False):
    '''Takens time-delay embedding on path/file.
       Embed DataFrame columns (subset) in E dimensions.
       Calls MakeBlock() after validation and column subset selection.

       Input is either dataFile, in which case an empty DF struct is
       handed to pyBindEDM together with pathIn and dataFile, or
       dataFrame, a non-empty Pandas DataFrame that is converted with
       PandasDataFrametoDF(). dataFile takes precedence if both are given.

       columns may be a string or a non-string iterable; an iterable is
       joined into one space-separated string, as the other pyEDM
       wrappers do. E, tau and verbose are forwarded unchanged.

       Returns a Pandas DataFrame built from the dict returned by
       pyBindEDM.Embed().

       Raises Exception if dataFrame is empty, or if neither a dataFile
       nor a Pandas DataFrame is supplied.'''

    # Establish DF as empty list or Pandas DataFrame for Embed()
    if dataFile:
        DF = pyBindEDM.DF()
    elif isinstance(dataFrame, DataFrame):
        if dataFrame.empty:
            raise Exception("Embed(): dataFrame is empty.")
        DF = pyEDM.AuxFunc.PandasDataFrametoDF(dataFrame)
    else:
        raise Exception("Embed(): Invalid data input.")

    # If columns is not a string, but iterable, convert to string
    if pyEDM.AuxFunc.NotStringIterable(columns):
        columns = ' '.join(map(str, columns))

    # D is a Python dict returned through pybind11 by cppEDM Embed
    D = pyBindEDM.Embed(pathIn, dataFile, DF, E, tau, columns, verbose)

    return DataFrame(D)  # Convert to pandas DataFrame
Esempio n. 3
0
def Multiview(pathIn="./",
              dataFile="",
              dataFrame=None,
              pathOut="./",
              predictFile="",
              lib="",
              pred="",
              D=0,
              E=1,
              Tp=1,
              knn=0,
              tau=-1,
              columns="",
              target="",
              multiview=0,
              exclusionRadius=0,
              trainLib=True,
              excludeTarget=False,
              verbose=False,
              numThreads=4,
              showPlot=False):
    '''Multiview prediction on path/file.

    Input is either dataFile (an empty DF struct is passed to pyBindEDM
    along with pathIn and dataFile) or dataFrame, a non-empty Pandas
    DataFrame converted with PandasDataFrametoDF(). dataFile takes
    precedence if both are given.

    lib, pred and columns may be strings or non-string iterables;
    iterables are joined into space-separated strings. All other
    arguments are forwarded unchanged to pyBindEDM.Multiview().

    Returns a dict { 'Predictions' : DataFrame, 'View' : DataFrame }.
    If showPlot is true the predictions are also passed to PlotObsPred().

    Raises Exception if dataFrame is empty, or if neither a dataFile nor
    a Pandas DataFrame is supplied.'''

    def spaceJoined(arg):
        # Values for which NotStringIterable() is false pass through as-is
        if pyEDM.AuxFunc.NotStringIterable(arg):
            return ' '.join(map(str, arg))
        return arg

    # Resolve the data source : file name wins over dataFrame
    if not dataFile:
        if not isinstance(dataFrame, DataFrame):
            raise Exception("Multiview(): Invalid data input.")
        if dataFrame.empty:
            raise Exception("Multiview(): dataFrame is empty.")
        DF = pyEDM.AuxFunc.PandasDataFrametoDF(dataFrame)
    else:
        DF = pyBindEDM.DF()

    lib = spaceJoined(lib)
    pred = spaceJoined(pred)
    columns = spaceJoined(columns)

    # pybind11 returns a Python dict from cppEDM Multiview:
    #  { "View" : vector of string, "Predictions" : {} }
    result = pyBindEDM.Multiview(pathIn, dataFile, DF, pathOut, predictFile,
                                 lib, pred, D, E, Tp, knn, tau, columns,
                                 target, multiview, exclusionRadius,
                                 trainLib, excludeTarget, verbose,
                                 numThreads)

    # Convert both entries to pandas DataFrames
    MV = {
        'Predictions': DataFrame(result['Predictions']),
        'View': DataFrame(result['View']),
    }

    if showPlot:
        pyEDM.AuxFunc.PlotObsPred(MV['Predictions'], dataFile, E, Tp)

    return MV
Esempio n. 4
0
def PredictNonlinear(pathIn="./",
                     dataFile="",
                     dataFrame=None,
                     pathOut="./",
                     predictFile="",
                     lib="",
                     pred="",
                     theta="",
                     E=1,
                     Tp=1,
                     knn=0,
                     tau=-1,
                     columns="",
                     target="",
                     embedded=False,
                     verbose=False,
                     numThreads=4,
                     showPlot=True):
    '''Estimate S-map localisation over theta.

    Input is either dataFile (an empty DF struct is passed to pyBindEDM
    along with pathIn and dataFile) or dataFrame, a non-empty Pandas
    DataFrame converted with PandasDataFrametoDF(). dataFile takes
    precedence if both are given.

    lib, pred, theta and columns may be strings or non-string iterables;
    iterables are joined into space-separated strings. All other
    arguments are forwarded unchanged to pyBindEDM.PredictNonlinear().

    Returns a Pandas DataFrame built from the dict returned by
    pyBindEDM.PredictNonlinear(). If showPlot is true, its 'rho' column
    is plotted against 'Theta'.

    Raises Exception if dataFrame is empty, or if neither a dataFile nor
    a Pandas DataFrame is supplied.'''

    # Establish DF as empty list or Pandas DataFrame for PredictNonlinear()
    if dataFile:
        DF = pyBindEDM.DF()
    elif isinstance(dataFrame, DataFrame):
        if dataFrame.empty:
            raise Exception("PredictNonlinear(): dataFrame is empty.")
        DF = pyEDM.AuxFunc.PandasDataFrametoDF(dataFrame)
    else:
        raise Exception("PredictNonlinear(): Invalid data input.")

    # If lib, pred, theta, columns are not string, but iterable,
    # convert to string. theta is a string argument like the others, so
    # a list of theta values gets the same treatment.
    if pyEDM.AuxFunc.NotStringIterable(lib):
        lib = ' '.join(map(str, lib))
    if pyEDM.AuxFunc.NotStringIterable(pred):
        pred = ' '.join(map(str, pred))
    if pyEDM.AuxFunc.NotStringIterable(theta):
        theta = ' '.join(map(str, theta))
    if pyEDM.AuxFunc.NotStringIterable(columns):
        columns = ' '.join(map(str, columns))

    # D is a Python dict returned through pybind11 by cppEDM PredictNonlinear
    D = pyBindEDM.PredictNonlinear(pathIn, dataFile, DF, pathOut, predictFile,
                                   lib, pred, theta, E, Tp, knn, tau, columns,
                                   target, embedded, verbose, numThreads)

    df = DataFrame(D)  # Convert to pandas DataFrame

    if showPlot:
        title = dataFile + "\nE=" + str(E)

        ax = df.plot('Theta', 'rho', title=title, linewidth=3)
        ax.set(xlabel="S-map Localisation (θ)", ylabel="Prediction Skill ρ")
        show()

    return df
Esempio n. 5
0
def EmbedDimension(pathIn="./",
                   dataFile="",
                   dataFrame=None,
                   pathOut="./",
                   predictFile="",
                   lib="",
                   pred="",
                   maxE=10,
                   Tp=1,
                   tau=-1,
                   columns="",
                   target="",
                   embedded=False,
                   verbose=False,
                   numThreads=4,
                   showPlot=True):
    '''Estimate optimal embedding dimension [1:maxE].

    Input is either dataFile (an empty DF struct is passed to pyBindEDM
    along with pathIn and dataFile) or dataFrame, a non-empty Pandas
    DataFrame converted with PandasDataFrametoDF(). dataFile takes
    precedence if both are given.

    lib, pred and columns may be strings or non-string iterables;
    iterables are joined into space-separated strings. All other
    arguments are forwarded unchanged to pyBindEDM.EmbedDimension().

    Returns a Pandas DataFrame built from the dict returned by
    pyBindEDM.EmbedDimension(). If showPlot is true, its 'rho' column is
    plotted against 'E'.

    Raises Exception if dataFrame is empty, or if neither a dataFile nor
    a Pandas DataFrame is supplied.'''

    def spaceJoined(arg):
        # Values for which NotStringIterable() is false pass through as-is
        if pyEDM.AuxFunc.NotStringIterable(arg):
            return ' '.join(map(str, arg))
        return arg

    # Resolve the data source : file name wins over dataFrame
    if not dataFile:
        if not isinstance(dataFrame, DataFrame):
            raise Exception("EmbedDimension(): Invalid data input.")
        if dataFrame.empty:
            raise Exception("EmbedDimension(): dataFrame is empty.")
        DF = pyEDM.AuxFunc.PandasDataFrametoDF(dataFrame)
    else:
        DF = pyBindEDM.DF()

    lib = spaceJoined(lib)
    pred = spaceJoined(pred)
    columns = spaceJoined(columns)

    # pybind11 returns a Python dict from cppEDM EmbedDimension;
    # wrap it directly in a pandas DataFrame
    rhoByE = DataFrame(
        pyBindEDM.EmbedDimension(pathIn, dataFile, DF, pathOut, predictFile,
                                 lib, pred, maxE, Tp, tau, columns, target,
                                 embedded, verbose, numThreads))

    if not showPlot:
        return rhoByE

    axes = rhoByE.plot('E',
                       'rho',
                       title=dataFile + "\nTp=" + str(Tp),
                       linewidth=3)
    axes.set(xlabel="Embedding Dimension", ylabel="Prediction Skill ρ")
    show()

    return rhoByE
Esempio n. 6
0
def SMap(pathIn="./",
         dataFile="",
         dataFrame=None,
         pathOut="./",
         predictFile="",
         lib="",
         pred="",
         E=0,
         Tp=1,
         knn=0,
         tau=1,
         theta=0,
         exclusionRadius=0,
         columns="",
         target="",
         smapFile="",
         jacobians="",
         embedded=False,
         verbose=False,
         const_pred=False,
         showPlot=False):
    '''S-Map prediction on path/file.

    Input is either dataFile (an empty DF struct is passed to pyBindEDM
    along with pathIn and dataFile) or dataFrame, a non-empty Pandas
    DataFrame converted with PandasDataFrametoDF(). dataFile takes
    precedence if both are given.

    lib, pred and columns may be strings or non-string iterables;
    iterables are joined into space-separated strings, as the other
    pyEDM wrappers do. All other arguments are forwarded unchanged to
    pyBindEDM.SMap().

    Returns a dict { 'predictions' : DataFrame, 'coefficients' : DataFrame }.
    If showPlot is true both are passed to the AuxFunc plot helpers.

    Raises Exception if dataFrame is empty, or if neither a dataFile nor
    a Pandas DataFrame is supplied.'''

    # Establish DF as empty list or Pandas DataFrame for SMap()
    if dataFile:
        DF = pyBindEDM.DF()
    elif isinstance(dataFrame, DataFrame):
        if dataFrame.empty:
            raise Exception("SMap(): dataFrame is empty.")
        DF = pyEDM.AuxFunc.PandasDataFrametoDF(dataFrame)
    else:
        raise Exception("SMap(): Invalid data input.")

    # If lib, pred, columns are not string, but iterable, convert to string
    if pyEDM.AuxFunc.NotStringIterable(lib):
        lib = ' '.join(map(str, lib))
    if pyEDM.AuxFunc.NotStringIterable(pred):
        pred = ' '.join(map(str, pred))
    if pyEDM.AuxFunc.NotStringIterable(columns):
        columns = ' '.join(map(str, columns))

    # D is a Python dict returned through pybind11 by cppEDM SMap:
    #  { "predictions" : {}, "coefficients" : {} }
    D = pyBindEDM.SMap(pathIn, dataFile, DF, pathOut, predictFile, lib, pred,
                       E, Tp, knn, tau, theta, exclusionRadius, columns,
                       target, smapFile, jacobians, embedded, const_pred,
                       verbose)

    df_pred = DataFrame(D['predictions'])  # Convert to pandas DataFrame
    df_coef = DataFrame(D['coefficients'])  # Convert to pandas DataFrame

    if showPlot:
        pyEDM.AuxFunc.PlotObsPred(df_pred, dataFile, E, Tp, False)
        pyEDM.AuxFunc.PlotCoeff(df_coef, dataFile, E, Tp)

    return {'predictions': df_pred, 'coefficients': df_coef}
Esempio n. 7
0
def Simplex(pathIn="./",
            dataFile="",
            dataFrame=None,
            pathOut="./",
            predictFile="",
            lib="",
            pred="",
            E=0,
            Tp=1,
            knn=0,
            tau=-1,
            exclusionRadius=0,
            columns="",
            target="",
            embedded=False,
            verbose=False,
            const_pred=False,
            showPlot=False):
    '''Simplex prediction on path/file.

    Input is either dataFile (an empty DF struct is passed to pyBindEDM
    along with pathIn and dataFile) or dataFrame, a non-empty Pandas
    DataFrame converted with PandasDataFrametoDF(). dataFile takes
    precedence if both are given.

    lib, pred and columns may be strings or non-string iterables;
    iterables are joined into space-separated strings. All other
    arguments are forwarded unchanged to pyBindEDM.Simplex().

    Returns a Pandas DataFrame built from the dict returned by
    pyBindEDM.Simplex(). If showPlot is true it is also passed to
    PlotObsPred().

    Raises Exception if dataFrame is empty, or if neither a dataFile nor
    a Pandas DataFrame is supplied.'''

    def spaceJoined(arg):
        # Values for which NotStringIterable() is false pass through as-is
        if pyEDM.AuxFunc.NotStringIterable(arg):
            return ' '.join(map(str, arg))
        return arg

    # Resolve the data source : file name wins over dataFrame
    if not dataFile:
        if not isinstance(dataFrame, DataFrame):
            raise Exception("Simplex(): Invalid data input.")
        if dataFrame.empty:
            raise Exception("Simplex(): dataFrame is empty.")
        DF = pyEDM.AuxFunc.PandasDataFrametoDF(dataFrame)
    else:
        DF = pyBindEDM.DF()

    lib = spaceJoined(lib)
    pred = spaceJoined(pred)
    columns = spaceJoined(columns)

    # pybind11 returns a Python dict from cppEDM Simplex;
    # wrap it directly in a pandas DataFrame
    predictions = DataFrame(
        pyBindEDM.Simplex(pathIn, dataFile, DF, pathOut, predictFile, lib,
                          pred, E, Tp, knn, tau, exclusionRadius, columns,
                          target, embedded, const_pred, verbose))

    if showPlot:
        pyEDM.AuxFunc.PlotObsPred(predictions, dataFile, E, Tp)

    return predictions
Esempio n. 8
0
def CCM(pathIn="./",
        dataFile="",
        dataFrame=None,
        pathOut="./",
        predictFile="",
        E=0,
        Tp=0,
        knn=0,
        tau=1,
        columns="",
        target="",
        libSizes="",
        sample=0,
        random=True,
        replacement=False,
        seed=0,
        verbose=False,
        showPlot=False):
    '''Convergent Cross Mapping on path/file.

    Input is either dataFile (an empty DF struct is passed to pyBindEDM
    along with pathIn and dataFile) or dataFrame, a non-empty Pandas
    DataFrame converted with PandasDataFrametoDF(). dataFile takes
    precedence if both are given.

    columns, libSizes and target may be strings or non-string iterables;
    iterables are joined into space-separated strings, as the other
    pyEDM wrappers do. All other arguments are forwarded unchanged to
    pyBindEDM.CCM().

    Returns a Pandas DataFrame built from the dict returned by
    pyBindEDM.CCM(). If showPlot is true, its second and third columns
    are plotted against 'LibSize'.

    Raises Exception if dataFrame is empty, or if neither a dataFile nor
    a Pandas DataFrame is supplied.'''

    # Establish DF as empty list or Pandas DataFrame for CCM()
    if dataFile:
        DF = pyBindEDM.DF()
    elif isinstance(dataFrame, DataFrame):
        if dataFrame.empty:
            raise Exception("CCM(): dataFrame is empty.")
        DF = pyEDM.AuxFunc.PandasDataFrametoDF(dataFrame)
    else:
        raise Exception("CCM(): Invalid data input.")

    # If columns, libSizes, target are not string, but iterable,
    # convert to string
    if pyEDM.AuxFunc.NotStringIterable(columns):
        columns = ' '.join(map(str, columns))
    if pyEDM.AuxFunc.NotStringIterable(libSizes):
        libSizes = ' '.join(map(str, libSizes))
    if pyEDM.AuxFunc.NotStringIterable(target):
        target = ' '.join(map(str, target))

    # D is a Python dict returned through pybind11 by cppEDM CCM
    D = pyBindEDM.CCM(pathIn, dataFile, DF, pathOut, predictFile, E, Tp, knn,
                      tau, columns, target, libSizes, sample, random,
                      replacement, seed, verbose)

    df = DataFrame(D)  # Convert to pandas DataFrame

    if showPlot:
        title = dataFile + "\nE=" + str(E)

        ax = df.plot('LibSize', [df.columns[1], df.columns[2]],
                     title=title,
                     linewidth=3)
        ax.set(xlabel="Library Size", ylabel="Correlation ρ")
        axhline(y=0, linewidth=1)
        show()

    return df
Esempio n. 9
0
def PredictInterval(pathIn="./",
                    dataFile="",
                    dataFrame=None,
                    pathOut="./",
                    predictFile="",
                    lib="",
                    pred="",
                    maxTp=10,
                    E=1,
                    tau=1,
                    columns="",
                    target="",
                    embedded=False,
                    verbose=False,
                    numThreads=4,
                    showPlot=True):
    '''Estimate optimal prediction interval [1:maxTp]

    Input is either dataFile (an empty DF struct is passed to pyBindEDM
    along with pathIn and dataFile) or dataFrame, a non-empty Pandas
    DataFrame converted with PandasDataFrametoDF(). dataFile takes
    precedence if both are given.

    lib, pred and columns may be strings or non-string iterables;
    iterables are joined into space-separated strings, as the other
    pyEDM wrappers do. All other arguments are forwarded unchanged to
    pyBindEDM.PredictInterval().

    Returns a Pandas DataFrame built from the dict returned by
    pyBindEDM.PredictInterval(). If showPlot is true, its 'rho' column
    is plotted against 'Tp'.

    Raises Exception if dataFrame is empty, or if neither a dataFile nor
    a Pandas DataFrame is supplied.'''

    # Establish DF as empty list or Pandas DataFrame for PredictInterval()
    if dataFile:
        DF = pyBindEDM.DF()
    elif isinstance(dataFrame, DataFrame):
        if dataFrame.empty:
            raise Exception("PredictInterval(): dataFrame is empty.")
        DF = pyEDM.AuxFunc.PandasDataFrametoDF(dataFrame)
    else:
        raise Exception("PredictInterval(): Invalid data input.")

    # If lib, pred, columns are not string, but iterable, convert to string
    if pyEDM.AuxFunc.NotStringIterable(lib):
        lib = ' '.join(map(str, lib))
    if pyEDM.AuxFunc.NotStringIterable(pred):
        pred = ' '.join(map(str, pred))
    if pyEDM.AuxFunc.NotStringIterable(columns):
        columns = ' '.join(map(str, columns))

    # D is a Python dict returned through pybind11 by cppEDM PredictInterval
    D = pyBindEDM.PredictInterval(pathIn, dataFile, DF, pathOut, predictFile,
                                  lib, pred, maxTp, E, tau, columns, target,
                                  embedded, verbose, numThreads)

    df = DataFrame(D)  # Convert to pandas DataFrame

    if showPlot:
        title = dataFile + "\nE=" + str(E)

        ax = df.plot('Tp', 'rho', title=title, linewidth=3)
        ax.set(xlabel="Forecast Interval", ylabel="Prediction Skill ρ")
        show()

    return df
Esempio n. 10
0
def PandasDataFrametoDF( df ):
    '''Build a pyBindEDM DF struct from a Pandas DataFrame.

    The first column of df is always taken as the time (or index) vector
    and stored as a list of strings; cppEDM keeps time as strings and has
    no way to tell whether a time column is present, so one is required.
    Every remaining column becomes a ( column name, list of values ) pair.

    Raises RuntimeError if df is None, has fewer than 2 columns, or has
    any column after the first with dtype "object" (in which case
    df.dtypes is printed before raising).'''

    if df is None :
        raise RuntimeError( "PandasDataFrametoDF() empty DataFrame" )

    # One column for time plus at least one column of data
    if df.shape[1] < 2:
        raise RuntimeError( "PandasDataFrametoDF() DataFrame must have"
                            " at least 2 columns. First column is time." )

    columnNames = df.columns
    timeName    = columnNames[0]
    time        = list( map( str, df.get( timeName ) ) ) # strings for cppEDM

    # Data columns must be homogeneous numerics. Non-numeric data shows up
    # in .dtypes as "object"; the time column alone is exempt.
    isObject = df.dtypes[1:] == "object"
    if isObject.any() :
        print( df.dtypes )
        raise RuntimeError( "PandasDataFrametoDF() non-numeric data is not"
                            " allowed in a DataFrame." )

    # Time series data : everything except the first column
    dataList = [ ( name, df.get( name ).tolist() )
                 for name in columnNames[1:] ]

    # cppEDM DF struct
    DF          = pyBindEDM.DF()
    DF.timeName = timeName
    DF.time     = time
    DF.dataList = dataList

    return DF
Esempio n. 11
0
def PredictInterval( pathIn          = "./",
                     dataFile        = "",
                     dataFrame       = None,
                     pathOut         = "./",
                     predictFile     = "",
                     lib             = "",
                     pred            = "",
                     maxTp           = 10,
                     E               = 1,
                     tau             = -1,
                     exclusionRadius = 0,
                     columns         = "",
                     target          = "",
                     embedded        = False,
                     verbose         = False,
                     validLib        = [],
                     numThreads      = 4,
                     showPlot        = True ):
    '''Estimate optimal prediction interval [1:maxTp]

    Input is either dataFile (an empty DF struct is passed to pyBindEDM
    along with pathIn and dataFile) or dataFrame, a non-empty Pandas
    DataFrame converted with PandasDataFrametoDF(). dataFile takes
    precedence if both are given.

    lib, pred and columns may be strings or non-string iterables;
    iterables are joined into space-separated strings. All other
    arguments are forwarded unchanged to pyBindEDM.PredictInterval().

    Returns a Pandas DataFrame built from the dict returned by
    pyBindEDM.PredictInterval(). If showPlot is true, its 'rho' column
    is plotted against 'Tp'; when embedded is true the E shown in the
    plot title is the number of names in columns.

    Raises Exception if dataFrame is empty, or if neither a dataFile nor
    a Pandas DataFrame is supplied.'''

    def spaceJoined( arg ) :
        # Values for which NotStringIterable() is false pass through as-is
        if pyEDM.AuxFunc.NotStringIterable( arg ) :
            return ' '.join( map( str, arg ) )
        return arg

    # Resolve the data source : file name wins over dataFrame
    if not dataFile :
        if not isinstance( dataFrame, DataFrame ) :
            raise Exception( "PredictInterval(): Invalid data input." )
        if dataFrame.empty :
            raise Exception( "PredictInterval(): dataFrame is empty." )
        DF = pyEDM.AuxFunc.PandasDataFrametoDF( dataFrame )
    else :
        DF = pyBindEDM.DF()

    lib     = spaceJoined( lib )
    pred    = spaceJoined( pred )
    columns = spaceJoined( columns )

    # pybind11 returns a Python dict from cppEDM PredictInterval;
    # wrap it directly in a pandas DataFrame
    rhoByTp = DataFrame(
        pyBindEDM.PredictInterval( pathIn, dataFile, DF, pathOut,
                                   predictFile, lib, pred, maxTp, E, tau,
                                   exclusionRadius, columns, target,
                                   embedded, verbose, validLib,
                                   numThreads ) )

    if not showPlot :
        return rhoByTp

    # With embedded data the dimension is the number of columns
    titleE = len( columns.split() ) if embedded else E

    axes = rhoByTp.plot( 'Tp', 'rho',
                         title     = dataFile + "\nE=" + str( titleE ),
                         linewidth = 3 )
    axes.set( xlabel = "Forecast Interval",
              ylabel = "Prediction Skill ρ" )
    show()

    return rhoByTp
Esempio n. 12
0
def CCM( pathIn           = "./",
         dataFile         = "",
         dataFrame        = None,
         pathOut          = "./",
         predictFile      = "",
         E                = 0, 
         Tp               = 0,
         knn              = 0,
         tau              = -1,
         exclusionRadius  = 0,
         columns          = "",
         target           = "",
         libSizes         = "",
         sample           = 0,
         random           = True,
         replacement      = False,
         seed             = 0,
         embedded         = False,
         includeData      = False,
         parameterList    = False,
         verbose          = False,
         showPlot         = False ) :
    '''Convergent Cross Mapping on path/file.

    Input is either dataFile (an empty DF struct is passed to pyBindEDM
    along with pathIn and dataFile) or dataFrame, a non-empty Pandas
    DataFrame converted with PandasDataFrametoDF(). dataFile takes
    precedence if both are given.

    columns, libSizes and target may be strings or non-string iterables;
    iterables are joined into space-separated strings. All other
    arguments are forwarded unchanged to pyBindEDM.CCM().

    Returns the "LibMeans" DataFrame, or if includeData is true a dict
    with keys 'LibMeans', 'PredictStats1', 'PredictStats2' and, when
    parameterList is also true, 'parameters'. Note parameterList has no
    effect on the return value unless includeData is true.

    If showPlot is true, the second and third LibMeans columns are
    plotted against 'LibSize'.

    Raises Exception if dataFrame is empty, or if neither a dataFile nor
    a Pandas DataFrame is supplied.'''

    def spaceJoined( arg ) :
        # Values for which NotStringIterable() is false pass through as-is
        if pyEDM.AuxFunc.NotStringIterable( arg ) :
            return ' '.join( map( str, arg ) )
        return arg

    # Resolve the data source : file name wins over dataFrame
    if not dataFile :
        if not isinstance( dataFrame, DataFrame ) :
            raise Exception( "CCM(): Invalid data input." )
        if dataFrame.empty :
            raise Exception( "CCM(): dataFrame is empty." )
        DF = pyEDM.AuxFunc.PandasDataFrametoDF( dataFrame )
    else :
        DF = pyBindEDM.DF()

    columns  = spaceJoined( columns )
    libSizes = spaceJoined( libSizes )
    target   = spaceJoined( target )

    # pybind11 returns a Python dict from cppEDM CCM holding
    # { "LibMeans" : DF } and, if includeData,
    # { PredictStats1 : DF, PredictStats2 : DF }
    result = pyBindEDM.CCM( pathIn, dataFile, DF, pathOut, predictFile,
                            E, Tp, knn, tau, exclusionRadius,
                            columns, target, libSizes,
                            sample, random, replacement, seed,
                            embedded, includeData, parameterList, verbose )

    libMeans = DataFrame( result[ "LibMeans" ] ) # Convert to pandas DataFrame

    # Default return is the means alone; includeData upgrades it to a dict
    # with the individual prediction stats (and parameters if requested)
    output = libMeans
    if includeData :
        output = { 'LibMeans'      : libMeans,
                   'PredictStats1' : DataFrame( result[ "PredictStats1" ] ),
                   'PredictStats2' : DataFrame( result[ "PredictStats2" ] ) }
        if parameterList :
            output[ 'parameters' ] = result[ 'parameters' ]

    if showPlot :
        crossMapColumns = [ libMeans.columns[1], libMeans.columns[2] ]

        axes = libMeans.plot( 'LibSize', crossMapColumns,
                              title     = dataFile + "\nE=" + str(E),
                              linewidth = 3 )
        axes.set( xlabel = "Library Size", ylabel = "Correlation ρ" )
        axhline( y = 0, linewidth = 1 )
        show()

    return output
Esempio n. 13
0
def SMap( pathIn          = "./",
          dataFile        = "",
          dataFrame       = None,
          pathOut         = "./",
          predictFile     = "",
          lib             = "",
          pred            = "",
          E               = 0, 
          Tp              = 1,
          knn             = 0,
          tau             = -1,
          theta           = 0,
          exclusionRadius = 0,
          columns         = "",
          target          = "",
          smapFile        = "",
          jacobians       = "",
          solver          = None,
          embedded        = False,
          verbose         = False,
          const_pred      = False,
          showPlot        = False,
          validLib        = [],
          generateSteps   = 0,
          parameterList   = False
          ):
    '''S-Map prediction on path/file.

    Input is either dataFile (an empty DF struct is passed to pyBindEDM
    along with pathIn and dataFile) or dataFrame, a non-empty Pandas
    DataFrame converted with PandasDataFrametoDF(). dataFile takes
    precedence if both are given.

    lib, pred and columns may be strings or non-string iterables;
    iterables are joined into space-separated strings. A truthy solver
    must be an object whose class name is one of LinearRegression, Ridge,
    Lasso, ElasticNet, RidgeCV, LassoCV, ElasticNetCV. All other
    arguments are forwarded unchanged to pyBindEDM.SMap().

    Returns a dict { 'predictions' : DataFrame, 'coefficients' : DataFrame }
    plus a 'parameters' entry when parameterList is true. If showPlot is
    true both DataFrames are passed to the AuxFunc plot helpers; when
    embedded is true the E given to them is the number of names in columns.

    Raises Exception if dataFrame is empty, if neither a dataFile nor a
    Pandas DataFrame is supplied, or if solver is not supported.'''

    def spaceJoined( arg ) :
        # Values for which NotStringIterable() is false pass through as-is
        if pyEDM.AuxFunc.NotStringIterable( arg ) :
            return ' '.join( map( str, arg ) )
        return arg

    # Resolve the data source : file name wins over dataFrame
    if not dataFile :
        if not isinstance( dataFrame, DataFrame ) :
            raise Exception( "SMap(): Invalid data input." )
        if dataFrame.empty :
            raise Exception( "SMap(): dataFrame is empty." )
        DF = pyEDM.AuxFunc.PandasDataFrametoDF( dataFrame )
    else :
        DF = pyBindEDM.DF()

    lib     = spaceJoined( lib )
    pred    = spaceJoined( pred )
    columns = spaceJoined( columns )

    # A solver, if passed in, is accepted by class name only
    supportedSolvers = ( 'LinearRegression',
                         'Ridge',   'Lasso',   'ElasticNet',
                         'RidgeCV', 'LassoCV', 'ElasticNetCV' )
    if solver and solver.__class__.__name__ not in supportedSolvers :
        raise Exception( "SMap(): Invalid solver." )

    # pybind11 returns a Python dict from cppEDM SMap:
    #  { "predictions" : {}, "coefficients" : {}, ["parameters" : {}] }
    result = pyBindEDM.SMap( pathIn, dataFile, DF, pathOut, predictFile,
                             lib, pred, E, Tp, knn, tau, theta,
                             exclusionRadius, columns, target,
                             smapFile, jacobians, solver,
                             embedded, const_pred, verbose,
                             validLib, generateSteps, parameterList )

    # Convert both entries to pandas DataFrames
    SMapDict = { 'predictions'  : DataFrame( result['predictions']  ),
                 'coefficients' : DataFrame( result['coefficients'] ) }

    if parameterList :
        SMapDict[ 'parameters' ] = result[ 'parameters' ]

    if showPlot :
        # With embedded data the dimension is the number of columns
        plotE = len( columns.split() ) if embedded else E
        pyEDM.AuxFunc.PlotObsPred( SMapDict[ 'predictions' ],
                                   dataFile, plotE, Tp, False )
        pyEDM.AuxFunc.PlotCoeff  ( SMapDict[ 'coefficients' ],
                                   dataFile, plotE, Tp )

    return SMapDict