Example #1
0
def run(dataName=None, allocModelName=None, obsModelName=None, algName=None,
        doSaveToDisk=True, doWriteStdOut=True,
        taskID=None, **kwargs):
    ''' Fit specified model to data with learning algorithm.

      Runs nTask independent tasks and keeps the one with the largest
      evidence bound (Info['evBound']).

      Usage
      -------
      To fit a Gauss MixModel to a custom dataset defined in matrix X
      >> Data = bnpy.data.XData(X)
      >> hmodel, LP, Info = run(Data, 'MixModel', 'Gauss', 'EM', K=3, nLap=10)

      To load a dataset specified in a specific script
      For example, 2D toy data in demodata/AsteriskK8.py
      >> hmodel, LP, Info = run('AsteriskK8', 'MixModel', 'Gauss', 'VB', K=3)

      To run 5 tasks (separate initializations) and get best of 5 runs:
      >> opts = dict(K=8, nLap=100, printEvery=0)
      >> hmodel, LP, Info = run('AsteriskK8', 'MixModel', 'Gauss', 'VB',
      ..                        nTask=5, **opts)

      Args
      -------
      dataName : either one of
                  * bnpy Data object,
                  * string filesystem path of Data module within BNPYDATADIR
      allocModelName : string name of allocation (latent structure) model
                        {MixModel, DPMixModel, AdmixModel, HMM, etc.}
      obsModelName : string name of observation (likelihood) model
                        {Gauss, ZMGauss, WordCount, etc.}
      algName : string name of the learning algorithm {EM, VB, etc.}
      doSaveToDisk : boolean, forwarded unchanged to every task run
      doWriteStdOut : boolean, forwarded unchanged to every task run
      taskID : optional int. If given, it is the id of the first task and
                  overrides the 'taskid' entry of the parsed OutputPrefs.
      **kwargs : keyword args defining properties of the model or alg
                  see Doc for details [TODO]

      If any of the four required names is None, all four are obtained
      from BNPYArgParser.parseRequiredArgs() instead.

      Returns
      -------
      hmodel : best model fit to the dataset (across nTask runs)
      LP : local parameters of that best model on the dataset
      Info : dict of information about the best run. Info['evBound'] is
                  the score used for ranking (larger implies better model).

      Raises
      -------
      ValueError : if the parsed nTask is smaller than 1.
    '''
    hasReqArgs = dataName is not None
    hasReqArgs &= allocModelName is not None
    hasReqArgs &= obsModelName is not None
    hasReqArgs &= algName is not None

    if hasReqArgs:
        ReqArgs = dict(dataName=dataName,
                       allocModelName=allocModelName,
                       obsModelName=obsModelName,
                       algName=algName)
    else:
        ReqArgs = BNPYArgParser.parseRequiredArgs()
        dataName = ReqArgs['dataName']
        allocModelName = ReqArgs['allocModelName']
        obsModelName = ReqArgs['obsModelName']
        algName = ReqArgs['algName']
    KwArgs, UnkArgs = BNPYArgParser.parseKeywordArgs(ReqArgs, **kwargs)

    jobname = KwArgs['OutputPrefs']['jobname']

    if taskID is None:
        starttaskid = KwArgs['OutputPrefs']['taskid']
    else:
        starttaskid = taskID
        KwArgs['OutputPrefs']['taskid'] = taskID
    nTask = KwArgs['OutputPrefs']['nTask']
    if nTask < 1:
        # Without at least one task there is nothing to return; fail with a
        # clear message instead of an UnboundLocalError at the return below.
        raise ValueError('nTask must be at least 1, got %r' % (nTask,))

    bestModel = None
    bestLP = None
    bestInfo = None
    bestEvBound = -np.inf
    for taskid in range(starttaskid, starttaskid + nTask):
        hmodel, LP, Info = _run_task_internal(jobname, taskid, nTask, ReqArgs,
                                              KwArgs, UnkArgs, dataName,
                                              allocModelName, obsModelName,
                                              algName, doSaveToDisk,
                                              doWriteStdOut)
        evBound = Info['evBound']
        isBetter = evBound > bestEvBound
        # The first finished task is always kept as a fallback, so a result
        # is returned even when every evBound is NaN or -inf.
        if bestInfo is None or isBetter:
            bestModel = hmodel
            bestLP = LP
            bestInfo = Info
        # Only a genuinely better (hence non-NaN) score raises the bar;
        # a NaN fallback must not block later valid runs.
        if isBetter:
            bestEvBound = evBound
    return bestModel, bestLP, bestInfo
Example #2
0
def run(dataName=None,
        allocModelName=None,
        obsModelName=None,
        algName=None,
        doSaveToDisk=True,
        doWriteStdOut=True,
        taskID=None,
        **kwargs):
    """ Fit specified model to data with learning algorithm.

        Runs nTask independent tasks and keeps the one with the smallest
        info_dict['loss'].

        Args
        -------
        dataName : either one of
                    * bnpy Data object,
                    * string name of python file within BNPYDATADIR
        allocModelName : string name of allocation (latent structure) model
        obsModelName : string name of observation (likelihood) model
        algName : string name of the learning algorithm
        doSaveToDisk : boolean, stored in OutputPrefs and forwarded to
                    every task run
        doWriteStdOut : boolean, stored in OutputPrefs and forwarded to
                    every task run
        taskID : optional int. If given, it is the id of the first task
                    and overrides the 'taskid' entry of OutputPrefs.
        **kwargs : keyword args defining properties of the model or alg

        If any of the four required names is None, all four are obtained
        from BNPYArgParser.parseRequiredArgs() instead.

        Returns
        -------
        hmodel : best model fit to the dataset (across nTask runs)
        Info   : dict of information about this best model

        Raises
        -------
        ValueError : if the parsed nTask is smaller than 1.
    """
    hasReqArgs = dataName is not None
    hasReqArgs &= allocModelName is not None
    hasReqArgs &= obsModelName is not None
    hasReqArgs &= algName is not None

    if hasReqArgs:
        ReqArgs = dict(dataName=dataName,
                       allocModelName=allocModelName,
                       obsModelName=obsModelName,
                       algName=algName)
    else:
        ReqArgs = BNPYArgParser.parseRequiredArgs()
        dataName = ReqArgs['dataName']
        allocModelName = ReqArgs['allocModelName']
        obsModelName = ReqArgs['obsModelName']
        algName = ReqArgs['algName']
    KwArgs, UnkArgs = BNPYArgParser.parseKeywordArgs(ReqArgs, **kwargs)
    KwArgs['OutputPrefs']['doSaveToDisk'] = doSaveToDisk
    KwArgs['OutputPrefs']['doWriteStdOut'] = doWriteStdOut

    jobname = KwArgs['OutputPrefs']['jobname']
    # Update stored numerical options via keyword args
    bnpy.util.NumericUtil.UpdateConfig(**UnkArgs)

    if taskID is None:
        starttaskid = KwArgs['OutputPrefs']['taskid']
    else:
        starttaskid = taskID
        KwArgs['OutputPrefs']['taskid'] = taskID
    nTask = KwArgs['OutputPrefs']['nTask']
    if nTask < 1:
        # Without at least one task there is nothing to return; fail with a
        # clear message instead of an UnboundLocalError at the return below.
        raise ValueError('nTask must be at least 1, got %r' % (nTask,))

    bestModel = None
    best_info_dict = None
    best_loss = np.inf
    for taskid in range(starttaskid, starttaskid + nTask):
        hmodel, info_dict = _run_task_internal(jobname, taskid, nTask, ReqArgs,
                                               KwArgs, UnkArgs, dataName,
                                               allocModelName, obsModelName,
                                               algName, doSaveToDisk,
                                               doWriteStdOut)
        loss = info_dict['loss']
        is_better = loss < best_loss
        # The first task is always kept as a fallback result.
        if taskid == starttaskid or is_better:
            bestModel = hmodel
            best_info_dict = info_dict
        # Only a genuinely smaller (hence non-NaN) loss lowers the bar.
        # Storing a NaN loss here would make every later comparison False
        # and hide better runs.
        if is_better:
            best_loss = loss
    return bestModel, best_info_dict
Example #3
0
def run(dataName=None, allocModelName=None, obsModelName=None, algName=None,
        doSaveToDisk=True, doWriteStdOut=True,
        taskID=None, **kwargs):
  ''' Fit specified model to data with learning algorithm.

      Runs nTask independent tasks and keeps the one with the largest
      evidence bound (Info['evBound']).

      Usage
      -------
      To fit a Gauss MixModel to a custom dataset defined in matrix X
      >> Data = bnpy.data.XData(X)
      >> hmodel, LP, Info = run(Data, 'MixModel', 'Gauss', 'EM', K=3, nLap=10)

      To load a dataset specified in a specific script
      For example, 2D toy data in demodata/AsteriskK8.py
      >> hmodel, LP, Info = run('AsteriskK8', 'MixModel', 'Gauss', 'VB', K=3)

      To run 5 tasks (separate initializations) and get best of 5 runs:
      >> opts = dict(K=8, nLap=100, printEvery=0)
      >> hmodel, LP, Info = run('AsteriskK8', 'MixModel', 'Gauss', 'VB',
      ..                        nTask=5, **opts)

      Args
      -------
      dataName : either one of
                  * bnpy Data object,
                  * string filesystem path of Data module within BNPYDATADIR
      allocModelName : string name of allocation (latent structure) model
                        {MixModel, DPMixModel, AdmixModel, HMM, etc.}
      obsModelName : string name of observation (likelihood) model
                        {Gauss, ZMGauss, WordCount, etc.}
      algName : string name of the learning algorithm {EM, VB, etc.}
      doSaveToDisk : boolean, forwarded unchanged to every task run
      doWriteStdOut : boolean, forwarded unchanged to every task run
      taskID : optional int. If given, it is the id of the first task and
                  overrides the 'taskid' entry of the parsed OutputPrefs.
      **kwargs : keyword args defining properties of the model or alg
                  see Doc for details [TODO]

      If any of the four required names is None, all four are obtained
      from BNPYArgParser.parseRequiredArgs() instead.

      Returns
      -------
      hmodel : best model fit to the dataset (across nTask runs)
      LP : local parameters of that best model on the dataset
      Info : dict of information about the best run. Info['evBound'] is
                  the score used for ranking (larger implies better model).

      Raises
      -------
      ValueError : if the parsed nTask is smaller than 1.
  '''
  hasReqArgs = dataName is not None
  hasReqArgs &= allocModelName is not None
  hasReqArgs &= obsModelName is not None
  hasReqArgs &= algName is not None

  if hasReqArgs:
    ReqArgs = dict(dataName=dataName, allocModelName=allocModelName,
                   obsModelName=obsModelName, algName=algName)
  else:
    ReqArgs = BNPYArgParser.parseRequiredArgs()
    dataName = ReqArgs['dataName']
    allocModelName = ReqArgs['allocModelName']
    obsModelName = ReqArgs['obsModelName']
    algName = ReqArgs['algName']
  KwArgs, UnkArgs = BNPYArgParser.parseKeywordArgs(ReqArgs, **kwargs)

  jobname = KwArgs['OutputPrefs']['jobname']

  if taskID is None:
    starttaskid = KwArgs['OutputPrefs']['taskid']
  else:
    starttaskid = taskID
    KwArgs['OutputPrefs']['taskid'] = taskID
  nTask = KwArgs['OutputPrefs']['nTask']
  if nTask < 1:
    # Without at least one task there is nothing to return; fail with a
    # clear message instead of an UnboundLocalError at the return below.
    raise ValueError('nTask must be at least 1, got %r' % (nTask,))

  bestModel = None
  bestLP = None
  bestInfo = None
  bestEvBound = -np.inf
  for taskid in range(starttaskid, starttaskid + nTask):
    hmodel, LP, Info = _run_task_internal(jobname, taskid, nTask,
                      ReqArgs, KwArgs, UnkArgs,
                      dataName, allocModelName, obsModelName, algName,
                      doSaveToDisk, doWriteStdOut)
    evBound = Info['evBound']
    isBetter = evBound > bestEvBound
    # The first finished task is always kept as a fallback, so a result
    # is returned even when every evBound is NaN or -inf.
    if bestInfo is None or isBetter:
      bestModel = hmodel
      bestLP = LP
      bestInfo = Info
    # Only a genuinely better (hence non-NaN) score raises the bar;
    # a NaN fallback must not block later valid runs.
    if isBetter:
      bestEvBound = evBound
  return bestModel, bestLP, bestInfo