def cleanup_mergenewcompsonly(Data, expandModel, LP=None,
                              Korig=0, **kwargs):
  ''' Repeatedly attempt merges among the comps with index >= Korig.

      Runs at most 10 rounds. Each round enumerates every pair (kA, kB)
      with Korig <= kA < kB < Ktotal, computes suff stats with entropy and
      merge-entropy terms precomputed for those pairs, and passes them to
      MergeMove.run_many_merge_moves. The loop ends early as soon as a
      round leaves the number of comps unchanged.

      Args
      -------
      Data : dataset handed to the model's local and global steps
      expandModel : model whose comps are candidates for merging
      LP : optional local params. When not None, used as-is in the first
           round; its 'K' entry is overwritten in place.
      Korig : index of the first comp eligible to be merged
      kwargs : forwarded to run_many_merge_moves; must contain 'randstate'

      Returns
      -------
      mergeModel, mergeSS, mLP, mergeEv : results of the final round.
          mLP holds the local params computed before that round's merges.
  '''
  import MergeMove

  mergeModel = expandModel
  Ktotal = mergeModel.obsModel.K

  for roundID in xrange(10):
    # Every unordered pair drawn from the comps at index Korig and above
    mPairIDs = [(kA, kB)
                for kA in xrange(Korig, Ktotal)
                for kB in xrange(kA + 1, Ktotal)]

    # Caller-provided local params are only trusted for the first round
    reuseGivenLP = (roundID == 0) and (LP is not None)
    mLP = LP if reuseGivenLP else mergeModel.calc_local_params(Data)
    mLP['K'] = mergeModel.allocModel.K

    mSS = mergeModel.get_global_suff_stats(
        Data, mLP,
        doPrecompEntropy=True,
        doPrecompMergeEntropy=True,
        mPairIDs=mPairIDs)

    assert 'randstate' in kwargs
    mergeModel, mergeSS, mergeEv, _tracker = MergeMove.run_many_merge_moves(
        mergeModel, Data, mSS,
        nMergeTrials=len(mPairIDs),
        mPairIDs=mPairIDs,
        **kwargs)

    Kbefore, Ktotal = Ktotal, mergeSS.K
    if Ktotal == Kbefore:
      break  # nothing merged this round, further rounds would not help

  return mergeModel, mergeSS, mLP, mergeEv
def cleanup_mergenewcompsintoexisting(Data, expandModel, xSS, xLP,
                                      Korig=0, **kwargs):
  ''' Run one batch of merge moves on an expanded model.

      Candidate pairs come from MergeMove.preselect_all_merge_candidates,
      called with compIDs=range(Korig, xSS.K). Suff stats are then
      recomputed with merge-entropy terms for those pairs and passed to
      MergeMove.run_many_merge_moves.

      Args
      -------
      Data : dataset handed to the model's local and global steps
      expandModel : model containing both original and new comps
      xSS : suff stats for expandModel; only used for candidate
            preselection and its K, then recomputed
      xLP : local params; recomputed when xLP['K'] differs from xSS.K
      Korig : index of the first new comp
      kwargs : must contain 'randstate', 'cleanuppreselectroutine' and
               'cleanupNumMergeTrials'; all forwarded to run_many_merge_moves

      Returns
      -------
      mergexSS, mergexEv : suff stats and evidence returned by
                           run_many_merge_moves
  '''
  import MergeMove

  Kexpand = xSS.K
  randstate = kwargs['randstate']
  preselectroutine = kwargs['cleanuppreselectroutine']
  mergePerLap = kwargs['cleanupNumMergeTrials'] * (Kexpand - Korig)
  mPairIDs = MergeMove.preselect_all_merge_candidates(
      expandModel, xSS,
      randstate=randstate,
      preselectroutine=preselectroutine,
      mergePerLap=mergePerLap,
      compIDs=range(Korig, Kexpand))
  # Independent copy of the candidate list, kept for the sanity check below
  candidatePairs = list(mPairIDs)

  if xLP['K'] != xSS.K:
    # Local params do not match the current number of comps: recompute
    xLP = expandModel.calc_local_params(Data)
  xSS = expandModel.get_global_suff_stats(
      Data, xLP,
      doPrecompEntropy=True,
      doPrecompMergeEntropy=True,
      mPairIDs=mPairIDs)

  # NOTE(review): 'randstate' was already read from kwargs above, so this
  # check cannot fail at this point.
  assert 'randstate' in kwargs
  _model, mergexSS, mergexEv, MTracker = MergeMove.run_many_merge_moves(
      expandModel, Data, xSS,
      nMergeTrials=xSS.K**2,
      mPairIDs=mPairIDs,
      **kwargs)

  # Every accepted merge must have been one of the preselected candidates
  for acceptedPair in MTracker.acceptedOrigIDs:
    assert acceptedPair in candidatePairs

  # NOTE(review): these calls act on xSS (the stats given to
  # run_many_merge_moves), not on a separate object. Whether the returned
  # mergexSS is affected depends on whether run_many_merge_moves hands back
  # the same object -- confirm intent.
  xSS.setELBOFieldsToZero()
  xSS.setMergeFieldsToZero()

  return mergexSS, mergexEv
def clean_up_fresh_model(targetData, curModel, freshModel,
                         randstate=np.random, **mergeKwArgs):
  ''' Merge redundant comps of a fresh model and check it is worth keeping.

      Steps
      -------
      1) Up to 10 rounds of merge moves within freshModel on targetData,
         stopping once a round leaves the number of comps unchanged.
      2) If fewer than 2 comps remain, return immediately.
      3) Compare the fresh evidence bound against a single-comp model
         (built from a copy of curModel and comp 0 of the fresh suff stats,
         then refit to targetData).
      4) Compare the fresh evidence bound against curModel on targetData.

      In steps 3 and 4 the fresh model passes only when its gain is
      positive and at least 1e-5 times the absolute baseline bound.

      Args
      -------
      targetData : dataset used for every local/global step here
      curModel : current model, used as baseline and as copy template
      freshModel : newly proposed model to clean up
      randstate : random state forwarded to run_many_merge_moves
      mergeKwArgs : extra keyword args forwarded to run_many_merge_moves

      Returns
      -------
      targetSS : suff stats returned by the last round of merge moves

      Raises
      -------
      BirthProposalError : when the fresh model fails check 3 or check 4
  '''
  import MergeMove

  def gainIsNegligible(gain, baseline):
    # True when gain is non-positive or tiny relative to the baseline
    return gain <= 0 or gain < 0.00001 * abs(baseline)

  # Merge within the fresh comps until the comp count stops shrinking
  for _ in xrange(10):
    targetLP = freshModel.calc_local_params(targetData)
    targetSS = freshModel.get_global_suff_stats(
        targetData, targetLP,
        doPrecompEntropy=True,
        doPrecompMergeEntropy=True)
    Kbefore = targetSS.K
    freshModel, targetSS, freshEvBound, _tracker = \
        MergeMove.run_many_merge_moves(
            freshModel, targetData, targetSS,
            nMergeTrials=Kbefore**2,
            randstate=randstate,
            **mergeKwArgs)
    if targetSS.K == Kbefore:
      break

  if targetSS.K < 2:
    # Too few comps left; caller is expected to reject this result
    return targetSS

  # Build the single-comp baseline and refit it to targetData
  singleModel = curModel.copy()
  singleModel.update_global_params(
      targetSS.getComp(0, doCollapseK1=False))
  singleLP = singleModel.calc_local_params(targetData)
  singleSS = singleModel.get_global_suff_stats(
      targetData, singleLP, doPrecompEntropy=True)
  singleModel.update_global_params(singleSS)
  singleEvBound = singleModel.calc_evidence(SS=singleSS)

  # Check 1: fresh model must beat the single-comp baseline
  if gainIsNegligible(freshEvBound - singleEvBound, singleEvBound):
    raise BirthProposalError(
        "BIRTH terminated. Not better than single component on target data."
        + "\n  fresh  | K=%3d | %.7e" % (targetSS.K, freshEvBound)
        + "\n  single | K=%3d | %.7e" % (singleSS.K, singleEvBound))

  # Check 2: fresh model must beat the current model
  curLP = curModel.calc_local_params(targetData)
  curSS = curModel.get_global_suff_stats(
      targetData, curLP, doPrecompEntropy=True)
  curEvBound = curModel.calc_evidence(SS=curSS)
  if gainIsNegligible(freshEvBound - curEvBound, curEvBound):
    raise BirthProposalError(
        "BIRTH terminated. Not better than current model on target data."
        + "\n  fresh | K=%3d | %.7e" % (targetSS.K, freshEvBound)
        + "\n  cur   | K=%3d | %.7e" % (curSS.K, curEvBound))

  return targetSS
def clean_up_expanded_suff_stats(targetData, curModel, targetSS,
                                  randstate=np.random, **kwargs):
  ''' Create expanded model combining original and brand-new comps
        and try to identify brand-new comps that are redundant copies of
        originals and can be removed

      Args
      -------
      targetData : dataset used for every local/global step here
      curModel : current model; its allocModel.K is taken as the number
                 of original comps (Korig)
      targetSS : suff stats for the brand-new comps
      randstate : random state used both for preselecting merge candidates
                  and for running the merge moves
      kwargs : must contain 'cleanuppreselectroutine',
               'cleanupNumMergeTrials', 'doVizBirth' and
               'cleanupModifyOrigComps'; all forwarded to
               run_many_merge_moves

      Returns
      -------
      targetSS : if kwargs['cleanupModifyOrigComps'] is truthy, the suff
                 stats returned by run_many_merge_moves with ELBO and merge
                 fields zeroed; otherwise the provided targetSS, modified
                 in place by removing comps whose merges were accepted

      Raises
      -------
      BirthProposalError : when cleanupModifyOrigComps is falsy and the
                 number of accepted merges equals targetSS.K
      KeyError : when a required entry is missing from kwargs
  '''
  import MergeMove
  Korig = curModel.allocModel.K
  origLP = curModel.calc_local_params(targetData)
  expandSS = curModel.get_global_suff_stats(targetData, origLP)
  expandSS.insertComps(targetSS)
  expandModel = curModel.copy()
  expandModel.update_global_params(expandSS)

  expandLP = expandModel.calc_local_params(targetData)
  expandSS = expandModel.get_global_suff_stats(targetData, expandLP,
                  doPrecompEntropy=True, doPrecompMergeEntropy=True)
  Kexpand = expandSS.K

  # Use the caller's randstate (not the global np.random) so candidate
  # preselection is reproducible under a caller-supplied random state.
  mPairIDs = MergeMove.preselect_all_merge_candidates(
              expandModel, expandSS, randstate=randstate,
              preselectroutine=kwargs['cleanuppreselectroutine'],
              mergePerLap=kwargs['cleanupNumMergeTrials']*(Kexpand-Korig),
              compIDs=range(Korig, Kexpand))

  # Independent copy of the candidate list, kept for the sanity check below
  mPairIDsOrig = list(mPairIDs)

  xModel, xSS, xEv, MTracker = MergeMove.run_many_merge_moves(
                               expandModel, targetData, expandSS,
                               nMergeTrials=expandSS.K**2,
                               mPairIDs=mPairIDs,
                               randstate=randstate, **kwargs)

  if kwargs['doVizBirth']:
    viz_birth_proposal_2D(expandModel, xModel, None, None,
                          title1='expanded model',
                          title2='after merge')

  # Every accepted merge must have been one of the preselected candidates
  for x in MTracker.acceptedOrigIDs:
    assert x in mPairIDsOrig

  if kwargs['cleanupModifyOrigComps']:
    targetSS = xSS
    targetSS.setELBOFieldsToZero()
    targetSS.setMergeFieldsToZero()
  else:
    # Remove from targetSS all the comps whose merges were accepted
    kBList = [kB for kA, kB in MTracker.acceptedOrigIDs]

    if len(kBList) == targetSS.K:
      msg = 'BIRTH terminated. all new comps redundant with originals.'
      raise BirthProposalError(msg)
    # Go from highest index to lowest so earlier removals do not shift
    # the positions of comps still waiting to be removed.
    for kB in sorted(kBList, reverse=True):
      # presumably new comps sit at indices Korig and above in the
      # expanded model, so kB - Korig indexes into targetSS -- TODO confirm
      ktarget = kB - Korig
      if ktarget >= 0:
        targetSS.removeComp(ktarget)
  return targetSS
# Example #5
# 0
def cleanup_mergenewcompsintoexisting(Data, expandModel, xSS, xLP,
                                      Korig=0, **kwargs):
    ''' Run one batch of merge moves on an expanded model.

        Candidate pairs come from MergeMove.preselect_all_merge_candidates,
        called with compIDs=range(Korig, xSS.K). Suff stats are then
        recomputed with merge-entropy terms for those pairs and passed to
        MergeMove.run_many_merge_moves.

        Args
        -------
        Data : dataset handed to the model's local and global steps
        expandModel : model containing both original and new comps
        xSS : suff stats for expandModel; only used for candidate
              preselection and its K, then recomputed
        xLP : local params; recomputed when xLP['K'] differs from xSS.K
        Korig : index of the first new comp
        kwargs : must contain 'randstate', 'cleanuppreselectroutine' and
                 'cleanupNumMergeTrials'; all forwarded to
                 run_many_merge_moves

        Returns
        -------
        mergexSS, mergexEv : suff stats and evidence returned by
                             run_many_merge_moves
    '''
    import MergeMove

    Kexpand = xSS.K
    randstate = kwargs['randstate']
    preselectroutine = kwargs['cleanuppreselectroutine']
    mergePerLap = kwargs['cleanupNumMergeTrials'] * (Kexpand - Korig)
    mPairIDs = MergeMove.preselect_all_merge_candidates(
        expandModel, xSS,
        randstate=randstate,
        preselectroutine=preselectroutine,
        mergePerLap=mergePerLap,
        compIDs=range(Korig, Kexpand))
    # Independent copy of the candidate list, kept for the check below
    candidatePairs = list(mPairIDs)

    if xLP['K'] != xSS.K:
        # Local params do not match the current number of comps: recompute
        xLP = expandModel.calc_local_params(Data)
    xSS = expandModel.get_global_suff_stats(
        Data, xLP,
        doPrecompEntropy=True,
        doPrecompMergeEntropy=True,
        mPairIDs=mPairIDs)

    # NOTE(review): 'randstate' was already read from kwargs above, so this
    # check cannot fail at this point.
    assert 'randstate' in kwargs
    _model, mergexSS, mergexEv, MTracker = MergeMove.run_many_merge_moves(
        expandModel, Data, xSS,
        nMergeTrials=xSS.K**2,
        mPairIDs=mPairIDs,
        **kwargs)

    # Every accepted merge must have been one of the preselected candidates
    for acceptedPair in MTracker.acceptedOrigIDs:
        assert acceptedPair in candidatePairs

    # NOTE(review): these calls act on xSS (the stats given to
    # run_many_merge_moves), not on a separate object. Whether the returned
    # mergexSS is affected depends on whether run_many_merge_moves hands
    # back the same object -- confirm intent.
    xSS.setELBOFieldsToZero()
    xSS.setMergeFieldsToZero()

    return mergexSS, mergexEv
# Example #6
# 0
def cleanup_mergenewcompsonly(Data, expandModel, LP=None, Korig=0, **kwargs):
    ''' Repeatedly attempt merges among the comps with index >= Korig.

        Runs at most 10 rounds. Each round enumerates every pair (kA, kB)
        with Korig <= kA < kB < Ktotal, computes suff stats with entropy
        and merge-entropy terms precomputed for those pairs, and passes
        them to MergeMove.run_many_merge_moves. The loop ends early as soon
        as a round leaves the number of comps unchanged.

        Args
        -------
        Data : dataset handed to the model's local and global steps
        expandModel : model whose comps are candidates for merging
        LP : optional local params. When not None, used as-is in the first
             round; its 'K' entry is overwritten in place.
        Korig : index of the first comp eligible to be merged
        kwargs : forwarded to run_many_merge_moves; must contain 'randstate'

        Returns
        -------
        mergeModel, mergeSS, mLP, mergeEv : results of the final round.
            mLP holds the local params computed before that round's merges.
    '''
    import MergeMove

    mergeModel = expandModel
    Ktotal = mergeModel.obsModel.K

    for roundID in xrange(10):
        # Every unordered pair drawn from the comps at index Korig and above
        mPairIDs = [(kA, kB)
                    for kA in xrange(Korig, Ktotal)
                    for kB in xrange(kA + 1, Ktotal)]

        # Caller-provided local params are only trusted for the first round
        reuseGivenLP = (roundID == 0) and (LP is not None)
        mLP = LP if reuseGivenLP else mergeModel.calc_local_params(Data)
        mLP['K'] = mergeModel.allocModel.K

        mSS = mergeModel.get_global_suff_stats(
            Data, mLP,
            doPrecompEntropy=True,
            doPrecompMergeEntropy=True,
            mPairIDs=mPairIDs)

        assert 'randstate' in kwargs
        mergeModel, mergeSS, mergeEv, _tracker = \
            MergeMove.run_many_merge_moves(
                mergeModel, Data, mSS,
                nMergeTrials=len(mPairIDs),
                mPairIDs=mPairIDs,
                **kwargs)

        Kbefore, Ktotal = Ktotal, mergeSS.K
        if Ktotal == Kbefore:
            break  # nothing merged this round, more rounds would not help

    return mergeModel, mergeSS, mLP, mergeEv
# Example #7
# 0
def clean_up_fresh_model(targetData, curModel, freshModel,
                         randstate=np.random, **mergeKwArgs):
    ''' Merge redundant comps of a fresh model and check it is worth keeping.

        Steps
        -------
        1) Up to 10 rounds of merge moves within freshModel on targetData,
           stopping once a round leaves the number of comps unchanged.
        2) If fewer than 2 comps remain, return immediately.
        3) Compare the fresh evidence bound against a single-comp model
           (built from a copy of curModel and comp 0 of the fresh suff
           stats, then refit to targetData).
        4) Compare the fresh evidence bound against curModel on targetData.

        In steps 3 and 4 the fresh model passes only when its gain is
        positive and at least 1e-5 times the absolute baseline bound.

        Args
        -------
        targetData : dataset used for every local/global step here
        curModel : current model, used as baseline and as copy template
        freshModel : newly proposed model to clean up
        randstate : random state forwarded to run_many_merge_moves
        mergeKwArgs : extra keyword args forwarded to run_many_merge_moves

        Returns
        -------
        targetSS : suff stats returned by the last round of merge moves

        Raises
        -------
        BirthProposalError : when the fresh model fails check 3 or check 4
    '''
    import MergeMove

    def gainIsNegligible(gain, baseline):
        # True when gain is non-positive or tiny relative to the baseline
        return gain <= 0 or gain < 0.00001 * abs(baseline)

    # Merge within the fresh comps until the comp count stops shrinking
    for _ in xrange(10):
        targetLP = freshModel.calc_local_params(targetData)
        targetSS = freshModel.get_global_suff_stats(
            targetData, targetLP,
            doPrecompEntropy=True,
            doPrecompMergeEntropy=True)
        Kbefore = targetSS.K
        freshModel, targetSS, freshEvBound, _tracker = \
            MergeMove.run_many_merge_moves(
                freshModel, targetData, targetSS,
                nMergeTrials=Kbefore**2,
                randstate=randstate,
                **mergeKwArgs)
        if targetSS.K == Kbefore:
            break

    if targetSS.K < 2:
        # Too few comps left; caller is expected to reject this result
        return targetSS

    # Build the single-comp baseline and refit it to targetData
    singleModel = curModel.copy()
    singleModel.update_global_params(
        targetSS.getComp(0, doCollapseK1=False))
    singleLP = singleModel.calc_local_params(targetData)
    singleSS = singleModel.get_global_suff_stats(
        targetData, singleLP, doPrecompEntropy=True)
    singleModel.update_global_params(singleSS)
    singleEvBound = singleModel.calc_evidence(SS=singleSS)

    # Check 1: fresh model must beat the single-comp baseline
    if gainIsNegligible(freshEvBound - singleEvBound, singleEvBound):
        raise BirthProposalError(
            "BIRTH terminated. Not better than single component on target data."
            + "\n  fresh  | K=%3d | %.7e" % (targetSS.K, freshEvBound)
            + "\n  single | K=%3d | %.7e" % (singleSS.K, singleEvBound))

    # Check 2: fresh model must beat the current model
    curLP = curModel.calc_local_params(targetData)
    curSS = curModel.get_global_suff_stats(
        targetData, curLP, doPrecompEntropy=True)
    curEvBound = curModel.calc_evidence(SS=curSS)
    if gainIsNegligible(freshEvBound - curEvBound, curEvBound):
        raise BirthProposalError(
            "BIRTH terminated. Not better than current model on target data."
            + "\n  fresh | K=%3d | %.7e" % (targetSS.K, freshEvBound)
            + "\n  cur   | K=%3d | %.7e" % (curSS.K, curEvBound))

    return targetSS
# Example #8
# 0
def clean_up_expanded_suff_stats(targetData,
                                 curModel,
                                 targetSS,
                                 randstate=np.random,
                                 **kwargs):
    ''' Create expanded model combining original and brand-new comps
        and try to identify brand-new comps that are redundant copies of
        originals and can be removed

        Args
        -------
        targetData : dataset used for every local/global step here
        curModel : current model; its allocModel.K is taken as the number
                   of original comps (Korig)
        targetSS : suff stats for the brand-new comps
        randstate : random state used both for preselecting merge
                    candidates and for running the merge moves
        kwargs : must contain 'cleanuppreselectroutine',
                 'cleanupNumMergeTrials', 'doVizBirth' and
                 'cleanupModifyOrigComps'; all forwarded to
                 run_many_merge_moves

        Returns
        -------
        targetSS : if kwargs['cleanupModifyOrigComps'] is truthy, the suff
                   stats returned by run_many_merge_moves with ELBO and
                   merge fields zeroed; otherwise the provided targetSS,
                   modified in place by removing comps whose merges were
                   accepted

        Raises
        -------
        BirthProposalError : when cleanupModifyOrigComps is falsy and the
                   number of accepted merges equals targetSS.K
        KeyError : when a required entry is missing from kwargs
    '''
    import MergeMove
    Korig = curModel.allocModel.K
    origLP = curModel.calc_local_params(targetData)
    expandSS = curModel.get_global_suff_stats(targetData, origLP)
    expandSS.insertComps(targetSS)
    expandModel = curModel.copy()
    expandModel.update_global_params(expandSS)

    expandLP = expandModel.calc_local_params(targetData)
    expandSS = expandModel.get_global_suff_stats(targetData,
                                                 expandLP,
                                                 doPrecompEntropy=True,
                                                 doPrecompMergeEntropy=True)
    Kexpand = expandSS.K

    # Use the caller's randstate (not the global np.random) so candidate
    # preselection is reproducible under a caller-supplied random state.
    mPairIDs = MergeMove.preselect_all_merge_candidates(
        expandModel,
        expandSS,
        randstate=randstate,
        preselectroutine=kwargs['cleanuppreselectroutine'],
        mergePerLap=kwargs['cleanupNumMergeTrials'] * (Kexpand - Korig),
        compIDs=range(Korig, Kexpand))

    # Independent copy of the candidate list, kept for the check below
    mPairIDsOrig = list(mPairIDs)

    xModel, xSS, xEv, MTracker = MergeMove.run_many_merge_moves(
        expandModel,
        targetData,
        expandSS,
        nMergeTrials=expandSS.K**2,
        mPairIDs=mPairIDs,
        randstate=randstate,
        **kwargs)

    if kwargs['doVizBirth']:
        viz_birth_proposal_2D(expandModel,
                              xModel,
                              None,
                              None,
                              title1='expanded model',
                              title2='after merge')

    # Every accepted merge must have been one of the preselected candidates
    for x in MTracker.acceptedOrigIDs:
        assert x in mPairIDsOrig

    if kwargs['cleanupModifyOrigComps']:
        targetSS = xSS
        targetSS.setELBOFieldsToZero()
        targetSS.setMergeFieldsToZero()
    else:
        # Remove from targetSS all the comps whose merges were accepted
        kBList = [kB for kA, kB in MTracker.acceptedOrigIDs]

        if len(kBList) == targetSS.K:
            msg = 'BIRTH terminated. all new comps redundant with originals.'
            raise BirthProposalError(msg)
        # Go from highest index to lowest so earlier removals do not shift
        # the positions of comps still waiting to be removed.
        for kB in sorted(kBList, reverse=True):
            # presumably new comps sit at indices Korig and above in the
            # expanded model, so kB - Korig indexes into targetSS
            # -- TODO confirm
            ktarget = kB - Korig
            if ktarget >= 0:
                targetSS.removeComp(ktarget)
    return targetSS