def cleanup_mergenewcompsonly(Data, expandModel, LP=None, Korig=0, **kwargs):
    ''' Run repeated rounds of merges restricted to comps with index >= Korig.

    Every round proposes all pairs (kA, kB) with Korig <= kA < kB < K,
    computes suff stats with merge-entropy terms for those pairs, and hands
    them to MergeMove.run_many_merge_moves. At most 10 rounds are run; the
    loop stops early once a round leaves the number of comps unchanged.

    Args
    --------
    Data : dataset handed to calc_local_params / get_global_suff_stats
    expandModel : model to start merging from
    LP : optional local params dict; if given, it is reused (not recomputed)
         in the first round, and its 'K' entry is overwritten in place
    Korig : first comp index eligible for merging
    kwargs : forwarded verbatim to run_many_merge_moves;
             must contain the key 'randstate'

    Returns
    --------
    (mergeModel, mergeSS, mLP, mergeEv)
        mergeModel, mergeSS, mergeEv are the first three outputs of the
        final call to run_many_merge_moves. mLP is the local params dict
        fed into that final round, so mLP['K'] is the comp count *before*
        that round's merges.
    '''
    import MergeMove

    model = expandModel
    Kcur = model.obsModel.K
    for roundID in xrange(10):
        # All ordered pairs (kA < kB) drawn from the eligible comps
        candidatePairs = [(kA, kB)
                          for kA in xrange(Korig, Kcur)
                          for kB in xrange(kA + 1, Kcur)]

        # Caller-provided local params are only valid for the first round
        reuseProvidedLP = (roundID == 0) and (LP is not None)
        if reuseProvidedLP:
            mLP = LP
        else:
            mLP = model.calc_local_params(Data)
        mLP['K'] = model.allocModel.K

        mSS = model.get_global_suff_stats(
            Data, mLP,
            doPrecompEntropy=True,
            doPrecompMergeEntropy=True,
            mPairIDs=candidatePairs)

        assert 'randstate' in kwargs
        model, mergeSS, mergeEv, _tracker = MergeMove.run_many_merge_moves(
            model, Data, mSS,
            nMergeTrials=len(candidatePairs),
            mPairIDs=candidatePairs,
            **kwargs)

        if mergeSS.K == Kcur:
            # no merges happened, so quit trying
            break
        Kcur = mergeSS.K
    return model, mergeSS, mLP, mergeEv
def cleanup_mergenewcompsintoexisting(Data, expandModel, xSS, xLP,
                                      Korig=0, **kwargs):
    ''' Attempt merges involving the comps with index in [Korig, xSS.K).

    Candidate pairs are chosen by MergeMove.preselect_all_merge_candidates
    (restricted via compIDs to the comps at index Korig and above), suff
    stats are recomputed with merge-entropy terms for exactly those pairs,
    and MergeMove.run_many_merge_moves is run once.

    Args
    --------
    Data : dataset handed to calc_local_params / get_global_suff_stats
    expandModel : model containing both the earlier and the newer comps
    xSS : suff stats for expandModel; only used for candidate selection,
          then replaced by freshly computed stats
    xLP : local params dict; recomputed when xLP['K'] != xSS.K
    Korig : index of the first comp treated as "new"
    kwargs : must hold 'randstate', 'cleanuppreselectroutine' and
             'cleanupNumMergeTrials'; forwarded to run_many_merge_moves

    Returns
    --------
    (mergexSS, mergexEv)
        Second and third outputs of run_many_merge_moves. The model it
        returns is discarded.
    '''
    import MergeMove

    Kexpand = xSS.K
    mPairIDs = MergeMove.preselect_all_merge_candidates(
        expandModel, xSS,
        randstate=kwargs['randstate'],
        preselectroutine=kwargs['cleanuppreselectroutine'],
        mergePerLap=kwargs['cleanupNumMergeTrials'] * (Kexpand - Korig),
        compIDs=range(Korig, Kexpand))
    # Snapshot of the candidates, taken before the merge routine sees them
    selectedPairs = list(mPairIDs)

    localParamsAreStale = xLP['K'] != xSS.K
    if localParamsAreStale:
        # Provided local params are stale, so need to recompute!
        xLP = expandModel.calc_local_params(Data)
    xSS = expandModel.get_global_suff_stats(
        Data, xLP,
        doPrecompEntropy=True,
        doPrecompMergeEntropy=True,
        mPairIDs=mPairIDs)

    # NOTE(review): kwargs['randstate'] was already indexed above, so a
    # missing key raises KeyError there and this assert cannot fail here.
    assert 'randstate' in kwargs
    _model, mergexSS, mergexEv, MTracker = MergeMove.run_many_merge_moves(
        expandModel, Data, xSS,
        nMergeTrials=xSS.K ** 2,
        mPairIDs=mPairIDs,
        **kwargs)

    # Sanity check: every accepted merge must be one of the preselected pairs
    for acceptedPair in MTracker.acceptedOrigIDs:
        assert acceptedPair in selectedPairs

    # Clear ELBO and merge fields on the pre-merge stats object
    xSS.setELBOFieldsToZero()
    xSS.setMergeFieldsToZero()
    return mergexSS, mergexEv
def clean_up_fresh_model(targetData, curModel, freshModel,
                         randstate=np.random, **mergeKwArgs):
    ''' Merge redundant comps of the fresh model and verify it is worth keeping.

    Steps performed here
    1) up to 10 rounds of merges within the fresh model on targetData,
       stopping once a round leaves the number of comps unchanged
    2) if fewer than 2 comps remain, return the suff stats immediately
    3) verify fresh model beats a single-component model on targetData
    4) verify fresh model beats the current model on targetData

    Args
    --------
    targetData : dataset used for every local/summary/evidence computation
    curModel : current model; copied to build the K=1 baseline
    freshModel : newly proposed model to clean up
    randstate : random generator forwarded to run_many_merge_moves
    mergeKwArgs : extra keyword args forwarded to run_many_merge_moves

    Returns
    --------
    targetSS : suff stats returned by the last round of merges

    Raises
    --------
    BirthProposalError
        if the fresh model's evidence bound does not exceed the baseline
        (single-component or current model) by more than a relative
        margin of 0.00001.
    '''
    import MergeMove

    def improvementIsNegligible(gain, baselineEv):
        # Reject non-positive gains and gains below the relative margin
        return gain <= 0 or gain < 0.00001 * abs(baselineEv)

    # Perform many merges among the fresh components
    for roundID in xrange(10):
        targetLP = freshModel.calc_local_params(targetData)
        targetSS = freshModel.get_global_suff_stats(
            targetData, targetLP,
            doPrecompEntropy=True,
            doPrecompMergeEntropy=True)
        Kbefore = targetSS.K
        freshModel, targetSS, freshEvBound, _tracker = \
            MergeMove.run_many_merge_moves(
                freshModel, targetData, targetSS,
                nMergeTrials=targetSS.K ** 2,
                randstate=randstate,
                **mergeKwArgs)
        if targetSS.K == Kbefore:
            # no merges happened, so quit trying
            break

    if targetSS.K < 2:
        # quit early, will reject
        return targetSS

    # Create K=1 model, seeded from the first fresh comp
    singleModel = curModel.copy()
    singleSS = targetSS.getComp(0, doCollapseK1=False)
    singleModel.update_global_params(singleSS)
    singleLP = singleModel.calc_local_params(targetData)
    singleSS = singleModel.get_global_suff_stats(
        targetData, singleLP, doPrecompEntropy=True)
    # Second update makes the K=1 model reflect targetData
    singleModel.update_global_params(singleSS)

    # Verify fresh model preferred over K=1 model
    singleEvBound = singleModel.calc_evidence(SS=singleSS)
    if improvementIsNegligible(freshEvBound - singleEvBound, singleEvBound):
        msg = (
            "BIRTH terminated. Not better than single component on target data."
            + "\n fresh | K=%3d | %.7e" % (targetSS.K, freshEvBound)
            + "\n single | K=%3d | %.7e" % (singleSS.K, singleEvBound))
        raise BirthProposalError(msg)

    # Verify fresh model improves over current model
    curLP = curModel.calc_local_params(targetData)
    curSS = curModel.get_global_suff_stats(
        targetData, curLP, doPrecompEntropy=True)
    curEvBound = curModel.calc_evidence(SS=curSS)
    if improvementIsNegligible(freshEvBound - curEvBound, curEvBound):
        msg = (
            "BIRTH terminated. Not better than current model on target data."
            + "\n fresh | K=%3d | %.7e" % (targetSS.K, freshEvBound)
            + "\n cur | K=%3d | %.7e" % (curSS.K, curEvBound))
        raise BirthProposalError(msg)
    return targetSS
def clean_up_expanded_suff_stats(targetData, curModel, targetSS,
                                 randstate=np.random, **kwargs):
    ''' Remove brand-new comps that are redundant copies of original comps.

    Creates an expanded model combining the original comps of curModel with
    the brand-new comps summarized by targetSS, then tries merges involving
    the new comps (indices Korig .. Kexpand-1).

    Args
    --------
    targetData : dataset used for all local/summary computations
    curModel : current model; its allocModel.K defines Korig
    targetSS : suff stats of the brand-new comps.
               Modified in place (comps removed) when
               kwargs['cleanupModifyOrigComps'] is false.
    randstate : random generator used for BOTH candidate preselection and
                the merge moves, so a seeded generator gives repeatable
                cleanup
    kwargs : must hold 'cleanuppreselectroutine', 'cleanupNumMergeTrials',
             'doVizBirth' and 'cleanupModifyOrigComps';
             also forwarded to run_many_merge_moves

    Returns
    --------
    targetSS : if kwargs['cleanupModifyOrigComps'] is true, the post-merge
               suff stats of the expanded model with ELBO and merge fields
               zeroed; otherwise the input targetSS minus every new comp
               whose merge was accepted.

    Raises
    --------
    BirthProposalError
        if (with cleanupModifyOrigComps false) the number of accepted
        merges equals the number of comps in targetSS.
    '''
    import MergeMove

    Korig = curModel.allocModel.K

    # Expanded stats = curModel's summary of targetData + the new comps
    origLP = curModel.calc_local_params(targetData)
    expandSS = curModel.get_global_suff_stats(targetData, origLP)
    expandSS.insertComps(targetSS)
    expandModel = curModel.copy()
    expandModel.update_global_params(expandSS)

    # Re-summarize under the expanded model, with fields needed for merges
    expandLP = expandModel.calc_local_params(targetData)
    expandSS = expandModel.get_global_suff_stats(
        targetData, expandLP,
        doPrecompEntropy=True,
        doPrecompMergeEntropy=True)
    Kexpand = expandSS.K

    # Use the caller's generator here (previously hard-coded to np.random,
    # which made preselection ignore a seeded randstate).
    mPairIDs = MergeMove.preselect_all_merge_candidates(
        expandModel, expandSS,
        randstate=randstate,
        preselectroutine=kwargs['cleanuppreselectroutine'],
        mergePerLap=kwargs['cleanupNumMergeTrials'] * (Kexpand - Korig),
        compIDs=range(Korig, Kexpand))
    # Snapshot of the candidates, taken before the merge routine sees them
    mPairIDsOrig = list(mPairIDs)

    xModel, xSS, xEv, MTracker = MergeMove.run_many_merge_moves(
        expandModel, targetData, expandSS,
        nMergeTrials=expandSS.K ** 2,
        mPairIDs=mPairIDs,
        randstate=randstate,
        **kwargs)

    if kwargs['doVizBirth']:
        viz_birth_proposal_2D(expandModel, xModel, None, None,
                              title1='expanded model',
                              title2='after merge')

    # Sanity check: every accepted merge must be one of the preselected pairs
    for pair in MTracker.acceptedOrigIDs:
        assert pair in mPairIDsOrig

    if kwargs['cleanupModifyOrigComps']:
        targetSS = xSS
        targetSS.setELBOFieldsToZero()
        targetSS.setMergeFieldsToZero()
    else:
        # Remove from targetSS all the comps whose merges were accepted
        kBList = [kB for kA, kB in MTracker.acceptedOrigIDs]
        if len(kBList) == targetSS.K:
            msg = 'BIRTH terminated. all new comps redundant with originals.'
            raise BirthProposalError(msg)
        # Delete from the highest index down so remaining indices stay valid
        for kB in sorted(kBList, reverse=True):
            ktarget = kB - Korig  # position within targetSS
            if ktarget >= 0:
                targetSS.removeComp(ktarget)
    return targetSS
def cleanup_mergenewcompsintoexisting(Data, expandModel, xSS, xLP,
                                      Korig=0, **kwargs):
    ''' Attempt merges involving the comps with index in [Korig, xSS.K).

    Candidate pairs are chosen by MergeMove.preselect_all_merge_candidates
    (restricted via compIDs to the comps at index Korig and above), suff
    stats are recomputed with merge-entropy terms for exactly those pairs,
    and MergeMove.run_many_merge_moves is run once.

    Args
    --------
    Data : dataset handed to calc_local_params / get_global_suff_stats
    expandModel : model containing both the earlier and the newer comps
    xSS : suff stats for expandModel; only used for candidate selection,
          then replaced by freshly computed stats
    xLP : local params dict; recomputed when xLP['K'] != xSS.K
    Korig : index of the first comp treated as "new"
    kwargs : must hold 'randstate', 'cleanuppreselectroutine' and
             'cleanupNumMergeTrials'; forwarded to run_many_merge_moves

    Returns
    --------
    (mergexSS, mergexEv)
        Second and third outputs of run_many_merge_moves. The model it
        returns is discarded.
    '''
    import MergeMove

    Kexpand = xSS.K
    mPairIDs = MergeMove.preselect_all_merge_candidates(
        expandModel, xSS,
        randstate=kwargs['randstate'],
        preselectroutine=kwargs['cleanuppreselectroutine'],
        mergePerLap=kwargs['cleanupNumMergeTrials'] * (Kexpand - Korig),
        compIDs=range(Korig, Kexpand))
    # Snapshot of the candidates, taken before the merge routine sees them
    selectedPairs = list(mPairIDs)

    localParamsAreStale = xLP['K'] != xSS.K
    if localParamsAreStale:
        # Provided local params are stale, so need to recompute!
        xLP = expandModel.calc_local_params(Data)
    xSS = expandModel.get_global_suff_stats(
        Data, xLP,
        doPrecompEntropy=True,
        doPrecompMergeEntropy=True,
        mPairIDs=mPairIDs)

    # NOTE(review): kwargs['randstate'] was already indexed above, so a
    # missing key raises KeyError there and this assert cannot fail here.
    assert 'randstate' in kwargs
    _model, mergexSS, mergexEv, MTracker = MergeMove.run_many_merge_moves(
        expandModel, Data, xSS,
        nMergeTrials=xSS.K ** 2,
        mPairIDs=mPairIDs,
        **kwargs)

    # Sanity check: every accepted merge must be one of the preselected pairs
    for acceptedPair in MTracker.acceptedOrigIDs:
        assert acceptedPair in selectedPairs

    # Clear ELBO and merge fields on the pre-merge stats object
    xSS.setELBOFieldsToZero()
    xSS.setMergeFieldsToZero()
    return mergexSS, mergexEv
def cleanup_mergenewcompsonly(Data, expandModel, LP=None, Korig=0, **kwargs):
    ''' Run repeated rounds of merges restricted to comps with index >= Korig.

    Every round proposes all pairs (kA, kB) with Korig <= kA < kB < K,
    computes suff stats with merge-entropy terms for those pairs, and hands
    them to MergeMove.run_many_merge_moves. At most 10 rounds are run; the
    loop stops early once a round leaves the number of comps unchanged.

    Args
    --------
    Data : dataset handed to calc_local_params / get_global_suff_stats
    expandModel : model to start merging from
    LP : optional local params dict; if given, it is reused (not recomputed)
         in the first round, and its 'K' entry is overwritten in place
    Korig : first comp index eligible for merging
    kwargs : forwarded verbatim to run_many_merge_moves;
             must contain the key 'randstate'

    Returns
    --------
    (mergeModel, mergeSS, mLP, mergeEv)
        mergeModel, mergeSS, mergeEv are the first three outputs of the
        final call to run_many_merge_moves. mLP is the local params dict
        fed into that final round, so mLP['K'] is the comp count *before*
        that round's merges.
    '''
    import MergeMove

    model = expandModel
    Kcur = model.obsModel.K
    for roundID in xrange(10):
        # All ordered pairs (kA < kB) drawn from the eligible comps
        candidatePairs = [(kA, kB)
                          for kA in xrange(Korig, Kcur)
                          for kB in xrange(kA + 1, Kcur)]

        # Caller-provided local params are only valid for the first round
        reuseProvidedLP = (roundID == 0) and (LP is not None)
        if reuseProvidedLP:
            mLP = LP
        else:
            mLP = model.calc_local_params(Data)
        mLP['K'] = model.allocModel.K

        mSS = model.get_global_suff_stats(
            Data, mLP,
            doPrecompEntropy=True,
            doPrecompMergeEntropy=True,
            mPairIDs=candidatePairs)

        assert 'randstate' in kwargs
        model, mergeSS, mergeEv, _tracker = MergeMove.run_many_merge_moves(
            model, Data, mSS,
            nMergeTrials=len(candidatePairs),
            mPairIDs=candidatePairs,
            **kwargs)

        if mergeSS.K == Kcur:
            # no merges happened, so quit trying
            break
        Kcur = mergeSS.K
    return model, mergeSS, mLP, mergeEv
def clean_up_fresh_model(targetData, curModel, freshModel,
                         randstate=np.random, **mergeKwArgs):
    ''' Merge redundant comps of the fresh model and verify it is worth keeping.

    Steps performed here
    1) up to 10 rounds of merges within the fresh model on targetData,
       stopping once a round leaves the number of comps unchanged
    2) if fewer than 2 comps remain, return the suff stats immediately
    3) verify fresh model beats a single-component model on targetData
    4) verify fresh model beats the current model on targetData

    Args
    --------
    targetData : dataset used for every local/summary/evidence computation
    curModel : current model; copied to build the K=1 baseline
    freshModel : newly proposed model to clean up
    randstate : random generator forwarded to run_many_merge_moves
    mergeKwArgs : extra keyword args forwarded to run_many_merge_moves

    Returns
    --------
    targetSS : suff stats returned by the last round of merges

    Raises
    --------
    BirthProposalError
        if the fresh model's evidence bound does not exceed the baseline
        (single-component or current model) by more than a relative
        margin of 0.00001.
    '''
    import MergeMove

    def improvementIsNegligible(gain, baselineEv):
        # Reject non-positive gains and gains below the relative margin
        return gain <= 0 or gain < 0.00001 * abs(baselineEv)

    # Perform many merges among the fresh components
    for roundID in xrange(10):
        targetLP = freshModel.calc_local_params(targetData)
        targetSS = freshModel.get_global_suff_stats(
            targetData, targetLP,
            doPrecompEntropy=True,
            doPrecompMergeEntropy=True)
        Kbefore = targetSS.K
        freshModel, targetSS, freshEvBound, _tracker = \
            MergeMove.run_many_merge_moves(
                freshModel, targetData, targetSS,
                nMergeTrials=targetSS.K ** 2,
                randstate=randstate,
                **mergeKwArgs)
        if targetSS.K == Kbefore:
            # no merges happened, so quit trying
            break

    if targetSS.K < 2:
        # quit early, will reject
        return targetSS

    # Create K=1 model, seeded from the first fresh comp
    singleModel = curModel.copy()
    singleSS = targetSS.getComp(0, doCollapseK1=False)
    singleModel.update_global_params(singleSS)
    singleLP = singleModel.calc_local_params(targetData)
    singleSS = singleModel.get_global_suff_stats(
        targetData, singleLP, doPrecompEntropy=True)
    # Second update makes the K=1 model reflect targetData
    singleModel.update_global_params(singleSS)

    # Verify fresh model preferred over K=1 model
    singleEvBound = singleModel.calc_evidence(SS=singleSS)
    if improvementIsNegligible(freshEvBound - singleEvBound, singleEvBound):
        msg = (
            "BIRTH terminated. Not better than single component on target data."
            + "\n fresh | K=%3d | %.7e" % (targetSS.K, freshEvBound)
            + "\n single | K=%3d | %.7e" % (singleSS.K, singleEvBound))
        raise BirthProposalError(msg)

    # Verify fresh model improves over current model
    curLP = curModel.calc_local_params(targetData)
    curSS = curModel.get_global_suff_stats(
        targetData, curLP, doPrecompEntropy=True)
    curEvBound = curModel.calc_evidence(SS=curSS)
    if improvementIsNegligible(freshEvBound - curEvBound, curEvBound):
        msg = (
            "BIRTH terminated. Not better than current model on target data."
            + "\n fresh | K=%3d | %.7e" % (targetSS.K, freshEvBound)
            + "\n cur | K=%3d | %.7e" % (curSS.K, curEvBound))
        raise BirthProposalError(msg)
    return targetSS
def clean_up_expanded_suff_stats(targetData, curModel, targetSS,
                                 randstate=np.random, **kwargs):
    ''' Remove brand-new comps that are redundant copies of original comps.

    Creates an expanded model combining the original comps of curModel with
    the brand-new comps summarized by targetSS, then tries merges involving
    the new comps (indices Korig .. Kexpand-1).

    Args
    --------
    targetData : dataset used for all local/summary computations
    curModel : current model; its allocModel.K defines Korig
    targetSS : suff stats of the brand-new comps.
               Modified in place (comps removed) when
               kwargs['cleanupModifyOrigComps'] is false.
    randstate : random generator used for BOTH candidate preselection and
                the merge moves, so a seeded generator gives repeatable
                cleanup
    kwargs : must hold 'cleanuppreselectroutine', 'cleanupNumMergeTrials',
             'doVizBirth' and 'cleanupModifyOrigComps';
             also forwarded to run_many_merge_moves

    Returns
    --------
    targetSS : if kwargs['cleanupModifyOrigComps'] is true, the post-merge
               suff stats of the expanded model with ELBO and merge fields
               zeroed; otherwise the input targetSS minus every new comp
               whose merge was accepted.

    Raises
    --------
    BirthProposalError
        if (with cleanupModifyOrigComps false) the number of accepted
        merges equals the number of comps in targetSS.
    '''
    import MergeMove

    Korig = curModel.allocModel.K

    # Expanded stats = curModel's summary of targetData + the new comps
    origLP = curModel.calc_local_params(targetData)
    expandSS = curModel.get_global_suff_stats(targetData, origLP)
    expandSS.insertComps(targetSS)
    expandModel = curModel.copy()
    expandModel.update_global_params(expandSS)

    # Re-summarize under the expanded model, with fields needed for merges
    expandLP = expandModel.calc_local_params(targetData)
    expandSS = expandModel.get_global_suff_stats(
        targetData, expandLP,
        doPrecompEntropy=True,
        doPrecompMergeEntropy=True)
    Kexpand = expandSS.K

    # Use the caller's generator here (previously hard-coded to np.random,
    # which made preselection ignore a seeded randstate).
    mPairIDs = MergeMove.preselect_all_merge_candidates(
        expandModel, expandSS,
        randstate=randstate,
        preselectroutine=kwargs['cleanuppreselectroutine'],
        mergePerLap=kwargs['cleanupNumMergeTrials'] * (Kexpand - Korig),
        compIDs=range(Korig, Kexpand))
    # Snapshot of the candidates, taken before the merge routine sees them
    mPairIDsOrig = list(mPairIDs)

    xModel, xSS, xEv, MTracker = MergeMove.run_many_merge_moves(
        expandModel, targetData, expandSS,
        nMergeTrials=expandSS.K ** 2,
        mPairIDs=mPairIDs,
        randstate=randstate,
        **kwargs)

    if kwargs['doVizBirth']:
        viz_birth_proposal_2D(expandModel, xModel, None, None,
                              title1='expanded model',
                              title2='after merge')

    # Sanity check: every accepted merge must be one of the preselected pairs
    for pair in MTracker.acceptedOrigIDs:
        assert pair in mPairIDsOrig

    if kwargs['cleanupModifyOrigComps']:
        targetSS = xSS
        targetSS.setELBOFieldsToZero()
        targetSS.setMergeFieldsToZero()
    else:
        # Remove from targetSS all the comps whose merges were accepted
        kBList = [kB for kA, kB in MTracker.acceptedOrigIDs]
        if len(kBList) == targetSS.K:
            msg = 'BIRTH terminated. all new comps redundant with originals.'
            raise BirthProposalError(msg)
        # Delete from the highest index down so remaining indices stay valid
        for kB in sorted(kBList, reverse=True):
            ktarget = kB - Korig  # position within targetSS
            if ktarget >= 0:
                targetSS.removeComp(ktarget)
    return targetSS