def test_alignEstStateSeqToTrue__Kest_gt_Ktrue(self):
    ''' Check alignment when the estimate uses more states than the truth.

    The first case (int32 arrays) must reproduce the expected labels exactly.
    The remaining cases only require that the aligned sequence is exactly as
    far from the truth, in Hamming distance, as a hand-written reference
    alignment.
    '''
    print('')
    estArr = np.asarray([0, 0, 0, 1, 1, 2, 0, 0], dtype=np.int32)
    truArr = np.asarray([0, 0, 0, 0, 0, 0, 0, 0], dtype=np.int32)
    expArr = np.asarray([0, 0, 0, 1, 1, 2, 0, 0], dtype=np.int32)
    actArr = SSU.alignEstimatedStateSeqToTruth(estArr, truArr)
    assert np.allclose(actArr, expArr)

    # Each row: (estimated labels, true labels, reference aligned labels)
    distanceCases = [
        ([2, 2, 2, 1, 1, 0, 2, 2],
         [0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 1, 1, 2, 0, 0]),
        ([0, 0, 1, 2, 3, 4, 5, 6],
         [0, 0, 0, 1, 1, 1, 2, 2],
         [0, 0, 5, 1, 3, 4, 2, 6]),
        ([6, 6, 0, 5, 4, 3, 2, 1],
         [0, 0, 0, 1, 1, 1, 2, 2],
         [0, 0, 5, 1, 3, 4, 2, 6]),
    ]
    for estLabels, truLabels, refLabels in distanceCases:
        aligned = SSU.alignEstimatedStateSeqToTruth(estLabels, truLabels)
        distAligned = SSU.calcHammingDistance(aligned, truLabels)
        distRef = SSU.calcHammingDistance(refLabels, truLabels)
        assert distAligned == distRef
def test_alignEstStateSeqToTrue__Kest_equals_Ktrue(self):
    ''' Verify alignment works when both sequences have same number of states

    Every case requires the aligned sequence to equal the expected labels
    exactly.
    '''
    # Function-call form prints a blank line on both Python 2 and Python 3;
    # the bare statement form `print ''` is a SyntaxError on Python 3.
    print('')

    # Each row: (estimated labels, true labels, expected aligned labels)
    cases = [
        ([0, 0, 0, 1, 1, 1, 1, 1],
         [0, 0, 1, 1, 1, 1, 1, 0],
         [0, 0, 0, 1, 1, 1, 1, 1]),
        ([1, 1, 1, 0, 0, 0, 0, 0],
         [0, 0, 1, 1, 1, 1, 1, 0],
         [0, 0, 0, 1, 1, 1, 1, 1]),
        ([0, 0, 0, 1, 2, 2, 2, 2, 2, 1, 1, 1, 1],
         [1, 1, 1, 1, 2, 2, 2, 2, 2, 1, 1, 1, 0],
         [0, 0, 0, 1, 2, 2, 2, 2, 2, 1, 1, 1, 1]),
        ([2, 2, 2, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1],
         [1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 0],
         [1, 1, 1, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0]),
    ]
    for zEst, zTru, zExp in cases:
        zA = SSU.alignEstimatedStateSeqToTruth(zEst, zTru)
        assert np.allclose(zA, zExp)
def calcHammingDistanceAndSave(zHatFlatAligned,
                               excludeTstepsWithNegativeTrueLabels=1,
                               **kwargs):
    ''' Calculate hamming distance for all sequences, saving to flat file.

    Args
    -------
    zHatFlatAligned : estimated state labels, already aligned to the truth.
        Must expose a `.size` attribute (e.g. a numpy array).
    excludeTstepsWithNegativeTrueLabels : accepted for interface
        compatibility. It is not read by this function, and because it is a
        named parameter it is NOT forwarded to the distance routine via
        **kwargs.

    Keyword Args (all workspace variables passed along from learning alg)
    -------
    Data : current Data object representing *entire* dataset
        (not just one chunk). Must provide TrueParams['Z'].
    learnAlg : learning algorithm object; its `savedir` attribute names
        the output directory.
    All keyword args are forwarded unchanged to
    StateSeqUtil.calcHammingDistance.

    Returns
    -------
    None. Hamming distance saved to file.

    Output
    -------
    hamming-distance.txt : one line is appended per call, holding the value
        returned by StateSeqUtil.calcHammingDistance divided by
        zHatFlatAligned.size, formatted with 6 decimal places.
    '''
    Data = kwargs['Data']
    zTrue = Data.TrueParams['Z']
    hdistance = StateSeqUtil.calcHammingDistance(
        zTrue, zHatFlatAligned, **kwargs)
    normhdist = float(hdistance) / float(zHatFlatAligned.size)

    learnAlgObj = kwargs['learnAlg']
    outpath = os.path.join(learnAlgObj.savedir, 'hamming-distance.txt')
    # Append mode: the file accumulates one value per call.
    with open(outpath, 'a') as f:
        f.write('%.6f\n' % (normhdist))
def test_alignEstStateSeqToTrue__Kest_equals_Ktrue(self):
    ''' Check alignment when estimate and truth use the same number of states.

    Three cases have a single acceptable answer. One case has two equally
    good alignments, and either is accepted.
    '''
    # Cases with one acceptable answer: (estimated, true, expected aligned)
    uniqueCases = [
        ([0, 0, 0, 1, 1, 1, 1, 1],
         [0, 0, 1, 1, 1, 1, 1, 0],
         [0, 0, 0, 1, 1, 1, 1, 1]),
        ([1, 1, 1, 0, 0, 0, 0, 0],
         [0, 0, 1, 1, 1, 1, 1, 0],
         [0, 0, 0, 1, 1, 1, 1, 1]),
    ]
    for estLabels, truLabels, expLabels in uniqueCases:
        aligned = SSU.alignEstimatedStateSeqToTruth(estLabels, truLabels)
        assert np.allclose(aligned, expLabels)

    # Tie case: two relabelings are equally close to the truth.
    estLabels = [0, 0, 0, 1, 2, 2, 2, 2, 2, 1, 1, 1, 1]
    truLabels = [1, 1, 1, 1, 2, 2, 2, 2, 2, 1, 1, 1, 0]
    optionA = [0, 0, 0, 1, 2, 2, 2, 2, 2, 1, 1, 1, 1]
    optionB = [1, 1, 1, 0, 2, 2, 2, 2, 2, 0, 0, 0, 0]
    distA = SSU.calcHammingDistance(truLabels, optionA)
    distB = SSU.calcHammingDistance(truLabels, optionB)
    assert distA == distB
    aligned = SSU.alignEstimatedStateSeqToTruth(estLabels, truLabels)
    assert np.allclose(aligned, optionA) or np.allclose(aligned, optionB)

    estLabels = [2, 2, 2, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1]
    truLabels = [1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 0]
    expLabels = [1, 1, 1, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0]
    aligned = SSU.alignEstimatedStateSeqToTruth(estLabels, truLabels)
    assert np.allclose(aligned, expLabels)
def test_alignEstStateSeqToTrue__Kest_gt_Ktrue_someempty(self):
    ''' Check alignment when the estimate uses more states than the truth.

    Note: 'extra' estimated states (beyond the number of true states) are
    expected to receive increasing labels, in the order they first appear
    in the original estimated sequence.
    '''
    print('')
    # Each row: (estimated, true, expected aligned, expected # mismatches)
    cases = [
        ([1, 1, 1, 2, 2, 1, 1, 1],
         [0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 1, 1, 0, 0, 0],
         2),
        ([2, 2, 2, 3, 4, 5, 2, 2],
         [0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 1, 2, 3, 0, 0],
         3),
        ([2, 2, 2, 3, 4, 5, 2, 2],
         [1, 1, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 2, 3, 4, 0, 0],
         5),
    ]
    for estLabels, truLabels, expLabels, nMismatch in cases:
        aligned = SSU.alignEstimatedStateSeqToTruth(estLabels, truLabels)
        assert np.allclose(aligned, expLabels)
        # Distance is compared as a fraction of the sequence length.
        observedDist = SSU.calcHammingDistance(aligned, truLabels)
        assert observedDist == nMismatch / float(len(truLabels))
def test_alignEstStateSeqToTrue__Kest_gt_Ktrue_someempty(self):
    ''' Verify alignment works when est sequence has more states than true

    Each case checks both the exact aligned labels and the resulting
    Hamming distance, expressed as a fraction of the sequence length.
    Diagnostic values are printed along the way.
    '''
    # All prints use the function-call form with a single argument, which
    # produces the same output on Python 2 and Python 3. The bare statement
    # form (`print x`) is a SyntaxError on Python 3.
    print('')
    zEst = [1, 1, 1, 2, 2, 1, 1, 1]
    zTru = [0, 0, 0, 0, 0, 0, 0, 0]
    zExp = [0, 0, 0, 2, 2, 0, 0, 0]
    zA = SSU.alignEstimatedStateSeqToTruth(zEst, zTru)
    assert np.allclose(zA, zExp)
    hdist = SSU.calcHammingDistance(zA, zTru)
    print(hdist)
    assert hdist == 2 / float(len(zTru))

    zEst = [2, 2, 2, 3, 4, 5, 2, 2]
    zTru = [0, 0, 0, 0, 0, 0, 0, 0]
    zExp = [0, 0, 0, 3, 4, 5, 0, 0]
    zA = SSU.alignEstimatedStateSeqToTruth(zEst, zTru)
    assert np.allclose(zA, zExp)
    hdist = SSU.calcHammingDistance(zA, zTru)
    print(hdist)
    assert hdist == 3 / float(len(zTru))

    zEst = [2, 2, 2, 3, 4, 5, 2, 2]
    zTru = [1, 1, 0, 0, 0, 0, 0, 0]
    zExp = [0, 0, 0, 4, 5, 1, 0, 0]
    zA = SSU.alignEstimatedStateSeqToTruth(zEst, zTru)
    print(zA)
    assert np.allclose(zA, zExp)
    hdist = SSU.calcHammingDistance(zA, zTru)
    assert hdist == 5 / float(len(zTru))
def test_alignEstStateSeqToTrue__Zest_equals_Ztrue(self):
    ''' Check alignment when the estimate matches truth up to relabeling.

    In every case the aligned sequence must reproduce the true labels
    exactly.
    '''
    # Each row: (estimated labels, true labels)
    cases = [
        ([0, 0, 0, 1, 1, 1, 1, 1], [0, 0, 0, 1, 1, 1, 1, 1]),
        ([1, 1, 1, 0, 0, 0, 0, 0], [0, 0, 0, 1, 1, 1, 1, 1]),
        ([2, 2, 2, 0, 0, 0, 0, 0], [0, 0, 0, 1, 1, 1, 1, 1]),
        ([2, 2, 2, 0, 0, 0, 0, 0], [3, 3, 3, 1, 1, 1, 1, 1]),
    ]
    for estLabels, truLabels in cases:
        aligned = SSU.alignEstimatedStateSeqToTruth(estLabels, truLabels)
        assert np.allclose(aligned, truLabels)
def test_alignEstStateSeqToTrue__Kest_lt_Ktrue(self):
    ''' Verify alignment works when est sequence has fewer states than true

    The first two cases require an exact match with the expected labels.
    The last case only requires that the aligned sequence is exactly as far
    from the truth, in Hamming distance, as the reference alignment.
    '''
    print('')
    zEst = [0, 0, 0, 0, 0, 0, 0, 0]
    zTru = [0, 0, 1, 1, 1, 1, 1, 0]
    zExp = [1, 1, 1, 1, 1, 1, 1, 1]
    zA = SSU.alignEstimatedStateSeqToTruth(zEst, zTru)
    assert np.allclose(zA, zExp)

    zEst = [0, 0, 0, 0, 0, 0, 0, 0]
    zTru = [0, 0, 1, 1, 1, 1, 1, 2]
    zExp = [1, 1, 1, 1, 1, 1, 1, 1]
    zA = SSU.alignEstimatedStateSeqToTruth(zEst, zTru)
    assert np.allclose(zA, zExp)

    zEst = [0, 1, 1, 1, 0, 0, 0, 1]
    zTru = [0, 0, 1, 1, 1, 1, 1, 2]
    zExp = [1, 0, 0, 0, 1, 1, 1, 0]
    zA = SSU.alignEstimatedStateSeqToTruth(zEst, zTru)
    distA = SSU.calcHammingDistance(zA, zTru)
    distExp = SSU.calcHammingDistance(zExp, zTru)
    # Without this check the two distances were computed and discarded,
    # so the case could never fail.
    assert distA == distExp
def decode(self, X, lengths):
    ''' Run the Viterbi algorithm on each sequence and return flat labels.

    Args
    -------
    X : 2D array of observations. Each row after the first is paired with
        the row before it: all rows but the first become the dataset's
        observations, and all rows but the last are passed as `Xprev`.
    lengths : per-sequence lengths; their cumulative sum (with a leading 0)
        is used as the sequence boundaries.
        NOTE(review): boundaries are computed from `lengths` as given while
        X loses its first row -- confirm callers account for this.

    Returns
    -------
    Result of StateSeqUtil.convertStateSeq_list2flat applied to the
    per-sequence Viterbi outputs.
    '''
    prevRows = X[:-1, :]
    curRows = X[1:, :]
    boundaries = [0] + np.cumsum(lengths).tolist()
    dataset = bnpy.data.GroupXData(curRows, boundaries, None, prevRows)

    from bnpy.allocmodel.hmm.HMMUtil import runViterbiAlg
    from bnpy.util import StateSeqUtil

    initPi = self.model.allocModel.get_init_prob_vector()
    transPi = self.model.allocModel.get_trans_prob_matrix()
    localParams = self.model.calc_local_params(dataset)
    logSoftEv = localParams['E_log_soft_ev']

    # One Viterbi pass per sequence, each over its own slice of rows.
    pathPerSeq = [
        runViterbiAlg(
            logSoftEv[dataset.doc_range[n]:dataset.doc_range[n + 1]],
            initPi, transPi)
        for n in range(dataset.nDoc)
    ]
    return StateSeqUtil.convertStateSeq_list2flat(pathPerSeq, dataset)
def findCompInModelWithLargestMisalignment(model, Data, Zref=None):
    ''' Finds cluster in model that is best candidate for a birth move.

    Each atom is hard-assigned to its argmax component of LP['resp'], the
    assignments are aligned to the reference labels, and the aligned label
    with the largest fraction of disagreeing atoms is selected.

    Args
    -------
    model : object providing calc_local_params(Data), whose result has a
        'resp' entry supporting .argmax(axis=1)
    Data : dataset object; Data.TrueParams['Z'] is used when Zref is None
    Zref : optional array of reference labels, indexable by boolean mask

    Returns
    -------
    korig : entry of AlignInfo['AlignedToOrigMap'] for the selected
        aligned label.

    Post Condition
    --------------
    Prints useful info to stdout.
    '''
    if Zref is None:
        Zref = Data.TrueParams['Z']
    LP = model.calc_local_params(Data)
    Z = LP['resp'].argmax(axis=1)
    AZ, AlignInfo = StateSeqUtil.alignEstimatedStateSeqToTruth(
        Z, Zref, returnInfo=1)

    # Aligned labels run 0..AZ.max() inclusive, so there are AZ.max() + 1
    # candidates. Using AZ.max() as the count skipped the highest label and
    # left an empty array (argmax error) when only label 0 was present.
    nLabels = int(AZ.max()) + 1
    dist = np.zeros(nLabels)
    for k in range(nLabels):
        mask = AZ == k
        nDisagree = np.sum(Zref[mask] != k)
        nTotal = mask.sum()
        # Small constant avoids division by zero for labels with no atoms.
        dist[k] = float(nDisagree) / (float(nTotal) + 1e-10)
        print(k, dist[k])
    ktarget = np.argmax(dist)
    korig = AlignInfo['AlignedToOrigMap'][ktarget]
    print('ktarget %d: %s' % (ktarget, chr(65 + ktarget)))
    print('korig %d' % (korig))

    # Determine what is hiding inside of it that shouldnt be
    mask = AZ == ktarget
    nTarget = np.sum(mask)
    print('%d total atoms assigned to ktarget...' % (nTarget))
    trueLabels = np.asarray(np.unique(Zref[mask]), np.int32)
    for ll in trueLabels:
        nTrue = np.sum(Zref[mask] == ll)
        print('%d/%d should have true label %d: %s' % (
            nTrue, nTarget, ll, chr(65 + ll)))
    return korig
def runViterbiAndSave(**kwargs):
    ''' Run viterbi alg on each sequence in dataset, and save to file.

    Keyword Args (all workspace variables passed along from learning alg)
    -------
    hmodel : current HModel object
    lapFrac : current lap, used to build the output file prefix
    Data : current Data object
        representing *entire* dataset (not just one chunk)
    DataIterator : consulted only when Data is absent. Its `.Data`
        attribute is used if present; otherwise the dataset is loaded from
        the 'evalDataPath' entry of its `.DataInfo`.
    savedir : output directory. When absent, `learnAlg.savedir` is used
        instead, and the elapsed time is also appended to
        times-saved-params.txt.
    learnAlg : learning algorithm object (see savedir). Still required
        when true labels are available, since all keyword args are passed
        on to calcHammingDistanceAndSave.

    Returns
    -------
    None. MAP state sequences are saved to a MAT file.
    Returns immediately, writing nothing, when neither Data nor
    DataIterator is provided.

    Raises
    -------
    ValueError : if DataIterator offers neither `.Data` nor an
        'evalDataPath' entry, or if neither savedir nor learnAlg is given.

    Output
    -------
    MATfile format: Lap0020.000MAPStateSeqs.mat
    Keff-saved-params.txt : number of distinct states in the MAP sequences
    Ktotal-saved-params.txt : hmodel.obsModel.K
    <prefix>MAPStateSeqsAligned.mat : written only when Data.TrueParams
        contains 'Z'
    '''
    if 'Data' in kwargs:
        Data = kwargs['Data']
    elif 'DataIterator' in kwargs:
        try:
            Data = kwargs['DataIterator'].Data
        except AttributeError:
            from bnpy.data.DataIteratorFromDisk import loadDataForSlice
            Dinfo = dict()
            Dinfo.update(kwargs['DataIterator'].DataInfo)
            if 'evalDataPath' in Dinfo:
                Dinfo['filepath'] = os.path.expandvars(Dinfo['evalDataPath'])
                Data = loadDataForSlice(**Dinfo)
            else:
                raise ValueError('DataIterator has no attribute Data')
    else:
        return None

    hmodel = kwargs['hmodel']
    lapFrac = kwargs['lapFrac']

    if 'savedir' in kwargs:
        savedir = kwargs['savedir']
    elif 'learnAlg' in kwargs:
        learnAlgObj = kwargs['learnAlg']
        savedir = learnAlgObj.savedir
        if hasattr(learnAlgObj, 'start_time'):
            elapsedTime = learnAlgObj.get_elapsed_time()
        else:
            elapsedTime = 0.0
        timestxtpath = os.path.join(savedir, 'times-saved-params.txt')
        with open(timestxtpath, 'a') as f:
            f.write('%.3f\n' % (elapsedTime))
    else:
        # Fail before the Viterbi pass rather than with a NameError after it.
        raise ValueError(
            'Need either savedir or learnAlg to determine output directory')

    initPi = hmodel.allocModel.get_init_prob_vector()
    transPi = hmodel.allocModel.get_trans_prob_matrix()

    LP = hmodel.obsModel.calc_local_params(Data)
    Lik = LP['E_log_soft_ev']

    # Loop over each sequence in the collection
    zHatBySeq = list()
    for n in range(Data.nDoc):
        start = Data.doc_range[n]
        stop = Data.doc_range[n + 1]
        zHat = runViterbiAlg(Lik[start:stop], initPi, transPi)
        zHatBySeq.append(zHat)

    # Store MAP sequence to file
    prefix = makePrefixForLap(lapFrac)
    matfilepath = os.path.join(savedir, prefix + 'MAPStateSeqs.mat')
    MATVarsDict = dict(
        zHatBySeq=StateSeqUtil.convertStateSeq_list2MAT(zHatBySeq))
    scipy.io.savemat(matfilepath, MATVarsDict, oned_as='row')

    zHatFlat = StateSeqUtil.convertStateSeq_list2flat(zHatBySeq, Data)
    Keff = np.unique(zHatFlat).size
    Kefftxtpath = os.path.join(savedir, 'Keff-saved-params.txt')
    with open(Kefftxtpath, 'a') as f:
        f.write('%d\n' % (Keff))

    Ktotal = hmodel.obsModel.K
    Ktotaltxtpath = os.path.join(savedir, 'Ktotal-saved-params.txt')
    with open(Ktotaltxtpath, 'a') as f:
        # Record the model's total state count here, not Keff, which
        # already has its own file.
        f.write('%d\n' % (Ktotal))

    # Save sequence aligned to truth and calculate Hamming distance
    if (hasattr(Data, 'TrueParams')) and ('Z' in Data.TrueParams):
        zHatFlatAligned = StateSeqUtil.alignEstimatedStateSeqToTruth(
            zHatFlat, Data.TrueParams['Z'])

        zHatBySeqAligned = StateSeqUtil.convertStateSeq_flat2list(
            zHatFlatAligned, Data)
        zHatBySeqAligned_Arr = StateSeqUtil.convertStateSeq_list2MAT(
            zHatBySeqAligned)

        MATVarsDict = dict(zHatBySeqAligned=zHatBySeqAligned_Arr)
        matfilepath = os.path.join(savedir, prefix + 'MAPStateSeqsAligned.mat')
        scipy.io.savemat(matfilepath, MATVarsDict, oned_as='row')

        # Data may have come from DataIterator; make it visible downstream.
        kwargs['Data'] = Data
        calcHammingDistanceAndSave(zHatFlatAligned, **kwargs)