def AlignModelAndCalcDistance(Data, hmodel):
    ''' Run Viterbi decoding on every sequence, align the estimated states
        to the ground truth, and print the normalized Hamming distance.
    '''
    from bnpy.allocmodel.hmm.HMMUtil import runViterbiAlg
    from bnpy.util import StateSeqUtil

    # Initial and transition probabilities of the current HMM
    initPi = hmodel.allocModel.get_init_prob_vector()
    transPi = hmodel.allocModel.get_trans_prob_matrix()
    # Per-timestep log soft evidence under the observation model
    LP = hmodel.obsModel.calc_local_params(Data)
    Lik = LP['E_log_soft_ev']

    # Loop over each sequence in the collection
    zHatBySeq = list()
    for n in range(Data.nDoc):
        start = Data.doc_range[n]
        stop = Data.doc_range[n + 1]
        zHat = runViterbiAlg(Lik[start:stop], initPi, transPi)
        zHatBySeq.append(zHat)

    # Flatten the per-sequence estimates, align their labels to the true
    # labels, and compute the normalized Hamming distance.
    zHatFlat = StateSeqUtil.convertStateSeq_list2flat(zHatBySeq, Data)
    zHatFlatAligned = StateSeqUtil.alignEstimatedStateSeqToTruth(
        zHatFlat, Data.TrueParams['Z'])
    zHatBySeqAligned = StateSeqUtil.convertStateSeq_flat2list(
        zHatFlatAligned, Data)
    zTrue = Data.TrueParams['Z']
    hdistance = StateSeqUtil.calcHammingDistance(zHatFlatAligned, zTrue)
    normhdist = float(hdistance) / float(zHatFlatAligned.size)
    print('distance= %.4f' % normhdist)
    return zHatBySeqAligned
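# Usage sketch (not part of the original source). Shows how one might call
# AlignModelAndCalcDistance on a saved bnpy task. The toy dataset and the
# task path below are placeholder assumptions; the dataset must carry the
# flattened true state sequence in Data.TrueParams['Z'].
def _exampleUsage_AlignModelAndCalcDistance():
    import numpy as np
    import bnpy

    X = np.random.randn(100, 2)                        # toy observations
    doc_range = [0, 50, 100]                           # two sequences of 50 steps
    Data = bnpy.data.GroupXData(X=X, doc_range=doc_range)
    Data.TrueParams = dict(
        Z=np.repeat(np.arange(2), 50).astype(np.int32))  # toy ground-truth labels
    hmodel = bnpy.load_model('/path/to/job/1/')          # placeholder task path
    return AlignModelAndCalcDistance(Data, hmodel)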
def CheckViterbiVsSoft(jobpath, taskid):
    ''' Compare sufficient statistics built from hard (Viterbi) assignments
        against those built from the standard soft local step.

    Assumes `os`, `bnpy`, the path utility `PU`, and the dataset `Data`
    are defined at module level in the original script (not shown here).
    '''
    from bnpy.allocmodel.hmm.HMMUtil import runViterbiAlg
    from bnpy.util import StateSeqUtil

    taskpath = PU.MakePath(os.path.join(jobpath, str(taskid)))
    hmodel = bnpy.load_model(taskpath)

    # Soft assignments: standard memory-limited local step.
    LPsoft = hmodel.calc_local_params(Data, limitMemoryLP=1)
    SSsoft = hmodel.get_global_suff_stats(Data, LPsoft)

    # Hard assignments: Viterbi decoding of each sequence.
    # zHatBySeq = AlignModelAndCalcDistance(Data, hmodel)
    initPi = hmodel.allocModel.get_init_prob_vector()
    transPi = hmodel.allocModel.get_trans_prob_matrix()
    LP = hmodel.obsModel.calc_local_params(Data)
    Lik = LP['E_log_soft_ev']
    # Loop over each sequence in the collection
    zHatBySeq = list()
    for n in range(Data.nDoc):
        start = Data.doc_range[n]
        stop = Data.doc_range[n + 1]
        zHat = runViterbiAlg(Lik[start:stop], initPi, transPi)
        zHatBySeq.append(zHat)
    zHatFlat = StateSeqUtil.convertStateSeq_list2flat(zHatBySeq, Data)

    # Convert the hard state sequence into one-hot responsibilities, then
    # build sufficient statistics from those hard assignments.
    LP = dict(Z=zHatFlat)
    LP = bnpy.init.FromTruth.convertLPFromHardToSoft(
        LP, Data, startIDsAt0=True, Kmax=SSsoft.K)
    LP = hmodel.allocModel.initLPFromResp(Data, LP)
    SShard = hmodel.get_global_suff_stats(Data, LP)
    return SShard, SSsoft, LPsoft
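# Usage sketch (not part of the original source). CheckViterbiVsSoft returns
# sufficient-statistic bags for hard (Viterbi) and soft assignments plus the
# soft local parameters, so the two bags can be compared directly. The job
# path below is a placeholder, and the module-level `Data` global must already
# be defined as in the original script.
def _exampleUsage_CheckViterbiVsSoft():
    SShard, SSsoft, LPsoft = CheckViterbiVsSoft('/path/to/jobpath', taskid=1)
    print('number of states K: %d' % SSsoft.K)
    return SShard, SSsoft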
def decode(self, X, lengths):
    ''' Compute the most likely (Viterbi) state sequence for stacked data.

    Assumes this is a method on a wrapper class that stores a trained bnpy
    HModel as `self.model`, and that `np` (numpy) and `bnpy` are imported
    at module level.
    '''
    # Shift by one timestep so Xprev[t] holds the observation preceding X[t],
    # as used by autoregressive observation models.
    Xprev = X[:-1, :]
    X = X[1:, :]
    # Build sequence boundaries from the per-sequence lengths; these should
    # describe the shifted arrays (one fewer row than the original X).
    doc_range = list([0])
    doc_range += np.cumsum(lengths).tolist()
    dataset = bnpy.data.GroupXData(X, doc_range, None, Xprev)

    from bnpy.allocmodel.hmm.HMMUtil import runViterbiAlg
    from bnpy.util import StateSeqUtil

    initPi = self.model.allocModel.get_init_prob_vector()
    transPi = self.model.allocModel.get_trans_prob_matrix()
    LP = self.model.calc_local_params(dataset)
    Lik = LP['E_log_soft_ev']

    # Run Viterbi on each sequence separately.
    zHatBySeq = list()
    for n in range(dataset.nDoc):
        start = dataset.doc_range[n]
        stop = dataset.doc_range[n + 1]
        zHat = runViterbiAlg(Lik[start:stop], initPi, transPi)
        zHatBySeq.append(zHat)

    # Concatenate the per-sequence state sequences into one flat array.
    zHatFlat = StateSeqUtil.convertStateSeq_list2flat(zHatBySeq, dataset)
    return zHatFlat
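# Usage sketch (not part of the original source). `decode` above is written as
# a method, so this hypothetical wrapper simply stores a trained bnpy HModel on
# self.model and reuses the function; the model path in the example call is a
# placeholder.
class _HMMDecoderSketch(object):
    # Reuse the `decode` function defined above as a bound method.
    decode = decode

    def __init__(self, model):
        self.model = model

# e.g. (placeholder path):
#   hmodel = bnpy.load_model('/path/to/job/1/')
#   zHat = _HMMDecoderSketch(hmodel).decode(X, lengths)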
def runViterbiAndSave(**kwargs):
    ''' Run Viterbi alg on each sequence in dataset, and save to file.

    Keyword Args (all workspace variables passed along from learning alg)
    -------
    hmodel : current HModel object
    Data : current Data object representing *entire* dataset (not just one chunk)

    Returns
    -------
    None. MAP state sequences are saved to a MAT file.

    Output
    -------
    MATfile format: Lap0020.000MAPStateSeqs.mat
    '''
    # Relies on module-level imports in the original file: os, numpy as np,
    # scipy.io, runViterbiAlg, StateSeqUtil, makePrefixForLap,
    # calcHammingDistanceAndSave.

    # Recover the full dataset, either directly or via the data iterator.
    if 'Data' in kwargs:
        Data = kwargs['Data']
    elif 'DataIterator' in kwargs:
        try:
            Data = kwargs['DataIterator'].Data
        except AttributeError:
            from bnpy.data.DataIteratorFromDisk import loadDataForSlice
            Dinfo = dict()
            Dinfo.update(kwargs['DataIterator'].DataInfo)
            if 'evalDataPath' in Dinfo:
                Dinfo['filepath'] = os.path.expandvars(Dinfo['evalDataPath'])
                Data = loadDataForSlice(**Dinfo)
            else:
                raise ValueError('DataIterator has no attribute Data')
    else:
        return None

    hmodel = kwargs['hmodel']
    lapFrac = kwargs['lapFrac']

    # Determine the output directory, recording elapsed time when a learning
    # algorithm object is available.
    if 'savedir' in kwargs:
        savedir = kwargs['savedir']
    elif 'learnAlg' in kwargs:
        learnAlgObj = kwargs['learnAlg']
        savedir = learnAlgObj.savedir
        if hasattr(learnAlgObj, 'start_time'):
            elapsedTime = learnAlgObj.get_elapsed_time()
        else:
            elapsedTime = 0.0
        timestxtpath = os.path.join(savedir, 'times-saved-params.txt')
        with open(timestxtpath, 'a') as f:
            f.write('%.3f\n' % (elapsedTime))

    initPi = hmodel.allocModel.get_init_prob_vector()
    transPi = hmodel.allocModel.get_trans_prob_matrix()
    LP = hmodel.obsModel.calc_local_params(Data)
    Lik = LP['E_log_soft_ev']

    # Loop over each sequence in the collection
    zHatBySeq = list()
    for n in range(Data.nDoc):
        start = Data.doc_range[n]
        stop = Data.doc_range[n + 1]
        zHat = runViterbiAlg(Lik[start:stop], initPi, transPi)
        zHatBySeq.append(zHat)

    # Store MAP sequence to file
    prefix = makePrefixForLap(lapFrac)
    matfilepath = os.path.join(savedir, prefix + 'MAPStateSeqs.mat')
    MATVarsDict = dict(
        zHatBySeq=StateSeqUtil.convertStateSeq_list2MAT(zHatBySeq))
    scipy.io.savemat(matfilepath, MATVarsDict, oned_as='row')

    # Record the number of states actually used (Keff) and available (Ktotal).
    zHatFlat = StateSeqUtil.convertStateSeq_list2flat(zHatBySeq, Data)
    Keff = np.unique(zHatFlat).size
    Kefftxtpath = os.path.join(savedir, 'Keff-saved-params.txt')
    with open(Kefftxtpath, 'a') as f:
        f.write('%d\n' % (Keff))
    Ktotal = hmodel.obsModel.K
    Ktotaltxtpath = os.path.join(savedir, 'Ktotal-saved-params.txt')
    with open(Ktotaltxtpath, 'a') as f:
        f.write('%d\n' % (Ktotal))

    # Save sequence aligned to truth and calculate Hamming distance
    if (hasattr(Data, 'TrueParams')) and ('Z' in Data.TrueParams):
        zHatFlatAligned = StateSeqUtil.alignEstimatedStateSeqToTruth(
            zHatFlat, Data.TrueParams['Z'])
        zHatBySeqAligned = StateSeqUtil.convertStateSeq_flat2list(
            zHatFlatAligned, Data)
        zHatBySeqAligned_Arr = StateSeqUtil.convertStateSeq_list2MAT(
            zHatBySeqAligned)
        MATVarsDict = dict(zHatBySeqAligned=zHatBySeqAligned_Arr)
        matfilepath = os.path.join(savedir, prefix + 'MAPStateSeqsAligned.mat')
        scipy.io.savemat(matfilepath, MATVarsDict, oned_as='row')
        kwargs['Data'] = Data
        calcHammingDistanceAndSave(zHatFlatAligned, **kwargs)
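# Usage sketch (not part of the original source). runViterbiAndSave is meant
# to receive the learning algorithm's workspace via **kwargs; a direct call
# might look like the following, where the output directory is a placeholder
# and `hmodel`/`Data` are an already-trained model and its full dataset.
def _exampleUsage_runViterbiAndSave(hmodel, Data):
    runViterbiAndSave(
        hmodel=hmodel,                  # trained HModel
        Data=Data,                      # full dataset (GroupXData)
        lapFrac=20.0,                   # lap counter used in the output filename
        savedir='/tmp/viterbi-out')     # placeholder output directory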