def AlignModelAndCalcDistance(Data, hmodel):
  ''' Decode each sequence with Viterbi, align to truth, and report distance.

  Runs runViterbiAlg on every sequence of Data using the model's initial
  and transition probabilities, aligns the estimated labels to the
  ground-truth labels in Data.TrueParams['Z'], and prints the Hamming
  distance divided by the total number of labels.

  Args
  -------
  Data : dataset object
      must provide nDoc, doc_range and TrueParams['Z']
  hmodel : model object
      must provide allocModel (init/transition probabilities)
      and obsModel (local params with key 'E_log_soft_ev')

  Returns
  -------
  zHatBySeqAligned : aligned state sequences, in the per-sequence form
      produced by StateSeqUtil.convertStateSeq_flat2list

  Output
  -------
  Prints one line 'distance= <value>' to stdout.
  '''
  from bnpy.allocmodel.hmm.HMMUtil import runViterbiAlg
  initPi = hmodel.allocModel.get_init_prob_vector()
  transPi = hmodel.allocModel.get_trans_prob_matrix()

  LP = hmodel.obsModel.calc_local_params(Data)
  Lik = LP['E_log_soft_ev']

  # Loop over each sequence in the collection
  zHatBySeq = list()
  for n in range(Data.nDoc):
    start = Data.doc_range[n]
    stop = Data.doc_range[n+1]
    zHat = runViterbiAlg(Lik[start:stop], initPi, transPi)
    zHatBySeq.append(zHat)

  # Look up the ground-truth labels once; they are used for both
  # the alignment and the distance calculation.
  zTrue = Data.TrueParams['Z']

  zHatFlat = StateSeqUtil.convertStateSeq_list2flat(zHatBySeq, Data)
  zHatFlatAligned = \
      StateSeqUtil.alignEstimatedStateSeqToTruth(zHatFlat, zTrue)
  zHatBySeqAligned = StateSeqUtil.convertStateSeq_flat2list(
                                    zHatFlatAligned, Data)

  hdistance = StateSeqUtil.calcHammingDistance(zHatFlatAligned, zTrue)
  normhdist = float(hdistance) / float(zHatFlatAligned.size)
  # Single-argument call form: valid syntax on Python 3, and on Python 2
  # it prints the same text as the old "print 'distance=', value" statement.
  print('distance= %s' % (normhdist,))
  return zHatBySeqAligned
def CheckViterbiVsSoft(jobpath, taskid):
  ''' Compute suff stats from soft local params and from Viterbi labels.

  Loads the model stored under jobpath/taskid, computes soft local
  parameters and their sufficient statistics, then decodes every sequence
  with runViterbiAlg and builds sufficient statistics from those hard
  labels instead.

  NOTE(review): `Data` is not a parameter and is not assigned here, so it
  is read from the enclosing (module) scope; confirm it is defined there.

  Args
  -------
  jobpath : path of the job directory
  taskid : task identifier; converted with str() and joined onto jobpath

  Returns
  -------
  SShard : suff stats built from the Viterbi state sequences
  SSsoft : suff stats built from the soft local parameters
  LPsoft : the soft local parameters
  '''
  from bnpy.allocmodel.hmm.HMMUtil import runViterbiAlg

  model = bnpy.load_model(PU.MakePath(os.path.join(jobpath, str(taskid))))

  softLP = model.calc_local_params(Data, limitMemoryLP=1)
  softSS = model.get_global_suff_stats(Data, softLP)

  # Viterbi decoding is done inline here rather than through
  # AlignModelAndCalcDistance, so no alignment to truth is performed.
  startProb = model.allocModel.get_init_prob_vector()
  transProb = model.allocModel.get_trans_prob_matrix()
  logSoftEv = model.obsModel.calc_local_params(Data)['E_log_soft_ev']

  # One decoded path per sequence; doc_range[n]:doc_range[n+1] bounds seq n.
  pathBySeq = [
      runViterbiAlg(
          logSoftEv[Data.doc_range[n]:Data.doc_range[n+1]],
          startProb, transProb)
      for n in range(Data.nDoc)]
  flatPath = StateSeqUtil.convertStateSeq_list2flat(pathBySeq, Data)

  # Turn the hard labels back into local params sized to match softSS.K.
  hardLP = bnpy.init.FromTruth.convertLPFromHardToSoft(
      dict(Z=flatPath), Data, startIDsAt0=True, Kmax=softSS.K)
  hardLP = model.allocModel.initLPFromResp(Data, hardLP)
  hardSS = model.get_global_suff_stats(Data, hardLP)
  return hardSS, softSS, softLP
Beispiel #3
0
    def decode(self, X, lengths):
        ''' Run Viterbi on each sequence and return the flattened labels.

        Builds a bnpy GroupXData from X, where every row from the second
        onward is paired with the row before it, then decodes each
        sequence using self.model.

        NOTE(review): sequence boundaries come from the cumulative sum of
        the full `lengths`, while the first row of X is dropped before the
        dataset is built; confirm the boundaries still line up.

        Args
        -------
        X : 2D array, indexed as X[row, column]
        lengths : sequence lengths; their cumulative sum gives the
            sequence boundaries

        Returns
        -------
        Flat state sequence, as produced by
        StateSeqUtil.convertStateSeq_list2flat.
        '''
        prev_rows, cur_rows = X[:-1, :], X[1:, :]
        boundaries = [0] + np.cumsum(lengths).tolist()
        dataset = bnpy.data.GroupXData(cur_rows, boundaries, None, prev_rows)

        from bnpy.allocmodel.hmm.HMMUtil import runViterbiAlg
        from bnpy.util import StateSeqUtil

        alloc = self.model.allocModel
        start_prob = alloc.get_init_prob_vector()
        trans_prob = alloc.get_trans_prob_matrix()
        log_soft_ev = self.model.calc_local_params(dataset)['E_log_soft_ev']

        # Decode one sequence at a time; doc_range[n]:doc_range[n + 1]
        # selects the rows belonging to sequence n.
        paths = [
            runViterbiAlg(
                log_soft_ev[dataset.doc_range[n]:dataset.doc_range[n + 1]],
                start_prob, trans_prob)
            for n in range(dataset.nDoc)
        ]
        return StateSeqUtil.convertStateSeq_list2flat(paths, dataset)
def runViterbiAndSave(**kwargs):
    ''' Run viterbi alg on each sequence in dataset, and save to file.

    Keyword Args (all workspace variables passed along from learning alg)
    -------
    hmodel : current HModel object
    Data : current Data object
        representing *entire* dataset (not just one chunk)
    DataIterator : used only when Data is absent.
        Its .Data attribute is used if present; otherwise the dataset is
        loaded via loadDataForSlice using DataIterator.DataInfo, which
        must then contain 'evalDataPath'.
    lapFrac : lap value used to build the filename prefix
    savedir : directory where output files are written
    learnAlg : learning alg object
        supplies savedir when 'savedir' is not given, and the elapsed
        time when it has a 'start_time' attribute

    Returns
    -------
    None. MAP state sequences are saved to a MAT file.
    Returns immediately when neither Data nor DataIterator is provided.

    Raises
    -------
    ValueError
        if the DataIterator offers neither .Data nor 'evalDataPath',
        or if neither 'savedir' nor 'learnAlg' is provided.

    Output
    -------
    MATfile format: Lap0020.000MAPStateSeqs.mat
    When Data.TrueParams['Z'] exists, an additional
    <prefix>MAPStateSeqsAligned.mat file is written.
    One line is appended to each of times-saved-params.txt,
    Keff-saved-params.txt and Ktotal-saved-params.txt.
    '''
    if 'Data' in kwargs:
        Data = kwargs['Data']
    elif 'DataIterator' in kwargs:
        try:
            Data = kwargs['DataIterator'].Data
        except AttributeError:
            from bnpy.data.DataIteratorFromDisk import loadDataForSlice
            Dinfo = dict()
            Dinfo.update(kwargs['DataIterator'].DataInfo)
            if 'evalDataPath' not in Dinfo:
                raise ValueError('DataIterator has no attribute Data')
            Dinfo['filepath'] = os.path.expandvars(Dinfo['evalDataPath'])
            Data = loadDataForSlice(**Dinfo)
    else:
        return None

    hmodel = kwargs['hmodel']
    lapFrac = kwargs['lapFrac']

    learnAlgObj = kwargs.get('learnAlg', None)
    if 'savedir' in kwargs:
        savedir = kwargs['savedir']
    elif learnAlgObj is not None:
        savedir = learnAlgObj.savedir
    else:
        # Previously this case died later with an opaque NameError.
        raise ValueError(
            "Need keyword argument 'savedir' or 'learnAlg' to save results")

    # Default to 0.0 so the timestamp line can always be written, even
    # when savedir was passed directly and no learnAlg clock is available.
    elapsedTime = 0.0
    if learnAlgObj is not None and hasattr(learnAlgObj, 'start_time'):
        elapsedTime = learnAlgObj.get_elapsed_time()

    timestxtpath = os.path.join(savedir, 'times-saved-params.txt')
    with open(timestxtpath, 'a') as f:
        f.write('%.3f\n' % (elapsedTime))

    initPi = hmodel.allocModel.get_init_prob_vector()
    transPi = hmodel.allocModel.get_trans_prob_matrix()

    LP = hmodel.obsModel.calc_local_params(Data)
    Lik = LP['E_log_soft_ev']

    # Loop over each sequence in the collection
    zHatBySeq = list()
    for n in range(Data.nDoc):
        start = Data.doc_range[n]
        stop = Data.doc_range[n + 1]
        zHat = runViterbiAlg(Lik[start:stop], initPi, transPi)
        zHatBySeq.append(zHat)

    # Store MAP sequence to file
    prefix = makePrefixForLap(lapFrac)
    matfilepath = os.path.join(savedir, prefix + 'MAPStateSeqs.mat')
    MATVarsDict = dict(
        zHatBySeq=StateSeqUtil.convertStateSeq_list2MAT(zHatBySeq))
    scipy.io.savemat(matfilepath, MATVarsDict, oned_as='row')

    # Keff: number of distinct states that appear in the decoded sequences
    zHatFlat = StateSeqUtil.convertStateSeq_list2flat(zHatBySeq, Data)
    Keff = np.unique(zHatFlat).size
    Kefftxtpath = os.path.join(savedir, 'Keff-saved-params.txt')
    with open(Kefftxtpath, 'a') as f:
        f.write('%d\n' % (Keff))

    # Ktotal: number of states in the model (obsModel.K).
    # This file must record Ktotal, not Keff.
    Ktotal = hmodel.obsModel.K
    Ktotaltxtpath = os.path.join(savedir, 'Ktotal-saved-params.txt')
    with open(Ktotaltxtpath, 'a') as f:
        f.write('%d\n' % (Ktotal))

    # Save sequence aligned to truth and calculate Hamming distance
    if (hasattr(Data, 'TrueParams')) and ('Z' in Data.TrueParams):
        zHatFlatAligned = StateSeqUtil.alignEstimatedStateSeqToTruth(
            zHatFlat, Data.TrueParams['Z'])

        zHatBySeqAligned = StateSeqUtil.convertStateSeq_flat2list(
            zHatFlatAligned, Data)
        zHatBySeqAligned_Arr = StateSeqUtil.convertStateSeq_list2MAT(
            zHatBySeqAligned)

        MATVarsDict = dict(zHatBySeqAligned=zHatBySeqAligned_Arr)
        matfilepath = os.path.join(savedir, prefix + 'MAPStateSeqsAligned.mat')
        scipy.io.savemat(matfilepath, MATVarsDict, oned_as='row')

        # Make sure the callee sees the dataset even when it was resolved
        # from the DataIterator rather than passed in directly.
        kwargs['Data'] = Data
        calcHammingDistanceAndSave(zHatFlatAligned, **kwargs)