Example #1
0
    def test_alignEstStateSeqToTrue__Kest_gt_Ktrue(self):
        ''' Check alignment when the estimated sequence uses more states than truth.

        The first case demands an exact aligned sequence. The remaining cases
        only require the aligned result to reach the same Hamming distance
        from truth as a known reference alignment.
        '''
        print('')

        # Exact-match case, exercised with int32 arrays instead of plain lists.
        estArr = np.asarray([0, 0, 0, 1, 1, 2, 0, 0], dtype=np.int32)
        truArr = np.asarray([0, 0, 0, 0, 0, 0, 0, 0], dtype=np.int32)
        expArr = np.asarray([0, 0, 0, 1, 1, 2, 0, 0], dtype=np.int32)
        assert np.allclose(
            SSU.alignEstimatedStateSeqToTruth(estArr, truArr), expArr)

        # Each entry: (estimated seq, true seq, reference aligned seq)
        distanceCases = (
            ([2, 2, 2, 1, 1, 0, 2, 2],
             [0, 0, 0, 0, 0, 0, 0, 0],
             [0, 0, 0, 1, 1, 2, 0, 0]),
            ([0, 0, 1, 2, 3, 4, 5, 6],
             [0, 0, 0, 1, 1, 1, 2, 2],
             [0, 0, 5, 1, 3, 4, 2, 6]),
            ([6, 6, 0, 5, 4, 3, 2, 1],
             [0, 0, 0, 1, 1, 1, 2, 2],
             [0, 0, 5, 1, 3, 4, 2, 6]),
        )
        for estSeq, truSeq, refSeq in distanceCases:
            alignedSeq = SSU.alignEstimatedStateSeqToTruth(estSeq, truSeq)
            distAligned = SSU.calcHammingDistance(alignedSeq, truSeq)
            distRef = SSU.calcHammingDistance(refSeq, truSeq)
            assert distAligned == distRef
Example #2
0
    def test_alignEstStateSeqToTrue__Kest_equals_Ktrue(self):
        ''' Verify alignment works when both sequences have same number of states

        Each case aligns zEst to zTru and compares the aligned labels against
        the expected relabeling. The third case admits two relabelings with
        identical Hamming distance to truth, so either one is accepted.
        '''
        # print() call form is valid on both Python 2 and Python 3.
        print('')

        zEst = [0, 0, 0, 1, 1, 1, 1, 1]
        zTru = [0, 0, 1, 1, 1, 1, 1, 0]
        zExp = [0, 0, 0, 1, 1, 1, 1, 1]
        zA = SSU.alignEstimatedStateSeqToTruth(zEst, zTru)
        assert np.allclose(zA, zExp)

        zEst = [1, 1, 1, 0, 0, 0, 0, 0]
        zTru = [0, 0, 1, 1, 1, 1, 1, 0]
        zExp = [0, 0, 0, 1, 1, 1, 1, 1]
        zA = SSU.alignEstimatedStateSeqToTruth(zEst, zTru)
        assert np.allclose(zA, zExp)

        zEst = [0, 0, 0, 1, 2, 2, 2, 2, 2, 1, 1, 1, 1]
        zTru = [1, 1, 1, 1, 2, 2, 2, 2, 2, 1, 1, 1, 0]
        zExp = [0, 0, 0, 1, 2, 2, 2, 2, 2, 1, 1, 1, 1]
        # Swapping labels 0 and 1 matches truth at just as many positions,
        # so the aligner may legitimately return either relabeling.
        zExp2 = [1, 1, 1, 0, 2, 2, 2, 2, 2, 0, 0, 0, 0]
        assert SSU.calcHammingDistance(zTru, zExp) == SSU.calcHammingDistance(
            zTru, zExp2)
        zA = SSU.alignEstimatedStateSeqToTruth(zEst, zTru)
        assert np.allclose(zA, zExp) or np.allclose(zA, zExp2)

        zEst = [2, 2, 2, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1]
        zTru = [1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 0]
        zExp = [1, 1, 1, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0]
        zA = SSU.alignEstimatedStateSeqToTruth(zEst, zTru)
        assert np.allclose(zA, zExp)
def calcHammingDistanceAndSave(zHatFlatAligned,
                               excludeTstepsWithNegativeTrueLabels=1,
                               **kwargs):
    ''' Calculate hamming distance for all sequences, saving to flat file.

    Args
    -------
    zHatFlatAligned : array (must expose `.size`)
        estimated state sequence, already aligned to the true labels
    excludeTstepsWithNegativeTrueLabels : int
        Accepted for interface compatibility, but not consulted by this
        function and not forwarded to StateSeqUtil.calcHammingDistance.
        NOTE(review): the name suggests timesteps with negative true labels
        should be skipped; confirm whether the callee handles that itself.

    Keyword Args (all workspace variables passed along from learning alg)
    -------
    Data : current Data object
        representing *entire* dataset (not just one chunk)
        Must provide TrueParams['Z'].
    learnAlg : learning algorithm object, optional
        Its `savedir` attribute is the output directory when provided.
    savedir : str, optional
        Output directory, used only when `learnAlg` is not provided.

    All keyword args are also forwarded to StateSeqUtil.calcHammingDistance.

    Returns
    -------
    None. Hamming distance saved to file.

    Raises
    -------
    KeyError
        if 'Data' is missing, or if neither 'learnAlg' nor 'savedir' is given.

    Output
    -------
    hamming-distance.txt
        One line ('%.6f') is appended per call.
    '''
    Data = kwargs['Data']
    zTrue = Data.TrueParams['Z']
    hdistance = StateSeqUtil.calcHammingDistance(zTrue, zHatFlatAligned,
                                                 **kwargs)
    # NOTE(review): if calcHammingDistance already returns a per-timestep
    # fraction, this division normalizes twice -- confirm against the callee.
    normhdist = float(hdistance) / float(zHatFlatAligned.size)

    # Prefer the learning algorithm's directory (original behavior), but
    # also support callers that only supply an explicit 'savedir'.
    if 'learnAlg' in kwargs:
        savedir = kwargs['learnAlg'].savedir
    else:
        savedir = kwargs['savedir']
    outpath = os.path.join(savedir, 'hamming-distance.txt')
    with open(outpath, 'a') as f:
        f.write('%.6f\n' % (normhdist))
Example #4
0
    def test_alignEstStateSeqToTrue__Kest_equals_Ktrue(self):
        ''' Check alignment when estimated and true sequences share a state count.

        Most cases have one acceptable aligned sequence. One case has two
        relabelings at equal Hamming distance from truth, and either passes.
        '''
        # Each entry: (estimated seq, true seq, expected aligned seq)
        uniqueCases = (
            ([0, 0, 0, 1, 1, 1, 1, 1],
             [0, 0, 1, 1, 1, 1, 1, 0],
             [0, 0, 0, 1, 1, 1, 1, 1]),
            ([1, 1, 1, 0, 0, 0, 0, 0],
             [0, 0, 1, 1, 1, 1, 1, 0],
             [0, 0, 0, 1, 1, 1, 1, 1]),
        )
        for estSeq, truSeq, expSeq in uniqueCases:
            assert np.allclose(
                SSU.alignEstimatedStateSeqToTruth(estSeq, truSeq), expSeq)

        # Tie case: two equally good solutions.
        estSeq = [0, 0, 0, 1, 2, 2, 2, 2, 2, 1, 1, 1, 1]
        truSeq = [1, 1, 1, 1, 2, 2, 2, 2, 2, 1, 1, 1, 0]
        optionA = [0, 0, 0, 1, 2, 2, 2, 2, 2, 1, 1, 1, 1]
        optionB = [1, 1, 1, 0, 2, 2, 2, 2, 2, 0, 0, 0, 0]
        distOptionA = SSU.calcHammingDistance(truSeq, optionA)
        distOptionB = SSU.calcHammingDistance(truSeq, optionB)
        assert distOptionA == distOptionB

        alignedSeq = SSU.alignEstimatedStateSeqToTruth(estSeq, truSeq)
        assert np.allclose(alignedSeq, optionA) or np.allclose(
            alignedSeq, optionB)

        # Final case with a single expected relabeling.
        estSeq = [2, 2, 2, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1]
        truSeq = [1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 0]
        expSeq = [1, 1, 1, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0]
        assert np.allclose(
            SSU.alignEstimatedStateSeqToTruth(estSeq, truSeq), expSeq)
Example #5
0
    def test_alignEstStateSeqToTrue__Kest_gt_Ktrue_someempty(self):
        ''' Check alignment when est sequence has more states than true.

        Note: 'extra' states (beyond # true states) are expected to receive
        increasing labels in order of their appearance in the original zEst.
        '''
        print('')

        # Each entry: (estimated, true, expected aligned, # mismatched steps)
        cases = (
            ([1, 1, 1, 2, 2, 1, 1, 1],
             [0, 0, 0, 0, 0, 0, 0, 0],
             [0, 0, 0, 1, 1, 0, 0, 0],
             2),
            ([2, 2, 2, 3, 4, 5, 2, 2],
             [0, 0, 0, 0, 0, 0, 0, 0],
             [0, 0, 0, 1, 2, 3, 0, 0],
             3),
            ([2, 2, 2, 3, 4, 5, 2, 2],
             [1, 1, 0, 0, 0, 0, 0, 0],
             [0, 0, 0, 2, 3, 4, 0, 0],
             5),
        )
        for estSeq, truSeq, expSeq, nMismatch in cases:
            alignedSeq = SSU.alignEstimatedStateSeqToTruth(estSeq, truSeq)
            assert np.allclose(alignedSeq, expSeq)
            # Distance is compared as a fraction of the sequence length.
            fracWrong = SSU.calcHammingDistance(alignedSeq, truSeq)
            assert fracWrong == nMismatch / float(len(truSeq))
Example #6
0
    def test_alignEstStateSeqToTrue__Kest_gt_Ktrue_someempty(self):
        ''' Verify alignment works when est sequence has more states than true

        Each case checks the exact aligned labels and then the Hamming
        distance to truth, expressed as a fraction of the sequence length.
        Intermediate values are printed as a debugging aid.
        '''
        # print() call form is valid on both Python 2 and Python 3.
        print('')

        zEst = [1, 1, 1, 2, 2, 1, 1, 1]
        zTru = [0, 0, 0, 0, 0, 0, 0, 0]
        zExp = [0, 0, 0, 2, 2, 0, 0, 0]
        zA = SSU.alignEstimatedStateSeqToTruth(zEst, zTru)
        assert np.allclose(zA, zExp)
        hdist = SSU.calcHammingDistance(zA, zTru)
        print(hdist)
        assert hdist == 2 / float(len(zTru))

        zEst = [2, 2, 2, 3, 4, 5, 2, 2]
        zTru = [0, 0, 0, 0, 0, 0, 0, 0]
        zExp = [0, 0, 0, 3, 4, 5, 0, 0]
        zA = SSU.alignEstimatedStateSeqToTruth(zEst, zTru)
        assert np.allclose(zA, zExp)
        hdist = SSU.calcHammingDistance(zA, zTru)
        print(hdist)
        assert hdist == 3 / float(len(zTru))

        zEst = [2, 2, 2, 3, 4, 5, 2, 2]
        zTru = [1, 1, 0, 0, 0, 0, 0, 0]
        zExp = [0, 0, 0, 4, 5, 1, 0, 0]
        zA = SSU.alignEstimatedStateSeqToTruth(zEst, zTru)
        print(zA)
        assert np.allclose(zA, zExp)
        hdist = SSU.calcHammingDistance(zA, zTru)
        assert hdist == 5 / float(len(zTru))
Example #7
0
    def test_alignEstStateSeqToTrue__Zest_equals_Ztrue(self):
        ''' Check that alignment reproduces truth when the sequences match exactly.

        Every estimated sequence below has the same segmentation as its true
        sequence, differing at most in label names, so the aligned output is
        expected to equal the true sequence itself.
        '''
        # Each entry: (estimated seq, true seq)
        pairs = (
            ([0, 0, 0, 1, 1, 1, 1, 1], [0, 0, 0, 1, 1, 1, 1, 1]),
            ([1, 1, 1, 0, 0, 0, 0, 0], [0, 0, 0, 1, 1, 1, 1, 1]),
            ([2, 2, 2, 0, 0, 0, 0, 0], [0, 0, 0, 1, 1, 1, 1, 1]),
            ([2, 2, 2, 0, 0, 0, 0, 0], [3, 3, 3, 1, 1, 1, 1, 1]),
        )
        for estSeq, truSeq in pairs:
            alignedSeq = SSU.alignEstimatedStateSeqToTruth(estSeq, truSeq)
            assert np.allclose(alignedSeq, truSeq)
Example #8
0
    def test_alignEstStateSeqToTrue__Kest_lt_Ktrue(self):
        ''' Verify alignment works when est sequence has fewer states than true

        The first two cases require an exact aligned sequence. The last case
        only requires the aligned result to reach the same Hamming distance
        from truth as the reference alignment zExp.
        '''
        print('')

        zEst = [0, 0, 0, 0, 0, 0, 0, 0]
        zTru = [0, 0, 1, 1, 1, 1, 1, 0]
        zExp = [1, 1, 1, 1, 1, 1, 1, 1]
        zA = SSU.alignEstimatedStateSeqToTruth(zEst, zTru)
        assert np.allclose(zA, zExp)

        zEst = [0, 0, 0, 0, 0, 0, 0, 0]
        zTru = [0, 0, 1, 1, 1, 1, 1, 2]
        zExp = [1, 1, 1, 1, 1, 1, 1, 1]
        zA = SSU.alignEstimatedStateSeqToTruth(zEst, zTru)
        assert np.allclose(zA, zExp)

        zEst = [0, 1, 1, 1, 0, 0, 0, 1]
        zTru = [0, 0, 1, 1, 1, 1, 1, 2]
        zExp = [1, 0, 0, 0, 1, 1, 1, 0]
        zA = SSU.alignEstimatedStateSeqToTruth(zEst, zTru)

        # Compare distances rather than labels: mapping the second estimated
        # state to true label 0 or to true label 2 matches equally many steps.
        distA = SSU.calcHammingDistance(zA, zTru)
        distExp = SSU.calcHammingDistance(zExp, zTru)
        assert distA == distExp
Example #9
0
    def decode(self, X, lengths):
        ''' Run Viterbi on each sequence and return the flattened state path.

        Args
        -------
        X : 2D array of observations; rows are indexed as X[t, :]
        lengths : per-sequence lengths, cumulatively summed into boundaries
            NOTE(review): the first row of X is dropped before the dataset
            is built -- confirm that `lengths` already accounts for that.

        Returns
        -------
        zHatFlat : result of StateSeqUtil.convertStateSeq_list2flat applied to
            the per-sequence Viterbi paths
        '''
        # Pair every row (from the second onward) with its predecessor row.
        prevRows = X[:-1, :]
        curRows = X[1:, :]
        # Boundaries are 0 followed by the running totals of lengths.
        boundaries = [0] + np.cumsum(lengths).tolist()
        dataset = bnpy.data.GroupXData(curRows, boundaries, None, prevRows)

        from bnpy.allocmodel.hmm.HMMUtil import runViterbiAlg
        from bnpy.util import StateSeqUtil
        allocModel = self.model.allocModel
        startProb = allocModel.get_init_prob_vector()
        transProb = allocModel.get_trans_prob_matrix()
        softEv = self.model.calc_local_params(dataset)['E_log_soft_ev']

        # One Viterbi pass per sequence, sliced out by its boundaries.
        pathsBySeq = [
            runViterbiAlg(
                softEv[dataset.doc_range[n]:dataset.doc_range[n + 1]],
                startProb, transProb)
            for n in range(dataset.nDoc)
        ]
        return StateSeqUtil.convertStateSeq_list2flat(pathsBySeq, dataset)
Example #10
0
def findCompInModelWithLargestMisalignment(model, Data, Zref=None):
    ''' Finds cluster in model that is best candidate for a birth move.

    Hard-assigns each atom via argmax of LP['resp'], aligns those labels to
    the reference labels, and scores every aligned state k by the fraction
    of its atoms whose reference label differs from k. The state with the
    largest fraction is selected.

    Args
    -------
    model : object providing calc_local_params(Data) -> dict with 'resp'
    Data : dataset object; Data.TrueParams['Z'] is used when Zref is None
    Zref : reference label array, optional

    Returns
    -------
    korig : label of the selected state in the model's original
        (pre-alignment) numbering, looked up via
        AlignInfo['AlignedToOrigMap']

    Post Condition
    --------------
    Prints useful info to stdout.
    '''
    if Zref is None:
        Zref = Data.TrueParams['Z']
    LP = model.calc_local_params(Data)
    Z = LP['resp'].argmax(axis=1)
    AZ, AlignInfo = StateSeqUtil.alignEstimatedStateSeqToTruth(Z,
                                                               Zref,
                                                               returnInfo=1)
    # Labels run 0..AZ.max() inclusive, so there are max+1 states to score.
    # Using AZ.max() as the count would skip the highest-numbered state.
    numStates = int(AZ.max()) + 1
    dist = np.zeros(numStates)
    for k in range(numStates):
        mask = AZ == k
        nDisagree = np.sum(Zref[mask] != k)
        nTotal = mask.sum()
        # Small constant avoids division by zero for states with no atoms.
        dist[k] = float(nDisagree) / (float(nTotal) + 1e-10)
        print(k, dist[k])
    ktarget = np.argmax(dist)
    korig = AlignInfo['AlignedToOrigMap'][ktarget]
    # States are also shown as letters: 0 -> 'A', 1 -> 'B', ...
    print('ktarget %d: %s' % (ktarget, chr(65 + ktarget)))
    print('korig %d' % (korig))
    # Determine what is hiding inside of it that shouldnt be
    mask = AZ == ktarget
    nTarget = np.sum(mask)
    print('%d total atoms assigned to ktarget...' % (nTarget))
    trueLabels = np.asarray(np.unique(Zref[mask]), np.int32)
    for ll in trueLabels:
        nTrue = np.sum(Zref[mask] == ll)
        print('%d/%d should have true label %d: %s' %
              (nTrue, nTarget, ll, chr(65 + ll)))
    return korig
def runViterbiAndSave(**kwargs):
    ''' Run viterbi alg on each sequence in dataset, and save to file.

    Keyword Args (all workspace variables passed along from learning alg)
    -------
    hmodel : current HModel object
    Data : current Data object
        representing *entire* dataset (not just one chunk)
    DataIterator : used only when Data is absent
        Either exposes attribute `Data`, or a `DataInfo` dict containing
        'evalDataPath' from which the data is loaded from disk.
    lapFrac : lap identifier, used to build the output file prefix
    savedir : str, output directory (takes precedence when provided)
    learnAlg : learning algorithm object
        Supplies `savedir` when no explicit savedir is given, and the
        elapsed time when it has a `start_time` attribute.

    Returns
    -------
    None. MAP state sequences are saved to a MAT file.
    Returns immediately, writing nothing, when neither 'Data' nor
    'DataIterator' is provided.

    Raises
    -------
    ValueError
        if the DataIterator offers no way to obtain the data, or if
        neither 'savedir' nor 'learnAlg' is provided.

    Output
    -------
    MATfile format: Lap0020.000MAPStateSeqs.mat
    Also appends one line each to times-saved-params.txt,
    Keff-saved-params.txt and Ktotal-saved-params.txt. When
    Data.TrueParams['Z'] exists, additionally writes
    <prefix>MAPStateSeqsAligned.mat and calls calcHammingDistanceAndSave.
    '''
    if 'Data' in kwargs:
        Data = kwargs['Data']
    elif 'DataIterator' in kwargs:
        try:
            Data = kwargs['DataIterator'].Data
        except AttributeError:
            from bnpy.data.DataIteratorFromDisk import loadDataForSlice
            Dinfo = dict()
            Dinfo.update(kwargs['DataIterator'].DataInfo)
            if 'evalDataPath' in Dinfo:
                Dinfo['filepath'] = os.path.expandvars(Dinfo['evalDataPath'])
                Data = loadDataForSlice(**Dinfo)
            else:
                raise ValueError('DataIterator has no attribute Data')
    else:
        return None

    hmodel = kwargs['hmodel']
    lapFrac = kwargs['lapFrac']

    # Resolve the output directory; an explicit 'savedir' wins over learnAlg.
    learnAlgObj = kwargs.get('learnAlg', None)
    if 'savedir' in kwargs:
        savedir = kwargs['savedir']
    elif learnAlgObj is not None:
        savedir = learnAlgObj.savedir
    else:
        raise ValueError(
            "runViterbiAndSave requires either 'savedir' or 'learnAlg'")

    # Elapsed time is always defined, falling back to 0.0 when no learning
    # algorithm (or no start time) is available.
    if learnAlgObj is not None and hasattr(learnAlgObj, 'start_time'):
        elapsedTime = learnAlgObj.get_elapsed_time()
    else:
        elapsedTime = 0.0

    timestxtpath = os.path.join(savedir, 'times-saved-params.txt')
    with open(timestxtpath, 'a') as f:
        f.write('%.3f\n' % (elapsedTime))

    initPi = hmodel.allocModel.get_init_prob_vector()
    transPi = hmodel.allocModel.get_trans_prob_matrix()

    LP = hmodel.obsModel.calc_local_params(Data)
    Lik = LP['E_log_soft_ev']

    # Loop over each sequence in the collection
    zHatBySeq = list()
    for n in range(Data.nDoc):
        start = Data.doc_range[n]
        stop = Data.doc_range[n + 1]
        zHat = runViterbiAlg(Lik[start:stop], initPi, transPi)
        zHatBySeq.append(zHat)

    # Store MAP sequence to file
    prefix = makePrefixForLap(lapFrac)
    matfilepath = os.path.join(savedir, prefix + 'MAPStateSeqs.mat')
    MATVarsDict = dict(
        zHatBySeq=StateSeqUtil.convertStateSeq_list2MAT(zHatBySeq))
    scipy.io.savemat(matfilepath, MATVarsDict, oned_as='row')

    # Keff: number of distinct states actually used by the MAP sequences.
    zHatFlat = StateSeqUtil.convertStateSeq_list2flat(zHatBySeq, Data)
    Keff = np.unique(zHatFlat).size
    Kefftxtpath = os.path.join(savedir, 'Keff-saved-params.txt')
    with open(Kefftxtpath, 'a') as f:
        f.write('%d\n' % (Keff))

    # Ktotal: total number of states in the observation model.
    Ktotal = hmodel.obsModel.K
    Ktotaltxtpath = os.path.join(savedir, 'Ktotal-saved-params.txt')
    with open(Ktotaltxtpath, 'a') as f:
        f.write('%d\n' % (Ktotal))

    # Save sequence aligned to truth and calculate Hamming distance
    if (hasattr(Data, 'TrueParams')) and ('Z' in Data.TrueParams):
        zHatFlatAligned = StateSeqUtil.alignEstimatedStateSeqToTruth(
            zHatFlat, Data.TrueParams['Z'])

        zHatBySeqAligned = StateSeqUtil.convertStateSeq_flat2list(
            zHatFlatAligned, Data)
        zHatBySeqAligned_Arr = StateSeqUtil.convertStateSeq_list2MAT(
            zHatBySeqAligned)

        MATVarsDict = dict(zHatBySeqAligned=zHatBySeqAligned_Arr)
        matfilepath = os.path.join(savedir, prefix + 'MAPStateSeqsAligned.mat')
        scipy.io.savemat(matfilepath, MATVarsDict, oned_as='row')

        # Data may have come from the DataIterator, so pass it on explicitly.
        # NOTE(review): verify that calcHammingDistanceAndSave can resolve an
        # output directory when only 'savedir' (no 'learnAlg') was supplied.
        kwargs['Data'] = Data
        calcHammingDistanceAndSave(zHatFlatAligned, **kwargs)