Esempio n. 1
0
def FetchPatternCov(curPattern, seqLis, kmer2seqIdInt):
    """
    Fetch the IDs of the sequences in seqLis that curPattern covers.

    The coverage of a pattern is the bitwise OR of the coverage integers of
    all k-mers returned by FetchAllKmerFromPattern(curPattern). A k-mer covers
    a sequence when `kmer in seq` is true.

    ***
    To speed things up:
    1. check whether the current pattern has already been calculated
    2. parse the pattern into k-mers and store each k-mer's coverage integer
       in kmer2seqIdInt
    ***

    :param curPattern: pattern to evaluate
    :param seqLis: list of sequences to test each k-mer against
    :param kmer2seqIdInt: dict used as a cache, mapping a k-mer or a pattern
        to its coverage integer; it is updated in place
    :return: tuple (cov, patternCovSeqIdxInt, kmer2seqIdInt) where cov is
        covIdxInt2covCnt(patternCovSeqIdxInt) (presumably the number of
        covered sequences -- confirm against that helper) and kmer2seqIdInt
        is the same dict object that was passed in
    """
    if curPattern in kmer2seqIdInt:
        patternCovSeqIdxInt = kmer2seqIdInt[curPattern]
    else:
        seqCnt = len(seqLis)
        # Start from 0 (the identity for |) so that a pattern that yields no
        # k-mers gets an empty coverage instead of crashing; this also avoids
        # relying on the builtin `reduce`, which only exists on Python 2.
        patternCovSeqIdxInt = 0
        for kmer in FetchAllKmerFromPattern(curPattern):
            if kmer in kmer2seqIdInt:
                kmerCovIdxInt = kmer2seqIdInt[kmer]
            else:
                kmerCovIdxSet = {seqId for seqId, seq in enumerate(seqLis) if kmer in seq}
                kmerCovIdxBitarray = IdxLis2bin(kmerCovIdxSet, seqCnt)
                kmerCovIdxInt = Comm.bitarray2int(kmerCovIdxBitarray)
                # remember the k-mer so later patterns sharing it skip the scan
                kmer2seqIdInt[kmer] = kmerCovIdxInt
            patternCovSeqIdxInt |= kmerCovIdxInt
        kmer2seqIdInt[curPattern] = patternCovSeqIdxInt
    cov = covIdxInt2covCnt(patternCovSeqIdxInt)
    return cov, patternCovSeqIdxInt, kmer2seqIdInt
Esempio n. 2
0
def formatCovId(kmer2seqIdSet, totalSeqCnt):
    """
    Format the covered sequence ID sets into binary numbers.

    Every value of kmer2seqIdSet is converted with
    IdxLis2bin(seqIdxSet, totalSeqCnt) and then Comm.bitarray2int. The input
    mapping itself is not modified.

    :param kmer2seqIdSet: dict mapping each k-mer to its collection of
        covered sequence indices
    :param totalSeqCnt: forwarded to IdxLis2bin as its second argument
        (presumably the total number of sequences -- confirm against
        IdxLis2bin)
    :return: new dict mapping each k-mer to its coverage integer
    """
    # .items() instead of .iteritems(): the latter exists only on Python 2,
    # while .items() works on both Python 2 and Python 3.
    return {
        kmer: Comm.bitarray2int(IdxLis2bin(seqIdxSet, totalSeqCnt))
        for kmer, seqIdxSet in kmer2seqIdSet.items()
    }
Esempio n. 3
0
def FetchCovSeqDetail(seqMatrix, kmer, samplingSize):
    """
    Fetch, for every row of seqMatrix, the sequence IDs covered by kmer.

    For each row the indices of the sequences containing kmer are collected,
    turned into a bit array with BioinfoComm.IdxLis2bin(..., samplingSize),
    and recorded both as an integer (Comm.bitarray2int) and as a count
    (bitarray.bitarray.count of that bit array).

    :param seqMatrix: iterable of sequence lists, one list per row
    :param kmer: k-mer to look for; a sequence is covered when `kmer in seq`
    :param samplingSize: forwarded to BioinfoComm.IdxLis2bin as its second
        argument
    :return: tuple (numpy array with one coverage integer per row,
        list with one coverage count per row)
    """
    rowCovInts = []
    rowCovCnts = []
    for row in seqMatrix:
        hitIdxSet = set()
        for idx, seq in enumerate(row):
            if kmer in seq:
                hitIdxSet.add(idx)
        hitBits = BioinfoComm.IdxLis2bin(hitIdxSet, samplingSize)
        rowCovInts.append(Comm.bitarray2int(hitBits))
        rowCovCnts.append(int(bitarray.bitarray.count(hitBits)))
    # to save memory
    return np.array(rowCovInts), rowCovCnts