def FetchPatternWithICIter(allNode, adjMap, minIC, maxIC, seqLis, allPattern, kmer2seqIdSet, overallKmer2Cov,
                           pattern2IC, startNode2rlt=None, ICminInterval=0.05, enableParallel=False, coreNm=8,
                           pattern2kmerSet=None):
    """Recursively collect patterns whose information content lies in [minIC, maxIC].

    A ``Backpack`` model is built for the current IC window and a pattern
    search is started from every node in ``allNode``. The smallest IC among
    the accepted patterns is then used to split the window into a left and a
    right sub-window, each of which is searched recursively as long as it is
    strictly narrower than the current window and wider than
    ``ICminInterval``.

    :param allNode: iterable of start nodes; each one seeds a separate search.
    :param adjMap: mapping from a node to its adjacent (available) nodes.
    :param minIC: lower bound of the current IC window.
    :param maxIC: upper bound of the current IC window.
    :param seqLis: sequences, forwarded to ``Backpack``.
    :param allPattern: set of patterns found so far; new patterns are unioned
        into a new set (the input set is not modified in place).
    :param kmer2seqIdSet: forwarded to ``Backpack``; the value returned is the
        one held by the ``Backpack`` instance after the search.
    :param overallKmer2Cov: forwarded to ``Backpack`` and to
        ``FetchPatternWeighedIC``.
    :param pattern2IC: pattern -> IC cache, forwarded to ``Backpack``; in the
        parallel branch it is merged with each worker's result.
    :param startNode2rlt: cache ``startNode -> (userCov, curPattern)`` shared
        across the recursion. A cached entry is reused when its first element
        falls inside [minIC, maxIC]. Defaults to a fresh dict per top-level
        call.
    :param ICminInterval: minimum width a sub-window must exceed to be
        searched.
    :param enableParallel: when true, start nodes are processed through
        ``Parallel``/``delayed`` (presumably joblib -- confirm against the
        file's imports) instead of a sequential loop.
    :param coreNm: number of parallel jobs.
    :param pattern2kmerSet: pattern -> k-mer set cache, forwarded to
        ``Backpack``. Defaults to a fresh dict per top-level call.
    :return: tuple ``(allPattern, kmer2seqIdSet, pattern2IC, pattern2kmerSet)``.
    """
    # Fresh containers per top-level call: a mutable default would be shared
    # between unrelated invocations and leak cached start-node results.
    if startNode2rlt is None:
        startNode2rlt = {}
    if pattern2kmerSet is None:
        pattern2kmerSet = {}

    # Each split moves the window bound by at least this much.
    boundStep = 0.05

    Comm.PrintWithTime('current IC: min: %s, max: %s' % (minIC, maxIC), isLogging=True)

    # build the backpack model
    bp = Backpack(adjMap, minIC, maxIC, seqLis, kmer2seqIdSet, overallKmer2Cov, pattern2IC, pattern2kmerSet)

    # give different start node in case that there are multiple motif
    patternSet = set()
    ICLis = []
    if not enableParallel:
        for startNode in allNode:
            includedNodes = {startNode}
            availNodes = adjMap[startNode]
            disableNode = set()

            # Reuse the cached result for this start node when its first
            # element lies inside the current window; otherwise search again.
            if startNode in startNode2rlt and minIC <= startNode2rlt[startNode][0] <= maxIC:
                userCov, curPattern = startNode2rlt[startNode]
            else:
                userCov, curPattern = bp.searchPattern(includedNodes, availNodes, disableNode)
            startNode2rlt[startNode] = (userCov, curPattern)
            # INFO('current starting node: %s, current pattern: %s' % (startNode, curPattern))
            if not curPattern:
                continue

            # filter 'ACGU' and 'X[ACGU]X'
            parsedPatternLis = BioinfoComm.parsePatternStr(curPattern)
            # Must be a list: it is scanned twice below, and a Python 3 map()
            # iterator would already be exhausted by the set() call.
            charCntLis = [len(x) for x in parsedPatternLis]
            if set(charCntLis) == {1} or 4 in charCntLis:
                continue

            # ==== TODO
            # kmerLis = BioinfoComm.FetchAllKmerFromPattern(curPattern)
            # patternLen = len(kmerLis[0])
            # if patternLen == 4:
            # if patternLen == 6:
            #     curPattern = '[GT]G[ACG][CG]'
            #     curPattern = '[CGT]CC[AGT]'
            #     curPattern = '[AGT][AGT][AGT][ACG][AT][CG]'
            # ====

            # calculate new information content
            curIC, bp.pattern2IC = FetchPatternWeighedIC(curPattern, bp.pattern2IC, overallKmer2Cov)
            ICLis.append(curIC)
            patternSet.add(curPattern)
    else:
        startNodeInfoLis = Parallel(n_jobs=coreNm)(
            delayed(ParallelDiffStartnode)(startNode, bp, adjMap, startNode2rlt, minIC, maxIC, overallKmer2Cov)
            for startNode in allNode)
        for startNode, curIC, curPattern, userCov, curPattern2IC in startNodeInfoLis:
            # A curIC of -1 marks a start node whose result is skipped.
            if curIC == -1:
                continue
            pattern2IC = dict(pattern2IC, **curPattern2IC)
            startNode2rlt[startNode] = (userCov, curPattern)
            ICLis.append(curIC)
            patternSet.add(curPattern)

    kmer2seqIdSet = bp.kmer2seqIdSet
    pattern2kmerSet = bp.pattern2kmerSet
    # pattern2IC = bp.pattern2IC

    if not patternSet:
        # in case there is no result
        return allPattern, kmer2seqIdSet, pattern2IC, pattern2kmerSet

    allPattern = allPattern | patternSet
    nextIC = min(ICLis)

    def searchSubWindow(subMinIC, subMaxIC):
        # Recurse on a sub-window, sharing the caches and forwarding every
        # tuning parameter (including ICminInterval) unchanged.
        return FetchPatternWithICIter(
            allNode, adjMap, subMinIC, subMaxIC, seqLis, allPattern, kmer2seqIdSet, overallKmer2Cov, pattern2IC,
            startNode2rlt, ICminInterval=ICminInterval, enableParallel=enableParallel, coreNm=coreNm,
            pattern2kmerSet=pattern2kmerSet)

    # the left part
    minIC1 = minIC
    maxIC1 = min(nextIC, maxIC - boundStep)  # at least move the bound for 0.05
    if maxIC1 < maxIC and maxIC1 - minIC1 > ICminInterval:
        allPattern, kmer2seqIdSet, pattern2IC, pattern2kmerSet = searchSubWindow(minIC1, maxIC1)

    # the right part
    minIC2 = max(nextIC, minIC + boundStep)  # at least move the bound for 0.05
    maxIC2 = maxIC
    if minIC2 > minIC and maxIC2 - minIC2 > ICminInterval:
        allPattern, kmer2seqIdSet, pattern2IC, pattern2kmerSet = searchSubWindow(minIC2, maxIC2)

    INFO('iteration for IC [%s, %s]' % (minIC, maxIC))
    return allPattern, kmer2seqIdSet, pattern2IC, pattern2kmerSet