def getGlobalString(signal):
    """Turn a complete signal into its string representation.

    The signal is smoothed with the module-level ``smoothParam``, a
    shift/scale pair is computed over the full smoothed signal with
    ``computeNorm``, and both are handed to ``computeString`` together
    with the module-level ``levels`` and ``overflow`` settings.

    Args:
        signal: Sequence of samples accepted by ``smoothSignal``.

    Returns:
        The value produced by ``computeString`` for the whole smoothed
        signal.
    """
    smoothed = smoothSignal(signal, smoothParam)
    sampleCount = len(smoothed)
    shift, scale = computeNorm(smoothed, 0, sampleCount)
    return computeString(
        smoothed,
        0,
        sampleCount,
        shift,
        scale,
        levels,
        overflow=overflow,
    )
def getLevelStr(signal, l):
    """Turn a complete signal into its string form for a caller-chosen level.

    Works on a private float copy of ``signal``: the copy is smoothed with
    the module-level ``smoothParam``, normalisation parameters are computed
    over its full length with ``computeNorm``, and the result is passed to
    ``computeString`` with ``l`` in the level position and the module-level
    ``overflow`` setting.

    Args:
        signal: Sequence of numeric samples; it is not modified.
        l: Level argument forwarded unchanged to ``computeString``.

    Returns:
        The value produced by ``computeString`` for the whole smoothed
        signal.
    """
    # np.array(...) allocates a fresh float array by default, so the caller's
    # data is already protected; a preceding copy.deepcopy was a redundant
    # second copy of the whole signal.
    smoothed = smoothSignal(np.array(signal, dtype=float), smoothParam)
    sampleCount = len(smoothed)
    shift, scale = computeNorm(smoothed, 0, sampleCount)
    return computeString(
        smoothed,
        0,
        sampleCount,
        shift,
        scale,
        l,
        overflow=overflow,
    )
def getDictFromSequence(signal, refWindowSize, refWindowJump):
    """Build one dictionary from all sliding windows of a signal.

    Windows of ``refWindowSize`` samples are taken at offsets
    ``0, refWindowJump, 2 * refWindowJump, ...`` for as long as a full
    window still fits. Each window is copied to a float array, smoothed with
    the module-level ``smoothParam``, normalised with ``computeNorm``,
    converted with ``computeString`` (module-level ``levels`` and
    ``overflow``), and the output of ``buildDictionary`` for that string
    (with the module-level ``kmerLength``) is merged into the result.

    Args:
        signal: Sliceable sequence of numeric samples; it is not modified.
        refWindowSize: Number of samples per window.
        refWindowJump: Step between consecutive window starts.

    Returns:
        A dict holding the merged ``buildDictionary`` results. Merging uses
        ``dict.update``, so a key produced by a later window replaces the
        value stored for the same key by an earlier window. The dict is
        empty when the signal is shorter than one window.
    """
    merged = {}
    lastStart = len(signal) - refWindowSize
    for winBeg in range(0, lastStart + 1, refWindowJump):
        # np.array(...) copies the slice into a new float array by default,
        # so a copy.deepcopy of every window beforehand was redundant work.
        window = np.array(signal[winBeg:winBeg + refWindowSize], dtype=float)
        window = smoothSignal(window, smoothParam)
        shift, scale = computeNorm(window, 0, refWindowSize)
        windowString = computeString(
            window,
            0,
            refWindowSize,
            shift,
            scale,
            levels,
            overflow=overflow,
        )
        merged.update(buildDictionary(windowString, kmerLength))
    return merged
def getDictFromSequence(signal, l=False):
    """Convert a whole signal to its string form, or to a dictionary built from it.

    Works on a private float copy of ``signal``: the copy is smoothed with
    the module-level ``smoothParam``, normalised over its full length with
    ``computeNorm``, and converted with ``computeString`` using the
    module-level ``levels`` and ``overflow`` settings.

    Args:
        signal: Sequence of numeric samples; it is not modified.
        l: Flag selecting the return value. When truthy, the string itself
            is returned instead of the dictionary.

    Returns:
        The ``computeString`` result when ``l`` is truthy; otherwise the
        result of ``buildDictionarySpecial`` for that string and the
        module-level ``kmerLength``.
    """
    # np.array(...) allocates a fresh float array by default, so the caller's
    # data is already protected; a preceding copy.deepcopy was a redundant
    # second copy of the whole signal.
    smoothed = smoothSignal(np.array(signal, dtype=float), smoothParam)
    sampleCount = len(smoothed)
    shift, scale = computeNorm(smoothed, 0, sampleCount)
    signalString = computeString(
        smoothed,
        0,
        sampleCount,
        shift,
        scale,
        levels,
        overflow=overflow,
    )
    # Plain truthiness test instead of comparing the flag to True with "==".
    if l:
        return signalString
    return buildDictionarySpecial(signalString, kmerLength)
fakeSignal = [] fakeIndex = -1 while len(fakeSignal) <= toSignal: fakeIndex = random.randint(0, len(negReadsPaths) - 1) fakeSignal = np.array(getSignalFromRead(negReadsPaths[fakeIndex]), dtype=float) fakeSignal = fakeSignal[fromSignal:toSignal] readSignal = readSignal[:workingLen] refSignal = refSignal[:workingLen] fakeSignal = fakeSignal[:workingLen] readSignal = smoothSignal(readSignal, smoothParam) refSignal = smoothSignal(refSignal, smoothParam) fakeSignal = smoothSignal(fakeSignal, smoothParam) readShift, readScale = computeNorm(readSignal, 0, len(readSignal)) refShift, refScale = computeNorm(refSignal, 0, len(refSignal)) fakeShift, fakeScale = computeNorm(fakeSignal, 0, len(fakeSignal)) readStrings, refStrings, fakeStrings = {}, {}, {} for l in levels: readStrings[l] = computeString( readSignal, 0, len(readSignal), readShift, readScale, l, overflow=0.30, )
#plt.plot(y) #plt.xticks(y_pos, x, color='orange', rotation=45, fontweight='bold', horizontalalignment='right') #plt.tick_params(labelbottom='off') refSignal = stringToSignal(refSeq, mod, repeatSignal=repeatSignal) refSeqHelper = [] for i in refSeq: refSeqHelper.append(i) for k in range(repeatSignal - 1): refSeqHelper.append("_") y = smoothSignal(y, 5) refSignal = smoothSignal(refSignal, 5) ySignalShift, ySignalScale = computeNorm(y, 0, len(y)) #y -= ySignalShift #y /= ySignalScale refSignalShift, refSignalScale = computeNorm(refSignal, 0, len(refSignal)) #refSignal -= refSignalShift #refSignal /= refSignalScale readString = computeString(y, 0, len(y), ySignalShift, ySignalScale, levels, overflow=0.30) refString = computeString(refSignal,
refSignal = np.array( stringToSignal(refSeq, mod, repeatSignal=repeatSignal), float) # fakeSignal = np.array(stringToSignal(fakeSeq, mod, repeatSignal = repeatSignal), # float) fakeSignal = [] fakeIndex = -1 while len(fakeSignal) <= signalTo: fakeIndex = random.randint(0, len(negReads) - 1) fakeSignal = np.array(getSignalFromRead(negReads[fakeIndex]), dtype=float) fakeSignal = fakeSignal[signalFrom:signalTo] readSignalSm = smoothSignal(readSignal, 5) refSignalSm = smoothSignal(refSignal, 5) fakeSignalSm = smoothSignal(fakeSignal, 5) readShiftSm, readScaleSm = computeNorm(readSignalSm, 0, len(readSignalSm)) refShiftSm, refScaleSm = computeNorm(refSignalSm, 0, len(refSignalSm)) fakeShiftSm, fakeScaleSm = computeNorm(fakeSignalSm, 0, len(fakeSignalSm)) readString2Sm = computeString( readSignalSm, 0, len(readSignalSm), readShiftSm, readScaleSm, levels, overflow=0.25, ) refString2Sm = computeString(refSignalSm, 0, len(refSignalSm), refShiftSm,
refSeq = str(Fasta(refFilePath)[ctg][fromRef:toRef]) else: refSeq = str(-Fasta(refFilePath)[ctg][fromRef:toRef]) refSignal = np.array( stringToSignal(refSeq, mod, repeatSignal=repeatSignal), float) readSignal = np.array(getSignalFromRead(posRead), dtype=float) readSignal = readSignal[fromSignal:toSignal] readSignal = readSignal[:workingLen] refSignal = refSignal[:workingLen] readSignalSm = smoothSignal(readSignal, smoothParam) refSignalSm = smoothSignal(refSignal, smoothParam) readShift, readScale = computeNorm(readSignal, 0, len(readSignal)) readShiftSm, readScaleSm = computeNorm(readSignalSm, 0, len(readSignalSm)) refShift, refScale = computeNorm(refSignal, 0, len(refSignal)) refShiftSm, refScaleSm = computeNorm(refSignalSm, 0, len(refSignalSm)) readStrings, readStringsSm, refStrings, refStringsSm = {}, {}, {}, {} for l in levels: readStrings[l] = computeString( readSignal, 0, len(readSignal), readShift, readScale, l, overflow=0.30,
ref = Fasta(refFilePath) mod = KmerModel.load_from_hdf5(kmerModelFilePath) for contig in ref: refSeqPos = str(contig[:]) refSeqNeg = str(contig[:].complement) refSignalPos = np.array( stringToSignal(refSeqPos, mod, repeatSignal=repeatSignal), float) refSignalNeg = np.array( stringToSignal(refSeqNeg, mod, repeatSignal=repeatSignal), float) refSignalPos = smoothSignal(refSignalPos, smoothParam) refSignalNeg = smoothSignal(refSignalNeg, smoothParam) refSignalPosShift, refSignalPosScale = computeNorm(refSignalPos, 0, len(refSignalPos)) refSignalNegShift, refSignalNegScale = computeNorm(refSignalNeg, 0, len(refSignalNeg)) for l in levels: refStringPos = computeString( refSignalPos, 0, len(refSignalPos), refSignalPosShift, refSignalPosScale, l, overflow=overflow, ) refStringNeg = computeString(
hits = [ aln for aln in referenceIdx.map(readFastq) if aln.q_en - aln.q_st > 0.95 * len(readFastq) and aln.strand == 1 and aln.ctg == workingContig ] if len(hits) != 1: continue hit = hits[0] location = hit.r_st / len(Fasta(refFilePath)[hit.ctg]) print(f"I am in ctg {hit.ctg} in around {location}") refSeq = str(Fasta(refFilePath)[hit.ctg][hit.r_st:hit.r_en]) refSignal = stringToSignal(refSeq, mod, repeatSignal) refSignal = smoothSignal(refSignal, smoothParam) refShift, refScale = computeNorm(refSignal, 0, len(refSignal)) #refShift, refScale = globalNorms[hit.ctg][0], globalNorms[hit.ctg][1] refString = computeString(refSignal, 0, len(refSignal), refShift, refScale, level, overflow=overflow) refString = refString[10:len(refString) - 10] contigX = storeContig[hit.ctg] startInRef = -1 for e in range(len(contigX) - len(refString) + 1): w = contigX[e:e + len(refString)] if w == refString: