def getGlobalString(signal):
    """Encode an entire signal as one level string.

    The signal is smoothed with ``smoothSignal``, a shift/scale pair is
    computed over its full length with ``computeNorm``, and both are handed
    to ``computeString``. ``smoothParam``, ``levels`` and ``overflow`` are
    not parameters; they are read from the enclosing scope.

    Args:
        signal: Signal samples in whatever form ``smoothSignal`` accepts.

    Returns:
        The value produced by ``computeString`` for the smoothed signal.
    """
    smoothed = smoothSignal(signal, smoothParam)
    sampleCount = len(smoothed)
    shift, scale = computeNorm(smoothed, 0, sampleCount)
    return computeString(
        smoothed,
        0,
        sampleCount,
        shift,
        scale,
        levels,
        overflow=overflow,
    )
def getLevelStr(signal, l):
    """Encode an entire signal as a level string using the given level value.

    The input is copied into a float array, smoothed with ``smoothSignal``,
    normalised over its full length with ``computeNorm`` and converted with
    ``computeString``. ``smoothParam`` and ``overflow`` are read from the
    enclosing scope.

    Args:
        signal: Sequence of numeric samples; it is not modified.
        l: Forwarded to ``computeString`` as its level argument
            (presumably the number of discretisation levels - confirm
            against ``computeString``).

    Returns:
        The value produced by ``computeString`` for the smoothed signal.
    """
    # np.array() copies by default, so the caller's data stays untouched
    # without an additional (and costly) copy.deepcopy() pass.
    currSignal = np.array(signal, dtype=float)
    currSignal = smoothSignal(currSignal, smoothParam)
    sampleCount = len(currSignal)
    currSignalShift, currSignalScale = computeNorm(currSignal, 0, sampleCount)
    return computeString(
        currSignal,
        0,
        sampleCount,
        currSignalShift,
        currSignalScale,
        l,
        overflow=overflow,
    )
def getDictFromSequence(signal, refWindowSize, refWindowJump):
    """Build a k-mer dictionary from sliding windows over ``signal``.

    Each full window is copied into a float array, smoothed, normalised
    over the window with ``computeNorm`` and converted with
    ``computeString``. The result of ``buildDictionary`` for that string is
    merged into one dictionary. ``smoothParam``, ``levels``, ``overflow``
    and ``kmerLength`` are read from the enclosing scope.

    NOTE(review): another ``getDictFromSequence(signal, l=False)`` exists in
    this source; if both live in the same module the later definition
    shadows this one - confirm which is intended.

    Args:
        signal: Sliceable sequence of numeric samples; it is not modified.
        refWindowSize: Number of samples per window.
        refWindowJump: Step between consecutive window starts.

    Returns:
        dict: Merged entries from every window. On a key collision the
        entry from the later window wins (``dict.update`` semantics). The
        dict is empty when ``signal`` is shorter than ``refWindowSize``.

    Raises:
        ValueError: If ``refWindowJump`` is 0 (raised by ``range``).
    """
    dic = {}
    # Only complete windows are visited; a trailing partial window is ignored.
    for winBeg in range(0, len(signal) - refWindowSize + 1, refWindowJump):
        winEnd = winBeg + refWindowSize
        # np.array() already yields an independent float copy of the slice,
        # so no copy.deepcopy() is needed per window.
        window = np.array(signal[winBeg:winEnd], dtype=float)
        window = smoothSignal(window, smoothParam)
        shift, scale = computeNorm(window, 0, refWindowSize)
        windowString = computeString(
            window,
            0,
            refWindowSize,
            shift,
            scale,
            levels,
            overflow=overflow,
        )
        dic.update(buildDictionary(windowString, kmerLength))
    return dic
def getDictFromSequence(signal, l=False):
    """Convert a whole signal to a level string or to its k-mer dictionary.

    The input is copied into a float array, smoothed, normalised over its
    full length with ``computeNorm`` and converted with ``computeString``.
    ``smoothParam``, ``levels``, ``overflow`` and ``kmerLength`` are read
    from the enclosing scope.

    NOTE(review): another ``getDictFromSequence(signal, refWindowSize,
    refWindowJump)`` exists in this source; if both live in the same module
    only the later definition is reachable - confirm which is intended.

    Args:
        signal: Sequence of numeric samples; it is not modified.
        l: When equal to ``True``, return the level string itself instead
            of the dictionary built from it.

    Returns:
        The ``computeString`` result when ``l == True``; otherwise the
        result of ``buildDictionarySpecial`` for that string.
    """
    # np.array() copies by default, so the caller's data stays untouched
    # without an additional copy.deepcopy() pass.
    currSignal = np.array(signal, dtype=float)
    currSignal = smoothSignal(currSignal, smoothParam)
    sampleCount = len(currSignal)
    currSignalShift, currSignalScale = computeNorm(currSignal, 0, sampleCount)
    currString = computeString(
        currSignal,
        0,
        sampleCount,
        currSignalShift,
        currSignalScale,
        levels,
        overflow=overflow,
    )
    # Kept as an equality test on purpose: switching to plain truthiness
    # would change the result for callers passing other truthy values
    # (e.g. 2 or a non-empty string), which currently get the dictionary.
    if l == True:  # noqa: E712
        return currString
    return buildDictionarySpecial(currString, kmerLength)
stringToSignal(refSeq, mod, repeatSignal=repeatSignal), float) readSignal = np.array(getSignalFromRead(posRead), dtype=float) readSignal = readSignal[fromSignal:toSignal] fakeSignal = [] fakeIndex = -1 while len(fakeSignal) <= toSignal: fakeIndex = random.randint(0, len(negReadsPaths) - 1) fakeSignal = np.array(getSignalFromRead(negReadsPaths[fakeIndex]), dtype=float) fakeSignal = fakeSignal[fromSignal:toSignal] readSignal = readSignal[:workingLen] refSignal = refSignal[:workingLen] fakeSignal = fakeSignal[:workingLen] readSignal = smoothSignal(readSignal, smoothParam) refSignal = smoothSignal(refSignal, smoothParam) fakeSignal = smoothSignal(fakeSignal, smoothParam) readShift, readScale = computeNorm(readSignal, 0, len(readSignal)) refShift, refScale = computeNorm(refSignal, 0, len(refSignal)) fakeShift, fakeScale = computeNorm(fakeSignal, 0, len(fakeSignal)) readStrings, refStrings, fakeStrings = {}, {}, {} for l in levels: readStrings[l] = computeString( readSignal, 0, len(readSignal), readShift, readScale,
x.append(" ") y.append(originalSignal[i]) #plt.plot(y) #plt.xticks(y_pos, x, color='orange', rotation=45, fontweight='bold', horizontalalignment='right') #plt.tick_params(labelbottom='off') refSignal = stringToSignal(refSeq, mod, repeatSignal=repeatSignal) refSeqHelper = [] for i in refSeq: refSeqHelper.append(i) for k in range(repeatSignal - 1): refSeqHelper.append("_") y = smoothSignal(y, 5) refSignal = smoothSignal(refSignal, 5) ySignalShift, ySignalScale = computeNorm(y, 0, len(y)) #y -= ySignalShift #y /= ySignalScale refSignalShift, refSignalScale = computeNorm(refSignal, 0, len(refSignal)) #refSignal -= refSignalShift #refSignal /= refSignalScale readString = computeString(y, 0, len(y), ySignalShift, ySignalScale,
readSignal = np.array(getSignalFromRead(readFile)[signalFrom:signalTo], dtype=float) refSignal = np.array( stringToSignal(refSeq, mod, repeatSignal=repeatSignal), float) # fakeSignal = np.array(stringToSignal(fakeSeq, mod, repeatSignal = repeatSignal), # float) fakeSignal = [] fakeIndex = -1 while len(fakeSignal) <= signalTo: fakeIndex = random.randint(0, len(negReads) - 1) fakeSignal = np.array(getSignalFromRead(negReads[fakeIndex]), dtype=float) fakeSignal = fakeSignal[signalFrom:signalTo] readSignalSm = smoothSignal(readSignal, 5) refSignalSm = smoothSignal(refSignal, 5) fakeSignalSm = smoothSignal(fakeSignal, 5) readShiftSm, readScaleSm = computeNorm(readSignalSm, 0, len(readSignalSm)) refShiftSm, refScaleSm = computeNorm(refSignalSm, 0, len(refSignalSm)) fakeShiftSm, fakeScaleSm = computeNorm(fakeSignalSm, 0, len(fakeSignalSm)) readString2Sm = computeString( readSignalSm, 0, len(readSignalSm), readShiftSm, readScaleSm, levels, overflow=0.25, ) refString2Sm = computeString(refSignalSm,
readCounter += 1 if strand == 1: refSeq = str(Fasta(refFilePath)[ctg][fromRef:toRef]) else: refSeq = str(-Fasta(refFilePath)[ctg][fromRef:toRef]) refSignal = np.array( stringToSignal(refSeq, mod, repeatSignal=repeatSignal), float) readSignal = np.array(getSignalFromRead(posRead), dtype=float) readSignal = readSignal[fromSignal:toSignal] readSignal = readSignal[:workingLen] refSignal = refSignal[:workingLen] readSignalSm = smoothSignal(readSignal, smoothParam) refSignalSm = smoothSignal(refSignal, smoothParam) readShift, readScale = computeNorm(readSignal, 0, len(readSignal)) readShiftSm, readScaleSm = computeNorm(readSignalSm, 0, len(readSignalSm)) refShift, refScale = computeNorm(refSignal, 0, len(refSignal)) refShiftSm, refScaleSm = computeNorm(refSignalSm, 0, len(refSignalSm)) readStrings, readStringsSm, refStrings, refStringsSm = {}, {}, {}, {} for l in levels: readStrings[l] = computeString( readSignal, 0, len(readSignal), readShift,
if aln.q_en - aln.q_st > 0.95 * len(readFastq) and aln.strand == 1 and aln.ctg == workingContig ] if len(hits) != 1: continue hit = hits[0] refPosition = hit.r_st / len(ref[hit.ctg]) print(f"I am in ctg {hit.ctg} in around {refPosition}") if hit.strand == 1: refSeq = str(ref[hit.ctg][hit.r_st : hit.r_en]) refSignal = stringToSignal(refSeq, mod, repeatSignal) refSignal = smoothSignal(refSignal, smoothParam) #refShift, refScale = computeNorm(refSignal, 0, len(refSignal)) refShift, refScale = globalNorms[hit.ctg][0], globalNorms[hit.ctg][1] refString = computeString( refSignal, 0, len(refSignal), refShift, refScale, level, overflow=overflow, ) refString = refString[5:-5] readSignal = getSignalFromRead(sample) readSignalLen = len(readSignal) readSignal = readSignal[readSignalBeg:readSignalEnd] readString = getLevelString(readSignal, smoothParam, level, overflow) found = None for i in range(len(storeContig[hit.ctg]) - len(refString) + 1): w = storeContig[hit.ctg][i : i + len(refString)]
# Signal generated by stringToSignal for the reference sequence; both
# signals are turned into float arrays so the in-place arithmetic works.
refSignal = stringToSignal(refSeq, mod, repeatSignal=repeatSignal)
refSignal = np.array(refSignal, dtype=float)
readSignal = np.array(readSignal, dtype=float)

# Standardise each signal to zero mean and unit standard deviation.
refSignal -= np.mean(refSignal)
readSignal -= np.mean(readSignal)
refSignal /= np.std(refSignal)
readSignal /= np.std(readSignal)

# Clip outliers symmetrically to [-2, 2]. Previously the reference was only
# capped from above and the read only from below, although both are drawn
# on the same +/-2.2 axis range further down.
refSignal = np.clip(refSignal, -2.0, 2.0)
readSignal = np.clip(readSignal, -2.0, 2.0)

# Two smoothed variants of each signal: smoothSignal and smoothSignalMed
# (presumably moving average vs. moving median - confirm in signalHelper).
refSignalAvg = smoothSignal(refSignal, smoothParam)
readSignalAvg = smoothSignal(readSignal, smoothParam)
refSignalMed = smoothSignalMed(refSignal, smoothParam)
readSignalMed = smoothSignalMed(readSignal, smoothParam)

# 3x2 grid: rows are raw / smoothSignal / smoothSignalMed,
# columns are reference (left) and read (right).
f, (ax1, ax2, ax3) = plt.subplots(3, 2)
ax1[0].plot(range(len(refSignal)), refSignal)
ax1[1].plot(range(len(readSignal)), readSignal)
ax2[0].plot(range(len(refSignalAvg)), refSignalAvg)
ax2[1].plot(range(len(readSignalAvg)), readSignalAvg)
ax3[0].plot(range(len(refSignalMed)), refSignalMed)
ax3[1].plot(range(len(readSignalMed)), readSignalMed)

# Leave a small margin around the clipped [-2, 2] range.
ax1[0].set_ylim(bottom=-2.2, top=2.2)
ax1[1].set_ylim(bottom=-2.2, top=2.2)
from signalHelper import stringToSignal from signalHelper import smoothSignal, computeNorm, computeString ref = Fasta(refFilePath) mod = KmerModel.load_from_hdf5(kmerModelFilePath) for contig in ref: refSeqPos = str(contig[:]) refSeqNeg = str(contig[:].complement) refSignalPos = np.array( stringToSignal(refSeqPos, mod, repeatSignal=repeatSignal), float) refSignalNeg = np.array( stringToSignal(refSeqNeg, mod, repeatSignal=repeatSignal), float) refSignalPos = smoothSignal(refSignalPos, smoothParam) refSignalNeg = smoothSignal(refSignalNeg, smoothParam) refSignalPosShift, refSignalPosScale = computeNorm(refSignalPos, 0, len(refSignalPos)) refSignalNegShift, refSignalNegScale = computeNorm(refSignalNeg, 0, len(refSignalNeg)) for l in levels: refStringPos = computeString( refSignalPos, 0, len(refSignalPos), refSignalPosShift, refSignalPosScale, l,