def getGlobalString(signal):
    """Encode an entire signal as a string using the surrounding settings.

    The signal is smoothed with ``smoothParam``, a shift/scale pair is
    computed over its full length with ``computeNorm``, and the smoothed
    signal is then handed to ``computeString`` together with ``levels`` and
    ``overflow`` (all three names come from the enclosing scope).

    Args:
        signal: The samples to encode; passed as-is to ``smoothSignal``.

    Returns:
        The value produced by ``computeString`` for the range
        ``[0, len(smoothed signal))``.
    """
    smoothed = smoothSignal(signal, smoothParam)
    shift, scale = computeNorm(smoothed, 0, len(smoothed))
    return computeString(
        smoothed,
        0,
        len(smoothed),
        shift,
        scale,
        levels,
        overflow=overflow,
    )
Exemple #2
0
def getLevelStr(signal, l):
    """Encode an entire signal as a string for one explicit level setting.

    The input is copied into a float array, smoothed with ``smoothParam``,
    normalised over its full length with ``computeNorm`` and converted with
    ``computeString`` (``overflow`` comes from the enclosing scope).

    Args:
        signal: Sequence of numeric samples. It is copied before processing,
            so this function never works on the caller's own object.
        l: Forwarded to ``computeString`` in the argument position where the
            other helpers in this file pass ``levels``.

    Returns:
        The value produced by ``computeString`` for the whole signal.
    """
    # np.array() already builds an independent float copy of the input, so
    # running copy.deepcopy() on it first only duplicated the data twice.
    currSignal = np.array(signal, float)
    currSignal = smoothSignal(currSignal, smoothParam)
    currSignalShift, currSignalScale = computeNorm(currSignal, 0,
                                                   len(currSignal))
    return computeString(
        currSignal,
        0,
        len(currSignal),
        currSignalShift,
        currSignalScale,
        l,
        overflow=overflow,
    )
Exemple #3
0
def getDictFromSequence(signal, refWindowSize, refWindowJump):
    """Build one merged dictionary from fixed-size windows of a signal.

    Windows of ``refWindowSize`` samples are taken every ``refWindowJump``
    samples. Each window is copied to a float array, smoothed with
    ``smoothParam``, normalised on its own with ``computeNorm``, converted
    with ``computeString`` and fed to ``buildDictionary`` with
    ``kmerLength``. The per-window results are merged with ``dict.update``,
    so for a key produced by several windows the last window wins.

    Args:
        signal: Sliceable sequence of numeric samples.
        refWindowSize: Number of samples per window.
        refWindowJump: Step between consecutive window starts; ``range``
            raises ``ValueError`` if it is 0.

    Returns:
        The merged dictionary. It is empty when ``signal`` is shorter than
        ``refWindowSize``; trailing samples that do not fill a whole window
        are not processed.
    """
    dic = {}
    for winBeg in range(0, len(signal) - refWindowSize + 1, refWindowJump):
        winEnd = winBeg + refWindowSize
        # np.array() copies the slice into a fresh float array by itself; the
        # former copy.deepcopy() per window was a redundant second copy.
        currSignal = np.array(signal[winBeg:winEnd], float)
        currSignal = smoothSignal(currSignal, smoothParam)
        # Shift/scale are computed per window, not over the whole signal.
        currSignalShift, currSignalScale = computeNorm(currSignal, 0,
                                                       refWindowSize)
        currString = computeString(currSignal,
                                   0,
                                   refWindowSize,
                                   currSignalShift,
                                   currSignalScale,
                                   levels,
                                   overflow=overflow)
        dic.update(buildDictionary(currString, kmerLength))
    return dic
Exemple #4
0
def getDictFromSequence(signal, l=False):
    """Encode a whole signal and return its string or its k-mer dictionary.

    The input is copied into a float array, smoothed with ``smoothParam``,
    normalised over its full length with ``computeNorm`` and converted with
    ``computeString`` using ``levels`` and ``overflow`` from the enclosing
    scope.

    Args:
        signal: Sequence of numeric samples; copied before processing.
        l: When equal to ``True``, return the encoded string itself instead
            of the dictionary built from it.

    Returns:
        The string from ``computeString`` if ``l == True``; otherwise the
        result of ``buildDictionarySpecial(string, kmerLength)``.
    """
    # np.array() already yields an independent float copy, so the previous
    # copy.deepcopy() of the input was a redundant second copy. The unused
    # ``dic = {}`` local was dropped as well.
    currSignal = np.array(signal, float)
    currSignal = smoothSignal(currSignal, smoothParam)
    currSignalShift, currSignalScale = computeNorm(currSignal, 0,
                                                   len(currSignal))
    currString = computeString(
        currSignal,
        0,
        len(currSignal),
        currSignalShift,
        currSignalScale,
        levels,
        overflow=overflow,
    )
    # Equality (not truthiness) is kept on purpose so existing callers see
    # exactly the same branch selection as before.
    if l == True:  # noqa: E712
        return currString
    return buildDictionarySpecial(currString, kmerLength)
Exemple #5
0
    # Pick random entries of negReadsPaths until one yields a signal with more
    # than toSignal samples, so the upper bound of the slice below lies inside
    # the signal.
    # NOTE(review): this loop never terminates if no read in negReadsPaths is
    # longer than toSignal.
    fakeSignal = []
    fakeIndex = -1
    while len(fakeSignal) <= toSignal:
        fakeIndex = random.randint(0, len(negReadsPaths) - 1)
        fakeSignal = np.array(getSignalFromRead(negReadsPaths[fakeIndex]),
                              dtype=float)
    fakeSignal = fakeSignal[fromSignal:toSignal]

    # Cap all three signals at workingLen samples.
    readSignal = readSignal[:workingLen]
    refSignal = refSignal[:workingLen]
    fakeSignal = fakeSignal[:workingLen]

    # Smooth each signal, then compute its own shift/scale over its full
    # (smoothed) length.
    readSignal = smoothSignal(readSignal, smoothParam)
    refSignal = smoothSignal(refSignal, smoothParam)
    fakeSignal = smoothSignal(fakeSignal, smoothParam)
    readShift, readScale = computeNorm(readSignal, 0, len(readSignal))
    refShift, refScale = computeNorm(refSignal, 0, len(refSignal))
    fakeShift, fakeScale = computeNorm(fakeSignal, 0, len(fakeSignal))

    # One result dictionary per signal, keyed by the entries of `levels`.
    readStrings, refStrings, fakeStrings = {}, {}, {}

    for l in levels:
        # The overflow value is hard-coded to 0.30 here rather than taken
        # from a shared setting.
        readStrings[l] = computeString(
            readSignal,
            0,
            len(readSignal),
            readShift,
            readScale,
            l,
            overflow=0.30,
        )
#plt.plot(y)
#plt.xticks(y_pos, x, color='orange', rotation=45, fontweight='bold', horizontalalignment='right')
#plt.tick_params(labelbottom='off')

# Turn the reference sequence into a signal using the model `mod`.
refSignal = stringToSignal(refSeq, mod, repeatSignal=repeatSignal)
refSeqHelper = []

# Build a list with each character of refSeq followed by
# (repeatSignal - 1) "_" placeholders, i.e. repeatSignal entries per
# character.
for i in refSeq:
    refSeqHelper.append(i)
    for k in range(repeatSignal - 1):
        refSeqHelper.append("_")

# Smooth both signals with a fixed parameter of 5.
y = smoothSignal(y, 5)
refSignal = smoothSignal(refSignal, 5)

# Shift/scale of the read signal over its whole length. The values are only
# passed on to computeString; the in-place normalisation below is disabled.
ySignalShift, ySignalScale = computeNorm(y, 0, len(y))
#y -= ySignalShift
#y /= ySignalScale

# Same for the reference signal.
refSignalShift, refSignalScale = computeNorm(refSignal, 0, len(refSignal))
#refSignal -= refSignalShift
#refSignal /= refSignalScale

# Encode the read signal; overflow is hard-coded to 0.30.
readString = computeString(y,
                           0,
                           len(y),
                           ySignalShift,
                           ySignalScale,
                           levels,
                           overflow=0.30)
refString = computeString(refSignal,
Exemple #7
0
    # Reference signal derived from refSeq via the model `mod`, as floats.
    refSignal = np.array(
        stringToSignal(refSeq, mod, repeatSignal=repeatSignal), float)
    # fakeSignal = np.array(stringToSignal(fakeSeq, mod, repeatSignal = repeatSignal),
    #                float)
    # Instead of a synthetic fake signal (disabled above), pick random entries
    # of negReads until one yields more than signalTo samples, then take the
    # [signalFrom:signalTo] slice of it.
    # NOTE(review): this loop never terminates if no read in negReads is
    # longer than signalTo.
    fakeSignal = []
    fakeIndex = -1
    while len(fakeSignal) <= signalTo:
        fakeIndex = random.randint(0, len(negReads) - 1)
        fakeSignal = np.array(getSignalFromRead(negReads[fakeIndex]),
                              dtype=float)
    fakeSignal = fakeSignal[signalFrom:signalTo]

    # Smooth all three signals with a fixed parameter of 5 and compute a
    # shift/scale pair for each smoothed signal over its full length.
    readSignalSm = smoothSignal(readSignal, 5)
    refSignalSm = smoothSignal(refSignal, 5)
    fakeSignalSm = smoothSignal(fakeSignal, 5)
    readShiftSm, readScaleSm = computeNorm(readSignalSm, 0, len(readSignalSm))
    refShiftSm, refScaleSm = computeNorm(refSignalSm, 0, len(refSignalSm))
    fakeShiftSm, fakeScaleSm = computeNorm(fakeSignalSm, 0, len(fakeSignalSm))
    # Encode the smoothed read signal; overflow is hard-coded to 0.25.
    readString2Sm = computeString(
        readSignalSm,
        0,
        len(readSignalSm),
        readShiftSm,
        readScaleSm,
        levels,
        overflow=0.25,
    )
    refString2Sm = computeString(refSignalSm,
                                 0,
                                 len(refSignalSm),
                                 refShiftSm,
        refSeq = str(Fasta(refFilePath)[ctg][fromRef:toRef])
    else:
        refSeq = str(-Fasta(refFilePath)[ctg][fromRef:toRef])

    # Reference signal derived from refSeq via the model `mod`, as floats.
    refSignal = np.array(
        stringToSignal(refSeq, mod, repeatSignal=repeatSignal), float)
    # Signal of the positive read, restricted to [fromSignal:toSignal].
    readSignal = np.array(getSignalFromRead(posRead), dtype=float)
    readSignal = readSignal[fromSignal:toSignal]

    # Cap both signals at workingLen samples.
    readSignal = readSignal[:workingLen]
    refSignal = refSignal[:workingLen]

    # Keep the unsmoothed signals and create smoothed variants next to them.
    readSignalSm = smoothSignal(readSignal, smoothParam)
    refSignalSm = smoothSignal(refSignal, smoothParam)

    # Shift/scale pairs for each of the four signals (raw and smoothed),
    # each computed over that signal's full length.
    readShift, readScale = computeNorm(readSignal, 0, len(readSignal))
    readShiftSm, readScaleSm = computeNorm(readSignalSm, 0, len(readSignalSm))
    refShift, refScale = computeNorm(refSignal, 0, len(refSignal))
    refShiftSm, refScaleSm = computeNorm(refSignalSm, 0, len(refSignalSm))

    # One result dictionary per signal variant.
    readStrings, readStringsSm, refStrings, refStringsSm = {}, {}, {}, {}

    for l in levels:
        readStrings[l] = computeString(
            readSignal,
            0,
            len(readSignal),
            readShift,
            readScale,
            l,
            overflow=0.30,
Exemple #9
0
# Open the reference file and load the k-mer model.
ref = Fasta(refFilePath)
mod = KmerModel.load_from_hdf5(kmerModelFilePath)

for contig in ref:
    # Full contig sequence and its complement, both as plain strings.
    # NOTE(review): `.complement` is accessed as an attribute, not called -
    # confirm this matches the sequence type returned by `contig[:]`.
    refSeqPos = str(contig[:])
    refSeqNeg = str(contig[:].complement)

    # Convert both sequences to float signals via the model.
    refSignalPos = np.array(
        stringToSignal(refSeqPos, mod, repeatSignal=repeatSignal), float)
    refSignalNeg = np.array(
        stringToSignal(refSeqNeg, mod, repeatSignal=repeatSignal), float)

    refSignalPos = smoothSignal(refSignalPos, smoothParam)
    refSignalNeg = smoothSignal(refSignalNeg, smoothParam)

    # Shift/scale per signal, computed once over the whole contig and reused
    # for every entry of `levels` below.
    refSignalPosShift, refSignalPosScale = computeNorm(refSignalPos, 0,
                                                       len(refSignalPos))
    refSignalNegShift, refSignalNegScale = computeNorm(refSignalNeg, 0,
                                                       len(refSignalNeg))

    for l in levels:
        # Encode the whole positive-sequence signal for this level entry.
        refStringPos = computeString(
            refSignalPos,
            0,
            len(refSignalPos),
            refSignalPosShift,
            refSignalPosScale,
            l,
            overflow=overflow,
        )

        refStringNeg = computeString(
Exemple #10
0
    # Keep only alignments that cover more than 95% of the read
    # (q_en - q_st vs. len(readFastq)), have strand == 1 and lie on
    # workingContig.
    hits = [
        aln for aln in referenceIdx.map(readFastq)
        if aln.q_en - aln.q_st > 0.95 *
        len(readFastq) and aln.strand == 1 and aln.ctg == workingContig
    ]
    # Skip this iteration of the enclosing loop unless exactly one alignment
    # qualifies.
    if len(hits) != 1:
        continue
    hit = hits[0]

    # Start of the hit relative to the contig length (a fraction of the
    # contig).
    # NOTE(review): Fasta(refFilePath) is constructed again here and once more
    # below for every read.
    location = hit.r_st / len(Fasta(refFilePath)[hit.ctg])
    print(f"I am in ctg {hit.ctg} in around {location}")

    # Reference subsequence covered by the hit, converted to a signal,
    # smoothed and normalised over its own length.
    refSeq = str(Fasta(refFilePath)[hit.ctg][hit.r_st:hit.r_en])
    refSignal = stringToSignal(refSeq, mod, repeatSignal)
    refSignal = smoothSignal(refSignal, smoothParam)
    refShift, refScale = computeNorm(refSignal, 0, len(refSignal))
    #refShift, refScale = globalNorms[hit.ctg][0], globalNorms[hit.ctg][1]
    refString = computeString(refSignal,
                              0,
                              len(refSignal),
                              refShift,
                              refScale,
                              level,
                              overflow=overflow)
    # Drop the first and last 10 characters of the encoded string.
    refString = refString[10:len(refString) - 10]

    # Stored entry for the contig this hit belongs to.
    contigX = storeContig[hit.ctg]
    startInRef = -1
    for e in range(len(contigX) - len(refString) + 1):
        w = contigX[e:e + len(refString)]
        if w == refString: