def processRead(path, contig_name=None):
    """Report whether the read at ``path`` yields an acceptable index hit.

    The signal is loaded with ``getSignalFromRead``, cut to
    ``[fromRead:toRead]``, turned into a level string by ``getGlobalString``,
    re-spelled over ACGT (a->A, b->C, c->G, d->T) and mapped with
    ``index.map``.  Hits are examined in the order they are produced; the
    first hit that passes the span-length check decides the result.
    Diagnostics are printed to stdout along the way.

    Args:
        path: Read location, passed unchanged to ``getSignalFromRead``.
        contig_name: Optional expected contig name.  When given, the deciding
            hit must have ``hit.ctg == contig_name``.

    Returns:
        ``True`` when a hit passes the span-length check and (if
        ``contig_name`` is given) lies on that contig.  ``False`` when there
        are no hits, no hit passes the check, or the deciding hit is on a
        different contig.

    Raises:
        KeyError: If the level string contains a symbol other than a, b, c, d.
    """
    # NOTE(review): the original text had lost its indentation; the nesting
    # below (accept/reject decided inside the span check, final "Return False."
    # after the loop) is the reconstruction -- confirm against the real file.
    print(path)
    readSignal = np.array(getSignalFromRead(path), dtype=float)
    readSignal = readSignal[fromRead:toRead]
    readLevelString = getGlobalString(readSignal)

    # Re-spell the level alphabet as nucleotides before handing it to index.map.
    helperDict = {"a": "A", "b": "C", "c": "G", "d": "T"}
    helperString = "".join(helperDict[i] for i in readLevelString)

    levelHits = list(index.map(helperString))
    if not levelHits:
        print("Return False.")
        return False

    for hit in levelHits:
        ref_span = hit.r_en - hit.r_st
        query_span = hit.q_en - hit.q_st
        diff = ref_span - query_span
        print("{0}: {1} vs {2}".format(hit.ctg, ref_span, query_span))
        # Start of the hit as a fraction of lengths[hit.ctg].
        print("Position of hit is {0}".format(hit.r_st / lengths[hit.ctg]))

        # NOTE(review): diff is signed, so a hit whose reference span is much
        # shorter than its query span also passes -- confirm whether abs(diff)
        # was intended.
        if diff < 0.05 * query_span:
            a, b = stringAllignment(
                str(refFasta[hit.ctg][hit.r_st:hit.r_en]),
                helperString[hit.q_st:hit.q_en],
            )
            # Print countDashes summed over both aligned strings for 1..19.
            for i in range(1, 20):
                print(i, ":", countDashes(a, i) + countDashes(b, i))

            if contig_name is not None and hit.ctg != contig_name:
                print("Zle urceny contig!")
                print("Return False.")
                return False

            print("Return True.")
            return True

    print("Return False.")
    return False
# Regression checks for the alignment helpers exported by signalHelper.
from signalHelper import stringAllignment

# Each entry: ((first, second) inputs, expected (first, second) gapped output).
# The cases pin an exact match, one extra symbol answered by a single "-",
# and two strings that share no symbol.
ALIGNMENT_CASES = (
    (("ACAATG", "ACAATG"), ("ACAATG", "ACAATG")),
    (("ACXAATG", "ACAATG"), ("ACXAATG", "AC-AATG")),
    (("TTT", "CCC"), ("---TTT", "CCC---")),
)
for inputs, expected in ALIGNMENT_CASES:
    assert stringAllignment(*inputs) == expected

from signalHelper import countDashes

# Each entry: (text, length argument, expected count).
# The expected values are consistent with counting runs of "-" of exactly the
# given length -- confirm against countDashes itself.
DASH_CASES = (
    ("AAAC---AC--", 3, 1),
    ("AAAC---AC--", 2, 1),
    ("AAAC----A----", 4, 2),
)
for text, run_len, expected in DASH_CASES:
    assert countDashes(text, run_len) == expected
refScale, l, overflow=0.30, ) fakeStrings[l] = computeString( fakeSignal, 0, len(fakeSignal), fakeShift, fakeScale, l, overflow=0.30, ) for l in levels: a, b = stringAllignment(refStrings[l], readStrings[l]) c, d = stringAllignment(refStrings[l], fakeStrings[l]) # alignLenRead[levels.index(l)] += len(a) # alignLenFake[levels.index(l)] += len(c) alignLenRead[levels.index(l)] += len(readStrings[l]) alignLenFake[levels.index(l)] += len(fakeStrings[l]) # a = a[:300] # b = b[:300] # c = c[:300] # d = d[:300] dashes1 = [countDashes(a, i) + countDashes(b, i) for i in range(1, 21)] dashes2 = [countDashes(c, i) + countDashes(d, i) for i in range(1, 21)]
# Build the level string for the whole fake signal (positions 0..len) using
# its own shift/scale; the meaning of overflow=0.30 is defined by computeString.
fakeString = computeString(
    fakeSignal,
    0,
    len(fakeSignal),
    fakeSignalShift,
    fakeSignalScale,
    levels,
    overflow=0.30,
)

# Show the three level strings one per line for visual comparison.
print("refstring vs Readstring vs fakestring")
print(refString, readString, fakeString, sep="\n")

# Align the read string against the reference and print both gapped strings.
print("Readstring allignment")
a, b = stringAllignment(refString, readString)
print(a, b, sep="\n")

# Same for the fake string.
print("Fakestring allignment")
c, d = stringAllignment(refString, fakeString)
print(c, d, sep="\n")

# Work on float copies, then normalise y with its shift/scale and clamp every
# value below minSignal up to minSignal.
refSignal = np.array(refSignal, dtype=float)
y = np.array(y, dtype=float)
y -= ySignalShift
y /= ySignalScale
y[y < minSignal] = minSignal
fakeShiftSm, fakeScaleSm, levels, overflow=0.25, ) # print("readsm-1:",readString2Sm) # print("refsm-1 :",refString2Sm) # print("fakesm-1:",fakeString2Sm) over1 = overlappingKmers(readString2Sm, refString2Sm, kmerLen) over2 = overlappingKmers(fakeString2Sm, refString2Sm, kmerLen) print("Overlap of len {0} -> good:{1} vs fake:{2}".format( kmerLen, over1, over2)) a, b = stringAllignment(refString2Sm, readString2Sm) c, d = stringAllignment(refString2Sm, fakeString2Sm) oldGoodK = goodK for i in range(0, len(a) - kmerLen + 1): totalK += 1 goodK += 1 for j in range(i, i + kmerLen): if a[j] == "-" or b[j] == "-": goodK -= 1 break if goodK != oldGoodK: dobre += 1 print(f"Pocet {kmerLen}-ic je {goodK-oldGoodK}") else:
hitLen = levelHits[0].r_en-levelHits[0].r_st print("Hit len is {0}".format(hitLen)) print("Goodreads to all reads {0}/{1}".format(goodPosReads, counter)) ''' for i in levelHits: print("Hit len is {0} from {1} to {2}".format(i.r_en-i.r_st, i.q_st, i.q_en)) print("Len of reference is {0}".format(len(contig))) print("Len of refstring is {0}".format(len(refString))) print("Hit je v {0}".format(hits[0].r_st/len(contig))) print("Hit je v {0}".format(i.r_st/len(refString))) counter += 1 ''' a, b = stringAllignment(refString[levelHits[0].r_st:levelHits[0].r_en], readString[levelHits[0].q_st:levelHits[0].q_en]) for i in range(1, 20): print(i, ":", countDashes(a, i)+countDashes(b, i)) #x = overlappingKmers(refString[levelHits[0].r_st:levelHits[0].r_en],readString[levelHits[0].q_st:levelHits[0].q_en],kmerLen) #print("Overlaps is {0}".format(x)) if counter >= posCounter: break print("\n\nNegative reads!\n") print("*"*200) for readFile in negReadsFiles[:negCounter]: print(readFile)