def tryToFusion(fastqSequence, reverseFileName, threshold, log, seqOutput):
    """Look up the mate of a forward read in the reverse file and merge them.

    The forward read is first trimmed with ``applyDrasticThreshold``. The
    reverse file is then scanned line by line for a header equal to
    ``fastqSequence.getReverseLineHeader()``. Each record found under such a
    header is trimmed with the same threshold and passed to
    ``FastqSequence.matchFastqSequence`` together with the forward read.

    NOTE(review): another ``tryToFusion`` taking four parameters appears later
    in this file; if both live in the same module the later one replaces this
    one at import time -- confirm which is intended.

    Args:
        fastqSequence: forward read; must provide ``applyDrasticThreshold``,
            ``getReverseLineHeader`` and ``getLineHeader``.
        reverseFileName: path of the reverse-read file to scan.
        threshold: value forwarded to ``applyDrasticThreshold`` for both reads.
        log: writable text stream receiving one line per merge attempt.
        seqOutput: writable text stream receiving ``>header`` / sequence pairs
            for every merge that returns a result.
    """

    def mergeWithMate(mate):
        # Trim the mate, try the merge and report the outcome.
        mate.applyDrasticThreshold(threshold)
        s = FastqSequence.matchFastqSequence(fastqSequence, mate)
        pairLabel = fastqSequence.getLineHeader() + " with " + mate.getLineHeader() + "\n"
        if s is None:
            log.write("*Can't merge " + pairLabel)
            return
        seqOutput.write(">" + s.getLineHeader() + "\n")
        seqOutput.write(s.sequence + "\n")
        logging.debug(s.getLineHeader() + "\t" + str(s.typeFusion))
        if s.typeFusion == s.TYPE_FUSION_OK:
            outcome = "Merge "
        elif s.typeFusion == s.TYPE_FUSION_NO_MATCH:
            outcome = "Forced merge "
        elif s.typeFusion == s.TYPE_FUSION_PARTIAL:
            outcome = "Partial merge "
        else:
            outcome = "Can't merge "
        log.write(outcome + pairLabel)

    fastqSequence.applyDrasticThreshold(threshold)
    reverseLineHeader = fastqSequence.getReverseLineHeader()
    logging.debug("Search " + reverseLineHeader)

    lastHeaderLineNumber = 0
    lastSeparatorLineNumber = 0
    parseFastq = False
    rFastqSequence = None

    # "with" guarantees the file is closed even if a merge step raises.
    with open(reverseFileName, "r") as f:
        for lineNumber, rawLine in enumerate(f, 1):
            # Strip only the newline: slicing off the last character would
            # eat real data when the file has no trailing newline.
            line = rawLine.rstrip("\n")

            # A line starting with "@" right after a separator is a quality
            # string, not a header. Line 1 needs the explicit exception
            # because lastSeparatorLineNumber starts at 0.
            isHeader = line.startswith("@") and (
                lineNumber == 1 or lastSeparatorLineNumber != lineNumber - 1
            )
            if isHeader:
                logging.debug(line + " est un header")
                if parseFastq:
                    # The record being collected is complete: merge it.
                    mergeWithMate(rFastqSequence)
                    parseFastq = False
                if line == reverseLineHeader:
                    parseFastq = True
                    rFastqSequence = FastqSequence.readFasqSequenceHeader(line)
                    lastHeaderLineNumber = lineNumber
            elif parseFastq:
                if lastHeaderLineNumber == lineNumber - 1:
                    rFastqSequence.setSequence(line)
                elif lastSeparatorLineNumber == lineNumber - 1:
                    # Checked before the "+" test so that a quality string
                    # beginning with "+" is not mistaken for a separator.
                    rFastqSequence.setQuality(line)
                elif line.startswith("+"):
                    lastSeparatorLineNumber = lineNumber

        # The merge is otherwise only triggered by the *next* header, so a
        # mate that is the last record of the file must be handled here.
        if parseFastq:
            mergeWithMate(rFastqSequence)
def testQualityThreshold(fastqFileName, threshold):
    """Log how much ``applyDrasticThreshold`` shortens each read of a file.

    The file is parsed record by record (header line starting with "@",
    sequence line, "+" separator line, quality line). For every record the
    sequence length before and after ``applyDrasticThreshold(threshold)`` is
    written to the debug log. Nothing is returned.

    Args:
        fastqFileName: path of the file to read.
        threshold: value forwarded to ``applyDrasticThreshold``.
    """

    def logThresholdEffect(record):
        # Report the sequence length before and after trimming.
        avLen = len(record.sequence)
        record.applyDrasticThreshold(threshold)
        logger.debug("Avant : " + str(avLen) + " / Apres : " + str(len(record.sequence)))

    lastHeaderLineNumber = 0
    lastSeparatorLineNumber = 0
    fastqSequence = None

    # "with" guarantees the file is closed even if parsing raises.
    with open(fastqFileName, "r") as f:
        for lineNumber, rawLine in enumerate(f):
            # Strip only the newline: slicing off the last character would
            # eat real data when the file has no trailing newline.
            line = rawLine.rstrip("\n")

            # A line starting with "@" right after a separator is a quality
            # string, not a header.
            if line.startswith("@") and lastSeparatorLineNumber != lineNumber - 1:
                if fastqSequence is not None:
                    logThresholdEffect(fastqSequence)
                fastqSequence = FastqSequence.readFasqSequenceHeader(line)
                lastHeaderLineNumber = lineNumber
                logging.debug(line)
            elif lastHeaderLineNumber == lineNumber - 1:
                fastqSequence.setSequence(line)
            elif lastSeparatorLineNumber == lineNumber - 1:
                # Checked before the "+" test so that a quality string
                # beginning with "+" is not mistaken for a separator.
                fastqSequence.setQuality(line)
            elif line.startswith("+"):
                lastSeparatorLineNumber = lineNumber

    # Records are only reported when the next header shows up, so the last
    # one has to be reported once the file is exhausted.
    if fastqSequence is not None:
        logThresholdEffect(fastqSequence)
def readFastQFile(fileName, reverseFileName, threshold, seqOutputName, logFileName):
    """Parse the forward file and try to merge every read with its mate.

    Each record of ``fileName`` (header line starting with "@", sequence line,
    "+" separator line, quality line) is handed to ``tryToFusion`` together
    with the reverse file name, the threshold, the log stream and the
    sequence output stream.

    NOTE(review): another ``readFastQFile`` taking four parameters appears
    later in this file; if both live in the same module the later one replaces
    this one at import time -- confirm which is intended.

    Args:
        fileName: path of the forward-read file.
        reverseFileName: path of the reverse-read file, forwarded to
            ``tryToFusion``.
        threshold: value forwarded to ``tryToFusion``.
        seqOutputName: path of the sequence output file (opened for writing).
        logFileName: path of the log file (opened for writing).
    """
    lastHeaderLineNumber = 0
    lastSeparatorLineNumber = 0
    fastqSequence = None

    # "with" guarantees all three files are closed even if a merge raises.
    with open(fileName, "r") as f, \
            open(logFileName, "w") as log, \
            open(seqOutputName, "w") as seqOutput:
        for lineNumber, rawLine in enumerate(f):
            # Strip only the newline: slicing off the last character would
            # eat real data when the file has no trailing newline.
            line = rawLine.rstrip("\n")

            # A line starting with "@" right after a separator is a quality
            # string, not a header.
            if line.startswith("@") and lastSeparatorLineNumber != lineNumber - 1:
                if fastqSequence is not None:
                    tryToFusion(fastqSequence, reverseFileName, threshold, log, seqOutput)
                fastqSequence = FastqSequence.readFasqSequenceHeader(line)
                lastHeaderLineNumber = lineNumber
                logging.debug(line)
            elif lastHeaderLineNumber == lineNumber - 1:
                fastqSequence.setSequence(line)
            elif lastSeparatorLineNumber == lineNumber - 1:
                # Checked before the "+" test so that a quality string
                # beginning with "+" is not mistaken for a separator.
                fastqSequence.setQuality(line)
            elif line.startswith("+"):
                lastSeparatorLineNumber = lineNumber

        # The last record has no following header to trigger its merge.
        # Skip it when the input contained no record at all.
        if fastqSequence is not None:
            tryToFusion(fastqSequence, reverseFileName, threshold, log, seqOutput)
def readFastQFile(fileName, reverseFileName, threshold, logFileName):
    """Parse the forward file and try to merge every read with its mate.

    Each record of ``fileName`` (header line starting with "@", sequence line,
    "+" separator line, quality line) is handed to ``tryToFusion`` together
    with the reverse file name, the threshold and the log stream.

    Args:
        fileName: path of the forward-read file.
        reverseFileName: path of the reverse-read file, forwarded to
            ``tryToFusion``.
        threshold: value forwarded to ``tryToFusion``.
        logFileName: path of the log file (opened for writing).
    """
    lastHeaderLineNumber = 0
    lastSeparatorLineNumber = 0
    fastqSequence = None

    # "with" guarantees both files are closed even if a merge raises.
    with open(fileName, "r") as f, open(logFileName, "w") as log:
        for lineNumber, rawLine in enumerate(f):
            # Strip only the newline: slicing off the last character would
            # eat real data when the file has no trailing newline.
            line = rawLine.rstrip("\n")

            # A line starting with "@" right after a separator is a quality
            # string, not a header.
            if line.startswith("@") and lastSeparatorLineNumber != lineNumber - 1:
                if fastqSequence is not None:
                    tryToFusion(fastqSequence, reverseFileName, threshold, log)
                fastqSequence = FastqSequence.readFasqSequenceHeader(line)
                lastHeaderLineNumber = lineNumber
            elif lastHeaderLineNumber == lineNumber - 1:
                fastqSequence.setSequence(line)
            elif lastSeparatorLineNumber == lineNumber - 1:
                # Checked before the "+" test so that a quality string
                # beginning with "+" is not mistaken for a separator.
                fastqSequence.setQuality(line)
            elif line.startswith("+"):
                lastSeparatorLineNumber = lineNumber

        # The last record has no following header to trigger its merge.
        # Skip it when the input contained no record at all.
        if fastqSequence is not None:
            tryToFusion(fastqSequence, reverseFileName, threshold, log)
def cutFusionAndMerge(headerF, dataF, headerR, dataR, wellThresholds, log, outputFile):
    """Trim a forward/reverse pair to fixed lengths and merge them.

    The forward data is cut to its first ``wellThresholds[0]`` characters.
    The reverse data is passed through
    ``DnaUtils.complementAndReverseDnaSequence`` and the result is cut to its
    first ``wellThresholds[1]`` characters. Two sequence objects are built
    from the headers and the trimmed data (their ``quality`` is set to
    ``None``) and given to ``FastqSequence.matchFastqSequence``. One line
    describing the outcome is written to ``log``.

    Args:
        headerF: header line of the forward read.
        dataF: forward sequence data.
        headerR: header line of the reverse read.
        dataR: reverse sequence data.
        wellThresholds: indexable; item 0 is the forward cut length and
            item 1 the reverse cut length.
        log: writable text stream receiving the outcome line.
        outputFile: accepted for interface compatibility; not used here.

    Returns:
        The object returned by ``FastqSequence.matchFastqSequence``, or
        ``None`` when it produced no result.
    """
    logger.debug("[cutFusionAndMerge] " + headerF + ", " + headerR + ", " + str(wellThresholds))

    forward = 0
    reverse = 1
    trimmedForward = dataF[0:wellThresholds[forward]]
    # The reverse read is complemented/reversed first and trimmed afterwards.
    trimmedReverse = DnaUtils.complementAndReverseDnaSequence(dataR)[:wellThresholds[reverse]]

    fastqSequenceForward = FastqSequence.createFasqSequenceHeader(headerF)
    # NOTE(review): the meaning of the second argument (True) is defined by
    # FastqSequence.setSequence, which is not visible here -- confirm.
    fastqSequenceForward.setSequence(trimmedForward, True)
    fastqSequenceForward.quality = None

    fastqSequenceReverse = FastqSequence.createFasqSequenceHeader(headerR)
    fastqSequenceReverse.setSequence(trimmedReverse)
    fastqSequenceReverse.quality = None

    newFastQ = FastqSequence.matchFastqSequence(fastqSequenceForward, fastqSequenceReverse)
    pairLabel = (
        fastqSequenceForward.getLineHeader()
        + " with "
        + fastqSequenceReverse.getLineHeader()
        + "\n"
    )

    # tryToFusion in this file treats a None result as a failed merge; do the
    # same here instead of failing with AttributeError on the lines below.
    if newFastQ is None:
        log.write("*Can't merge " + pairLabel)
        return None

    logging.debug(newFastQ.getLineHeader() + "\t" + str(newFastQ.typeFusion))
    if newFastQ.typeFusion == newFastQ.TYPE_FUSION_OK:
        outcome = "Merge "
    elif newFastQ.typeFusion == newFastQ.TYPE_FUSION_NO_MATCH:
        outcome = "Forced merge "
    elif newFastQ.typeFusion == newFastQ.TYPE_FUSION_PARTIAL:
        outcome = "Partial merge "
    else:
        outcome = "Can't merge "
    log.write(outcome + pairLabel)
    return newFastQ
def tryToFusion(fastqSequence, reverseFileName, threshold, log):
    """Look up the mate of a forward read in the reverse file and merge them.

    The reverse file is scanned line by line for a header equal to
    ``fastqSequence.getReverseLineHeader()``. Each record found under such a
    header is trimmed with ``applyDrasticThreshold(threshold)`` and passed to
    ``FastqSequence.matchFastqSequence`` together with the forward read. A
    successful merge is printed to standard output (header, then sequence);
    a failed one is reported on ``log``.

    The forward read is used as given: no threshold is applied to it here.
    NOTE(review): the original body started with a bare ``fastqSequence``
    expression that had no effect; it may have been a truncated
    ``applyDrasticThreshold`` call -- confirm.

    Args:
        fastqSequence: forward read; must provide ``getReverseLineHeader``
            and ``getLineHeader``.
        reverseFileName: path of the reverse-read file to scan.
        threshold: value forwarded to ``applyDrasticThreshold`` for the mate.
        log: writable text stream receiving the failure messages.
    """

    def mergeWithMate(mate):
        # Trim the mate, try the merge and report the outcome.
        mate.applyDrasticThreshold(threshold)
        s = FastqSequence.matchFastqSequence(fastqSequence, mate)
        if s is not None:
            print(s.getLineHeader())
            print(s.sequence)
        else:
            log.write("Can't merge " + fastqSequence.getLineHeader() + " with " + mate.getLineHeader() + "\n")

    reverseLineHeader = fastqSequence.getReverseLineHeader()
    lastHeaderLineNumber = 0
    lastSeparatorLineNumber = 0
    parseFastq = False
    rFastqSequence = None

    # "with" guarantees the file is closed even if a merge step raises.
    with open(reverseFileName, "r") as f:
        for lineNumber, rawLine in enumerate(f, 1):
            # Strip only the newline: slicing off the last character would
            # eat real data when the file has no trailing newline.
            line = rawLine.rstrip("\n")

            # A line starting with "@" right after a separator is a quality
            # string, not a header. Line 1 needs the explicit exception:
            # lastSeparatorLineNumber starts at 0, so without it the very
            # first header of the file would never be recognised.
            isHeader = line.startswith("@") and (
                lineNumber == 1 or lastSeparatorLineNumber != lineNumber - 1
            )
            if isHeader:
                if parseFastq:
                    # The record being collected is complete: merge it.
                    mergeWithMate(rFastqSequence)
                    parseFastq = False
                if line == reverseLineHeader:
                    parseFastq = True
                    rFastqSequence = FastqSequence.readFasqSequenceHeader(line)
                    lastHeaderLineNumber = lineNumber
            elif parseFastq:
                if lastHeaderLineNumber == lineNumber - 1:
                    rFastqSequence.setSequence(line)
                elif lastSeparatorLineNumber == lineNumber - 1:
                    # Checked before the "+" test so that a quality string
                    # beginning with "+" is not mistaken for a separator.
                    rFastqSequence.setQuality(line)
                elif line.startswith("+"):
                    lastSeparatorLineNumber = lineNumber

        # The merge is otherwise only triggered by the *next* header, so a
        # mate that is the last record of the file must be handled here.
        if parseFastq:
            mergeWithMate(rFastqSequence)
def readSingleFastQFile(fileName, threshold, logFileName, mapOligos, outputDir):
    """Parse a single file, trim every read and dispatch it by marker.

    Each record of ``fileName`` (header line starting with "@", sequence line,
    "+" separator line, quality line) is trimmed with
    ``applyDrasticThreshold(threshold)`` and then passed to
    ``DnaUtils.splitMarker1`` together with ``mapOligos``, ``outputDir`` and
    ``fileName``. The sequence before and after trimming is written to the
    debug log for every record, including the last one.

    NOTE(review): an identical ``readSingleFastQFile`` appears later in this
    file; if both live in the same module the later one replaces this one at
    import time -- confirm the duplicate is intended.

    Args:
        fileName: path of the file to read.
        threshold: value forwarded to ``applyDrasticThreshold``.
        logFileName: path of a log file; it is opened for writing (created or
            truncated) but nothing is written to it here.
        mapOligos: forwarded unchanged to ``DnaUtils.splitMarker1``.
        outputDir: forwarded unchanged to ``DnaUtils.splitMarker1``.
    """

    def processRead(record):
        # Trim one complete record and hand it to the marker splitter.
        logger.debug("before threshold " + record.sequence)
        record.applyDrasticThreshold(threshold)
        logger.debug("after threshold " + record.sequence)
        logger.debug("after threshold " + DnaUtils.complementAndReverseDnaSequence(record.sequence))
        DnaUtils.splitMarker1(mapOligos, outputDir, record, fileName)

    lastHeaderLineNumber = 0
    lastSeparatorLineNumber = 0
    fastqSequence = None

    # "with" guarantees both files are closed even if processing raises.
    # The log file is still opened so that it is created/truncated as before.
    with open(fileName, "r") as f, open(logFileName, "w"):
        for lineNumber, rawLine in enumerate(f):
            # Strip only the newline: slicing off the last character would
            # eat real data when the file has no trailing newline.
            line = rawLine.rstrip("\n")

            # A line starting with "@" right after a separator is a quality
            # string, not a header.
            if line.startswith("@") and lastSeparatorLineNumber != lineNumber - 1:
                if fastqSequence is not None:
                    processRead(fastqSequence)
                fastqSequence = FastqSequence.readFasqSequenceHeader(line)
                lastHeaderLineNumber = lineNumber
                logging.debug(line)
            elif lastHeaderLineNumber == lineNumber - 1:
                logging.debug("seq in file : " + line)
                fastqSequence.setSequence(line)
            elif lastSeparatorLineNumber == lineNumber - 1:
                # Checked before the "+" test so that a quality string
                # beginning with "+" is not mistaken for a separator.
                fastqSequence.setQuality(line)
            elif line.startswith("+"):
                lastSeparatorLineNumber = lineNumber

        # The last record has no following header to trigger its processing.
        # Skip it when the input contained no record at all.
        if fastqSequence is not None:
            processRead(fastqSequence)
def readSingleFastQFile(fileName, threshold, logFileName, mapOligos, outputDir):
    """Parse a single file, trim every read and dispatch it by marker.

    Each record of ``fileName`` (header line starting with "@", sequence line,
    "+" separator line, quality line) is trimmed with
    ``applyDrasticThreshold(threshold)`` and then passed to
    ``DnaUtils.splitMarker1`` together with ``mapOligos``, ``outputDir`` and
    ``fileName``. The sequence before and after trimming is written to the
    debug log for every record, including the last one.

    Args:
        fileName: path of the file to read.
        threshold: value forwarded to ``applyDrasticThreshold``.
        logFileName: path of a log file; it is opened for writing (created or
            truncated) but nothing is written to it here.
        mapOligos: forwarded unchanged to ``DnaUtils.splitMarker1``.
        outputDir: forwarded unchanged to ``DnaUtils.splitMarker1``.
    """

    def processRead(record):
        # Trim one complete record and hand it to the marker splitter.
        logger.debug("before threshold " + record.sequence)
        record.applyDrasticThreshold(threshold)
        logger.debug("after threshold " + record.sequence)
        logger.debug("after threshold " + DnaUtils.complementAndReverseDnaSequence(record.sequence))
        DnaUtils.splitMarker1(mapOligos, outputDir, record, fileName)

    lastHeaderLineNumber = 0
    lastSeparatorLineNumber = 0
    fastqSequence = None

    # "with" guarantees both files are closed even if processing raises.
    # The log file is still opened so that it is created/truncated as before.
    with open(fileName, "r") as f, open(logFileName, "w"):
        for lineNumber, rawLine in enumerate(f):
            # Strip only the newline: slicing off the last character would
            # eat real data when the file has no trailing newline.
            line = rawLine.rstrip("\n")

            # A line starting with "@" right after a separator is a quality
            # string, not a header.
            if line.startswith("@") and lastSeparatorLineNumber != lineNumber - 1:
                if fastqSequence is not None:
                    processRead(fastqSequence)
                fastqSequence = FastqSequence.readFasqSequenceHeader(line)
                lastHeaderLineNumber = lineNumber
                logging.debug(line)
            elif lastHeaderLineNumber == lineNumber - 1:
                logging.debug("seq in file : " + line)
                fastqSequence.setSequence(line)
            elif lastSeparatorLineNumber == lineNumber - 1:
                # Checked before the "+" test so that a quality string
                # beginning with "+" is not mistaken for a separator.
                fastqSequence.setQuality(line)
            elif line.startswith("+"):
                lastSeparatorLineNumber = lineNumber

        # The last record has no following header to trigger its processing.
        # Skip it when the input contained no record at all.
        if fastqSequence is not None:
            processRead(fastqSequence)