Example #1
0
File: test.py Project: chaby/dana
def tryToFusion(fastqSequence, reverseFileName, threshold, log, seqOutput):
    """Look up the mate of ``fastqSequence`` in a reverse FASTQ file and merge.

    The forward read is first trimmed with ``applyDrasticThreshold(threshold)``.
    ``reverseFileName`` is then scanned line by line for a header equal to
    ``fastqSequence.getReverseLineHeader()``. Every record found under such a
    header is trimmed the same way and passed, together with the forward
    read, to ``FastqSequence.matchFastqSequence``.

    Args:
        fastqSequence: forward read; must provide ``applyDrasticThreshold``,
            ``getReverseLineHeader`` and ``getLineHeader``.
        reverseFileName: path of the FASTQ file holding the reverse reads.
            Records are expected on exactly four lines (header, sequence,
            ``+`` separator, quality).
        threshold: value forwarded to ``applyDrasticThreshold``.
        log: writable text stream receiving one outcome line per attempt.
        seqOutput: writable text stream receiving ``>header`` and sequence
            lines for every merge result.

    Returns:
        None.
    """
    fastqSequence.applyDrasticThreshold(threshold)
    reverseLineHeader = fastqSequence.getReverseLineHeader()
    logging.debug("Search " + reverseLineHeader)

    def reportFusion(rFastqSequence):
        # Trim the reverse read, attempt the merge and record the outcome.
        rFastqSequence.applyDrasticThreshold(threshold)
        s = FastqSequence.matchFastqSequence(fastqSequence, rFastqSequence)
        pair = (fastqSequence.getLineHeader() + " with " +
                rFastqSequence.getLineHeader() + "\n")
        if s is None:
            log.write("*Can't merge " + pair)
            return

        seqOutput.write(">" + s.getLineHeader() + "\n")
        seqOutput.write(s.sequence + "\n")
        logging.debug(s.getLineHeader() + "\t" + str(s.typeFusion))
        if s.typeFusion == s.TYPE_FUSION_OK:
            prefix = "Merge "
        elif s.typeFusion == s.TYPE_FUSION_NO_MATCH:
            prefix = "Forced merge "
        elif s.typeFusion == s.TYPE_FUSION_PARTIAL:
            prefix = "Partial merge "
        else:
            prefix = "Can't merge "
        log.write(prefix + pair)

    lastHeaderLineNumber = 0
    lastSeparatorLineNumber = 0
    parseFastq = False      # True while reading the record that matched
    rFastqSequence = None

    # ``with`` guarantees the file is closed even if a merge step raises.
    with open(reverseFileName, "r") as f:
        for lineNumber, rawLine in enumerate(f, 1):
            # Only drop the newline: slicing off the last character would
            # truncate a final line that has no trailing newline.
            line = rawLine.rstrip("\n")

            # A line starting with "@" is a header unless it sits right after
            # a separator (then it is a quality string). Line 1 is special
            # because lastSeparatorLineNumber starts at 0.
            # startswith() is used so empty lines do not raise IndexError.
            isHeader = line.startswith("@") and (
                lastSeparatorLineNumber != lineNumber - 1 or lineNumber == 1)

            if isHeader:
                logging.debug(line + " est un header")
                if parseFastq:
                    reportFusion(rFastqSequence)
                    parseFastq = False

                if line == reverseLineHeader:
                    parseFastq = True
                    rFastqSequence = FastqSequence.readFasqSequenceHeader(line)
                    lastHeaderLineNumber = lineNumber
            elif parseFastq:
                if lastHeaderLineNumber == lineNumber - 1:
                    rFastqSequence.setSequence(line)
                elif lastSeparatorLineNumber == lineNumber - 1:
                    # Checked before the separator test: a quality string may
                    # itself begin with "+".
                    rFastqSequence.setQuality(line)
                elif line.startswith("+"):
                    lastSeparatorLineNumber = lineNumber

    # The matching record may be the last one in the file, in which case no
    # following header triggers the merge inside the loop.
    if parseFastq:
        reportFusion(rFastqSequence)
Example #2
0
def testQualityThreshold(fastqFileName, threshold):
    f = open(fastqFileName, "r")

    lineNumber = 0
    lastHeaderLineNumber = 0
    lastSeparatorLineNumber = 0
    sequences = []
    fastqSequence = None

    for line in f:
        line = line[:-1]
        if line[0] == "@" and lastSeparatorLineNumber != lineNumber - 1:
            if fastqSequence != None:
                #tryToFusion(fastqSequence, reverseFileName, threshold, log, seqOutput)
                avLen = len(fastqSequence.sequence)
                fastqSequence.applyDrasticThreshold(threshold)
                logger.debug("Avant : " + str(avLen) + " / Apres : " +
                             str(len(fastqSequence.sequence)))

            fastqSequence = FastqSequence.readFasqSequenceHeader(line)
            lastHeaderLineNumber = lineNumber

            logging.debug(line)
        elif lastHeaderLineNumber == lineNumber - 1:
            fastqSequence.setSequence(line)
        elif line[0] == "+":
            lastSeparatorLineNumber = lineNumber
        elif lastSeparatorLineNumber == lineNumber - 1:
            fastqSequence.setQuality(line)

        lineNumber += 1
    #tryToFusion(fastqSequence, reverseFileName, threshold, log, seqOutput)
    f.close()
Example #3
0
def readFastQFile(fileName, reverseFileName, threshold, seqOutputName,
                  logFileName):
    """Merge every read of a forward FASTQ file with its reverse mate.

    ``fileName`` is parsed record by record (four lines each: header,
    sequence, ``+`` separator, quality). Each completed record is handed to
    ``tryToFusion`` together with the reverse file name, the threshold and
    the two open output streams.

    Args:
        fileName: path of the forward FASTQ file.
        reverseFileName: path of the reverse FASTQ file, forwarded to
            ``tryToFusion``.
        threshold: value forwarded to ``tryToFusion``.
        seqOutputName: path of the sequence output file (created/truncated).
        logFileName: path of the log file (created/truncated).

    Returns:
        None.
    """
    lastHeaderLineNumber = 0
    lastSeparatorLineNumber = 0
    fastqSequence = None

    # One ``with`` closes all three files, including on errors.
    with open(fileName, "r") as f, \
            open(logFileName, "w") as log, \
            open(seqOutputName, "w") as seqOutput:
        for lineNumber, rawLine in enumerate(f):
            # Strip only the newline so an unterminated last line is intact.
            line = rawLine.rstrip("\n")

            # "@" right after a separator is a quality string, not a header.
            if line.startswith("@") and lastSeparatorLineNumber != lineNumber - 1:
                if fastqSequence is not None:
                    tryToFusion(fastqSequence, reverseFileName, threshold,
                                log, seqOutput)

                fastqSequence = FastqSequence.readFasqSequenceHeader(line)
                lastHeaderLineNumber = lineNumber

                logging.debug(line)
            elif lastHeaderLineNumber == lineNumber - 1:
                fastqSequence.setSequence(line)
            elif lastSeparatorLineNumber == lineNumber - 1:
                # Tested before the separator check because a quality string
                # may start with "+".
                fastqSequence.setQuality(line)
            elif line.startswith("+"):
                lastSeparatorLineNumber = lineNumber

        # Flush the last record; skip when the input held no record at all.
        if fastqSequence is not None:
            tryToFusion(fastqSequence, reverseFileName, threshold, log,
                        seqOutput)
Example #4
0
def readFastQFile(fileName, reverseFileName, threshold, logFileName):
    """Try to merge every read of a forward FASTQ file with its reverse mate.

    ``fileName`` is parsed record by record (four lines each: header,
    sequence, ``+`` separator, quality). Each completed record is handed to
    ``tryToFusion`` together with the reverse file name, the threshold and
    the open log stream.

    Args:
        fileName: path of the forward FASTQ file.
        reverseFileName: path of the reverse FASTQ file, forwarded to
            ``tryToFusion``.
        threshold: value forwarded to ``tryToFusion``.
        logFileName: path of the log file (created/truncated).

    Returns:
        None.
    """
    lastHeaderLineNumber = 0
    lastSeparatorLineNumber = 0
    fastqSequence = None

    # ``with`` closes both files, including when a merge step raises.
    with open(fileName, "r") as f, open(logFileName, "w") as log:
        for lineNumber, rawLine in enumerate(f):
            # Strip only the newline so an unterminated last line is intact.
            line = rawLine.rstrip("\n")

            # "@" right after a separator is a quality string, not a header.
            if line.startswith("@") and lastSeparatorLineNumber != lineNumber - 1:
                if fastqSequence is not None:
                    tryToFusion(fastqSequence, reverseFileName, threshold, log)

                fastqSequence = FastqSequence.readFasqSequenceHeader(line)
                lastHeaderLineNumber = lineNumber
            elif lastHeaderLineNumber == lineNumber - 1:
                fastqSequence.setSequence(line)
            elif lastSeparatorLineNumber == lineNumber - 1:
                # Tested before the separator check because a quality string
                # may start with "+".
                fastqSequence.setQuality(line)
            elif line.startswith("+"):
                lastSeparatorLineNumber = lineNumber

        # Flush the last record; skip when the input held no record at all.
        if fastqSequence is not None:
            tryToFusion(fastqSequence, reverseFileName, threshold, log)
Example #5
0
File: test.py Project: chaby/dana
def testQualityThreshold(fastqFileName, threshold):
    f = open(fastqFileName, "r")
    
    lineNumber = 0
    lastHeaderLineNumber    = 0
    lastSeparatorLineNumber = 0
    sequences  = []
    fastqSequence = None
    
    for line in f:
        line = line[:-1]
        if line[0] == "@" and lastSeparatorLineNumber != lineNumber - 1:
            if fastqSequence != None:
                #tryToFusion(fastqSequence, reverseFileName, threshold, log, seqOutput)
                avLen = len(fastqSequence.sequence)
                fastqSequence.applyDrasticThreshold(threshold)
                logger.debug("Avant : " + str(avLen) + " / Apres : " + str(len(fastqSequence.sequence)))
                
            fastqSequence = FastqSequence.readFasqSequenceHeader(line)
            lastHeaderLineNumber = lineNumber
            
            logging.debug(line)
        elif  lastHeaderLineNumber == lineNumber - 1:
            fastqSequence.setSequence(line)
        elif line[0] == "+":
            lastSeparatorLineNumber = lineNumber
        elif lastSeparatorLineNumber == lineNumber - 1:
            fastqSequence.setQuality(line)
            
        lineNumber += 1
    #tryToFusion(fastqSequence, reverseFileName, threshold, log, seqOutput)
    f.close()
Example #6
0
def cutFusionAndMerge(headerF, dataF, headerR, dataR, wellThresholds, log,
                      outputFile):
    """Cut a forward/reverse read pair to fixed lengths and merge them.

    ``dataF`` is cut to its first ``wellThresholds[0]`` characters. ``dataR``
    goes through ``DnaUtils.complementAndReverseDnaSequence`` and the result
    is cut to its first ``wellThresholds[1]`` characters. Both strings are
    wrapped in ``FastqSequence`` objects (quality set to ``None``) and given
    to ``FastqSequence.matchFastqSequence``. One line describing the outcome
    is written to ``log``.

    Args:
        headerF: header line of the forward read.
        dataF: forward sequence string.
        headerR: header line of the reverse read.
        dataR: reverse sequence string.
        wellThresholds: indexable pair ``(forward length, reverse length)``.
        log: writable text stream receiving the outcome line.
        outputFile: not used by this function; kept for caller compatibility.

    Returns:
        The object returned by ``matchFastqSequence``, or ``None`` when it
        produced no result.
    """
    logger.debug("[cutFusionAndMerge] " + headerF + ", " + headerR + ", " +
                 str(wellThresholds))

    # Positions of the forward / reverse cut lengths inside wellThresholds.
    forward = 0
    reverse = 1

    dataF = dataF[0:wellThresholds[forward]]
    # The reverse read is complemented and reversed first, then cut.
    cdataR = DnaUtils.complementAndReverseDnaSequence(dataR)
    cdataR = cdataR[:wellThresholds[reverse]]

    fastqSequenceForward = FastqSequence.createFasqSequenceHeader(headerF)
    fastqSequenceForward.setSequence(dataF, True)
    fastqSequenceForward.quality = None

    fastqSequenceReverse = FastqSequence.createFasqSequenceHeader(headerR)
    fastqSequenceReverse.setSequence(cdataR)
    fastqSequenceReverse.quality = None

    newFastQ = FastqSequence.matchFastqSequence(fastqSequenceForward,
                                                fastqSequenceReverse)

    pair = (fastqSequenceForward.getLineHeader() + " with " +
            fastqSequenceReverse.getLineHeader() + "\n")

    # Other callers of matchFastqSequence in this code base guard against a
    # None result; do the same here instead of failing on attribute access.
    if newFastQ is None:
        log.write("*Can't merge " + pair)
        return None

    logging.debug(newFastQ.getLineHeader() + "\t" + str(newFastQ.typeFusion))
    if newFastQ.typeFusion == newFastQ.TYPE_FUSION_OK:
        prefix = "Merge "
    elif newFastQ.typeFusion == newFastQ.TYPE_FUSION_NO_MATCH:
        prefix = "Forced merge "
    elif newFastQ.typeFusion == newFastQ.TYPE_FUSION_PARTIAL:
        prefix = "Partial merge "
    else:
        prefix = "Can't merge "
    log.write(prefix + pair)

    return newFastQ
Example #7
0
def tryToFusion(fastqSequence, reverseFileName, threshold, log):
    """Look up the mate of ``fastqSequence`` in a reverse FASTQ file and merge.

    ``reverseFileName`` is scanned line by line for a header equal to
    ``fastqSequence.getReverseLineHeader()``. Every record found under such a
    header is trimmed with ``applyDrasticThreshold(threshold)`` and passed,
    together with the forward read, to ``FastqSequence.matchFastqSequence``.
    A merge result is printed to standard output (header, then sequence); a
    ``None`` result is reported on ``log``.

    Args:
        fastqSequence: forward read; must provide ``getReverseLineHeader``
            and ``getLineHeader``.
        reverseFileName: path of the FASTQ file holding the reverse reads.
            Records are expected on exactly four lines (header, sequence,
            ``+`` separator, quality).
        threshold: value forwarded to ``applyDrasticThreshold`` of the
            reverse read.
        log: writable text stream receiving "Can't merge" lines.

    Returns:
        None.
    """
    # NOTE(review): the original body held a bare `fastqSequence` expression
    # here (a no-op), so the forward read is NOT trimmed by this function.
    # Confirm whether a call to applyDrasticThreshold was intended.
    reverseLineHeader = fastqSequence.getReverseLineHeader()

    def reportFusion(rFastqSequence):
        # Trim the reverse read, attempt the merge and emit the outcome.
        rFastqSequence.applyDrasticThreshold(threshold)
        s = FastqSequence.matchFastqSequence(fastqSequence, rFastqSequence)
        if s is not None:
            print(s.getLineHeader())
            print(s.sequence)
        else:
            log.write("Can't merge " + fastqSequence.getLineHeader() +
                      " with " + rFastqSequence.getLineHeader() + "\n")

    lastHeaderLineNumber = 0
    lastSeparatorLineNumber = 0
    parseFastq = False      # True while reading the record that matched
    rFastqSequence = None

    # ``with`` guarantees the file is closed even if a merge step raises.
    with open(reverseFileName, "r") as f:
        for lineNumber, rawLine in enumerate(f, 1):
            # Only drop the newline: slicing off the last character would
            # truncate a final line that has no trailing newline.
            line = rawLine.rstrip("\n")

            # A line starting with "@" is a header unless it sits right after
            # a separator (then it is a quality string). Line 1 must be
            # accepted explicitly: lastSeparatorLineNumber starts at 0, which
            # would otherwise hide the first record of the file.
            isHeader = line.startswith("@") and (
                lastSeparatorLineNumber != lineNumber - 1 or lineNumber == 1)

            if isHeader:
                if parseFastq:
                    reportFusion(rFastqSequence)
                    parseFastq = False

                if line == reverseLineHeader:
                    parseFastq = True
                    rFastqSequence = FastqSequence.readFasqSequenceHeader(line)
                    lastHeaderLineNumber = lineNumber
            elif parseFastq:
                if lastHeaderLineNumber == lineNumber - 1:
                    rFastqSequence.setSequence(line)
                elif lastSeparatorLineNumber == lineNumber - 1:
                    # Checked before the separator test: a quality string may
                    # itself begin with "+".
                    rFastqSequence.setQuality(line)
                elif line.startswith("+"):
                    lastSeparatorLineNumber = lineNumber

    # The matching record may be the last one in the file, in which case no
    # following header triggers the merge inside the loop.
    if parseFastq:
        reportFusion(rFastqSequence)
Example #8
0
File: test.py Project: chaby/dana
def cutFusionAndMerge(headerF, dataF, headerR, dataR, wellThresholds, log, outputFile):
    """Cut a forward/reverse read pair to fixed lengths and merge them.

    The forward data keeps its first ``wellThresholds[0]`` characters. The
    reverse data is transformed by
    ``DnaUtils.complementAndReverseDnaSequence`` and the result keeps its
    first ``wellThresholds[1]`` characters. Each string is stored in a
    ``FastqSequence`` (quality set to ``None``) and the pair is passed to
    ``FastqSequence.matchFastqSequence``. One outcome line is written to
    ``log``.

    Args:
        headerF: header line of the forward read.
        dataF: forward sequence string.
        headerR: header line of the reverse read.
        dataR: reverse sequence string.
        wellThresholds: indexable pair ``(forward length, reverse length)``.
        log: writable text stream receiving the outcome line.
        outputFile: not used by this function; kept for caller compatibility.

    Returns:
        The object returned by ``matchFastqSequence``, or ``None`` when it
        produced no result.
    """
    logger.debug("[cutFusionAndMerge] " + headerF + ", " + headerR + ", " + str(wellThresholds))

    # Positions of the forward / reverse cut lengths inside wellThresholds.
    forward = 0
    reverse = 1

    dataF = dataF[0:wellThresholds[forward]]
    # Complement-and-reverse happens before the cut, not after.
    cdataR = DnaUtils.complementAndReverseDnaSequence(dataR)
    cdataR = cdataR[:wellThresholds[reverse]]

    fastqSequenceForward = FastqSequence.createFasqSequenceHeader(headerF)
    fastqSequenceForward.setSequence(dataF, True)
    fastqSequenceForward.quality = None

    fastqSequenceReverse = FastqSequence.createFasqSequenceHeader(headerR)
    fastqSequenceReverse.setSequence(cdataR)
    fastqSequenceReverse.quality = None

    newFastQ = FastqSequence.matchFastqSequence(fastqSequenceForward, fastqSequenceReverse)

    pair = fastqSequenceForward.getLineHeader() + " with " + fastqSequenceReverse.getLineHeader() + "\n"

    # Other callers of matchFastqSequence in this code base guard against a
    # None result; do the same here instead of failing on attribute access.
    if newFastQ is None:
        log.write("*Can't merge " + pair)
        return None

    logging.debug(newFastQ.getLineHeader() + "\t" + str(newFastQ.typeFusion))
    if newFastQ.typeFusion == newFastQ.TYPE_FUSION_OK:
        prefix = "Merge "
    elif newFastQ.typeFusion == newFastQ.TYPE_FUSION_NO_MATCH:
        prefix = "Forced merge "
    elif newFastQ.typeFusion == newFastQ.TYPE_FUSION_PARTIAL:
        prefix = "Partial merge "
    else:
        prefix = "Can't merge "
    log.write(prefix + pair)

    return newFastQ
Example #9
0
def readSingleFastQFile(fileName, threshold, logFileName, mapOligos,
                        outputDir):
    """Trim every read of a FASTQ file and pass it to ``splitMarker1``.

    ``fileName`` is parsed record by record (four lines each: header,
    sequence, ``+`` separator, quality). Each completed record is trimmed
    with ``applyDrasticThreshold(threshold)`` and then given to
    ``DnaUtils.splitMarker1`` along with ``mapOligos``, ``outputDir`` and
    ``fileName``.

    Args:
        fileName: path of the FASTQ file to read.
        threshold: value forwarded to ``applyDrasticThreshold``.
        logFileName: path of a log file; it is created/truncated but this
            function writes nothing to it.
        mapOligos: forwarded unchanged to ``DnaUtils.splitMarker1``.
        outputDir: forwarded unchanged to ``DnaUtils.splitMarker1``.

    Returns:
        None.
    """
    def processRecord(record):
        # Trim one completed record and dispatch it, tracing each step.
        logger.debug("before threshold " + record.sequence)
        record.applyDrasticThreshold(threshold)
        logger.debug("after threshold " + record.sequence)
        logger.debug("after threshold " +
                     DnaUtils.complementAndReverseDnaSequence(
                         record.sequence))
        DnaUtils.splitMarker1(mapOligos, outputDir, record, fileName)

    lastHeaderLineNumber = 0
    lastSeparatorLineNumber = 0
    fastqSequence = None

    # ``with`` closes both files, including when processing raises.
    with open(fileName, "r") as f, open(logFileName, "w"):
        for lineNumber, rawLine in enumerate(f):
            # Strip only the newline so an unterminated last line is intact.
            line = rawLine.rstrip("\n")

            # "@" right after a separator is a quality string, not a header.
            if line.startswith("@") and lastSeparatorLineNumber != lineNumber - 1:
                if fastqSequence is not None:
                    processRecord(fastqSequence)

                fastqSequence = FastqSequence.readFasqSequenceHeader(line)
                lastHeaderLineNumber = lineNumber

                logging.debug(line)
            elif lastHeaderLineNumber == lineNumber - 1:
                logging.debug("seq in file : " + line)
                fastqSequence.setSequence(line)
            elif lastSeparatorLineNumber == lineNumber - 1:
                # Tested before the separator check because a quality string
                # may start with "+".
                fastqSequence.setQuality(line)
            elif line.startswith("+"):
                lastSeparatorLineNumber = lineNumber

        # Flush the last record; skip when the input held no record at all.
        if fastqSequence is not None:
            processRecord(fastqSequence)
Example #10
0
File: test.py Project: chaby/dana
def readSingleFastQFile(fileName, threshold, logFileName, mapOligos, outputDir):
    """Trim each read of a FASTQ file and hand it to ``splitMarker1``.

    The input is read as four-line records (header, sequence, ``+``
    separator, quality). Once a record is complete it is trimmed with
    ``applyDrasticThreshold(threshold)`` and passed to
    ``DnaUtils.splitMarker1`` together with ``mapOligos``, ``outputDir`` and
    ``fileName``.

    Args:
        fileName: path of the FASTQ file to read.
        threshold: value forwarded to ``applyDrasticThreshold``.
        logFileName: path of a log file; it is created/truncated but this
            function writes nothing to it.
        mapOligos: forwarded unchanged to ``DnaUtils.splitMarker1``.
        outputDir: forwarded unchanged to ``DnaUtils.splitMarker1``.

    Returns:
        None.
    """
    def processRecord(record):
        # Trim one completed record and dispatch it, tracing each step.
        logger.debug("before threshold " + record.sequence)
        record.applyDrasticThreshold(threshold)
        logger.debug("after threshold " + record.sequence)
        logger.debug("after threshold " + DnaUtils.complementAndReverseDnaSequence(record.sequence))
        DnaUtils.splitMarker1(mapOligos, outputDir, record, fileName)

    lastHeaderLineNumber = 0
    lastSeparatorLineNumber = 0
    fastqSequence = None

    # The context managers close both files even when processing raises.
    with open(fileName, "r") as f, open(logFileName, "w"):
        for lineNumber, rawLine in enumerate(f):
            # Remove just the newline; the last line may lack one.
            line = rawLine.rstrip("\n")
            afterSeparator = lastSeparatorLineNumber == lineNumber - 1

            # A leading "@" directly after a separator belongs to a quality
            # string, so it must not start a new record.
            if line.startswith("@") and not afterSeparator:
                if fastqSequence is not None:
                    processRecord(fastqSequence)

                fastqSequence = FastqSequence.readFasqSequenceHeader(line)
                lastHeaderLineNumber = lineNumber

                logging.debug(line)
            elif lastHeaderLineNumber == lineNumber - 1:
                logging.debug("seq in file : " + line)
                fastqSequence.setSequence(line)
            elif afterSeparator:
                # Handled before the "+" test: quality strings may start
                # with "+".
                fastqSequence.setQuality(line)
            elif line.startswith("+"):
                lastSeparatorLineNumber = lineNumber

        # Process the final record; nothing to do for an empty input file.
        if fastqSequence is not None:
            processRecord(fastqSequence)
Example #11
0
def tryToFusion(fastqSequence, reverseFileName, threshold, log, seqOutput):
    """Find the reverse mate of ``fastqSequence`` and merge the two reads.

    The forward read is trimmed with ``applyDrasticThreshold(threshold)``,
    then ``reverseFileName`` is scanned for a header line equal to
    ``fastqSequence.getReverseLineHeader()``. Each record found under such a
    header is trimmed the same way and given, with the forward read, to
    ``FastqSequence.matchFastqSequence``.

    Args:
        fastqSequence: forward read; must provide ``applyDrasticThreshold``,
            ``getReverseLineHeader`` and ``getLineHeader``.
        reverseFileName: path of the FASTQ file holding the reverse reads.
            Records are expected on exactly four lines (header, sequence,
            ``+`` separator, quality).
        threshold: value forwarded to ``applyDrasticThreshold``.
        log: writable text stream receiving one outcome line per attempt.
        seqOutput: writable text stream receiving ``>header`` and sequence
            lines for every merge result.

    Returns:
        None.
    """
    fastqSequence.applyDrasticThreshold(threshold)
    reverseLineHeader = fastqSequence.getReverseLineHeader()
    logging.debug("Search " + reverseLineHeader)

    def reportFusion(rFastqSequence):
        # Trim the reverse read, attempt the merge and record the outcome.
        rFastqSequence.applyDrasticThreshold(threshold)
        s = FastqSequence.matchFastqSequence(fastqSequence,
                                             rFastqSequence)
        pair = (fastqSequence.getLineHeader() + " with " +
                rFastqSequence.getLineHeader() + "\n")
        if s is None:
            log.write("*Can't merge " + pair)
            return

        seqOutput.write(">" + s.getLineHeader() + "\n")
        seqOutput.write(s.sequence + "\n")
        logging.debug(s.getLineHeader() + "\t" + str(s.typeFusion))
        if s.typeFusion == s.TYPE_FUSION_OK:
            prefix = "Merge "
        elif s.typeFusion == s.TYPE_FUSION_NO_MATCH:
            prefix = "Forced merge "
        elif s.typeFusion == s.TYPE_FUSION_PARTIAL:
            prefix = "Partial merge "
        else:
            prefix = "Can't merge "
        log.write(prefix + pair)

    lastHeaderLineNumber = 0
    lastSeparatorLineNumber = 0
    parseFastq = False      # True while reading the record that matched
    rFastqSequence = None

    # The context manager closes the file even if a merge step raises.
    with open(reverseFileName, "r") as f:
        for lineNumber, rawLine in enumerate(f, 1):
            # Remove just the newline; slicing off the last character would
            # damage a final line that has no trailing newline.
            line = rawLine.rstrip("\n")

            # "@" starts a header unless the previous line was a separator
            # (then it is a quality string). Line 1 is accepted explicitly
            # because lastSeparatorLineNumber starts at 0.
            # startswith() keeps empty lines from raising IndexError.
            isHeader = line.startswith("@") and (
                lastSeparatorLineNumber != lineNumber - 1
                or lineNumber == 1)

            if isHeader:
                logging.debug(line + " est un header")
                if parseFastq:
                    reportFusion(rFastqSequence)
                    parseFastq = False

                if line == reverseLineHeader:
                    parseFastq = True
                    rFastqSequence = FastqSequence.readFasqSequenceHeader(line)
                    lastHeaderLineNumber = lineNumber
            elif parseFastq:
                if lastHeaderLineNumber == lineNumber - 1:
                    rFastqSequence.setSequence(line)
                elif lastSeparatorLineNumber == lineNumber - 1:
                    # Handled before the "+" test: quality strings may start
                    # with "+".
                    rFastqSequence.setQuality(line)
                elif line.startswith("+"):
                    lastSeparatorLineNumber = lineNumber

    # When the matching record ends the file, no later header triggers the
    # merge inside the loop, so do it here.
    if parseFastq:
        reportFusion(rFastqSequence)