コード例 #1
0
ファイル: FASTQTrimmer.py プロジェクト: jstaf/fastqUtils
def main(argv):
    """Trim low-quality bases from both ends of every read in a FASTQ file.

    For each read, the span from the first base whose quality is >= the
    threshold to the last such base (inclusive) is written out. Reads with
    no base meeting the threshold are dropped.

    Args:
        argv: Full argument vector (program name first). The LAST element is
            always taken as the input file path; options are parsed from
            everything in between:
              -h / --help   print ``helpString`` and exit
              -o PATH       write to PATH instead of standard output
              -q VALUE      minimum quality (float, default 30)

    Exits through ``sys.exit`` when help is requested, when an option is not
    handled, or when the last argument is not an existing file.
    """
    # parse arguments (argv[0] is the program, argv[-1] is the input file)
    opts, args = getopt.getopt(argv[1:-1], 'ho:q:', longopts = ['help'])

    qmin = 30
    outputName = ''

    for option, val in opts:
        if option in ['-h', '--help']:
            print(helpString)
            sys.exit()
        elif option == '-o':
            outputName = val
        elif option == '-q':
            qmin = float(val)
        else:
            sys.exit('Unsupported parameter.')
    # deal with invalid paths
    if (len(argv) == 1) or (not os.path.isfile(argv[-1])):
        print(helpString)
        sys.exit('Valid input file required.')
    inputName = argv[-1]

    parser = FASTQParser(inputName)
    outFile = None
    try:
        if outputName != '':
            outFile = open(outputName, 'w')
        # read file and print back the trimmed reads that pass the filter
        while True:
            read = parser.nextRead()
            # stop at EOF (the parser signals it with an empty quality string)
            if read['quals'] == '':
                break
            numQual = encoding2num(read['quals'], parser.encoding)
            # indices of every base call that meets the threshold
            passing = [idx for idx, baseq in enumerate(numQual) if baseq >= qmin]
            # only print back reads where there was at least one decent base call
            if not passing:
                continue
            start = passing[0]
            # slice bounds are exclusive, so step one past the last good base;
            # using the index itself would cut that base off
            end = passing[-1] + 1
            bases = read['bases'][start:end]
            quals = read['quals'][start:end]
            # header/qheader are emitted exactly as the parser returned them;
            # a newline is re-added after each sliced field
            if outFile is None:
                print(read['header'], bases, '\n', read['qheader'], quals, '\n',
                      sep='', end='')
            else:
                outFile.writelines([read['header'], bases, '\n',
                                    read['qheader'], quals, '\n'])
    finally:
        # release both handles even if parsing or writing fails part-way
        parser.file.close()
        if outFile is not None:
            outFile.close()
コード例 #2
0
ファイル: matchFASTQ.py プロジェクト: jstaf/fastqUtils
def indexFile(fileName):
    """Build an index mapping each read's ID to its byte offset in the file.

    The ID is the first match of the module-level ``regex`` against the
    read's header line (per the original comment this is the tile X/Y
    position -- the pattern itself is defined outside this function).

    Args:
        fileName: Path of the FASTQ file to index.

    Returns:
        dict mapping read ID -> offset reported by ``file.tell()`` just
        before that read was parsed. A later read with the same ID
        overwrites the earlier entry.
    """
    reader = FASTQParser(fileName)

    offsets = {}
    try:
        # remember where the upcoming record starts before consuming it
        recordStart = reader.file.tell()
        record = reader.nextRead()
        # an empty quality string marks EOF
        while record["quals"] != "":
            readID = regex.findall(record["header"])[0]
            offsets[readID] = recordStart
            recordStart = reader.file.tell()
            record = reader.nextRead()
    finally:
        reader.close()
    return offsets
コード例 #3
0
ファイル: FASTQFilter.py プロジェクト: jstaf/fastqUtils
def main(argv):
    """Keep only the reads of a FASTQ file whose mean quality is high enough.

    A read is written back unchanged when the mean of its decoded quality
    values is >= the threshold; all other reads are dropped.

    Args:
        argv: Full argument vector (program name first). The LAST element is
            always taken as the input file path; options are parsed from
            everything in between:
              -h / --help   print ``helpString`` and exit
              -o PATH       write to PATH instead of standard output
              -q VALUE      minimum mean quality (float, default 30)

    Exits through ``sys.exit`` when help is requested, when an option is not
    handled, or when the last argument is not an existing file.
    """
    # parse arguments (argv[0] is the program, argv[-1] is the input file)
    opts, args = getopt.getopt(argv[1:-1], 'ho:q:', longopts = ['help'])

    qmin = 30
    outputName = ''

    for option, val in opts:
        if option in ['-h', '--help']:
            print(helpString)
            sys.exit()
        elif option == '-o':
            outputName = val
        elif option == '-q':
            qmin = float(val)
        else:
            sys.exit('Unsupported parameter.')
    # deal with invalid paths
    if (len(argv) == 1) or (not os.path.isfile(argv[-1])):
        print(helpString)
        sys.exit('Valid input file required.')
    inputName = argv[-1]

    parser = FASTQParser(inputName)
    outFile = None
    try:
        if outputName != '':
            outFile = open(outputName, 'w')
        # read file and print back lines that pass the filter
        while True:
            read = parser.nextRead()
            # stop at EOF (the parser signals it with an empty quality string)
            if read['quals'] == '':
                break
            meanQual = mean(encoding2num(read['quals'], parser.encoding))
            if meanQual >= qmin:
                # the four fields are emitted back-to-back with no separator
                if outFile is None:
                    print(read['header'], read['bases'], read['qheader'], read['quals'],
                          sep = '', end = '')
                else:
                    outFile.writelines([read['header'], read['bases'],
                                        read['qheader'], read['quals']])
    finally:
        # release both handles even if parsing or writing fails part-way
        parser.file.close()
        if outFile is not None:
            outFile.close()
コード例 #4
0
ファイル: matchFASTQ.py プロジェクト: jstaf/fastqUtils
def matchReads(fastq1, fastq2):
    """Split two FASTQ files into reads they share and reads unique to each.

    Reads are paired by the ID that the module-level ``regex`` extracts from
    the header line. Four files are written next to the inputs:

        <fastq1>.common  reads of fastq1 whose ID also occurs in fastq2
        <fastq1>.unique  reads of fastq1 with no partner in fastq2
        <fastq2>.common  the fastq2 partners, in fastq1's order
        <fastq2>.unique  reads of fastq2 never matched by fastq1

    Args:
        fastq1: Path of the first FASTQ file (streamed sequentially).
        fastq2: Path of the second FASTQ file (indexed, then read by seeking).

    Returns:
        None.
    """
    # ID -> file offset for every read in fastq2
    idxStore = indexFile(fastq2)

    fastq1_parser = None
    fastq2_parser = None
    try:
        with open(fastq1 + ".common", "w") as fastq1_common, \
                open(fastq1 + ".unique", "w") as fastq1_unique, \
                open(fastq2 + ".common", "w") as fastq2_common:
            fastq1_parser = FASTQParser(fastq1)
            fastq2_parser = FASTQParser(fastq2)
            while True:
                read = fastq1_parser.nextRead()
                # EOF (the parser signals it with an empty quality string)
                if read["quals"] == "":
                    break
                fields = [read["header"], read["bases"], read["qheader"], read["quals"]]
                ID = regex.findall(read["header"])[0]
                if ID in idxStore:
                    # write both reads out to common files, remove key from index
                    fastq1_common.writelines(fields)

                    fastq2_parser.file.seek(idxStore.pop(ID))
                    readMatch = fastq2_parser.nextRead()
                    fastq2_common.writelines(
                        [readMatch["header"], readMatch["bases"],
                         readMatch["qheader"], readMatch["quals"]]
                    )
                else:
                    # write out to unique file for fastq1
                    fastq1_unique.writelines(fields)

        # all remaining entries in the index are the unique reads for fastq2
        with open(fastq2 + ".unique", "w") as fastq2_unique:
            for offset in idxStore.values():
                fastq2_parser.file.seek(offset)
                read = fastq2_parser.nextRead()
                fastq2_unique.writelines(
                    [read["header"], read["bases"], read["qheader"], read["quals"]]
                )
    finally:
        # close whichever parsers were opened, even if matching failed part-way
        for openParser in (fastq1_parser, fastq2_parser):
            if openParser is not None:
                openParser.close()