コード例 #1
0
ファイル: matchFASTQ.py プロジェクト: jstaf/fastqUtils
def indexFile(fileName):
    """Build a lookup table from read ID to file offset for a FASTQ file.

    Each read is pulled from a ``FASTQParser`` opened on ``fileName``. The
    offset reported by the parser's underlying file object just before a read
    is consumed is stored under the first match that the module-level
    ``regex`` pattern finds in that read's header (the tile X/Y position,
    according to the original author's note).

    Parsing stops at the first read whose ``"quals"`` field is the empty
    string, which the parser uses to signal end of file. If the same ID
    occurs more than once, the offset of the last occurrence wins.

    :param fileName: path of the FASTQ file to index.
    :returns: dict mapping read ID to the offset at which that read starts.
    :raises IndexError: if the pattern finds nothing in a header
        (presumably ``regex`` is a compiled ``re`` pattern -- confirm).
    """
    parser = FASTQParser(fileName)
    offsets = {}
    try:
        # Prime the loop: the offset has to be captured *before* the read is
        # consumed so that it points at the start of the record.
        offset = parser.file.tell()
        record = parser.nextRead()
        while record["quals"] != "":
            key = regex.findall(record["header"])[0]
            offsets[key] = offset
            offset = parser.file.tell()
            record = parser.nextRead()
    finally:
        # Release the parser even if a header fails to parse.
        parser.close()
    return offsets
コード例 #2
0
ファイル: matchFASTQ.py プロジェクト: jstaf/fastqUtils
def _writeRead(handle, read):
    """Write the four fields of one parsed FASTQ read to an open file handle.

    The fields are written as-is with ``writelines``, so no line terminators
    are added here (presumably the parser keeps them -- confirm).
    """
    handle.writelines([read["header"], read["bases"], read["qheader"], read["quals"]])


def matchReads(fastq1, fastq2):
    """Split two FASTQ files into shared reads and reads unique to each file.

    ``fastq2`` is indexed with ``indexFile`` first. Every read of ``fastq1``
    is then looked up in that index by the ID that the module-level ``regex``
    pattern extracts from its header:

    * ID found: the read goes to ``<fastq1>.common`` and the matching read
      of ``fastq2`` goes to ``<fastq2>.common``. The ID is removed from the
      index, so a later ``fastq1`` read with the same ID counts as unique.
    * ID not found: the read goes to ``<fastq1>.unique``.

    Whatever is left in the index afterwards is written to
    ``<fastq2>.unique``.

    All parsers and output files are released even when an error interrupts
    processing. Both inputs are opened before any output file is created, so
    an unreadable input does not leave empty output files behind.

    :param fastq1: path of the first FASTQ file.
    :param fastq2: path of the second FASTQ file.
    :returns: None.
    :raises IndexError: if the pattern finds nothing in a header
        (presumably ``regex`` is a compiled ``re`` pattern -- confirm).
    """
    # Local import: the file's import block is not visible from this section.
    from contextlib import closing

    idxStore = indexFile(fastq2)

    # closing() calls .close() on exit, so the parsers are released on every
    # path without needing them to implement the context-manager protocol.
    with closing(FASTQParser(fastq1)) as fastq1_parser, \
            closing(FASTQParser(fastq2)) as fastq2_parser:
        with open(fastq1 + ".common", "w") as fastq1_common, \
                open(fastq1 + ".unique", "w") as fastq1_unique, \
                open(fastq2 + ".common", "w") as fastq2_common:
            while True:
                read = fastq1_parser.nextRead()
                # An empty quality string marks EOF.
                if read["quals"] == "":
                    break
                ID = regex.findall(read["header"])[0]
                if ID in idxStore:
                    # Present in both files: emit the pair and drop the key so
                    # only fastq2-only reads remain in the index.
                    _writeRead(fastq1_common, read)
                    fastq2_parser.file.seek(idxStore.pop(ID))
                    _writeRead(fastq2_common, fastq2_parser.nextRead())
                else:
                    _writeRead(fastq1_unique, read)

        # All remaining offsets in the index belong to reads unique to fastq2.
        with open(fastq2 + ".unique", "w") as fastq2_unique:
            for pos in idxStore.values():
                fastq2_parser.file.seek(pos)
                _writeRead(fastq2_unique, fastq2_parser.nextRead())