def indexFile(fileName):
    """Build a lookup from read ID to file offset for one FASTQ file.

    Each record is fetched through ``FASTQParser.nextRead()``. Its ID is the
    first match of the module-level ``regex`` against the record header, and
    the stored value is the position reported by ``file.tell()`` immediately
    before that record was read. If two records yield the same ID, the later
    offset overwrites the earlier one.

    Args:
        fileName: Path handed to ``FASTQParser``.

    Returns:
        dict mapping read ID to the offset at which the record starts.

    Raises:
        IndexError: If ``regex`` finds no match in a record header.
    """
    parser = FASTQParser(fileName)
    offsets = {}
    try:
        # Capture the offset *before* consuming the record so that a later
        # seek() to this position re-reads the same record.
        offset = parser.file.tell()
        record = parser.nextRead()
        # An empty quality string is the parser's end-of-file signal.
        while record["quals"] != "":
            key = regex.findall(record["header"])[0]
            offsets[key] = offset
            offset = parser.file.tell()
            record = parser.nextRead()
    finally:
        parser.close()
    return offsets
def _writeRecord(handle, read):
    """Write the four fields of one parsed read to ``handle`` in file order."""
    # Fields are written verbatim; presumably the parser keeps each line's
    # trailing newline -- confirm against FASTQParser.nextRead().
    handle.writelines(
        [read["header"], read["bases"], read["qheader"], read["quals"]]
    )


def matchReads(fastq1, fastq2):
    """Split two FASTQ files into reads common to both and unique to each.

    ``fastq2`` is indexed first with ``indexFile``. ``fastq1`` is then
    streamed once; for every read whose ID (first match of the module-level
    ``regex`` on the header) is present in the index, the read is written to
    ``<fastq1>.common`` and the matching ``fastq2`` read is written to
    ``<fastq2>.common``. Other ``fastq1`` reads go to ``<fastq1>.unique``.
    Index entries never matched are finally written to ``<fastq2>.unique``.

    An index entry is removed as soon as it is matched, so a second
    ``fastq1`` read with the same ID is treated as unique to ``fastq1``.

    Args:
        fastq1: Path of the first FASTQ file (streamed sequentially).
        fastq2: Path of the second FASTQ file (indexed, then read by seek).

    Returns:
        None. The results are the four output files named above.

    Raises:
        IndexError: If ``regex`` finds no match in a read header.
    """
    idxStore = indexFile(fastq2)

    # Parsers are opened before the output files so that a missing input
    # does not leave empty output files behind, and every handle is released
    # even if processing fails part-way through.
    fastq1_parser = FASTQParser(fastq1)
    try:
        fastq2_parser = FASTQParser(fastq2)
        try:
            with open(fastq1 + ".common", "w") as fastq1_common, \
                    open(fastq1 + ".unique", "w") as fastq1_unique, \
                    open(fastq2 + ".common", "w") as fastq2_common:
                while True:
                    read = fastq1_parser.nextRead()
                    # An empty quality string is the parser's EOF signal.
                    if read["quals"] == "":
                        break

                    ID = regex.findall(read["header"])[0]
                    if ID in idxStore:
                        # Write both mates out and drop the key so that only
                        # unmatched fastq2 reads remain in the index.
                        _writeRecord(fastq1_common, read)
                        fastq2_parser.file.seek(idxStore.pop(ID))
                        _writeRecord(fastq2_common, fastq2_parser.nextRead())
                    else:
                        _writeRecord(fastq1_unique, read)

            # All remaining keys in the index are reads unique to fastq2.
            with open(fastq2 + ".unique", "w") as fastq2_unique:
                for remaining in idxStore:
                    fastq2_parser.file.seek(idxStore[remaining])
                    _writeRecord(fastq2_unique, fastq2_parser.nextRead())
        finally:
            fastq2_parser.close()
    finally:
        fastq1_parser.close()