def merge_fastq(fastq_path1, fastq_path2):
    """Concatenate the reads of two FASTQ files into a single FASTQ string.

    Each file is opened with ``gzip.open`` when ``isCompressed`` reports it
    as compressed, otherwise with the built-in ``open``. Reads are emitted
    in file order: all reads of ``fastq_path1`` followed by all reads of
    ``fastq_path2``.

    Args:
        fastq_path1: Path of the first FASTQ file (plain or gzip).
        fastq_path2: Path of the second FASTQ file (plain or gzip).

    Returns:
        A string holding every read of both files in FASTQ format.
    """
    chunks = []
    # Handle both inputs through the same code path so the second file is
    # always read from its own path (the previous version re-opened
    # fastq_path1 when both inputs were compressed).
    for path in (fastq_path1, fastq_path2):
        opener = gzip.open if isCompressed(path) else open
        with opener(path, "rt") as handle:
            chunks.extend(
                read.format("fastq") for read in SeqIO.parse(handle, "fastq")
            )
    # Join once instead of repeatedly growing a string inside the loop.
    return "".join(chunks)
def trim_quality(filename, limit):
    """Return the reads whose lowest Phred quality score is at least ``limit``.

    The file is opened with ``gzip.open`` when ``isCompressed`` reports it as
    compressed, otherwise with ``open``.

    Args:
        filename: Path of the FASTQ file (plain or gzip).
        limit: Minimum value every per-base quality score of a read must
            reach for the read to be kept.

    Returns:
        A string with four newline-terminated lines per kept read: the read
        id, the sequence, and the third and fourth lines of the read's FASTQ
        text. NOTE: the id line is written without the leading "@".
    """
    opener = gzip.open if isCompressed(filename) else open
    kept = []
    with opener(filename, "rt") as handle:
        for record in SeqIO.parse(handle, "fastq"):
            if min(record.letter_annotations["phred_quality"]) >= limit:
                # Lines of the FASTQ rendering: [header, sequence, "+", quality, ""]
                fastq_lines = record.format("fastq").split("\n")
                fields = (
                    str(record.id),
                    str(record.seq),
                    str(fastq_lines[2]),
                    str(fastq_lines[3]),
                )
                kept.append("\n".join(fields) + "\n")
    return "".join(kept)
def fetch_all_reads(filename, option):
    """Return one view of every read in a FASTQ file as a single string.

    The file is opened with ``gzip.open`` when ``isCompressed`` reports it as
    compressed, otherwise with ``open``. Both kinds of file now produce the
    same output; previously option 0 on a compressed file stopped after the
    first read.

    Args:
        filename: Path of the FASTQ file (plain or gzip).
        option: Selects what is emitted for each read:
            0 -- the complete FASTQ record;
            1 -- the sequence followed by a newline;
            2 -- the read id followed by a newline;
            3 -- the fourth line of the FASTQ record (the quality string)
                 followed by a newline;
            any other value -- the id line and then the sequence line.

    Returns:
        The concatenation of the selected text for all reads.
    """
    opener = gzip.open if isCompressed(filename) else open
    pieces = []
    with opener(filename, "rt") as handle:
        for read in SeqIO.parse(handle, "fastq"):
            if option == 0:
                pieces.append(read.format("fastq"))
            elif option == 1:
                pieces.append(str(read.seq) + "\n")
            elif option == 2:
                pieces.append(str(read.id) + "\n")
            elif option == 3:
                pieces.append(str(read.format("fastq").split("\n")[3]) + "\n")
            else:
                pieces.append(str(read.id) + "\n" + str(read.seq) + "\n")
    return "".join(pieces)
def trimoffprimer(filename, primersequence):
    """Run ``trim_primer`` over every read of a FASTQ file and join the results.

    The file is opened with ``gzip.open`` when ``isCompressed`` reports it as
    compressed, otherwise with ``open``.

    Args:
        filename: Path of the FASTQ file (plain or gzip).
        primersequence: Passed unchanged as the second argument of
            ``trim_primer`` for every read.

    Returns:
        The concatenation of ``str(trim_primer(read, primersequence))`` for
        all reads, in file order.
    """
    # NOTE(review): trim_primer is defined outside this block; its exact
    # output format is not visible here.
    opener = gzip.open if isCompressed(filename) else open
    with opener(filename, "rt") as handle:
        trimmed = [
            str(trim_primer(record, primersequence))
            for record in SeqIO.parse(handle, "fastq")
        ]
    return "".join(trimmed)
def isFastq(filename):
    """Report whether ``filename`` can be parsed as FASTQ with at least one read.

    The file is opened with ``gzip.open`` when ``isCompressed`` reports it as
    compressed, otherwise with ``open``.

    Args:
        filename: Path of the file to probe.

    Returns:
        True if the parser yields at least one record; False if the file has
        no records or if opening/parsing raises an ordinary exception.
    """
    opener = gzip.open if isCompressed(filename) else open
    try:
        with opener(filename, "rt") as handle:
            # any() stops at the first record, so only the start of the file
            # has to be parsed.
            return any(SeqIO.parse(handle, "fastq"))
    except Exception:
        # Best-effort probe: any read/parse failure means "not FASTQ".
        # Exception (rather than a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.
        return False
def search(filename, sequence):
    """Collect the reads whose sequence contains ``sequence``.

    The file is opened with ``gzip.open`` when ``isCompressed`` reports it as
    compressed, otherwise with ``open``.

    Example:
        search("t1.fastq.gz", "CCGC")

    Args:
        filename: Path of the FASTQ file (plain or gzip).
        sequence: Subsequence to look for in each read.

    Returns:
        A string with two newline-terminated lines per matching read: the
        read id and the read sequence. Empty when nothing matches.
    """
    opener = gzip.open if isCompressed(filename) else open
    hits = []
    with opener(filename, "rt") as handle:
        for record in SeqIO.parse(handle, "fastq"):
            if sequence in record.seq:
                hits.append(record.id + "\n" + str(record.seq) + "\n")
    return str("".join(hits))
def statistics(filename):
    """Compute summary statistics for a FASTQ file.

    The file is opened with ``gzip.open`` when ``isCompressed`` reports it as
    compressed, otherwise with ``open``.

    Args:
        filename: Path of the FASTQ file (plain or gzip).

    Returns:
        A four-element list ``[count, average, kind, gcper]`` where
        ``count`` is the number of reads, ``average`` is the rounded mean
        read length, ``kind`` is ``"Compressed"`` or ``"Simple Text"``, and
        ``gcper`` is the rounded mean of ``GC(read.seq)`` over all reads.
        For a file with no reads the numeric entries are all 0 (previously
        this case raised ZeroDivisionError).
    """
    compressed = isCompressed(filename)
    opener = gzip.open if compressed else open
    kind = "Compressed" if compressed else "Simple Text"

    count = 0
    total_length = 0
    gc_total = 0
    with opener(filename, "rt") as handle:
        for read in SeqIO.parse(handle, "fastq"):
            count += 1
            # NOTE(review): GC comes from outside this block; presumably the
            # GC percentage of the sequence -- confirm against its import.
            gc_total += GC(read.seq)
            total_length += len(read.seq)

    if count == 0:
        # Nothing to average; avoid dividing by zero.
        return [0, 0, kind, 0]

    return [count, round(total_length / count), kind, round(gc_total / count)]
def fetch_head_reads_only(filename, option, limit=10):
    """Return one view of the first ``limit`` reads of a FASTQ file.

    The file is opened with ``gzip.open`` when ``isCompressed`` reports it as
    compressed, otherwise with ``open``. The read limit is applied for every
    ``option``; previously only option 0 counted reads, so the other options
    returned the whole file.

    Args:
        filename: Path of the FASTQ file (plain or gzip).
        option: Selects what is emitted for each read:
            0 -- the complete FASTQ record;
            1 -- the sequence followed by a newline;
            2 -- the read id followed by a newline;
            3 -- the fourth line of the FASTQ record (the quality string)
                 followed by a newline;
            any other value -- the id line and then the sequence line.
        limit: Maximum number of reads to return. Defaults to 10, the value
            that used to be hard-coded.

    Returns:
        The concatenation of the selected text for at most ``limit`` reads.
    """
    if limit <= 0:
        return ""

    opener = gzip.open if isCompressed(filename) else open
    pieces = []
    with opener(filename, "rt") as handle:
        for read in SeqIO.parse(handle, "fastq"):
            if option == 0:
                pieces.append(read.format("fastq"))
            elif option == 1:
                pieces.append(str(read.seq) + "\n")
            elif option == 2:
                pieces.append(str(read.id) + "\n")
            elif option == 3:
                pieces.append(str(read.format("fastq").split("\n")[3]) + "\n")
            else:
                pieces.append(str(read.id) + "\n" + str(read.seq) + "\n")
            # Stop right after the last wanted read so the rest of the file
            # is never parsed.
            if len(pieces) >= limit:
                break
    return "".join(pieces)
def fetch_inrange_reads_only(filename, option, l_range, u_range):
    """Return one view of the reads at positions ``l_range``..``u_range``.

    Positions are 1-based and both bounds are inclusive, so
    ``l_range=1, u_range=10`` selects the first ten reads. The file is opened
    with ``gzip.open`` when ``isCompressed`` reports it as compressed,
    otherwise with ``open``.

    Changes from the previous version: it no longer parses a hard-coded
    "test.fastq" (which failed when that file was absent), no longer prints
    debug output, and counts each read exactly once (the counter used to
    advance twice for every selected read, which skipped reads).

    Args:
        filename: Path of the FASTQ file (plain or gzip).
        option: Selects what is emitted for each read:
            0 -- the complete FASTQ record;
            1 -- the sequence followed by a newline;
            2 -- the read id followed by a newline;
            3 -- the fourth line of the FASTQ record (the quality string)
                 followed by a newline;
            any other value -- the id line and then the sequence line.
        l_range: 1-based position of the first read to include.
        u_range: 1-based position of the last read to include.

    Returns:
        The concatenation of the selected text for the reads in range; empty
        when the range selects nothing.
    """
    opener = gzip.open if isCompressed(filename) else open
    pieces = []
    with opener(filename, "rt") as handle:
        for position, read in enumerate(SeqIO.parse(handle, "fastq"), start=1):
            if position > u_range:
                # Past the upper bound: nothing further can match.
                break
            if position < l_range:
                continue
            if option == 0:
                pieces.append(read.format("fastq"))
            elif option == 1:
                pieces.append(str(read.seq) + "\n")
            elif option == 2:
                pieces.append(str(read.id) + "\n")
            elif option == 3:
                pieces.append(str(read.format("fastq").split("\n")[3]) + "\n")
            else:
                pieces.append(str(read.id) + "\n" + str(read.seq) + "\n")
    return "".join(pieces)