def cutadapt(fq):
    """Run every read in *fq* through the trimming modifiers and count survivors.

    Relies on module-level settings: ``cu_ver``, ``ingredients`` (the
    callables applied to each read, in order), ``min_len``, ``qiagenumi``
    and, when ``qiagenumi`` is truthy, ``umi`` and ``qiaAdapter``.

    Args:
        fq: iterable of read objects exposing a ``sequence`` attribute.

    Returns:
        A list of ``(sequence, count)`` tuples, one per distinct sequence
        that is at least ``min_len`` long after trimming, in the order the
        sequences were first seen.
    """
    tally = {}
    for read in fq:
        # When cu_ver >= 3 the modifiers are handed a ModificationInfo whose
        # match list starts empty; otherwise they get a plain list.
        if int(cu_ver) >= 3:
            state = ModificationInfo(None)
            state.matches = []
        else:
            state = []

        # The untrimmed sequence is only needed to recover the UMI later.
        untrimmed = read.sequence if qiagenumi else None

        for modifier in ingredients:
            read = modifier(read, state)

        if not qiagenumi:
            kept = read.sequence
        else:
            try:
                # Text that followed the trimmed sequence in the raw read.
                tail = untrimmed.split(str(read.sequence))[1]
                umi_fields = umi.split(",")
                window = len(qiaAdapter) + int(umi_fields[1])
                # Limit to adapter + UMI length, then keep the trailing
                # int(umi_fields[1]) characters.
                tail = tail[:window][-int(umi_fields[1]):]
            except ValueError:
                # str.split rejects an empty separator (read trimmed to
                # nothing) and int() rejects a non-numeric field.
                tail = ""
            kept = read.sequence + tail

        if len(kept) < int(min_len):
            continue

        key = str(kept)
        tally[key] = tally.get(key, 0) + 1

    return list(tally.items())
def cutadapt(fq):
    """Trim the reads in *fq*, log the survivors to disk and count them.

    Each read is passed through every callable in the module-level
    ``ingredients`` list. Reads whose resulting sequence is at least
    ``min_len`` long are counted per distinct sequence.

    When ``qiagenumi`` is falsy, every surviving read is also appended to
    five files in the current working directory:

    * ``input.fq`` and ``correct_read.fastq`` receive a four-line FASTQ record;
    * ``id_read.txt``, ``ID_read_quality_cor.txt`` and
      ``ID_read_quality_input.txt`` receive one ``@id sequence qualities``
      line.

    The files are opened in ``"a+"`` mode, so repeated calls append. They
    are opened (and therefore created) even when ``qiagenumi`` is truthy,
    although that branch writes nothing to them. All five handles are
    closed, and their buffers flushed, before the function returns.

    Args:
        fq: iterable of read objects exposing ``name``, ``sequence`` and
            ``qualities``, e.g.
            ``Sequence(name='SRR772403.13 SN603_WA038_2_1102_1455.00_139.00_0 length=50',
            sequence='TACCCTGTAGAAACGAATTTGT', qualities='@@@DDDDDFFFF<+AFFGFEIF')``.

    Returns:
        A list of ``(sequence, count)`` tuples in first-seen order.
    """
    # Loop-invariant settings, converted once instead of once per read.
    use_info = int(cu_ver) >= 3
    shortest = int(min_len)
    readDict = {}

    # A single ``with`` guarantees the handles are closed even if a modifier
    # raises part-way through the input.
    with open("input.fq", "a+") as in_fqfile, \
            open("correct_read.fastq", "a+") as in_fqfileCor, \
            open("id_read.txt", "a+") as id_read, \
            open("ID_read_quality_cor.txt", "a+") as id_read_qc, \
            open("ID_read_quality_input.txt", "a+") as id_read_qcin:
        for fqreads in fq:
            # When cu_ver >= 3 the modifiers receive a ModificationInfo with
            # an empty match list; otherwise a plain list.
            if use_info:
                state = ModificationInfo(None)
                state.matches = []
            else:
                state = []

            # Untrimmed sequence, needed to recover the UMI afterwards.
            currentSeq = fqreads.sequence
            for modifier in ingredients:
                fqreads = modifier(fqreads, state)

            if qiagenumi:
                try:
                    # Text that followed the trimmed sequence in the raw read.
                    umi_seq = currentSeq.split(str(fqreads.sequence))[1]
                    umi_cut = umi.split(",")
                    max_ad = len(qiaAdapter) + int(umi_cut[1])
                    # Limit to adapter + UMI length, keep the trailing part.
                    # NOTE(review): if int(umi_cut[1]) is 0, ``[-0:]`` keeps
                    # the whole window rather than nothing -- confirm intent.
                    umi_seq = umi_seq[:max_ad][-int(umi_cut[1]):]
                except ValueError:
                    # Empty separator (read trimmed to nothing) or a
                    # non-numeric UMI length field.
                    umi_seq = ""
                final_seq = fqreads.sequence + umi_seq
                if len(final_seq) < shortest:
                    continue
                key = str(final_seq)
            else:
                if len(fqreads.sequence) < shortest:
                    continue
                # The identifier is the read name up to the first space.
                read_id = str(fqreads.name.split(" ")[0])
                sequence = str(fqreads.sequence)
                qualities = str(fqreads.qualities)

                fastq_record = (
                    "@" + read_id + "\n" + sequence + "\n" + "+" + "\n"
                    + qualities + "\n"
                )
                id_line = "@" + read_id + " " + sequence + " " + qualities + "\n"

                in_fqfile.write(fastq_record)
                in_fqfileCor.write(fastq_record)
                id_read.write(id_line)
                id_read_qc.write(id_line)
                id_read_qcin.write(id_line)
                key = sequence

            readDict[key] = readDict.get(key, 0) + 1

    trimmed_pairs = list(readDict.items())
    return trimmed_pairs