Ejemplo n.º 1
0
def cutadapt(fq):
    """Trim every read in ``fq`` and count identical resulting sequences.

    Each read is passed through all callables in the module-level
    ``ingredients`` list, in order. Every callable receives the current read
    and a per-read scratch object: a ``ModificationInfo`` with an empty
    ``matches`` list when ``cu_ver`` is 3 or higher, otherwise a plain list.

    When ``qiagenumi`` is truthy, a fragment of the removed tail is appended
    to the trimmed sequence again. The tail is whatever follows the first
    occurrence of the trimmed sequence in the untrimmed one. With N being
    the second comma-separated field of ``umi``, the first
    ``len(qiaAdapter) + N`` characters of the tail are taken and the last N
    of those are kept (presumably the UMI located right behind the adapter;
    verify against the caller). A ``ValueError`` while doing so (for
    example an empty trimmed sequence used as separator) yields an empty
    fragment.

    Sequences shorter than ``min_len`` are not counted.

    Args:
        fq: Iterable of read records exposing a ``sequence`` attribute.

    Returns:
        A list of ``(sequence, count)`` tuples in first-seen order.
    """
    tally = {}
    for fqreads in fq:
        # Scratch object handed to each modifier; its type depends on cu_ver.
        if int(cu_ver) < 3:
            scratch = []
        else:
            scratch = ModificationInfo(None)
            scratch.matches = []

        with_umi = bool(qiagenumi)
        # The untrimmed sequence is only needed to recover the removed tail.
        untrimmed = fqreads.sequence if with_umi else None

        for modifier in ingredients:
            fqreads = modifier(fqreads, scratch)

        if with_umi:
            try:
                tail = untrimmed.split(str(fqreads.sequence))[1]
                umi_fields = umi.split(",")
                window = len(qiaAdapter) + int(umi_fields[1])
                tail = tail[:window][-int(umi_fields[1]):]
            except ValueError:
                tail = ""
            kept = fqreads.sequence + tail
        else:
            kept = fqreads.sequence

        if len(kept) >= int(min_len):
            key = str(kept)
            tally[key] = tally.get(key, 0) + 1

    return list(tally.items())
Ejemplo n.º 2
0
def cutadapt(fq):
    """Trim every read in ``fq``, log the kept reads and count sequences.

    Each read is passed through all callables in the module-level
    ``ingredients`` list, in order. Every callable receives the current read
    and a per-read scratch object: a ``ModificationInfo`` with an empty
    ``matches`` list when ``cu_ver`` is 3 or higher, otherwise a plain list.

    When ``qiagenumi`` is falsy, every trimmed read whose sequence is at
    least ``min_len`` long is appended to five files in the current working
    directory:

    * ``input.fq`` and ``correct_read.fastq`` receive a four-line FASTQ
      record (``@id``, sequence, ``+``, qualities).
    * ``id_read.txt``, ``ID_read_quality_cor.txt`` and
      ``ID_read_quality_input.txt`` receive one line
      ``@id sequence qualities``.

    The id is the read name up to its first space.

    When ``qiagenumi`` is truthy, nothing is written. Instead a fragment of
    the removed tail is appended to the trimmed sequence before counting.
    With N being the second comma-separated field of ``umi``, the first
    ``len(qiaAdapter) + N`` characters following the trimmed sequence in the
    untrimmed read are taken and the last N of those are kept. A
    ``ValueError`` while doing so yields an empty fragment.

    All five files are opened in ``"a+"`` mode, so repeated calls keep
    appending. They are closed when the function returns or raises.

    Args:
        fq: Iterable of read records exposing ``name``, ``sequence`` and
            ``qualities`` attributes.

    Returns:
        A list of ``(sequence, count)`` tuples in first-seen order.
    """
    readDict = {}
    # One `with` statement guarantees that every handle is flushed and
    # closed, even if a modifier raises halfway through the input.
    with open("input.fq", "a+") as in_fqfile, \
            open("correct_read.fastq", "a+") as in_fqfileCor, \
            open("id_read.txt", "a+") as id_read, \
            open("ID_read_quality_cor.txt", "a+") as id_read_qc, \
            open("ID_read_quality_input.txt", "a+") as id_read_qcin:
        fastq_outputs = (in_fqfile, in_fqfileCor)
        flat_outputs = (id_read, id_read_qc, id_read_qcin)

        for fqreads in fq:
            # Scratch object handed to each modifier; type depends on cu_ver.
            if int(cu_ver) >= 3:
                info = ModificationInfo(None)
                info.matches = []
            else:
                info = []

            # Remember the untrimmed sequence for the UMI recovery below.
            currentSeq = fqreads.sequence
            for modifier in ingredients:
                fqreads = modifier(fqreads, info)

            if qiagenumi:
                try:
                    umi_seq = currentSeq.split(str(fqreads.sequence))[1]
                    umi_cut = umi.split(",")
                    max_ad = len(qiaAdapter) + int(umi_cut[1])
                    umi_seq = umi_seq[:max_ad][-int(umi_cut[1]):]
                except ValueError:
                    # e.g. the trimmed sequence is empty (empty separator).
                    umi_seq = ""
                final_seq = fqreads.sequence + umi_seq
                if len(final_seq) >= int(min_len):
                    key = str(final_seq)
                    readDict[key] = readDict.get(key, 0) + 1
                # Reads handled in UMI mode are only counted, never written.
                continue

            if len(fqreads.sequence) < int(min_len):
                continue

            read_id = "@" + str(fqreads.name.split(" ")[0])
            sequence = str(fqreads.sequence)
            qualities = str(fqreads.qualities)
            fastq_record = (read_id + "\n" + sequence + "\n" +
                            "+" + "\n" + qualities + "\n")
            flat_record = read_id + " " + sequence + " " + qualities + "\n"

            for handle in fastq_outputs:
                handle.write(fastq_record)
            for handle in flat_outputs:
                handle.write(flat_record)

            readDict[sequence] = readDict.get(sequence, 0) + 1

    return list(readDict.items())