コード例 #1
0
def merge_fastq(fastq_path1, fastq_path2):
    """Concatenate the reads of two FASTQ files into one FASTQ string.

    Each file is opened with ``gzip.open`` when ``isCompressed`` reports it
    as compressed, otherwise with the built-in ``open``. The reads of
    ``fastq_path1`` come first, followed by the reads of ``fastq_path2``.

    Args:
        fastq_path1: Path of the first FASTQ file.
        fastq_path2: Path of the second FASTQ file.

    Returns:
        A single string holding every read of both files in FASTQ format.
    """
    chunks = []
    # Handle both inputs through the same code path so that each file is
    # opened under its own name (the earlier version re-read the first file
    # when both inputs were compressed).
    for path in (fastq_path1, fastq_path2):
        opener = gzip.open if isCompressed(path) else open
        with opener(path, "rt") as handle:
            chunks.extend(
                str(read.format("fastq"))
                for read in SeqIO.parse(handle, "fastq"))
    # Joining once avoids repeatedly copying the growing result string.
    return "".join(chunks)
コード例 #2
0
ファイル: trimquality.py プロジェクト: suheelyousuf/ReFreshFQ
def trim_quality(filename, limit):
    """Return the reads whose lowest Phred quality is at least ``limit``.

    For every kept read four newline-terminated lines are emitted: the read
    id, the sequence, and the third and fourth lines of the read's
    FASTQ-formatted text.

    Args:
        filename: Path of the FASTQ file; read through ``gzip.open`` when
            ``isCompressed`` says so, otherwise through ``open``.
        limit: Minimum value every ``phred_quality`` entry of a read must
            reach for the read to be kept.

    Returns:
        The kept reads as one string (empty when nothing qualifies).
    """
    opener = gzip.open if isCompressed(filename) else open
    kept = []
    with opener(filename, "rt") as handle:
        for record in SeqIO.parse(handle, "fastq"):
            if min(record.letter_annotations["phred_quality"]) >= limit:
                fastq_lines = record.format("fastq").split("\n")
                kept.append("\n".join([
                    str(record.id),
                    str(record.seq),
                    str(fastq_lines[2]),
                    str(fastq_lines[3]),
                    "",  # trailing element yields the final newline
                ]))
    return "".join(kept)
コード例 #3
0
def fetch_all_reads(filename, option):
    """Render every read of a FASTQ file in the form selected by ``option``.

    Args:
        filename: Path of the FASTQ file; read through ``gzip.open`` when
            ``isCompressed`` says so, otherwise through ``open``.
        option: Output selector.
            ``0`` - full FASTQ text of each read;
            ``1`` - sequence only, one per line;
            ``2`` - read id only, one per line;
            ``3`` - fourth line of each read's FASTQ text, one per line;
            anything else - id line followed by sequence line.

    Returns:
        The rendered reads as one string.
    """
    opener = gzip.open if isCompressed(filename) else open
    with opener(filename, "rt") as handle:
        records = SeqIO.parse(handle, "fastq")
        # Compressed and plain inputs now share this logic, so option 0 no
        # longer stops after the first read for compressed files and no
        # debug output is written to stdout.
        if option == 0:
            parts = [str(read.format("fastq")) for read in records]
        elif option == 1:
            parts = [str(read.seq) + "\n" for read in records]
        elif option == 2:
            parts = [str(read.id) + "\n" for read in records]
        elif option == 3:
            parts = [str(read.format("fastq").split("\n")[3]) + "\n"
                     for read in records]
        else:
            parts = [str(read.id) + "\n" + str(read.seq) + "\n"
                     for read in records]
    return "".join(parts)
コード例 #4
0
def trimoffprimer(filename, primersequence):
    """Apply ``trim_primer`` to every read of a FASTQ file.

    Args:
        filename: Path of the FASTQ file; read through ``gzip.open`` when
            ``isCompressed`` says so, otherwise through ``open``.
        primersequence: Passed unchanged to ``trim_primer`` for each read.

    Returns:
        The string forms of all ``trim_primer`` results, concatenated in
        file order.
    """
    opener = gzip.open if isCompressed(filename) else open
    with opener(filename, "rt") as handle:
        trimmed = [
            str(trim_primer(record, primersequence))
            for record in SeqIO.parse(handle, "fastq")
        ]
    return "".join(trimmed)
コード例 #5
0
def isFastq(filename):
    """Report whether ``filename`` can be parsed as FASTQ.

    The file is opened (via ``gzip.open`` when ``isCompressed`` says so,
    otherwise ``open``) and handed to ``SeqIO.parse``; the result is
    ``any()`` over the parsed records, so a file yielding no records gives
    ``False``.

    Args:
        filename: Path of the file to probe.

    Returns:
        ``True`` when parsing produces a truthy record, ``False`` when it
        produces none or when opening/parsing raises an error.
    """
    opener = gzip.open if isCompressed(filename) else open
    try:
        with opener(filename, "rt") as handle:
            return any(SeqIO.parse(handle, "fastq"))
    except Exception:
        # Any I/O, decoding or parse failure means "not a FASTQ file".
        # Unlike a bare ``except``, this lets KeyboardInterrupt and
        # SystemExit propagate.
        return False
コード例 #6
0
def search(filename, sequence):
    """Collect the reads whose sequence contains ``sequence``.

    Args:
        filename: Path of the FASTQ file; read through ``gzip.open`` when
            ``isCompressed`` says so, otherwise through ``open``.
        sequence: Value tested with ``in`` against each read's sequence.

    Returns:
        A string with an id line and a sequence line for every matching
        read, in file order (empty when nothing matches).
    """
    opener = gzip.open if isCompressed(filename) else open
    hits = []
    with opener(filename, "rt") as handle:
        for record in SeqIO.parse(handle, "fastq"):
            if sequence in record.seq:
                hits.append(str(record.id + "\n" + record.seq + "\n"))
    return "".join(hits)


#a=search("t1.fastq.gz","CCGC")
#print(a)
#print(type(a))
#file_save(a)
コード例 #7
0
def statistics(filename):
    """Compute summary statistics for a FASTQ file.

    Args:
        filename: Path of the FASTQ file; read through ``gzip.open`` when
            ``isCompressed`` says so, otherwise through ``open``.

    Returns:
        A list ``[count, average, kind, gcper]`` where ``count`` is the
        number of reads, ``average`` the rounded mean sequence length,
        ``kind`` is ``"Compressed"`` or ``"Simple Text"``, and ``gcper``
        the rounded mean of ``GC(read.seq)`` over all reads. For a file
        without reads both averages are reported as ``0``.
    """
    compressed = isCompressed(filename)
    opener = gzip.open if compressed else open
    kind = "Compressed" if compressed else "Simple Text"

    count = 0
    total_length = 0
    gc_total = 0
    with opener(filename, "rt") as handle:
        for read in SeqIO.parse(handle, "fastq"):
            count += 1
            gc_total += GC(read.seq)
            total_length += len(read.seq)

    if count == 0:
        # No reads: avoid dividing by zero and report empty statistics.
        return [0, 0, kind, 0]
    return [count, round(total_length / count), kind, round(gc_total / count)]
コード例 #8
0
def fetch_head_reads_only(filename, option):
    """Render the first ten reads of a FASTQ file as selected by ``option``.

    Args:
        filename: Path of the FASTQ file; read through ``gzip.open`` when
            ``isCompressed`` says so, otherwise through ``open``.
        option: Output selector.
            ``0`` - full FASTQ text of each read;
            ``1`` - sequence only, one per line;
            ``2`` - read id only, one per line;
            ``3`` - fourth line of each read's FASTQ text, one per line;
            anything else - id line followed by sequence line.

    Returns:
        The rendered reads as one string; fewer than ten reads are returned
        when the file is shorter.
    """
    from itertools import islice

    opener = gzip.open if isCompressed(filename) else open
    with opener(filename, "rt") as handle:
        # islice enforces the ten-read limit for every option; previously
        # only option 0 advanced its counter, so the other options returned
        # the whole file.
        head = islice(SeqIO.parse(handle, "fastq"), 10)
        if option == 0:
            parts = [str(read.format("fastq")) for read in head]
        elif option == 1:
            parts = [str(read.seq) + "\n" for read in head]
        elif option == 2:
            parts = [str(read.id) + "\n" for read in head]
        elif option == 3:
            parts = [str(read.format("fastq").split("\n")[3]) + "\n"
                     for read in head]
        else:
            parts = [str(read.id) + "\n" + str(read.seq) + "\n"
                     for read in head]
    return "".join(parts)
コード例 #9
0
def fetch_inrange_reads_only(filename, option, l_range, u_range):
    """Render the reads at positions ``l_range``..``u_range`` of a FASTQ file.

    Positions are treated as 1-based and inclusive: ``l_range=1,
    u_range=10`` selects the first ten reads.
    NOTE(review): the previous implementation advanced its counter twice
    per selected read and used ``u_range + 1`` as the upper bound, so the
    exact intended window could not be derived from it - confirm that
    1-based inclusive matches what callers expect.

    Args:
        filename: Path of the FASTQ file; read through ``gzip.open`` when
            ``isCompressed`` says so, otherwise through ``open``.
        option: Output selector.
            ``0`` - full FASTQ text of each read;
            ``1`` - sequence only, one per line;
            ``2`` - read id only, one per line;
            ``3`` - fourth line of each read's FASTQ text, one per line;
            anything else - id line followed by sequence line.
        l_range: Position of the first read to include.
        u_range: Position of the last read to include.

    Returns:
        The rendered reads as one string (empty when the window selects
        nothing).
    """
    from itertools import islice

    # islice rejects negative bounds, so clamp out-of-range requests.
    start = max(l_range - 1, 0)
    stop = max(u_range, 0)

    opener = gzip.open if isCompressed(filename) else open
    with opener(filename, "rt") as handle:
        # Only the requested file is read; the window is applied once,
        # identically for every option.
        window = islice(SeqIO.parse(handle, "fastq"), start, stop)
        if option == 0:
            parts = [str(read.format("fastq")) for read in window]
        elif option == 1:
            parts = [str(read.seq) + "\n" for read in window]
        elif option == 2:
            parts = [str(read.id) + "\n" for read in window]
        elif option == 3:
            parts = [str(read.format("fastq").split("\n")[3]) + "\n"
                     for read in window]
        else:
            parts = [str(read.id) + "\n" + str(read.seq) + "\n"
                     for read in window]
    return "".join(parts)