コード例 #1
0
 def __init__(self, pairQ2, overlap_baseNumber=10):
     """Initialise the merge state for one pair of FastQ records.

     pairQ2 is indexed at positions 0 and 1. The record at position 0 is
     stored unchanged; the record at position 1 is stored after being
     passed through translator.reverse_complement_FastQ.
     overlap_baseNumber is stored as self._overlapNumber.
     """
     self._merged = []
     first_record, second_record = pairQ2[0], pairQ2[1]
     self._fastQ1 = first_record
     # from now on, fastQ are sense strand
     self._fastQ2 = translator.reverse_complement_FastQ(second_record)
     self._overlapNumber = overlap_baseNumber
     self._assemble_seq = ""
コード例 #2
0
ファイル: ReadSanger.py プロジェクト: zhaiqt/parse_sanger
def concentrate2single(infilepath, outputfilename):  # also reverse translate
    """Combine every ``.fastq`` file of a directory and group the reads by ID.

    For each file in ``infilepath`` whose name ends with ``.fastq``:

    * every row is copied to the combined file ``outputfilename``; rows that
      start with ``@`` are first rewritten to ``"@" + ID``;
    * a FASTA entry (the original header without ``@`` plus the second row of
      the file) is appended to a sibling ``.fasta`` file;
    * the last row is passed through ``covert_Qscore``;
    * if the header contains one of the reverse markers, the record is passed
      through ``translator.reverse_complement_FastQ``.

    Args:
        infilepath: directory that is scanned for ``.fastq`` files.
        outputfilename: path of the combined FASTQ file to write. The FASTA
            file is written next to it, with a trailing ``.fastq`` replaced
            by ``.fasta`` (or ``.fasta`` appended if there is no such suffix).

    Returns:
        A dict mapping each extracted ID to the list of records (one list of
        rows per input file) that carried that ID.

    Raises:
        AttributeError: if a header row contains no four-digit run flanked by
            non-digit characters (``re.search`` returns ``None``).
    """
    fastq_suffix = '.fastq'
    # str.rstrip() removes a *set of characters*, not a suffix, so
    # "out.fastq".rstrip('.fastq') yields "ou". Cut the suffix explicitly.
    if outputfilename.endswith(fastq_suffix):
        fasta_stem = outputfilename[:-len(fastq_suffix)]
    else:
        fasta_stem = outputfilename
    raw_single_fasta_name = fasta_stem + '.fasta'

    # Header substrings that mark a read as needing reverse complementing.
    reverse_markers = ("QB6179", "QB5506", 'QB6178', 'Rev', 'rev')

    all_fastq = {}
    combinedfile_name = outputfilename
    print("concentrate2single name is" + combinedfile_name)
    print(outputfilename)

    # Both output handles are closed on every exit path, including errors.
    with open(raw_single_fasta_name, 'wb') as raw_single_fasta_file, \
            open(combinedfile_name, 'wb') as combinedfile:
        for filename in os.listdir(infilepath):
            if not filename.endswith(fastq_suffix):
                continue

            single_fastq = []
            flag_reverse = False
            with open(os.path.join(infilepath, filename)) as f:
                for row in f:
                    row = row.strip('\n')
                    # NOTE(review): any row starting with '@' is treated as a
                    # header, which would include a quality row that happens
                    # to begin with '@' -- confirm inputs never do.
                    if row.startswith("@"):
                        fasta_output = '\n>' + row.lstrip('@') + '\n'
                        if any(marker in row for marker in reverse_markers):
                            flag_reverse = True
                        # group(0) is the whole match: the four digits plus
                        # the non-digit character on each side. It is kept
                        # as-is because it is the key of the returned dict.
                        IDs = re.search(r'\D(\d{4})\D', row)
                        ID = IDs.group(0)
                        row = "@" + ID
                    combinedfile.write(row + '\n')
                    single_fastq.append(row.lstrip('@'))

            single_fastq[-1] = covert_Qscore(single_fastq[-1])
            fasta_output += single_fastq[1] + '\n'
            raw_single_fasta_file.write(fasta_output)

            if flag_reverse:
                try:
                    single_fastq = translator.reverse_complement_FastQ(
                        single_fastq)
                except Exception:
                    # Best effort: keep the record as read and show it.
                    print(single_fastq)
            all_fastq.setdefault(ID, []).append(single_fastq)

    print("All the fastqs were concentrated into a single file ------%s. "
          "And the ID was extracted." % combinedfile_name)
    return all_fastq
コード例 #3
0
    def anneal_1direction(self, inputfastq1, inputfastq2):
        """Assemble two FastQ records into one sequence and return it.

        The sequence of each record is read from index 1. Degenerate inputs
        are answered directly, without touching self._assemble_seq:

        * both sequences empty -> ' ';
        * first sequence empty, not longer than self._overlapNumber, or
          contained in the second -> the second sequence;
        * the same test with the roles swapped -> the first sequence.

        Otherwise self.find_overlap is evaluated for (1, 2), (2, 1) and
        (1, translator.reverse_complement_FastQ(2)). The assembled sequence
        of the candidate with the longest overlap is stored in
        self._assemble_seq and returned.
        """
        print("$$$$$$")
        print(inputfastq1)
        print(inputfastq2)
        firstSeq = inputfastq1[1]
        secondSeq = inputfastq2[1]
        print(len(firstSeq))
        print(len(secondSeq))

        if not firstSeq and not secondSeq:
            print(" firstSeq and not secondSeq empty")
            return ' '
        if (not firstSeq or len(firstSeq) <= self._overlapNumber
                or firstSeq in secondSeq):
            print("lif not firstSeq or len(firstSeq) <= self._overlapNumber")
            return secondSeq
        if (not secondSeq or len(secondSeq) <= self._overlapNumber
                or secondSeq in firstSeq):
            print("firtseq> 2")
            return firstSeq

        couple_list = []
        print("-------- caculate assemb1--------")
        (overlap_seq1, assembleSeq1) = self.find_overlap(inputfastq1,
                                                         inputfastq2)
        couple_list.append((overlap_seq1, assembleSeq1))
        print("-------- caculate assemb2--------")
        (overlap_seq2, assembleSeq2) = self.find_overlap(inputfastq2,
                                                         inputfastq1)
        couple_list.append((overlap_seq2, assembleSeq2))
        print("-------- caculate assemb3--------")
        antiSense_fastQ2 = translator.reverse_complement_FastQ(inputfastq2)
        (overlap_seq3, assembleSeq3) = self.find_overlap(inputfastq1,
                                                         antiSense_fastQ2)
        couple_list.append((overlap_seq3, assembleSeq3))

        longest_overlap = ('', '')
        print(couple_list)
        for tmp_couple in couple_list:
            # Compare against the best overlap found so far (element 0).
            # len() of the tuple itself is always 2, which would make the
            # last candidate with an overlap longer than 2 win.
            if len(tmp_couple[0]) > len(longest_overlap[0]):
                longest_overlap = tmp_couple
        assembled_fasta = longest_overlap[1]

        print("assembled_fasta: " + assembled_fasta)
        self._assemble_seq = assembled_fasta
        return assembled_fasta
コード例 #4
0
ファイル: Merge2FastQ.py プロジェクト: zhaiqt/parse_sanger
    def anneal2fastq(self):
        """Try to merge self._fastQ1 and self._fastQ2 and store the best result.

        For each minimum_quality cut-off in 15, 20, 25, 30, 35, 40 the first
        record is trimmed with TrimEnds. The trimmed output feeds the next
        iteration, so trimming is cumulative; the second record is never
        trimmed here. Each iteration contributes either one fallback
        candidate (when a sequence is empty, not longer than
        self._overlapNumber, or contained in the other) or five candidates
        from self.find_overlap on different orientations.

        Among candidates whose overlap is longer than 200, the one with the
        longest assembled sequence is kept. Its three fields are written to
        self._overlap_seq, self._assemble_seq and self._mismatch_count.
        Returns None.
        """
        candidates = []

        def record_overlap(left, right, quality):
            # Run one overlap search and keep its result as a 3-tuple.
            overlap, merged, mismatches = self.find_overlap(
                left, right, quality)
            candidates.append((overlap, merged, mismatches))

        read_a = self._fastQ1
        read_b = self._fastQ2
        seq_a = read_a[1]
        seq_b = read_b[1]

        for cut_off in (15, 20, 25, 30, 35, 40):
            trimmer = TrimEnds.TrimEnds(read_a)
            trimmer.trimEnds(minimum_quality=cut_off)
            read_a = trimmer.output_trimed_fastq()
            seq_a = read_a[1]

            # Fallbacks: nothing to align, or one read is unusable/contained.
            if not (seq_a or seq_b):
                print(" firstSeq and not secondSeq empty")
                candidates.append(('', '', 0))
                continue
            if (not seq_a or len(seq_a) <= self._overlapNumber
                    or seq_a in seq_b):
                print(
                    "lif not firstSeq or len(firstSeq) <= self._overlapNumber")
                candidates.append((seq_b, seq_b, 0))
                continue
            if (not seq_b or len(seq_b) <= self._overlapNumber
                    or seq_b in seq_a):
                print("firtseq> 2")
                candidates.append((seq_a, seq_a, 0))
                continue

            # Orientations 1 and 2: both reads as given, in either order.
            record_overlap(read_a, read_b, cut_off)
            record_overlap(read_b, read_a, cut_off)
            # Orientation 3: second read reverse-complemented.
            rc_b = translator.reverse_complement_FastQ(read_b)
            record_overlap(read_a, rc_b, cut_off)
            # Orientations 4 and 5: both reads reverse-complemented.
            rc_a = translator.reverse_complement_FastQ(read_a)
            record_overlap(rc_b, rc_a, cut_off)
            record_overlap(rc_a, rc_b, cut_off)

        best = ('', '', 0)
        for candidate in candidates:
            if len(candidate[0]) <= 200:
                continue
            if len(candidate[1]) > len(best[1]):
                best = candidate

        self._overlap_seq = best[0]
        self._assemble_seq = best[1]
        self._mismatch_count = best[2]

        return
コード例 #5
0
    def anneal2fastq(self):
        """Try to merge self._fastQ1 and self._fastQ2 into self._assemble_seq.

        Both records are trimmed with TrimEnds at minimum_quality 20 (the
        only cut-off in the loop). If a trimmed sequence is empty, not longer
        than self._overlapNumber, or contained in the other one, a single
        fallback candidate is recorded. Otherwise five candidates are
        collected from self.find_overlap on different orientations.

        Among candidates whose overlap is longer than 200, the longest
        assembled sequence is stored in self._assemble_seq (an empty string
        if none qualifies). Returns None.
        """
        candidates = []

        def record_overlap(left, right):
            # Run one overlap search and keep its result as a 2-tuple.
            overlap, merged = self.find_overlap(left, right)
            candidates.append((overlap, merged))

        for cut_off in (20,):
            trimmer_a = TrimEnds.TrimEnds(self._fastQ1)
            trimmer_a.trimEnds(minimum_quality=cut_off)
            read_a = trimmer_a.output_trimed_fastq()
            seq_a = read_a[1]

            trimmer_b = TrimEnds.TrimEnds(self._fastQ2)
            trimmer_b.trimEnds(minimum_quality=cut_off)
            read_b = trimmer_b.output_trimed_fastq()
            seq_b = read_b[1]

            # Fallbacks: nothing to align, or one read is unusable/contained.
            if not (seq_a or seq_b):
                print(" firstSeq and not secondSeq empty")
                candidates.append(('', ''))
                continue
            if (not seq_a or len(seq_a) <= self._overlapNumber
                    or seq_a in seq_b):
                print(
                    "lif not firstSeq or len(firstSeq) <= self._overlapNumber")
                candidates.append((seq_b, seq_b))
                continue
            if (not seq_b or len(seq_b) <= self._overlapNumber
                    or seq_b in seq_a):
                print("firtseq> 2")
                candidates.append((seq_a, seq_a))
                continue

            # Orientations 1 and 2: both reads as trimmed, in either order.
            record_overlap(read_a, read_b)
            record_overlap(read_b, read_a)
            # Orientation 3: second read reverse-complemented.
            rc_b = translator.reverse_complement_FastQ(read_b)
            record_overlap(read_a, rc_b)
            # Orientations 4 and 5: both reads reverse-complemented.
            rc_a = translator.reverse_complement_FastQ(read_a)
            record_overlap(rc_b, rc_a)
            record_overlap(rc_a, rc_b)

        best = ('', '')
        print(candidates)
        for candidate in candidates:
            if len(candidate[0]) <= 200:
                continue
            if len(candidate[1]) > len(best[1]):
                best = candidate

        self._assemble_seq = best[1]
        return