コード例 #1
0
    def test_does_not_readd_headers(self):
        ''' Read groups already present in a bam header must not be duplicated

        Builds ``hasrg.bam`` whose header already contains a custom ``Test``
        read group plus several platform read groups (MiSeq among them), runs
        ``self._C`` on it and inspects the header text that comes back.
        '''
        from ngs_mapper.tagreads import get_bam_header
        from ngs_mapper import samtools
        self.temp_copy_files()
        # Read groups appended after the original header so that the input
        # bam already carries them before the code under test runs
        extra_read_groups = (
            '@RG\tID:Test\tCN:cn\tSM:sm\tPL:ILLUMINA\n',
            '@RG\tID:Roche454\tSM:312\tPL:L454\n',
            '@RG\tID:IonTorrent\tSM:312\tPL:IONTORRENT\n',
            '@RG\tID:MiSeq\tSM:312\tPL:ILLUMINA\n',
            '@RG\tID:Sanger\tSM:312\tPL:CAPILLARY\n',
        )
        new_header = get_bam_header(self.bam) + '\n' + ''.join(extra_read_groups)
        # Pipe yielding the reads of the original bam
        reads_pipe = samtools.view(self.bam)
        # t.sam = new header followed by the original reads
        with open('t.sam', 'w') as sam_fh:
            sam_fh.write(new_header)
            sam_fh.write(reads_pipe.read())
        # Convert the sam file (header included) into bam
        bam_pipe = samtools.view('t.sam', h=True, S=True, b=True)
        with open('hasrg.bam', 'wb') as bam_out:
            bam_out.write(bam_pipe.read())
        # Both pipes have been consumed, release them
        bam_pipe.close()
        reads_pipe.close()
        # hasrg.bam now has pre-existing read groups to test against
        result = self._C('hasrg.bam', 'sm', 'cn')
        # keepends=True so every header line is retained with its newline
        header_lines = result.splitlines(True)
        read_groups = [line for line in header_lines if line.startswith('@RG')]
        num_miseq = sum(1 for line in read_groups if 'ID:MiSeq\t' in line)
        num_test = sum(1 for line in read_groups if 'ID:Test\t' in line)

        eq_(1, num_miseq, "Header was duplicated which is incorrect")
        eq_(1, num_test, "Existing header was removed somehow")
        # Expected total: one line per platform read group (MiSeq included)
        # plus the HD line, the SQ line and the Test read group
        expected_count = len(self.read_group_ids) + 3
        eq_(expected_count, len(header_lines), "Incorrect number of header lines")
コード例 #2
0
 def test_header_correct(self):
     ''' Header text returned by self._C must survive a samtools round trip

     The text is written to ``t.sam`` and read back with the header-only
     samtools view; what comes back (minus trailing whitespace) has to
     equal what was written.
     '''
     from ngs_mapper import samtools
     hdr = self._C(self.bam)
     with open('t.sam', 'w') as fh:
         fh.write(hdr)
     h = samtools.view('t.sam', S=True, H=True)
     try:
         parsed = h.read()
     finally:
         # Always release the samtools pipe, even if reading fails
         h.close()
     eq_(hdr, parsed.rstrip())
コード例 #3
0
 def test_seqencingcenter_argument(self):
     ''' Every @RG header line must carry the value passed with -CN '''
     from ngs_mapper import samtools
     self.temp_copy_files()
     self._C([self.bam], ['-CN', 'seqcenter'])
     # Pull only the header of the bam and release the pipe right away
     header_pipe = samtools.view(self.bam, H=True)
     header_lines = header_pipe.readlines()
     header_pipe.close()
     # Only read group lines are expected to hold the sequencing center
     for line in header_lines:
         if not line.startswith('@RG'):
             continue
         ok_('CN:seqcenter\t' in line,
             "Sequencing center did not make it into the headers")
コード例 #4
0
 def count_rg(self, bam):
     ''' Count how many reads carry each unique read group id

     :param bam: bam file handed to ``samtools.view``
     :return: dict mapping the value of each read's ``RG`` tag to the
         number of reads having that value
     :raises KeyError: if a read has no ``RG`` tag
     '''
     from ngs_mapper import samtools
     counts = {}
     s = samtools.view(bam)
     try:
         for read in s:
             tags = dict(samtools.SamRow(read).TAGS)
             # rg_id rather than id so the builtin is not shadowed
             rg_id = tags['RG']
             counts[rg_id] = counts.get(rg_id, 0) + 1
     finally:
         # Release the samtools pipe even when a read lacks the RG tag
         s.close()
     return counts
コード例 #5
0
 def test_does_multiple_bams(self):
     ''' Several bam files given in one call must each get their own SM

     Copies the fixture bam (and its index) to ``sample2.bam``, runs
     ``self._C`` on both files, then checks that every @RG header line of
     each file has ``SM:<file name without .bam>`` as its third
     tab-separated field.
     '''
     from ngs_mapper import samtools
     self.temp_copy_files()
     bam2 = join(self.tempdir, 'sample2.bam')
     bai2 = join(self.tempdir, 'sample2.bam.bai')
     shutil.copy(self.bam, bam2)
     shutil.copy(self.bai, bai2)
     self._C([self.bam, bam2])
     for b in [self.bam, bam2]:
         self.check_tagreadcounts(b)
         # Expected sample name is the file name with .bam removed
         n = basename(b).replace('.bam', '')
         s = samtools.view(b, H=True)
         try:
             rg = [
                 header.split('\t') for header in s
                 if header.startswith('@RG')
             ]
         finally:
             # One pipe is opened per bam; release it before the next one
             s.close()
         for rgline in rg:
             eq_(
                 'SM:' + n, rgline[2],
                 "Did not set {0} as SM for {1}. Header: {2}".format(
                     n, b, rgline))