def test_map_reads(self):
    """Map the paired test reads and verify the flagstat summary and @RG header.

    Runs read_map.map_reads with read_group=("1", "GROUP_NAME"), then checks:
      * the output SAM file was created;
      * `samtools flagstat` output, with every line containing "primary"
        filtered out by grep, is identical to the expected "flagstat"
        file in data_dir;
      * the SAM file contains the exact @RG header line for that read group.
    """
    ref_path = os.path.join(data_dir, "ref.fa")
    fwd_reads = os.path.join(data_dir, "reads.1.fq")
    rev_reads = os.path.join(data_dir, "reads.2.fq")
    want_stats = os.path.join(data_dir, "flagstat")
    out_sam = "tmp.test_map_reads.sam"
    got_stats = out_sam + ".stats"

    # Remove any SAM file left over from an earlier run before mapping.
    if os.path.exists(out_sam):
        os.unlink(out_sam)

    read_map.map_reads(
        ref_path,
        fwd_reads,
        rev_reads,
        out_sam,
        read_group=("1", "GROUP_NAME"),
    )
    self.assertTrue(os.path.exists(out_sam))

    # Lines mentioning "primary" are dropped before comparing
    # (presumably because not every samtools version prints them -- confirm).
    flagstat_cmd = "samtools flagstat " + out_sam + " | grep -v primary > " + got_stats
    utils.syscall(flagstat_cmd)
    self.assertTrue(filecmp.cmp(want_stats, got_stats, shallow=False))

    # The read group passed above must appear as this exact header line.
    with open(out_sam) as sam_handle:
        has_rg_header = any(
            sam_line == "@RG\tLB:LIB\tID:1\tSM:GROUP_NAME\n" for sam_line in sam_handle
        )
    self.assertTrue(has_rg_header)

    os.unlink(out_sam)
    os.unlink(got_stats)
def test_map_reads(self):
    '''Map the paired test reads and verify the flagstat summary and @RG header.

    Runs read_map.map_reads with read_group=('1', 'GROUP_NAME'), then checks:
      * the output SAM file was created;
      * the full `samtools flagstat` output is identical to the expected
        'flagstat' file in data_dir;
      * the SAM file contains the exact @RG header line for that read group.
    '''
    ref_path = os.path.join(data_dir, 'ref.fa')
    fwd_reads = os.path.join(data_dir, 'reads.1.fq')
    rev_reads = os.path.join(data_dir, 'reads.2.fq')
    want_stats = os.path.join(data_dir, 'flagstat')
    out_sam = 'tmp.test_map_reads.sam'
    got_stats = out_sam + '.stats'

    # Remove any SAM file left over from an earlier run before mapping.
    if os.path.exists(out_sam):
        os.unlink(out_sam)

    read_map.map_reads(
        ref_path,
        fwd_reads,
        rev_reads,
        out_sam,
        read_group=('1', 'GROUP_NAME'),
    )
    self.assertTrue(os.path.exists(out_sam))

    flagstat_cmd = 'samtools flagstat ' + out_sam + ' > ' + got_stats
    utils.syscall(flagstat_cmd)
    self.assertTrue(filecmp.cmp(want_stats, got_stats, shallow=False))

    # The read group passed above must appear as this exact header line.
    with open(out_sam) as sam_handle:
        has_rg_header = any(
            sam_line == '@RG\tLB:LIB\tID:1\tSM:GROUP_NAME\n' for sam_line in sam_handle
        )
    self.assertTrue(has_rg_header)

    os.unlink(out_sam)
    os.unlink(got_stats)
def test_map_reads_markdup_and_rmdup(self):
    """Requesting rmdup and markdup together must make map_reads raise.

    The file-name arguments are placeholder strings; the test only asserts
    that an Exception is raised when both flags are True.
    """
    self.assertRaises(
        Exception,
        read_map.map_reads,
        "ref_fasta",
        "reads1",
        "reads2",
        "sam",
        rmdup=True,
        markdup=True,
    )
def test_map_reads_markdup_and_rmdup(self):
    '''Requesting rmdup and markdup together must raise read_map.Error.

    The file-name arguments are placeholder strings; the test only asserts
    that read_map.Error is raised when both flags are True.
    '''
    self.assertRaises(
        read_map.Error,
        read_map.map_reads,
        'ref_fasta',
        'reads1',
        'reads2',
        'sam',
        rmdup=True,
        markdup=True,
    )
def test_map_reads_secondary_hits_removed(self):
    """Check that map_reads leaves no secondary alignment in its output.

    Maps the single read pair from the "secondary_hits_removed" fixtures
    and asserts that the output SAM file holds exactly two records.
    """
    ref_path = os.path.join(data_dir, "secondary_hits_removed.ref.fa")
    fwd_reads = os.path.join(data_dir, "secondary_hits_removed.reads_1.fq")
    rev_reads = os.path.join(data_dir, "secondary_hits_removed.reads_2.fq")
    out_sam = "tmp.test_map_reads.sam"

    # Remove any SAM file left over from an earlier run before mapping.
    if os.path.exists(out_sam):
        os.unlink(out_sam)

    read_map.map_reads(ref_path, fwd_reads, rev_reads, out_sam)

    # bwa mem reports one secondary alignment for this data, so the single
    # read pair yields 3 SAM records. Once the secondary match has been
    # removed, 2 records should remain.
    record_total = utils.sam_record_count(out_sam)
    self.assertEqual(2, record_total)

    os.unlink(out_sam)
def test_map_reads_markdup(self):
    '''Map the paired test reads with markdup=True and verify flagstat output.

    Checks that the output SAM file is created and that the
    `samtools flagstat` summary of it is identical to the expected
    'markdup.flagstat' file in data_dir.
    '''
    ref_path = os.path.join(data_dir, 'ref.fa')
    fwd_reads = os.path.join(data_dir, 'reads.1.fq')
    rev_reads = os.path.join(data_dir, 'reads.2.fq')
    want_stats = os.path.join(data_dir, 'markdup.flagstat')
    out_sam = 'tmp.test_map_reads.sam'
    got_stats = out_sam + '.stats'

    # Remove any SAM file left over from an earlier run before mapping.
    if os.path.exists(out_sam):
        os.unlink(out_sam)

    read_map.map_reads(ref_path, fwd_reads, rev_reads, out_sam, markdup=True)
    self.assertTrue(os.path.exists(out_sam))

    flagstat_cmd = 'samtools flagstat ' + out_sam + ' > ' + got_stats
    utils.syscall(flagstat_cmd)
    stats_match = filecmp.cmp(want_stats, got_stats, shallow=False)
    self.assertTrue(stats_match)

    os.unlink(out_sam)
    os.unlink(got_stats)
def test_map_reads_markdup(self):
    """Map the paired test reads with markdup=True and verify flagstat output.

    Checks that the output SAM file is created and that the
    `samtools flagstat` summary of it is identical to the expected
    "markdup.flagstat" file in data_dir.
    """
    ref_path = os.path.join(data_dir, "ref.fa")
    fwd_reads = os.path.join(data_dir, "reads.1.fq")
    rev_reads = os.path.join(data_dir, "reads.2.fq")
    want_stats = os.path.join(data_dir, "markdup.flagstat")
    out_sam = "tmp.test_map_reads.sam"
    got_stats = out_sam + ".stats"

    # Remove any SAM file left over from an earlier run before mapping.
    if os.path.exists(out_sam):
        os.unlink(out_sam)

    read_map.map_reads(ref_path, fwd_reads, rev_reads, out_sam, markdup=True)
    self.assertTrue(os.path.exists(out_sam))

    flagstat_cmd = "samtools flagstat " + out_sam + " > " + got_stats
    utils.syscall(flagstat_cmd)
    stats_match = filecmp.cmp(want_stats, got_stats, shallow=False)
    self.assertTrue(stats_match)

    os.unlink(out_sam)
    os.unlink(got_stats)
def _map_reads(cls, ref_fasta, reads1, reads2, outfile):
    """Delegate to read_map.map_reads with duplicate marking switched on.

    The four arguments are forwarded positionally and unchanged, and
    markdup=True is always passed. `cls` is not used, and the result of
    read_map.map_reads is not returned.
    """
    forwarded = (ref_fasta, reads1, reads2, outfile)
    read_map.map_reads(*forwarded, markdup=True)