예제 #1
0
    def test_pos_neg_strand(self):
        """
        One read on each strand, both starting at the same position.

        The reverse-strand read (flag 16) has an even length (100), the
        forward-strand read (flag 0) has an odd length (101).
        """
        # Segment layout: (qname, flag, refname, pos, mapq, cigar, tags)
        reverse_read = ('_:rbc:AAA', 16, 0, 50, 255, [(0, 100)], {'NH': 1})
        forward_read = ('_:rbc:CCC', 0, 0, 50, 255, [(0, 101)], {'NH': 1})
        bam_path = make_bam_file({
            'chromosomes': [('chr1', 3000)],
            'segments': [reverse_read, forward_read],
        })

        result = xlsites._processs_bam_file(
            bam_path, self.metrics, 10, self.tmp)

        # Expected grouping: (chrom, strand) -> position -> barcode -> hits
        minus_sites = {150: {'AAA': [(99, 50, 100, 1, 0)]}}
        plus_sites = {49: {'CCC': [(100, 150, 101, 1, 0)]}}
        self.assertEqual(result, {
            ('chr1', '-'): minus_sites,
            ('chr1', '+'): plus_sites,
        })
예제 #2
0
    def test_run_simple(self):
        """
        Run ``xlsites.run`` on ``self.data`` and verify the returned metrics.
        """
        bam_path = make_bam_file(self.data)
        unique_path = get_temp_file_name(extension='.bed.gz')
        multi_path = get_temp_file_name(extension='.bed.gz')
        strange_path = get_temp_file_name(extension='.bam.gz')

        metrics = xlsites.run(
            bam_path,
            unique_path,
            multi_path,
            strange_path,
            mapq_th=5,
            report_progress=True,
        )

        # (metric attribute, expected value), checked in this order.
        expected_metrics = [
            ('all_recs', 6),
            ('notmapped_recs', 1),        # unmapped records
            ('mapped_recs', 5),           # mapped records
            ('lowmapq_recs', 1),          # records with too low quality
            ('used_recs', 4),             # records used in analysis
            ('invalidrandomer_recs', 1),  # records with invalid randomers
            ('norandomer_recs', 1),       # records with no randomers
            ('bc_cn', {'': 2, 'ACG': 1, 'CCCC': 1}),  # barcode counter
            ('strange_recs', 1),          # strange counter
        ]
        for attribute, value in expected_metrics:
            self.assertEqual(getattr(metrics, attribute), value)
예제 #3
0
    def test_explicit_whole_in(self):
        """
        Explicit case: the whole read lies in a single transcript and crosses
        the exon-intron landmark.

        Three reads are provided, giving two different cross-links; one of the
        cross-links carries two distinct randomers.
        """
        # (randomer, pos) of each read; every read is 50 nt long.
        reads = [('CCCC', 140), ('AAAA', 142), ('CCCC', 142)]
        # Segment layout: (qname, flag, refname, pos, mapq, cigar, tags)
        segments = [
            ('name2:rbc:' + randomer, 0, 0, pos, 255, [(0, 50)], {'NH': 1})
            for randomer, pos in reads
        ]
        bam = make_bam_file({
            'chromosomes': [('1', 1000)],
            'segments': segments,
        })

        header = ['RNAmap', 'type', 'position', 'all', 'explicit']
        expected = [
            header,
            ['UTR5-intron', '-10', '1', '1'],
            ['UTR5-intron', '-8', '2', '2'],
        ]

        rnamaps.run(
            bam, self.gtf, self.out, self.strange, self.cross_tr,
            mismatches=1)
        self.assertEqual(expected, make_list_from_file(self.out))
예제 #4
0
    def test_implicit_intergenic(self):
        """
        Implicit case: the whole read lies in an intergenic region.
        """
        # Segment layout: (qname, flag, refname, pos, mapq, cigar, tags)
        read = ('name2:rbc:CCCC', 0, 0, 530, 255, [(0, 30)], {'NH': 1})
        bam = make_bam_file({
            'chromosomes': [('1', 1000)],
            'segments': [read],
        })

        header = ['RNAmap', 'type', 'position', 'all', 'explicit']
        expected = [
            header,
            ['CDS-intergenic', '30', '0.5', '0'],
            ['intergenic-CDS', '-70', '0.5', '0'],
        ]

        rnamaps.run(
            bam, self.gtf, self.out, self.strange, self.cross_tr,
            mismatches=1,
            implicit_handling='split')
        self.assertEqual(expected, make_list_from_file(self.out))
예제 #5
0
    def test_negative_strand(self):
        """
        Negative-strand variant: the annotation is rewritten so that field 6
        of every row is '-', and the single read is reverse-strand (flag 16).
        """
        # Replace the element at index 6 of each annotation row with '-'.
        flipped_rows = []
        for row in intervals_to_list(self.gtf_data):
            flipped_rows.append(row[:6] + ['-'] + row[7:])
        gtf_neg = make_file_from_list(flipped_rows)

        # Segment layout: (qname, flag, refname, pos, mapq, cigar, tags)
        read = ('name2:rbc:CCCC', 16, 0, 549, 255, [(0, 30)], {'NH': 1})
        bam = make_bam_file({
            'chromosomes': [('1', 1000)],
            'segments': [read],
        })

        header = ['RNAmap', 'type', 'position', 'all', 'explicit']
        expected = [
            header,
            ['CDS-intergenic', '20', '0.5', '0'],
            ['intergenic-CDS', '-80', '0.5', '0'],
        ]

        rnamaps.run(
            bam, gtf_neg, self.out, self.strange, self.cross_tr,
            mismatches=1,
            implicit_handling='split')
        self.assertEqual(expected, make_list_from_file(self.out))
예제 #6
0
    def test_implicit_whole_in(self):
        """
        Implicit case: the whole read lies in a single transcript and in a
        single segment, which is the "middle" segment of the transcript.

        Three reads are provided, giving two different cross-links; one of the
        cross-links carries two distinct randomers.
        """
        # (randomer, pos) of each read; every read is 30 nt long.
        reads = [('CCCC', 160), ('CCCC', 163), ('GGGG', 163)]
        # Segment layout: (qname, flag, refname, pos, mapq, cigar, tags)
        segments = [
            ('name2:rbc:' + randomer, 0, 0, pos, 255, [(0, 30)], {'NH': 1})
            for randomer, pos in reads
        ]
        bam = make_bam_file({
            'chromosomes': [('1', 1000)],
            'segments': segments,
        })

        header = ['RNAmap', 'type', 'position', 'all', 'explicit']
        expected = [
            header,
            ['UTR5-intron', '10', '1', '0'],
            ['UTR5-intron', '13', '2', '0'],
        ]

        rnamaps.run(
            bam, self.gtf, self.out, self.strange, self.cross_tr,
            mismatches=1)
        self.assertEqual(expected, make_list_from_file(self.out))
예제 #7
0
    def test_implicit_exons(self):
        """
        Implicit case: the whole read lies in a single transcript and in a
        single segment of EXON_TYPE that is a "middle" segment of the
        transcript. Only one read is provided.
        """
        # Segment layout: (qname, flag, refname, pos, mapq, cigar, tags)
        read = ('name2:rbc:CCCC', 0, 0, 205, 255, [(0, 20)], {'NH': 1})
        bam = make_bam_file({
            'chromosomes': [('1', 1000)],
            'segments': [read],
        })

        header = ['RNAmap', 'type', 'position', 'all', 'explicit']
        expected = [
            header,
            ['CDS-UTR3', '-25', '0.25', '0'],
            ['CDS-intron', '-25', '0.25', '0'],
            ['UTR5-CDS', '5', '0.25', '0'],
            ['intron-CDS', '5', '0.25', '0'],
        ]

        rnamaps.run(
            bam, self.gtf, self.out, self.strange, self.cross_tr,
            mismatches=1,
            implicit_handling='split')
        self.assertEqual(expected, make_list_from_file(self.out))
예제 #8
0
    def test_explicit_intergenic_right(self):
        """
        Explicit case: half of the read is in a transcript region and the
        other half is in intergenic.
        """
        # Segment layout: (qname, flag, refname, pos, mapq, cigar, tags)
        read = ('name2:rbc:CCCC', 0, 0, 480, 255, [(0, 50)], {'NH': 1})
        bam = make_bam_file({
            'chromosomes': [('1', 1000)],
            'segments': [read],
        })

        header = ['RNAmap', 'type', 'position', 'all', 'explicit']
        expected = [
            header,
            ['CDS-intergenic', '-20', '1', '1'],
        ]

        rnamaps.run(
            bam, self.gtf, self.out, self.strange, self.cross_tr,
            mismatches=1)
        self.assertEqual(expected, make_list_from_file(self.out))
예제 #9
0
    def test_cross_transcript_read(self):
        """
        A single read is processed and the cross-transcript report written to
        ``self.cross_tr`` is expected to contain exactly one record for it.
        """
        # Segment layout: (qname, flag, refname, pos, mapq, cigar, tags)
        read = ('name2:rbc:CCCC', 0, 0, 235, 255, [(0, 50)], {'NH': 1})
        bam = make_bam_file({
            'chromosomes': [('1', 1000)],
            'segments': [read],
        })

        header = [
            'chrom', 'strand', 'xlink', 'second-start', 'end-position',
            'read_len',
        ]
        expected = [
            header,
            ['1', '+', '234', '0', '284', '50'],
        ]

        rnamaps.run(
            bam, self.gtf, self.out, self.strange, self.cross_tr,
            mismatches=1)
        # Unlike the RNA-map tests, the file inspected is the cross_tr one.
        self.assertEqual(expected, make_list_from_file(self.cross_tr))
예제 #10
0
    def test_implicit_inter_tr(self):
        """
        Implicit case: the whole read lies in a single transcript and a single
        segment, but that segment borders on intergenic (downstream).
        """
        # Segment layout: (qname, flag, refname, pos, mapq, cigar, tags)
        read = ('name2:rbc:CCCC', 0, 0, 610, 255, [(0, 30)], {'NH': 1})
        bam_data = {
            'chromosomes': [('1', 1000)],
            'segments': [read],
        }
        bam = make_bam_file(bam_data, rnd_seed=0)

        header = ['RNAmap', 'type', 'position', 'all', 'explicit']
        expected = [
            header,
            ['CDS-CDS', '-40', '0.3333', '0'],
            ['CDS-intron', '-40', '0.3333', '0'],
            ['intergenic-CDS', '10', '0.3333', '0'],
        ]

        rnamaps.run(
            bam, self.gtf, self.out, self.strange, self.cross_tr,
            mismatches=1,
            implicit_handling='split')
        self.assertEqual(expected, make_list_from_file(self.out))
예제 #11
0
    def test_diff_barcodes(self):
        """
        Four reads at the same position carrying three distinct barcodes.

        Barcode ``CCC`` occurs twice, so its entry is expected to hold two
        identical hits.
        """
        barcodes = ['AAA', 'CCC', 'CCC', 'GGG']
        # Segment layout: (qname, flag, refname, pos, mapq, cigar, tags)
        segments = [
            ('_:rbc:' + barcode, 0, 0, 50, 255, [(0, 101)], {'NH': 1})
            for barcode in barcodes
        ]
        bam_path = make_bam_file(
            {
                'chromosomes': [('chr1', 3000)],
                'segments': segments,
            },
            rnd_seed=0)

        # Materialise the result so it can be compared against a list.
        result = list(
            xlsites._processs_bam_file(bam_path, self.metrics, 10, self.tmp))

        hit = (100, 150, 101, 1, 0)
        sites = {
            49: {
                'AAA': [hit],
                'CCC': [hit, hit],
                'GGG': [hit],
            }
        }
        self.assertEqual(result, [(('chr1', '+'), 0.0167, sites)])
예제 #12
0
파일: test_cli.py 프로젝트: bakerwm/iCount
    def setUp(self):
        """
        Create the temporary paths and small input files shared by the tests:
        cross-links (BED), peaks, an annotation, a GTF and a BAM file.
        """
        warnings.simplefilter("ignore", ResourceWarning)

        # Scratch locations for command output.
        self.tmp1 = get_temp_file_name()
        self.tmp2 = get_temp_file_name()
        self.dir = get_temp_dir()
        self.dir2 = get_temp_dir()

        cross_link_rows = [
            ['1', '16', '17', '.', '5', '+'],
            ['1', '14', '15', '.', '5', '+'],
            ['1', '15', '16', '.', '5', '+'],
        ]
        self.cross_links = make_file_from_list(cross_link_rows,
                                               extension='bed')

        peak_rows = [['1', '15', '16', '.', '15', '+']]
        self.peaks = make_file_from_list(peak_rows)

        annotation_rows = [
            ['1', '.', 'CDS', '10', '20', '.', '+', '.', 'biotype "A";'],
            ['1', '.', 'ncRNA', '10', '20', '.', '+', '.', 'biotype "A";'],
            ['1', '.', 'CDS', '10', '20', '.', '+', '.', 'biotype "A";'],
            ['1', '.', 'CDS', '10', '20', '.', '+', '.', 'biotype "B";'],
            ['1', '.', 'CDS', '10', '20', '.', '-', '.', 'biotype "C";'],
            ['1', '.', 'CDS', '12', '18', '.', '+', '.', 'biotype "A";'],
            ['1', '.', 'CDS', '30', '40', '.', '+', '.', 'biotype "D";'],
        ]
        self.annotation = make_file_from_list(annotation_rows)

        # One gene with a single transcript that has a single exon.
        gene_row = [
            '1', '.', 'gene', '10', '20', '.', '+', '.', 'gene_id "A";'
        ]
        transcript_row = [
            '1', '.', 'transcript', '10', '20', '.', '+', '.',
            'gene_id "A"; transcript_id "AA";'
        ]
        exon_row = [
            '1', '.', 'exon', '10', '20', '.', '+', '.',
            'gene_id "A"; transcript_id "AA"; exon_number "1";'
        ]
        self.gtf = make_file_from_list([gene_row, transcript_row, exon_row])

        # Segment layout: (qname, flag, refname, pos, mapq, cigar, tags)
        bam_segments = [
            ('name3:rbc:CCCC:', 0, 0, 100, 20, [(0, 100)], {'NH': 1}),
            ('name4:ABC', 0, 0, 300, 20, [(0, 200)], {'NH': 11}),
        ]
        bam_data = {
            'chromosomes': [('1', 3000), ('2', 2000)],
            'segments': bam_segments,
        }
        self.bam = make_bam_file(bam_data, rnd_seed=0)
예제 #13
0
    def test_no_nh_tag(self):
        """
        A record without the "NH" tag must make processing raise ValueError.
        """
        # Single segment whose tag dict is empty, i.e. no NH tag is set.
        untagged_read = ('name5', 0, 0, 0, 50, [(0, 100)], {})
        bam_path = make_bam_file(
            {
                'chromosomes': [('chr1', 3000)],
                'segments': [untagged_read],
            },
            rnd_seed=0)

        expected_error = r'"NH" tag not set for record: .*'
        with self.assertRaisesRegex(ValueError, expected_error):
            # list() consumes the result so the records are actually read.
            list(
                xlsites._processs_bam_file(
                    bam_path, self.metrics, 10, self.tmp))
예제 #14
0
 def test_low_quality(self):
     """
     Read with low mapping quality (MAPQ=3, FLAG=0).

     The read must be counted in ``lowmapq_recs`` and not in ``used_recs``.
     The value 10 passed to ``_processs_bam_file`` is presumably the MAPQ
     threshold - confirm against its signature.
     """
     # Segment layout: (qname, flag, refname, pos, mapq, cigar, tags)
     low_mapq_read = ('name1', 0, 0, 0, 3, [(0, 0)], {})
     bam_path = make_bam_file({
         'chromosomes': [('chr1', 3000)],
         'segments': [low_mapq_read],
     })

     # Start both counters from zero so the assertions see only this run.
     self.metrics.lowmapq_recs = 0
     self.metrics.used_recs = 0

     xlsites._processs_bam_file(
         bam_path, self.metrics, 10, self.tmp)

     self.assertEqual(self.metrics.lowmapq_recs, 1)
     self.assertEqual(self.metrics.used_recs, 0)
예제 #15
0
 def test_unmapped(self):
     """
     Unmapped read (FLAG=4).

     The read must be counted in ``all_recs`` and ``notmapped_recs`` and
     not in ``used_recs``.
     """
     # Segment layout: (qname, flag, refname, pos, mapq, cigar, tags)
     unmapped_read = ('name1', 4, 0, 0, 0, [(0, 0)], {})
     bam_path = make_bam_file(
         {
             'chromosomes': [('chr1', 3000)],
             'segments': [unmapped_read],
         },
         rnd_seed=0)

     # Start the counters from zero so the assertions see only this run.
     self.metrics.all_recs = 0
     self.metrics.notmapped_recs = 0
     self.metrics.used_recs = 0

     # list() consumes the result so the records are actually read.
     list(
         xlsites._processs_bam_file(
             bam_path, self.metrics, 0, self.tmp))

     self.assertEqual(self.metrics.notmapped_recs, 1)
     self.assertEqual(self.metrics.all_recs, 1)
     self.assertEqual(self.metrics.used_recs, 0)