Beispiel #1
0
 def test_exons_returns_exon_sequence_ranges(self):
     # exons() on the fixture transcript is compared, order included, with
     # one SequenceRange per (start, end) pair on chromosome '0'.
     exon_bounds = [(3, 5), (10, 20), (30, 40), (50, 60)]
     expected = [SequenceRange('0', start, end)
                 for start, end in exon_bounds]
     self.assertEqual(self.transcript.exons(), expected)
 def test_condense(self):
     # range_12 and range_24 are expected to condense into a single 1-4
     # range, while range_56 is expected to come back unchanged.
     merged = SequenceRange('0', 1, 4)
     pair = SequenceRange.condense(self.range_12, self.range_24)
     self.assertEqual(pair, [merged])
     triple = SequenceRange.condense(
         self.range_12, self.range_24, self.range_56)
     self.assertEqual(triple, [merged, self.range_56])
 def test_concat(self):
     # concat() is expected to give the same 1-4 range whichever operand it
     # is called on, and to raise ValueError for range_12 with range_56.
     joined = SequenceRange('0', 1, 4)
     forwards = self.range_12.concat(self.range_24)
     self.assertEqual(forwards, joined)
     backwards = self.range_24.concat(self.range_12)
     self.assertEqual(backwards, joined)
     with self.assertRaises(ValueError):
         self.range_12.concat(self.range_56)
Beispiel #4
0
 def test_transcript_range(self):
     # Each case pairs the (start, end) arguments with the list of ranges
     # expected back; the last case expects two separate ranges.
     cases = [
         ((1, 2), [SequenceRange('0', 11, 12)]),
         ((1, 6), [SequenceRange('0', 11, 16)]),
         ((1, 16), [SequenceRange('0', 11, 20),
                    SequenceRange('0', 30, 36)]),
     ]
     for (start, end), expected in cases:
         self.assertEqual(self.transcript.transcript_range(start, end),
                          expected)
 def test_bases_returns_base_pair_range(self):
     # (chromosome, start, end) -> bases expected from the reference genome.
     expectations = (
         (('1', 2, 8), 'AACCCC'),
         (('X', 15, 16), 't'),
         (('1', 0, 16), 'AAAACCCCGGGGTTTT'),
     )
     for (chromosome, start, end), sequence in expectations:
         located = SequenceRange(chromosome, start, end)
         self.assertEqual(reference.bases(located, self.ref_genome),
                          sequence)
Beispiel #6
0
 def test_exons_returns_reversed_positions_when_strand_minus(self):
     # Flipping the strand flag on an existing transcript is a test-only
     # shortcut; never do this outside of a test.
     self.transcript.plus_strand = False
     ascending = [(3, 5), (10, 20), (30, 40), (50, 60)]
     expected = [SequenceRange('0', start, end)
                 for start, end in reversed(ascending)]
     self.assertEqual(self.transcript.exons(), expected)
Beispiel #7
0
    def get_ranges(self):
        """Return the two SequenceRange halves of an exon probe.

        Each half is flagged for reverse-complementing where needed so that
        the breakpoint sits in the centre of the probe. The first half is
        flagged when its side equals its strand (the start of an exon on the
        plus strand, or the end of an exon on the minus strand); the second
        half is flagged when its strand and side differ (the start of an
        exon on the minus strand, or the end of an exon on the plus strand).

        When the '->' separator is used and the first exon is on the '-'
        strand, the two halves trade places so that the reading frames of
        the two exons are preserved.

        For example:
                                                BAR
                                               |=========>
            ..............................................
            ..............................................
            <-------|
                 FOO


            FOO-/BAR+
                    <----|====
            FOO->BAR
                    ====|<----

        """
        spec = self._spec
        chrom_a, chrom_b = spec["chromosome1"], spec["chromosome2"]
        strand_a, strand_b = spec["strand1"], spec["strand2"]
        side_a, side_b = spec["side1"], spec["side2"]
        lo_a, hi_a, lo_b, hi_b = self._get_ranges()

        # Bundle everything belonging to one half so both halves can be
        # exchanged in a single step.
        halves = [
            (chrom_a, lo_a, hi_a, strand_a, side_a),
            (chrom_b, lo_b, hi_b, strand_b, side_b),
        ]
        if spec['separator'] == '->' and strand_a == '-':
            halves.reverse()
        left, right = halves

        left_chrom, left_lo, left_hi, left_strand, left_side = left
        right_chrom, right_lo, right_hi, right_strand, right_side = right
        leading = SequenceRange(
            left_chrom,
            left_lo,
            left_hi,
            reverse_complement=(left_side == left_strand),
        )
        trailing = SequenceRange(
            right_chrom,
            right_lo,
            right_hi,
            reverse_complement=(right_strand != right_side),
        )
        return (leading, trailing)
Beispiel #8
0
 def test_nucleotide_index(self):
     # Only the first three transcripts of ANNOTATION are exercised.
     first, second, third, *_ = ANNOTATION
     self.assertEqual(first.nucleotide_index(1), SequenceRange('1', 1, 2))
     self.assertEqual(second.nucleotide_index(2), SequenceRange('2', 9, 10))
     # Positions expected on chromosome '3' for transcript indices 1, 2,
     # 3, ... of the third transcript.
     expected_positions = [22, 21, 20, 14, 13, 12, 11, 10]
     for index, position in enumerate(expected_positions, start=1):
         self.assertEqual(third.nucleotide_index(index),
                          SequenceRange('3', position, position + 1))
Beispiel #9
0
    def codon_index(self, index):
        """Return the SequenceRange for the codon at 1-based `index`.

        The position `pos` of transcript index ``index * 3`` is looked up
        with `_transcript_index`. The returned range is
        ``(pos - 2, pos + 1)`` on the plus strand and ``(pos, pos + 3)``
        otherwise.

        """
        anchor = self._transcript_index(index * 3)
        first = anchor - 2 if self.plus_strand else anchor
        return SequenceRange(self.chromosome, first, first + 3)
 def test_bases_with_reverse_complement(self):
     # (chromosome, start, end) -> bases expected when the range is built
     # with reverse_complement=True.
     expectations = (
         (('1', 2, 8), 'GGGGTT'),
         (('X', 15, 16), 'a'),
         (('1', 0, 16), 'AAAACCCCGGGGTTTT'),
     )
     for (chromosome, start, end), sequence in expectations:
         flipped = SequenceRange(chromosome, start, end,
                                 reverse_complement=True)
         self.assertEqual(reference.bases(flipped, self.ref_genome),
                          sequence)
Beispiel #11
0
 def test_condense(self):
     # Condensing range_12 with range_24 is expected to give one 1-4 range;
     # adding range_56 is expected to leave it as a second, separate entry.
     two_inputs = (self.range_12, self.range_24)
     three_inputs = two_inputs + (self.range_56,)
     self.assertEqual(SequenceRange.condense(*two_inputs),
                      [SequenceRange('0', 1, 4)])
     self.assertEqual(SequenceRange.condense(*three_inputs),
                      [SequenceRange('0', 1, 4), self.range_56])
Beispiel #12
0
 def get_ranges(self):
     """Return a pair of SequenceRange objects, one per probe half.

     The start and end of each half come from `_parse_range`, applied to
     that half's index, operation and bases entries of the specification.
     The chromosome and reverse-complement flag are read straight from the
     specification.

     """
     spec = self._spec
     first_start, first_end = _parse_range(
         spec['index1'], spec['operation1'], spec['bases1'])
     second_start, second_end = _parse_range(
         spec['index2'], spec['operation2'], spec['bases2'])
     first_half = SequenceRange(
         spec['chromosome1'], first_start, first_end,
         reverse_complement=spec['rc_side_1'])
     second_half = SequenceRange(
         spec['chromosome2'], second_start, second_end,
         reverse_complement=spec['rc_side_2'])
     return (first_half, second_half)
Beispiel #13
0
    def nucleotide_index(self, index):
        """Return the SequenceRange for one base pair of the transcript.

        `index` is a 1-based base pair index into the transcript. It is
        translated with `_transcript_index`, and the returned range runs
        from that position to the position plus one.

        """
        position = self._transcript_index(index)
        following = position + 1
        return SequenceRange(self.chromosome, position, following)
Beispiel #14
0
    def coding_exons(self):
        """As in `exons`, but with the UTRs trimmed out.

        The exon list from `exons` is clipped to the interval given by the
        'cdsStart' and 'cdsEnd' entries of the specification. Exons lying
        entirely before the coding start are dropped, exons overlapping
        either end of the interval are shortened to it, and scanning stops
        once the coding end has been reached.

        Returns a list of SequenceRange objects. On the minus strand the
        list is in reversed order, as for `exons`.

        Raises AssertionError if an exon matches none of the expected
        arrangements relative to the coding interval (which should only be
        possible with inconsistent coordinates).

        """
        cds_start = int(self._spec['cdsStart'])
        cds_end = int(self._spec['cdsEnd'])
        exon_positions = self.exons()
        positions = []

        # The scan below compares against cds_start first and cds_end last;
        # presumably `exons` hands back minus-strand exons in reversed
        # order, so flip them back before scanning.
        if not self.plus_strand:
            exon_positions.reverse()

        for exon in exon_positions:
            if exon.end < cds_start:
                # Entirely before the coding start: skip.
                pass
            elif exon.start <= cds_start <= cds_end <= exon.end:
                # The whole coding interval sits inside this one exon.
                positions.append((cds_start, cds_end))
                break
            elif exon.start <= cds_start <= exon.end:
                # Exon contains the coding start: keep its tail.
                positions.append((cds_start, exon.end))
            elif cds_start <= exon.start <= exon.end <= cds_end:
                # Exon lies fully inside the coding interval.
                positions.append((exon.start, exon.end))
            elif exon.start <= cds_end <= exon.end:
                # Exon contains the coding end: keep its head and stop.
                positions.append((exon.start, cds_end))
                break
            elif cds_end <= exon.start:
                # Past the coding end: nothing more to collect.
                break
            else:
                # Raised explicitly rather than via `assert False`, which is
                # removed when Python runs with -O and would let the loop
                # carry on silently.
                raise AssertionError(
                    "unreachable: {}/{}".format(self.name, self.gene_id))
        if not self.plus_strand:
            positions.reverse()
        return [SequenceRange(self.chromosome, start, end)
                for start, end in positions]
Beispiel #15
0
    def exons(self):
        """Return the exons of a UCSC annotation feature as SequenceRanges.

        In a UCSC annotation file, the positions of the starts and ends of
        exons are stored as comma-separated strings:

            '20,30,40,'

        The 'exonStarts' and 'exonEnds' entries of the specification are
        split on commas and paired up position by position. Pairs in which
        either entry is empty (such as the one produced by a trailing comma)
        are skipped, and the remaining entries are converted with `int`.

        When the feature is not on the plus strand the exons are returned in
        reversed order. The first exon relative to the direction of
        transcription is then the last exon along the chromosome reading
        left to right along the '+' strand, which is how the data are
        stored in UCSC tables.

        """
        start_fields = self._spec['exonStarts'].split(',')
        end_fields = self._spec['exonEnds'].split(',')
        bounds = [
            (int(start_field), int(end_field))
            for start_field, end_field in zip(start_fields, end_fields)
            if start_field != '' and end_field != ''
        ]
        ordered = bounds if self.plus_strand else bounds[::-1]
        return [SequenceRange(self.chromosome, low, high)
                for low, high in ordered]
Beispiel #16
0
 def get_ranges(self):
     """Return three SequenceRange objects around a single-base mutation.

     The ranges are the sequence before the mutated base, the mutated base
     itself (carrying the specification's mutation) and the sequence after
     it. Together they span 'bases' positions.

     """
     spec = self._spec
     total = spec['bases']
     chrom = spec['chromosome']
     # The specification's index is shifted down by one (presumably from
     # 1-based input to 0-based coordinates -- TODO confirm).
     site = spec['index'] - 1
     upstream = total // 2 - 1
     downstream = total - upstream
     before = SequenceRange(chrom, site - upstream, site)
     variant = SequenceRange(chrom, site, site + 1,
                             mutation=spec["mutation"])
     after = SequenceRange(chrom, site + 1, site + downstream)
     return (before, variant, after)
Beispiel #17
0
class TestSequenceRange(unittest.TestCase):
    """Tests for SequenceRange.concat, .adjacent and .condense.

    All tests share three small fixture ranges on chromosome '0'.
    """

    def setUp(self):
        # Fixture names encode the start and end of each range.
        self.range_12, self.range_24, self.range_56 = (
            SequenceRange('0', start, end)
            for start, end in ((1, 2), (2, 4), (5, 6)))

    def test_concat(self):
        # Same 1-4 result expected from either operand; range_12 with
        # range_56 is expected to raise ValueError.
        joined = SequenceRange('0', 1, 4)
        self.assertEqual(self.range_12.concat(self.range_24), joined)
        self.assertEqual(self.range_24.concat(self.range_12), joined)
        with self.assertRaises(ValueError):
            self.range_12.concat(self.range_56)

    def test_adjacent(self):
        # adjacent() is expected to hold in both directions for
        # range_12/range_24 and to be false for range_12/range_56.
        touching = (self.range_12, self.range_24)
        for one, other in (touching, touching[::-1]):
            self.assertTrue(one.adjacent(other))
        self.assertFalse(self.range_12.adjacent(self.range_56))

    def test_condense(self):
        # range_12 and range_24 are expected to merge; range_56 is expected
        # to stay a separate entry.
        merged = SequenceRange('0', 1, 4)
        pair = SequenceRange.condense(self.range_12, self.range_24)
        self.assertEqual(pair, [merged])
        triple = SequenceRange.condense(
            self.range_12, self.range_24, self.range_56)
        self.assertEqual(triple, [merged, self.range_56])
Beispiel #18
0
    def transcript_range(self, start, end):
        """Return the genomic SequenceRanges for a stretch of transcript.

        `start` and `end` are 1-based transcript indices, left-inclusive
        and right-exclusive. Every index in that interval is mapped with
        `nucleotide_index`, and the resulting one-base ranges are passed to
        `SequenceRange.condense`.

        More than one SequenceRange is returned if the requested range
        crosses exon boundaries.

        """
        per_base = map(self.nucleotide_index, range(start, end))
        return SequenceRange.condense(*per_base)
Beispiel #19
0
    def sequence_ranges(self):
        """Return the variant plus flanking genomic sequence as ranges.

        The result is a list of three SequenceRange objects: the left
        buffer, the variant itself (carrying the mutation, and
        reverse-complemented when the transcript is not on the plus strand)
        and the right buffer. The buffer size is ``len(self)`` minus the
        mutation length, split between the two sides with any odd base
        going to the right.

        """
        chromosome, start, _end, _, _ = self.index

        ref_len = len(self.reference)
        mut_len = len(self.mutation)

        padding = len(self) - mut_len
        upstream = padding // 2
        downstream = padding - upstream

        # Position just past the reference allele on the genome.
        variant_end = start + ref_len

        leading = SequenceRange(chromosome, start - upstream, start)
        variant = SequenceRange(
            chromosome,
            start,
            variant_end,
            mutation=self.mutation,
            reverse_complement=not self.transcript.plus_strand)
        trailing = SequenceRange(chromosome, variant_end,
                                 variant_end + downstream)
        return [leading, variant, trailing]
    def assert_mock_gene_in_file(self, annotation_file):
        """Assert that exactly one "MOCK_GENE" is in `annotation_file`.

        `annotation_file` is a handle to one of the UCSC annotation files
        used for testing. The file is parsed, the gene is looked up, and the
        single matching feature is expected to have the one exon
        ``SequenceRange('0', 2, 3)``.

        """
        parsed = annotation.parse_ucsc_file(annotation_file)
        candidates = annotation.lookup_gene("MOCK_GENE", parsed)
        # NOTE: any ValueError raised inside the try -- by the one-element
        # unpacking, but also by exons() -- is reported as an unexpected
        # number of mock genes.
        try:
            (only_match,) = tuple(candidates)
            found_exons = only_match.exons()
            self.assertEqual(found_exons, [SequenceRange('0', 2, 3)])
        except ValueError as error:
            self.fail("Unexpected number of mock genes: {}".format(error))
Beispiel #21
0
    def sequence_ranges(self):
        """Return a list of SequenceRange objects representing the variant and
        a buffer sequence taken from the surrounding transcript sequence (i.e.,
        intronic sequences are skipped).

        The buffer size is ``len(self)`` minus the mutation length, split
        between the two sides of the variant. On the minus strand the two
        buffer sizes are swapped and the resulting list is reversed.

        A list is returned on both strands.

        NOTE(review): the buffer lookups are presumably what raise OutOfRange
        when the buffer strays outside the transcript; that happens inside
        `transcript_range` / `base_index`, not in this method -- confirm.

        """
        chromosome, start, _, _, _ = self.index

        reference_length = len(self.reference)
        mutation_length = len(self.mutation)

        total_buffer = len(self) - mutation_length
        left_buffer = total_buffer // 2
        right_buffer = total_buffer - left_buffer

        plus_strand = self.transcript.plus_strand

        base = self.transcript.base_index(self.index)

        if not plus_strand:
            left_buffer, right_buffer = right_buffer, left_buffer

        sequence = (
            self.transcript.transcript_range(base - left_buffer, base) + [
                SequenceRange(chromosome,
                              start,
                              start + reference_length,
                              mutation=self.mutation,
                              reverse_complement=not plus_strand)
            ] + self.transcript.transcript_range(
                base + reference_length,
                base + reference_length + right_buffer))

        # Reverse in place rather than returning `reversed(sequence)`, which
        # is a one-shot iterator and not the list that is promised above and
        # returned on the plus strand.
        if not plus_strand:
            sequence.reverse()
        return sequence
Beispiel #22
0
class TestSequenceRange(unittest.TestCase):
    """Checks concat(), adjacent() and condense() on SequenceRange.

    The fixtures are three ranges on chromosome '0': 1-2, 2-4 and 5-6.
    """

    def setUp(self):
        chromosome = '0'
        self.range_12 = SequenceRange(chromosome, 1, 2)
        self.range_24 = SequenceRange(chromosome, 2, 4)
        self.range_56 = SequenceRange(chromosome, 5, 6)

    def test_concat(self):
        # Either operand order is expected to give the 1-4 range.
        expected = SequenceRange('0', 1, 4)
        forwards = self.range_12.concat(self.range_24)
        backwards = self.range_24.concat(self.range_12)
        self.assertEqual(forwards, expected)
        self.assertEqual(backwards, expected)
        # range_12 with range_56 is expected to be rejected.
        with self.assertRaises(ValueError):
            self.range_12.concat(self.range_56)

    def test_adjacent(self):
        # Expected true in both directions for range_12/range_24, and false
        # for range_12/range_56.
        first, second, distant = (
            self.range_12, self.range_24, self.range_56)
        self.assertTrue(first.adjacent(second))
        self.assertTrue(second.adjacent(first))
        self.assertFalse(first.adjacent(distant))

    def test_condense(self):
        # Two ranges expected to merge into one; with range_56 added it is
        # expected to remain a separate second entry.
        two_inputs = (self.range_12, self.range_24)
        three_inputs = two_inputs + (self.range_56,)
        self.assertEqual(SequenceRange.condense(*two_inputs),
                         [SequenceRange('0', 1, 4)])
        self.assertEqual(SequenceRange.condense(*three_inputs),
                         [SequenceRange('0', 1, 4), self.range_56])
 def test_bases_raises_NonContainedRange_on_range_outside_of_chromosome(
         self):
     # The message is used as a regular expression, so the square brackets
     # are escaped. It is a raw string because "\[" and "\]" are invalid
     # escape sequences in a plain string literal, which Python warns about.
     message = r"range \[1:100\] outside the range of chromosome '1'"
     with self.assertRaisesRegex(reference.NonContainedRange, message):
         reference.bases(SequenceRange('1', 1, 100), self.ref_genome)
 def test_bases_raises_MissingChromosome_when_chromosome_key_missing(self):
     # A range on chromosome 'banana' is expected to be rejected with a
     # MissingChromosome error naming that chromosome.
     expected_error = reference.MissingChromosome
     pattern = "no such chromosome: 'banana'"
     with self.assertRaisesRegex(expected_error, pattern):
         unknown = SequenceRange('banana', 1, 2)
         reference.bases(unknown, self.ref_genome)
Beispiel #25
0
 def setUp(self):
     # Fixture ranges on chromosome '0'; each name encodes start and end.
     self.range_12, self.range_24, self.range_56 = (
         SequenceRange('0', start, end)
         for start, end in ((1, 2), (2, 4), (5, 6)))
Beispiel #26
0
 def test_codon_index(self):
     # Only the third transcript of ANNOTATION is checked: codons 1 and 2
     # are expected at 20-23 and 12-15 on chromosome '3'.
     _, _, transcript3, *_ = ANNOTATION
     self.assertEqual(transcript3.codon_index(1),
                      SequenceRange('3', 20, 23))
     self.assertEqual(transcript3.codon_index(2),
                      SequenceRange('3', 12, 15))
Beispiel #27
0
 def setUp(self):
     # Three fixture ranges sharing one chromosome name.
     chromosome = '0'
     self.range_12 = SequenceRange(chromosome, 1, 2)
     self.range_24 = SequenceRange(chromosome, 2, 4)
     self.range_56 = SequenceRange(chromosome, 5, 6)
Beispiel #28
0
 def test_coding_exons_returns_coding_sequence_ranges(self):
     # coding_exons() on the fixture transcript is compared with one
     # SequenceRange per (start, end) pair on chromosome '0'.
     coding_bounds = ((11, 20), (30, 40), (50, 59))
     expected = [SequenceRange('0', start, end)
                 for start, end in coding_bounds]
     self.assertEqual(self.transcript.coding_exons(), expected)
Beispiel #29
0
 def test_exon_returns_exon_sequence_range_at_one_based_index(self):
     # exon(2) is expected to be the 10-20 range on chromosome '0'.
     second_exon = self.transcript.exon(2)
     self.assertEqual(second_exon, SequenceRange('0', 10, 20))