Python Region Exemples, medaka.common.Region Python Exemples

Exemple #1

0

Afficher le fichier

    def test_030_bams_to_training_samples_simple(self):
        """Check the training sample built from a simple reads/truth bam pair.

        Regression test: a call with missing qualities used to trigger a bad
        indexing bug, so the first call has its quality removed before the
        reads bam is written.
        """
        import os

        # we had a bug caused by missing qualities and bad indexing...
        data = copy.deepcopy(simple_data['calls'])
        data[0]['quality'] = None

        expected = Sample(
            ref_name='ref',
            features=np.array([
                [0.5 , 0.  , 0.  , 0.  , 0.5 , 0.  , 0.  , 0.  , 0.  , 0.  ],
                [0.  , 0.5 , 0.  , 0.  , 0.  , 0.5 , 0.  , 0.  , 0.  , 0.  ],
                [0.5 , 0.  , 0.  , 0.  , 0.5 , 0.  , 0.  , 0.  , 0.  , 0.  ],
                [0.  , 0.25, 0.  , 0.25, 0.  , 0.  , 0.  , 0.25, 0.  , 0.25],
                [0.25, 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ],
                [0.  , 0.  , 0.5 , 0.  , 0.  , 0.  , 0.5 , 0.  , 0.  , 0.  ],
                [0.5 , 0.  , 0.  , 0.  , 0.5 , 0.  , 0.  , 0.  , 0.  , 0.  ],
                [0.  , 0.  , 0.  , 0.5 , 0.  , 0.  , 0.  , 0.5 , 0.  , 0.  ],
                [0.  , 0.  , 0.5 , 0.  , 0.  , 0.  , 0.5 , 0.  , 0.  , 0.  ]],
                dtype='float32'),
            # the two insertions with respect to the draft are dropped
            labels=np.array([1, 2, 1, 4, 1, 3, 1, 4, 3]),  # A C A T A G A T C
            ref_seq=None,
            positions=np.array([
                (0, 0), (1, 0), (2, 0), (3, 0), (3, 1), (4, 0), (5, 0), (6, 0), (7, 0)],
                dtype=[('major', '<i8'), ('minor', '<i8')]),
            label_probs=None
        )

        # Write both bams into a scratch directory owned by this test. Taking
        # `.name` from a discarded NamedTemporaryFile reuses a path whose file
        # has already been deleted, and whatever is written there afterwards
        # is never cleaned up; the directory context removes everything.
        with tempfile.TemporaryDirectory() as tmp_dir:
            reads_bam = os.path.join(tmp_dir, 'reads.bam')
            truth_bam = os.path.join(tmp_dir, 'truth.bam')

            create_simple_bam(reads_bam, data)
            create_simple_bam(
                truth_bam, [simple_data['truth']])
            encoder = medaka.features.CountsFeatureEncoder(normalise='total')
            label_scheme = medaka.labels.HaploidLabelScheme()
            region = Region('ref', 0, 100)
            result = encoder.bams_to_training_samples(
                truth_bam, reads_bam, region, label_scheme, min_length=0)[0]

            # Compare while the bams still exist, in case anything in the
            # result is backed by the files.
            np.testing.assert_equal(result.labels, expected.labels)
            np.testing.assert_equal(result.positions, expected.positions)
            np.testing.assert_equal(result.features, expected.features)

Exemple #2

0

Afficher le fichier

 def test_size(self):
     """A Region from 50 to 100 on 'contig1' should report a size of 50."""
     start, end = 50, 100
     region = Region('contig1', start, end)
     self.assertEqual(region.size, 50)

Exemple #3

0

Afficher le fichier

    def bams_to_training_samples(self,
                                 truth_bam,
                                 bam,
                                 region,
                                 reference=None,
                                 read_fraction=None):
        """Prepare training data chunks.

        :param truth_bam: .bam file of truth aligned to ref to generate labels.
        :param bam: input .bam file.
        :param region: `Region` obj. whose `ref_name`, `start` and `end`
            select the truth alignments from which samples are built.
        :param reference: reference `.fasta`, should correspond to `bam`.
        :param read_fraction: forwarded unchanged to `bam_to_sample`.

        :returns: tuple of `Sample` objects.

        .. note:: Chunks might be missing if `truth_bam` is provided and
            regions with multiple mappings were encountered.

        """
        ref_rle = self.process_ref_seq(region.ref_name, reference)

        # Filter truth alignments to restrict ourselves to regions of the ref
        # where the truth is unambiguous.
        alignments = TruthAlignment.bam_to_alignments(truth_bam,
                                                      region.ref_name,
                                                      start=region.start,
                                                      end=region.end)
        filtered_alignments = TruthAlignment.filter_alignments(
            alignments, start=region.start, end=region.end)
        if len(filtered_alignments) == 0:
            self.logger.info(
                "Filtering removed all alignments of truth to ref from {}.".
                format(region))

        # Depends only on the encoder settings, so work it out once rather
        # than once per alignment.
        mock_compr = self.max_hp_len > 1 and not self.is_compressed

        samples = []
        for aln in filtered_alignments:
            truth_pos, truth_labels = aln.get_positions_and_labels(
                ref_compr_rle=ref_rle,
                mock_compr=mock_compr,
                is_compressed=self.is_compressed,
                rle_dtype=True)
            aln_samples = self.bam_to_sample(bam,
                                             Region(region.ref_name, aln.start,
                                                    aln.end),
                                             ref_rle,
                                             read_fraction=read_fraction)

            # The position -> label lookup depends only on the alignment, so
            # it is built once here and shared by every sample of the
            # alignment. Positions missing from the truth fall back to the
            # gap label, paired with 1 when the label dtype has fields
            # (presumably a run length -- confirm against the label dtype).
            gap = encoding[_gap_]
            pad = (gap, 1) if len(truth_labels.dtype) > 0 else gap
            position_to_label = defaultdict(
                itertools.repeat(pad).__next__,
                zip(map(tuple, truth_pos), truth_labels))

            for sample in aln_samples:
                # Create labels according to positions in pileup
                padded_labels = np.fromiter(
                    (position_to_label[tuple(p)] for p in sample.positions),
                    dtype=truth_labels.dtype,
                    count=len(sample.positions))

                fields = sample._asdict()
                fields['labels'] = padded_labels
                samples.append(Sample(**fields))
        return tuple(samples)

Exemple #4

0

Afficher le fichier

                 ref_start=None,
                 ref_end=None,
                 ref_len=None,
                 query_seq='ACATGCAAGACACGAT',
                 ref_seq='AAAGGCAAGACACGAT'):
        self.reference_start = ref_start
        self.reference_end = ref_end
        self.reference_length = ref_len
        self.query_sequence = query_seq
        self.reference_sequence = ref_seq

    def get_reference_sequence(self):
        """Return the value stored in the `reference_sequence` attribute."""
        return self.reference_sequence


# Region on the 'Mock' reference starting at 0 with an infinite end, i.e. it
# places no upper bound on positions; the tests below pass it as the region
# argument when filtering alignments.
full_region = Region('Mock', 0, float('inf'))


class TruthAlignmentTest(unittest.TestCase):
    def test_case1(self):
        # case 1: longer < 2 x len shorter and >= 50% of shorter overlaps longer both should be removed
        starts_ends = [(2000, 2999), (2500, 3000)]
        expected = []

        alignments = [
            TruthAlignment(MockAlignment(start, end, end - start))
            for start, end in starts_ends
        ]
        filtered = [
            (f.start, f.end)
            for f in TruthAlignment._filter_alignments(alignments, full_region)

Exemple #5

0

Afficher le fichier

Fichier : test_counts.py Projet : ssghost/medaka

import numpy as np
import os
import unittest
from medaka.features import FeatureEncoder, pileup_counts
from medaka.common import Region

# Directory, next to this module, that holds the bam fixtures.
_data_dir = os.path.join(os.path.dirname(__file__), 'data')

__reads_bam__ = os.path.join(_data_dir, 'test_reads.bam')
__two_type_bam__ = os.path.join(_data_dir, 'test_two_type.bam')
__gapped_bam__ = os.path.join(_data_dir, 'reads_gapped.bam')

# Both fixture regions refer to the same contig name.
_contig = 'Consensus_Consensus_Consensus_Consensus_utg000001l'
__region__ = Region(_contig, start=50000, end=100000)
__region_start__ = Region(_contig, start=0, end=200)

# Keyword settings shared by the tests in this module.
__kwargs__ = dict(
    consensus_as_ref=False,
    is_compressed=False,
    log_min=None,
    max_hp_len=1,
    normalise='total',
    ref_mode=None,
    with_depth=False,
)


class CountsTest(unittest.TestCase):

Exemple #6

0

Afficher le fichier