Example #1
0
    def test_alignment_to_pairs_001(self):
        """Compare `_alignment_to_pairs` output with hand-written pairs.

        The alignment is built so that the expected pairs include:
            - an insertion (the pair whose reference position is `None`)
            - a deletion (the pair whose base is `'*'`)
            - a run length larger than `self.max_run`, that will be capped
                 to the max_run
        """
        name = 'query'
        ref_id = 1
        ref_start = 10
        bases = 'ACATGATGTAC'
        cigar = '3=1I2=1D5='
        sam_flag = 0
        scores = array.array('B', [2, 1, 4, 5, 1, 1, 2, 16, 2, 3, 4])
        read = common.initialise_alignment(
            name, ref_id, ref_start, bases, cigar, sam_flag,
            query_qualities=scores)

        # one (reference position, (base, run length)) entry per line
        expected = (
            (10, ('A', 2)),
            (11, ('C', 1)),
            (12, ('A', 3)),
            (None, ('T', 3)),
            (13, ('G', 1)),
            (14, ('A', 1)),
            (15, ('*', 1)),
            (16, ('T', 2)),
            (17, ('G', 3)),
            (18, ('T', 2)),
            (19, ('A', 3)),
            (20, ('C', 3)),
        )

        self.assertEqual(tuple(self.ls._alignment_to_pairs(read)), expected)
Example #2
0
def create_simple_bam(fname, calls):
    """Create a small bam file with RLE encoding coded in the qscores.

    Every entry of `calls` is converted to an alignment starting at position
    0 of the single reference `ref` and written to an unsorted intermediate
    file `<fname>.tmp`. That file is sorted into `fname`, removed, and
    `fname` is then indexed.

    :param fname: path of the sorted bam file to create.
    :param calls: iterable of dicts providing the keys `query_name`, `seq`,
        `cigarstring`, `flag`, `quality` and `tags`.
    """
    ref_len = len(simple_data['ref'])

    header = {'HD': {'VN': '1.0'}, 'SQ': [{'LN': ref_len, 'SN': 'ref'}]}

    tmp_file = '{}.tmp'.format(fname)
    try:
        with pysam.AlignmentFile(tmp_file,
                                 'wb',
                                 reference_names=[
                                     'ref',
                                 ],
                                 reference_lengths=[
                                     ref_len,
                                 ],
                                 header=header) as output:
            for basecall in calls:
                a = common.initialise_alignment(
                    query_name=basecall['query_name'],
                    reference_id=0,
                    reference_start=0,
                    query_sequence=basecall['seq'],
                    cigarstring=basecall['cigarstring'],
                    flag=basecall['flag'],
                    query_qualities=basecall['quality'],
                    tags=basecall['tags'])

                output.write(a)

        pysam.sort("-o", fname, tmp_file)
    finally:
        # Do not leave the unsorted intermediate behind when writing or
        # sorting fails; the guard covers a failure before it was created.
        if os.path.exists(tmp_file):
            os.remove(tmp_file)
    pysam.index(fname)
Example #3
0
    def test_derived(self):
        """Check alignment attributes that are derived from the inputs."""
        aln = common.initialise_alignment(**self.input_kwargs)
        derived = (
            ('query_alignment_start', 1),
            ('query_alignment_end', 12),
            ('query_alignment_sequence', 'CCCTGTTGATC'),
        )

        for attribute, value in derived:
            self.assertEqual(getattr(aln, attribute), value)
Example #4
0
    def setUpClass(cls):
        """Write a reference fasta and a sorted, indexed bam of two reads.

        Ref     T  T  A  A    C  T  T  T  G
        Read1         A  A    C  T  T  T  G
        Read2      T  A  A  A C  T  T  T  G
        """
        # only the generated file names are stored on the class
        for attr, suffix in (
                ('bam_input', '.bam'),
                ('bam_output', '.bam'),
                ('ref_fname', '.fasta')):
            setattr(cls, attr, tempfile.NamedTemporaryFile(suffix=suffix).name)

        with open(cls.ref_fname, 'w') as fasta:
            fasta.writelines(('>ref\n', 'TTAACTTTG\n'))

        header = {
            'HD': {'VN': '1.0'},
            'SQ': [
                {'LN': 9, 'SN': 'ref'},
            ],
        }

        # fields common to both reads
        shared = {'reference_id': 0, 'flag': 0, 'mapping_quality': 50}
        reads = (
            dict(
                shared,
                query_name='read1',
                reference_start=2,
                query_sequence='AACTTTG',
                cigarstring='7='),
            dict(
                shared,
                query_name='read2',
                reference_start=1,
                query_sequence='TAAACTTTG',
                cigarstring='3=1I5='),
        )

        # write unsorted records first, then sort into the final input bam
        unsorted_bam = '{}.tmp'.format(cls.bam_input)
        with pysam.AlignmentFile(unsorted_bam, 'wb', header=header) as bam:
            for read_kwargs in reads:
                bam.write(common.initialise_alignment(**read_kwargs))

        pysam.sort("-o", cls.bam_input, unsorted_bam)
        os.remove(unsorted_bam)
        pysam.index(cls.bam_input)
Example #5
0
    def test_compression(self):
        """Compress an alignment and check the resulting attributes.

        ref: TACCCATGTTGATCG  --> TACATGTGATCG
        seq:  gCCCA*GTTGATCtt -->  gCA*GTGATCt
        cigar:    1S4=1D7=2S  -->  1S2=1D6=1S
        """
        original = common.initialise_alignment(**self.alignment_kwargs)
        rle_reference = medaka.rle.RLEConverter(self.ref)
        compressed = medaka.rle._compress_alignment(original, rle_reference)

        checks = (
            ('cigarstring', '1S2=1D6=1S'),
            ('query_sequence', 'GCAGTGATCT'),
            ('query_alignment_start', 1),
            ('query_alignment_end', 9),
            ('query_alignment_sequence', 'CAGTGATC'),
            ('reference_start', 2),
            ('reference_end', 11),
        )
        for attribute, value in checks:
            self.assertEqual(getattr(compressed, attribute), value)
Example #6
0
 def test_inputs(self):
     """Check every supplied keyword can be read back from the alignment."""
     aln = common.initialise_alignment(**self.input_kwargs)
     for name in self.input_kwargs:
         self.assertEqual(self.input_kwargs[name], getattr(aln, name))