Esempio n. 1
0
 def test_io(self):
     """Open the fixture BDIFF for reading; check header, counts and records.

     Records are expected in file order and reading past the last one must
     raise EOFError.
     """
     reader = BdiffIO(self._bdiff_file)
     self.assertTrue(reader.is_read_mode)
     self.assertDictEqual(self._header, reader.header)
     self.assertEqual(4, reader.snv_count)
     self.assertEqual(3, reader.indel_count)

     homo_to_homo = ZygosityChange.HOMO_TO_HOMO.value
     # (index, is_snv, ref_id, permutation); every fixture record carries the
     # same permutation twice and an empty tuple as its last field
     fixture_rows = (
         (1010, True, 0, ['C', 'A', 'T', 'G']),
         (1020, True, 1, ['G', 'A', 'T', 'C']),
         (1030, False, 0, ['AT', 'ATT', 'A']),
         (1040, True, 2, ['T', 'G', 'A', 'C']),
         (1050, False, 1, ['T', 'TT']),
         (1060, False, 2, ['GCG', 'G', 'GCGCG']),
         (1070, True, 3, ['G', 'C', 'A', 'T']),
     )
     for index, is_snv, ref_id, perm in fixture_rows:
         expected = (index, is_snv, ref_id, homo_to_homo, perm, list(perm),
                     ())
         self.assertTupleEqual(expected, reader.read_record())

     # all seven records consumed
     with self.assertRaises(EOFError):
         reader.read_record()
Esempio n. 2
0
 def test_mutate_02(self):
     """EOF VAC case: mutate input_02 and check stats, SAM dump and text DIFF."""
     res = self.RESOURCE_PATH
     vac_path = res + 'input_02.vac'
     out_bam_path = res + 'output_02.bam'
     out_sam_path = res + 'output_02.sam'
     out_diff_path = res + 'output_02.diff.txt'

     Vac.text2vac(res + 'input_02.vac.txt', vac_path)
     bdiff_file = self._mut.mutate(vac_filename=vac_path,
                                   mut_bam_filename=out_bam_path,
                                   secret=self.SECRET,
                                   mut_p=0,
                                   rng=self._rnd)

     expected_stats = (
         (BamMutator.STAT_ALIGNMENT_COUNT, 21),
         (BamMutator.STAT_COVERING_COUNT, 13),
         (BamMutator.STAT_VAC_COUNT, 7),
         (BamMutator.STAT_MUT_COUNT, 6),
         (BamMutator.STAT_DIFF_COUNT, 4),
         (BamMutator.STAT_ALIGNMENT_MUT_COUNT, 6),
     )
     for stat_id, expected_value in expected_stats:
         self.assertEqual(expected_value, self._mut.stat(stat_id))

     # mutated BAM, dumped as SAM, must match the desired SAM
     cmn.bam2sam(out_bam_path, out_sam_path)
     self.assertTrue(filecmp.cmp(res + 'desired_02.sam', out_sam_path))

     # DIFF, dumped as text, must match the desired DIFF
     BdiffIO.to_text_file(bdiff_file, out_diff_path)
     self.assertTrue(filecmp.cmp(res + 'desired_02.diff.txt', out_diff_path))
Esempio n. 3
0
 def _write_bdiff_record(file: BdiffIO,
                         index,
                         ref_id,
                         perm,
                         is_snv: bool = True):
     """
     Write one fixture record to file.

     :param file: BDIFF writer
     :param index: index of the record
     :param ref_id: reference id stored with the record
     :param perm: permutation; an SNV record gets it as both permutations
     :param is_snv: True writes an SNV record, False an INDEL record
     """
     if not is_snv:
         file._write_indel(index, ref_id, perm)
         return

     # SNV fixtures are always HOMO_TO_HOMO with an empty last argument
     file.write_snv(index, ref_id, ZygosityChange.HOMO_TO_HOMO, perm, perm,
                    ())
Esempio n. 4
0
    def setUpClass(cls):
        """Build the shared BDIFF fixture: four SNV and three INDEL records."""
        writer = BdiffIO()

        # (index, ref_id, permutation, is_snv), in write order
        fixture_records = (
            (1010, 0, ['C', 'A', 'T', 'G'], True),
            (1020, 1, ['G', 'A', 'T', 'C'], True),
            (1030, 0, ['AT', 'ATT', 'A'], False),
            (1040, 2, ['T', 'G', 'A', 'C'], True),
            (1050, 1, ['T', 'TT'], False),
            (1060, 2, ['GCG', 'G', 'GCGCG'], False),
            (1070, 3, ['G', 'C', 'A', 'T'], True),
        )
        for index, ref_id, perm, is_snv in fixture_records:
            cls._write_bdiff_record(writer, index, ref_id, perm, is_snv)

        cls._bdiff_file = writer.file(cls._header)
Esempio n. 5
0
    def test_single_file(self):
        """A BDIFF with a single record at index 1010: bounds and lookups."""
        writer = BdiffIO()
        self._write_bdiff_record(writer, 1010, 0, ['C', 'A', 'T', 'G'])

        reader = writer.readcopy()
        self.assertFalse(reader.is_empty())

        # the only record is both the first and the last one
        self.assertEqual(1010, reader.first_index)
        self.assertEqual(1010, reader.last_index)

        # >= lookup: found from below, nothing at or above 2000
        self.assertEqual(reader.data_offset, reader.tell_index_gte(1000))
        self.assertIsNone(reader.tell_index_gte(2000))

        # <= lookup: nothing at or below 1000, found from above
        self.assertIsNone(reader.tell_index_lte(1000))
        self.assertEqual(reader.data_offset, reader.tell_index_lte(2000))
Esempio n. 6
0
 def test_seq_perm(self):
     """Check BdiffIO.seq_perm for a mixed-length map and a single-base map."""
     cases = (
         (['T', 'TT', 'A', 'AA'],
          {'T': 'A', 'AA': 'TT', 'TT': 'AA', 'A': 'T'}),
         (['C', 'A', 'T', 'G'],
          {'A': 'C', 'T': 'G', 'G': 'T', 'C': 'A'}),
     )
     for expected_perm, mut_map in cases:
         self.assertListEqual(expected_perm, BdiffIO.seq_perm(mut_map))
Esempio n. 7
0
    def _record2pos(self, record_id: int):
        """
        Position of self._bdiff_file after a fresh BdiffIO reader has skipped
        the record_id - 1 preceding records.

        Record with record_id must be present. The file position found on
        entry is restored before returning.
        :param record_id: 1-based
        :return: file position reached after skipping
        """
        origin = self._bdiff_file.tell()
        reader = BdiffIO(self._bdiff_file)

        # consume everything in front of the requested record
        for _ in range(record_id - 1):
            reader.read_record()

        offset = self._bdiff_file.tell()
        self._bdiff_file.seek(origin)
        return offset
Esempio n. 8
0
    def _index2pos(bdiff: BdiffIO, search_index: int):
        """
        Return the position of the first record, scanning from the reader's
        current position, whose index equals search_index.

        Record with search_index must be present; when it is not, whatever
        bdiff.read_record() raises at the end of the data propagates to the
        caller. The reader's position is restored in both cases, so a failed
        lookup no longer leaves the reader stranded at the end of the data.
        :param bdiff: reader providing tell(), seek() and read_record()
        :param search_index: index of the record to locate
        :return: value of bdiff.tell() taken right before the matching record
        """
        saved_pos = bdiff.tell()
        try:
            pos = saved_pos
            # read_record()[0] is the record's index
            while bdiff.read_record()[0] != search_index:
                pos = bdiff.tell()
            return pos
        finally:
            # always rewind, also when read_record() raised
            bdiff.seek(saved_pos)
Esempio n. 9
0
    def test_to_text_file(self):
        """Write six records, export them as text and compare with input.diff.txt."""
        writer = BdiffIO()
        add_record = self._write_bdiff_record

        add_record(writer, 11011, 2, ['A', 'G', 'T', 'C'])
        # the only record that is not HOMO_TO_HOMO, hence written directly
        writer.write_snv(11015, 3, ZygosityChange.HOMO_TO_HETERO,
                         ['C', 'A', 'T', 'G'], ['C', 'A', 'G', 'T'],
                         beta_indices=[2, 4, 6])
        add_record(writer, 11020, 2, ['G', 'T', 'A', 'C'])
        add_record(writer, 11027, 2, ['A', 'AGT', 'AG'], False)
        add_record(writer, 11031, 2, ['G', 'A', 'T', 'C'])
        add_record(writer, 11037, 0, ['GCG', 'G'], False)

        header = {
            "_from_index": 0,
            "_to_index": 19999,
            "mb_checksum": "f05027eaa1923a6aab76bf7ead4fe976",
            "secret": "ffffffffffffffffffffffffffffffff"
        }
        produced_path = self.RESOURCE_PATH + 'output.diff.txt'
        BdiffIO.to_text_file(writer.file(header), produced_path)

        expected_path = self.RESOURCE_PATH + 'input.diff.txt'
        self.assertTrue(filecmp.cmp(expected_path, produced_path))
Esempio n. 10
0
    def test_mask(self):
        """EOF BAM case: mutate the input and compare SAM and DIFF outputs."""
        res = self.RESOURCE_PATH
        vac_path = res + 'input.vac'
        out_bam_path = res + 'output.bam'
        out_sam_path = res + 'output.sam'
        out_diff_path = res + 'output.diff.txt'

        Vac.text2vac(res + 'input.vac.txt', vac_path)
        bdiff_file = self._mut.mutate(vac_filename=vac_path,
                                      mut_bam_filename=out_bam_path,
                                      secret=self.SECRET,
                                      mut_p=0,
                                      rng=self._rng)
        # produce both text artifacts first, compare afterwards
        cmn.bam2sam(out_bam_path, out_sam_path)
        BdiffIO.to_text_file(bdiff_file, out_diff_path)

        comparisons = (
            (res + 'desired.sam', out_sam_path),
            (res + 'desired.diff.txt', out_diff_path),
        )
        for desired_path, produced_path in comparisons:
            self.assertTrue(filecmp.cmp(desired_path, produced_path))
Esempio n. 11
0
    def test_unmask(self):
        """Unmutate using input.diff.txt (all mapped) and compare the SAM dump."""
        out_bam_path = self.RESOURCE_PATH + 'output.bam'
        out_sam_path = self.RESOURCE_PATH + 'output.sam'

        # all mapped
        with BdiffIO.from_text_file(self.RESOURCE_PATH +
                                    'input.diff.txt') as bdiff_file:
            self.mut.unmutate(bdiff_file=bdiff_file,
                              out_bam_filename=out_bam_path)

        bam2sam(out_bam_path, out_sam_path)
        # assertTrue instead of assertEqual(True, ...): the idiomatic form for
        # a boolean result, matching the other file-comparison tests
        self.assertTrue(
            filecmp.cmp(self.RESOURCE_PATH + 'desired.sam', out_sam_path))
Esempio n. 12
0
    def _mask_indel_pos(self, bdiff_io: bdiff.BdiffIO, allele_queue: list,
                        variant: VariantOccurrence, rnd: VeryRandom) -> bool:
        """
        Mask the alleles queued at one INDEL position.

        A mutation map is built by cmn.indel_mut_map from the frequency map
        of the piled-up alleles (private) and the variant's alleles paired
        with their freqs (public). The map is applied to every queued allele
        and written to bdiff_io when any _mutate_allele call returned a
        truthy result.
        :param bdiff_io: receives the INDEL record
        :param allele_queue: AlleleAlignment items at the variant position
        :param variant: provides alleles, freqs, ref_allele and pos.index
        :param rnd: random source handed to cmn.indel_mut_map
        :return: the _mutate_allele results OR-ed together
        """
        private_freqs = cmn.freq_map(pileup_alleles(allele_queue))
        public_freqs = dict(zip(variant.alleles, variant.freqs))
        mut_map = cmn.indel_mut_map(private_freq_map=private_freqs,
                                    public_freq_map=public_freqs,
                                    rnd=rnd)

        # every allele must be visited, so no short-circuiting here
        is_masked = False
        for aligned_allele in allele_queue:  # type: AlleleAlignment
            is_masked |= self._mutate_allele(aligned_allele, mut_map)

        if not is_masked:
            return is_masked

        bdiff_io.write_indel(variant.pos.index, variant.ref_allele, mut_map)
        return is_masked
Esempio n. 13
0
    def test_empty_file(self):
        """A read copy of a BDIFF without records: empty, no bounds, no hits."""
        reader = BdiffIO().readcopy()
        self.assertTrue(reader.is_empty())

        self.assertIsNone(reader.first_index)
        self.assertIsNone(reader.last_index)

        # neither lookup direction can resolve any index
        for lookup in (reader.tell_index_gte, reader.tell_index_lte):
            for index in (0, 1000):
                self.assertIsNone(lookup(index))
Esempio n. 14
0
    def test_from_text_file(self):
        """Load input.diff.txt; check header, index metadata and all records."""
        reader = BdiffIO(
            BdiffIO.from_text_file(self.RESOURCE_PATH + 'input.diff.txt'))

        expected_header = {
            "_from_index": 0,
            "_to_index": 19999,
            "mb_checksum": "f05027eaa1923a6aab76bf7ead4fe976",
            "secret": "ffffffffffffffffffffffffffffffff"
        }
        self.assertDictEqual(expected_header, reader.header)

        self.assertEqual(1000, reader.index_resolution)
        self.assertEqual(11011, reader.first_index)
        self.assertEqual(11037, reader.last_index)
        self.assertEqual(4, reader.snv_count)
        self.assertEqual(2, reader.indel_count)

        homo = ZygosityChange.HOMO_TO_HOMO.value
        hetero = ZygosityChange.HOMO_TO_HETERO.value
        # records in file order, exactly as read_record() must return them
        expected_records = (
            (11011, True, 2, homo,
             ['A', 'G', 'T', 'C'], ['A', 'G', 'T', 'C'], ()),
            (11015, True, 3, hetero,
             ['C', 'A', 'T', 'G'], ['C', 'A', 'G', 'T'], (2, 4, 6)),
            (11020, True, 2, homo,
             ['G', 'T', 'A', 'C'], ['G', 'T', 'A', 'C'], ()),
            (11027, False, 2, homo,
             ['A', 'AGT', 'AG'], ['A', 'AGT', 'AG'], ()),
            (11031, True, 2, homo,
             ['G', 'A', 'T', 'C'], ['G', 'A', 'T', 'C'], ()),
            (11037, False, 0, homo,
             ['GCG', 'G'], ['GCG', 'G'], ()),
        )
        for expected in expected_records:
            self.assertTupleEqual(expected, reader.read_record())
Esempio n. 15
0
    def test_unmutate_01(self):
        """Unmutate with all reads mapped; check the stats and the SAM dump."""
        out_bam_path = self.RESOURCE_PATH + 'output_01.bam'
        out_sam_path = self.RESOURCE_PATH + 'output_01.sam'

        # all mapped
        with BdiffIO.from_text_file(self.RESOURCE_PATH +
                                    'input.diff.txt') as bdiff_file:
            self.mut.unmutate(bdiff_file=bdiff_file,
                              out_bam_filename=out_bam_path)

        self.assertEqual(17, self.mut.stat(BamMutator.STAT_ALIGNMENT_COUNT))
        self.assertEqual(11, self.mut.stat(BamMutator.STAT_COVERING_COUNT))
        self.assertEqual(8, self.mut.stat(BamMutator.STAT_MUT_COUNT))
        self.assertEqual(5, self.mut.stat(BamMutator.STAT_DIFF_COUNT))
        self.assertEqual(8, self.mut.stat(BamMutator.STAT_ALIGNMENT_MUT_COUNT))

        bam2sam(out_bam_path, out_sam_path)
        # assertTrue instead of assertEqual(True, ...): the idiomatic form for
        # a boolean result, matching the other unmutate tests
        self.assertTrue(
            filecmp.cmp(self.RESOURCE_PATH + 'desired_01.sam', out_sam_path))
Esempio n. 16
0
    def test_unmutate_07(self):
        """Unmutate with unmapped_only=True; check the stats and the SAM dump."""
        res = self.RESOURCE_PATH
        out_bam_path = res + 'output_07.bam'
        out_sam_path = res + 'output_07.sam'

        # unmapped reads only
        with BdiffIO.from_text_file(res + 'input.diff.txt') as bdiff_file:
            self.mut.unmutate(bdiff_file=bdiff_file,
                              out_bam_filename=out_bam_path,
                              unmapped_only=True)

        expected_stats = (
            (BamMutator.STAT_ALIGNMENT_COUNT, 4),
            (BamMutator.STAT_COVERING_COUNT, 0),
            (BamMutator.STAT_MUT_COUNT, 0),
            # only the first diff record is read
            (BamMutator.STAT_DIFF_COUNT, 1),
            (BamMutator.STAT_ALIGNMENT_MUT_COUNT, 0),
        )
        for stat_id, expected_value in expected_stats:
            self.assertEqual(expected_value, self.mut.stat(stat_id))

        bam2sam(out_bam_path, out_sam_path)
        self.assertTrue(filecmp.cmp(res + 'desired_07.sam', out_sam_path))
Esempio n. 17
0
    def test_unmutate_05(self):
        """Unmutate with include_unmapped=True; check stats and the SAM dump."""
        res = self.RESOURCE_PATH
        out_bam_path = res + 'output_05.bam'
        out_sam_path = res + 'output_05.sam'

        # include unmapped
        with BdiffIO.from_text_file(res + 'input.diff.txt') as bdiff_file:
            self.mut.unmutate(bdiff_file=bdiff_file,
                              out_bam_filename=out_bam_path,
                              include_unmapped=True)

        expected_stats = (
            (BamMutator.STAT_ALIGNMENT_COUNT, 21),
            # one synonymous mutation is present
            (BamMutator.STAT_COVERING_COUNT, 11),
            (BamMutator.STAT_MUT_COUNT, 8),
            (BamMutator.STAT_DIFF_COUNT, 5),
            (BamMutator.STAT_ALIGNMENT_MUT_COUNT, 8),
        )
        for stat_id, expected_value in expected_stats:
            self.assertEqual(expected_value, self.mut.stat(stat_id))

        bam2sam(out_bam_path, out_sam_path)
        self.assertTrue(filecmp.cmp(res + 'desired_05.sam', out_sam_path))
Esempio n. 18
0
    def test_unmutate_06(self):
        """Unmutate a chr2 sub-range with include_unmapped=True; check results."""
        res = self.RESOURCE_PATH
        out_bam_path = res + 'output_06.bam'
        out_sam_path = res + 'output_06.sam'

        # include unmapped with range
        ref_range = dict(
            start_ref_name='chr2',
            start_ref_pos=1015,  # inclusive
            end_ref_name='chr2',
            end_ref_pos=1028,  # non-inclusive
        )
        with BdiffIO.from_text_file(res + 'input.diff.txt') as bdiff_file:
            self.mut.unmutate(bdiff_file=bdiff_file,
                              out_bam_filename=out_bam_path,
                              include_unmapped=True,
                              **ref_range)

        expected_stats = (
            (BamMutator.STAT_ALIGNMENT_COUNT, 13),
            (BamMutator.STAT_COVERING_COUNT, 3),
            (BamMutator.STAT_MUT_COUNT, 2),
            (BamMutator.STAT_DIFF_COUNT, 1),
            (BamMutator.STAT_ALIGNMENT_MUT_COUNT, 2),
        )
        for stat_id, expected_value in expected_stats:
            self.assertEqual(expected_value, self.mut.stat(stat_id))

        bam2sam(out_bam_path, out_sam_path)
        self.assertTrue(filecmp.cmp(res + 'desired_06.sam', out_sam_path))
Esempio n. 19
0
    def test_file(self):
        """
        Testing only branch with inner method BdiffIO._file_from_slice.

        Each slice case requests a header range from the fixture and checks
        the header, first/last index and SNV/INDEL counts of the result.
        """
        source = BdiffIO(self._bdiff_file)

        def header_for(from_index, to_index):
            # a fresh dict per call: the requested header and the expected
            # header must never be the same object
            return {
                BdiffIO.FROM_INDEX: from_index,
                BdiffIO.TO_INDEX: to_index
            }

        # invalid range
        with self.assertRaises(AssertionError):
            source.file(header_for(2000, 1000), False)

        # out of range
        for from_index, to_index in ((1071, 2000), (0, 1009)):
            with self.assertRaises(IndexError):
                source.file(header_for(from_index, to_index), False)

        # (from, to, first index, last index, snv count, indel count)
        slice_cases = (
            (0, 2000, 1010, 1070, 4, 3),  # full range
            (1010, 1070, 1010, 1070, 4, 3),  # exact range
            (1020, 1060, 1020, 1060, 2, 3),  # inner range
            (0, 1020, 1010, 1020, 2, 0),  # left intersect range
            (0, 1010, 1010, 1010, 1, 0),
            (1060, 2000, 1060, 1070, 1, 1),  # right intersect range
            (1070, 2000, 1070, 1070, 1, 0),
        )
        for from_index, to_index, first, last, snvs, indels in slice_cases:
            sliced = BdiffIO(
                source.file(header_for(from_index, to_index), False))
            self.assertDictEqual(header_for(from_index, to_index),
                                 sliced.header)
            self.assertEqual(first, sliced.first_index)
            self.assertEqual(last, sliced.last_index)
            self.assertEqual(snvs, sliced.snv_count)
            self.assertEqual(indels, sliced.indel_count)
Esempio n. 20
0
    def test_tell_range(self):
        """tell_range: invalid and outer ranges raise, others map to positions.

        Expected positions come from self._range2pos(first_record,
        last_record) with 1-based record numbers of the fixture.
        """
        reader = BdiffIO(self._bdiff_file)

        # invalid range
        with self.assertRaises(AssertionError):
            reader.tell_range(2000, 1000)

        # outer range
        for from_index, to_index in ((0, 1000), (2000, 3000), (0, 1009),
                                     (1071, 2000)):
            with self.assertRaises(IndexError):
                reader.tell_range(from_index, to_index)

        reader = BdiffIO(self._bdiff_file)

        # ((first record, last record), (from index, to index))
        range_cases = (
            # inner range
            ((1, 7), (1000, 2000)),
            ((1, 7), (1010, 1070)),
            ((1, 2), (1010, 1020)),
            ((2, 3), (1020, 1030)),
            ((3, 4), (1030, 1040)),
            ((1, 1), (1010, 1019)),
            ((1, 1), (1010, 1010)),
            ((2, 6), (1011, 1069)),
            # right outer range
            ((1, 7), (1010, 3000)),
            ((2, 7), (1020, 3000)),
            ((3, 7), (1030, 3000)),
            # left outer range
            ((1, 1), (1000, 1010)),
            ((1, 2), (1000, 1020)),
            ((1, 3), (1000, 1030)),
        )
        for record_range, index_range in range_cases:
            self.assertTupleEqual(self._range2pos(*record_range),
                                  reader.tell_range(*index_range))
Esempio n. 21
0
    def test_tell_index_lte(self):
        """tell_index_lte returns the position of the last record at or below an index."""
        reader = BdiffIO(self._bdiff_file)

        # before first
        self.assertIsNone(reader.tell_index_lte(1008))

        # (expected 1-based record number, queried index)
        lookups = (
            (1, 1010),  # first
            (1, 1011),  # one after first
            (1, 1015),  # between first and second
            (1, 1019),  # one before second
            (2, 1020),  # second
            (2, 1021),  # one after second
            (2, 1029),  # one before third
            (3, 1030),  # third
            (3, 1031),  # one after third
            (6, 1069),  # one before last
            (7, 1070),  # last
            (7, 1071),  # one after last
        )
        for record_id, index in lookups:
            self.assertEqual(self._record2pos(record_id),
                             reader.tell_index_lte(index))
Esempio n. 22
0
    def reencrypt(self,
                  rsa_key: RSA,
                  rsa_enc_key: RSA,
                  bam_filename: str,
                  enc_diff_filename: str,
                  out_enc_diff_filename: str,
                  start_ref_name: str = None,
                  start_ref_pos: int = None,
                  end_ref_name: str = None,
                  end_ref_pos: int = None,
                  include_unmapped: bool = False,
                  unmapped_only: bool = False,
                  rsa_ver_key: RSA = None):
        """
        Reencrypt DIFF file with supplied public_key.
        Output formats:
        .diff.enc
        :param rsa_key: private key to decrypt DIFF and sign new DIFF
        :param rsa_enc_key: public key to encrypt new DIFF
        :param bam_filename: mutated BAM
        :param enc_diff_filename: diff to reencrypt
        :param out_enc_diff_filename: reencrypted DIFF
        :param start_ref_name: inclusive
        :param start_ref_pos: 0-based, inclusive
        :param end_ref_name: inclusive
        :param end_ref_pos: 0-based, inclusive
        :param include_unmapped: Include all unplaced unmapped reads.
        :param unmapped_only: Only unmapped reads - both placed and unplaced.
         Overrides other parameters.
        :param rsa_ver_key: RSA key with public key to verify DIFF
        :raises ValueError: if the BAM checksum differs from the checksum
         stored in the BDIFF header, or if unmapped reads are requested but
         the BDIFF header holds no secret
        """
        # TODO verify if bam_filename is mutated
        # make sure that BAM is indexed
        pysam.index(bam_filename)
        with open(enc_diff_filename, 'rb') as enc_diff_file, \
                io.BytesIO() as diff_file:
            aes_key = self._read_aes_key(enc_diff_file, rsa_key)
            signature = self._read_signature(enc_diff_file)
            self._decrypt(enc_diff_file, aes_key, diff_file)
            self._verify(diff_file, signature, rsa_ver_key)
            bdiff = BdiffIO(diff_file)
            bam_mut = BamMutator(bam_filename)
            if bam_mut.checksum != bdiff.header.get(
                    BamMutator.BDIFF_CHECKSUM_TAG):
                # checksum mismatch
                raise ValueError(
                    "Provided BDIFF is not associated with this BAM."
                    " Reason: checksum mismatch.")

            from_index, to_index = bam_mut.resolve_range(
                bdiff.header[BdiffIO.FROM_INDEX],
                bdiff.header[BdiffIO.TO_INDEX], start_ref_name, start_ref_pos,
                end_ref_name, end_ref_pos)
            # use actual effective range
            bdiff.header[BdiffIO.FROM_INDEX] = from_index
            bdiff.header[BdiffIO.TO_INDEX] = to_index

            if (unmapped_only or include_unmapped
                ) and BamMutator.BDIFF_SECRET_TAG not in bdiff.header:
                raise ValueError(
                    'BDIFF must contain secret to decrypt unmapped reads.')

            if unmapped_only:
                # the range is dropped from the new header
                del bdiff.header[BdiffIO.FROM_INDEX]
                del bdiff.header[BdiffIO.TO_INDEX]
            elif not include_unmapped:
                # mapped only: the new DIFF must not carry the secret.
                # pop() with a default because the secret is only checked for
                # the unmapped modes; a DIFF that has none (e.g. one already
                # reencrypted as mapped only) must not fail with KeyError.
                bdiff.header.pop(BamMutator.BDIFF_SECRET_TAG, None)

            out_diff = bdiff.file(bdiff.header)

            with out_diff, open(out_enc_diff_filename,
                                'wb') as out_enc_diff_file:
                # same AES key is reused; it is wrapped with rsa_enc_key
                out_signature = self._sign(out_diff, rsa_key)
                self._write_aes_key(out_enc_diff_file, aes_key, rsa_enc_key)
                self._write_signature(out_enc_diff_file, out_signature)
                self._encrypt(out_diff, aes_key, out_enc_diff_file)
Esempio n. 23
0
    def _mask_snv_pos(self, bdiff_io: bdiff.BdiffIO,
                      allele_queue: List[AlleleAlignment],
                      variant: VariantOccurrence, rng: VeryRandom) -> bool:
        """
        Mask the alleles queued at one SNV position.

        Nothing happens unless _private_allele_pair finds both personal
        alleles. Otherwise a public allele pair is drawn, two masking maps
        (a and b) are created, and each queued allele is mutated with one of
        them depending on the zygosity change:
        HOMO_TO_HETERO picks a map at random per allele, HETERO_TO_HOMO uses
        map b for alleles equal to the secondary personal allele, and any
        other zygosity uses map a throughout. Queue positions mutated with
        map b are collected as beta indices. An SNV record is written when
        any _mutate_allele call returned a truthy result.
        :param bdiff_io: receives the SNV record
        :param allele_queue: alleles aligned at the variant position
        :param variant: provides freqs, ref_allele and pos.index
        :param rng: random source for the public pair and the map choice
        :return: the _mutate_allele results OR-ed together, False when the
         personal alleles are not found
        """
        from_allele_a, from_allele_b = self._private_allele_pair(allele_queue)
        if from_allele_a is None or from_allele_b is None:
            # personal alleles are not found
            return False

        to_allele_a, to_allele_b = self._public_allele_pair(variant.freqs, rng)
        mask_map_a, mask_map_b, zygosity = self._create_masking(
            from_allele_a, from_allele_b, to_allele_a, to_allele_b)

        # decide once how a queued allele selects map b over map a
        if zygosity == ZygosityChange.HOMO_TO_HETERO:

            def takes_map_b(aligned_allele):
                # use random masking map from the pair
                return not rng.random() < 0.5

        elif zygosity == ZygosityChange.HETERO_TO_HOMO:

            def takes_map_b(aligned_allele):
                # secondary allele
                return aligned_allele.allele == from_allele_b

        else:

            def takes_map_b(aligned_allele):
                # zygosity is preserved
                return False

        is_masked = False
        beta_indices = []
        for position, aligned_allele in enumerate(allele_queue):
            if takes_map_b(aligned_allele):
                is_masked |= self._mutate_allele(aligned_allele, mask_map_b)
                beta_indices.append(position)
            else:
                is_masked |= self._mutate_allele(aligned_allele, mask_map_a)

        if is_masked:
            # at least one alignment has been masked
            base_order = ('A', 'T', 'G', 'C')
            bdiff_io.write_snv(
                index=variant.pos.index,
                ref_id=cmn.BASES.index(variant.ref_allele),
                zygosity=zygosity,
                perm_a=[mask_map_a[base] for base in base_order],
                perm_b=[mask_map_b[base] for base in base_order],
                beta_indices=beta_indices)

        return is_masked
Esempio n. 24
0
    def unmutate(self,
                 bdiff_file: io.BytesIO,
                 out_bam_filename: str,
                 start_ref_name: str = None,
                 start_ref_pos: int = None,
                 end_ref_name: str = None,
                 end_ref_pos: int = None,
                 include_unmapped: bool = False,
                 unmapped_only: bool = False):
        """
        Unmutate BAM file in range specified by DIFF file or by parameters.

        All BDIFF checks (secret presence, checksum, range resolution) are
        done before the output BAM is opened for writing, so a rejected BDIFF
        never touches ``out_bam_filename``.

        :param bdiff_file: BDIFF file object; it is wrapped in a BdiffIO.
        :param out_bam_filename: output BAM, opened in 'wb' mode with the
         unmutated header.
        :param start_ref_name: inclusive
        :param start_ref_pos: 0-based, inclusive
        :param end_ref_name: inclusive
        :param end_ref_pos: 0-based, inclusive
        :param include_unmapped: Include all unplaced unmapped reads.
        :param unmapped_only: Only unmapped reads - both placed and unplaced.
         Overrides other parameters.
        :raises ValueError: if unmapped reads are requested but the BDIFF
         carries no secret, or if the BDIFF checksum differs from the checksum
         of this BAM.

        When range is supplied partially covered reads are also included,
        but only variants within range are unmutated.

        Counters of the run are stored in ``self._stats`` on success.
        """
        self._stats = {}

        with bam.open_bam(self._bam_filename,
                          'rb') as bam_file:  # type: pysam.AlignmentFile
            header = bam.unmut_header(bam_file.header)
            mut = Mutator(fai=self._fai, verbose=self._verbose)

            bdiff_io = BdiffIO(bdiff_file)
            if self._verbose:
                print('SNV diff count %d' % bdiff_io.snv_count)
                print('INDEL diff count %d' % bdiff_io.indel_count)

            secret = self.extract_secret_bytes(bdiff_io)
            if (include_unmapped or unmapped_only) and secret is None:
                raise ValueError(
                    'BDIFF must contain secret to decrypt unmapped reads.')

            # validate checksum
            bam_checksum = self.checksum
            bdiff_checksum = bdiff_io.header[self.BDIFF_CHECKSUM_TAG]
            if bam_checksum != bdiff_checksum:
                # diagnostics are gated like every other print in this method
                if self._verbose:
                    print(bam_checksum)
                    print(bdiff_checksum)

                raise ValueError('BDIFF does not refer to this BAM')

            # TODO user friendly exception on missing bdiff_io header value
            start_index, end_index = self.resolve_range(
                bdiff_from_index=bdiff_io.header[BdiffIO.FROM_INDEX],
                bdiff_to_index=bdiff_io.header[BdiffIO.TO_INDEX],
                start_ref_name=start_ref_name,
                start_ref_pos=start_ref_pos,
                end_ref_name=end_ref_name,
                end_ref_pos=end_ref_pos)

            # The output is opened only after every check above has passed.
            with bam.open_bam(out_bam_filename, 'wb',
                              header=header) as out_bam_file:
                # TODO move iterators to with statement
                mut.unmutate(
                    bam_iter=iters.bam_iterator(self._bam_filename,
                                                start_index, end_index,
                                                unmapped_only,
                                                include_unmapped),
                    bdiff_iter=iters.BdiffIterator(bdiff_io=bdiff_io,
                                                   fai=self._fai,
                                                   start_index=start_index,
                                                   end_index=end_index),
                    out_bam_file=out_bam_file,
                    secret=secret)

            self._stats = {
                self.STAT_ALIGNMENT_COUNT: mut.alignment_counter,
                self.STAT_COVERING_COUNT: mut.covering_counter,
                self.STAT_MUT_COUNT: mut.mut_counter,
                self.STAT_DIFF_COUNT: mut.diff_counter,
                self.STAT_ALIGNMENT_MUT_COUNT: mut.alignment_mut_counter
            }
Esempio n. 25
0
    def test_file_index(self):
        """
        Check the file index BdiffIO builds when index_resolution is 3.

        Ten records with indices 1000, 1010, ..., 1090 are written, the
        result is reopened for reading, and the test asserts the header
        derived values, the content of ``file_index`` and the positions
        returned by ``_indexed_pos`` for several query indices.
        """
        # writer side: every record uses the same arguments except its index
        write_io = BdiffIO(index_resolution=3)
        self._write_bdiff_record(write_io, 1000, 0, ['C', 'A', 'T', 'G'])
        self._write_bdiff_record(write_io, 1010, 0, ['C', 'A', 'T', 'G'])
        self._write_bdiff_record(write_io, 1020, 0, ['C', 'A', 'T', 'G'])
        self._write_bdiff_record(write_io, 1030, 0, ['C', 'A', 'T', 'G'])
        self._write_bdiff_record(write_io, 1040, 0, ['C', 'A', 'T', 'G'])
        self._write_bdiff_record(write_io, 1050, 0, ['C', 'A', 'T', 'G'])
        self._write_bdiff_record(write_io, 1060, 0, ['C', 'A', 'T', 'G'])
        self._write_bdiff_record(write_io, 1070, 0, ['C', 'A', 'T', 'G'])
        self._write_bdiff_record(write_io, 1080, 0, ['C', 'A', 'T', 'G'])
        self._write_bdiff_record(write_io, 1090, 0, ['C', 'A', 'T', 'G'])

        # reader side: reopen what has just been written
        read_io = BdiffIO(write_io.file())

        # all ten records are expected to be counted as SNVs, none as INDELs
        self.assertEqual(1000, read_io.first_index)
        self.assertEqual(1090, read_io.last_index)
        self.assertEqual(10, read_io.snv_count)
        self.assertEqual(0, read_io.indel_count)
        self.assertEqual(True, read_io.is_read_mode)
        self.assertEqual(3, read_io.index_resolution)
        # with resolution 3 the 3rd, 6th and 9th record are expected in the
        # index; the second tuple item is presumably a byte offset of the
        # record (steps of 24 per three records) - TODO confirm
        self.assertListEqual([(1020, 16), (1050, 40), (1080, 64)],
                             read_io.file_index)

        # queries below the first indexed entry (1020) are expected to map
        # to the same position as record 1000
        self.assertEqual(self._index2pos(read_io, 1000),
                         read_io._indexed_pos(0))
        self.assertEqual(self._index2pos(read_io, 1000),
                         read_io._indexed_pos(1000))
        self.assertEqual(self._index2pos(read_io, 1000),
                         read_io._indexed_pos(1019))
        # queries in <1020, 1050) are expected to map to indexed record 1020
        self.assertEqual(self._index2pos(read_io, 1020),
                         read_io._indexed_pos(1020))
        self.assertEqual(self._index2pos(read_io, 1020),
                         read_io._indexed_pos(1021))
        self.assertEqual(self._index2pos(read_io, 1020),
                         read_io._indexed_pos(1049))
        # an exact hit on the next indexed entry moves to that entry
        self.assertEqual(self._index2pos(read_io, 1050),
                         read_io._indexed_pos(1050))
        # 1010 is a written but non-indexed record below 1020, so it is
        # expected to fall back to the position of record 1000
        self.assertEqual(self._index2pos(read_io, 1000),
                         read_io._indexed_pos(1010))