Example #1
0
    def test_five_signatures_perturbed(self):
        weights = np.zeros((30, ))
        weights[3] = .4
        weights[7] = .2
        weights[13] = .2
        weights[17] = .1
        weights[23] = .1
        tumor_profile, context_counts = generate_tumor_profile(weights)

        # Perturb the counts by up to 6% more or less of value
        for k, v in context_counts.items():
            context_counts[k] = v * (1 + (np.random.rand() * 0.12) - 0.06)

        ds = DeconstructSigs(context_counts=context_counts)

        # Limit to two deconstructed signatures
        reconstructed_weights = ds.which_signatures(5)
        self.assertAlmostEqual(reconstructed_weights[3], .4, places=1)
        self.assertAlmostEqual(reconstructed_weights[7], .2, places=1)
        self.assertAlmostEqual(reconstructed_weights[13], .2, places=1)
        self.assertAlmostEqual(reconstructed_weights[17], .1, places=1)
        self.assertAlmostEqual(reconstructed_weights[23], .1, places=1)

        # Don't impose a signature limit to test that algorithm correctly deduces only five are necessary
        reconstructed_weights = ds.which_signatures()
        self.assertAlmostEqual(reconstructed_weights[3], .4, places=1)
        self.assertAlmostEqual(reconstructed_weights[7], .2, places=1)
        self.assertAlmostEqual(reconstructed_weights[13], .2, places=1)
        self.assertAlmostEqual(reconstructed_weights[17], .1, places=1)
        self.assertAlmostEqual(reconstructed_weights[23], .1, places=1)
Example #2
0
    def test_five_signatures_discard_insignificant(self):
        weights = np.zeros((30, ))
        weights[3] = .19
        weights[7] = .41
        weights[13] = .24
        weights[17] = .14
        weights[23] = .02
        tumor_profile, context_counts = generate_tumor_profile(weights)
        ds = DeconstructSigs(context_counts=context_counts)

        # Limit to five deconstructed signatures
        reconstructed_weights = ds.which_signatures(5)
        self.assertAlmostEqual(reconstructed_weights[3], .19, places=2)
        self.assertAlmostEqual(reconstructed_weights[7], .41, places=2)
        self.assertAlmostEqual(reconstructed_weights[13], .24, places=2)
        self.assertAlmostEqual(reconstructed_weights[17], .14, places=2)
        self.assertAlmostEqual(reconstructed_weights[23], 0, places=2)

        # Don't impose a signature limit to test that algorithm correctly deduces only five necessary and significant
        reconstructed_weights = ds.which_signatures()
        self.assertAlmostEqual(reconstructed_weights[3], .19, places=2)
        self.assertAlmostEqual(reconstructed_weights[7], .41, places=2)
        self.assertAlmostEqual(reconstructed_weights[13], .24, places=2)
        self.assertAlmostEqual(reconstructed_weights[17], .14, places=2)
        self.assertAlmostEqual(reconstructed_weights[23], 0, places=2)
Example #3
0
    def test_one_signature(self):
        weights = np.zeros((30, ))
        weights[0] = 1
        tumor_profile, context_counts = generate_tumor_profile(weights)
        ds = DeconstructSigs(context_counts=context_counts)

        # Limit to one deconstructed signatures
        reconstructed_weights = ds.which_signatures(1)
        self.assertAlmostEqual(reconstructed_weights[0], weights[0], places=2)

        # Don't impose a signature limit to test that algorithm correctly deduces only one are necessary
        reconstructed_weights = ds.which_signatures()
        self.assertAlmostEqual(reconstructed_weights[0], weights[0], places=2)
Example #4
0
    def test_three_signatures_with_associated(self):
        weights = np.zeros((30, ))
        weights[3] = .6
        weights[11] = .25
        weights[27] = .15
        tumor_profile, context_counts = generate_tumor_profile(weights)
        ds = DeconstructSigs(context_counts=context_counts)

        # Limit to two deconstructed signatures
        reconstructed_weights = ds.which_signatures(3, associated=[11, 27])
        self.assertAlmostEqual(reconstructed_weights[3], 0, places=2)

        # Don't impose a signature limit to test that algorithm correctly deduces only three are necessary
        reconstructed_weights = ds.which_signatures(associated=[11, 27])
        self.assertAlmostEqual(reconstructed_weights[3], 0, places=2)
Example #5
0
    def test_four_signatures(self):
        weights = np.zeros((30, ))
        weights[6] = .43
        weights[10] = .27
        weights[25] = .20
        weights[29] = .1
        tumor_profile, context_counts = generate_tumor_profile(weights)
        ds = DeconstructSigs(context_counts=context_counts)

        # Limit to four deconstructed signatures
        reconstructed_weights = ds.which_signatures(4)
        self.assertAlmostEqual(reconstructed_weights[6], .43, places=2)
        self.assertAlmostEqual(reconstructed_weights[10], .27, places=2)
        self.assertAlmostEqual(reconstructed_weights[25], .20, places=2)
        self.assertAlmostEqual(reconstructed_weights[29], .1, places=2)

        # Don't impose a signature limit to test that algorithm correctly deduces only fiour are necessary
        reconstructed_weights = ds.which_signatures()
        self.assertAlmostEqual(reconstructed_weights[6], .43, places=2)
        self.assertAlmostEqual(reconstructed_weights[10], .27, places=2)
        self.assertAlmostEqual(reconstructed_weights[25], .20, places=2)
        self.assertAlmostEqual(reconstructed_weights[29], .1, places=2)
Example #6
0
    def test_trinucleotide_standardization(self):
        weights = np.zeros((30, ))
        weights[0] = 1
        tumor_profile, context_counts = generate_tumor_profile(weights)
        ds = DeconstructSigs(context_counts=context_counts)

        pair = {'A': 'T', 'C': 'G', 'T': 'A', 'G': 'C'}

        # Standard form means that a pyrimidine is at the center position of the trinucleotide string
        already_standard_form = [
            'ACA',
            'ACC',
            'ACG',
            'ACT',
            'CCA',
            'CCC',
            'CCG',
            'CCT',
            'GCA',
            'GCC',
            'GCG',
            'GCT',
            'TCA',
            'TCC',
            'TCG',
            'TCT',
            'ATA',
            'ATC',
            'ATG',
            'ATT',
            'CTA',
            'CTC',
            'CTG',
            'CTT',
            'GTA',
            'GTC',
            'GTG',
            'GTT',
            'TTA',
            'TTC',
            'TTG',
            'TTT',
        ]

        for trinuc in already_standard_form:
            standardized_trinuc = ds._DeconstructSigs__standardize_trinuc(
                trinuc)
            self.assertEqual(trinuc, standardized_trinuc)

        # Non-standard form means that a purine is a the center position of the trinucleotide string
        non_standard_form = [
            '{}{}{}'.format(pair[trinuc[2]], pair[trinuc[1]], pair[trinuc[0]])
            for trinuc in already_standard_form
        ]

        for i, trinuc in enumerate(non_standard_form):
            standardized_trinuc = ds._DeconstructSigs__standardize_trinuc(
                trinuc)
            self.assertEqual(already_standard_form[i], standardized_trinuc)

        # Test that lower case trinucleotide strings get uppercased as well
        for trinuc in already_standard_form:
            standardized_trinuc = ds._DeconstructSigs__standardize_trinuc(
                trinuc.lower())
            self.assertEqual(trinuc, standardized_trinuc)
        for i, trinuc in enumerate(non_standard_form):
            standardized_trinuc = ds._DeconstructSigs__standardize_trinuc(
                trinuc.lower())
            self.assertEqual(already_standard_form[i], standardized_trinuc)
Example #7
0
    def test_substitution_standardization(self):
        weights = np.zeros((30, ))
        weights[0] = 1
        tumor_profile, context_counts = generate_tumor_profile(weights)
        ds = DeconstructSigs(context_counts=context_counts)

        # Standard form means that the ref is a pyrimidine
        standardized = ds._DeconstructSigs__standardize_subs('G', 'C')
        self.assertEqual(standardized, 'C>G')

        standardized = ds._DeconstructSigs__standardize_subs('G', 'A')
        self.assertEqual(standardized, 'C>T')

        standardized = ds._DeconstructSigs__standardize_subs('G', 'T')
        self.assertEqual(standardized, 'C>A')

        standardized = ds._DeconstructSigs__standardize_subs('A', 'C')
        self.assertEqual(standardized, 'T>G')

        standardized = ds._DeconstructSigs__standardize_subs('A', 'G')
        self.assertEqual(standardized, 'T>C')

        standardized = ds._DeconstructSigs__standardize_subs('A', 'T')
        self.assertEqual(standardized, 'T>A')

        standardized = ds._DeconstructSigs__standardize_subs('C', 'A')
        self.assertEqual(standardized, 'C>A')

        standardized = ds._DeconstructSigs__standardize_subs('C', 'G')
        self.assertEqual(standardized, 'C>G')

        standardized = ds._DeconstructSigs__standardize_subs('C', 'T')
        self.assertEqual(standardized, 'C>T')

        standardized = ds._DeconstructSigs__standardize_subs('T', 'A')
        self.assertEqual(standardized, 'T>A')

        standardized = ds._DeconstructSigs__standardize_subs('T', 'C')
        self.assertEqual(standardized, 'T>C')

        standardized = ds._DeconstructSigs__standardize_subs('T', 'G')
        self.assertEqual(standardized, 'T>G')