Beispiel #1
0
 def test_against_matlab_example(self):
     """Compare model scores with the energies stored in the sample file.

     Loads ``test_data/maxent_sample.mat``, builds a protein alignment from
     its ``alignment.data`` entry, fits a maxent model with a regularization
     amount of 0.5, and checks the alignment's scores against the stored
     ``energies`` entry using ``np.allclose``.
     """
     import os.path
     from scipy.io import loadmat
     from multicov.alignment import Alignment
     from multicov.alphabet import protein_alphabet
     from multicov.statistics import Statistics, MaxentModel

     sample_path = os.path.join('test_data', 'maxent_sample.mat')
     reference = loadmat(sample_path, squeeze_me=True)

     # `[()]` unwraps the 0-d object array left behind by squeeze_me=True.
     alignment = Alignment(reference['alignment']['data'][()],
                           protein_alphabet)
     model = MaxentModel(Statistics(alignment, regularization_amount=0.5))

     computed = model.score(alignment)
     self.assertTrue(np.allclose(computed, reference['energies'][()]))
Beispiel #2
0
 def test_with_matrix(self):
     """Scoring the raw data matrix must agree with scoring the alignment.

     Builds a small protein alignment, fits a maxent model with a
     regularization amount of 0.1, and checks that ``score(align.data)``
     and ``score(align)`` return values that are close under
     ``np.allclose``.
     """
     # The unused `binary_index_map` import was dropped: nothing in this
     # test refers to it.
     from multicov.alignment import Alignment
     from multicov.alphabet import protein_alphabet
     from multicov.statistics import Statistics, MaxentModel

     align = Alignment([
         'WKHNAY', 'KHRCDA', 'LGVVGY', 'LIGDDH', 'CMPRYW', 'QWFWRA',
         'VTMPEG', 'LNYINM', 'WHV-EW', 'PIWGGF', 'PPCWVE', 'E-MWRG',
         'RFGKFT', 'CGRCGS', 'T-PMVW', 'LNCPYA'
     ], protein_alphabet)
     stats = Statistics(align, regularization_amount=0.1)
     maxent = MaxentModel(stats)

     # Same sequences, once as the underlying data and once as the
     # Alignment wrapper.
     energies_from_matrix = maxent.score(align.data)
     energies_from_alignment = maxent.score(align)
     self.assertTrue(
         np.allclose(energies_from_matrix, energies_from_alignment))
Beispiel #3
0
 def test_multi_alpha_diagonalness_of_blockdiagonal_blocks(self):
     """Each block on the couplings' block diagonal must itself be diagonal.

     Combines a protein, a DNA and an RNA alignment into one alignment,
     fits a maxent model with a regularization amount of 0.5, and for every
     range returned by ``binary_index_map`` asserts that the matching
     square block of ``couplings`` has off-diagonal entries below 1e-10 in
     absolute value.
     """
     from os.path import join
     from multicov.alignment import Alignment
     from multicov.alphabet import protein_alphabet, dna_alphabet, rna_alphabet
     from multicov.align_io import load_fasta
     from multicov.binary import binary_index_map
     from multicov.statistics import Statistics, MaxentModel

     # (file name, alphabet) for each part, in the order they are combined.
     sources = [
         ('test_aln1.fasta', protein_alphabet),
         ('test_aln2.fasta', dna_alphabet),
         ('test_aln2.fasta', rna_alphabet),
     ]
     parts = [
         load_fasta(join('test_data', file_name), alphabet,
                    invalid_letter_policy='gap')
         for file_name, alphabet in sources
     ]

     combined = Alignment(parts[0])
     combined.add(parts[1]).add(parts[2])

     stats = Statistics(combined, regularization_amount=0.5)
     maxent = MaxentModel(stats)

     for index_range in binary_index_map(stats):
         start, stop = index_range[0], index_range[1]
         block = maxent.couplings[start:stop, start:stop]
         # Subtracting the diagonal leaves only the off-diagonal entries.
         off_diagonal = block - np.diag(np.diag(block))
         self.assertLess(np.max(np.abs(off_diagonal)), 1e-10)
Beispiel #4
0
 def test_multi_alpha_shape_and_symmetry_of_couplings(self):
     """The couplings matrix must be symmetric and have the expected shape.

     Combines a DNA, a protein and an RNA alignment, fits a maxent model
     with a regularization amount of 0.5, and asserts that ``couplings``
     equals its transpose to within 1e-10 and is square, with side length
     4 per column of the DNA and RNA parts plus 20 per column of the
     protein part.
     """
     from os.path import join
     from multicov.alignment import Alignment
     from multicov.alphabet import protein_alphabet, dna_alphabet, rna_alphabet
     from multicov.align_io import load_fasta
     from multicov.statistics import Statistics, MaxentModel

     dna_part = load_fasta(join('test_data', 'test_aln2.fasta'),
                           dna_alphabet,
                           invalid_letter_policy='gap')
     protein_part = load_fasta(join('test_data', 'test_aln1.fasta'),
                               protein_alphabet,
                               invalid_letter_policy='gap')
     rna_part = load_fasta(join('test_data', 'test_aln2.fasta'),
                           rna_alphabet,
                           invalid_letter_policy='uppergap')

     combined = Alignment(dna_part)
     combined.add(protein_part).add(rna_part)

     maxent = MaxentModel(Statistics(combined, regularization_amount=0.5))

     # Symmetry: the matrix minus its transpose should vanish.
     asymmetry = maxent.couplings - maxent.couplings.T
     self.assertLess(np.max(np.abs(asymmetry)), 1e-10)

     # Shape: square, with the side length derived from the part widths.
     actual_shape = np.shape(maxent.couplings)
     nucleotide_columns = dna_part.get_width() + rna_part.get_width()
     side = 4 * nucleotide_columns + 20 * protein_part.get_width()
     self.assertSequenceEqual(actual_shape, [side, side])
Beispiel #5
0
 def test_gap_gauge(self):
     """An all-gap sequence must score (numerically) zero.

     Combines a DNA, a protein and an RNA alignment, fits a maxent model
     with a regularization amount of 0.5, and asserts that the score of a
     single sequence made only of ``'-'`` characters, as wide as the
     combined alignment, is below 1e-10 in absolute value.
     """
     from os.path import join
     from multicov.alignment import Alignment
     from multicov.alphabet import protein_alphabet, dna_alphabet, rna_alphabet
     from multicov.align_io import load_fasta
     from multicov.statistics import Statistics, MaxentModel

     # (file name, alphabet, invalid-letter policy), in combination order.
     sources = [
         ('test_aln2.fasta', dna_alphabet, 'gap'),
         ('test_aln1.fasta', protein_alphabet, 'gap'),
         ('test_aln2.fasta', rna_alphabet, 'uppergap'),
     ]
     parts = [
         load_fasta(join('test_data', file_name), alphabet,
                    invalid_letter_policy=policy)
         for file_name, alphabet, policy in sources
     ]

     combined = Alignment(parts[0])
     combined.add(parts[1]).add(parts[2])

     maxent = MaxentModel(Statistics(combined, regularization_amount=0.5))

     all_gaps = combined.get_width() * '-'
     gap_energies = maxent.score([all_gaps])
     self.assertLess(np.max(np.abs(gap_energies)), 1e-10)
Beispiel #6
0
    def test_on_empty(self):
        """A model built from empty statistics exposes the expected attributes.

        Checks that ``stats``, ``alphabets``, ``annotations`` and
        ``reference`` exist on the model, that each is the very same object
        as the statistics object (for ``stats``) or its attribute of the
        same name, and that ``couplings`` has size 0.
        """
        from multicov.alignment import Alignment
        from multicov.statistics import Statistics, MaxentModel

        empty_stats = Statistics(Alignment())
        model = MaxentModel(empty_stats)

        for attr_name in ('stats', 'alphabets', 'annotations', 'reference'):
            self.assertTrue(hasattr(model, attr_name))

        self.assertIs(model.stats, empty_stats)
        # These must be shared with the statistics object, not copied.
        for attr_name in ('alphabets', 'annotations', 'reference'):
            self.assertIs(getattr(model, attr_name),
                          getattr(empty_stats, attr_name))
        self.assertEqual(np.size(model.couplings), 0)
Beispiel #7
0
 def test_against_matlab_example(self):
     """Compare ``cmat`` and ``couplings`` with values in the sample file.

     Loads ``test_data/maxent_sample.mat``, builds a protein alignment from
     its ``alignment.data`` entry, and checks with ``np.allclose`` that the
     statistics' ``cmat`` matches ``dca.cmat`` and the model's ``couplings``
     match ``params_nogap_nofc_nodiagtrick.couplings``.
     """
     import os.path
     from scipy.io import loadmat
     from multicov.alignment import Alignment
     from multicov.alphabet import protein_alphabet
     from multicov.statistics import Statistics, MaxentModel

     sample_path = os.path.join('test_data', 'maxent_sample.mat')
     reference = loadmat(sample_path, squeeze_me=True)

     # `[()]` unwraps the 0-d object array left behind by squeeze_me=True.
     alignment = Alignment(reference['alignment']['data'][()],
                           protein_alphabet)
     stats = Statistics(alignment, regularization_amount=0.5)
     model = MaxentModel(stats)

     cmat_matches = np.allclose(stats.cmat, reference['dca']['cmat'][()])
     self.assertTrue(cmat_matches)

     couplings_match = np.allclose(
         model.couplings,
         reference['params_nogap_nofc_nodiagtrick']['couplings'][()])
     self.assertTrue(couplings_match)
Beispiel #8
0
 def test_on_protein(self):
     """Off-diagonal couplings must equal the negated inverse of ``cmat``.

     Fits a maxent model (regularization amount 0.1) to a small protein
     alignment. The expected matrix is ``-inv(stats.cmat)`` with every
     square block given by ``binary_index_map`` set to zero; it is
     compared via ``np.allclose`` to ``couplings`` with its main diagonal
     removed.
     """
     from multicov.alignment import Alignment
     from multicov.alphabet import protein_alphabet
     from multicov.binary import binary_index_map
     from multicov.statistics import Statistics, MaxentModel

     sequences = [
         'WKHNAY', 'KHRCDA', 'LGVVGY', 'LIGDDH', 'CMPRYW', 'QWFWRA',
         'VTMPEG', 'LNYINM', 'WHV-EW', 'PIWGGF', 'PPCWVE', 'E-MWRG',
         'RFGKFT', 'CGRCGS', 'T-PMVW', 'LNCPYA'
     ]
     stats = Statistics(Alignment(sequences, protein_alphabet),
                        regularization_amount=0.1)
     model = MaxentModel(stats)

     expected = -np.linalg.inv(stats.cmat)
     for index_range in binary_index_map(stats):
         lo, hi = index_range[0], index_range[1]
         # Blank out the block on the diagonal for this index range.
         expected[lo:hi, lo:hi] = 0

     without_diagonal = model.couplings - np.diag(np.diag(model.couplings))
     self.assertTrue(np.allclose(expected, without_diagonal))