Beispiel #1
0
    def test_get_descriptor_encoding(self):
        """ Testing get_descriptor_encoding function. """

        valid_desc = [
            'aa_composition', 'dipeptide_composition', \
            'moreaubroto_autocorrelation', 'moran_autocorrelation', 'geary_autocorrelation', \
            'ctd', 'comp', 'transition', 'distribution', 'conjoint_triad', \
            'seq_order_coupling_number', 'tripeptide_composition', 'seq_order_coupling_number', \
                'quasi_seq_order', 'pseudo_aa_composition'
        ]
        #1.)
        #run tests on all test datasets
        for dataset in range(0, len(self.all_config_files)):
            desc = descr.Descriptors(self.all_config_files[dataset])

            #test each descriptor is calculated when string of its name passed to func
            for val_desc in range(0, len(valid_desc)):

                encoded_desc = desc.get_descriptor_encoding(
                    valid_desc[val_desc])
                self.assertIsNotNone(
                    encoded_desc, 'Descriptor attribute should not be none.')
                self.assertFalse(encoded_desc.empty,
                                 'Descriptor attribute should not be empty.')
                self.assertTrue(
                    encoded_desc.shape[0], len(desc.protein_seqs),
                    '1st dimension of descriptor attribute should equal num of protein seqs.'
                )
Beispiel #2
0
    def test_ctd(self):
        """ Testing CTD descriptor attributes and methods. """
        #run tests on all test datasets
        for dataset in range(0, len(self.all_config_files)):
            desc = descr.Descriptors(self.all_config_files[dataset])

            #get descriptor values
            ctd = desc.get_ctd()
            #1.)
            self.assertFalse(ctd.empty,
                             'Descriptor dataframe should not be empty')
            self.assertEqual(
                ctd.shape, (self.num_seqs[dataset], 147),
                'Descriptor not of correct ({},147)'.format(
                    self.num_seqs[dataset]))
            self.assertIsInstance(ctd, pd.DataFrame)
            self.assertTrue(ctd.any().isnull().sum() == 0,
                            'Descriptor should not contain any null values.')

            #get descriptor values
            comp = desc.get_composition()
            #2.)
            self.assertFalse(comp.empty,
                             'Descriptor dataframe should not be empty')
            self.assertEqual(
                comp.shape, (self.num_seqs[dataset], 3),
                'Descriptor not of correct ({},3)'.format(
                    self.num_seqs[dataset]))
            self.assertIsInstance(comp, pd.DataFrame)
            self.assertTrue(comp.any().isnull().sum() == 0,
                            'Descriptor should not contain any null values.')

            #get descriptor values
            trans = desc.get_transition()
            #3.)
            self.assertFalse(trans.empty,
                             'Descriptor dataframe should not be empty')
            self.assertEqual(
                trans.shape, (self.num_seqs[dataset], 3),
                'Descriptor not of correct ({},3)'.format(
                    self.num_seqs[dataset]))
            self.assertIsInstance(trans, pd.DataFrame)
            self.assertTrue(trans.any().isnull().sum() == 0,
                            'Descriptor should not contain any null values.')

            #get descriptor values
            distr = desc.get_distribution()
            #4.)
            self.assertFalse(distr.empty,
                             'Descriptor dataframe should not be empty')
            self.assertEqual(
                distr.shape, (self.num_seqs[dataset], 15),
                'Descriptor not of correct ({},15)'.format(
                    self.num_seqs[dataset]))
            self.assertIsInstance(distr, pd.DataFrame)
            self.assertTrue(distr.any().isnull().sum() == 0,
                            'Descriptor should not contain any null values.')
Beispiel #3
0
 def test_all_descriptors_list(self):
     """ Testing function that returns various combinations of available descriptors 
         using built-in itertools library. """
     desc = descr.Descriptors(desc_config=self.all_config_files[0])
     #1.)
     self.assertEqual(len(desc.all_descriptors_list(desc_combo=1)), 15)
     self.assertEqual(len(desc.all_descriptors_list(desc_combo=2)), 105)
     self.assertEqual(len(desc.all_descriptors_list(desc_combo=3)), 455)
     #2.)
     self.assertIsInstance(desc.all_descriptors_list(desc_combo=1), list)
     self.assertIsInstance(desc.all_descriptors_list(desc_combo=2), list)
     self.assertIsInstance(desc.all_descriptors_list(desc_combo=3), list)
Beispiel #4
0
 def test_valid_descriptors(self):
     """ Testing function that returns the list of valid descriptors available in descriptors module. """
     desc = descr.Descriptors(desc_config=self.all_config_files[0])
     valid_desc = desc.valid_descriptors()
     #1.)
     self.assertEqual(len(valid_desc), 15)
     self.assertIsInstance(valid_desc, list)
     self.assertIn('seq_order_coupling_number', valid_desc)
     self.assertIn('geary_autocorrelation', valid_desc)
     self.assertIn('moreaubroto_autocorrelation', valid_desc)
     self.assertIn('distribution', valid_desc)
     self.assertIn('pseudo_aa_composition', valid_desc)
     self.assertIn('aa_composition', valid_desc)
     self.assertIn('tripeptide_composition', valid_desc)
     self.assertIn('dipeptide_composition', valid_desc)
Beispiel #5
0
    def test_amp_pseudo_AAC(self):
        """ Testing Amphiphilic Pseudo Amino Acid Composition descriptor attributes and methods. """
        #run tests on all test datasets
        for dataset in range(0, len(self.all_config_files)):
            desc = descr.Descriptors(self.all_config_files[dataset])

            #get descriptor values
            amp_pseudo_AAC = desc.get_amp_pseudo_aa_composition()
            #1.)
            self.assertFalse(amp_pseudo_AAC.empty)
            self.assertEqual(amp_pseudo_AAC.shape,
                             (self.num_seqs[dataset], 80),
                             'Descriptor not of correct shape (1,80)')
            self.assertIsInstance(amp_pseudo_AAC, pd.DataFrame)
            self.assertTrue(amp_pseudo_AAC.any().isnull().sum() == 0,
                            'Descriptor should not contain any null values.')
Beispiel #6
0
    def test_quasi_seq_order(self):
        """ Testing Quasi sequence order descriptor attributes and methods. """
        #run tests on all test datasets
        for dataset in range(0, len(self.all_config_files)):
            desc = descr.Descriptors(self.all_config_files[dataset])

            #get descriptor values
            quasi_seq_order = desc.get_quasi_seq_order()
            #1.)
            self.assertFalse(quasi_seq_order.empty,
                             'Descriptor dataframe should not be empty')
            self.assertEqual(quasi_seq_order.shape,
                             (self.num_seqs[dataset], 30),
                             'Descriptor not of correct shape (1,100)')
            self.assertIsInstance(quasi_seq_order, pd.DataFrame,
                                  'Descriptor not of type DataFrame')
            self.assertTrue(quasi_seq_order.any().isnull().sum() == 0,
                            'Descriptor should not contain any null values.')
Beispiel #7
0
    def test_conjoint_triad(self):
        """ Testing Conjoint Triad descriptor attributes and methods. """
        #run tests on all test datasets
        for dataset in range(0, len(self.all_config_files)):
            desc = descr.Descriptors(self.all_config_files[dataset])

            #get descriptor values
            conjoint_triad = desc.get_conjoint_triad()
            #1.)
            self.assertFalse(conjoint_triad.empty,
                             'Descriptor dataframe should not be empty')
            self.assertEqual(conjoint_triad.shape,
                             (self.num_seqs[dataset], 343),
                             'Descriptor not of correct shape (1,343)')
            self.assertIsInstance(conjoint_triad, pd.DataFrame,
                                  'Descriptor not of type DataFrame')
            self.assertTrue(conjoint_triad.any().isnull().sum() == 0,
                            'Descriptor should not contain any null values.')
Beispiel #8
0
    def test_geary_autocorrelation(self):
        """ Testing Geary autocorrelation descriptor attributes and methods. """
        #run tests on all test datasets
        for dataset in range(0, len(self.all_config_files)):
            desc = descr.Descriptors(self.all_config_files[dataset])

            #get descriptor values
            geary = desc.get_geary_autocorrelation()
            #1.)
            self.assertFalse(geary.empty,
                             'Descriptor dataframe should not be empty')
            self.assertEqual(
                geary.shape, (self.num_seqs[dataset], 240),
                'Descriptor not of correct ({},240)'.format(
                    self.num_seqs[dataset]))
            self.assertIsInstance(geary, pd.DataFrame)
            self.assertTrue(geary.any().isnull().sum() == 0,
                            'Descriptor should not contain any null values.')
Beispiel #9
0
    def test_tripeptide_composition(self):
        """ Testing Tripeptide Composition protein descriptor attributes and methods. """
        #run tests on all test datasets
        for dataset in range(0, len(self.all_config_files)):
            desc = descr.Descriptors(self.all_config_files[dataset])

            #get descriptor values
            tripeptide_comp = desc.get_tripeptide_composition()
            #1.)
            self.assertFalse(tripeptide_comp.empty,
                             'Descriptor dataframe should not be empty')
            self.assertEqual(
                tripeptide_comp.shape, (self.num_seqs[dataset], 8000),
                'Descriptor not of correct ({},8000)'.format(
                    self.num_seqs[dataset]))
            self.assertTrue(
                desc.tripeptide_composition.equals(tripeptide_comp),
                'Output dataframe and class attribute dataframes must be the same.'
            )
            self.assertIsInstance(tripeptide_comp, pd.DataFrame,
                                  'Descriptor not of type DataFrame.')
            self.assertTrue(tripeptide_comp.any().isnull().sum() == 0,
                            'Descriptor should not contain any null values.')
Beispiel #10
0
    def test_aa_composition(self):
        """ Testing Amino Acid Composition protein descriptor attributes and methods. """
        #run tests on all test datasets
        for dataset in range(0, len(self.all_config_files)):
            desc = descr.Descriptors(self.all_config_files[dataset])

            #get descriptor values
            aa_comp = desc.get_aa_composition()
            #1.)
            self.assertFalse(aa_comp.empty,
                             'Descriptor dataframe should not be empty')
            self.assertTrue(
                desc.aa_composition.equals(aa_comp),
                'Output dataframe and class attribute dataframes must be the same.'
            )
            self.assertEqual(aa_comp.shape, (self.num_seqs[dataset], 20),
                             'Descriptor not of correct shape.')
            self.assertIsInstance(aa_comp, pd.DataFrame,
                                  'Descriptor not of type DataFrame.')
            self.assertEqual(descriptor_comp.aminoAcids, list(aa_comp.columns),
                             'Incorrect column values found.')
            self.assertTrue(aa_comp.any().isnull().sum() == 0,
                            'Descriptor should not contain any null values.')
Beispiel #11
0
    def test_descriptor(self):
        """ Test descriptor initialisation process. Verify the initial input parameters
            and descriptor attributes are correct. """
        #testing on all 4 datasets/config files
        for dataset in range(0, len(self.all_config_files)):
            desc = descr.Descriptors(
                desc_config=self.all_config_files[dataset])
            #1.)
            #verify num_seqs descriptors attribute is correct
            self.assertEqual(
                desc.num_seqs, self.num_seqs[dataset],
                'num_seqs attribute not equal to the number of sequences found.'
            )

            #verify that all input sequences dont have any gaps/missing amino acids
            for seq in desc.protein_seqs:
                self.assertNotIn(
                    '-', seq, 'There should be no gaps (-) in the sequences.')
#2.)
#verify all descriptor attributes are initialised to empty dataframes
            self.assertTrue(
                desc.aa_composition.empty,
                'Attribute should be initialised to an empty dataframe')
            self.assertTrue(
                desc.dipeptide_composition.empty,
                'Attribute should be initialised to an empty dataframe')
            self.assertTrue(
                desc.tripeptide_composition.empty,
                'Attribute should be initialised to an empty dataframe')
            self.assertTrue(
                desc.moreaubroto_autocorrelation.empty,
                'Attribute should be initialised to an empty dataframe')
            self.assertTrue(
                desc.moran_autocorrelation.empty,
                'Attribute should be initialised to an empty dataframe')
            self.assertTrue(
                desc.geary_autocorrelation.empty,
                'Attribute should be initialised to an empty dataframe')
            self.assertTrue(
                desc.ctd.empty,
                'Attribute should be initialised to an empty dataframe')
            self.assertTrue(
                desc.comp.empty,
                'Attribute should be initialised to an empty dataframe')
            self.assertTrue(
                desc.transition.empty,
                'Attribute should be initialised to an empty dataframe')
            self.assertTrue(
                desc.distribution.empty,
                'Attribute should be initialised to an empty dataframe')
            self.assertTrue(
                desc.conjoint_triad.empty,
                'Attribute should be initialised to an empty dataframe')
            self.assertTrue(
                desc.seq_order_coupling_number.empty,
                'Attribute should be initialised to an empty dataframe')
            self.assertTrue(
                desc.quasi_seq_order.empty,
                'Attribute should be initialised to an empty dataframe')
            self.assertTrue(
                desc.pseudo_aa_composition.empty,
                'Attribute should be initialised to an empty dataframe')
            self.assertTrue(
                desc.amp_pseudo_aa_composition.empty,
                'Attribute should be initialised to an empty dataframe')
            self.assertTrue(
                desc.all_descriptors.empty,
                'Attribute should be initialised to an empty dataframe')
            #3.)
            #test Value and Type error exceptions are thrown if invalid parameters input
            with self.assertRaises(
                    TypeError,
                    msg='Type Error raised, incorrect datatype input to class.'
            ):
                fail_desc = descr.Descriptors(desc_config=123)

            with self.assertRaises(
                    TypeError,
                    msg='Type Error raised, incorrect datatype input to class.'
            ):
                fail_desc = descr.Descriptors(desc_config=None)

            with self.assertRaises(
                    OSError,
                    msg='OS Error raised, filepath to config file not found.'):
                fail_desc = descr.Descriptors(
                    desc_config="incorrect_filepath.json")

            with self.assertRaises(
                    OSError,
                    msg='OS Error raised, filepath to config file not found.'):
                fail_desc = descr.Descriptors(desc_config="")
Beispiel #12
0
 def test_descriptor_groups(self):
     """ Testing the descriptor groups dictionary which stores the specific group
     that a descriptor attribute is a member of. """
     #testing on all 4 datasets
     for dataset in range(0, len(self.all_config_files)):
         desc = descr.Descriptors(
             desc_config=self.all_config_files[dataset])
         #1.)
         self.assertEqual(list(desc.descriptor_groups.keys()),
                          desc.all_descriptors_list())
         self.assertEqual(
             list(desc.descriptor_groups.values()).count("Composition"), 3)
         self.assertEqual(
             list(desc.descriptor_groups.values()).count("Autocorrelation"),
             3)
         self.assertEqual(
             list(desc.descriptor_groups.values()).count("Conjoint Triad"),
             1)
         self.assertEqual(
             list(desc.descriptor_groups.values()).count(
                 "Quasi-Sequence-Order"), 2)
         self.assertEqual(
             list(desc.descriptor_groups.values()).count("CTD"), 4)
         self.assertEqual(
             list(desc.descriptor_groups.values()).count(
                 "Pseudo Composition"), 2)
         self.assertEqual(
             list(desc.descriptor_groups.values()).count("Conjoint Triad"),
             1)
         self.assertEqual(len(desc.descriptor_groups.keys()),
                          len(desc.all_descriptors_list()))
         #2.)
         self.assertEqual(desc.descriptor_groups['aa_composition'],
                          "Composition")
         self.assertEqual(desc.descriptor_groups['dipeptide_composition'],
                          "Composition")
         self.assertEqual(desc.descriptor_groups['tripeptide_composition'],
                          "Composition")
         self.assertEqual(desc.descriptor_groups['moran_autocorrelation'],
                          "Autocorrelation")
         self.assertEqual(desc.descriptor_groups['geary_autocorrelation'],
                          "Autocorrelation")
         self.assertEqual(
             desc.descriptor_groups['moreaubroto_autocorrelation'],
             "Autocorrelation")
         self.assertEqual(desc.descriptor_groups['ctd'], "CTD")
         self.assertEqual(desc.descriptor_groups['comp'], "CTD")
         self.assertEqual(desc.descriptor_groups['distribution'], "CTD")
         self.assertEqual(desc.descriptor_groups['transition'], "CTD")
         self.assertEqual(desc.descriptor_groups['conjoint_triad'],
                          "Conjoint Triad")
         self.assertEqual(desc.descriptor_groups['pseudo_aa_composition'],
                          "Pseudo Composition")
         self.assertEqual(desc.descriptor_groups['quasi_seq_order'],
                          "Quasi-Sequence-Order")
         self.assertEqual(
             desc.descriptor_groups['seq_order_coupling_number'],
             "Quasi-Sequence-Order")
         self.assertEqual(
             desc.descriptor_groups['amp_pseudo_aa_composition'],
             "Pseudo Composition")
         #3.)
         self.assertIsInstance(desc.descriptor_groups, dict)