Esempio n. 1
0
def get_cmpd_information(molec):
    """Get information from CHEBI Database of a compound from CHEBI ID.

    Online using libChEBIpy (https://github.com/libChEBI/libChEBIpy)

    """
    if molec.chebiID is None and molec.iupac_name is not None:
        # try one more time for chebi ID
        chebiID = get_chebiID(mol_name=molec.name, iupac_name=molec.iupac_name)
        if chebiID is None:
            print('cannot get structure from chebi')
            return None
        molec.chebiID = [chebiID]
    # at this point, molec.chebiID will be a list - iterarte over it
    # the iteration stops if any CHEBI ID produces a structure
    for CID in molec.chebiID:
        if CID == '' or ' ' in CID or 'null' in CID:
            print(CID, '- not a real CHEBI ID')
            continue
        # get entity with chebiID
        entity = ChebiEntity(CID)
        # check for parent ID
        entity, CID = convert_entity_to_parent(entity, ID=CID, CID=CID)
        # attemp to get structure
        # SMILES
        smile = entity.get_smiles()
        print('libchebipy result:', smile)
        if smile is not None:
            rdkitmol = Chem.MolFromSmiles(smile)
            if rdkitmol is None:
                print('structure could not be deciphered')
                molec.SMILES = smile
                molec.mol = None
                continue
            else:
                rdkitmol.Compute2DCoords()
                molec.SMILES = smile
                # remove molecules with generalised atoms
                if '*' in smile:
                    molec.mol = None
                else:
                    molec.mol = rdkitmol
        elif smile is None:
            print('molecule does not have recorded structure in CHEBI DB')
            print('probably a generic structure - skipping.')
            molec.SMILES = smile
            molec.mol = None
            continue

        # set passed = True if this chebi ID produced a structure
        # would not get up to this point if it didnt
        # if not CIDs pass then the chebiIDs remain a list and will
        # fail the
        # next step
        passed = True
        # set molecule properties
        if passed:
            molec.chebiID = CID
            molec.DB_ID = CID
            # save InChiKey
            iKEY = entity.get_inchi_key()
            if iKEY is not None:
                molec.InChiKey = iKEY
            # save inchi
            inchi = entity.get_inchi()
            if inchi is not None:
                molec.InChi = inchi
            # set name if name is only a code at this point
            try:
                if molec.change_name is True:
                    molec.name = entity.get_name()
                    molec.change_name = False
            except AttributeError:
                molec.change_name = False
            return None
Esempio n. 2
0
class TestChebiEntity(unittest.TestCase):
    '''COMMENT'''

    def setUp(self):
        '''COMMENT'''
        self.__existing = ChebiEntity('4167')
        self.__secondary = ChebiEntity('CHEBI:5585')

    def test_get_non_existing(self):
        '''COMMENT'''
        self.assertRaises(ChebiException, ChebiEntity, '-1')

    def test_get_id_existing(self):
        '''COMMENT'''
        self.assertTrue(self.__existing.get_id() == 'CHEBI:4167')

    def test_get_id_secondary(self):
        '''COMMENT'''
        self.assertTrue(self.__secondary.get_id() == 'CHEBI:5585')

    def test_get_formulae_existing(self):
        '''COMMENT'''
        this_formula = Formula('C6H12O6', 'KEGG COMPOUND')
        self.assertTrue(this_formula in self.__existing.get_formulae())

    def test_get_formulae_secondary(self):
        '''COMMENT'''
        this_formula = Formula('H2O', 'ChEBI')
        self.assertTrue(this_formula in self.__secondary.get_formulae())

    def test_get_formula_existing(self):
        '''COMMENT'''
        self.assertTrue(self.__existing.get_formula() == 'C6H12O6')

    def test_get_formula_secondary(self):
        '''COMMENT'''
        self.assertTrue(self.__secondary.get_formula() == 'H2O')

    def test_get_mass_existing(self):
        '''COMMENT'''
        self.assertEqual(self.__existing.get_mass(), 180.15588)

    def test_get_mass_secondary(self):
        '''COMMENT'''
        self.assertEqual(self.__secondary.get_mass(), 18.01530)

    def test_get_charge_existing(self):
        '''COMMENT'''
        self.assertEqual(self.__existing.get_charge(), 0)

    def test_get_charge_secondary(self):
        '''COMMENT'''
        self.assertEqual(self.__secondary.get_charge(), 0)

    def test_get_charge_secondary2(self):
        '''COMMENT'''
        self.assertEquals(-2, ChebiEntity('43474').get_charge())

    def test_get_comments_existing(self):
        '''COMMENT'''
        this_chebi_entity = ChebiEntity('29044')
        this_comment = Comment('General', 'General',
                               'The substituent name \'3-oxoprop-2-enyl\' is '
                               'incorrect but is used in various databases.',
                               datetime.datetime.strptime('2005-03-18',
                                                          '%Y-%M-%d'))
        self.assertTrue(this_comment in this_chebi_entity.get_comments())

    def test_get_comments_secondary(self):
        '''COMMENT'''
        this_chebi_entity = ChebiEntity('11505')
        this_comment = Comment('General', 'General',
                               'The substituent name \'3-oxoprop-2-enyl\' is '
                               'incorrect but is used in various databases.',
                               datetime.datetime.strptime('2005-03-18',
                                                          '%Y-%M-%d'))
        self.assertTrue(this_comment in this_chebi_entity.get_comments())

    def test_get_source_existing(self):
        '''COMMENT'''
        self.assertEqual(self.__existing.get_source(), 'KEGG COMPOUND')

    def test_get_source_secondary(self):
        '''COMMENT'''
        self.assertEqual(self.__secondary.get_source(), 'KEGG COMPOUND')

    def test_get_prnt_id_existing(self):
        '''COMMENT'''
        self.assertIsNone(self.__existing.get_parent_id())

    def test_get_prnt_id_secondary(self):
        '''COMMENT'''
        self.assertEqual(self.__secondary.get_parent_id(), 'CHEBI:15377')

    def test_get_name_existing(self):
        '''COMMENT'''
        self.assertEqual(self.__existing.get_name(), 'D-glucopyranose')

    def test_get_name_secondary(self):
        '''COMMENT'''
        self.assertEqual(self.__secondary.get_name(), 'water')

    def test_get_definition_existing(self):
        '''COMMENT'''
        self.assertEqual(self.__existing.get_definition(),
                         'A glucopyranose having D-configuration.')

    def test_get_definition_secondary(self):
        '''COMMENT'''
        this_chebi_entity = ChebiEntity('41140')
        self.assertEqual(this_chebi_entity.get_definition(),
                         'D-Glucopyranose with beta configuration at the '
                         'anomeric centre.')

    def test_get_mod_on_existing(self):
        '''COMMENT'''
        self.assertTrue(self.__existing.get_modified_on() >
                        datetime.datetime.strptime('2014-01-01',
                                                   '%Y-%M-%d'))

    def test_get_mod_on_secondary(self):
        '''COMMENT'''
        self.assertIsNotNone(self.__secondary.get_modified_on())

    def test_get_created_by_existing(self):
        '''COMMENT'''
        self.assertEqual(self.__existing.get_created_by(), 'CHEBI')

    def test_get_created_by_secondary(self):
        '''COMMENT'''
        self.assertEqual(self.__secondary.get_created_by(), 'ops$mennis')

    def test_get_star_existing(self):
        '''COMMENT'''
        self.assertEqual(self.__existing.get_star(), 3)

    def test_get_star_secondary(self):
        '''COMMENT'''
        self.assertEqual(self.__secondary.get_star(), 3)

    def test_get_db_acc_existing(self):
        '''COMMENT'''
        dat_acc = DatabaseAccession('MetaCyc accession', 'D-Glucose',
                                    'MetaCyc')
        self.assertTrue(dat_acc in self.__existing.get_database_accessions())

    def test_get_db_acc_secondary(self):
        '''COMMENT'''
        dat_acc = DatabaseAccession('MetaCyc accession', 'WATER', 'MetaCyc')
        self.assertTrue(dat_acc in self.__secondary.get_database_accessions())

    def test_get_inchi_existing(self):
        '''COMMENT'''
        inchi = 'InChI=1S/C6H12O6/c7-1-2-3(8)4(9)5(10)6(11)12-2/' + \
            'h2-11H,1H2/t2-,3-,4+,5-,6?/m1/s1'
        self.assertEqual(self.__existing.get_inchi(), inchi)

    def test_get_inchi_secondary(self):
        '''COMMENT'''
        self.assertEqual(self.__secondary.get_inchi(), 'InChI=1S/H2O/h1H2')

    def test_get_inchi_key_existing(self):
        '''COMMENT'''
        self.assertEqual(self.__existing.get_inchi_key(),
                         'WQZGKKKJIJFFOK-GASJEMHNSA-N')

    def test_get_inchi_key_secondary(self):
        '''COMMENT'''
        self.assertEqual(self.__secondary.get_inchi_key(),
                         'XLYOFNOQVPJJNP-UHFFFAOYSA-N')

    def test_get_smiles_existing(self):
        '''COMMENT'''
        self.assertEqual(self.__existing.get_smiles(),
                         'OC[C@H]1OC(O)[C@H](O)[C@@H](O)[C@@H]1O')

    def test_get_smiles_secondary(self):
        '''COMMENT'''
        self.assertEqual(self.__secondary.get_smiles(), '[H]O[H]')

    def test_get_mol_existing(self):
        '''COMMENT'''
        chebi_id = 73938
        this_chebi_entity = ChebiEntity(str(chebi_id))
        self.assertEqual(this_chebi_entity.get_mol(),
                         _read_mol_file(chebi_id))

    def test_get_mol_secondary(self):
        '''COMMENT'''
        self.assertEqual(self.__secondary.get_mol(), _read_mol_file(15377))

    def test_get_mol_file_existing(self):
        '''COMMENT'''
        chebi_id = 73938
        self.__get_mol_file(chebi_id, chebi_id)

    def test_get_mol_file_secondary(self):
        '''COMMENT'''
        read_id = 15377
        retrieved_id = 42857
        self.__get_mol_file(read_id, retrieved_id)

    def test_get_names_existing(self):
        '''COMMENT'''
        this_name = Name('Grape sugar', 'SYNONYM', 'KEGG COMPOUND', False,
                         'en')
        self.assertTrue(this_name in self.__existing.get_names())

    def test_get_names_secondary(self):
        '''COMMENT'''
        this_name = Name('eau', 'SYNONYM', 'ChEBI', False, 'fr')
        self.assertTrue(this_name in self.__secondary.get_names())

    def test_get_references_existing(self):
        '''COMMENT'''
        this_chebi_entity = ChebiEntity('15347')
        this_reference = Reference('WO2006008754', 'Patent', '',
                                   'NOVEL INTERMEDIATES FOR LINEZOLID '
                                   'AND RELATED COMPOUNDS')

        self.assertTrue(this_reference in this_chebi_entity.get_references())

    def test_get_references_secondary(self):
        '''COMMENT'''
        this_chebi_entity = ChebiEntity('22182')
        this_reference = Reference('WO2006008754', 'Patent', '',
                                   'NOVEL INTERMEDIATES FOR LINEZOLID '
                                   'AND RELATED COMPOUNDS')
        self.assertTrue(this_reference in this_chebi_entity.get_references())

    def test_get_cmp_orig_existing(self):
        '''COMMENT'''
        this_compound_origin = CompoundOrigin('H**o sapiens', 'NCBI:9606',
                                              None, None, None, None,
                                              'DOI', '10.1038/nbt.2488', None)
        self.assertTrue(this_compound_origin
                        in self.__existing.get_compound_origins())

    def test_get_cmp_orig_secondary(self):
        '''COMMENT'''
        this_compound_origin = CompoundOrigin('H**o sapiens', 'NCBI:9606',
                                              None, None, None, None,
                                              'DOI', '10.1038/nbt.2488', None)
        self.assertTrue(this_compound_origin
                        in self.__secondary.get_compound_origins())

    def test_get_out_existing(self):
        '''COMMENT'''
        this_relation = Relation('is_a', '17634', 'C')
        self.assertTrue(this_relation in self.__existing.get_outgoings())

    def test_get_out_secondary(self):
        '''COMMENT'''
        this_relation = Relation('has_role', 'CHEBI:48360', 'C')
        self.assertTrue(this_relation in self.__secondary.get_outgoings())

    def test_get_in_existing(self):
        '''COMMENT'''
        this_relation = Relation('has_functional_parent', 'CHEBI:15866', 'C')
        self.assertTrue(this_relation in self.__existing.get_incomings())

    def test_get_in_secondary(self):
        '''COMMENT'''
        this_relation = Relation('is_conjugate_acid_of', '29412', 'C')
        self.assertTrue(this_relation in self.__secondary.get_incomings())

    def __get_mol_file(self, read_id, retrieved_id):
        '''COMMENT'''
        mol_read = _read_mol_file(read_id)
        this_chebi_entity = ChebiEntity(str(retrieved_id))
        textfile_retrieved = open(this_chebi_entity.get_mol_filename(), 'r')
        mol_retrieved = textfile_retrieved.read()
        textfile_retrieved.close()
        self.assertEquals(mol_read, mol_retrieved)
Esempio n. 3
0
class TestChebiEntity(unittest.TestCase):
    '''COMMENT'''
    def setUp(self):
        '''COMMENT'''
        self.__existing = ChebiEntity('4167')
        self.__secondary = ChebiEntity('CHEBI:5585')

    def test_get_non_existing(self):
        '''COMMENT'''
        self.assertRaises(ChebiException, ChebiEntity, '-1')

    def test_get_id_existing(self):
        '''COMMENT'''
        self.assertTrue(self.__existing.get_id() == 'CHEBI:4167')

    def test_get_id_secondary(self):
        '''COMMENT'''
        self.assertTrue(self.__secondary.get_id() == 'CHEBI:5585')

    def test_get_formulae_existing(self):
        '''COMMENT'''
        this_formula = Formula('C6H12O6', 'KEGG COMPOUND')
        self.assertTrue(this_formula in self.__existing.get_formulae())

    def test_get_formulae_secondary(self):
        '''COMMENT'''
        this_formula = Formula('H2O', 'ChEBI')
        self.assertTrue(this_formula in self.__secondary.get_formulae())

    def test_get_formula_existing(self):
        '''COMMENT'''
        self.assertTrue(self.__existing.get_formula() == 'C6H12O6')

    def test_get_formula_secondary(self):
        '''COMMENT'''
        self.assertTrue(self.__secondary.get_formula() == 'H2O')

    def test_get_mass_existing(self):
        '''COMMENT'''
        self.assertEqual(self.__existing.get_mass(), 180.15588)

    def test_get_mass_secondary(self):
        '''COMMENT'''
        self.assertEqual(self.__secondary.get_mass(), 18.01530)

    def test_get_charge_existing(self):
        '''COMMENT'''
        self.assertEqual(self.__existing.get_charge(), 0)

    def test_get_charge_secondary(self):
        '''COMMENT'''
        self.assertEqual(self.__secondary.get_charge(), 0)

    def test_get_charge_secondary2(self):
        '''COMMENT'''
        self.assertEquals(-2, ChebiEntity('43474').get_charge())

    def test_get_comments_existing(self):
        '''COMMENT'''
        this_chebi_entity = ChebiEntity('29044')
        this_comment = Comment(
            'General', 'General',
            'The substituent name \'3-oxoprop-2-enyl\' is '
            'incorrect but is used in various databases.',
            datetime.datetime.strptime('2005-03-18', '%Y-%M-%d'))
        self.assertTrue(this_comment in this_chebi_entity.get_comments())

    def test_get_comments_secondary(self):
        '''COMMENT'''
        this_chebi_entity = ChebiEntity('11505')
        this_comment = Comment(
            'General', 'General',
            'The substituent name \'3-oxoprop-2-enyl\' is '
            'incorrect but is used in various databases.',
            datetime.datetime.strptime('2005-03-18', '%Y-%M-%d'))
        self.assertTrue(this_comment in this_chebi_entity.get_comments())

    def test_get_source_existing(self):
        '''COMMENT'''
        self.assertEqual(self.__existing.get_source(), 'KEGG COMPOUND')

    def test_get_source_secondary(self):
        '''COMMENT'''
        self.assertEqual(self.__secondary.get_source(), 'KEGG COMPOUND')

    def test_get_prnt_id_existing(self):
        '''COMMENT'''
        self.assertIsNone(self.__existing.get_parent_id())

    def test_get_prnt_id_secondary(self):
        '''COMMENT'''
        self.assertEqual(self.__secondary.get_parent_id(), 'CHEBI:15377')

    def test_get_name_existing(self):
        '''COMMENT'''
        self.assertEqual(self.__existing.get_name(), 'D-glucopyranose')

    def test_get_name_secondary(self):
        '''COMMENT'''
        self.assertEqual(self.__secondary.get_name(), 'water')

    def test_get_definition_existing(self):
        '''COMMENT'''
        self.assertEqual(self.__existing.get_definition(),
                         'A glucopyranose having D-configuration.')

    def test_get_definition_secondary(self):
        '''COMMENT'''
        this_chebi_entity = ChebiEntity('41140')
        self.assertEqual(
            this_chebi_entity.get_definition(),
            'D-Glucopyranose with beta configuration at the '
            'anomeric centre.')

    def test_get_mod_on_existing(self):
        '''COMMENT'''
        self.assertTrue(self.__existing.get_modified_on() >
                        datetime.datetime.strptime('2014-01-01', '%Y-%M-%d'))

    def test_get_mod_on_secondary(self):
        '''COMMENT'''
        self.assertIsNotNone(self.__secondary.get_modified_on())

    def test_get_created_by_existing(self):
        '''COMMENT'''
        self.assertEqual(self.__existing.get_created_by(), 'CHEBI')

    def test_get_created_by_secondary(self):
        '''COMMENT'''
        self.assertEqual(self.__secondary.get_created_by(), 'ops$mennis')

    def test_get_star_existing(self):
        '''COMMENT'''
        self.assertEqual(self.__existing.get_star(), 3)

    def test_get_star_secondary(self):
        '''COMMENT'''
        self.assertEqual(self.__secondary.get_star(), 3)

    def test_get_db_acc_existing(self):
        '''COMMENT'''
        dat_acc = DatabaseAccession('MetaCyc accession', 'D-Glucose',
                                    'MetaCyc')
        self.assertTrue(dat_acc in self.__existing.get_database_accessions())

    def test_get_db_acc_secondary(self):
        '''COMMENT'''
        dat_acc = DatabaseAccession('MetaCyc accession', 'WATER', 'MetaCyc')
        self.assertTrue(dat_acc in self.__secondary.get_database_accessions())

    def test_get_inchi_existing(self):
        '''COMMENT'''
        inchi = 'InChI=1S/C6H12O6/c7-1-2-3(8)4(9)5(10)6(11)12-2/' + \
            'h2-11H,1H2/t2-,3-,4+,5-,6?/m1/s1'
        self.assertEqual(self.__existing.get_inchi(), inchi)

    def test_get_inchi_secondary(self):
        '''COMMENT'''
        self.assertEqual(self.__secondary.get_inchi(), 'InChI=1S/H2O/h1H2')

    def test_get_inchi_key_existing(self):
        '''COMMENT'''
        self.assertEqual(self.__existing.get_inchi_key(),
                         'WQZGKKKJIJFFOK-GASJEMHNSA-N')

    def test_get_inchi_key_secondary(self):
        '''COMMENT'''
        self.assertEqual(self.__secondary.get_inchi_key(),
                         'XLYOFNOQVPJJNP-UHFFFAOYSA-N')

    def test_get_smiles_existing(self):
        '''COMMENT'''
        self.assertEqual(self.__existing.get_smiles(),
                         'OC[C@H]1OC(O)[C@H](O)[C@@H](O)[C@@H]1O')

    def test_get_smiles_secondary(self):
        '''COMMENT'''
        self.assertEqual(self.__secondary.get_smiles(), '[H]O[H]')

    def test_get_mol_existing(self):
        '''COMMENT'''
        chebi_id = 73938
        this_chebi_entity = ChebiEntity(str(chebi_id))
        self.assertEqual(this_chebi_entity.get_mol(), _read_mol_file(chebi_id))

    def test_get_mol_secondary(self):
        '''COMMENT'''
        self.assertEqual(self.__secondary.get_mol(), _read_mol_file(15377))

    def test_get_mol_file_existing(self):
        '''COMMENT'''
        chebi_id = 73938
        self.__get_mol_file(chebi_id, chebi_id)

    def test_get_mol_file_secondary(self):
        '''COMMENT'''
        read_id = 15377
        retrieved_id = 42857
        self.__get_mol_file(read_id, retrieved_id)

    def test_get_names_existing(self):
        '''COMMENT'''
        this_name = Name('Grape sugar', 'SYNONYM', 'KEGG COMPOUND', False,
                         'en')
        self.assertTrue(this_name in self.__existing.get_names())

    def test_get_names_secondary(self):
        '''COMMENT'''
        this_name = Name('eau', 'SYNONYM', 'ChEBI', False, 'fr')
        self.assertTrue(this_name in self.__secondary.get_names())

    def test_get_references_existing(self):
        '''COMMENT'''
        this_chebi_entity = ChebiEntity('15347')
        this_reference = Reference(
            'WO2006008754', 'Patent', '', 'NOVEL INTERMEDIATES FOR LINEZOLID '
            'AND RELATED COMPOUNDS')

        self.assertTrue(this_reference in this_chebi_entity.get_references())

    def test_get_references_secondary(self):
        '''COMMENT'''
        this_chebi_entity = ChebiEntity('22182')
        this_reference = Reference(
            'WO2006008754', 'Patent', '', 'NOVEL INTERMEDIATES FOR LINEZOLID '
            'AND RELATED COMPOUNDS')
        self.assertTrue(this_reference in this_chebi_entity.get_references())

    def test_get_cmp_orig_existing(self):
        '''COMMENT'''
        this_compound_origin = CompoundOrigin('H**o sapiens', 'NCBI:9606',
                                              None, None, None, None, 'DOI',
                                              '10.1038/nbt.2488', None)
        self.assertTrue(
            this_compound_origin in self.__existing.get_compound_origins())

    def test_get_cmp_orig_secondary(self):
        '''COMMENT'''
        this_compound_origin = CompoundOrigin('H**o sapiens', 'NCBI:9606',
                                              None, None, None, None, 'DOI',
                                              '10.1038/nbt.2488', None)
        self.assertTrue(
            this_compound_origin in self.__secondary.get_compound_origins())

    def test_get_out_existing(self):
        '''COMMENT'''
        this_relation = Relation('is_a', '17634', 'C')
        self.assertTrue(this_relation in self.__existing.get_outgoings())

    def test_get_out_secondary(self):
        '''COMMENT'''
        this_relation = Relation('has_role', 'CHEBI:48360', 'C')
        self.assertTrue(this_relation in self.__secondary.get_outgoings())

    def test_get_in_existing(self):
        '''COMMENT'''
        this_relation = Relation('has_functional_parent', 'CHEBI:15866', 'C')
        self.assertTrue(this_relation in self.__existing.get_incomings())

    def test_get_in_secondary(self):
        '''COMMENT'''
        this_relation = Relation('is_conjugate_acid_of', '29412', 'C')
        self.assertTrue(this_relation in self.__secondary.get_incomings())

    def __get_mol_file(self, read_id, retrieved_id):
        '''COMMENT'''
        mol_read = _read_mol_file(read_id)
        this_chebi_entity = ChebiEntity(str(retrieved_id))
        textfile_retrieved = open(this_chebi_entity.get_mol_filename(), 'r')
        mol_retrieved = textfile_retrieved.read()
        textfile_retrieved.close()
        self.assertEquals(mol_read, mol_retrieved)