コード例 #1
0
class TestChebiEntity(unittest.TestCase):
    '''Exercise the ChebiEntity getters against two fixture entities.

    setUp builds one entity from the bare accession '4167' and one from the
    prefixed accession 'CHEBI:5585'; most tests compare the value returned by
    a getter on one of them with a hard-coded expectation.

    NOTE(review): every test instantiates real ChebiEntity objects, so the
    suite presumably needs access to the ChEBI data files -- confirm before
    running it in an isolated environment.
    '''

    def setUp(self):
        '''Create the two entities shared by most tests.'''
        self.__existing = ChebiEntity('4167')
        self.__secondary = ChebiEntity('CHEBI:5585')

    def test_get_non_existing(self):
        '''Constructing ChebiEntity('-1') must raise ChebiException.'''
        self.assertRaises(ChebiException, ChebiEntity, '-1')

    def test_get_id_existing(self):
        '''get_id returns the prefixed form of a bare accession.'''
        self.assertEqual(self.__existing.get_id(), 'CHEBI:4167')

    def test_get_id_secondary(self):
        '''get_id returns the accession the secondary entity was built with.'''
        self.assertEqual(self.__secondary.get_id(), 'CHEBI:5585')

    def test_get_formulae_existing(self):
        '''get_formulae contains the expected KEGG COMPOUND formula.'''
        this_formula = Formula('C6H12O6', 'KEGG COMPOUND')
        self.assertIn(this_formula, self.__existing.get_formulae())

    def test_get_formulae_secondary(self):
        '''get_formulae contains the expected ChEBI formula.'''
        this_formula = Formula('H2O', 'ChEBI')
        self.assertIn(this_formula, self.__secondary.get_formulae())

    def test_get_formula_existing(self):
        '''get_formula returns 'C6H12O6' for the existing entity.'''
        self.assertEqual(self.__existing.get_formula(), 'C6H12O6')

    def test_get_formula_secondary(self):
        '''get_formula returns 'H2O' for the secondary entity.'''
        self.assertEqual(self.__secondary.get_formula(), 'H2O')

    def test_get_mass_existing(self):
        '''get_mass returns 180.15588 for the existing entity.'''
        self.assertEqual(self.__existing.get_mass(), 180.15588)

    def test_get_mass_secondary(self):
        '''get_mass returns 18.01530 for the secondary entity.'''
        self.assertEqual(self.__secondary.get_mass(), 18.01530)

    def test_get_charge_existing(self):
        '''get_charge returns 0 for the existing entity.'''
        self.assertEqual(self.__existing.get_charge(), 0)

    def test_get_charge_secondary(self):
        '''get_charge returns 0 for the secondary entity.'''
        self.assertEqual(self.__secondary.get_charge(), 0)

    def test_get_charge_secondary2(self):
        '''get_charge returns -2 for entity 43474.'''
        # assertEquals was a deprecated alias (removed in Python 3.12).
        self.assertEqual(-2, ChebiEntity('43474').get_charge())

    def test_get_comments_existing(self):
        '''get_comments of entity 29044 contains the expected Comment.'''
        this_chebi_entity = ChebiEntity('29044')
        # NOTE(review): '%M' is strptime's minute directive ('%m' is month).
        # Left unchanged because the expected Comment has to equal whatever
        # date the library itself produces -- confirm against its parser.
        this_comment = Comment('General', 'General',
                               'The substituent name \'3-oxoprop-2-enyl\' is '
                               'incorrect but is used in various databases.',
                               datetime.datetime.strptime('2005-03-18',
                                                          '%Y-%M-%d'))
        self.assertIn(this_comment, this_chebi_entity.get_comments())

    def test_get_comments_secondary(self):
        '''get_comments of entity 11505 contains the expected Comment.'''
        this_chebi_entity = ChebiEntity('11505')
        # NOTE(review): '%M' is the minute directive; see the remark in
        # test_get_comments_existing of this class before changing it.
        this_comment = Comment('General', 'General',
                               'The substituent name \'3-oxoprop-2-enyl\' is '
                               'incorrect but is used in various databases.',
                               datetime.datetime.strptime('2005-03-18',
                                                          '%Y-%M-%d'))
        self.assertIn(this_comment, this_chebi_entity.get_comments())

    def test_get_source_existing(self):
        '''get_source returns 'KEGG COMPOUND' for the existing entity.'''
        self.assertEqual(self.__existing.get_source(), 'KEGG COMPOUND')

    def test_get_source_secondary(self):
        '''get_source returns 'KEGG COMPOUND' for the secondary entity.'''
        self.assertEqual(self.__secondary.get_source(), 'KEGG COMPOUND')

    def test_get_prnt_id_existing(self):
        '''get_parent_id returns None for the existing entity.'''
        self.assertIsNone(self.__existing.get_parent_id())

    def test_get_prnt_id_secondary(self):
        '''get_parent_id returns 'CHEBI:15377' for the secondary entity.'''
        self.assertEqual(self.__secondary.get_parent_id(), 'CHEBI:15377')

    def test_get_name_existing(self):
        '''get_name returns 'D-glucopyranose' for the existing entity.'''
        self.assertEqual(self.__existing.get_name(), 'D-glucopyranose')

    def test_get_name_secondary(self):
        '''get_name returns 'water' for the secondary entity.'''
        self.assertEqual(self.__secondary.get_name(), 'water')

    def test_get_definition_existing(self):
        '''get_definition returns the expected text for the existing entity.'''
        self.assertEqual(self.__existing.get_definition(),
                         'A glucopyranose having D-configuration.')

    def test_get_definition_secondary(self):
        '''get_definition returns the expected text for entity 41140.'''
        this_chebi_entity = ChebiEntity('41140')
        self.assertEqual(this_chebi_entity.get_definition(),
                         'D-Glucopyranose with beta configuration at the '
                         'anomeric centre.')

    def test_get_mod_on_existing(self):
        '''get_modified_on is later than the parsed '2014-01-01' timestamp.'''
        # NOTE(review): '%M' parses the middle field as minutes, not as the
        # month -- kept as in the original; confirm the intended format.
        threshold = datetime.datetime.strptime('2014-01-01', '%Y-%M-%d')
        self.assertTrue(self.__existing.get_modified_on() > threshold)

    def test_get_mod_on_secondary(self):
        '''get_modified_on is not None for the secondary entity.'''
        self.assertIsNotNone(self.__secondary.get_modified_on())

    def test_get_created_by_existing(self):
        '''get_created_by returns 'CHEBI' for the existing entity.'''
        self.assertEqual(self.__existing.get_created_by(), 'CHEBI')

    def test_get_created_by_secondary(self):
        '''get_created_by returns 'ops$mennis' for the secondary entity.'''
        self.assertEqual(self.__secondary.get_created_by(), 'ops$mennis')

    def test_get_star_existing(self):
        '''get_star returns 3 for the existing entity.'''
        self.assertEqual(self.__existing.get_star(), 3)

    def test_get_star_secondary(self):
        '''get_star returns 3 for the secondary entity.'''
        self.assertEqual(self.__secondary.get_star(), 3)

    def test_get_db_acc_existing(self):
        '''get_database_accessions contains the MetaCyc 'D-Glucose' entry.'''
        dat_acc = DatabaseAccession('MetaCyc accession', 'D-Glucose',
                                    'MetaCyc')
        self.assertIn(dat_acc, self.__existing.get_database_accessions())

    def test_get_db_acc_secondary(self):
        '''get_database_accessions contains the MetaCyc 'WATER' entry.'''
        dat_acc = DatabaseAccession('MetaCyc accession', 'WATER', 'MetaCyc')
        self.assertIn(dat_acc, self.__secondary.get_database_accessions())

    def test_get_inchi_existing(self):
        '''get_inchi returns the expected InChI for the existing entity.'''
        inchi = ('InChI=1S/C6H12O6/c7-1-2-3(8)4(9)5(10)6(11)12-2/'
                 'h2-11H,1H2/t2-,3-,4+,5-,6?/m1/s1')
        self.assertEqual(self.__existing.get_inchi(), inchi)

    def test_get_inchi_secondary(self):
        '''get_inchi returns the expected InChI for the secondary entity.'''
        self.assertEqual(self.__secondary.get_inchi(), 'InChI=1S/H2O/h1H2')

    def test_get_inchi_key_existing(self):
        '''get_inchi_key returns the expected key for the existing entity.'''
        self.assertEqual(self.__existing.get_inchi_key(),
                         'WQZGKKKJIJFFOK-GASJEMHNSA-N')

    def test_get_inchi_key_secondary(self):
        '''get_inchi_key returns the expected key for the secondary entity.'''
        self.assertEqual(self.__secondary.get_inchi_key(),
                         'XLYOFNOQVPJJNP-UHFFFAOYSA-N')

    def test_get_smiles_existing(self):
        '''get_smiles returns the expected SMILES for the existing entity.'''
        self.assertEqual(self.__existing.get_smiles(),
                         'OC[C@H]1OC(O)[C@H](O)[C@@H](O)[C@@H]1O')

    def test_get_smiles_secondary(self):
        '''get_smiles returns '[H]O[H]' for the secondary entity.'''
        self.assertEqual(self.__secondary.get_smiles(), '[H]O[H]')

    def test_get_mol_existing(self):
        '''get_mol of entity 73938 equals the reference MOL for that id.'''
        chebi_id = 73938
        this_chebi_entity = ChebiEntity(str(chebi_id))
        self.assertEqual(this_chebi_entity.get_mol(),
                         _read_mol_file(chebi_id))

    def test_get_mol_secondary(self):
        '''get_mol of the secondary entity equals the reference MOL 15377.'''
        self.assertEqual(self.__secondary.get_mol(), _read_mol_file(15377))

    def test_get_mol_file_existing(self):
        '''The MOL file of entity 73938 matches the reference for that id.'''
        chebi_id = 73938
        self.__get_mol_file(chebi_id, chebi_id)

    def test_get_mol_file_secondary(self):
        '''The MOL file of entity 42857 matches the reference for 15377.'''
        read_id = 15377
        retrieved_id = 42857
        self.__get_mol_file(read_id, retrieved_id)

    def test_get_names_existing(self):
        '''get_names contains the English synonym 'Grape sugar'.'''
        this_name = Name('Grape sugar', 'SYNONYM', 'KEGG COMPOUND', False,
                         'en')
        self.assertIn(this_name, self.__existing.get_names())

    def test_get_names_secondary(self):
        '''get_names contains the French synonym 'eau'.'''
        this_name = Name('eau', 'SYNONYM', 'ChEBI', False, 'fr')
        self.assertIn(this_name, self.__secondary.get_names())

    def test_get_references_existing(self):
        '''get_references of entity 15347 contains the expected patent.'''
        this_chebi_entity = ChebiEntity('15347')
        this_reference = Reference('WO2006008754', 'Patent', '',
                                   'NOVEL INTERMEDIATES FOR LINEZOLID '
                                   'AND RELATED COMPOUNDS')

        self.assertIn(this_reference, this_chebi_entity.get_references())

    def test_get_references_secondary(self):
        '''get_references of entity 22182 contains the expected patent.'''
        this_chebi_entity = ChebiEntity('22182')
        this_reference = Reference('WO2006008754', 'Patent', '',
                                   'NOVEL INTERMEDIATES FOR LINEZOLID '
                                   'AND RELATED COMPOUNDS')
        self.assertIn(this_reference, this_chebi_entity.get_references())

    def test_get_cmp_orig_existing(self):
        '''get_compound_origins contains the expected origin (existing).'''
        this_compound_origin = CompoundOrigin('H**o sapiens', 'NCBI:9606',
                                              None, None, None, None,
                                              'DOI', '10.1038/nbt.2488', None)
        self.assertIn(this_compound_origin,
                      self.__existing.get_compound_origins())

    def test_get_cmp_orig_secondary(self):
        '''get_compound_origins contains the expected origin (secondary).'''
        this_compound_origin = CompoundOrigin('H**o sapiens', 'NCBI:9606',
                                              None, None, None, None,
                                              'DOI', '10.1038/nbt.2488', None)
        self.assertIn(this_compound_origin,
                      self.__secondary.get_compound_origins())

    def test_get_out_existing(self):
        '''get_outgoings contains the 'is_a' relation to '17634'.'''
        this_relation = Relation('is_a', '17634', 'C')
        self.assertIn(this_relation, self.__existing.get_outgoings())

    def test_get_out_secondary(self):
        '''get_outgoings contains the 'has_role' relation to CHEBI:48360.'''
        this_relation = Relation('has_role', 'CHEBI:48360', 'C')
        self.assertIn(this_relation, self.__secondary.get_outgoings())

    def test_get_in_existing(self):
        '''get_incomings contains the 'has_functional_parent' relation.'''
        this_relation = Relation('has_functional_parent', 'CHEBI:15866', 'C')
        self.assertIn(this_relation, self.__existing.get_incomings())

    def test_get_in_secondary(self):
        '''get_incomings contains the 'is_conjugate_acid_of' relation.'''
        this_relation = Relation('is_conjugate_acid_of', '29412', 'C')
        self.assertIn(this_relation, self.__secondary.get_incomings())

    def __get_mol_file(self, read_id, retrieved_id):
        '''Assert that an entity's MOL file equals a reference MOL.

        read_id is handed to _read_mol_file to obtain the reference text;
        retrieved_id is used to build the ChebiEntity whose
        get_mol_filename() path is opened and read for comparison.
        '''
        mol_read = _read_mol_file(read_id)
        this_chebi_entity = ChebiEntity(str(retrieved_id))
        # Context manager closes the handle even if read() raises.
        with open(this_chebi_entity.get_mol_filename(), 'r') as mol_file:
            mol_retrieved = mol_file.read()
        self.assertEqual(mol_read, mol_retrieved)
コード例 #2
0
def get_cmpd_information(molec):
    """Fill ``molec`` with structure data looked up in ChEBI by ChEBI ID.

    Online using libChEBIpy (https://github.com/libChEBI/libChEBIpy)

    If ``molec.chebiID`` is None and an IUPAC name is present, one more
    attempt is made to resolve an ID through ``get_chebiID``. The candidate
    IDs in ``molec.chebiID`` are then tried in order; the loop stops at the
    first ID whose SMILES RDKit can parse. For that ID the function sets
    ``SMILES``, ``mol`` (None when the SMILES contains '*'), ``chebiID``,
    ``DB_ID`` and, when available, ``InChiKey``, ``InChi`` and ``name``.

    If no candidate yields a parsable structure, ``molec.chebiID`` is left
    as the candidate list.

    Args:
        molec: object carrying ``chebiID`` (None or an iterable of ID
            strings), ``iupac_name`` and ``name``; it is mutated in place.

    Returns:
        None in every case; results are written onto ``molec``.
    """
    if molec.chebiID is None:
        chebiID = None
        if molec.iupac_name is not None:
            # try one more time for chebi ID
            chebiID = get_chebiID(
                mol_name=molec.name,
                iupac_name=molec.iupac_name
            )
        if chebiID is None:
            # Without an ID there is nothing to iterate over; previously a
            # missing IUPAC name fell through and iterated over None.
            print('cannot get structure from chebi')
            return None
        molec.chebiID = [chebiID]

    # at this point, molec.chebiID will be a list - iterate over it
    # the iteration stops if any CHEBI ID produces a structure
    for CID in molec.chebiID:
        if CID == '' or ' ' in CID or 'null' in CID:
            print(CID, '- not a real CHEBI ID')
            continue
        # get entity with chebiID, then switch to its parent if it has one
        entity = ChebiEntity(CID)
        entity, CID = convert_entity_to_parent(entity, ID=CID, CID=CID)

        # attempt to get structure as SMILES
        smile = entity.get_smiles()
        print('libchebipy result:', smile)
        if smile is None:
            print('molecule does not have recorded structure in CHEBI DB')
            print('probably a generic structure - skipping.')
            molec.SMILES = smile
            molec.mol = None
            continue

        rdkitmol = Chem.MolFromSmiles(smile)
        if rdkitmol is None:
            print('structure could not be deciphered')
            molec.SMILES = smile
            molec.mol = None
            continue

        rdkitmol.Compute2DCoords()
        molec.SMILES = smile
        # remove molecules with generalised atoms
        molec.mol = None if '*' in smile else rdkitmol

        # Reaching this point means this ID produced a structure: replace
        # the candidate list by the single ID that worked.
        molec.chebiID = CID
        molec.DB_ID = CID
        # save InChiKey
        iKEY = entity.get_inchi_key()
        if iKEY is not None:
            molec.InChiKey = iKEY
        # save inchi
        inchi = entity.get_inchi()
        if inchi is not None:
            molec.InChi = inchi
        # set name if name is only a code at this point
        try:
            if molec.change_name is True:
                molec.name = entity.get_name()
                molec.change_name = False
        except AttributeError:
            molec.change_name = False
        return None

    return None
コード例 #3
0
def _store_structure(molec, rdkitmol, smile=None):
    """Write an RDKit parse result onto ``molec``; return True on success.

    When ``rdkitmol`` is None the parse failed: a message is printed,
    ``molec.SMILES`` is set to ``smile`` (possibly None) and ``molec.mol``
    to None. Otherwise 2D coordinates are computed, the SMILES is taken from
    ``smile`` or, if that is None, generated from ``rdkitmol``, and
    ``molec.mol`` is set to None when the SMILES contains '*'.
    """
    if rdkitmol is None:
        print('structure could not be deciphered')
        molec.SMILES = smile
        molec.mol = None
        return False
    rdkitmol.Compute2DCoords()
    if smile is None:
        smile = Chem.MolToSmiles(rdkitmol)
    molec.SMILES = smile
    # remove molecules with generalised atoms
    molec.mol = None if '*' in smile else rdkitmol
    return True


def get_cmpd_information_offline(molec):
    """Get information from CHEBI Database of a compound from CHEBI ID.

    Done Offline unless necessary.
    molec must have attribute 'chebiID' as integer.

    The compound is looked up in the local compounds file, switched to its
    parent entry when ``parent_id`` is not 'null', and its structure is read
    from the local structures file. Only when that file has no structure is
    libChEBIpy queried. Results are written onto ``molec`` (``name``,
    ``change_name``, ``chebiID``, ``SMILES``, ``mol`` and, on the online
    path, ``InChiKey``).

    Args:
        molec: object with a ``chebiID`` attribute; mutated in place.

    Returns:
        None.

    Raises:
        SystemExit: via ``sys.exit()`` when the ID, or its parent
            cross-reference, is not found in the compounds file.
    """
    import sys

    DB_prop = DB_functions.get_DB_prop('CHEBI')
    compounds_file = DB_prop[0] + DB_prop[1]['cmpds_file']
    structures_file = DB_prop[0] + DB_prop[1]['strct_file']

    # set name by searching compound file
    res = search_for_compound_by_id(compounds_file, molec.chebiID)
    if res is None:
        print('chebiID not found:', molec.chebiID)
        print('no match in DB - ' 'this should not happen for CHEBI ID search')
        print('check this!')
        print('Exitting....')
        sys.exit()
    ID, parent_id, name, star = res
    molec.name = name
    molec.change_name = False

    # make sure is parent compound
    if parent_id != 'null':
        res = convert_nameID_to_parent(compounds_file, nameID=ID)
        if res is None:
            print("this should not happen - error with cross reference")
            print('check this!')
            print('Exitting....')
            sys.exit()
        ID, parent_id, name, star = res
        molec.name = name
        molec.change_name = False
        molec.chebiID = int(ID)

    # get structure using CHEBI ID from the local structures file
    structure, s_type = get_structure(structures_file, molec.chebiID)
    print(structure, s_type)

    if structure is not None:
        # the stored structure can be a MolBlock, SMILES, InChI or InChIKey
        if s_type == 'mol':
            parsed = Chem.MolFromMolBlock(structure)
            if not _store_structure(molec, parsed):
                print('probably a polymeric structure - skipping.')
        elif s_type == 'SMILES':
            _store_structure(
                molec,
                Chem.MolFromSmiles(structure),
                smile=structure
            )
        elif s_type == 'InChI':
            # A failed InChI parse is now recorded like any other parse
            # failure instead of raising AttributeError on None.
            _store_structure(molec, Chem.MolFromInchi(structure))
        elif s_type == 'InChIKey':
            # The original parsed the key with MolFromInchi and then called
            # a method on the result, which raises when the parse returns
            # None; this branch only ever discards the structure, so the
            # parse is dropped.
            molec.SMILES = None
            molec.mol = None
            print('molecule given as InChIKey - ambiguous')
            print('probably a generic structure - skipping.')
        return None

    # try using the CHEBI API
    # libChEBIpy (https://github.com/libChEBI/libChEBIpy)
    print('testing libchebipy...')
    # chebiID is an integer here; pass it as a string.
    entity = ChebiEntity(str(molec.chebiID))
    smile = entity.get_smiles()
    print('libchebipy result:', smile)
    if smile is None:
        molec.SMILES = smile
        molec.mol = None
        print('molecule does not have recorded structure in CHEBI DB')
        print('probably a generic structure - skipping.')
    else:
        _store_structure(molec, Chem.MolFromSmiles(smile), smile=smile)
    # save InChiKey
    iKEY = entity.get_inchi_key()
    if iKEY is not None:
        molec.InChiKey = iKEY
    return None
コード例 #4
0
class TestChebiEntity(unittest.TestCase):
    '''Check ChebiEntity getters against hard-coded expected values.

    Two fixture entities are built in setUp: one from the bare accession
    '4167' and one from the prefixed accession 'CHEBI:5585'. Individual
    tests either use those fixtures or build another entity on the spot.

    NOTE(review): the tests create real ChebiEntity objects and therefore
    presumably depend on the ChEBI data files being reachable -- confirm.
    '''
    def setUp(self):
        '''Build the two fixture entities used throughout the class.'''
        self.__existing = ChebiEntity('4167')
        self.__secondary = ChebiEntity('CHEBI:5585')

    def test_get_non_existing(self):
        '''ChebiEntity('-1') raises ChebiException.'''
        self.assertRaises(ChebiException, ChebiEntity, '-1')

    def test_get_id_existing(self):
        '''get_id yields 'CHEBI:4167' for the existing fixture.'''
        self.assertEqual(self.__existing.get_id(), 'CHEBI:4167')

    def test_get_id_secondary(self):
        '''get_id yields 'CHEBI:5585' for the secondary fixture.'''
        self.assertEqual(self.__secondary.get_id(), 'CHEBI:5585')

    def test_get_formulae_existing(self):
        '''The KEGG COMPOUND formula C6H12O6 is among get_formulae.'''
        this_formula = Formula('C6H12O6', 'KEGG COMPOUND')
        self.assertIn(this_formula, self.__existing.get_formulae())

    def test_get_formulae_secondary(self):
        '''The ChEBI formula H2O is among get_formulae.'''
        this_formula = Formula('H2O', 'ChEBI')
        self.assertIn(this_formula, self.__secondary.get_formulae())

    def test_get_formula_existing(self):
        '''get_formula yields 'C6H12O6' for the existing fixture.'''
        self.assertEqual(self.__existing.get_formula(), 'C6H12O6')

    def test_get_formula_secondary(self):
        '''get_formula yields 'H2O' for the secondary fixture.'''
        self.assertEqual(self.__secondary.get_formula(), 'H2O')

    def test_get_mass_existing(self):
        '''get_mass yields 180.15588 for the existing fixture.'''
        self.assertEqual(self.__existing.get_mass(), 180.15588)

    def test_get_mass_secondary(self):
        '''get_mass yields 18.01530 for the secondary fixture.'''
        self.assertEqual(self.__secondary.get_mass(), 18.01530)

    def test_get_charge_existing(self):
        '''get_charge yields 0 for the existing fixture.'''
        self.assertEqual(self.__existing.get_charge(), 0)

    def test_get_charge_secondary(self):
        '''get_charge yields 0 for the secondary fixture.'''
        self.assertEqual(self.__secondary.get_charge(), 0)

    def test_get_charge_secondary2(self):
        '''get_charge yields -2 for entity 43474.'''
        # assertEqual replaces the deprecated assertEquals alias, which no
        # longer exists in Python 3.12.
        self.assertEqual(-2, ChebiEntity('43474').get_charge())

    def test_get_comments_existing(self):
        '''The expected Comment is among get_comments of entity 29044.'''
        this_chebi_entity = ChebiEntity('29044')
        # NOTE(review): '%M' means minutes in strptime ('%m' is the month).
        # Not changed here, since the expected Comment must compare equal to
        # the date the library builds -- confirm how the library parses it.
        this_comment = Comment(
            'General', 'General',
            'The substituent name \'3-oxoprop-2-enyl\' is '
            'incorrect but is used in various databases.',
            datetime.datetime.strptime('2005-03-18', '%Y-%M-%d'))
        self.assertIn(this_comment, this_chebi_entity.get_comments())

    def test_get_comments_secondary(self):
        '''The expected Comment is among get_comments of entity 11505.'''
        this_chebi_entity = ChebiEntity('11505')
        # NOTE(review): '%M' means minutes in strptime; kept on purpose, see
        # the remark in test_get_comments_existing of this class.
        this_comment = Comment(
            'General', 'General',
            'The substituent name \'3-oxoprop-2-enyl\' is '
            'incorrect but is used in various databases.',
            datetime.datetime.strptime('2005-03-18', '%Y-%M-%d'))
        self.assertIn(this_comment, this_chebi_entity.get_comments())

    def test_get_source_existing(self):
        '''get_source yields 'KEGG COMPOUND' for the existing fixture.'''
        self.assertEqual(self.__existing.get_source(), 'KEGG COMPOUND')

    def test_get_source_secondary(self):
        '''get_source yields 'KEGG COMPOUND' for the secondary fixture.'''
        self.assertEqual(self.__secondary.get_source(), 'KEGG COMPOUND')

    def test_get_prnt_id_existing(self):
        '''get_parent_id is None for the existing fixture.'''
        self.assertIsNone(self.__existing.get_parent_id())

    def test_get_prnt_id_secondary(self):
        '''get_parent_id yields 'CHEBI:15377' for the secondary fixture.'''
        self.assertEqual(self.__secondary.get_parent_id(), 'CHEBI:15377')

    def test_get_name_existing(self):
        '''get_name yields 'D-glucopyranose' for the existing fixture.'''
        self.assertEqual(self.__existing.get_name(), 'D-glucopyranose')

    def test_get_name_secondary(self):
        '''get_name yields 'water' for the secondary fixture.'''
        self.assertEqual(self.__secondary.get_name(), 'water')

    def test_get_definition_existing(self):
        '''get_definition yields the expected text (existing fixture).'''
        self.assertEqual(self.__existing.get_definition(),
                         'A glucopyranose having D-configuration.')

    def test_get_definition_secondary(self):
        '''get_definition yields the expected text for entity 41140.'''
        this_chebi_entity = ChebiEntity('41140')
        self.assertEqual(
            this_chebi_entity.get_definition(),
            'D-Glucopyranose with beta configuration at the '
            'anomeric centre.')

    def test_get_mod_on_existing(self):
        '''get_modified_on is after the parsed '2014-01-01' timestamp.'''
        # NOTE(review): with '%M' the middle field is read as minutes, not
        # as the month -- kept as in the original; confirm intended format.
        cutoff = datetime.datetime.strptime('2014-01-01', '%Y-%M-%d')
        self.assertTrue(self.__existing.get_modified_on() > cutoff)

    def test_get_mod_on_secondary(self):
        '''get_modified_on is not None for the secondary fixture.'''
        self.assertIsNotNone(self.__secondary.get_modified_on())

    def test_get_created_by_existing(self):
        '''get_created_by yields 'CHEBI' for the existing fixture.'''
        self.assertEqual(self.__existing.get_created_by(), 'CHEBI')

    def test_get_created_by_secondary(self):
        '''get_created_by yields 'ops$mennis' for the secondary fixture.'''
        self.assertEqual(self.__secondary.get_created_by(), 'ops$mennis')

    def test_get_star_existing(self):
        '''get_star yields 3 for the existing fixture.'''
        self.assertEqual(self.__existing.get_star(), 3)

    def test_get_star_secondary(self):
        '''get_star yields 3 for the secondary fixture.'''
        self.assertEqual(self.__secondary.get_star(), 3)

    def test_get_db_acc_existing(self):
        '''The MetaCyc 'D-Glucose' accession is among the accessions.'''
        dat_acc = DatabaseAccession('MetaCyc accession', 'D-Glucose',
                                    'MetaCyc')
        self.assertIn(dat_acc, self.__existing.get_database_accessions())

    def test_get_db_acc_secondary(self):
        '''The MetaCyc 'WATER' accession is among the accessions.'''
        dat_acc = DatabaseAccession('MetaCyc accession', 'WATER', 'MetaCyc')
        self.assertIn(dat_acc, self.__secondary.get_database_accessions())

    def test_get_inchi_existing(self):
        '''get_inchi yields the expected InChI (existing fixture).'''
        inchi = ('InChI=1S/C6H12O6/c7-1-2-3(8)4(9)5(10)6(11)12-2/'
                 'h2-11H,1H2/t2-,3-,4+,5-,6?/m1/s1')
        self.assertEqual(self.__existing.get_inchi(), inchi)

    def test_get_inchi_secondary(self):
        '''get_inchi yields 'InChI=1S/H2O/h1H2' (secondary fixture).'''
        self.assertEqual(self.__secondary.get_inchi(), 'InChI=1S/H2O/h1H2')

    def test_get_inchi_key_existing(self):
        '''get_inchi_key yields the expected key (existing fixture).'''
        self.assertEqual(self.__existing.get_inchi_key(),
                         'WQZGKKKJIJFFOK-GASJEMHNSA-N')

    def test_get_inchi_key_secondary(self):
        '''get_inchi_key yields the expected key (secondary fixture).'''
        self.assertEqual(self.__secondary.get_inchi_key(),
                         'XLYOFNOQVPJJNP-UHFFFAOYSA-N')

    def test_get_smiles_existing(self):
        '''get_smiles yields the expected SMILES (existing fixture).'''
        self.assertEqual(self.__existing.get_smiles(),
                         'OC[C@H]1OC(O)[C@H](O)[C@@H](O)[C@@H]1O')

    def test_get_smiles_secondary(self):
        '''get_smiles yields '[H]O[H]' for the secondary fixture.'''
        self.assertEqual(self.__secondary.get_smiles(), '[H]O[H]')

    def test_get_mol_existing(self):
        '''get_mol of entity 73938 equals the reference MOL of that id.'''
        chebi_id = 73938
        this_chebi_entity = ChebiEntity(str(chebi_id))
        self.assertEqual(this_chebi_entity.get_mol(), _read_mol_file(chebi_id))

    def test_get_mol_secondary(self):
        '''get_mol of the secondary fixture equals reference MOL 15377.'''
        self.assertEqual(self.__secondary.get_mol(), _read_mol_file(15377))

    def test_get_mol_file_existing(self):
        '''MOL file of entity 73938 matches the reference of that id.'''
        chebi_id = 73938
        self.__get_mol_file(chebi_id, chebi_id)

    def test_get_mol_file_secondary(self):
        '''MOL file of entity 42857 matches the reference of id 15377.'''
        read_id = 15377
        retrieved_id = 42857
        self.__get_mol_file(read_id, retrieved_id)

    def test_get_names_existing(self):
        '''The English synonym 'Grape sugar' is among get_names.'''
        this_name = Name('Grape sugar', 'SYNONYM', 'KEGG COMPOUND', False,
                         'en')
        self.assertIn(this_name, self.__existing.get_names())

    def test_get_names_secondary(self):
        '''The French synonym 'eau' is among get_names.'''
        this_name = Name('eau', 'SYNONYM', 'ChEBI', False, 'fr')
        self.assertIn(this_name, self.__secondary.get_names())

    def test_get_references_existing(self):
        '''The expected patent is among get_references of entity 15347.'''
        this_chebi_entity = ChebiEntity('15347')
        this_reference = Reference(
            'WO2006008754', 'Patent', '', 'NOVEL INTERMEDIATES FOR LINEZOLID '
            'AND RELATED COMPOUNDS')

        self.assertIn(this_reference, this_chebi_entity.get_references())

    def test_get_references_secondary(self):
        '''The expected patent is among get_references of entity 22182.'''
        this_chebi_entity = ChebiEntity('22182')
        this_reference = Reference(
            'WO2006008754', 'Patent', '', 'NOVEL INTERMEDIATES FOR LINEZOLID '
            'AND RELATED COMPOUNDS')
        self.assertIn(this_reference, this_chebi_entity.get_references())

    def test_get_cmp_orig_existing(self):
        '''The expected CompoundOrigin is among the origins (existing).'''
        this_compound_origin = CompoundOrigin('H**o sapiens', 'NCBI:9606',
                                              None, None, None, None, 'DOI',
                                              '10.1038/nbt.2488', None)
        self.assertIn(
            this_compound_origin, self.__existing.get_compound_origins())

    def test_get_cmp_orig_secondary(self):
        '''The expected CompoundOrigin is among the origins (secondary).'''
        this_compound_origin = CompoundOrigin('H**o sapiens', 'NCBI:9606',
                                              None, None, None, None, 'DOI',
                                              '10.1038/nbt.2488', None)
        self.assertIn(
            this_compound_origin, self.__secondary.get_compound_origins())

    def test_get_out_existing(self):
        '''The 'is_a' relation to '17634' is among get_outgoings.'''
        this_relation = Relation('is_a', '17634', 'C')
        self.assertIn(this_relation, self.__existing.get_outgoings())

    def test_get_out_secondary(self):
        '''The 'has_role' relation to CHEBI:48360 is among get_outgoings.'''
        this_relation = Relation('has_role', 'CHEBI:48360', 'C')
        self.assertIn(this_relation, self.__secondary.get_outgoings())

    def test_get_in_existing(self):
        '''The 'has_functional_parent' relation is among get_incomings.'''
        this_relation = Relation('has_functional_parent', 'CHEBI:15866', 'C')
        self.assertIn(this_relation, self.__existing.get_incomings())

    def test_get_in_secondary(self):
        '''The 'is_conjugate_acid_of' relation is among get_incomings.'''
        this_relation = Relation('is_conjugate_acid_of', '29412', 'C')
        self.assertIn(this_relation, self.__secondary.get_incomings())

    def __get_mol_file(self, read_id, retrieved_id):
        '''Assert that an entity's MOL file matches a reference MOL text.

        The reference comes from _read_mol_file(read_id); the file under
        test is the path returned by get_mol_filename() on the entity built
        from retrieved_id.
        '''
        mol_read = _read_mol_file(read_id)
        this_chebi_entity = ChebiEntity(str(retrieved_id))
        # "with" guarantees the handle is closed even when read() fails.
        with open(this_chebi_entity.get_mol_filename(), 'r') as handle:
            mol_retrieved = handle.read()
        self.assertEqual(mol_read, mol_retrieved)
コード例 #5
0
def hier_name_search(molecule, property, option=False):
    """Search for molecule property in PUBCHEM using a hierarchy of
    name spaces

    The identifiers on ``molecule`` are tried in this order and the first
    non-None result is returned:
        1 - pubchem ID
        2 - KEGG ID
        3 - chebiID
        4 - chebiID to InChIKey (via libchebipy when InChiKey is None)
        5 - InChIKey
        6 - IUPAC name
        7 - name

    Properties:
        CanonicalSMILES
        IUPACName
        XLogP
        complexity
        InChiKey

    Tutorial:
        https://pubchemdocs.ncbi.nlm.nih.gov/
        pug-rest-tutorial$_Toc458584413

    Args:
        molecule: object that may carry PubChemID, KEGG_ID, chebiID,
            InChiKey, iupac_name and name attributes. A missing attribute
            simply fails that step.
        property: property name inserted into the request URL.
        option: forwarded to ``run_request`` for the IUPAC-name and name
            steps when ``property`` is not 'CanonicalSMILES'.

    Returns:
        Whatever ``run_request`` returned for the first successful step; for
        'CanonicalSMILES' name searches that return several SMILES, a
        ``(smiles, index)`` tuple for the first one that
        ``check_charge_on_SMILES`` does not flag; None if nothing matched.

    Raises:
        SystemExit: if the final name step raises AttributeError or
            ValueError.

    NOTE(review): steps 1-3 query the ``compound/name/`` endpoint, also for
    the PubChem ID -- confirm that this is intended.
    """
    # Two fixed endpoints. The original rebound one variable to the inchikey
    # endpoint, so the final name step could query the wrong namespace.
    name_url = 'https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/'
    inchikey_url = (
        'https://pubchem.ncbi.nlm.nih.gov/rest/'
        'pug/compound/inchikey/'
    )

    def build_url(base, ident):
        """Return the property request URL for ``ident`` under ``base``."""
        return base + ident + '/property/' + property + '/TXT'

    def search_by_name(ident, passed_label):
        """Run a name-namespace search; return the hit or None."""
        url = build_url(name_url, ident)
        if property == 'CanonicalSMILES':
            result = run_request(query=url, smiles=True)
            if isinstance(result, tuple):
                # handle new line errors in SMILES
                text, boolean = result
                if boolean is True:
                    # pick the first uncharged SMILES
                    for index, smi in enumerate(text.split('\n')):
                        print('smiles1:', smi)
                        if check_charge_on_SMILES(smi):
                            continue
                        return smi, index
            elif isinstance(result, str):
                print('passed name')
                return result
        else:
            result = run_request(query=url, option=option)
        if result is not None:
            print(passed_label)
            return result
        return None

    # Steps 1-3: plain identifier look-ups that differ only in attribute,
    # URL prefix and log label.
    simple_steps = (
        ('PubChemID', '', 'pubchemID'),
        ('KEGG_ID', '', 'KEGG ID'),
        ('chebiID', 'chebi:', 'chebiID'),
    )
    for attr, prefix, label in simple_steps:
        try:
            ident = getattr(molecule, attr)
            if ident is not None:
                result = run_request(
                    query=build_url(name_url, prefix + ident)
                )
                if result is not None:
                    print('passed ' + label)
                    return result
        except (AttributeError, ValueError):
            print('failed ' + label)

    # Step 4: chebiID -> InChIKey
    try:
        if molecule.chebiID is not None:
            if molecule.InChiKey is None:
                # try using the CHEBI API
                # libChEBIpy (https://github.com/libChEBI/libChEBIpy)
                print('using libchebipy to get InChiKey...')
                from libchebipy import ChebiEntity
                entity = ChebiEntity(molecule.chebiID)
                iKEY = entity.get_inchi_key()
                print(iKEY)
            else:
                iKEY = molecule.InChiKey
            if iKEY is None:
                # No key on record: skip instead of failing on str + None.
                print('failed chebiID/inchiKey')
            else:
                result = run_request(query=build_url(inchikey_url, iKEY))
                if result is not None:
                    print('passed chebiID/inchiKey')
                    return result
    except (AttributeError, ValueError):
        print('failed chebiID/inchiKey')

    # Step 5: InChIKey on its own
    try:
        if molecule.InChiKey is not None:
            result = run_request(
                query=build_url(inchikey_url, molecule.InChiKey)
            )
            if result is not None:
                print('passed inchiKey')
                return result
    except (AttributeError, ValueError):
        print('failed inchiKey')

    # Step 6: IUPAC name
    try:
        if molecule.iupac_name is not None:
            found = search_by_name(molecule.iupac_name, 'passed IUPAC name')
            if found is not None:
                return found
    except (AttributeError, ValueError):
        print('failed IUPAC name')

    # Step 7: name -- a failure here is treated as fatal
    try:
        if molecule.name is not None:
            found = search_by_name(molecule.name, 'passed name')
            if found is not None:
                return found
    except (AttributeError, ValueError):
        print('failed name')
        import sys
        sys.exit()

    return None
コード例 #6
0
def _collect_pcp_properties(property, result):
    """Extract every requested property from a PubChem search result.

    Returns a list of extracted values when more than one property was
    requested, otherwise the single extracted value on its own.
    """
    extracted = [extract_property(i, result) for i in property]
    if len(property) > 1:
        return extracted
    return extracted[0]


def hier_name_search_pcp(molecule, property, option=False):
    """Search PubChem for molecule properties using a hierarchy of
    name spaces (via run_request_pcp).

    Each identifier is tried in turn; the first search that returns a
    result wins. A step is skipped when its identifier is None or when
    the molecule lacks the attribute (AttributeError/ValueError are
    swallowed so the next identifier can be tried).

    Order:
        1 - pubchem ID
        2 - KEGG ID
        3 - chebiID
        4 - chebiID to InChIKey (looked up with libchebipy when the
            molecule has no InChiKey of its own)
        5 - InChIKey
        6 - IUPAC name
        7 - name

    Properties (as listed by the original author):
        CanonicalSMILES
        IUPACName
        XLogP
        complexity
        InChiKey
        PubChemID
        synonyms

    Arguments:
        molecule - object exposing PubChemID, KEGG_ID, chebiID,
            InChiKey, iupac_name and name attributes.
        property - a property name or a list of property names. A single
            name is wrapped into a one-element list.
        option - if option is not False we want to use the 'name' search
            only, to recreate the conditions of the original search that
            gave option; steps 1-6 are then skipped.

    Returns:
        - the extracted value (one property requested) or a list of
          extracted values (several properties requested);
        - for a name search on 'CanonicalSMILES' that yields a tuple
          result: (smiles, index) of the first uncharged compound whose
          synonyms contain the molecule name, or None if there is none;
        - for a name search on 'CanonicalSMILES' that yields a string:
          that string unchanged;
        - None when every search fails.

    """
    if type(property) is not list:
        property = [property]
    try:
        if molecule.PubChemID is not None and option is False:
            result = run_request_pcp(ident=molecule.PubChemID,
                                     namespace='cid')
            if result is not None:
                print('> passed pubchemID')
                return _collect_pcp_properties(property, result)
    except (AttributeError, ValueError):
        pass
    try:
        if molecule.KEGG_ID is not None and option is False:
            result = run_request_pcp(ident=molecule.KEGG_ID,
                                     namespace='name')
            if result is not None:
                print('> passed KEGG ID')
                return _collect_pcp_properties(property, result)
    except (AttributeError, ValueError):
        pass
    try:
        if molecule.chebiID is not None and option is False:
            result = run_request_pcp(ident='chebi:'+molecule.chebiID,
                                     namespace='name')
            if result is not None:
                print('> passed chebiID')
                return _collect_pcp_properties(property, result)
    except (AttributeError, ValueError):
        pass
    try:
        if molecule.chebiID is not None and option is False:
            if molecule.InChiKey is None:
                # try using the CHEBI API
                # libChEBIpy (https://github.com/libChEBI/libChEBIpy)
                print('> attempting libchebipy to get InChiKey...')
                # imported lazily so libchebipy is only needed when this
                # fallback is actually reached
                from libchebipy import ChebiEntity
                entity = ChebiEntity(molecule.chebiID)
                iKEY = entity.get_inchi_key()
                print(iKEY)
            else:
                iKEY = molecule.InChiKey
            result = run_request_pcp(ident=iKEY,
                                     namespace='inchikey')
            if result is not None:
                print('> passed chebiID/inchiKey')
                return _collect_pcp_properties(property, result)
    except (AttributeError, ValueError):
        pass
    try:
        if molecule.InChiKey is not None and option is False:
            result = run_request_pcp(ident=molecule.InChiKey,
                                     namespace='inchikey')
            if result is not None:
                print('> passed inchiKey')
                return _collect_pcp_properties(property, result)
    except (AttributeError, ValueError):
        pass
    try:
        if molecule.iupac_name is not None and option is False:
            result = run_request_pcp(ident=molecule.iupac_name,
                                     namespace='name',
                                     option=option)
            if result is not None:
                print('> passed IUPAC name')
                return _collect_pcp_properties(property, result)
    except (AttributeError, ValueError):
        pass
    try:
        print('> trying name... for:', property)
        if molecule.name is not None:
            if property[0] == 'CanonicalSMILES':
                result = run_request_pcp(ident=molecule.name,
                                         namespace='name',
                                         smiles=True)
                if isinstance(result, tuple):
                    # NOTE(review): the tuple appears to be
                    # (compounds, flag) with flag True when several
                    # candidates came back - confirm against
                    # run_request_pcp.
                    compounds, flag = result
                    if flag is True:
                        # pick the first uncharged compound that lists
                        # the molecule name among its synonyms; a
                        # dedicated index name is used so the `option`
                        # parameter is not clobbered
                        for index, compound in enumerate(compounds):
                            synon = [
                                i.lower() for i in compound.synonyms
                            ]
                            if molecule.name.lower() not in synon:
                                continue
                            smi = compound.canonical_smiles
                            if check_charge_on_SMILES(smi):
                                # ignore charged species
                                continue
                            return smi, index
                        return None
                elif isinstance(result, str):
                    # A plain string result is handed back as-is rather
                    # than terminating the interpreter.
                    print('> passed name')
                    return result
            else:
                result = run_request_pcp(ident=molecule.name,
                                         namespace='name',
                                         option=option)
            if result is not None:
                print('> passed name')
                return _collect_pcp_properties(property, result)
    except (AttributeError, ValueError):
        print('> failed all searches...')
    return None