def test_organic_user_db():
    """Validate the example organic user database and its lookup indexes.

    Builds a ``ChemicalMetadataDB`` from only
    ``chemical identifiers example user db.tsv`` (no elements, no main
    database) and checks, for its entries, that:

    * ``CAS_from_any`` returns the entry's CAS string when given that CAS
      string, its SMILES, its InChI, its InChI key, or its PubChem id
      (ids equal to -1 are skipped);
    * each formula is already in ``serialize_formula`` form;
    * each CAS string passes ``check_CAS``;
    * the stored MW matches the MW computed from the formula (atol=0.05);
    * the CAS, SMILES, InChI and InChI key indexes all have the same size.
    """
    db = ChemicalMetadataDB(
        elements=False,
        main_db=None,
        user_dbs=[
            os.path.join(folder, 'chemical identifiers example user db.tsv'),
        ],
    )

    # Only the entries are needed here, so iterate values rather than items.
    for entry in db.CAS_index.values():
        assert CAS_from_any(entry.CASs) == entry.CASs

    # Check something was loaded
    assert len(db.CAS_index) > 100

    # Check smiles are unique / can lookup by smiles
    for smi, entry in db.smiles_index.items():
        if not smi:
            continue
        assert CAS_from_any('smiles=' + smi) == entry.CASs

    # Check formula is formatted right. Asserting per entry (instead of
    # wrapping a list in all()) makes a failure name the offending CAS.
    for entry in db.CAS_index.values():
        assert entry.formula == serialize_formula(entry.formula), entry.CASs

    # Check CAS validity
    for entry in db.CAS_index.values():
        assert check_CAS(entry.CASs), entry.CASs

    # MW checker
    for entry in db.CAS_index.values():
        formula = serialize_formula(entry.formula)
        atoms = nested_formula_parser(formula, check=False)
        mw_calc = molecular_weight(atoms)
        assert_allclose(mw_calc, entry.MW, atol=0.05)

    # The index key is converted with int_to_CAS before comparing against
    # the string returned by CAS_from_any.
    for CAS, entry in db.CAS_index.items():
        assert CAS_from_any('InChI=1S/' + entry.InChI) == int_to_CAS(CAS)

    for CAS, entry in db.CAS_index.items():
        assert CAS_from_any('InChIKey=' + entry.InChI_key) == int_to_CAS(CAS)

    # Test the pubchem ids which aren't -1
    for CAS, entry in db.CAS_index.items():
        if entry.pubchemid != -1:
            assert (CAS_from_any('PubChem=' + str(entry.pubchemid))
                    == int_to_CAS(CAS))

    # Every index must hold exactly one key per CAS entry.
    n_entries = len(db.CAS_index)
    assert n_entries == len(db.smiles_index)
    assert n_entries == len(db.InChI_index)
    assert n_entries == len(db.InChI_key_index)
def test_inorganic_db():
    """Validate the inorganic database and its lookup indexes.

    Builds a ``ChemicalMetadataDB`` from only ``Inorganic db.tsv`` (no
    elements, no main database) and checks, for its entries, that:

    * ``CAS_from_any`` returns the entry's CAS string when given that CAS
      string, its formula (two deliberately ambiguous formulas are
      skipped), or its SMILES (empty SMILES keys are skipped);
    * each formula is already in ``serialize_formula`` form;
    * each CAS string passes ``check_CAS``;
    * the stored MW matches the MW computed from the formula (atol=0.05).
    """
    db = ChemicalMetadataDB(
        elements=False,
        main_db=None,
        user_dbs=[os.path.join(folder, 'Inorganic db.tsv')],
    )

    # Check CAS lookup
    for entry in db.CAS_index.values():
        assert CAS_from_any(entry.CASs) == entry.CASs

    # Formulas which are not unique by design; built once, outside the loop.
    non_unique_formulas = {'H2MgO2', 'F2N2'}

    # Try to check formula lookups
    for formula, entry in db.formula_index.items():
        if formula in non_unique_formulas:
            continue
        assert CAS_from_any(formula) == entry.CASs

    # Check smiles are unique / can lookup by smiles
    for smi, entry in db.smiles_index.items():
        if not smi:
            continue
        assert CAS_from_any('smiles=' + smi) == entry.CASs

    # Check formula is formatted right. Asserting per entry (instead of
    # wrapping a list in all()) makes a failure name the offending CAS.
    for entry in db.CAS_index.values():
        assert entry.formula == serialize_formula(entry.formula), entry.CASs

    # Check CAS validity
    for entry in db.CAS_index.values():
        assert check_CAS(entry.CASs), entry.CASs

    # MW checker
    for entry in db.CAS_index.values():
        formula = serialize_formula(entry.formula)
        atoms = nested_formula_parser(formula, check=False)
        mw_calc = molecular_weight(atoms)
        assert_allclose(mw_calc, entry.MW, atol=0.05)
"""Generate a database of all synonyms that are hardcoded to be included.
MW can be hard coded, as well as pubchem ID.
"""
import json

# The wildcard import is kept as-is: it supplies molecular_weight and
# nested_formula_parser used below, and possibly other names used later.
from chemicals import *
from chemicals.identifiers import ChemicalMetadataDB
from numpy.testing import assert_allclose
from rdkit import Chem
from rdkit.Chem import Descriptors
from rdkit.Chem.rdMolDescriptors import CalcMolFormula

db = ChemicalMetadataDB(elements=False, main_db='Inorganic db.tsv',
                        user_dbs=[])
db.autoload_main_db()

# Seed one record with an empty synonym list for every entry in the
# database. Records are keyed by the entry's ``CASs`` attribute, not by the
# index key; setdefault leaves a record untouched if that key already exists.
good_syns = {}
for CAS, d in db.CAS_index.items():
    CAS = d.CASs
    good_syns.setdefault(CAS, {'synonyms': []})

# Hand-specified values: a formula with its computed MW, or a PubChem id.
# NOTE(review): presumably consumed further down the script - not visible here.
D2Se = {
    'formula': 'D2Se',
    'MW': molecular_weight(nested_formula_parser('D2Se')),
}
ammonium_hexafluorosilicate = {'pubchem': 28145}
CsBromate = {'pubchem': 23685550}
Br = {'pubchem': 5360770}
NaAlO4H4 = {'pubchem': 166673}
Na2HPO4 = {'pubchem': 24203}
import json

# The two wildcard imports keep their original relative order: names from
# the later import shadow same-named ones from the earlier import.
from thermo import *
from chemicals import *
from chemicals.identifiers import ChemicalMetadataDB
from numpy.testing import assert_allclose
from rdkit import Chem
from rdkit.Chem import Descriptors
from rdkit.Chem.rdMolDescriptors import CalcMolFormula

db = ChemicalMetadataDB(elements=False,
                        main_db='chemical identifiers example user db.tsv',
                        user_dbs=[])
db.autoload_main_db()

# Seed one record with an empty synonym list for every entry in the
# database. Records are keyed by the entry's ``CASs`` attribute, not by the
# index key; setdefault leaves a record untouched if that key already exists.
good_syns = {}
for CAS, d in db.CAS_index.items():
    CAS = d.CASs
    good_syns.setdefault(CAS, {'synonyms': []})

# CAS strings that receive the shorthand synonyms 'Cn', 'nCn' and 'n-Cn',
# listed in order n = 1..7. A CAS missing from good_syns raises KeyError,
# exactly as direct indexing would.
carbon_number_CASs = [
    '74-82-8',
    '74-84-0',
    '74-98-6',
    '106-97-8',
    '109-66-0',
    '110-54-3',
    '142-82-5',
]
for n_carbons, shorthand_CAS in enumerate(carbon_number_CASs, 1):
    good_syns[shorthand_CAS]['synonyms'].extend(
        [prefix + str(n_carbons) for prefix in ('C', 'nC', 'n-C')])