コード例 #1
0
def test_organic_user_db():
    """Validate the example organic user database and its lookup indexes.

    Loads ``chemical identifiers example user db.tsv`` as the only user
    database (no elements, no main database) and checks that:

    * every stored CAS string resolves to itself through ``CAS_from_any``;
    * more than 100 entries were loaded;
    * every non-empty SMILES resolves to its entry's CAS;
    * every formula is already in ``serialize_formula`` form;
    * every CAS passes ``check_CAS``;
    * the molecular weight computed from the formula agrees with the stored
      ``MW`` within an absolute tolerance of 0.05;
    * InChI, InChIKey and (when not -1) PubChem ID lookups resolve to the
      CAS obtained by passing the index key through ``int_to_CAS``;
    * the SMILES, InChI and InChIKey indexes are the same size as the CAS
      index.
    """
    db_path = os.path.join(folder, 'chemical identifiers example user db.tsv')
    db = ChemicalMetadataDB(elements=False, main_db=None, user_dbs=[db_path])

    # A CAS string must round-trip through the generic lookup unchanged.
    for entry in db.CAS_index.values():
        assert CAS_from_any(entry.CASs) == entry.CASs

    # Guard against a silently empty or truncated load.
    assert len(db.CAS_index) > 100

    # SMILES lookups; entries with an empty SMILES key are skipped.
    for smiles, entry in db.smiles_index.items():
        if smiles:
            assert CAS_from_any('smiles=' + smiles) == entry.CASs

    # Stored formulas must already be in serialized form.
    formulas_serialized = [
        entry.formula == serialize_formula(entry.formula)
        for entry in db.CAS_index.values()
    ]
    assert all(formulas_serialized)

    # Every CAS string must pass validation.
    cas_valid = [check_CAS(entry.CASs) for entry in db.CAS_index.values()]
    assert all(cas_valid)

    # Recompute the molecular weight from the formula and compare to the
    # stored value.
    for entry in db.CAS_index.values():
        atoms = nested_formula_parser(serialize_formula(entry.formula),
                                      check=False)
        assert_allclose(molecular_weight(atoms), entry.MW, atol=0.05)

    # InChI lookups; the stored InChI omits the 'InChI=1S/' prefix, which is
    # added back here.
    for cas_key, entry in db.CAS_index.items():
        resolved = CAS_from_any('InChI=1S/' + entry.InChI)
        assert resolved == int_to_CAS(cas_key)

    # InChIKey lookups.
    for cas_key, entry in db.CAS_index.items():
        resolved = CAS_from_any('InChIKey=' + entry.InChI_key)
        assert resolved == int_to_CAS(cas_key)

    # PubChem ID lookups; -1 is skipped.
    for cas_key, entry in db.CAS_index.items():
        if entry.pubchemid == -1:
            continue
        resolved = CAS_from_any('PubChem=' + str(entry.pubchemid))
        assert resolved == int_to_CAS(cas_key)

    # All identifier indexes must hold exactly one key per CAS entry.
    n_entries = len(db.CAS_index)
    assert n_entries == len(db.smiles_index)
    assert n_entries == len(db.InChI_index)
    assert n_entries == len(db.InChI_key_index)
コード例 #2
0
def test_inorganic_db():
    """Validate the inorganic database and the lookups built from it.

    Loads ``Inorganic db.tsv`` as the only user database (no elements, no
    main database) and checks that:

    * every stored CAS string resolves to itself through ``CAS_from_any``;
    * every formula in ``formula_index`` resolves to its entry's CAS, apart
      from the formulas listed as non-unique;
    * every non-empty SMILES resolves to its entry's CAS;
    * every formula is already in ``serialize_formula`` form;
    * every CAS passes ``check_CAS``;
    * the molecular weight computed from the formula agrees with the stored
      ``MW`` within an absolute tolerance of 0.05.
    """
    db = ChemicalMetadataDB(
        elements=False,
        main_db=None,
        user_dbs=[os.path.join(folder, 'Inorganic db.tsv')])

    # Check CAS lookup (the index key itself is not needed here)
    for d in db.CAS_index.values():
        assert CAS_from_any(d.CASs) == d.CASs

    # Formulas which are not unique by design; built once, outside the loop
    non_unique_formulas = {'H2MgO2', 'F2N2'}

    # Check formula lookups
    for formula, d in db.formula_index.items():
        if formula in non_unique_formulas:
            continue
        assert CAS_from_any(formula) == d.CASs

    # Check smiles are unique / can lookup by smiles
    for smi, d in db.smiles_index.items():
        if not smi:
            continue
        assert CAS_from_any('smiles=' + smi) == d.CASs

    # Check formula is formatted right
    assert all([
        i.formula == serialize_formula(i.formula)
        for i in db.CAS_index.values()
    ])

    # Check CAS validity
    assert all([check_CAS(i.CASs) for i in db.CAS_index.values()])

    # MW checker: recompute the weight from the formula and compare it to
    # the stored value
    for i in db.CAS_index.values():
        formula = serialize_formula(i.formula)
        atoms = nested_formula_parser(formula, check=False)
        mw_calc = molecular_weight(atoms)
        assert_allclose(mw_calc, i.MW, atol=0.05)
コード例 #3
0
from chemicals import *
from chemicals.identifiers import ChemicalMetadataDB
from numpy.testing import assert_allclose
from rdkit import Chem
from rdkit.Chem import Descriptors
from rdkit.Chem.rdMolDescriptors import CalcMolFormula
import json

'''Generate a database of all synonyms that are hardcoded to be included.
MW can be hard coded, as well as pubchem ID.

'''
# Load the inorganic database as the main database, with no elements and no
# user databases.
db = ChemicalMetadataDB(
    elements=False,
    main_db='Inorganic db.tsv',
    user_dbs=[],
)
db.autoload_main_db()

# Map each CAS string to a record holding its (initially empty) synonym list.
# A CAS that is already present keeps its existing record.
good_syns = {}
for d in db.CAS_index.values():
    CAS = d.CASs
    good_syns.setdefault(CAS, {'synonyms': []})

# Hard-coded metadata for individual chemicals: either a formula with the
# molecular weight computed from it, or a PubChem ID.
# NOTE(review): how these dicts are consumed is not visible in this excerpt.
D2Se = {
    'formula': 'D2Se',
    'MW': molecular_weight(nested_formula_parser('D2Se')),
}
ammonium_hexafluorosilicate = {'pubchem': 28145}
CsBromate = {'pubchem': 23685550}
Br = {'pubchem': 5360770}
NaAlO4H4 = {'pubchem': 166673}
Na2HPO4 = {'pubchem': 24203}
コード例 #4
0
from thermo import *
from chemicals import *
from chemicals.identifiers import ChemicalMetadataDB
from numpy.testing import assert_allclose
from rdkit import Chem
from rdkit.Chem import Descriptors
from rdkit.Chem.rdMolDescriptors import CalcMolFormula
import json

# Load the example user database as the main database, with no elements and
# no user databases.
db = ChemicalMetadataDB(
    elements=False,
    main_db='chemical identifiers example user db.tsv',
    user_dbs=[],
)
db.autoload_main_db()

# Map each CAS string to a record holding its (initially empty) synonym list.
# A CAS that is already present keeps its existing record.
good_syns = {}
for d in db.CAS_index.values():
    CAS = d.CASs
    good_syns.setdefault(CAS, {'synonyms': []})

# Hard-coded carbon-number aliases (Cn, nCn, n-Cn) for n = 1..7.
# NOTE(review): the CAS numbers presumably correspond to methane through
# n-heptane -- confirm against the database.
good_syns['74-82-8']['synonyms'] += ['C1', 'nC1', 'n-C1']
good_syns['74-84-0']['synonyms'] += ['C2', 'nC2', 'n-C2']
good_syns['74-98-6']['synonyms'] += ['C3', 'nC3', 'n-C3']
good_syns['106-97-8']['synonyms'] += ['C4', 'nC4', 'n-C4']
good_syns['109-66-0']['synonyms'] += ['C5', 'nC5', 'n-C5']
good_syns['110-54-3']['synonyms'] += ['C6', 'nC6', 'n-C6']
good_syns['142-82-5']['synonyms'] += ['C7', 'nC7', 'n-C7']