def test_organic_user_db():
    """Build a database from the example user-db file and cross-check it.

    The database is constructed with ``elements=False``, the example TSV as
    ``main_db`` and no extra user databases.  Every record is then looked up
    again through ``CAS_from_any`` by CAS, SMILES, InChI, InChI key and
    PubChem id, and the formulas, CAS numbers, molecular weights and index
    sizes are checked for consistency.
    """
    # NOTE(review): a second function with this exact name is defined later
    # in this file; the later definition rebinds the name, so this copy is
    # presumably never collected by the test runner.
    # NOTE(review): CAS_from_any is not handed `db`; presumably it consults a
    # module-level database that also holds these entries -- confirm.
    db_path = os.path.join(folder, 'chemical identifiers example user db.tsv')
    db = ChemicalMetadataDB(elements=False, main_db=db_path, user_dbs=[])

    # Each record must round-trip through a lookup by its own CAS string
    for record in db.CAS_index.values():
        assert CAS_from_any(record.CASs) == record.CASs

    # Check something was loaded
    assert len(db.CAS_index) > 100

    # Check smiles are unique / can lookup by smiles
    for smiles, record in db.smiles_index.items():
        if smiles:
            assert CAS_from_any('smiles=' + smiles) == record.CASs

    # Check formula is formatted right
    formula_checks = [record.formula == serialize_formula(record.formula)
                      for record in db.CAS_index.values()]
    assert all(formula_checks)

    # Check CAS validity
    cas_checks = [checkCAS(record.CASs) for record in db.CAS_index.values()]
    assert all(cas_checks)

    # MW checker: the stored MW must match the one recomputed from the formula
    for record in db.CAS_index.values():
        parsed_atoms = nested_formula_parser(serialize_formula(record.formula),
                                             check=False)
        assert_allclose(molecular_weight(parsed_atoms), record.MW, atol=0.05)

    # Lookup by InChI
    for cas_int, record in db.CAS_index.items():
        assert CAS_from_any('InChI=1S/' + record.InChI) == int2CAS(cas_int)

    # Lookup by InChI key
    for cas_int, record in db.CAS_index.items():
        assert CAS_from_any('InChIKey=' + record.InChI_key) == int2CAS(cas_int)

    # Test the pubchem ids which aren't -1
    for cas_int, record in db.CAS_index.items():
        if record.pubchemid == -1:
            continue
        assert CAS_from_any('PubChem=' + str(record.pubchemid)) == int2CAS(cas_int)

    # Every secondary index must hold exactly as many entries as the CAS index
    n_records = len(db.CAS_index)
    assert n_records == len(db.smiles_index)
    assert n_records == len(db.InChI_index)
    assert n_records == len(db.InChI_key_index)
def test_TSCA_data():
    """Check the column totals, index uniqueness, shape and CAS validity of TSCA_data."""
    columns = ['UV', 'E', 'F', 'N', 'P', 'S', 'R', 'T', 'XU', 'SP', 'TP', 'Y1', 'Y2']
    expected_totals = [16829, 271, 3, 713, 8371, 1173, 13, 151, 19035, 74, 50, 352, 9]

    # Sum each listed column and compare against the recorded totals
    computed_totals = [TSCA_data[column].sum() for column in columns]
    assert computed_totals == expected_totals

    assert TSCA_data.index.is_unique
    assert TSCA_data.shape == (67635, 13)

    # Every index entry must pass checkCAS after conversion with int2CAS
    cas_checks = [checkCAS(int2CAS(cas_int)) for cas_int in TSCA_data.index]
    assert all(cas_checks)
def test_organic_user_db():
    """Build a database from the example user-db file and cross-check it.

    The database is constructed with ``elements=False``, the example TSV as
    ``main_db`` and no extra user databases.  Every record is then looked up
    again through ``CAS_from_any`` by CAS, SMILES, InChI, InChI key and
    PubChem id, and the formulas, CAS numbers, molecular weights and index
    sizes are checked for consistency.
    """
    # NOTE(review): an earlier function with this exact name exists in this
    # file; this later definition is the one the name ends up bound to.
    # NOTE(review): CAS_from_any is not handed `db`; presumably it consults a
    # module-level database that also holds these entries -- confirm.
    db_path = os.path.join(folder, 'chemical identifiers example user db.tsv')
    db = ChemicalMetadataDB(elements=False, main_db=db_path, user_dbs=[])

    # Each record must round-trip through a lookup by its own CAS string
    for record in db.CAS_index.values():
        assert CAS_from_any(record.CASs) == record.CASs

    # Check something was loaded
    assert len(db.CAS_index) > 100

    # Check smiles are unique / can lookup by smiles
    for smiles, record in db.smiles_index.items():
        if smiles:
            assert CAS_from_any('smiles=' + smiles) == record.CASs

    # Check formula is formatted right
    formula_checks = [record.formula == serialize_formula(record.formula)
                      for record in db.CAS_index.values()]
    assert all(formula_checks)

    # Check CAS validity
    cas_checks = [checkCAS(record.CASs) for record in db.CAS_index.values()]
    assert all(cas_checks)

    # MW checker: the stored MW must match the one recomputed from the formula
    for record in db.CAS_index.values():
        parsed_atoms = nested_formula_parser(serialize_formula(record.formula),
                                             check=False)
        assert_allclose(molecular_weight(parsed_atoms), record.MW, atol=0.05)

    # Lookup by InChI
    for cas_int, record in db.CAS_index.items():
        assert CAS_from_any('InChI=1S/' + record.InChI) == int2CAS(cas_int)

    # Lookup by InChI key
    for cas_int, record in db.CAS_index.items():
        assert CAS_from_any('InChIKey=' + record.InChI_key) == int2CAS(cas_int)

    # Test the pubchem ids which aren't -1
    for cas_int, record in db.CAS_index.items():
        if record.pubchemid == -1:
            continue
        assert CAS_from_any('PubChem=' + str(record.pubchemid)) == int2CAS(cas_int)

    # Every secondary index must hold exactly as many entries as the CAS index
    n_records = len(db.CAS_index)
    assert n_records == len(db.smiles_index)
    assert n_records == len(db.InChI_index)
    assert n_records == len(db.InChI_key_index)
def test_HPV_data():
    """Check index uniqueness, shape, index checksum and CAS validity of HPV_data."""
    assert HPV_data.index.is_unique
    assert HPV_data.shape == (5067, 0)

    # The sum of all index entries acts as a checksum of the loaded data
    cas_ints = list(HPV_data.index)
    assert sum(cas_ints) == 176952023632

    # Every index entry must pass checkCAS after conversion with int2CAS
    cas_checks = [checkCAS(int2CAS(cas_int)) for cas_int in cas_ints]
    assert all(cas_checks)
def test_NLP_data():
    """Check index uniqueness, shape, index checksum and CAS validity of NLP_data."""
    assert NLP_data.index.is_unique
    assert NLP_data.shape == (698, 0)

    # The sum of all index entries acts as a checksum of the loaded data
    cas_ints = list(NLP_data.index)
    assert sum(cas_ints) == 83268755392

    # Every index entry must pass checkCAS after conversion with int2CAS
    cas_checks = [checkCAS(int2CAS(cas_int)) for cas_int in cas_ints]
    assert all(cas_checks)
def test_SPIN_data():
    """Check index uniqueness, shape, index checksum and CAS validity of SPIN_data."""
    assert SPIN_data.index.is_unique
    assert SPIN_data.shape == (26023, 0)

    # The sum of all index entries acts as a checksum of the loaded data
    cas_ints = list(SPIN_data.index)
    assert sum(cas_ints) == 1666688770043

    # Every index entry must pass checkCAS after conversion with int2CAS
    cas_checks = [checkCAS(int2CAS(cas_int)) for cas_int in cas_ints]
    assert all(cas_checks)
def test_EINECS_data():
    """Check index uniqueness, shape, index checksum and CAS validity of EINECS_data."""
    assert EINECS_data.index.is_unique
    assert EINECS_data.shape == (100203, 0)

    # The sum of all index entries acts as a checksum of the loaded data
    cas_ints = list(EINECS_data.index)
    assert sum(cas_ints) == 4497611272838

    # Every index entry must pass checkCAS after conversion with int2CAS
    cas_checks = [checkCAS(int2CAS(cas_int)) for cas_int in cas_ints]
    assert all(cas_checks)
def test_DSL_data():
    """Check index uniqueness, 'Registry' total, shape and CAS validity of DSL_data."""
    assert DSL_data.index.is_unique

    # Total of the single 'Registry' column serves as a checksum
    registry_total = DSL_data['Registry'].sum()
    assert registry_total == 48363

    assert DSL_data.shape == (73036, 1)

    # Every index entry must pass checkCAS after conversion with int2CAS
    cas_checks = [checkCAS(int2CAS(cas_int)) for cas_int in DSL_data.index]
    assert all(cas_checks)
def CASs(self):
    """Return ``self.CAS`` converted with ``int2CAS``.

    NOTE(review): presumably this yields the string form of the integer CAS
    number stored on the instance -- confirm against ``int2CAS``.
    """
    cas_number = self.CAS
    return int2CAS(cas_number)