def test_fix_valence():
    """Check valence detection and repair, both out-of-place and in-place."""

    def _mapped_atom(molecule):
        # The atom under test is the one tagged with atom-map number 1.
        return [atom for atom in molecule.GetAtoms() if atom.GetAtomMapNum() == 1][0]

    broken_smiles = "Cl.[H][N:1]1=CC(O)=CC2CCCCC12"
    mol = Chem.MolFromSmiles(broken_smiles, sanitize=False)
    mol.UpdatePropertyCache(False)
    untouched_copy = dm.copy_mol(mol)

    nitrogen = _mapped_atom(mol)
    valence_before = nitrogen.GetExplicitValence()
    assert dm.incorrect_valence(nitrogen, True)

    # Out-of-place fix: the returned molecule must round-trip through SMILES...
    repaired = dm.fix_valence_charge(mol, inplace=False)
    assert dm.to_mol(Chem.MolToSmiles(repaired)) is not None
    # ...while the atom of the input molecule is expected to still be incorrect.
    assert dm.incorrect_valence(nitrogen, True)

    # In-place fix: the nitrogen should be charged positively if this was fixed.
    dm.fix_valence_charge(mol, inplace=True)
    assert nitrogen.GetFormalCharge() == 1

    # `fix_valence` on the pristine copy must lower the nitrogen explicit valence.
    repaired_copy = dm.fix_valence(untouched_copy)
    assert _mapped_atom(repaired_copy).GetExplicitValence() < valence_before

    # The repaired copy must be a valid molecule.
    assert dm.to_mol(Chem.MolToSmiles(repaired_copy)) is not None
def _run_at_all_rct(rxn, mol1, mol2):
    """Run a two-reactant reaction on a pair of molecules in every matching order.

    The reaction is first rebuilt from its own SMARTS representation. The pair
    is then submitted as ``(mol1, mol2)`` when ``mol1`` matches reactant
    template 0 and ``mol2`` matches reactant template 1, and as ``(mol2, mol1)``
    when the roles are swapped. Both orders are run when both match.

    Args:
        rxn: reaction to run; reactant templates 0 and 1 are read from it.
        mol1: first molecule, or None (in which case nothing is produced).
        mol2: second molecule, or None (in which case nothing is produced).

    Yields:
        ``(mol, smiles)`` tuples, one per product that has a non-empty SMILES.
        ``mol`` is None when the product could not be converted back into a
        molecule, even after the recovery attempt.
    """
    rxn = rdChemReactions.ReactionFromSmarts(rdChemReactions.ReactionToSmarts(rxn))
    m1 = rxn.GetReactantTemplate(0)
    m2 = rxn.GetReactantTemplate(1)

    mol1_valid = mol1 is not None
    mol2_valid = mol2 is not None
    isR1 = mol1_valid and mol1.HasSubstructMatch(m1)
    isR2 = mol1_valid and mol1.HasSubstructMatch(m2)

    library = []
    if isR1 and mol2_valid and mol2.HasSubstructMatch(m2):
        library.extend(rxn.RunReactants((mol1, mol2)))
    if isR2 and mol2_valid and mol2.HasSubstructMatch(m1):
        library.extend(rxn.RunReactants((mol2, mol1)))

    # Each entry of `library` is one product set; flatten to single products.
    for m in itertools.chain.from_iterable(library):
        mol = None
        mSmi = ""
        try:
            mSmi = Chem.MolToSmiles(m)
            mol = dm.to_mol(mSmi)
        except Exception:
            # Best effort: fall through to the recovery attempt below.
            pass

        if mol is None:
            # Recovery: work on the raw product `m`. The previous code called
            # methods on `mol`, which is always None here, so this branch
            # could never succeed.
            try:
                m.UpdatePropertyCache()
                sanitized = dm.sanitize_mol(m)
                if sanitized is not None:
                    recovered_smi = Chem.MolToSmiles(sanitized)
                    recovered_mol = dm.to_mol(recovered_smi)
                    if recovered_mol is not None:
                        mol, mSmi = recovered_mol, recovered_smi
            except Exception:
                # Recovery is best effort; keep whatever the first attempt gave.
                pass

        if mSmi:
            yield mol, mSmi
def remove_dummies(mol: Chem.rdchem.Mol, dummy: str = "*") -> Optional[Chem.rdchem.Mol]:
    """Remove dummy atoms from a molecule.

    The dummy pattern is first replaced by hydrogens, which are then removed.
    If that fails for any reason, the dummy pattern is deleted instead.

    Args:
        mol: molecule to clean.
        dummy: string converted with `dm.to_mol` to build the dummy pattern.

    Returns:
        The molecule without the dummy atoms.
    """
    dummy_query = dm.to_mol(dummy)
    try:
        hydrogen = dm.to_mol("[H]")
        replaced = Chem.ReplaceSubstructs(mol, dummy_query, hydrogen, True)
        return Chem.RemoveHs(replaced[0])
    except Exception:
        # Fallback: plain deletion of the matched dummy atoms.
        return Chem.DeleteSubstructs(mol, dummy_query)
def test_to_neutral():
    """Check `dm.to_neutral` on a cation and on an anion."""
    # Ammonium: the neutralized molecule is compared through its SMILES.
    ammonium = dm.to_mol("[NH4+]", add_hs=False, explicit_only=False)
    assert dm.to_smiles(dm.to_neutral(ammonium)) == "[NH4]"

    # Benzoate: the formal charges of the result must sum to zero.
    benzoate = dm.to_mol("O=C(c1ccccc1)[O-]", add_hs=False, explicit_only=False)
    neutral = dm.to_neutral(benzoate)
    total_charge = sum(atom.GetFormalCharge() for atom in neutral.GetAtoms())
    assert total_charge == 0
def test_to_mol():
    """Check `dm.to_mol` atom counts with and without hydrogens, and on bad input."""
    aspirin = "O=C(C)Oc1ccccc1C(=O)O"

    # Default conversion.
    assert dm.to_mol(aspirin).GetNumAtoms() == 13

    # With `add_hs=True` the atom count grows.
    assert dm.to_mol(aspirin, add_hs=True).GetNumAtoms() == 21

    # An unparsable string gives None.
    assert dm.to_mol("fake_smiles") is None
def assemble_fragment_order(
    fragmentlist,
    seen=None,
    allow_incomplete: bool = False,
    max_n_mols: float = float("inf"),
    RXNS=None,
):
    """Assemble an ordered list of fragments into a set of possible molecules.

    Fragments are attached one after the other, in list order, to every
    molecule built so far, using `_run_at_all_rct` with each reaction.

    ..note ::
        We are of course assuming:
        1. that the order in the fragmentlist matters.
        2. that none of the fragments has explicitly defined hydrogen atoms.
        3. only a collection of unique molecules (by SMILES) is internally maintained.

    Args:
        fragmentlist: list of original fragments to grow.
        seen: original molecule used as base. If None, the first element of
            the fragment list is popped and used instead.
        allow_incomplete: Whether to also yield the intermediate molecules
            (those not yet using every fragment) as they are first produced.
        max_n_mols: yield budget. It is checked before each fragment is
            processed and before each molecule of the final level is yielded.
        RXNS: reactions to try. Defaults to `ALL_BRICS_RETRO` when None.

    Yields:
        The assembled molecules.
    """
    reactions = ALL_BRICS_RETRO if RXNS is None else RXNS

    pending = list(fragmentlist)
    n_yielded = 0

    base = pending.pop(0) if seen is None else seen
    # Only one molecule to assemble at the start.
    current_level = [Chem.MolToSmiles(base)]

    while n_yielded < max_n_mols and pending:
        # Find all the ways to add this fragment to the current level.
        frag = pending.pop(0)
        previous_mols = [dm.to_mol(smi) for smi in current_level]
        current_level = set()
        for base_mol in previous_mols:
            try:
                for rxn in reactions:
                    for product, product_smi in _run_at_all_rct(rxn, frag, base_mol):
                        if allow_incomplete and product_smi not in current_level:
                            yield product
                            n_yielded += 1
                        current_level.add(product_smi)
            except Exception as e:
                # A failing base molecule is reported and skipped.
                print(e)

    # Yield the last level, within the remaining budget.
    for smi in current_level:
        if n_yielded >= max_n_mols:
            break
        yield dm.to_mol(smi)
        n_yielded += 1
def test_sdf_props_and_conformer_preserved(tmp_path):
    """Check that properties and the conformer survive an SDF write/read cycle."""
    sdf_path = tmp_path / "test.sdf"

    # Generate an SDF file from a molecule with properties and one conformer.
    expected_props = {"test_int": 588, "test_str": "hello"}
    smiles = "CC1(C2C(C3C(C(=O)C(=C(C3(C(=O)C2=C(C4=C1C=CC=C4O)O)O)O)C(=O)N)N(C)C)O)O"
    written = dm.to_mol(smiles)
    written = dm.set_mol_props(written, expected_props)
    written = dm.conformers.generate(written, n_confs=1)
    expected_positions = written.GetConformer().GetPositions()
    dm.to_sdf(written, sdf_path)

    # Read the file back.
    reloaded = dm.read_sdf(sdf_path)[0]

    # Properties must be unchanged.
    assert reloaded.GetPropsAsDict() == expected_props

    # The single 3D conformer must be preserved up to 4 decimals.
    conformer = reloaded.GetConformer()
    assert reloaded.GetNumConformers() == 1
    assert conformer.Is3D()
    np.testing.assert_almost_equal(conformer.GetPositions(), expected_positions, decimal=4)
def smiles_to_fingerprint(smiles):
    """Convert a SMILES into its datamol SMILES and Morgan fingerprint on-bits.

    Args:
        smiles: input SMILES; it is passed through `str` before parsing.

    Returns:
        A tuple `(standard_smiles, achiral_fp)` where `standard_smiles` is the
        output of `dm.to_smiles` and `achiral_fp` is the list of on-bits of a
        radius-2, 8192-bit Morgan fingerprint computed without chirality.
    """
    mol = dm.to_mol(str(smiles), ordered=True)

    bit_vect = rdMolDescriptors.GetMorganFingerprintAsBitVect(
        mol,
        radius=2,
        nBits=8192,
        invariants=[],
        fromAtoms=[],
        useChirality=False,
        useBondTypes=True,
        useFeatures=False,
    )

    standard_smiles = dm.to_smiles(mol)
    return standard_smiles, list(bit_vect.GetOnBits())
def test_fp_deprecated_args_warnings():
    """Check that `dm.to_fp` emits exactly one DeprecationWarning per call.

    Two argument sets are exercised; each is expected to trigger a single
    DeprecationWarning mentioning the removal in datamol 0.5.0.
    """
    mol = dm.to_mol("CC(=O)Oc1ccccc1C(=O)O")

    def _assert_single_deprecation(**fp_kwargs):
        with warnings.catch_warnings(record=True) as caught:
            # Record DeprecationWarning whatever filters are active outside of
            # this block: by default it can be ignored or shown only once per
            # location, which would make the count below runner-dependent.
            warnings.simplefilter("always", DeprecationWarning)
            dm.to_fp(**fp_kwargs)

            assert len(caught) == 1
            assert issubclass(caught[-1].category, DeprecationWarning)
            assert "will be removed in datamol 0.5.0" in str(caught[-1].message)

    _assert_single_deprecation(
        mol=mol,
        radius=3,
        fp_size=2048,
        useFeatures=True,
        as_array=True,
        fp_type="ecfp",
    )

    _assert_single_deprecation(
        mol=mol,
        use_features=True,
        as_array=True,
        fp_type="ecfp",
    )
def test_to_fp():
    """Check the default fingerprint length and number of set bits."""
    aspirin = dm.to_mol("CC(=O)Oc1ccccc1C(=O)O")
    fingerprint = dm.to_fp(aspirin)

    assert fingerprint.shape[0] == 2048
    assert fingerprint.sum() == 29
def recap(
    mol: Chem.Mol,
    remove_parent: bool = False,
    sanitize: bool = True,
    fix: bool = True,
):
    """Fragment the molecule using the recap algorithm.

    Args:
        mol: a molecule.
        remove_parent: Remove parent from the fragments.
        sanitize: Whether to sanitize the fragments.
        fix: Whether to fix the fragments.

    Returns:
        A list of fragments. Fragments that could not be built, fixed or
        sanitized are dropped, so the list never contains None entries coming
        from the decomposition. Unless `remove_parent` is set, `mol` itself is
        the first element.
    """
    hierarchy = Recap.RecapDecompose(mol)
    frags = [dm.to_mol(smi) for smi in hierarchy.GetAllChildren().keys()]

    if fix:
        frags = [dm.fix_mol(x) for x in frags]
    if sanitize:
        frags = [dm.sanitize_mol(x) for x in frags]

    # Drop failed fragments whatever the flags are: callers should never have
    # to deal with None entries.
    frags = [x for x in frags if x is not None]

    if remove_parent:
        return frags
    return [mol] + frags
def frag(
    mol: Chem.Mol,
    remove_parent: bool = False,
    sanitize: bool = True,
    fix: bool = True,
):
    """Generate all possible fragmentation of a molecule.

    Args:
        mol: a molecule.
        remove_parent: Remove parent from the fragments.
        sanitize: Whether to sanitize the fragments.
        fix: Whether to fix the fragments.

    Returns:
        A list of fragments built from the unique fragment SMILES, taken in
        reverse sorted order. Fragments that could not be built, fixed or
        sanitized are dropped, so the list never contains None entries coming
        from the fragmentation. Unless `remove_parent` is set, `mol` itself is
        the first element.
    """
    fragmentations = FraggleSim.generate_fraggle_fragmentation(mol)

    # Each fragmentation is a dot-separated SMILES; collect the unique parts.
    unique_smiles = set()
    for seq in fragmentations:
        unique_smiles.update(part.strip() for part in seq.split("."))

    frags = [dm.to_mol(s) for s in sorted(unique_smiles, reverse=True)]

    if fix:
        frags = [dm.fix_mol(x) for x in frags]
    if sanitize:
        frags = [dm.sanitize_mol(x) for x in frags]

    # Drop failed fragments whatever the flags are: callers should never have
    # to deal with None entries.
    frags = [x for x in frags if x is not None]

    if remove_parent:
        return frags
    return [mol] + frags
def test_copy_mol_props():
    """Check that `dm.copy_mol_props` transfers every property to the destination."""
    source = dm.to_mol("CCC")
    destination = dm.to_mol("CC")

    # One property per kind of value, including a non-primitive one.
    props = {
        "bool": True,
        "number": 55,
        "float": 5.555,
        "string": "hello",
        "something_else": type(int),
    }

    dm.set_mol_props(source, props)
    dm.copy_mol_props(source, destination)

    assert destination.GetPropsAsDict() == source.GetPropsAsDict()
def test_to_from_text(tmp_path):
    """Check the `.smi` round trip through a path and through a file-like object."""
    smi_path = tmp_path / "mols.smi"

    smiles_list = [
        "Cn1c(=S)ccc2nc[nH]c21",
        "Clc1n[nH]c2c1=[NH+]C(c1ccc[nH+]c1)C[NH+]=2",
        "Fc1ccsc1",
        "N#Cc1cc2c(o1)[NH2+]CCN2Cn1cnc2c1CSCC2",
        "O=CN1CCC2NC=CC2C1",
        "Oc1[nH]nc2c1-n1ncnc1C2",
        "OC1=NNC2(OC=CCO2)C2(C3CCCc4nonc43)NN=NN12",
        "[NH-]Sc1cc2nc[nH+]cc2o1",
        "[NH3+]C12CNCCOC1(N1CCCCC1)C=C(F)NC2",
    ]
    mols = [dm.to_mol(smi) for smi in smiles_list]

    # Write to disk, read back, and compare the SMILES.
    dm.to_smi(mols, smi_path)
    reloaded = dm.read_smi(smi_path)
    assert [dm.to_smiles(m) for m in reloaded] == smiles_list

    # An empty list must raise when `error_if_empty` is set.
    with pytest.raises(ValueError):
        dm.to_smi([], smi_path, error_if_empty=True)

    smi_path.unlink()

    # A file-like object is accepted as well.
    buffer = io.StringIO()
    dm.to_smi(mols, buffer)
    written_lines = buffer.getvalue().strip().split("\n")
    assert written_lines == smiles_list
def test_adjust_singleton():
    """Check that `dm.adjust_singleton` leaves two fragments, ethane included."""
    mol = dm.to_mol("Cl.[N:1]1=CC(O)=CC2CCCCC12.CC.C")
    adjusted = dm.adjust_singleton(mol)

    n_fragments = len(Chem.rdmolops.GetMolFrags(adjusted))
    assert n_fragments == 2

    # The two-carbon fragment must still be there.
    ethane = Chem.MolFromSmiles("CC")
    assert adjusted.HasSubstructMatch(ethane)
def test_mmpa():
    """Check the number of MMPA cuts and the presence of one known cut."""
    mol = dm.to_mol("CCCOCc1cc(c2ncccc2)ccc1")
    cuts = dm.fragment.mmpa_cut(mol)

    known_cut = (
        "CCCOCc1cccc(-c2ccccn2)c1,C(C[*:2])[*:1],C[*:1].c1ccc(-c2cccc(CO[*:2])c2)nc1\n"
    )

    assert len(cuts) == 39
    assert known_cut in cuts
def _preprocess(i, row):
    """Clean the molecule of a row and attach its text representations and fingerprint.

    The SMILES is read from `row[smiles_column]` (`smiles_column` comes from the
    enclosing scope), then fixed, sanitized and standardized.

    Args:
        i: row index (unused).
        row: mapping-like row; it is updated in place.

    Returns:
        The same `row`, with the keys `standard_smiles`, `selfies`, `inchi`,
        `inchikey` and `onbits_fp` set.
    """
    mol = dm.to_mol(str(row[smiles_column]), ordered=True)
    mol = dm.fix_mol(mol)
    mol = dm.sanitize_mol(mol, sanifix=True, charge_neutral=False)
    mol = dm.standardize_mol(
        mol,
        disconnect_metals=False,
        normalize=True,
        reionize=True,
        uncharge=False,
        stereo=True,
    )

    # Radius-2, 8192-bit Morgan fingerprint that takes chirality into account.
    bit_vect = rdMolDescriptors.GetMorganFingerprintAsBitVect(
        mol,
        radius=2,
        nBits=8192,
        invariants=[],
        fromAtoms=[],
        useChirality=True,
        useBondTypes=True,
        useFeatures=False,
    )

    row["standard_smiles"] = dm.standardize_smiles(dm.to_smiles(mol))
    row["selfies"] = dm.to_selfies(mol)
    row["inchi"] = dm.to_inchi(mol)
    row["inchikey"] = dm.to_inchikey(mol)
    row["onbits_fp"] = list(bit_vect.GetOnBits())

    return row
def test_all_bond_remove():
    """Check that `dm.actions.all_bond_remove` returns a list."""
    mol = dm.to_mol("OC1=CC2CCCCC2[N:1]=C1")
    result = dm.actions.all_bond_remove(mol)
    assert isinstance(result, list)
def test_standardize_mol():
    """Check that `standardize_mol` and `standardize_smiles` give the same output."""
    sm = "[Na]OC1=CC2CCCCC2N=C1"

    # Path 1: standardize the SMILES string directly.
    from_smiles = dm.to_smiles(dm.standardize_smiles(sm))

    # Path 2: standardize the molecule, then go through its SMILES.
    standardized = dm.standardize_mol(
        dm.to_mol(sm),
        disconnect_metals=True,
        uncharge=True,
    )
    from_mol = dm.to_smiles(Chem.MolToSmiles(standardized))

    assert from_smiles == from_mol
def test_enumerate_tautomers():
    """Check the set of tautomers enumerated for a known molecule."""
    mol = dm.to_mol("OC1=CC2CCCCC2[N:1]=C1")
    tautomers = dm.enumerate_tautomers(mol, n_variants=10)

    expected = {"O=C1C=[N:1]C2CCCCC2C1", "OC1=CC2CCCCC2[N:1]=C1"}
    found = set(map(dm.to_smiles, tautomers))

    assert found == expected
def test_sanitize():
    """Check `dm.sanitize_mol` and `dm.sanitize_smiles` on valid, fixable and broken inputs."""
    aspirin = "CC(=O)Oc1ccccc1C(=O)O"
    sanitized = dm.sanitize_mol(dm.to_mol(aspirin, sanitize=False), charge_neutral=True)
    assert dm.to_smiles(sanitized) == "CC(=O)Oc1ccccc1C(=O)O"

    # None in, None out.
    assert dm.sanitize_mol(None, charge_neutral=True) is None

    smiles_list = (
        "CC.[H][N:1]1(C)=CC(O)=CC2CCCCC12",  # broken
        "O=c1ccc2ccccc2n1",  # sanitize
        "Cc1nnnn1C",  # none
        "CCc1ccc2nc(=O)c(cc2c1)Cc1nnnn1C1CCCCC1",  # sanitize
        "c1cnc2cc3ccnc3cc12",  # none
        "c1cc2cc3ccnc3cc2n1",  # none
        "O=c1ccnc(c1)-c1cnc2cc3ccnc3cc12",  # sanitize
        "O=c1ccnc(c1)-c1cc1",  # broken
    )
    broken, fixable, valid = smiles_list[0], smiles_list[1], smiles_list[2]

    def _sanitize_unsanitized(smi):
        # Parse without sanitization, then let `sanitize_mol` do the work.
        return dm.sanitize_mol(dm.to_mol(smi, sanitize=False))

    # Check sanitize_mol.
    assert dm.to_mol(fixable) is None
    assert dm.to_mol(valid) is not None

    assert dm.sanitize_mol(None) is None
    assert _sanitize_unsanitized(broken) is None
    assert _sanitize_unsanitized(fixable) is not None
    assert _sanitize_unsanitized(valid) is not None

    mol_2 = _sanitize_unsanitized(fixable)
    assert dm.to_smiles(mol_2) == dm.sanitize_smiles("O=c1ccc2ccccc2[nH]1")

    # Check sanitize_smiles: only the two broken entries are expected to fail.
    fixed_smiles = [dm.sanitize_smiles(smi) for smi in smiles_list]
    recovered = [smi for smi in fixed_smiles if smi is not None]
    assert len(recovered) == 6
def test_get_all_path_between():
    """Check the paths found between atoms 8 and 4, with and without the cycle-basis filter."""
    mol = dm.to_mol("c1cc2cccccc2c1")

    short_paths = [[8, 2, 3, 4], [8, 7, 6, 5, 4]]
    long_path = [8, 9, 0, 1, 2, 3, 4]

    # Without the filter, the long way round is reported as well.
    unfiltered = dm.get_all_path_between(mol, 8, 4, ignore_cycle_basis=False)
    assert unfiltered == short_paths + [long_path]

    # With the filter, only the two short paths remain.
    filtered = dm.get_all_path_between(mol, 8, 4, ignore_cycle_basis=True)
    assert filtered == short_paths
def test_randomize_atoms():
    """Check that randomizing the atom order keeps the sum of atomic numbers."""

    def _atomic_number_sum(molecule):
        return sum([atom.GetAtomicNum() for atom in molecule.GetAtoms()])

    mol = dm.to_mol("c1ccc(C(=O)O)c(c1)OC(=O)C")
    total_before = _atomic_number_sum(mol)

    shuffled = dm.randomize_atoms(mol)
    total_after = _atomic_number_sum(shuffled)

    assert total_before == total_after
def test_reorder_atoms():
    """Check the atomic-number sequence before and after `dm.reorder_atoms`."""

    def _atomic_numbers(molecule):
        return [atom.GetAtomicNum() for atom in molecule.GetAtoms()]

    mol = dm.to_mol("c1ccc(C(=O)O)c(c1)OC(=O)C", add_hs=False, explicit_only=False)

    # Order as produced by the parser.
    assert _atomic_numbers(mol) == [6, 6, 6, 6, 6, 8, 8, 6, 6, 8, 6, 8, 6]

    # Order after reordering.
    reordered = dm.reorder_atoms(mol)
    assert _atomic_numbers(reordered) == [6, 8, 8, 8, 6, 6, 6, 6, 8, 6, 6, 6, 6]
def test_to_sdf_single_mol(tmp_path):
    """Check that a single molecule written to SDF is read back with the same SMILES."""
    sdf_path = tmp_path / "test.sdf"
    smiles = "CC1(C2C(C3C(C(=O)C(=C(C3(C(=O)C2=C(C4=C1C=CC=C4O)O)O)O)C(=O)N)N(C)C)O)O"

    original = dm.to_mol(smiles)
    dm.to_sdf(original, sdf_path)

    reloaded = dm.read_sdf(sdf_path)
    assert dm.to_smiles(original) == dm.to_smiles(reloaded[0])
def test_cluster_mols():
    """Check the sizes of the first 13 clusters on the first 100 freesolv molecules."""
    # Get some mols.
    freesolv = dm.data.freesolv()
    mols = [dm.to_mol(smi) for smi in freesolv["smiles"].iloc[:100].tolist()]

    _, clusters = dm.cluster_mols(mols, cutoff=0.7)

    expected_sizes = [15, 12, 3, 6, 9, 9, 4, 1, 4, 3, 3, 2, 3]
    observed_sizes = [len(cluster) for cluster in clusters[:13]]
    assert observed_sizes == expected_sizes
def test_pick_centroids():
    """Check the indices picked by `dm.pick_centroids` on the first 100 freesolv molecules."""
    freesolv = dm.data.freesolv()
    mols = [dm.to_mol(smi) for smi in freesolv["smiles"].iloc[:100].tolist()]

    indices, centroids = dm.pick_centroids(
        mols,
        npick=18,
        threshold=0.7,
        method="sphere",
        n_jobs=-1,
    )

    expected_indices = np.array(
        [0, 1, 2, 3, 4, 5, 8, 11, 13, 15, 16, 17, 18, 19, 21, 23, 25, 32]
    )
    assert np.all(indices == expected_indices)
def test_sanitize_mol_multiple_conformers_no_warning(caplog):
    """Check that nothing is logged when sanitizing a multi-conformer mol with `verbose=False`."""
    # Generate a mol with several conformers.
    mol = dm.to_mol("CCC[N+](=O)[O-]")
    mol = dm.conformers.generate(mol, n_confs=10)

    dm.sanitize_mol(mol, verbose=False)

    # Check no warning log.
    assert caplog.text == ""
def test_to_smarts():
    """Check `dm.to_smarts` with both `keep_hs` values and on None."""
    mol = dm.to_mol("O=C(C)Oc1ccccc1C(=O)O")

    # The same SMARTS is expected whatever the value of `keep_hs`.
    expected = "[CH3]-[C](=[O])-[O]-[c]1:[cH]:[cH]:[cH]:[cH]:[c]:1-[C](=[O])-[OH]"

    assert dm.to_smarts(mol, keep_hs=True) == expected
    assert dm.to_smarts(mol, keep_hs=False) == expected

    assert dm.to_smarts(None) is None
def test_break_mol():
    """Check the fragments and the tree returned by a deterministic BRICS break."""
    mol = dm.to_mol("CCCOCc1cc(c2ncccc2)ccc1")

    # First element is the fragment list, last element is the tree.
    fragments, *_, tree = dm.fragment.break_mol(
        mol,
        randomize=False,
        mode="brics",
        returnTree=True,
    )

    expected_fragments = ["CCC", "O", "C", "c1ccncc1", "c1ccccc1"]
    expected_nodes = [0, 1, 2, 3, 4, 5, 6, 7, 8]
    expected_edges = [(0, 1), (0, 2), (2, 3), (2, 4), (4, 5), (4, 6), (6, 7), (6, 8)]

    assert fragments == expected_fragments
    assert list(tree.nodes) == expected_nodes
    assert list(tree.edges) == expected_edges