def test_layer_choice(layers):
    """
    Build a SMIRKSifier for two bond clusters using ``layers`` as the
    maximum and check that it reports exactly one layer.
    """
    mol_a = mol_toolkit.MolFromSmiles('CCC')
    mol_b = mol_toolkit.MolFromSmiles('CCCC')
    # cluster '1' only has bond 0-1 in the first molecule,
    # cluster '2' only has bonds 0-1 and 2-3 in the second molecule
    cluster_one = ('1', [[(0, 1)], []])
    cluster_two = ('2', [[], [(0, 1), (2, 3)]])
    sifier = SMIRKSifier([mol_a, mol_b],
                         [cluster_one, cluster_two],
                         max_layers=layers)
    # a single layer is expected regardless of the maximum passed in
    assert sifier.layers == 1
def test_failed_layers():
    """
    Check that SMIRKSifier raises ClusteringError when ``max_layers`` is 0
    for two clusters that are expected to need extra layers to separate.
    """
    mol_a = mol_toolkit.MolFromSmiles('CCC')
    mol_b = mol_toolkit.MolFromSmiles('CCCC')
    # cluster '1': bond 0-1 in the first molecule
    # cluster '2': bonds 0-1 and 3-2 in the second molecule
    # with max_layers=0 there should be no way to tell them apart
    cluster_spec = [
        ('1', [[(0, 1)], []]),
        ('2', [[], [(0, 1), (3, 2)]]),
    ]
    with pytest.raises(ClusteringError):
        sifier = SMIRKSifier([mol_a, mol_b], cluster_spec, max_layers=0)
        # only reached when no error was raised; the output then shows
        # which SMIRKS were produced before pytest reports the failure
        print(sifier.current_smirks)
        print_smirks(sifier.current_smirks)
def test_no_fail_fragment(smile, layers):
    """
    Build fragment graphs for the molecule given by ``smile`` with two
    indexed atoms and with one indexed atom, making sure neither
    construction fails and that adding a ``None`` atom returns ``None``.
    """
    molecule = mol_toolkit.MolFromSmiles(smile)

    # graph with atoms 0 and 1 indexed
    pair_graph = ChemPerGraphFromMol(molecule, (0, 1), layers)
    assert pair_graph.add_atom(None) is None

    # graph with only atom 0 indexed; constructing it is the whole check
    single_graph = ChemPerGraphFromMol(molecule, (0, ), layers)
def test_bond():
    """
    Exercise the bond wrapper on bond 0 of the molecule built from 'C':
    order, atoms, ring/aromatic flags, bond-type flags, parent molecule
    and index.
    """
    parent = mol_toolkit.MolFromSmiles('C')
    print('made molecule')
    first_bond = parent.get_bond_by_index(0)

    assert first_bond.get_order() == 1
    assert len(first_bond.get_atoms()) == 2

    # not in a ring and not aromatic
    assert not first_bond.is_ring()
    assert not first_bond.is_aromatic()

    # a single bond, so neither double nor triple
    assert first_bond.is_single()
    assert not first_bond.is_double()
    assert not first_bond.is_triple()

    # the molecule reached through the bond gives back the same SMILES
    assert first_bond.get_molecule().get_smiles() == "C"

    print('trying to get index')
    assert first_bond.get_index() == 0
    print('past bond index')
def test_bad_smirks():
    """
    An improper SMIRKS pattern must make smirks_search raise ValueError
    """
    invalid_pattern = ']X['
    molecule = mol_toolkit.MolFromSmiles('C')
    with pytest.raises(ValueError):
        molecule.smirks_search(invalid_pattern)
def test_single_molecule_graph(smiles, layers, expected):
    """
    Build a fragment graph from one molecule and compare the SMIRKS it
    produces with ``expected``.
    """
    molecule = mol_toolkit.MolFromSmiles(smiles)
    # presumably SMIRKS index -> atom index (1 -> atom 0, 2 -> atom 1);
    # verify against ChemPerGraphFromMol
    index_map = {1: 0, 2: 1}
    generated = ChemPerGraphFromMol(molecule, index_map, layers).as_smirks()
    print(generated)
    assert generated == expected
def test_no_fail_cluster(smiles_list, layers):
    """
    Build ClusterGraphs from the molecules in ``smiles_list`` using
    two-atom and one-atom index tuples, then check that adding a ``None``
    atom returns ``None`` for both graphs.
    """
    n_mols = len(smiles_list)
    # every molecule gets the same list of index tuples
    pair_lists = [[(0, 1), (1, 2)]] * n_mols
    single_lists = [[(0, ), (1, ), (2, )]] * n_mols
    molecules = [mol_toolkit.MolFromSmiles(smi) for smi in smiles_list]

    # build both graphs before asserting on either of them
    graphs = [
        ClusterGraph(molecules, atom_lists, layers=layers)
        for atom_lists in (pair_lists, single_lists)
    ]
    for graph in graphs:
        assert graph.add_atom(None) is None
def test_mols_mismatch():
    """
    ClusterGraph must raise an exception when it is handed one molecule
    but two lists of atom index tuples
    """
    one_molecule = [mol_toolkit.MolFromSmiles('CC')]
    two_atom_lists = [[(0, 1)], [(1, 2)]]
    with pytest.raises(Exception):
        ClusterGraph(one_molecule, two_atom_lists)
def test_expected_removal(in_smirks):
    """
    Repeatedly ask a Reducer to remove a decorator from ``in_smirks``
    until it reports a change, then check that the result is the fully
    generic bond SMIRKS.
    """
    # create reducer
    molecule = mol_toolkit.MolFromSmiles('C')
    reference = [('a', '[#6AH4X4x0!r+0:1]-;!@[#1AH0X1x0!r+0:2]')]
    reducer = Reducer(reference, [molecule])

    # every attempt starts from in_smirks again; stop at the first change
    # NOTE(review): this never terminates if no attempt reports a change
    while True:
        reduced, was_changed = reducer.remove_decorator(in_smirks)
        if was_changed:
            break

    assert reduced == "[*:1]~[*:2]"
def test_max_reduction(smiles):
    """
    Starting from a single indexed atom and no layers, running the
    reduction with an argument of 10 should end at the generic SMIRKS
    [*:1]
    """
    molecule = mol_toolkit.MolFromSmiles(smiles)
    # one cluster labelled '1' containing only atom 0 of the molecule
    single_atom_cluster = [('1', [[(0, )]])]
    # create reducer
    sifier = SMIRKSifier([molecule], single_atom_cluster, max_layers=0)
    # first entry of the result, second field is the SMIRKS string
    final_smirks = sifier.reduce(10)[0][1]
    assert final_smirks == '[*:1]'
def make_cluster_graph(smiles_list, layers=0):
    """
    Build a ClusterGraph from a list of SMILES strings.

    A chemper Mol is created for every entry in smiles_list and all of
    them share the same SMIRKS indexing: atom 0 gets SMIRKS index 1 and
    atom 1 gets SMIRKS index 2.

    layers sets how many bonds away from the indexed atoms the graph
    extends: 0 keeps only the indexed atoms, 1 adds the atoms one bond
    away, and so on. It can also be "all", in which case every atom in
    the molecule is added to the graph.
    """
    n_mols = len(smiles_list)
    molecules = list(map(mol_toolkit.MolFromSmiles, smiles_list))
    # the same single (0, 1) tuple is used for every molecule
    shared_atom_lists = [[(0, 1)]] * n_mols
    return ClusterGraph(molecules, shared_atom_lists, layers=layers)
def make_frag_graph(smiles, layers):
    """
    Build a ChemPerGraph from a single SMILES string.

    A chemper Mol is created from smiles, with atom 0 assigned SMIRKS
    index 1 and atom 1 assigned SMIRKS index 2.

    layers sets how many bonds away from the indexed atoms the graph
    extends: 0 keeps only the indexed atoms, 1 adds the atoms one bond
    away, and so on. It can also be "all", in which case every atom in
    the molecule is added to the graph.
    """
    indexed_atoms = (0, 1)
    return ChemPerGraphFromMol(
        mol_toolkit.MolFromSmiles(smiles), indexed_atoms, layers
    )
def test_more_complex_reducer():
    """
    Smoke test: make sure the SMIRKSifier class functions at least run
    """
    smiles = ['CC', 'C=C', 'C#C']
    n_mols = len(smiles)
    molecules = [mol_toolkit.MolFromSmiles(smi) for smi in smiles]
    # cluster '1' uses atoms (0, 1) and cluster '2' uses atoms (0, 2)
    # in every molecule
    cluster_lists = [
        ('1', [[(0, 1)]] * n_mols),
        ('2', [[(0, 2)]] * n_mols),
    ]
    # create reducer
    sifier = SMIRKSifier(molecules, cluster_lists, verbose=False)
    # make sure printing runs
    print_smirks(sifier.current_smirks)
    # a large argument, assumed to be enough to hit all possible methods
    reduced = sifier.reduce(2000)
def test_smirks_search():
    """
    Search the molecule built from 'C' for a C-H bond pattern: four
    matches are expected and each must contain SMIRKS indices 1 and 2
    """
    molecule = mol_toolkit.MolFromSmiles('C')
    ch_bond_pattern = "[#6:1]-[#1:2]"
    found = molecule.smirks_search(ch_bond_pattern)
    assert len(found) == 4
    for hit in found:
        for smirks_index in (1, 2):
            assert smirks_index in hit
def test_reducer_methods():
    """
    The Reducer picks its methods at random, so each one is called
    explicitly here with a known input
    """
    # practice reducer
    generic = "[*:1]~[*:2]"
    reducer = Reducer([('a', '[*:1]~[*:2]')], [mol_toolkit.MolFromSmiles('C')])

    # an already generic SMIRKS comes back untouched
    result, was_changed = reducer.remove_decorator(generic)
    assert result == "[*:1]~[*:2]"
    assert not was_changed

    # nothing to remove from an empty "OR" list
    _, was_changed = reducer.remove_or([])
    assert not was_changed

    # nothing to remove from an empty "AND" list
    _, was_changed = reducer.remove_and([])
    assert not was_changed

    # is_bond option for remove_or
    result, was_changed = reducer.remove_or([('-', [])], True)
    assert was_changed
    assert result == []

    # top-level method with something to remove
    _, was_changed = reducer.remove_or([('#6', ['X4'])])
    assert was_changed

    input_ors = [('#6', ['X4']), ('#7', ['X3'])]
    # (method, extra positional arguments, expected output)
    cases = [
        (reducer.remove_all_dec_type, (), [('#6', []), ('#7', [])]),
        (reducer.remove_all_bases, (), [('*', ['X4']), ('*', ['X3'])]),
        (reducer.remove_ref, (0,), [('#7', ['X3'])]),
        (reducer.remove_ref_sub_decs, (0,), [('#6', []), ('#7', ['X3'])]),
        (reducer.remove_one_sub_dec, (0,), [('#6', []), ('#7', ['X3'])]),
    ]
    # each call gets its own deep copy; run all calls before comparing
    outputs = [
        method(copy.deepcopy(input_ors), *extra)
        for method, extra, _ in cases
    ]
    for actual, (_, _, wanted) in zip(outputs, cases):
        assert actual == wanted
def test_molecule():
    """
    Check the molecule wrapper built from 'C': atom and bond iteration,
    lookup by index and SMILES output
    """
    molecule = mol_toolkit.MolFromSmiles('C')

    # count by iterating, so the iterators themselves are exercised
    n_atoms = sum(1 for _ in molecule.get_atoms())
    assert n_atoms == 5

    n_bonds = sum(1 for _ in molecule.get_bonds())
    assert n_bonds == 4

    # index lookups only need to run without raising
    first_atom = molecule.get_atom_by_index(0)
    first_bond = molecule.get_bond_by_index(0)

    assert molecule.get_smiles() == "C"
def test_atom():
    """
    Check the atom wrapper on atom 0 of the molecule built from 'C':
    numeric properties, aromaticity, index, neighbors, bonds and the
    parent molecule
    """
    center = mol_toolkit.MolFromSmiles('C').get_atom_by_index(0)

    # (method name, expected return value), checked in this order
    numeric_checks = (
        ('atomic_number', 6),
        ('degree', 4),
        ('connectivity', 4),
        ('valence', 4),
        ('formal_charge', 0),
        ('hydrogen_count', 4),
        ('ring_connectivity', 0),
        ('min_ring_size', 0),
    )
    for method_name, wanted in numeric_checks:
        assert getattr(center, method_name)() == wanted

    assert not center.is_aromatic()
    assert center.get_index() == 0

    neighbors = center.get_neighbors()
    assert len(neighbors) == 4
    # the first neighbor must be reported as connected to the atom
    assert center.is_connected_to(neighbors[0])

    assert len(center.get_bonds()) == 4

    # get_molecule is at least run; comparing SMILES is the only check
    assert center.get_molecule().get_smiles() == "C"
def test_bad_smiles():
    """
    A bad SMILES string must make MolFromSmiles raise ValueError
    """
    invalid_smiles = 'ZZZ'
    with pytest.raises(ValueError):
        mol_toolkit.MolFromSmiles(invalid_smiles)
from chemper.mol_toolkits import mol_toolkit
from chemper.graphs.fragment_graph import ChemPerGraphFromMol

# Example: build a fragment graph around atoms 0 and 1 of 'C=C',
# including one layer of neighboring atoms, and print its SMIRKS.
mol = mol_toolkit.MolFromSmiles('C=C')  # note this adds explicit hydrogens to your molecule
atoms = (0, 1)
graph = ChemPerGraphFromMol(mol, atoms, layers=1)
# as_smirks is a method: it has to be called, otherwise print shows the
# bound method object instead of the SMIRKS string below
print(graph.as_smirks())
# [#6AH2X3x0r0+0:1](-!@[#1AH0X1x0r0+0])(-!@[#1AH0X1x0r0+0])=!@[#6AH2X3x0r0+0:2](-!@[#1AH0X1x0r0+0])-!@[#1AH0X1x0r0+0]
from chemper.graphs.cluster_graph import ClusterGraph
from chemper.mol_toolkits import mol_toolkit

# Example: build a ClusterGraph from two molecules, where the first
# contributes one pair of atom indices and the second contributes two,
# then print the SMIRKS for the whole cluster.
mol1, mol2 = (mol_toolkit.MolFromSmiles(smi) for smi in ('CCC', 'CCCCC'))
atoms1 = [(0, 1)]
atoms2 = [(0, 1), (1, 2)]
graph = ClusterGraph(
    [mol1, mol2],
    [atoms1, atoms2],
)
print(graph.as_smirks())
# "[#6AH2X4x0r0+0,#6AH3X4x0r0+0:1]-;!@[#6AH2X4x0r0+0:2]"