def test_deterministic_deletion():
    """Check that an explicit ``Delete`` action produces the expected molecule.

    Each case is a ``(start SMILES, leaf index, expected SMILES)`` triple; the
    leaf index is wrapped in ``action.Delete`` and passed as ``act``.
    """
    cases = [
        ('CC1=CC(C2=C(C#N)N3N=C(C4CC4)SC3=N2)=CC=C1Cl', 3,
         'CC1=NN2C(C#N)=C(C3=CC=C(Cl)C(C)=C3)N=C2S1'),
    ]
    for start_smiles, leaf, expected_smiles in cases:
        start = me.get_mol(start_smiles)
        deleted = me.delete_random_leaf(start, act=action.Delete(leaf))
        expected = me.get_mol(expected_smiles)
        assert me.comp_mols(expected, deleted)
def test_deletion():
    """Delete leaves repeatedly and compare each step with a known target.

    The deletions are cumulative: step ``i`` operates on the molecule left by
    step ``i - 1`` and draws from ``np.random.RandomState(i)``.
    """
    initial_smile = 'CCC(NC(=O)c1scnc1C1CC1)C(=O)N1CCOCC1'
    test_targets = [
        'CCC(NC(=O)c1c(C2CC2)ncs1)C(N)=O',
        'CCC(CN)NC(=O)c1c(C2CC2)ncs1',
        'CC(CN)NC(=O)c1c(C2CC2)ncs1',
        'CC(CN)NCc1c(C2CC2)ncs1',
    ]
    mol = me.get_mol(initial_smile)
    # Iterate the targets themselves (the position doubles as the seed) so the
    # number of steps can never drift from the length of the target list.
    for seed, target in enumerate(test_targets):
        target_mol = me.get_mol(target)
        rng = np.random.RandomState(seed)
        mol = me.delete_random_leaf(mol, rng)
        assert me.comp_mols(target_mol, mol)
def test_vocabulary_legal_at_atom():
    """``Vocabulary.legal_at_atom`` must agree with ``me.legal_at_atom``.

    The comparison is made for every atom of every molecule built from the
    module-level ``test_smiles``.
    """
    vocab = vocabulary.Vocabulary()
    molecules = [me.get_mol(s) for s in test_smiles]
    for mol in molecules:
        for atom in mol.GetAtoms():
            from_vocab = vocab.legal_at_atom(atom)
            from_module = me.legal_at_atom(mol, atom, vocab.vocab)
            assert from_vocab == from_module
def test_atom_incidence_sparse(imp):
    """Dense and sparse atom/bond incidence must yield the same atom sums.

    A random bond embedding table is summed per atom in two ways: by gathering
    rows through the dense list written by ``imp.fill_atom_bond_list`` and by
    multiplying a COO matrix built from ``imp.fill_atom_bond_list_sparse``.

    Args:
        imp: object providing ``fill_atom_bond_list`` and
            ``fill_atom_bond_list_sparse`` (supplied by the test harness).
    """
    from scipy import sparse

    mol = me.get_mol(
        'O=C(Cn1nc(C(=O)[O-])c2ccccc2c1=O)Nc1ccc2c(c1)C(=O)c1ccccc1C2=O')
    rng = np.random.RandomState(42)

    num_atoms = mol.GetNumAtoms()
    num_bonds = mol.GetNumBonds()
    num_bond_emb = 2 * mol.GetNumBonds()

    # One extra leading row, forced to zero; the sparse path skips it below.
    # presumably index 0 is the padding entry of the dense list -- TODO confirm
    bond_embedding = rng.randn(num_bond_emb + 1, 4)
    bond_embedding[0, :] = 0

    result_dense = np.zeros((num_atoms, 6), dtype=np.int32)
    imp.fill_atom_bond_list(result_dense, mol, 6)

    result_sparse_idx = np.zeros((2, 2 * num_bonds), dtype=np.int32)
    result_sparse_values = np.ones(2 * num_bonds, dtype=np.float32)
    imp.fill_atom_bond_list_sparse(result_sparse_values, result_sparse_idx,
                                   mol)
    # NOTE(review): the values buffer is reset to ones here, so whatever the
    # call above wrote into it is not checked -- confirm this is intended.
    result_sparse_values = np.ones(2 * num_bonds, dtype=np.float32)

    incidence = sparse.coo_matrix(
        (result_sparse_values, result_sparse_idx),
        shape=(num_atoms, num_bond_emb))
    atom_emb_sparse = incidence.dot(bond_embedding[1:])

    gathered = np.take(bond_embedding, result_dense.flat, axis=0)
    gathered = gathered.reshape(result_dense.shape + (4, ))
    atom_emb_dense = np.sum(gathered, axis=1)

    assert atom_emb_sparse.shape == atom_emb_dense.shape
    assert np.allclose(atom_emb_sparse, atom_emb_dense)
def test_mol2graph_single_rings():
    """``mol2graph_single(include_rings=True)`` must expose ring bond data.

    Both ring keys have to be present and ``ring_bond_idx`` must hold
    ``27 * 2`` entries for the reference molecule.
    """
    mol = me.get_mol(
        'O=C(Cn1nc(C(=O)[O-])c2ccccc2c1=O)Nc1ccc2c(c1)C(=O)c1ccccc1C2=O')
    graph = mr.mol2graph_single(mol, include_rings=True)

    for key in ('ring_bond_idx', 'ring_bond_order'):
        assert key in graph
    assert len(graph['ring_bond_idx']) == 27 * 2
def test_action_to_integer_roundtrip_delete():
    """A deletion action must survive ``roundtrip_action`` unchanged.

    The second enumerated deletion action of the molecule is round-tripped and
    the ``to_array()`` forms are compared element-wise as lists.
    """
    mol = me.get_mol(
        'CCCCCCC1=NN2C(=N)/C(=C\\c3cc(C)n(-c4ccc(C)cc4C)c3C)C(=O)N=C2S1')
    vocab = vocabulary.Vocabulary()

    deletions = me.enumerate_deletion_actions(mol)
    act = deletions[1]
    recovered = roundtrip_action(act, vocab, mol)

    assert list(act.to_array()) == list(recovered.to_array())
def test_delete_inverse_bond():
    """Deleting leaf 2 and applying the returned inverse restores the input."""
    smile = 'C1=CC(=CC=C1)C3=C(C#N)[N]2N=CSC2=N3'
    vocab = data_utils.get_vocab()
    act = action.Delete(leaf_idx=2)
    original = me.get_mol(smile)

    deleted, inverse = me.compute_deletion(original, act, return_inverse=True)
    restored = me.compute_insert(deleted, inverse, vocab)

    assert me.comp_mols(original, restored)
def test_vocabulary_legal_at_bond():
    """``Vocabulary.legal_at_bond`` must agree with ``me.legal_at_bond``.

    Only bonds for which ``IsInRing()`` is true are compared; all other bonds
    of the ``test_smiles`` molecules are skipped.
    """
    vocab = vocabulary.Vocabulary()
    molecules = [me.get_mol(s) for s in test_smiles]
    for mol in molecules:
        # Lazy filter: bonds are still visited one at a time, in order.
        ring_bonds = (b for b in mol.GetBonds() if b.IsInRing())
        for bond in ring_bonds:
            from_vocab = vocab.legal_at_bond(bond)
            from_module = me.legal_at_bond(mol, bond, vocab.vocab)
            assert from_vocab == from_module
def test_combine_graphs_leaf_rings_singleton_sequence():
    """Combining a one-element list of leaf graphs keeps the leaf entries.

    The combined result must contain ``leaf_ring`` and ``leaf_atom``, and its
    ``leaf_ring_scope`` must equal ``[[0, 2]]``.
    """
    mol = me.get_mol(
        'O=C(Cn1nc(C(=O)[O-])c2ccccc2c1=O)Nc1ccc2c(c1)C(=O)c1ccccc1C2=O')
    single = mr.mol2graph_single(mol, include_leaves=True)
    combined = mr.combine_mol_graph([single])

    assert 'leaf_ring' in combined
    assert 'leaf_atom' in combined

    expected_scope = np.array([[0, 2]])
    assert np.all(combined['leaf_ring_scope'] == expected_scope)
def test_combine_graphs_bond_rings():
    """Combining a graph built with leaves and rings keeps the ring entries.

    The combined result must contain both ring keys, and ``ring_scope`` must
    be close to ``[[0, 27 * 2]]``.
    """
    mol = me.get_mol(
        'O=C(Cn1nc(C(=O)[O-])c2ccccc2c1=O)Nc1ccc2c(c1)C(=O)c1ccccc1C2=O')
    single = mr.mol2graph_single(mol, include_leaves=True, include_rings=True)
    combined = mr.combine_mol_graph([single])

    assert 'ring_bond_idx' in combined
    assert 'ring_bond_order' in combined

    expected_scope = np.array([[0, 27 * 2]])
    assert np.allclose(combined['ring_scope'], expected_scope)
def test_delete_inverse_kekulize():
    """Deleting leaf 1 and applying the returned inverse restores the input."""
    smile = 'C1=CC(=CC=C1)C4=C(C#N)[N]3N=C(C2CC2)SC3=N4'
    vocab = data_utils.get_vocab()
    act = action.Delete(leaf_idx=1)
    original = me.get_mol(smile)

    deleted, inverse = me.compute_deletion(original, act, return_inverse=True)
    restored = me.compute_insert(deleted, inverse, vocab)

    assert me.comp_mols(original, restored)
def test_multi_insert_deterministic_and_inverses():
    """Two chained random insertions must be reproducible and invertible.

    For every starting molecule, two insertions are applied one after the
    other, all drawing from one shared ``RandomState(7)``. After each one:

    * replaying the returned action through ``me.compute_insert`` must give
      the same molecule as the random insertion, and
    * applying the returned inverse through ``me.delete_random_leaf`` must
      give back the molecule the insertion started from.
    """
    vocab = data_utils.get_vocab()
    rng = np.random.RandomState(7)
    init_smiles = [
        'CCCCCCC1=NN2C(=N)/C(=C\\c3cc(C)n(-c4ccc(C)cc4C)c3C)C(=O)N=C2S1',
        'COCC[C@@H](C)C(=O)N(C)Cc1ccc(O)cc1',
        'C=CCn1c(S[C@H](C)c2nc3sc(C)c(C)c3c(=O)[nH]2)nnc1C1CC1',
        'C[NH+](C/C=C/c1ccco1)CCC(F)(F)F',
        'COc1ccc(N2C(=O)C(=O)N(CN3CCC(c4nc5ccccc5s4)CC3)C2=O)cc1',
        'Cc1ccc([C@@H](C)[NH2+][C@H](C)C(=O)Nc2ccccc2F)cc1',
        'O=c1cc(C[NH2+]Cc2cccc(Cl)c2)nc(N2CCCC2)[nH]1',
        'O=C(Cn1nc(C(=O)[O-])c2ccccc2c1=O)Nc1ccc2c(c1)C(=O)c1ccccc1C2=O',
        'O=C(Nc1cccc(S(=O)(=O)N2CCCCC2)c1)c1cc(F)c(F)cc1Cl',
        'CC(C)Cc1nnc(NC(=O)C(=O)NCCC2CCCCC2)s1',
        'C[C@H](NC(=O)[C@@H](O)c1ccccc1)c1nnc2ccccn12',
        'O=S(=O)(Nc1cc(F)ccc1F)c1ccc(Cl)cc1F',
        'CSc1cc(C(=O)N2c3ccccc3NC(=O)C[C@@H]2C)ccn1',
        'CCCN1C(=O)c2[nH]nc(-c3cc(Cl)ccc3O)c2[C@H]1c1ccc(C)cc1',
        'CC[S@@](=O)[C@@H]1CCC[C@H](NC(=O)N(Cc2cccs2)C2CC2)C1',
        'C[C@@H](c1ccco1)[NH+](Cc1ncc(-c2ccccc2)o1)C1CC1',
        'COc1ccc(Cc2nnc(SCC(=O)N3CCC[C@@H](C)C3)o2)cc1',
        'O=C(/C=C/c1ccc2c(c1)OCO2)NC[C@@H]1C[NH+]2CCN1CC2',
        'COc1ccccc1/C=C/C=C(\\C#N)C(=O)Nc1ccc(C(=O)N(C)C)cc1',
        'Cc1cccc(NC(=S)N2CC[NH+](C)CC2)c1C',
    ]

    for smiles in init_smiles:
        current = me.get_mol(smiles)
        # First and second insertion share one body; the second starts from
        # the molecule produced by the first.
        for _ in range(2):
            inserted, act, inverse = me.insert_random_node(
                current, vocab, rng=rng,
                return_action=True, return_inverse=True)

            replayed = me.compute_insert(current, act, vocab)
            assert me.comp_mols(inserted, replayed)

            undone = me.delete_random_leaf(inserted, act=inverse)
            assert me.comp_mols(current, undone)

            current = inserted
def test_action_to_integer_roundtrip_insert_atom():
    """A random atom-insert action must survive ``roundtrip_action`` unchanged.

    The action is generated at atom index 1 with ``RandomState(20)``; the
    ``to_array()`` forms before and after are compared as lists.
    """
    mol = me.get_mol(
        'CCCCCCC1=NN2C(=N)/C(=C\\c3cc(C)n(-c4ccc(C)cc4C)c3C)C(=O)N=C2S1')
    vocab = vocabulary.Vocabulary()

    anchor = mol.GetAtomWithIdx(1)
    act = me.generate_random_atom_insert(
        mol, anchor, vocab, rng=np.random.RandomState(20))
    recovered = roundtrip_action(act, vocab, mol)

    assert list(act.to_array()) == list(recovered.to_array())
def test_fill_atom_features():
    """``imp_py`` and ``imp_c`` must write matching atom feature arrays.

    Both fill a zero-initialised ``(num_atoms, mr.ATOM_FDIM)`` float32 buffer
    for the same molecule; the buffers are compared with ``np.allclose``.
    """
    mol = me.get_mol(
        'CCCCCCC1=NN2C(=N)/C(=C\\c3cc(C)n(-c4ccc(C)cc4C)c3C)C(=O)N=C2S1')
    shape = (mol.GetNumAtoms(), mr.ATOM_FDIM)

    features_py = np.zeros(shape, dtype=np.float32)
    features_c = np.zeros(shape, dtype=np.float32)

    imp_py.fill_atom_features(features_py, mol)
    imp_c.fill_atom_features(features_c, mol)

    assert np.allclose(features_py, features_c)
def test_atom_bond_incidence_segment_reference():
    """Segmented atom/bond list must match the reference incidence table.

    Row ``i`` of ``scopes`` is used as ``(start, length)`` into ``index``; the
    selected slice must equal the non-negative entries of row ``i`` of the
    reference table returned by ``mol_incidence_reference``.
    """
    mol = me.get_mol(
        'O=C(Cn1nc(C(=O)[O-])c2ccccc2c1=O)Nc1ccc2c(c1)C(=O)c1ccccc1C2=O')
    scopes, index = mr.atom_bond_list_segment(mol)
    a_graph, _ = mol_incidence_reference(mol)

    assert scopes.shape[0] == a_graph.shape[0]

    for i in range(scopes.shape[0]):
        start = scopes[i, 0]
        incidence_segment = index[start:start + scopes[i, 1]]
        incidence_graph = a_graph[i, a_graph[i] >= 0]
        # Compare lengths first: an element-wise ``==`` would broadcast a
        # length-1 side and could hide a size mismatch.
        assert len(incidence_segment) == len(incidence_graph)
        assert np.all(incidence_segment == incidence_graph)
def test_bond_embedding():
    """``mr.bond_features`` must return the expected vectors for bonds 0 and 8."""
    mol = me.get_mol(
        'CCCCCCC1=NN2C(=N)/C(=C\\c3cc(C)n(-c4ccc(C)cc4C)c3C)C(=O)N=C2S1')
    expected = {
        0: [1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
        8: [1., 0., 0., 0., 1., 1., 0., 0., 0., 0., 0.],
    }

    # Compute both feature vectors before asserting on either of them.
    features = {
        bond_idx: mr.bond_features(mol.GetBondWithIdx(bond_idx))
        for bond_idx in (0, 8)
    }

    assert expected[0] == list(features[0])
    assert expected[8] == list(features[8])
def test_combine_graphs(request):
    """First tensor of the combined graph must match the stored reference.

    Only as many leading rows of the reference (``data['graph_stereo'][0]``)
    as the computed tensor has are compared.

    Args:
        request: pytest ``request`` fixture, forwarded to ``get_data``.
    """
    data = get_data(request)
    graphs = [mr.mol2graph_single(me.get_mol(s)) for s in data['smiles']]
    result = list(mr.combine_mol_graph(graphs).values())
    expected = data['graph_stereo']

    actual = result[0]
    reference = expected[0]
    assert np.allclose(actual, reference[:actual.shape[0], :])
def test_mol2graph_single(request):
    """First two tensors of a single-molecule graph must match the reference.

    Each computed tensor is compared with the leading rows of its reference
    from ``data['graph_nostereo']``; the second reference drops its first row
    (``expected[1][1:]``) before the comparison.

    Args:
        request: pytest ``request`` fixture, forwarded to ``get_data``.
    """
    data = get_data(request)
    mol = me.get_mol(data['smiles'][0])
    result = list(mr.mol2graph_single(mol).values())
    expected = data['graph_nostereo']

    pairs = (
        (result[0], expected[0]),
        (result[1], expected[1][1:]),
    )
    for actual, reference in pairs:
        assert np.allclose(actual, reference[:actual.shape[0], :])
def test_bond_incidence_segment_reference():
    """Segmented bond incidence list must match the reference table.

    Row ``i`` of ``scopes`` is used as ``(start, length)`` into ``index``; the
    slice must have the same length and contents as the non-negative entries
    of row ``i`` of the reference table.
    """
    mol = me.get_mol(
        'O=C(Cn1nc(C(=O)[O-])c2ccccc2c1=O)Nc1ccc2c(c1)C(=O)c1ccccc1C2=O')
    scopes, index = mr.bond_incidence_list_segment(mol)
    _, b_graph = mol_incidence_reference(mol)

    num_rows = scopes.shape[0]
    assert num_rows == b_graph.shape[0]

    for row in range(num_rows):
        begin = scopes[row, 0]
        end = scopes[row, 0] + scopes[row, 1]
        from_segment = index[begin:end]
        from_reference = b_graph[row, b_graph[row] >= 0]

        assert len(from_segment) == len(from_reference)
        assert np.all(from_segment == from_reference)
def test_delete_inverse():
    """A random leaf deletion followed by its inverse restores the input.

    The deletion draws from ``RandomState(7)``; the returned inverse is fed to
    ``me.insert_random_node`` through ``act``.
    """
    smile = 'CC1=CC(C2=C(C#N)N3N=C(C4CC4)SC3=N2)=CC=C1Cl'
    vocab = data_utils.get_vocab()
    rng = np.random.RandomState(7)
    original = me.get_mol(smile)

    # The forward action is returned as well but is not needed here.
    deleted, _act, inverse = me.delete_random_leaf(
        original, rng=rng, return_action=True, return_inverse=True)
    restored = me.insert_random_node(deleted, vocab, act=inverse)

    assert me.comp_mols(original, restored)
def test_mol2graph_single_rings_leaves():
    """``mol2graph_single(include_leaves=True)`` must report the known leaves.

    Checks the exact ``leaf_atom`` indices, the two index rows stored in
    ``leaf_ring[0]`` and the twelve ``1 / sqrt(6)`` values in ``leaf_ring[1]``.
    """
    mol = me.get_mol(
        'O=C(Cn1nc(C(=O)[O-])c2ccccc2c1=O)Nc1ccc2c(c1)C(=O)c1ccccc1C2=O')
    result = mr.mol2graph_single(mol, include_leaves=True)

    assert 'leaf_ring' in result
    assert 'leaf_atom' in result

    assert result['leaf_atom'].tolist() == [0, 7, 8, 16, 25, 33]

    leaf_ring = result['leaf_ring']
    expected_first_row = [0] * 6 + [1] * 6
    expected_second_row = list(range(9, 15)) + list(range(26, 32))
    assert leaf_ring[0][0].tolist() == expected_first_row
    assert leaf_ring[0][1].tolist() == expected_second_row

    expected_value = pytest.approx(1 / np.sqrt(6))
    assert leaf_ring[1].tolist() == [expected_value] * 12
def test_atom_insert_deterministic():
    """Replaying the action of a random atom insertion gives the same SMILES.

    The insertion happens at atom index 13 with ``RandomState(7)``; the
    returned action is replayed through ``me.compute_insert_atom``.
    """
    smile = 'CC1=CC(C2=C(C#N)N3N=C(C4CC4)SC3=N2)=CC=C1Cl'
    vocab = data_utils.get_vocab()
    rng = np.random.RandomState(7)
    mol = me.get_mol(smile)
    anchor = mol.GetAtomWithIdx(13)

    inserted, act = me.insert_at_atom(
        mol, anchor, vocab, rng=rng, return_action=True)
    replayed = me.compute_insert_atom(mol, act, vocab)

    assert me.get_smiles(inserted) == me.get_smiles(replayed)
def test_atom_bond_list_segment():
    """``imp_c`` and ``imp_py`` must fill identical segment buffers.

    Each implementation writes into its own ``(num_atoms, 2)`` scope array and
    ``2 * num_bonds`` index array (both int32); the results must match exactly.
    """
    mol = me.get_mol(
        'O=C(Cn1nc(C(=O)[O-])c2ccccc2c1=O)Nc1ccc2c(c1)C(=O)c1ccccc1C2=O')

    def _fill_with(impl):
        # Fresh buffers for every implementation so neither sees the other's
        # output.
        scopes = np.empty((mol.GetNumAtoms(), 2), dtype=np.int32)
        index = np.empty(2 * mol.GetNumBonds(), dtype=np.int32)
        impl.fill_atom_bond_list_segment(scopes, index, mol)
        return scopes, index

    scopes_c, index_c = _fill_with(imp_c)
    scopes_py, index_py = _fill_with(imp_py)

    assert np.all(scopes_c == scopes_py)
    assert np.all(index_c == index_py)
def test_action_canonical_roundtrip(seed):
    """Integer encoding must be stable across an encode/decode round trip.

    A random atom insert at atom index 1 is encoded with
    ``action_mol_to_integer``, decoded with ``ar.integer_to_action``, and both
    the original and the decoded action are encoded again; the first element
    of the two encodings must compare equal.

    Args:
        seed: seed for the ``RandomState`` that generates the action.
    """
    mol = me.get_mol(
        'CCCCCCC1=NN2C(=N)/C(=C\\c3cc(C)n(-c4ccc(C)cc4C)c3C)C(=O)N=C2S1')
    vocab = vocabulary.Vocabulary()
    anchor = mol.GetAtomWithIdx(1)
    act = me.generate_random_atom_insert(
        mol, anchor, vocab, rng=np.random.RandomState(seed))

    encoder = ar.VocabInsertEncoder(vocab)
    # The offsets are returned but not needed for decoding.
    result, _offsets, lengths = action_mol_to_integer(act, mol, encoder)
    action_roundtrip = ar.integer_to_action(result, lengths, encoder)

    encoded_original = action_mol_to_integer(act, mol, encoder)[0]
    encoded_roundtrip = action_mol_to_integer(action_roundtrip, mol,
                                              encoder)[0]
    assert (encoded_original == encoded_roundtrip)
def test_insert_atom_inverse():
    """An atom insertion followed by its inverse deletion restores the input.

    The insertion happens at atom index 13 with ``RandomState(7)``; the result
    is passed through ``chemutils.sanitize`` before the inverse is applied.
    """
    smile = 'CC1=CC(C2=C(C#N)N3N=C(C4CC4)SC3=N2)=CC=C1Cl'
    vocab = data_utils.get_vocab()
    rng = np.random.RandomState(7)
    original = me.get_mol(smile)
    anchor = original.GetAtomWithIdx(13)

    # The forward action is returned as well but is not needed here.
    inserted, _act, inverse = me.insert_at_atom(
        original, anchor, vocab, rng=rng,
        return_action=True, return_inverse=True)
    inserted = chemutils.sanitize(inserted)
    restored = me.compute_deletion(inserted, inverse)

    assert me.comp_mols(original, restored)
def test_action_canonical_actions_nitrogen():
    """``C1CNCCNC1`` must yield four canonical atom insert locations."""
    mol = me.get_mol('C1CNCCNC1')
    # Only the count is checked; the per-atom assignment is ignored.
    _atom_equiv, num_equiv = ar.compute_canonical_atom_insert_locations(mol)
    assert num_equiv == 4
def test_action_canonical_actions():
    """``C1CCC1`` must yield one canonical insert location shared by all atoms.

    The count must be 1 and every one of the four atoms must be assigned to
    class 0.
    """
    mol = me.get_mol('C1CCC1')
    atom_equiv, num_equiv = ar.compute_canonical_atom_insert_locations(mol)

    assert num_equiv == 1
    expected_classes = [0] * 4
    assert list(atom_equiv) == expected_classes