Example #1
0
def test_deterministic_deletion():
    """Delete an explicitly chosen leaf and compare against a known result."""
    # Each case is (starting SMILES, leaf index to delete, expected SMILES).
    cases = [
        ('CC1=CC(C2=C(C#N)N3N=C(C4CC4)SC3=N2)=CC=C1Cl', 3,
         'CC1=NN2C(C#N)=C(C3=CC=C(Cl)C(C)=C3)N=C2S1'),
    ]

    for start_smiles, leaf, expected_smiles in cases:
        start_mol = me.get_mol(start_smiles)
        # Passing an explicit action bypasses the random choice of leaf.
        deletion = action.Delete(leaf)
        pruned = me.delete_random_leaf(start_mol, act=deletion)
        expected_mol = me.get_mol(expected_smiles)
        assert me.comp_mols(expected_mol, pruned)
Example #2
0
def test_deletion():
    """Repeatedly delete a random leaf and check each intermediate molecule.

    The deletions are chained: every step operates on the molecule produced
    by the previous step. Step ``i`` uses ``np.random.RandomState(i)`` so the
    "random" choice is reproducible, and the outcome is compared against the
    ``i``-th entry of ``test_targets``.
    """
    initial_smile = 'CCC(NC(=O)c1scnc1C1CC1)C(=O)N1CCOCC1'

    test_targets = [
        'CCC(NC(=O)c1c(C2CC2)ncs1)C(N)=O', 'CCC(CN)NC(=O)c1c(C2CC2)ncs1',
        'CC(CN)NC(=O)c1c(C2CC2)ncs1', 'CC(CN)NCc1c(C2CC2)ncs1'
    ]

    mol = me.get_mol(initial_smile)
    # The position of each target doubles as the RNG seed for that step, so
    # the number of steps always matches the number of expected targets.
    for seed, target in enumerate(test_targets):
        target_mol = me.get_mol(target)
        rng = np.random.RandomState(seed)
        mol = me.delete_random_leaf(mol, rng)
        assert me.comp_mols(target_mol, mol)
Example #3
0
def test_vocabulary_legal_at_atom():
    """``Vocabulary.legal_at_atom`` must agree with ``me.legal_at_atom``.

    The comparison is made for every atom of every molecule built from the
    module-level ``test_smiles``.
    """
    vocab = vocabulary.Vocabulary()
    molecules = [me.get_mol(smiles) for smiles in test_smiles]

    for molecule in molecules:
        for atom in molecule.GetAtoms():
            from_vocab = vocab.legal_at_atom(atom)
            from_module = me.legal_at_atom(molecule, atom, vocab.vocab)
            assert from_vocab == from_module
Example #4
0
def test_atom_incidence_sparse(imp):
    """Compare atom embeddings from the dense and sparse atom-bond lists.

    Random bond embeddings are aggregated per atom in two ways: by gathering
    rows through the dense index filled by ``imp.fill_atom_bond_list`` and by
    multiplying with a COO matrix built from the index filled by
    ``imp.fill_atom_bond_list_sparse``. Both aggregations must match.

    Args:
        imp: object exposing ``fill_atom_bond_list`` and
            ``fill_atom_bond_list_sparse`` (presumably a fixture selecting
            the implementation under test -- confirm).
    """
    from scipy import sparse

    mol = me.get_mol(
        'O=C(Cn1nc(C(=O)[O-])c2ccccc2c1=O)Nc1ccc2c(c1)C(=O)c1ccccc1C2=O')
    rng = np.random.RandomState(42)

    n_atoms = mol.GetNumAtoms()
    n_bonds = mol.GetNumBonds()
    n_bond_emb = 2 * n_bonds
    emb_dim = 4

    # One extra leading row, zeroed out. The dense gather indexes the full
    # table while the sparse product skips row 0 (presumably index 0 acts as
    # padding in the dense list -- confirm against the implementation).
    bond_embedding = rng.randn(n_bond_emb + 1, emb_dim)
    bond_embedding[0, :] = 0

    dense_index = np.zeros((n_atoms, 6), dtype=np.int32)
    imp.fill_atom_bond_list(dense_index, mol, 6)

    sparse_index = np.zeros((2, 2 * n_bonds), dtype=np.int32)
    sparse_values = np.ones(2 * n_bonds, dtype=np.float32)
    imp.fill_atom_bond_list_sparse(sparse_values, sparse_index, mol)
    # NOTE(review): whatever the call above wrote into the values buffer is
    # discarded here and replaced by ones -- confirm this is intentional.
    sparse_values = np.ones(2 * n_bonds, dtype=np.float32)

    incidence = sparse.coo_matrix((sparse_values, sparse_index),
                                  shape=(n_atoms, n_bond_emb))
    atom_emb_sparse = incidence.dot(bond_embedding[1:])

    # Gather one embedding row per dense slot, then sum over the slots.
    gathered = np.take(bond_embedding, dense_index.flat, axis=0)
    gathered = gathered.reshape(dense_index.shape + (emb_dim, ))
    atom_emb_dense = np.sum(gathered, axis=1)

    assert atom_emb_sparse.shape == atom_emb_dense.shape
    assert np.allclose(atom_emb_sparse, atom_emb_dense)
Example #5
0
def test_mol2graph_single_rings():
    """``mol2graph_single`` with ``include_rings=True`` exposes ring bond data."""
    smiles = 'O=C(Cn1nc(C(=O)[O-])c2ccccc2c1=O)Nc1ccc2c(c1)C(=O)c1ccccc1C2=O'
    graph = mr.mol2graph_single(me.get_mol(smiles), include_rings=True)

    for key in ('ring_bond_idx', 'ring_bond_order'):
        assert key in graph

    expected_entries = 27 * 2
    assert len(graph['ring_bond_idx']) == expected_entries
def test_action_to_integer_roundtrip_delete():
    """A deletion action must survive ``roundtrip_action`` unchanged.

    Equality is judged on the ``to_array()`` representation of the action.
    """
    mol = me.get_mol(
        'CCCCCCC1=NN2C(=N)/C(=C\\c3cc(C)n(-c4ccc(C)cc4C)c3C)C(=O)N=C2S1')
    vocab = vocabulary.Vocabulary()

    # Use the second of the enumerated deletion actions as the probe.
    deletions = me.enumerate_deletion_actions(mol)
    original = deletions[1]

    recovered = roundtrip_action(original, vocab, mol)

    assert list(original.to_array()) == list(recovered.to_array())
Example #7
0
def test_delete_inverse_bond():
    """Deleting leaf 2 and applying the returned inverse restores the molecule."""
    smile = 'C1=CC(=CC=C1)C3=C(C#N)[N]2N=CSC2=N3'
    vocab = data_utils.get_vocab()
    deletion = action.Delete(leaf_idx=2)

    original = me.get_mol(smile)
    pruned, undo = me.compute_deletion(original, deletion, return_inverse=True)
    restored = me.compute_insert(pruned, undo, vocab)

    assert me.comp_mols(original, restored)
Example #8
0
def test_vocabulary_legal_at_bond():
    """``Vocabulary.legal_at_bond`` must agree with ``me.legal_at_bond``.

    Only bonds that are part of a ring are compared; all others are skipped.
    """
    vocab = vocabulary.Vocabulary()
    molecules = [me.get_mol(smiles) for smiles in test_smiles]

    for molecule in molecules:
        for bond in molecule.GetBonds():
            if bond.IsInRing():
                from_vocab = vocab.legal_at_bond(bond)
                from_module = me.legal_at_bond(molecule, bond, vocab.vocab)
                assert from_vocab == from_module
Example #9
0
def test_combine_graphs_leaf_rings_singleton_sequence():
    """Combining a one-element list of graphs keeps the leaf information."""
    smiles = 'O=C(Cn1nc(C(=O)[O-])c2ccccc2c1=O)Nc1ccc2c(c1)C(=O)c1ccccc1C2=O'
    single = mr.mol2graph_single(me.get_mol(smiles), include_leaves=True)
    combined = mr.combine_mol_graph([single])

    for key in ('leaf_ring', 'leaf_atom'):
        assert key in combined

    expected_scope = np.array([[0, 2]])
    assert np.all(combined['leaf_ring_scope'] == expected_scope)
Example #10
0
def test_combine_graphs_bond_rings():
    """Combining a single graph built with rings keeps the ring bond data."""
    smiles = 'O=C(Cn1nc(C(=O)[O-])c2ccccc2c1=O)Nc1ccc2c(c1)C(=O)c1ccccc1C2=O'
    single = mr.mol2graph_single(me.get_mol(smiles),
                                 include_leaves=True,
                                 include_rings=True)
    combined = mr.combine_mol_graph([single])

    for key in ('ring_bond_idx', 'ring_bond_order'):
        assert key in combined

    expected_scope = np.array([[0, 27 * 2]])
    assert np.allclose(combined['ring_scope'], expected_scope)
Example #11
0
def test_delete_inverse_kekulize():
    """Deleting leaf 1 and applying the returned inverse restores the molecule."""
    smile = 'C1=CC(=CC=C1)C4=C(C#N)[N]3N=C(C2CC2)SC3=N4'

    vocab = data_utils.get_vocab()
    deletion = action.Delete(leaf_idx=1)

    original = me.get_mol(smile)
    pruned, undo = me.compute_deletion(original, deletion, return_inverse=True)
    restored = me.compute_insert(pruned, undo, vocab)

    assert me.comp_mols(original, restored)
Example #12
0
def test_multi_insert_deterministic_and_inverses():
    """Chain two random insertions per molecule and verify each one.

    For every insertion the test asserts that (a) replaying the returned
    action through ``me.compute_insert`` reproduces the randomly generated
    result and (b) applying the returned inverse action through
    ``me.delete_random_leaf`` gives back the molecule the insertion started
    from.
    """
    vocab = data_utils.get_vocab()
    # One RNG is shared by all insertions, so the draws depend on list order.
    rng = np.random.RandomState(7)

    init_smiles = [
        'CCCCCCC1=NN2C(=N)/C(=C\\c3cc(C)n(-c4ccc(C)cc4C)c3C)C(=O)N=C2S1',
        'COCC[C@@H](C)C(=O)N(C)Cc1ccc(O)cc1',
        'C=CCn1c(S[C@H](C)c2nc3sc(C)c(C)c3c(=O)[nH]2)nnc1C1CC1',
        'C[NH+](C/C=C/c1ccco1)CCC(F)(F)F',
        'COc1ccc(N2C(=O)C(=O)N(CN3CCC(c4nc5ccccc5s4)CC3)C2=O)cc1',
        'Cc1ccc([C@@H](C)[NH2+][C@H](C)C(=O)Nc2ccccc2F)cc1',
        'O=c1cc(C[NH2+]Cc2cccc(Cl)c2)nc(N2CCCC2)[nH]1',
        'O=C(Cn1nc(C(=O)[O-])c2ccccc2c1=O)Nc1ccc2c(c1)C(=O)c1ccccc1C2=O',
        'O=C(Nc1cccc(S(=O)(=O)N2CCCCC2)c1)c1cc(F)c(F)cc1Cl',
        'CC(C)Cc1nnc(NC(=O)C(=O)NCCC2CCCCC2)s1',
        'C[C@H](NC(=O)[C@@H](O)c1ccccc1)c1nnc2ccccn12',
        'O=S(=O)(Nc1cc(F)ccc1F)c1ccc(Cl)cc1F',
        'CSc1cc(C(=O)N2c3ccccc3NC(=O)C[C@@H]2C)ccn1',
        'CCCN1C(=O)c2[nH]nc(-c3cc(Cl)ccc3O)c2[C@H]1c1ccc(C)cc1',
        'CC[S@@](=O)[C@@H]1CCC[C@H](NC(=O)N(Cc2cccs2)C2CC2)C1',
        'C[C@@H](c1ccco1)[NH+](Cc1ncc(-c2ccccc2)o1)C1CC1',
        'COc1ccc(Cc2nnc(SCC(=O)N3CCC[C@@H](C)C3)o2)cc1',
        'O=C(/C=C/c1ccc2c(c1)OCO2)NC[C@@H]1C[NH+]2CCN1CC2',
        'COc1ccccc1/C=C/C=C(\\C#N)C(=O)Nc1ccc(C(=O)N(C)C)cc1',
        'Cc1cccc(NC(=S)N2CC[NH+](C)CC2)c1C'
    ]

    for smiles in init_smiles:
        current = me.get_mol(smiles)

        # Two chained insertions: the second starts from the first's result.
        for _ in range(2):
            inserted, act, inverse = me.insert_random_node(
                current,
                vocab,
                rng=rng,
                return_action=True,
                return_inverse=True)

            # Replaying the recorded action must reproduce the random result.
            replayed = me.compute_insert(current, act, vocab)
            assert me.comp_mols(inserted, replayed)

            # Applying the inverse must undo the insertion.
            restored = me.delete_random_leaf(inserted, act=inverse)
            assert me.comp_mols(current, restored)

            current = inserted
def test_action_to_integer_roundtrip_insert_atom():
    """A random atom-insert action must survive ``roundtrip_action`` unchanged.

    Equality is judged on the ``to_array()`` representation of the action.
    """
    mol = me.get_mol(
        'CCCCCCC1=NN2C(=N)/C(=C\\c3cc(C)n(-c4ccc(C)cc4C)c3C)C(=O)N=C2S1')
    vocab = vocabulary.Vocabulary()

    # The action is generated at atom 1 with a fixed seed for reproducibility.
    anchor = mol.GetAtomWithIdx(1)
    seeded_rng = np.random.RandomState(20)
    original = me.generate_random_atom_insert(mol,
                                              anchor,
                                              vocab,
                                              rng=seeded_rng)

    recovered = roundtrip_action(original, vocab, mol)

    assert list(original.to_array()) == list(recovered.to_array())
Example #14
0
def test_fill_atom_features():
    """``imp_py`` and ``imp_c`` must fill identical atom feature matrices."""
    mol = me.get_mol(
        'CCCCCCC1=NN2C(=N)/C(=C\\c3cc(C)n(-c4ccc(C)cc4C)c3C)C(=O)N=C2S1')
    feature_shape = (mol.GetNumAtoms(), mr.ATOM_FDIM)

    # Both implementations write into a caller-provided, zero-filled buffer.
    features_py = np.zeros(feature_shape, dtype=np.float32)
    features_c = np.zeros(feature_shape, dtype=np.float32)

    imp_py.fill_atom_features(features_py, mol)
    imp_c.fill_atom_features(features_c, mol)

    assert np.allclose(features_py, features_c)
def test_atom_bond_incidence_segment_reference():
    """Check ``mr.atom_bond_list_segment`` against the reference incidence.

    For every row ``i``, the slice of ``index`` described by ``scopes[i]``
    (start, length) must equal the non-negative entries of row ``i`` of the
    first array returned by ``mol_incidence_reference``.
    """
    mol = me.get_mol(
        'O=C(Cn1nc(C(=O)[O-])c2ccccc2c1=O)Nc1ccc2c(c1)C(=O)c1ccccc1C2=O')

    scopes, index = mr.atom_bond_list_segment(mol)
    a_graph, _ = mol_incidence_reference(mol)

    assert scopes.shape[0] == a_graph.shape[0]

    for i in range(scopes.shape[0]):
        incidence_segment = index[scopes[i, 0]:scopes[i, 0] + scopes[i, 1]]
        # Negative entries are excluded from the reference row before comparing.
        incidence_graph = a_graph[i, a_graph[i] >= 0]
        # Compare lengths first: an elementwise ``==`` on arrays of different
        # length can broadcast (e.g. a single element against many) and make
        # ``np.all`` pass even though the segments differ.
        assert len(incidence_segment) == len(incidence_graph)
        assert np.all(incidence_segment == incidence_graph)
Example #16
0
def test_bond_embedding():
    """``mr.bond_features`` must return the expected vectors for bonds 0 and 8."""
    mol = me.get_mol(
        'CCCCCCC1=NN2C(=N)/C(=C\\c3cc(C)n(-c4ccc(C)cc4C)c3C)C(=O)N=C2S1')

    # Expected feature vector keyed by bond index.
    expected_by_bond = {
        0: [1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
        8: [1., 0., 0., 0., 1., 1., 0., 0., 0., 0., 0.],
    }

    features_0 = mr.bond_features(mol.GetBondWithIdx(0))
    features_8 = mr.bond_features(mol.GetBondWithIdx(8))

    assert expected_by_bond[0] == list(features_0)
    assert expected_by_bond[8] == list(features_8)
Example #17
0
def test_combine_graphs(request):
    """Compare the first combined-graph tensor with the stored reference.

    Args:
        request: forwarded to ``get_data`` to load the reference data
            (presumably the pytest ``request`` fixture -- confirm).
    """
    data = get_data(request)

    graphs = [mr.mol2graph_single(me.get_mol(s)) for s in data['smiles']]
    combined = mr.combine_mol_graph(graphs)
    result = list(combined.values())
    expected = data['graph_stereo']

    # Only the leading rows of the reference are compared, as many as the
    # computed tensor has.
    actual, reference = result[0], expected[0]
    assert np.allclose(actual, reference[:actual.shape[0], :])
Example #18
0
def test_mol2graph_single(request):
    """Compare the first two tensors of a single-molecule graph to references.

    Args:
        request: forwarded to ``get_data`` to load the reference data
            (presumably the pytest ``request`` fixture -- confirm).
    """
    data = get_data(request)

    first_mol = me.get_mol(data['smiles'][0])

    result = list(mr.mol2graph_single(first_mol).values())
    expected = data['graph_nostereo']

    def _matches_prefix(actual, reference):
        # Compare against only as many reference rows as ``actual`` has.
        n_rows = actual.shape[0]
        return np.allclose(actual, reference[:n_rows, :])

    assert _matches_prefix(result[0], expected[0])
    # The first row of the second reference tensor is skipped.
    assert _matches_prefix(result[1], expected[1][1:])
def test_bond_incidence_segment_reference():
    """Check ``mr.bond_incidence_list_segment`` against the reference incidence.

    For every row, the slice of ``index`` described by the (start, length)
    pair in ``scopes`` must equal the non-negative entries of the matching
    row of the second array returned by ``mol_incidence_reference``.
    """
    mol = me.get_mol(
        'O=C(Cn1nc(C(=O)[O-])c2ccccc2c1=O)Nc1ccc2c(c1)C(=O)c1ccccc1C2=O')

    scopes, index = mr.bond_incidence_list_segment(mol)
    _, b_graph = mol_incidence_reference(mol)

    n_rows = scopes.shape[0]
    assert n_rows == b_graph.shape[0]

    for row in range(n_rows):
        segment = index[scopes[row, 0]:scopes[row, 0] + scopes[row, 1]]
        # Negative entries are excluded from the reference row.
        reference = b_graph[row, b_graph[row] >= 0]
        assert len(segment) == len(reference)
        assert np.all(segment == reference)
Example #20
0
def test_delete_inverse():
    """A random leaf deletion followed by its inverse restores the molecule."""
    smile = 'CC1=CC(C2=C(C#N)N3N=C(C4CC4)SC3=N2)=CC=C1Cl'
    vocab = data_utils.get_vocab()
    rng = np.random.RandomState(7)

    original = me.get_mol(smile)
    # The forward action is returned as well but is not needed by this test.
    pruned, _act, undo = me.delete_random_leaf(original,
                                               rng=rng,
                                               return_action=True,
                                               return_inverse=True)

    restored = me.insert_random_node(pruned, vocab, act=undo)

    assert me.comp_mols(original, restored)
Example #21
0
def test_mol2graph_single_rings_leaves():
    """``mol2graph_single`` with ``include_leaves=True`` reports leaf data.

    Checks the exact ``leaf_atom`` indices and the three components of
    ``leaf_ring`` for a fixed molecule.
    """
    smiles = 'O=C(Cn1nc(C(=O)[O-])c2ccccc2c1=O)Nc1ccc2c(c1)C(=O)c1ccccc1C2=O'
    graph = mr.mol2graph_single(me.get_mol(smiles), include_leaves=True)

    for key in ('leaf_ring', 'leaf_atom'):
        assert key in graph

    assert graph['leaf_atom'].tolist() == [0, 7, 8, 16, 25, 33]

    leaf_ring = graph['leaf_ring']

    expected_first = [0] * 6 + [1] * 6
    expected_second = [9, 10, 11, 12, 13, 14] + [26, 27, 28, 29, 30, 31]
    # All twelve values are expected to equal 1 / sqrt(6), up to float tolerance.
    expected_values = [pytest.approx(1 / np.sqrt(6))] * 12

    assert leaf_ring[0][0].tolist() == expected_first
    assert leaf_ring[0][1].tolist() == expected_second
    assert leaf_ring[1].tolist() == expected_values
Example #22
0
def test_atom_insert_deterministic():
    """Replaying a random atom insertion's action gives the same SMILES."""
    smile = 'CC1=CC(C2=C(C#N)N3N=C(C4CC4)SC3=N2)=CC=C1Cl'
    vocab = data_utils.get_vocab()
    rng = np.random.RandomState(7)

    mol = me.get_mol(smile)
    anchor = mol.GetAtomWithIdx(13)
    random_result, act = me.insert_at_atom(mol,
                                           anchor,
                                           vocab,
                                           rng=rng,
                                           return_action=True)
    replayed = me.compute_insert_atom(mol, act, vocab)

    smiles_random = me.get_smiles(random_result)
    smiles_replayed = me.get_smiles(replayed)
    assert smiles_random == smiles_replayed
Example #23
0
def test_atom_bond_list_segment():
    """``imp_c`` and ``imp_py`` must fill identical segment scopes and indices."""
    mol = me.get_mol(
        'O=C(Cn1nc(C(=O)[O-])c2ccccc2c1=O)Nc1ccc2c(c1)C(=O)c1ccccc1C2=O')

    filled = []
    for implementation in (imp_c, imp_py):
        # Buffers are left uninitialised; the call is expected to write them.
        scopes = np.empty((mol.GetNumAtoms(), 2), dtype=np.int32)
        index = np.empty(2 * mol.GetNumBonds(), dtype=np.int32)
        implementation.fill_atom_bond_list_segment(scopes, index, mol)
        filled.append((scopes, index))

    (scopes_c, index_c), (scopes_py, index_py) = filled

    assert np.all(scopes_c == scopes_py)
    assert np.all(index_c == index_py)
def test_action_canonical_roundtrip(seed):
    """Decoding an encoded action and re-encoding it gives the same encoding.

    Args:
        seed: seed for the ``np.random.RandomState`` that generates the
            atom-insert action (presumably supplied by test
            parametrisation -- confirm).
    """
    mol = me.get_mol(
        'CCCCCCC1=NN2C(=N)/C(=C\\c3cc(C)n(-c4ccc(C)cc4C)c3C)C(=O)N=C2S1')
    vocab = vocabulary.Vocabulary()

    anchor = mol.GetAtomWithIdx(1)
    act = me.generate_random_atom_insert(mol,
                                         anchor,
                                         vocab,
                                         rng=np.random.RandomState(seed))

    encoder = ar.VocabInsertEncoder(vocab)
    encoded, _offsets, lengths = action_mol_to_integer(act, mol, encoder)
    decoded_action = ar.integer_to_action(encoded, lengths, encoder)

    # Both actions are encoded afresh and only the first returned element
    # is compared.
    encoding_original = action_mol_to_integer(act, mol, encoder)[0]
    encoding_decoded = action_mol_to_integer(decoded_action, mol, encoder)[0]
    assert (encoding_original == encoding_decoded)
Example #25
0
def test_insert_atom_inverse():
    """A random insertion at atom 13 followed by its inverse restores the molecule."""
    smile = 'CC1=CC(C2=C(C#N)N3N=C(C4CC4)SC3=N2)=CC=C1Cl'
    vocab = data_utils.get_vocab()
    rng = np.random.RandomState(7)

    original = me.get_mol(smile)
    anchor = original.GetAtomWithIdx(13)
    # The forward action is returned as well but is not needed by this test.
    grown, _act, undo = me.insert_at_atom(original,
                                          anchor,
                                          vocab,
                                          rng=rng,
                                          return_action=True,
                                          return_inverse=True)
    # The inserted result is sanitized before the inverse is applied.
    grown = chemutils.sanitize(grown)

    restored = me.compute_deletion(grown, undo)

    assert me.comp_mols(original, restored)
def test_action_canonical_actions_nitrogen():
    """``C1CNCCNC1`` must yield four canonical atom-insert locations."""
    ring = me.get_mol('C1CNCCNC1')
    # Only the count is checked here; the per-atom mapping is ignored.
    _, num_equiv = ar.compute_canonical_atom_insert_locations(ring)

    assert num_equiv == 4
def test_action_canonical_actions():
    """``C1CCC1`` must yield a single canonical atom-insert location.

    All four atoms are expected to map to class 0.
    """
    ring = me.get_mol('C1CCC1')
    atom_equiv, num_equiv = ar.compute_canonical_atom_insert_locations(ring)

    assert num_equiv == 1
    expected_classes = [0, 0, 0, 0]
    assert list(atom_equiv) == expected_classes