Exemple #1
0
    def test_alkane(self):
        """Check ConvMol featurization of a simple alkane (propane, 'CCC').

        Verifies the atom count and the degree-grouped adjacency lists for
        degrees 0 through 6.
        """
        # A bare `import rdkit` does not guarantee that the `Chem` submodule
        # has been loaded, so import the submodule explicitly.
        import rdkit.Chem
        raw_smiles = ['CCC']
        mols = [rdkit.Chem.MolFromSmiles(s) for s in raw_smiles]
        featurizer = ConvMolFeaturizer()
        mol_list = featurizer.featurize(mols)
        mol = mol_list[0]

        # 3 carbons in alkane
        assert mol.get_num_atoms() == 3

        deg_adj_lists = mol.get_deg_adjacency_lists()
        # No atom has degree 0
        assert np.array_equal(deg_adj_lists[0],
                              np.zeros([0, 0], dtype=np.int32))
        # Outer two carbons are connected to the central carbon, which the
        # expected values place at index 2 (after the two degree-1 atoms)
        assert np.array_equal(deg_adj_lists[1],
                              np.array([[2], [2]], dtype=np.int32))
        # Central carbon connected to outer two
        assert np.array_equal(deg_adj_lists[2],
                              np.array([[0, 1]], dtype=np.int32))
        # No atoms of degree 3 through 6: zero rows, `degree` columns each
        for degree in range(3, 7):
            assert np.array_equal(deg_adj_lists[degree],
                                  np.zeros([0, degree], dtype=np.int32))
Exemple #2
0
    def test_graph_gather(self):
        """Test that GraphGather can be invoked.

        Featurizes 'CCC' and 'C', merges them into a single multi-molecule,
        feeds the resulting tensors through GraphGather and checks the shape
        of the evaluated output.
        """
        batch_size = 2
        n_features = 75
        # Together, CCC and C contain 4 atoms.
        raw_smiles = ['CCC', 'C']
        mols = [rdkit.Chem.MolFromSmiles(s) for s in raw_smiles]
        featurizer = ConvMolFeaturizer()
        mols = featurizer.featurize(mols)
        multi_mol = ConvMol.agglomerate_mols(mols)
        atom_features = multi_mol.get_atom_features()
        degree_slice = multi_mol.deg_slice
        membership = multi_mol.membership
        # The degree-0 adjacency list is dropped; only degrees >= 1 are fed
        # to the layer.
        deg_adjs = multi_mol.get_deg_adjacency_lists()[1:]

        with self.session() as sess:
            atom_features = tf.convert_to_tensor(atom_features,
                                                 dtype=tf.float32)
            degree_slice = tf.convert_to_tensor(degree_slice, dtype=tf.int32)
            membership = tf.convert_to_tensor(membership, dtype=tf.int32)
            deg_adjs_tf = [
                tf.convert_to_tensor(deg_adj, dtype=tf.int32)
                for deg_adj in deg_adjs
            ]
            args = [atom_features, degree_slice, membership] + deg_adjs_tf
            out_tensor = GraphGather(batch_size)(*args)
            sess.run(tf.global_variables_initializer())
            out_tensor = out_tensor.eval()
            # TODO(rbharath): Why is it 2*n_features instead of n_features?
            assert out_tensor.shape == (batch_size, 2 * n_features)
Exemple #3
0
    def test_carbon_nitrogen(self):
        """Check ConvMol featurization of a carbon/nitrogen molecule.

        The molecule 'C[N+](C)(C)C' has a central nitrogen of degree 4 and
        4 carbons of degree 1 (connected only to the central nitrogen).
        """
        # A bare `import rdkit` does not guarantee that the `Chem` submodule
        # has been loaded, so import the submodule explicitly.
        import rdkit.Chem
        raw_smiles = ['C[N+](C)(C)C']
        mols = [rdkit.Chem.MolFromSmiles(s) for s in raw_smiles]
        featurizer = ConvMolFeaturizer()
        mols = featurizer.featurize(mols)
        mol = mols[0]

        # 5 atoms in compound
        assert mol.get_num_atoms() == 5

        # Expected adjacency lists grouped by degree. Degrees not listed here
        # have no atoms, i.e. an empty (0, degree) array.
        expected_nonempty = {
            # The 4 outer carbons are each connected to the central nitrogen,
            # which the expected values place at index 4.
            1: np.array([[4], [4], [4], [4]], dtype=np.int32),
            # Central nitrogen connected to everything else.
            4: np.array([[0, 1, 2, 3]], dtype=np.int32),
        }

        deg_adj_lists = mol.get_deg_adjacency_lists()
        for degree in range(7):
            expected = expected_nonempty.get(degree)
            if expected is None:
                expected = np.zeros([0, degree], dtype=np.int32)
            assert np.array_equal(deg_adj_lists[degree], expected)
Exemple #4
0
    def test_single_carbon(self):
        """Test that single carbon atom is featurized properly.

        A lone carbon ('C') has no bonds, so it is the only entry in the
        degree-0 adjacency list and every higher-degree list has no rows.
        """
        # A bare `import rdkit` does not guarantee that the `Chem` submodule
        # has been loaded, so import the submodule explicitly.
        import rdkit.Chem
        raw_smiles = ['C']
        mols = [rdkit.Chem.MolFromSmiles(s) for s in raw_smiles]
        featurizer = ConvMolFeaturizer()
        mol_list = featurizer.featurize(mols)
        mol = mol_list[0]

        # Only one carbon
        assert mol.get_num_atoms() == 1

        deg_adj_lists = mol.get_deg_adjacency_lists()
        # The single atom has degree 0: one row with zero neighbour columns
        assert np.array_equal(deg_adj_lists[0],
                              np.zeros([1, 0], dtype=np.int32))
        # No bonds, so the lists for degrees 1 through 6 have no rows
        assert np.array_equal(deg_adj_lists[1],
                              np.zeros([0, 1], dtype=np.int32))
        assert np.array_equal(deg_adj_lists[2],
                              np.zeros([0, 2], dtype=np.int32))
        assert np.array_equal(deg_adj_lists[3],
                              np.zeros([0, 3], dtype=np.int32))
        assert np.array_equal(deg_adj_lists[4],
                              np.zeros([0, 4], dtype=np.int32))
        assert np.array_equal(deg_adj_lists[5],
                              np.zeros([0, 5], dtype=np.int32))
        assert np.array_equal(deg_adj_lists[6],
                              np.zeros([0, 6], dtype=np.int32))
  def test_alkane(self):
    """Featurize propane ('CCC') and check its degree-grouped adjacency."""
    propane = [rdkit.Chem.MolFromSmiles(smi) for smi in ['CCC']]
    conv_mol = ConvMolFeaturizer().featurize(propane)[0]

    # Propane has three carbon atoms.
    assert conv_mol.get_num_atoms() == 3

    adjacency = conv_mol.get_deg_adjacency_lists()
    expected_by_degree = [
        # Degree 0: no isolated atoms.
        np.zeros([0, 0], dtype=np.int32),
        # Degree 1: both terminal carbons bond to the central carbon.
        np.array([[2], [2]], dtype=np.int32),
        # Degree 2: the central carbon bonds to both terminal carbons.
        np.array([[0, 1]], dtype=np.int32),
    ]
    # Degrees 3 through 6: no such atoms, so zero rows of `degree` columns.
    expected_by_degree.extend(
        np.zeros([0, degree], dtype=np.int32) for degree in range(3, 7))

    for degree, expected in enumerate(expected_by_degree):
      assert np.array_equal(adjacency[degree], expected)
  def test_single_carbon(self):
    """Featurize a lone carbon ('C') and check its adjacency lists."""
    lone_carbon = [rdkit.Chem.MolFromSmiles(smi) for smi in ['C']]
    conv_mol = ConvMolFeaturizer().featurize(lone_carbon)[0]

    # Exactly one atom is present.
    assert conv_mol.get_num_atoms() == 1

    adjacency = conv_mol.get_deg_adjacency_lists()
    for degree in range(7):
      # The atom is unbonded, so it is the single row of the degree-0 list;
      # every other degree has no rows at all.
      n_rows = 1 if degree == 0 else 0
      expected = np.zeros([n_rows, degree], dtype=np.int32)
      assert np.array_equal(adjacency[degree], expected)
  def test_carbon_nitrogen(self):
    """Test on carbon nitrogen molecule"""
    # Note there is a central nitrogen of degree 4, with 4 carbons
    # of degree 1 (connected only to the central nitrogen).
    raw_smiles = ['C[N+](C)(C)C']
    mols = [rdkit.Chem.MolFromSmiles(s) for s in raw_smiles]
    featurizer = ConvMolFeaturizer()
    mols = featurizer.featurize(mols)
    mol = mols[0]

    # 5 atoms in compound
    assert mol.get_num_atoms() == 5

    # Get the adjacency lists grouped by degree
    deg_adj_lists = mol.get_deg_adjacency_lists()
    # No atom has degree 0
    assert np.array_equal(deg_adj_lists[0],
                          np.zeros([0, 0], dtype=np.int32))
    # The 4 outer carbons are connected to the central nitrogen, which the
    # expected values place at index 4 (after the four degree-1 atoms)
    assert np.array_equal(deg_adj_lists[1],
                          np.array([[4], [4], [4], [4]], dtype=np.int32))
    assert np.array_equal(deg_adj_lists[2],
                          np.zeros([0, 2], dtype=np.int32))
    assert np.array_equal(deg_adj_lists[3],
                          np.zeros([0, 3], dtype=np.int32))
    # Central nitrogen connected to everything else.
    assert np.array_equal(deg_adj_lists[4],
                          np.array([[0, 1, 2, 3]], dtype=np.int32))
    assert np.array_equal(deg_adj_lists[5],
                          np.zeros([0, 5], dtype=np.int32))
    assert np.array_equal(deg_adj_lists[6],
                          np.zeros([0, 6], dtype=np.int32))
Exemple #8
0
  def test_graph_gather(self):
    """Test that GraphGather can be invoked.

    Builds a two-molecule batch ('CCC' and 'C'), converts its atom features,
    degree slices, membership and adjacency lists to tensors, runs them
    through GraphGather and checks the output shape.
    """
    batch_size = 2
    n_features = 75
    # Together, CCC and C contain 4 atoms.
    raw_smiles = ['CCC', 'C']
    mols = [rdkit.Chem.MolFromSmiles(s) for s in raw_smiles]
    featurizer = ConvMolFeaturizer()
    mols = featurizer.featurize(mols)
    multi_mol = ConvMol.agglomerate_mols(mols)
    atom_features = multi_mol.get_atom_features()
    degree_slice = multi_mol.deg_slice
    membership = multi_mol.membership
    # Only the adjacency lists for degree >= 1 are passed to the layer.
    deg_adjs = multi_mol.get_deg_adjacency_lists()[1:]

    with self.session() as sess:
      atom_features = tf.convert_to_tensor(atom_features, dtype=tf.float32)
      degree_slice = tf.convert_to_tensor(degree_slice, dtype=tf.int32)
      membership = tf.convert_to_tensor(membership, dtype=tf.int32)
      deg_adjs_tf = [
          tf.convert_to_tensor(deg_adj, dtype=tf.int32) for deg_adj in deg_adjs
      ]
      args = [atom_features, degree_slice, membership] + deg_adjs_tf
      out_tensor = GraphGather(batch_size)(*args)
      sess.run(tf.global_variables_initializer())
      out_tensor = out_tensor.eval()
      # TODO(rbharath): Why is it 2*n_features instead of n_features?
      assert out_tensor.shape == (batch_size, 2 * n_features)
 def test_per_atom_fragmentation(self):
   """Check that per-atom fragmentation yields one entry per heavy atom.

   With ``per_atom_fragmentation=True``, the featurization of each molecule
   should contain as many entries as that molecule has heavy atoms.
   """
   import rdkit.Chem
   smiles_strings = ['CC(CO)Cc1ccccc1', 'CC']
   rdkit_mols = [rdkit.Chem.MolFromSmiles(smi) for smi in smiles_strings]
   fragment_featurizer = ConvMolFeaturizer(per_atom_fragmentation=True)
   fragment_sets = fragment_featurizer.featurize(rdkit_mols)
   for fragments, rdkit_mol in zip(fragment_sets, rdkit_mols):
     assert len(fragments) == rdkit_mol.GetNumHeavyAtoms()