def test_coulomb_matrix(self):
   """
   Check the output shape of the full (square) Coulomb matrix.

   The molecules with zero and with one conformer are expected to give
   (1, num_atoms, num_atoms); the multi-conformer molecule is expected to
   give (1, num_confs, num_atoms, num_atoms).
   """
   n = self.num_atoms
   featurizer = CoulombMatrix(n)
   expected_shapes = (
       (self.mol_with_no_conf, (1, n, n)),
       (self.mol_with_one_conf, (1, n, n)),
       (self.mol_with_multi_conf, (1, self.num_confs, n, n)),
   )
   for mol, shape in expected_shapes:
     features = featurizer([mol])
     assert features.shape == shape
 def test_upper_tri_coulomb_matrix(self):
   """
   Check the output shape when only the upper triangle is returned.

   With upper_tri=True each matrix is expected to be flattened to the
   number of upper-triangular entries (diagonal included) of a
   num_atoms x num_atoms matrix.
   """
   featurizer = CoulombMatrix(self.num_atoms, upper_tri=True)
   n_entries = np.triu_indices(self.num_atoms)[0].size
   single_conf_shape = (1, n_entries)
   multi_conf_shape = (1, self.num_confs, n_entries)

   assert featurizer([self.mol_with_no_conf]).shape == single_conf_shape
   assert featurizer([self.mol_with_one_conf]).shape == single_conf_shape
   assert featurizer([self.mol_with_multi_conf]).shape == multi_conf_shape
 def test_coulomb_matrix_padding(self):
   """
   Check that the matrix size follows max_atoms rather than the molecule.

   max_atoms is set to twice num_atoms, and every returned matrix is
   expected to be max_atoms x max_atoms.
   """
   padded = self.num_atoms * 2
   featurizer = CoulombMatrix(max_atoms=padded)
   checks = [
       (self.mol_with_no_conf, (1, padded, padded)),
       (self.mol_with_one_conf, (1, padded, padded)),
       (self.mol_with_multi_conf, (1, self.num_confs, padded, padded)),
   ]
   for mol, shape in checks:
     assert featurizer([mol]).shape == shape
 def test_coulomb_matrix_hydrogens(self):
   """
   Check output shapes when hydrogens are kept (remove_hydrogens=False).

   The upper-triangular output is expected to be sized for all num_atoms
   atoms.
   """
   featurizer = CoulombMatrix(
       max_atoms=self.num_atoms, remove_hydrogens=False, upper_tri=True)
   row_idx, _ = np.triu_indices(self.num_atoms)
   flat_len = row_idx.size

   mols = (self.mol_with_no_conf, self.mol_with_one_conf,
           self.mol_with_multi_conf)
   shapes = ((1, flat_len), (1, flat_len), (1, self.num_confs, flat_len))
   for mol, shape in zip(mols, shapes):
     assert featurizer([mol]).shape == shape
 def test_coulomb_matrix_no_hydrogens(self):
   """
   Check output shapes when hydrogens are stripped (remove_hydrogens=True).

   The atom count of mol_with_no_conf is used as the hydrogen-free size
   (presumably that fixture carries no explicit hydrogens -- confirm in
   setUp); it must be smaller than num_atoms for the test to be meaningful.
   """
   heavy_atoms = self.mol_with_no_conf.GetNumAtoms()
   assert heavy_atoms < self.num_atoms

   featurizer = CoulombMatrix(
       max_atoms=heavy_atoms, remove_hydrogens=True, upper_tri=True)
   flat_len = np.triu_indices(heavy_atoms)[0].size

   assert featurizer([self.mol_with_no_conf]).shape == (1, flat_len)
   assert featurizer([self.mol_with_one_conf]).shape == (1, flat_len)
   multi = featurizer([self.mol_with_multi_conf])
   assert multi.shape == (1, self.num_confs, flat_len)
Beispiel #6
0
    def generate_coulomb_matrices(self):
        """Compute Coulomb-matrix features and merge them into ``self.data``.

        For every value in the ``"SMILE"`` column of ``self.data`` a molecule
        is parsed, explicit hydrogens are added, one conformer is embedded
        and UFF-optimised, and the molecule is featurized with
        ``CoulombMatrix`` and ``CoulombMatrixEig`` (both sized by
        ``self.max_atoms``).

        The results are inner-joined back onto ``self.data`` on ``"SMILE"``
        as three new columns:

        * ``"COULOMB"``       - the array returned by ``coulomb_matrix``
        * ``"COULOMB_ARRAY"`` - its first entry, flattened
        * ``"COULOMB_EIGEN"`` - the first eigenvalue featurization, flattened

        Rows are then de-duplicated on ``"SMILE"`` (first occurrence kept)
        and every ``"ID"`` that disappeared is logged as a warning.

        SMILES that cannot be parsed, or for which no conformer can be
        embedded, are logged and skipped; their rows are therefore removed
        by the inner join.

        Raises:
            SystemExit: with status 1 if deepchem is not available.
        """
        if not __deepchem_imported__:
            self.log.error(
                "deepchem was not imported or available in this execution environment"
            )
            # A bare sys.exit() reports success (status 0); signal the failure.
            sys.exit(1)

        self.log.info(
            "Generating Coulomb Matrices, may take a few moments ...")

        matrix_featurizer = CoulombMatrix(self.max_atoms,
                                          randomize=False,
                                          n_samples=1)
        eigen_featurizer = CoulombMatrixEig(self.max_atoms)

        matrices = []
        smiles = []
        arrays = []
        eigenarrays = []
        num_confs = 1

        for smile in self.data["SMILE"]:
            mol = Chem.MolFromSmiles(smile)
            if mol is None:
                # MolFromSmiles returns None for unparsable input; skip the
                # molecule instead of crashing the whole run.
                self.log.warning(
                    "Coulomb Matrix - unable to parse smile: %s", smile)
                continue

            # AddHs returns a new molecule and leaves its argument untouched,
            # so the result must be rebound for the hydrogens to take effect.
            # NOTE(review): self.max_atoms must cover the hydrogen-inclusive
            # atom count -- confirm against how max_atoms is chosen.
            mol = Chem.AddHs(mol)

            conf_ids = AllChem.EmbedMultipleConfs(mol, numConfs=num_confs)
            if len(conf_ids) != num_confs:
                # Retry once, this time with ignoreSmoothingFailures=True.
                conf_ids = AllChem.EmbedMultipleConfs(
                    mol, numConfs=num_confs, ignoreSmoothingFailures=True)
            if len(conf_ids) != num_confs:
                self.log.warning(
                    "Coulomb Matrix - unable to generate %d conformer(s) for smile: %s",
                    num_confs, smile)
                continue

            for conf_id in conf_ids:
                AllChem.UFFOptimizeMolecule(mol, confId=conf_id)

            matrix = matrix_featurizer.coulomb_matrix(mol)
            matrices.append(matrix)
            arrays.append(matrix[0].flatten())
            smiles.append(smile)
            eigenvalues = eigen_featurizer.featurize([mol])
            eigenarrays.append(eigenvalues[0].flatten())

        coulomb_frame = pd.DataFrame({
            "SMILE": smiles,
            "COULOMB": matrices,
            "COULOMB_ARRAY": arrays,
            "COULOMB_EIGEN": eigenarrays,
        })

        before_ids = set(self.data["ID"])
        self.data = pd.merge(self.data,
                             coulomb_frame,
                             how="inner",
                             on=["SMILE"])
        # Keep a single row per SMILES string after the join.
        self.data = self.data.drop_duplicates(subset=["SMILE"], keep="first")
        after_ids = set(self.data["ID"])

        # Report every ID lost to a skipped molecule or to de-duplication.
        for missing in before_ids - after_ids:
            self.log.warning(
                "Dropped molecule ID: %s after join with Coulomb Matrix data",
                missing)