Example #1
0
    def from_smiles(self, smiles, hydrogens_are_explicit=True):
        """
        Build an RDKit molecule from a SMILES string and embed it in 3D.

        The SMILES is parsed with sanitization disabled and is then
        sanitized with a custom set of operations (see the inline
        comment below).

        Parameters
        ----------
        smiles : str
            The SMILES string that describes the molecule
        hydrogens_are_explicit : bool
            When False, hydrogen atoms are added to the parsed molecule
            before the embedding step. When True, the molecule is
            embedded exactly as it was parsed. Default is True

        Returns
        -------
        molecule : an rdkit.Chem.rdchem.Mol object
            The molecule after sanitization and embedding
        """
        from rdkit.Chem import AllChem as Chem

        mol = Chem.MolFromSmiles(smiles, sanitize=False)

        # Start from SANITIZE_ALL and toggle off the ADJUSTHS and
        # SETAROMATICITY operations with XOR
        sanitize_ops = (Chem.SANITIZE_ALL
                        ^ Chem.SANITIZE_ADJUSTHS
                        ^ Chem.SANITIZE_SETAROMATICITY)
        Chem.SanitizeMol(mol, sanitize_ops)

        # Hydrogens are only added when the caller says they are missing
        mol = mol if hydrogens_are_explicit else Chem.AddHs(mol)

        # Embed in 3D; the status returned by EmbedMolecule is not checked
        Chem.EmbedMolecule(mol)

        return mol
def _build_structures_frame(smiles, temps, iterations):
    """Assemble the results table from the three parallel result lists."""
    frame = pd.DataFrame([smiles, temps, iterations]).T
    frame.columns = ['smiles', 'temperature', 'iteration']
    return frame


def _canonicalize_candidate(candidate):
    """Return the SMILES of *candidate* after embedding and UFF
    optimization, or None when any RDKit step fails."""
    try:
        sampled = Chem.MolFromSmiles(candidate)
        if sampled is None:
            # RDKit signals an unparsable SMILES by returning None
            return None
        cation = Chem.AddHs(sampled)
        if Chem.EmbedMolecule(cation, Chem.ETKDG()) < 0:
            # A negative conformer id means the embedding failed
            return None
        Chem.UFFOptimizeMolecule(cation)
        cation = Chem.RemoveHs(cation)
        return Chem.MolToSmiles(cation)
    except Exception:
        # Best effort: a candidate RDKit cannot process is skipped.
        # Exception (rather than a bare except) lets KeyboardInterrupt
        # and SystemExit stop the sampling loop.
        return None


def generate_structures(vae, smi, char_to_index, limit=1e4, write=False):
    """
    Sample SMILES strings until no new structure has been found for
    more than `limit` consecutive attempts.

    On every attempt a temperature is drawn uniformly from [0, 2) and
    passed to ``decode_smiles`` together with `vae`, `smi` and
    `char_to_index`. The text before the first space of the decoded
    string is taken as the candidate. Candidates that RDKit can parse,
    embed and UFF-optimize are kept when their resulting SMILES has not
    been seen before; the running table is printed after each new find.

    Parameters
    ----------
    vae, smi, char_to_index
        Forwarded unchanged to ``decode_smiles``
    limit : number
        Number of consecutive attempts without a new structure that is
        tolerated before the search stops. Default is 1e4
    write : str or False
        When truthy, the table is saved to ``'<write>.csv'`` after each
        new structure and once more when the search ends. Default is
        False

    Returns
    -------
    df : pandas.DataFrame
        Table with columns 'smiles', 'temperature' and 'iteration'. It
        is empty when no valid structure was found.
    """
    rdkit_mols = []
    seen = set()  # O(1) duplicate lookup alongside the ordered list
    temps = []
    iterations = []
    iteration = limit_counter = 0
    while True:
        iteration += 1
        limit_counter += 1
        t = random.random() * 2
        candidate = decode_smiles(vae, smi, char_to_index, temp=t).split(" ")[0]
        canonical = _canonicalize_candidate(candidate)
        if canonical is not None and canonical not in seen:
            seen.add(canonical)
            temps.append(t)
            iterations.append(iteration)
            rdkit_mols.append(canonical)
            limit_counter = 0
            df = _build_structures_frame(rdkit_mols, temps, iterations)
            print(df)
            if write:
                # Checkpoint only when the table actually changed
                df.to_csv('{}.csv'.format(write), index=False)
        if limit_counter > limit:
            break
    # Always build the table here so that a run without any valid
    # structure returns an empty frame instead of failing on an
    # unbound variable
    df = _build_structures_frame(rdkit_mols, temps, iterations)
    if write:
        df.to_csv('{}.csv'.format(write), index=False)
    return df
Example #3
0
    def from_smiles(self, smiles):
        """
        Build an RDKit molecule from a SMILES string, add hydrogens to
        it and embed it in 3D.

        Parameters
        ----------
        smiles : str
            The SMILES string that describes the molecule.

        Returns
        -------
        molecule : an rdkit.Chem.rdchem.Mol object
            The hydrogen-complete molecule after the embedding step
        """
        from rdkit.Chem import AllChem as Chem

        # Parse the SMILES and add hydrogens in one go
        mol_with_hs = Chem.AddHs(Chem.MolFromSmiles(smiles))

        # Embed in 3D; the status returned by EmbedMolecule is not checked
        Chem.EmbedMolecule(mol_with_hs)

        return mol_with_hs
Example #4
0
    def _getMolWithEHTcharges(self, mol : Chem.Mol):
        """
        Run an EHT calculation on an embedded copy of the molecule and
        store the resulting atomic charges on the returned molecule.

        Each charge is stored as the double property 'EHTcharge' on the
        atom with the same index. When ``self.includeHs`` is falsy the
        charges are written onto a hydrogen-stripped copy of the input
        instead of the embedded molecule.

        TODO: perhaps this should be divided into more methods for more flexibility...

        :param mol: the molecule to calculate charges for (not modified here)
        :return: a molecule whose atoms carry the 'EHTcharge' property
        """

        # Strip and re-add hydrogens, then generate 3D coordinates
        embedded = Chem.AddHs(Chem.RemoveHs(mol))
        Chem.EmbedMolecule(embedded)
        # should create two output files ('run.out' and 'nul')
        # TODO: maybe this is not really needed or even desirable in the final version?
        # NOTE(review): the success flag returned by RunMol is not checked
        passed, res = rdEHTTools.RunMol(embedded)
        n_embedded = len(embedded.GetAtoms())
        charges = res.GetAtomicCharges()

        target = embedded if self.includeHs else Chem.RemoveHs(mol)

        # Charges are matched to atoms purely by index, limited to the
        # atoms present in both molecules
        # (assumes heavy atoms keep their indices - TODO confirm)
        n_shared = min(n_embedded, len(target.GetAtoms()))
        for idx in range(n_shared):
            target.GetAtomWithIdx(idx).SetDoubleProp('EHTcharge', charges[idx])
        return target
Example #5
0
def get_descriptors(smiles):
    """
    Compute a fixed set of RDKit descriptors for a SMILES string.

    The molecule is parsed, completed with hydrogens and embedded in 3D
    with ETKDG before any descriptor is evaluated.

    Parameters
    ----------
    smiles : str
        The SMILES string of the chemical of interest

    Returns
    -------
    descriptors : dict
        Maps each descriptor name to the value computed for the molecule

    Notes: Developed with RDKit 2019.03.4, although doc pages listed 2019.03.1
    """

    mol = Chem.AddHs(Chem.MolFromSmiles(smiles))
    Chem.EmbedMolecule(mol, Chem.ETKDG())

    # Ordered (key, function) table; each function is called with the
    # molecule, in this order, and the result is stored under the key.
    calculators = (
        # Simple descriptors
        # https://www.rdkit.org/docs/source/rdkit.Chem.Descriptors.html
        # NOTE(review): 'molwt' is filled from ExactMolWt - confirm this
        # is the intended mass definition
        ('molwt', Descriptors.ExactMolWt),

        # Partial charge metrics
        ('max_abs_partial_charge', Descriptors.MaxAbsPartialCharge),
        ('max_partial_charge', Descriptors.MaxPartialCharge),
        ('min_abs_partial_charge', Descriptors.MinAbsPartialCharge),
        ('min_partial_charge', Descriptors.MinPartialCharge),

        # Basic electron counts
        ('num_radical_electrons', Descriptors.NumRadicalElectrons),
        ('num_valence_electrons', Descriptors.NumValenceElectrons),

        # 3-D descriptors
        # https://www.rdkit.org/docs/source/rdkit.Chem.Descriptors3D.html
        # According to some basic tests, rdMolDescriptors.CalcAsphericity
        # and rdMolDescriptors.CalcEccentricity should give the same
        # values as the two Descriptors3D functions used here.
        ('asphericity', Descriptors3D.Asphericity),
        ('eccentricity', Descriptors3D.Eccentricity),
        ('inertial_shape_factor', Descriptors3D.InertialShapeFactor),
        ('radius_of_gyration', Descriptors3D.RadiusOfGyration),
        ('spherocity_index', Descriptors3D.SpherocityIndex),

        # Graph descriptors
        # https://www.rdkit.org/docs/source/rdkit.Chem.GraphDescriptors.html
        ('balaban_j', GraphDescriptors.BalabanJ),
        ('bertz_ct', GraphDescriptors.BertzCT),
        ('chi0', GraphDescriptors.Chi0),
        ('chi0n', GraphDescriptors.Chi0n),
        ('chi0v', GraphDescriptors.Chi0v),
        ('chi1', GraphDescriptors.Chi1),
        ('chi1n', GraphDescriptors.Chi1n),
        ('chi1v', GraphDescriptors.Chi1v),
        ('chi2n', GraphDescriptors.Chi2n),
        ('chi2v', GraphDescriptors.Chi2v),
        ('chi3n', GraphDescriptors.Chi3n),
        ('chi3v', GraphDescriptors.Chi3v),
        ('chi4n', GraphDescriptors.Chi4n),
        ('chi4v', GraphDescriptors.Chi4v),
        ('hall_kier_alpha', GraphDescriptors.HallKierAlpha),
        ('kappa1', GraphDescriptors.Kappa1),
        ('kappa2', GraphDescriptors.Kappa2),
        ('kappa3', GraphDescriptors.Kappa3),

        # Predicted properties from Wildman and Crippen
        ('log_p', Descriptors.MolLogP),
        ('refractivity', Descriptors.MolMR),
    )

    return {key: compute(mol) for key, compute in calculators}