Python Chem.AddHs Examples

Programming Language: Python

Namespace/Package Name: rdkit

Class/Type: Chem

Method/Function: AddHs

Examples at hotexamples.com: 30

The function rdkit.Chem.AddHs() is a part of the RDKit library in Python. It is used to add explicit hydrogen atoms to a molecular structure. Hydrogen atoms are crucial for accurately representing the 3D geometry and interactions of molecules. This function helps in preparing molecules for tasks such as molecular docking, virtual screening, and molecular dynamics simulations. By adding hydrogen atoms, the function ensures that the molecular structure is complete and ready for further analysis and computations.

Python Chem.AddHs - 30 examples found. These are the top rated real world Python examples of rdkit.Chem.AddHs extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

AddHs(30)

CanonSmiles(30)

GetAdjacencyMatrix(30)

FragmentOnBonds(30)

ForwardSDMolSupplier(30)

FindMolChiralCenters(30)

FindAtomEnvironmentOfRadiusN(30)

DeleteSubstructs(30)

Conformer(30)

CombineMols(30)

CanonicalRankAtoms(30)

GetDistanceMatrix(30)

Atom(30)

AssignStereochemistry(30)

Get3DDistanceMatrix(29)

FindAllPathsOfLengthN(29)

EditableMol(24)

AssignAtomChiralTagsFromStructure(24)

AtomPDBResidueInfo(21)

FastFindRings(14)

AdjustQueryProperties(13)

DetectBondStereochemistry(10)

BondType(9)

FindPotentialStereoBonds(9)

Cleanup(8)

AdjustQueryParameters(8)

AtomFromSmiles(5)

AssignStereochemistryFrom3D(5)

EmbedMolecule(5)

FragmentOnBRICSBonds(4)

DetectChemistryProblems(4)

ClearMolSubstanceGroups(4)

CreateAtomDoublePropertyList(3)

ETKDG(3)

CreateMolSubstanceGroup(2)

FindAllSubgraphsOfLengthN(2)

AssignRadicals(2)

FindUniqueSubgraphsOfLengthN(2)

CalcPMI3(1)

GetBondBetweenAtoms(1)

GetBestRMS(1)

GetAtomPairFingerPrint(1)

CalcNumSpiroAtoms(1)

CalcPMI1(1)

FragmentOnSomeBonds(1)

CalcPMI2(1)

ClearMolSGroups(1)

AtomMonomerInfo(1)

ForwardSDMOLSupplier(1)

DetectBondStereoChemistry(1)

Example #1

Show file

def create_dataset(args):
    """Build the per-molecule samples for the dataset named ``args.dataset``.

    Reads ``dataset/<name>.txt``; every line must hold a SMILES string and a
    property value separated by exactly one space. Entries whose SMILES
    contains '.' are skipped. Each remaining molecule gets explicit hydrogens
    and is turned into a ``(fingerprints, adjacency, molecular_size,
    property)`` tuple, where the property is kept as the string read from
    the file.

    Relies on the module-level ``atom_dict``, ``bond_dict``,
    ``fingerprint_dict`` and ``edge_dict`` and on the helpers
    ``create_atoms``, ``create_ij_bond_dict`` and ``extract_fingerprints``.
    A progress counter is printed to stdout while processing.
    """
    from rdkit import Chem

    filename = os.path.join('dataset', '%s.pth' % (args.dataset))
    inputfile = os.path.join('dataset', '%s.txt' % (args.dataset))

    # Read everything first; the file is not needed while featurizing.
    with open(inputfile, 'r') as handle:
        records = handle.readlines()

    samples = []
    for position, record in enumerate(records, 1):
        smiles, target = record.strip('\n').split(' ')

        # Entries with '.' in the SMILES are excluded.
        if '.' in smiles:
            continue

        mol = Chem.AddHs(Chem.MolFromSmiles(smiles))
        atoms = create_atoms(mol, atom_dict)
        ij_bond_dict = create_ij_bond_dict(mol, bond_dict)
        fingerprints = extract_fingerprints(args.radius, atoms,
                                            ij_bond_dict, fingerprint_dict,
                                            edge_dict)
        adjacency = Chem.GetAdjacencyMatrix(mol)
        samples.append((fingerprints, adjacency, len(atoms), target))

        # '\r' keeps the progress counter on a single terminal line.
        print('\r%s: %5d/%5d' % (filename, position, len(records)), end='')
    print()

    return samples

Example #2

Show file

def generate_conformations(m, n):
    """Embed up to ``n`` conformers of ``m`` and collect force-field energies.

    Explicit hydrogens are added to a copy of ``m`` before embedding. For
    each embedded conformer an MMFF force field is used when the molecule has
    all MMFF parameters, otherwise UFF. When ``MAP_paths.MINI_Iterations`` is
    greater than zero the conformer is optimized and its energy is stored in
    the result dictionary; otherwise no energy is recorded.

    Returns:
        ``(mol, results)`` where ``mol`` is the hydrogenated molecule holding
        the conformers and ``results`` maps conformer id -> energy, or
        ``None`` when force-field parameters are missing or the force-field
        step raised an error (a message is printed in both cases).
    """
    mol = Chem.AddHs(m)
    ids = AllChem.EmbedMultipleConfs(mol, numConfs=n,
                                     useExpTorsionAnglePrefs=True,
                                     useBasicKnowledge=True)
    results = {}

    for i in ids:
        try:
            if Chem.rdForceFieldHelpers.MMFFHasAllMoleculeParams(mol):
                ff = AllChem.MMFFGetMoleculeForceField(
                    mol, AllChem.MMFFGetMoleculeProperties(mol), confId=i)
                ff.Initialize()
                # Pre-optimization energy: evaluated but not stored.
                ff.CalcEnergy()

                if MAP_paths.MINI_Iterations > 0:
                    AllChem.MMFFOptimizeMolecule(mol, confId=i)
                    results[i] = ff.CalcEnergy()

            elif Chem.rdForceFieldHelpers.UFFHasAllMoleculeParams(mol):
                ff = AllChem.UFFGetMoleculeForceField(mol, confId=i)
                ff.Initialize()
                # Pre-optimization energy: evaluated but not stored.
                ff.CalcEnergy()

                if MAP_paths.MINI_Iterations > 0:
                    AllChem.UFFOptimizeMolecule(mol, confId=i)
                    results[i] = ff.CalcEnergy()
            else:
                print(">> ERROR: missing force field parameters for atom(s) in your molecule.")
                return
        except Exception:
            # Catch only real errors so Ctrl-C / SystemExit still propagate.
            print(">> ERROR: something went wrong in force field minimization.")
            return

    return mol, results

Example #3

Show file

File: cheminfo.py Project: alongd/KinBot

def generate_3d_structure(smi, obabel=1):
    """
    Generate 3D coordinates and a bond-order matrix for a SMILES string.

    OpenBabel is used when ``obabel`` is truthy (the default), RDKit
    otherwise.

    Returns a tuple ``(mol, structure, bond)``:
      * ``mol``: the pybel molecule or the RDKit molecule (with hydrogens),
      * ``structure``: flat list ``[sym, x, y, z, sym, x, y, z, ...]``,
      * ``bond``: square integer matrix of bond orders between atom indices
        (0 where no bond exists). In the RDKit branch the order is the bond
        type as a double truncated to ``int``.
    """
    structure = []
    if obabel:  # use OpenBabel
        obmol = pybel.readstring('smi', smi)
        obmol.OBMol.AddHydrogens()
        obmol.make3D()
        ob_atoms = obmol.atoms
        natom = len(ob_atoms)
        bond = np.zeros((natom, natom), dtype=int)
        for i in range(natom):
            for j in range(natom):
                # GetBond is queried with i + 1 / j + 1, as in the original
                # code (presumably OpenBabel's 1-based atom indices).
                ob_bond = obmol.OBMol.GetBond(i + 1, j + 1)
                if ob_bond is not None:
                    bond[i][j] = ob_bond.GetBO()
        for at in ob_atoms:
            pos = at.coords
            sym = num_to_syms[at.atomicnum]
            structure += [sym, pos[0], pos[1], pos[2]]
        return obmol, structure, bond
    else:  # use RDKit
        rdmol = Chem.AddHs(Chem.MolFromSmiles(smi))
        AllChem.EmbedMolecule(rdmol, AllChem.ETKDG())
        AllChem.MMFFOptimizeMolecule(rdmol)
        natom = rdmol.GetNumAtoms()
        bond = np.zeros((natom, natom), dtype=int)
        # Walk the bonds once instead of probing every atom pair; the matrix
        # is symmetric, so both (i, j) and (j, i) are filled.
        for b in rdmol.GetBonds():
            i = b.GetBeginAtomIdx()
            j = b.GetEndAtomIdx()
            order = int(b.GetBondTypeAsDouble())
            bond[i][j] = order
            bond[j][i] = order
        conformer = rdmol.GetConformer(0)
        for i, atom in enumerate(rdmol.GetAtoms()):
            pos = conformer.GetAtomPosition(i)
            structure += [atom.GetSymbol(), pos.x, pos.y, pos.z]
        return rdmol, structure, bond

Example #4

Show file

File: utils.py Project: colliner/pyCBH

def _obabel_xyz_coordinates(command):
  """Run an ``obabel ... -oxyz`` command line and parse the printed coordinates.

  The first two output lines are skipped; for every remaining line that
  splits into more than three space-separated fields, the non-empty fields
  after the first one are converted to floats.
  """
  process = subprocess.Popen(command.split(), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  output, _ = process.communicate()
  coordinates = []
  for fields in (row.split(' ') for row in output.decode("utf-8").split("\n")[2:]):
    if len(fields) > 3:
      coordinates.append([float(x) for x in fields[1:] if len(x) > 0])
  return coordinates


def xyzfromsmi(smi):
  """Return atom labels and obabel-generated XYZ coordinates for a SMILES.

  The molecule is built with RDKit (hydrogens added, embedded, kekulized).
  Coordinates come from running ``obabel --gen3d`` on a mol file written to
  ``fragment_lookup/tmp.mol``; if anything in that path fails, the SMILES
  itself is written to ``tmp`` and converted by obabel instead.

  Returns:
    ``(atoms, xyz_coordinates)`` where ``atoms`` are the ``atom2label``
    values of the hydrogenated RDKit molecule's atoms and
    ``xyz_coordinates`` is a list of ``[x, y, z]`` float lists.
  """
  mol = Chem.MolFromSmiles(smi)
  mol = Chem.AddHs(mol)
  AllChem.EmbedMolecule(mol)
  Chem.Kekulize(mol, clearAromaticFlags=True)
  try:
    mol_ = Chem.RemoveHs(mol)
    with open('fragment_lookup/tmp.mol', "w") as FILE:
      FILE.write(Chem.MolToMolBlock(mol_))
    xyz_coordinates = _obabel_xyz_coordinates(
        'obabel -imol fragment_lookup/tmp.mol -oxyz --gen3d -xb')
  except Exception:
    # Fallback: let obabel build the structure straight from the SMILES.
    # Only real errors are caught so Ctrl-C / SystemExit still propagate.
    with open('tmp', 'w') as FILE:
      FILE.write(smi)
    xyz_coordinates = _obabel_xyz_coordinates('obabel -ismi tmp -oxyz --gen3d -xb ')
  # NOTE(review): labels follow the RDKit atom order while the coordinates
  # follow obabel's output order - confirm that the two always agree.
  atoms = [atom2label(atom.GetSymbol()) for atom in mol.GetAtoms()]
  return atoms, xyz_coordinates

Example #5

Show file

def get_charge(mol, property_name, do_charge):
    """Attach per-atom weights or Gasteiger charges to ``mol``.

    When ``do_charge`` is ``False`` the molecule is validated with
    ``check_mol``. If validation succeeds, the newline-separated values
    stored in the molecule property ``property_name`` are parsed as floats
    and written onto the atoms of the hydrogen-stripped molecule. If it
    fails, hydrogens are added, every atom receives the equal weight
    ``1 / n_atoms`` under the property name ``'equal_w'``, and the error code
    is reset to 0. In both cases hydrogens are removed before returning.

    When ``do_charge`` is ``True`` Gasteiger charges are computed and the
    molecule is then validated with ``check_mol``.

    Returns:
        ``(mol, property_name, err)``; ``property_name`` may have been
        replaced by ``'equal_w'`` and ``err`` is the ``check_mol`` result
        (or 0 after the equal-weight fallback).
    """
    from rdkit.Chem import AllChem as Chem
    err = 0

    # partial charges
    if do_charge is False:
        err = check_mol(mol, property_name, do_charge)
        if err == 0:
            # prepares molecule
            mol = Chem.RemoveHs(mol)
            n_at = mol.GetNumAtoms()
            # extracts the property according to the set name
            string_values = mol.GetPropsAsDict()[property_name].split("\n")
            # A real list is required: on Python 3 np.asarray(map(...))
            # wraps the map object in a 0-d array that cannot be indexed.
            w = np.asarray([float(value) for value in string_values])
        else:
            mol = Chem.AddHs(mol)
            n_at = mol.GetNumAtoms()
            # same 1-D float format as the parsed values above
            w = np.ones(n_at) / n_at
            property_name = 'equal_w'
            err = 0
        # store the value on each atom
        for atom in range(n_at):
            mol.GetAtomWithIdx(atom).SetDoubleProp(property_name,
                                                   float(w[atom]))

        mol = Chem.RemoveHs(mol)

    # Gasteiger-Marsili Charges
    elif (do_charge is True) and (err == 0):
        Chem.ComputeGasteigerCharges(mol)
        err = check_mol(mol, property_name, do_charge)

    return mol, property_name, err

Example #6

Show file

File: decoy_utils.py Project: oxpig/DeepCoy

def calc_props_dekois(smiles):
    """Compute a fixed set of physico-chemical properties for a SMILES.

    The molecule is parsed and given explicit hydrogens, then molecular
    weight, logP, Lipinski HBA/HBD counts, aromatic ring count, rotatable
    bond count and the positive/negative charge counts from ``calc_charges``
    are collected.

    Returns:
        ``(prop_dict, prop_array)`` where ``prop_array`` lists the values in
        the order mol_wg, log_p, hba, hbd, ring_ct, rot_bnds, pos_charge,
        neg_charge. If any step fails, ``({}, [0, 0, 0, 0, 0, 0, 0, 0])`` is
        returned instead of raising.
    """
    keys = ('mol_wg', 'log_p', 'hba', 'hbd', 'ring_ct', 'rot_bnds',
            'pos_charge', 'neg_charge')
    try:
        mol = Chem.AddHs(Chem.MolFromSmiles(smiles))
        descriptors = Chem.rdMolDescriptors
        prop_dict = {
            'mol_wg': Descriptors.MolWt(mol),
            'log_p': Chem.Crippen.MolLogP(mol),
            'hba': descriptors.CalcNumLipinskiHBA(mol),
            'hbd': descriptors.CalcNumLipinskiHBD(mol),
            'ring_ct': descriptors.CalcNumAromaticRings(mol),
            'rot_bnds': descriptors.CalcNumRotatableBonds(mol),
        }
        # Formal charges
        prop_dict['pos_charge'], prop_dict['neg_charge'] = calc_charges(mol)

        return (prop_dict, [prop_dict[key] for key in keys])

    except Exception:
        # Best-effort: unparsable or problematic molecules yield the zero
        # vector; KeyboardInterrupt / SystemExit are no longer swallowed.
        return ({}, [0, 0, 0, 0, 0, 0, 0, 0])

Example #7

Show file

File: charge.py Project: ABorrel/CompDESC

def getLDI(mol):
    """
    Calculate the local dipole index (D) of a molecule.

    Gasteiger charges are computed on a hydrogen-added copy of ``mol`` (using
    the module-level ``iter_step``). The index is the mean absolute charge
    difference over all bonds, rounded to 6 decimals. Charges that are NaN or
    infinite (of either sign) are treated as 0.0. Returns 0.0 for a molecule
    without bonds.
    """
    Hmol = Chem.AddHs(mol)
    GMCharge.ComputeGasteigerCharges(Hmol, iter_step)
    res = []
    for atom in Hmol.GetAtoms():
        charge = float(atom.GetProp('_GasteigerCharge'))
        # isfinite rejects NaN, +inf and -inf (the old test missed -inf).
        res.append(charge if math.isfinite(charge) else 0.0)
    B = len(Hmol.GetBonds())
    if B == 0:
        return 0.0
    cc = [
        numpy.absolute(res[x.GetBeginAtom().GetIdx()] -
                       res[x.GetEndAtom().GetIdx()]) for x in Hmol.GetBonds()
    ]
    return round(sum(cc) / B, 6)

Example #8

Show file

File: xyz.py Project: learningmatter-mit/NeuralForceField

def get_mol(xyz, smiles, with_conformer=True):
    """
    Get an RDKit mol from an xyz and smiles. Note that this
    assumes that the xyz is ordered in the same way an RDKit
    object of the same smiles would be ordered, and that there
    is no change in connectivity between the RDKit mol and the
    xyz.

    Args:
        xyz (torch.Tensor): atom type and xyz of geometry.
        smiles (str): SMILES string
        with_conformer (bool): also add conformer to the RDKit mol.
            When False, the mol is returned without any conformer and
            ``xyz`` is not used.
    Returns:
        mol (rdkit.Chem.rdchem.Mol): RDKit mol object (with explicit
            hydrogens)
    """

    mol = Chem.MolFromSmiles(smiles)
    mol = Chem.AddHs(mol)
    if with_conformer:
        # Build and attach the conformer only when requested; previously
        # AddConformer ran unconditionally and hit an unbound `conformer`.
        conformer = get_conformer(xyz)
        mol.AddConformer(conformer)

    return mol

Example #9

Show file

def opt_mol_from_smi(smi, save_name):
    """
    Build, embed and MMFF-optimize a molecule from SMILES and write it as SD.

    Hydrogens are added for embedding/optimization (fixed random seed 3) and
    removed again before writing. A set of placeholder properties is attached
    to the molecule, which is then written to the file ``save_name``.

    Returns ``save_name + ".mol"``.
    NOTE(review): the file is written to ``save_name`` itself, not to the
    returned name - confirm what callers expect.
    """
    mol = Chem.MolFromSmiles(smi)
    mol = Chem.AddHs(mol)

    AllChem.EmbedMolecule(mol, randomSeed=3)
    AllChem.MMFFOptimizeMolecule(mol)
    mol = Chem.rdmolops.RemoveHs(mol)

    writer = Chem.SDWriter(save_name)
    try:
        mol.SetProp('_Name', 'Chemistry ' + save_name)
        mol.SetProp('STEREOCHEM', str(1.00))
        mol.SetProp('EF', str(1.00))
        mol.SetProp('MOL_WEIGHT', str(1.00))
        mol.SetProp('COMPOUND_ID', str(1.00))
        mol.SetProp('SUPPLIER', str(1.00))
        mol.SetProp('COMMEN', str(1.00))
        writer.write(mol)
    finally:
        # Close explicitly so the record is flushed to disk even on error.
        writer.close()
    return save_name + ".mol"

Example #10

Show file

File: decoy_to_sdf.py Project: abhik78/ligand_screener

def create_3d_sdf_from_smiles(smiles_zincid_dict, decoy_sdf):

    '''
    Create a combined 3D SD file from SMILES using RDKit.

    Each SMILES is hydrogenated, embedded with random starting coordinates,
    UFF-optimized, named "ZIN" + <molecule name> and appended to the output
    file. Molecules for which any of these steps fails are skipped and a
    message is printed.

    :param smiles_zincid_dict: dictionary mapping smiles -> molecule name
    :param decoy_sdf: path of the 3D sdf file to write
    :return: None (the result is the file written to ``decoy_sdf``)
    '''
    writer = Chem.SDWriter(decoy_sdf)
    try:
        for smiles, zinc_id in smiles_zincid_dict.items():
            try:
                molH = Chem.AddHs(Chem.MolFromSmiles(smiles))

                AllChem.EmbedMolecule(molH, useRandomCoords=True)
                AllChem.UFFOptimizeMolecule(molH)
                molH.SetProp("_Name", "ZIN" + zinc_id)
                writer.write(molH)
            except Exception:
                # Best-effort per molecule; Ctrl-C / SystemExit propagate.
                print("no conformer for Zin{}".format(zinc_id))
    finally:
        # Always flush/close the output, even if the loop is interrupted.
        writer.close()

Example #11

Show file

File: mol.py Project: MobleyLab/openff-spellbook

def build_from_smiles(smiles_pattern, protonate=False, openff_compatible=True):
    """Build an RDKit molecule from SMILES using the MDL aromaticity model.

    The SMILES is parsed without sanitization and then sanitized with every
    step except aromaticity perception; hydrogen adjustment is additionally
    switched off unless ``protonate`` is set. With ``protonate`` explicit
    hydrogens are added afterwards. Aromaticity is then assigned with the MDL
    model, and with ``openff_compatible`` the molecule is kekulized at the
    end.
    """
    sanitize = Chem.SanitizeFlags

    mol = Chem.MolFromSmiles(smiles_pattern, sanitize=False)

    # XOR clears the individual steps from the "all" mask.
    flags = sanitize.SANITIZE_ALL ^ sanitize.SANITIZE_SETAROMATICITY
    if not protonate:
        flags = flags ^ sanitize.SANITIZE_ADJUSTHS
    Chem.SanitizeMol(mol, flags)

    if protonate:
        mol = Chem.AddHs(mol)

    Chem.SetAromaticity(mol, Chem.AromaticityModel.AROMATICITY_MDL)
    Chem.SanitizeMol(mol, sanitize.SANITIZE_SETAROMATICITY)

    if not openff_compatible:
        return mol

    # The toolkit does not accept float bond orders, so aromatic bonds are
    # converted to an explicit single/double pattern.
    Chem.Kekulize(mol)
    return mol

Example #12

Show file

File: molkit.py Project: chemistry-scripts/PyFrag

def add_prot_Hs(rdmol):
    """
    Add hydrogens to molecules read from PDB.

    Hydrogens are added with coordinates. Every hydrogen that has no PDB
    residue info receives the residue info of the atom it is bonded to; if
    that fails, a message with both atom indices is printed and processing
    continues.

    Returns the new molecule with hydrogens.
    """
    retmol = Chem.AddHs(rdmol, addCoords=True)
    for atom in retmol.GetAtoms():
        if atom.GetPDBResidueInfo() is None and atom.GetSymbol() == "H":
            bond = atom.GetBonds()[0]
            # Pick the bond partner. GetIdx must be *called*; comparing with
            # the bound method was always False and could select the
            # hydrogen itself.
            if bond.GetBeginAtom().GetIdx() == atom.GetIdx():
                connected_atom = bond.GetEndAtom()
            else:
                connected_atom = bond.GetBeginAtom()
            try:
                ResInfo = connected_atom.GetPDBResidueInfo()
                atom.SetMonomerInfo(ResInfo)
            except Exception:
                print(
                    "Hydrogen annotation failed:",
                    connected_atom.GetIdx(),
                    atom.GetIdx(),
                )
    return retmol

Example #13

Show file

def multi_prods(mol_list, rxn, debug=False):
    """Run a four-product reaction on every molecule and collect the products.

    Each molecule has its property cache updated and rings perceived; those
    for which this fails are reported and skipped. The reaction is then run
    on the hydrogen-added molecule, and for every product set the first four
    products are appended to four parallel lists.

    Args:
        mol_list: iterable of RDKit molecules.
        rxn: reaction object providing ``RunReactants``.
        debug: when true, print the SMILES of each molecule first.

    Returns:
        ``(prod1_list, prod2_list, prod3_list, prod4_list)``.
    """
    prod1_list = []
    prod2_list = []
    prod3_list = []
    prod4_list = []
    for mol in mol_list:
        if debug:
            print(MolToSmiles(mol))
        try:
            mol.UpdatePropertyCache()
            FastFindRings(mol)
        except Exception:
            # Skip molecules that cannot be prepared; Ctrl-C / SystemExit
            # are no longer swallowed.
            print('This mol fails! ' + MolToSmiles(mol))
            continue
        # An empty result simply yields no iterations.
        for prod in rxn.RunReactants((Chem.AddHs(mol), )):
            prod1_list.append(prod[0])
            prod2_list.append(prod[1])
            prod3_list.append(prod[2])
            prod4_list.append(prod[3])
    return prod1_list, prod2_list, prod3_list, prod4_list

Example #14

Show file

def CalculateHydrogenNumber(mol):
    """
    Count the hydrogen atoms of a molecule (descriptor ``nhyd``).

    Explicit hydrogens are added to a copy of ``mol`` and every atom with
    atomic number 1 is counted.

    Usage:

        result = CalculateHydrogenNumber(mol)

        Input: mol is a molecule object.

        Output: result is an integer count.
    """
    with_hydrogens = Chem.AddHs(mol)
    return sum(1 for atom in with_hydrogens.GetAtoms()
               if atom.GetAtomicNum() == 1)

Example #15

Show file

def convert_sugar_forms(molecule):
    """Enumerate forms of ``molecule`` produced by four ring-opening reactions.

    Each reaction SMARTS below is applied to ``molecule``. Every product
    whose isomeric SMILES has not been seen yet (the hydrogen-stripped input
    counts as seen) is re-parsed from that SMILES, given explicit hydrogens
    and appended to the result.

    Returns a list starting with ``molecule`` itself, followed by the new
    product molecules. Products are walked via the external ``traverse``
    helper.
    """
    reaction_smarts = (
        '[O:1]1[C:2]([C:8])[C:3][C:4][C:5][C:6]1[O:7][H]>>[H][O:1][C:2]([C:8])[C:3][C:4][C:5][C:6]=[O:7]',
        '[O:6]1[C:2]([O:7][H])([C:1])[C:3][C:4][C:5]1>>[C:1][C:2](=[O:7])[C:3][C:4][C:5][O:6][H]',
        '[O:1]1[C:2]([H:8])[C:3][C:4][C:5][C:6]1[O:7][H]>>[H][O:1][C:2]([H:8])[C:3][C:4][C:5][C:6]=[O:7]',
        '[O:6]1[C:2]([O:7][H])([H:1])[C:3][C:4][C:5]1>>[H:1][C:2](=[O:7])[C:3][C:4][C:5][O:6][H]',
    )
    reactions = [AllChem.ReactionFromSmarts(text) for text in reaction_smarts]

    forms = [molecule]
    known = {Chem.MolToSmiles(Chem.RemoveHs(molecule), isomericSmiles=True)}
    for reaction in reactions:
        for product in traverse(reaction.RunReactants((molecule,))):
            product_smiles = Chem.MolToSmiles(product, isomericSmiles=True)
            if product_smiles in known:
                continue
            # Round-trip through SMILES, then restore explicit hydrogens.
            product_mol = Chem.AddHs(Chem.MolFromSmiles(product_smiles))
            known.add(product_smiles)
            forms.append(product_mol)
    return forms

Example #16

Show file

File: mol_preprocessor.py Project: zizai/chainer-chemistry

    def prepare_smiles_and_mol(self, mol):
        """Return the canonical SMILES of `mol` and a mol rebuilt from it.

        The parser class calls this before `get_input_features`. Subclasses
        may override it to customise how `smiles`/`mol` are extracted.

        Args:
            mol (mol): mol instance

        Returns (tuple): (`smiles`, `mol`) where `smiles` is the canonical,
            non-isomeric SMILES and `mol` is re-created from that string,
            with hydrogens added if `self.add_Hs` and kekulized if
            `self.kekulize`.
        """
        # A molecule can be written as many SMILES strings; the canonical
        # form gives one unique representative.
        canonical_smiles = Chem.MolToSmiles(
            mol, isomericSmiles=False, canonical=True)
        rebuilt = Chem.MolFromSmiles(canonical_smiles)
        if self.add_Hs:
            rebuilt = Chem.AddHs(rebuilt)
        if self.kekulize:
            Chem.Kekulize(rebuilt)
        return canonical_smiles, rebuilt

Example #17

Show file

File: fingerprints.py Project: autolife/descriptors

def calculate_and_write_fp(title, mol, outf, _type, n, nbits, compress):
    """Compute a fingerprint for ``mol`` and write it as one tab-separated line.

    Args:
        title: text written at the start of the line.
        mol: RDKit molecule.
        outf: writable text stream.
        _type: 1 = Morgan bit vector (ecfp), 2 = feature-based Morgan bit
            vector (fcfp), 3 = MACCS keys (computed on the hydrogen-added
            molecule, with the first bit of the bit string dropped).
        n: Morgan radius (ignored for MACCS).
        nbits: Morgan bit-vector length (ignored for MACCS).
        compress: if true, write the whole bit string as one field;
            otherwise write each bit as its own tab-separated field.

    Raises:
        ValueError: if ``_type`` is not 1, 2 or 3. Nothing is written to
            ``outf`` in that case.
    """
    if _type == 1:  # ecfp
        fp = AllChem.GetMorganFingerprintAsBitVect(mol, n, nBits=nbits)
        bit_string = fp.ToBitString()
    elif _type == 2:  # fcfp
        fp = AllChem.GetMorganFingerprintAsBitVect(mol,
                                                   n,
                                                   nBits=nbits,
                                                   useFeatures=True)
        bit_string = fp.ToBitString()
    elif _type == 3:  # MACCS
        mol = Chem.AddHs(mol)
        fp = MACCSkeys.GenMACCSKeys(mol)
        bit_string = fp.ToBitString()[1:]
    else:
        # Fail before writing anything; previously the title was written and
        # the function then crashed on the undefined bit string.
        raise ValueError("Unknown fingerprint type: {!r}".format(_type))
    outf.write(title)
    if compress:
        outf.write("\t" + bit_string)
    else:
        for bit in bit_string:
            outf.write("\t" + bit)
    outf.write("\n")

Example #18

Show file

File: gnnfeat.py Project: AspirinCode/jova_baselines

 def _featurize(self, mol, smiles):
     """
     Build the GNN input for one compound.

     Hydrogens are added first. The compound is then split into its
     disconnected fragments; fingerprints are extracted per fragment and
     concatenated, which lets SMILES containing '.' be handled instead of
     being discarded. The adjacency is created from the whole (hydrogenated)
     molecule.

     :param mol: RDKit molecule
     :param smiles: SMILES string stored alongside the features
     :return: GnnMol(mol, fingerprints, adjacency, smiles)
     """
     mol = Chem.AddHs(mol)  # hydrogens take part in the featurization

     def fragment_fingerprints(frag_mol):
         # atoms -> bond dictionary -> fingerprints, for a single fragment
         atoms = create_atoms(self, frag_mol)
         i_jbond_dict = create_ijbonddict(self, frag_mol)
         return extract_fingerprints(self, atoms, i_jbond_dict, self.radius)

     per_fragment = [
         fragment_fingerprints(frag_mol)
         for frag_mol in Chem.GetMolFrags(mol, asMols=True)
     ]
     return GnnMol(mol, np.concatenate(per_fragment), create_adjacency(mol),
                   smiles)

Example #19

Show file

def parse_smiles(smiles):
    """Return element symbols and embedded 3D coordinates for a SMILES.

    The molecule is hydrogenated and embedded with RDKit, written out as a
    mol block, and the block is parsed as text: every line with more than
    five whitespace-separated fields that neither starts with 'M' nor ends
    with 'V2000' is taken as an atom line (fields 0-2 = x, y, z; field 3 =
    element symbol).

    Returns:
        ``(E, G)``: list of element symbols and a numpy array of the
        coordinates, one row per atom line.
    """
    # RDKit is imported lazily, only when the function is used.
    from rdkit import Chem
    from rdkit.Chem import AllChem

    mol = Chem.AddHs(Chem.MolFromSmiles(smiles))
    AllChem.EmbedMolecule(mol)

    elements = []
    coordinates = []
    for row in Chem.MolToMolBlock(mol).split('\n'):
        tokens = row.split()
        # Skip everything that is not an atom line (header, counts line,
        # shorter lines and the trailing 'M  END').
        if len(tokens) <= 5 or tokens[0] == 'M' or tokens[-1] == 'V2000':
            continue
        elements.append(tokens[3])
        coordinates.append([float(token) for token in tokens[:3]])

    return elements, np.array(coordinates)

Example #20

Show file

File: make_conformers.py Project: ConorFWild/pandda_science

def make_conformers(smiles_path, output_dir, num_confs=300, rms_thresh=1.5):
    """Embed conformers for the SMILES stored at ``smiles_path`` and save them.

    Up to ``num_confs`` conformers are embedded on the hydrogenated molecule,
    pruned with the RMS threshold ``rms_thresh``. Hydrogens are removed and
    each conformer is handed to ``output_conformer`` with the target path
    ``output_dir / "<index>.pdb"``.

    Returns:
        ``(number_of_conformers, output_dir)``.
    """
    parsed = Chem.MolFromSmiles(smiles_from_path(smiles_path))
    hydrogenated = Chem.AddHs(parsed)
    conformer_ids = AllChem.EmbedMultipleConfs(
        hydrogenated, numConfs=num_confs, pruneRmsThresh=rms_thresh)

    heavy_only = Chem.RemoveHs(hydrogenated)
    for index, conformer_id in enumerate(conformer_ids):
        target = output_dir / "{}.pdb".format(index)
        output_conformer(heavy_only, conformer_id, target)

    return len(conformer_ids), output_dir

Example #21

Show file

def from_smarts(smarts, nconfs=1, name=None, forcefield=None, rms=0.1):
    """
    Generate PLAMS molecule(s) from a SMARTS string.

    Unlike SMILES input, this makes it possible to spell out hydrogens
    explicitly. It is less suitable for aromatic molecules; use from_smiles
    for those. Only the first whitespace-separated token of ``smarts`` is
    used; it is also stored on the RDKit molecule as the 'smiles' property.

    :parameter str smarts: A smarts string
    :parameter int nconfs: Number of conformers to be generated
    :parameter str name: A name for the molecule
    :parameter str forcefield: 'uff' or 'mmff' forcefield for geometry
        optimization and ranking of conformations. With the default None the
        geometry optimization step is skipped.
    :parameter float rms: Root Mean Square deviation threshold for removing
        similar/equivalent conformations.
    :return: A molecule with hydrogens and 3D coordinates or a list of molecules if nconfs > 1
    :rtype: |Molecule| or list of PLAMS Molecules
    """
    pattern = str(smarts.split()[0])

    query = Chem.MolFromSmarts(pattern)
    Chem.SanitizeMol(query)

    hydrogenated = Chem.AddHs(query)
    hydrogenated.SetProp('smiles', pattern)
    return get_conformations(hydrogenated, nconfs, name, forcefield, rms)

Example #22

Show file

    def test0AddHds(self):
        """AddHs with coordinates on "CC": 8 atoms at the expected positions."""
        heavy = Chem.MolFromSmiles("CC")
        start_conf = Chem.Conformer(1)
        start_conf.SetAtomPosition(0, Point3D(-0.5, 0.0, 0.0))
        start_conf.SetAtomPosition(1, Point3D(1.0, 0.0, 0.0))
        heavy.AddConformer(start_conf)

        heavy_conf = heavy.GetConformer()
        self.assertTrue(heavy_conf.GetNumAtoms() == 2)

        # positional arguments: explicitOnly=0, addCoords=1
        with_hs = Chem.AddHs(heavy, 0, 1)
        full_conf = with_hs.GetConformer()
        self.assertTrue(full_conf.GetNumAtoms() == 8)
        # the conformer of the input molecule must be left untouched
        self.assertTrue(heavy_conf.GetNumAtoms() == 2)

        expected_positions = [
            [-0.5, 0.0, 0.0], [1.0, 0.0, 0.0],
            [-0.8667, 0.0, 1.03709], [-0.8667, 0.8981, -0.5185],
            [-0.8667, -0.8981, -0.5185], [1.3667, 0.0, -1.0371],
            [1.36667, 0.8981, 0.5185], [1.36667, -0.8981, 0.5185],
        ]

        for atom_idx, coords in enumerate(expected_positions):
            actual = full_conf.GetAtomPosition(atom_idx)
            self.assertTrue(ptEq(actual, Point3D(*coords)))

Example #23

Show file

File: edit_entry.py Project: ACasey13/ACS-JCIM-3D-Convnet

def canon(df, idx):
    """Canonicalize the SMILES of row ``idx`` and record formula and status.

    On success the row receives ``c_smiles`` (SMILES of the hydrogen-added
    molecule), ``status`` 0 and ``Formula``. On any error ``status`` is set
    to -2 and the problem is printed. The data frame is modified in place
    and also returned.
    """
    print('trying to canonize smile for idx: {}'.format(idx))
    # Bound up front so the error handler can report it even when the lookup
    # itself is what failed.
    smile = None
    try:
        smile = df.loc[idx]['smiles']
        m = Chem.MolFromSmiles(smile)
        m = Chem.AddHs(m)
        c_smile = Chem.MolToSmiles(m)
        df.loc[idx, 'c_smiles'] = c_smile
        df.loc[idx, 'status'] = 0

        formula = CalcMolFormula(m)
        if 'Cl' in formula:
            # Move "Cl" to the end of the formula string.
            # NOTE(review): only the substring "Cl" is moved; any count that
            # followed it stays where it was - confirm this is intended.
            formula = formula.replace('Cl', '')
            formula = formula + 'Cl'

        df.loc[idx, 'Formula'] = formula

    except Exception as e:
        df.loc[idx, 'status'] = -2
        print("could not convert smile {} of molecule {} : {}".format(
            smile, idx, df.loc[idx]['Name']))
        print('Exception: {}'.format(e))
    return df

Example #24

Show file

    def generate_conformers(self, savefolder, savename="molecule_conformers", filetype="pdb",
                            savefolder_exist_ok=False, num_confs=400):
        """
        Generates ligand conformers and saves each one to its own file.

        A copy of the molecule is hydrogenated, conformers are embedded
        (RMS pruning threshold 1.0, up to 10000 attempts), each conformer is
        UFF-optimized and then written to
        ``<savefolder>/<savename>_<n>.<filetype>`` with n starting at 1.

        Parameters
        ----------
        savefolder: str
            Path to directory where the results will be saved
        savename: str
           Name of the generated files. example filename: <savename>_1.pdb
        filetype: str
           must be 'pdb' or 'sdf'
        savefolder_exist_ok: bool
           if false, an error is raised if savefolder already exists
        num_confs: int
           Number of conformers to generate.

        Raises
        ------
        ValueError
            If ``filetype`` is neither 'pdb' nor 'sdf'.
        """
        from rdkit.Chem import AllChem
        os.makedirs(savefolder, exist_ok=savefolder_exist_ok)

        # Validate the format before the expensive embedding/optimization.
        if filetype == "pdb":
            chemwrite = Chem.PDBWriter
        elif filetype == "sdf":
            chemwrite = Chem.SDWriter
        else:
            raise ValueError("Unknown file format. Cannot save to format '{}'".format(filetype))

        mol = deepcopy(self._mol)
        mol = Chem.AddHs(mol)
        ids = AllChem.EmbedMultipleConfs(mol, numConfs=num_confs, pruneRmsThresh=1., maxAttempts=10000)
        for conf_id in ids:
            AllChem.UFFOptimizeMolecule(mol, confId=conf_id)
        for index, conf_id in enumerate(ids):
            writer = chemwrite(os.path.join(savefolder, '{}_{}.{}'.format(savename, index + 1, filetype)))
            try:
                writer.write(mol, confId=conf_id)
            finally:
                # Close each writer so its file is flushed and released.
                writer.close()
Example #25

Show file

File: _rdkit.py Project: sjklipp/autochem

def to_conformers(rdm, nconfs):
    """ Generate molecular geometries for a set of conformers
        from an RDKit molecule object.

        Hydrogens are added first. A single-atom molecule yields one
        geometry at the origin. Otherwise conformers are embedded,
        MMFF-optimized and returned sorted by the energy reported by the
        optimizer. If no conformer could be embedded, an empty list is
        returned.

        Currently not removing redundant conformers.

        :param rdm: molecule object
        :type rdm: RDKit molecule object
        :param nconfs: number of conformers to generate
        :type nconfs: int
        :rtype: list of automol geometry data structures
    """

    rdm = _rd_chem.AddHs(rdm)
    atms = rdm.GetAtoms()
    natms = len(rdm.GetAtoms())
    if natms == 1:
        syms = [str(atms[0].GetSymbol()).title()]
        xyzs = [(0., 0., 0.)]
        return [automol.create.geom.from_data(syms, xyzs, angstrom=True)]

    cids = _rd_all_chem.EmbedMultipleConfs(rdm, numConfs=nconfs)
    res = _rd_all_chem.MMFFOptimizeMoleculeConfs(rdm)
    if len(res) == 0:
        # Nothing was embedded; unzipping the empty result would raise.
        return []
    # Second element of each per-conformer result is the energy.
    energies = list(zip(*res))[1]

    # Atom symbols are identical for every conformer.
    syms = tuple(str(rda.GetSymbol()).title() for rda in atms)
    geos = []
    for cid in cids:
        xyzs = tuple(map(tuple, rdm.GetConformer(cid).GetPositions()))
        geos.append(
            automol.create.geom.from_data(syms, xyzs, angstrom=True))

    # Sort geometries using the energies
    return [
        x for _, x in sorted(zip(energies, geos), key=lambda pair: pair[0])
    ]

Example #26

Show file

def mol_to_hg(mol, kekulize, add_Hs):
    """
    Convert a molecule into a hypergraph via its bipartite representation.

    Every bond node of the bipartite graph becomes a hypergraph node
    (carrying its 'bond_attr'), and every atom node becomes a hyperedge over
    the bond nodes adjacent to it (carrying its 'atom_attr').

    Parameters
    ----------
    mol : rdkit.Chem.rdchem.Mol
        molecule object
    kekulize : bool
        kekulize the molecule before conversion or not
    add_Hs : bool
        add explicit hydrogens to the molecule before conversion or not

    Returns
    -------
    Hypergraph
    """
    if add_Hs:
        mol = Chem.AddHs(mol)
    if kekulize:
        Chem.Kekulize(mol)

    graph = mol_to_bipartite(mol, kekulize)
    hypergraph = Hypergraph()

    # Atom nodes are recognised by their 'atom_' name prefix.
    atom_nodes = [name for name in graph.nodes() if name.startswith('atom_')]
    for atom_node in atom_nodes:
        incident_bonds = set()
        for bond_node in graph.adj[atom_node]:
            hypergraph.add_node(
                bond_node, attr_dict=graph.node[bond_node]['bond_attr'])
            incident_bonds.add(bond_node)
        hypergraph.add_edge(
            incident_bonds, attr_dict=graph.node[atom_node]['atom_attr'])
    return hypergraph

Example #27

Show file

File: vae2.py Project: jameslee97/adaptive_learning_mixtures

def generate_structures(vae, smi, char_to_index, limit=1e4, write=False):
    """Sample unique, embeddable structures from the VAE until it stalls.

    In each iteration a SMILES is decoded at a random temperature in
    [0, 2), parsed, hydrogenated, embedded, UFF-optimized and converted back
    to SMILES. New SMILES are recorded together with the temperature and
    iteration number. Sampling stops once more than ``limit`` consecutive
    iterations produced no new structure.

    Args:
        vae, smi, char_to_index: passed through to ``decode_smiles``.
        limit: number of consecutive unproductive iterations tolerated.
        write: if truthy, used as the base name of a CSV file
            (``<write>.csv``) that is rewritten after every iteration.

    Returns:
        DataFrame with columns 'smiles', 'temperature', 'iteration'
        (empty if no candidate was ever accepted).
    """
    columns = ['smiles', 'temperature', 'iteration']
    rdkit_mols = []
    temps = []
    iterations = []
    seen = set()  # O(1) duplicate check alongside the ordered list
    # Defined up front so the write branch and the return always have it.
    df = pd.DataFrame(columns=columns)
    iteration = limit_counter = 0
    while True:
        iteration += 1
        limit_counter += 1
        t = random.random() * 2
        candidate = decode_smiles(vae, smi, char_to_index,
                                  temp=t).split(" ")[0]
        try:
            sampled = Chem.MolFromSmiles(candidate)
            cation = Chem.AddHs(sampled)
            Chem.EmbedMolecule(cation, Chem.ETKDG())
            Chem.UFFOptimizeMolecule(cation)
            cation = Chem.RemoveHs(cation)
            candidate = Chem.MolToSmiles(cation)
            if candidate not in seen:
                seen.add(candidate)
                temps.append(t)
                iterations.append(iteration)
                rdkit_mols.append(candidate)
                limit_counter = 0
                df = pd.DataFrame([rdkit_mols, temps, iterations]).T
                df.columns = columns
                print(df)
        except Exception:
            # Invalid or non-embeddable candidates are expected and skipped;
            # Ctrl-C / SystemExit still propagate.
            pass
        if limit_counter > limit:
            break
        if write:
            # df always reflects the current lists (rebuilt on every accept).
            df.to_csv(path_or_buf='{}.csv'.format(write), index=False)
    return df

Example #28

Show file

    def from_smiles(cls, smiles_input, neutralize_molecule=True):
        """Create an instance from a SMILES string.

        The SMILES is parsed with RDKit (optionally neutralized), its InChI
        and canonical SMILES are stored, 3D coordinates are generated by
        embedding the hydrogenated molecule, and the resulting mol block is
        handed to ``molblock_data_extraction``. ``coordinate_metadata`` is
        filled as [software, version, method, calc.-time, datetime].
        """
        started = time.perf_counter()
        instance = cls()

        rd_mol = Chem.MolFromSmiles(smiles_input)
        if neutralize_molecule:
            rd_mol = neutralize_atoms(rd_mol)

        # Identifiers are taken before hydrogens are added.
        # TODO: producing warning, generate inchi's separately?
        instance.inchi = AllChem.MolToInchi(rd_mol)
        instance.smiles = Chem.MolToSmiles(rd_mol)

        # 3D coordinates: embed the hydrogenated molecule with a fixed seed.
        rd_mol = Chem.AddHs(rd_mol)
        AllChem.EmbedMolecule(rd_mol, randomSeed=0xF00D)
        molblock = Chem.MolToMolBlock(rd_mol)

        metadata = instance.coordinate_metadata
        metadata[0] = rdkit.__name__
        metadata[1] = rdkit.__version__
        metadata[2] = "EKTG"  # TODO: change to ETKDG
        metadata[4] = datetime.datetime.utcnow()

        # Extract the cartesian coordinates from the mol block.
        instance.molblock_data_extraction(molblock, rdkit_molblock=True)

        # Elapsed time covers everything up to and including the extraction.
        elapsed = time.perf_counter() - started
        instance.coordinate_metadata[3] = round(elapsed, 3)

        return instance

Example #29

Show file

File: RDKitPerformMinimization.py Project: sirimullalab/LigandNet

def MinimizeMolecule(Mol, MolCount, Writer):
    """Minimize a molecule with the configured force field and write it out.

    Behaviour is driven by the module-level ``OptionsInfo``: hydrogens are
    optionally added first, UFF or MMFF minimization is run with the
    configured iteration limit, hydrogens are optionally removed, and the
    molecule is written with ``Writer``.

    Returns False (after a warning) when the minimization raises a
    RuntimeError; otherwise True, even if the minimization did not converge
    (a warning is printed in that case).
    """
    options = OptionsInfo

    if options["AddHydrogens"]:
        Mol = Chem.AddHs(Mol)

    Status = 0
    try:
        if options["UseUFF"]:
            Status = AllChem.UFFOptimizeMolecule(
                Mol, maxIters=options["MaxIters"])
        elif options["UseMMFF"]:
            Status = AllChem.MMFFOptimizeMolecule(
                Mol, maxIters=options["MaxIters"])
        else:
            MiscUtil.PrintError(
                "Minimization couldn't be performed: Specified forcefield, %s, is not supported"
                % options["ForceField"])
    except RuntimeError as ErrMsg:
        MiscUtil.PrintWarning(
            "Minimization couldn't be performed for molecule %s:\n%s\n" %
            (RDKitUtil.GetMolName(Mol, MolCount), ErrMsg))
        return False

    # A non-zero status from the optimizer is reported as non-convergence.
    if Status != 0:
        MiscUtil.PrintWarning(
            "Minimization failed to converge for molecule %s in %d steps. Try using higher value for \"--maxIters\" option...\n"
            % (RDKitUtil.GetMolName(Mol, MolCount), options["MaxIters"]))

    if options["RemoveHydrogens"]:
        Mol = Chem.RemoveHs(Mol)

    Writer.write(Mol)
    return True

Example #30

Show file

def fragment_mol(smi, smi_id=''):
    """Fragment a molecule and return the fragmentations as CSV lines.

    Heavy-atom fragmentations (up to 4 cuts, plus a second pass with up to
    3 cuts) are collected first. Then explicit hydrogens are added and, if
    the molecule has fewer than 60 hydrogens, single cuts at
    hydrogen/heavy-atom bonds are added as well.

    Returns:
        set of ``'smi,smi_id,core,chains\\n'`` strings; empty if the SMILES
        cannot be parsed (a message is written to stderr).
    """
    mol = Chem.MolFromSmiles(smi)
    outlines = set()

    if mol is None:
        sys.stderr.write("Can't generate mol for: %s\n" % smi)
        return outlines

    def as_line(core, chains):
        return '%s,%s,%s,%s\n' % (smi, smi_id, core, chains)

    # heavy atoms
    heavy_pattern = "[!#1]!@!=!#[!#1]"
    four_cuts = rdMMPA.FragmentMol(mol,
                                   pattern=heavy_pattern,
                                   maxCuts=4,
                                   resultsAsMols=False,
                                   maxCutBonds=30)
    three_cuts = rdMMPA.FragmentMol(mol,
                                    pattern=heavy_pattern,
                                    maxCuts=3,
                                    resultsAsMols=False,
                                    maxCutBonds=30)
    outlines.update(
        as_line(core, chains) for core, chains in set(four_cuts + three_cuts))

    # hydrogen splitting
    mol = Chem.AddHs(mol)
    hydrogen_count = mol.GetNumAtoms() - mol.GetNumHeavyAtoms()
    if hydrogen_count < 60:
        hydrogen_cuts = rdMMPA.FragmentMol(mol,
                                           pattern="[#1]!@!=!#[!#1]",
                                           maxCuts=1,
                                           resultsAsMols=False,
                                           maxCutBonds=100)
        outlines.update(
            as_line(core, chains) for core, chains in hydrogen_cuts)
    return outlines