Example #1
0
def create_dataset(args):
    """Build the molecular-graph dataset named by ``args.dataset``.

    Reads ``dataset/<args.dataset>.txt``; every non-blank line must hold a
    SMILES string and a property value separated by whitespace. Molecules
    whose SMILES contains ``'.'`` are skipped.

    Args:
        args: object exposing ``dataset`` (file stem) and ``radius``
            (forwarded to ``extract_fingerprints``).

    Returns:
        list: one ``(fingerprints, adjacency, molecular_size, property)``
        tuple per retained molecule. ``property`` is the raw string read
        from the file.

    Raises:
        ValueError: if a non-blank line does not hold exactly two fields,
            or if RDKit cannot parse the SMILES.
    """
    from rdkit import Chem

    filename = os.path.join('dataset', '%s.pth' % (args.dataset))
    inputfile = os.path.join('dataset', '%s.txt' % (args.dataset))

    dataset = []

    # Load a dataset.
    with open(inputfile, 'r') as f:
        lines = f.readlines()

    total = len(lines)
    for index, line in enumerate(lines, 1):
        # split() tolerates tabs, repeated spaces and '\r\n' line endings.
        fields = line.split()
        if not fields:
            # Blank lines (e.g. a trailing newline) carry no data.
            continue
        if len(fields) != 2:
            raise ValueError(
                '%s line %d: expected "<smiles> <property>", got %r'
                % (inputfile, index, line))
        smiles, prop = fields

        # Exclude the data contains '.' in its smiles.
        if '.' in smiles:
            continue

        parsed = Chem.MolFromSmiles(smiles)
        if parsed is None:
            # MolFromSmiles signals failure with None; fail with context
            # instead of an opaque error inside AddHs.
            raise ValueError(
                '%s line %d: cannot parse SMILES %r'
                % (inputfile, index, smiles))

        # Create each data with the module-level helper functions.
        mol = Chem.AddHs(parsed)
        atoms = create_atoms(mol, atom_dict)
        molecular_size = len(atoms)
        ij_bond_dict = create_ij_bond_dict(mol, bond_dict)
        fingerprints = extract_fingerprints(args.radius, atoms,
                                            ij_bond_dict, fingerprint_dict,
                                            edge_dict)
        adjacency = Chem.GetAdjacencyMatrix(mol)

        dataset.append((fingerprints, adjacency, molecular_size, prop))

        # Progress indicator, rewritten in place on one terminal line.
        print('\r%s: %5d/%5d' % (filename, index, total), end='')
    print()

    return dataset
Example #2
0
def generate_conformations(m, n):
    """Embed up to ``n`` conformers of ``m`` and minimise them with MMFF or UFF.

    Hydrogens are added to a copy of ``m``. For every embedded conformer a
    force field is built (MMFF when the molecule is fully parameterised,
    otherwise UFF). When ``MAP_paths.MINI_Iterations > 0`` the conformer is
    optimised and its energy is stored.

    Args:
        m: RDKit molecule.
        n: number of conformers requested from ``EmbedMultipleConfs``.

    Returns:
        ``(mol, results)`` where ``mol`` carries the conformers and
        ``results`` maps conformer id -> energy. ``results`` stays empty when
        ``MAP_paths.MINI_Iterations`` is not positive. Returns ``None`` when
        force-field parameters are missing or minimisation raises.
    """
    mol = Chem.AddHs(m)
    ids = AllChem.EmbedMultipleConfs(mol, numConfs=n,
                                     useExpTorsionAnglePrefs=True,
                                     useBasicKnowledge=True)
    results = {}

    for conf_id in ids:
        try:
            if Chem.rdForceFieldHelpers.MMFFHasAllMoleculeParams(mol):
                ff = AllChem.MMFFGetMoleculeForceField(
                    mol, AllChem.MMFFGetMoleculeProperties(mol), confId=conf_id)
                optimize = AllChem.MMFFOptimizeMolecule
            elif Chem.rdForceFieldHelpers.UFFHasAllMoleculeParams(mol):
                ff = AllChem.UFFGetMoleculeForceField(mol, confId=conf_id)
                optimize = AllChem.UFFOptimizeMolecule
            else:
                print(">> ERROR: missing force field parameters for atom(s) in your molecule.")
                return None

            ff.Initialize()
            ff.CalcEnergy()

            # NOTE(review): MINI_Iterations only gates the optimisation; it is
            # not forwarded as maxIters. Confirm that is intended.
            if MAP_paths.MINI_Iterations > 0:
                optimize(mol, confId=conf_id)
                results[conf_id] = ff.CalcEnergy()
        except Exception:
            # Narrowed from a bare except so Ctrl-C / SystemExit propagate.
            print(">> ERROR: something went wrong in force field minimization.")
            return None

    return mol, results
Example #3
0
def generate_3d_structure(smi, obabel=1):
    """Generate 3D coordinates and a bond-order matrix from a SMILES string.

    Args:
        smi: SMILES string.
        obabel: truthy to use OpenBabel (default), falsy to use RDKit.

    Returns:
        ``(mol, structure, bond)`` where ``mol`` is the pybel or RDKit
        molecule, ``structure`` is a flat list
        ``[sym, x, y, z, sym, x, y, z, ...]`` and ``bond`` is a symmetric
        integer matrix of bond orders indexed by 0-based atom position.
    """
    structure = []
    if obabel:  # use OpenBabel
        obmol = pybel.readstring('smi', smi)
        obmol.OBMol.AddHydrogens()
        obmol.make3D()
        natom = len(obmol.atoms)
        bond = np.zeros((natom, natom), dtype=int)
        for i in range(natom):
            for j in range(natom):
                # OpenBabel atom indices are 1-based; look the bond up once.
                ob_bond = obmol.OBMol.GetBond(i + 1, j + 1)
                if ob_bond is not None:
                    bond[i][j] = ob_bond.GetBO()
        for at in obmol.atoms:
            pos = at.coords
            structure.extend([num_to_syms[at.atomicnum], pos[0], pos[1], pos[2]])
        return obmol, structure, bond

    # use RDKit
    rdmol = Chem.AddHs(Chem.MolFromSmiles(smi))
    AllChem.EmbedMolecule(rdmol, AllChem.ETKDG())
    AllChem.MMFFOptimizeMolecule(rdmol)
    natom = rdmol.GetNumAtoms()
    bond = np.zeros((natom, natom), dtype=int)
    # Walk the bond list instead of probing every atom pair; each bond fills
    # both symmetric entries.
    for b in rdmol.GetBonds():
        i = b.GetBeginAtomIdx()
        j = b.GetEndAtomIdx()
        # int() truncates fractional orders (e.g. aromatic 1.5 -> 1).
        order = int(b.GetBondTypeAsDouble())
        bond[i][j] = order
        bond[j][i] = order
    for i, atom in enumerate(rdmol.GetAtoms()):
        pos = rdmol.GetConformer(0).GetAtomPosition(i)
        structure.extend([atom.GetSymbol(), pos.x, pos.y, pos.z])
    return rdmol, structure, bond
Example #4
0
def _obabel_xyz(command):
  """Run an obabel command (argument list) and parse the xyz rows it prints."""
  process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  output, _ = process.communicate()
  coordinates = []
  # Skip the first two lines of the xyz output before reading atom rows.
  for row in output.decode("utf-8").split("\n")[2:]:
    fields = row.split(' ')
    if len(fields) > 3:
      # Drop the leading symbol; empty strings come from repeated spaces.
      coordinates.append([float(x) for x in fields[1:] if len(x) > 0])
  return coordinates


def xyzfromsmi(smi):
  """Return atom labels and obabel-generated 3D coordinates for a SMILES.

  The molecule is first written as a mol block and converted with
  ``obabel --gen3d``. If anything in that path raises, the raw SMILES is
  handed to obabel instead.

  Args:
    smi: SMILES string.

  Returns:
    ``(atoms, xyz_coordinates)``: labels from ``atom2label`` for every atom
    of the hydrogen-added RDKit molecule, and a list of float coordinate
    rows parsed from obabel's xyz output.
  """
  mol = Chem.MolFromSmiles(smi)
  mol = Chem.AddHs(mol)
  AllChem.EmbedMolecule(mol)
  Chem.Kekulize(mol, clearAromaticFlags=True)
  try:
    mol_ = Chem.RemoveHs(mol)
    with open('fragment_lookup/tmp.mol', "w") as FILE:
      FILE.write(Chem.MolToMolBlock(mol_))
    xyz_coordinates = _obabel_xyz(
        ['obabel', '-imol', 'fragment_lookup/tmp.mol', '-oxyz', '--gen3d', '-xb'])
  except Exception:
    # Best-effort fallback (narrowed from a bare except): let obabel parse
    # the SMILES directly.
    with open('tmp', 'w') as FILE:
      FILE.write(smi)
    xyz_coordinates = _obabel_xyz(
        ['obabel', '-ismi', 'tmp', '-oxyz', '--gen3d', '-xb'])
  atoms = [atom2label(atom.GetSymbol()) for atom in mol.GetAtoms()]
  return atoms, xyz_coordinates
Example #5
0
def get_charge(mol, property_name, do_charge):
    """Attach per-atom weights/charges to ``mol``.

    Args:
        mol: RDKit molecule.
        property_name: name of the molecule property holding newline-separated
            per-atom values (used when ``do_charge`` is False).
        do_charge: ``False`` to read the values from ``property_name``,
            falling back to equal weights ``1/n_atoms`` under the name
            ``'equal_w'`` when ``check_mol`` reports a problem; ``True`` to
            compute Gasteiger charges instead.

    Returns:
        ``(mol, property_name, err)``: the processed molecule, the property
        name actually used, and the error code from ``check_mol`` (reset to 0
        when the equal-weight fallback is taken).
    """
    from rdkit.Chem import AllChem as Chem
    err = 0

    # partial charges
    if do_charge is False:
        err = check_mol(mol, property_name, do_charge)
        if err == 0:
            # prepares molecule
            mol = Chem.RemoveHs(mol)
            n_at = mol.GetNumAtoms()
            # extracts the property according to the set name
            string_values = mol.GetPropsAsDict()[property_name].split("\n")
            # A list is required: np.asarray(map(...)) wraps the lazy map
            # object in a 0-d array on Python 3.
            w = np.array([float(value) for value in string_values])
        else:
            mol = Chem.AddHs(mol)
            n_at = mol.GetNumAtoms()
            # equal weights, same 1-D float layout as the branch above
            w = np.ones(n_at) / n_at
            property_name = 'equal_w'
            err = 0
        # store the value on every atom
        for atom in range(n_at):
            mol.GetAtomWithIdx(atom).SetDoubleProp(property_name, float(w[atom]))

        mol = Chem.RemoveHs(mol)

    # Gasteiger-Marsili Charges
    elif (do_charge is True) and (err == 0):
        Chem.ComputeGasteigerCharges(mol)
        err = check_mol(mol, property_name, do_charge)

    return mol, property_name, err
Example #6
0
def calc_props_dekois(smiles):
    """Compute the eight physico-chemical properties used for DEKOIS matching.

    Args:
        smiles: SMILES string.

    Returns:
        ``(prop_dict, prop_array)``: a dict keyed by ``mol_wg``, ``log_p``,
        ``hba``, ``hbd``, ``ring_ct``, ``rot_bnds``, ``pos_charge`` and
        ``neg_charge``, plus the same values as a list in that order. If any
        step raises, ``({}, [0, 0, 0, 0, 0, 0, 0, 0])`` is returned.
    """
    try:
        # Create RDKit mol with explicit hydrogens
        mol = Chem.AddHs(Chem.MolFromSmiles(smiles))

        prop_dict = {
            'mol_wg': Descriptors.MolWt(mol),
            'log_p': Chem.Crippen.MolLogP(mol),
            'hba': Chem.rdMolDescriptors.CalcNumLipinskiHBA(mol),
            'hbd': Chem.rdMolDescriptors.CalcNumLipinskiHBD(mol),
            'ring_ct': Chem.rdMolDescriptors.CalcNumAromaticRings(mol),
            'rot_bnds': Chem.rdMolDescriptors.CalcNumRotatableBonds(mol),
        }
        # Formal charges
        pos, neg = calc_charges(mol)
        prop_dict['pos_charge'] = pos
        prop_dict['neg_charge'] = neg

        prop_array = [
            prop_dict['mol_wg'], prop_dict['log_p'], prop_dict['hba'],
            prop_dict['hbd'], prop_dict['ring_ct'], prop_dict['rot_bnds'],
            prop_dict['pos_charge'], prop_dict['neg_charge']
        ]

        return (prop_dict, prop_array)

    except Exception:
        # Deliberate best-effort fallback; narrowed from a bare except so
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        return ({}, [0, 0, 0, 0, 0, 0, 0, 0])
Example #7
0
def getLDI(mol):
    """Calculate the local dipole index (D) of a molecule.

    Gasteiger charges are computed on the hydrogen-added molecule; the index
    is the mean absolute charge difference across all bonds, rounded to six
    decimals.

    Args:
        mol: RDKit molecule.

    Returns:
        float: the local dipole index, or ``0.0`` when there are no bonds.
    """
    Hmol = Chem.AddHs(mol)
    GMCharge.ComputeGasteigerCharges(Hmol, iter_step)
    res = []
    for atom in Hmol.GetAtoms():
        charge = float(atom.GetProp('_GasteigerCharge'))
        # Treat NaN and both infinities as "no charge"; the original test
        # only caught +inf, letting -inf poison the average.
        if math.isnan(charge) or math.isinf(charge):
            res.append(0.0)
        else:
            res.append(charge)
    bonds = Hmol.GetBonds()
    B = len(bonds)
    if B == 0:
        return 0.0
    cc = [
        numpy.absolute(res[x.GetBeginAtom().GetIdx()] -
                       res[x.GetEndAtom().GetIdx()]) for x in bonds
    ]
    return round(sum(cc) / B, 6)
def get_mol(xyz, smiles, with_conformer=True):
    """
    Get an RDKit mol from an xyz and smiles. Note that this
    assumes that the xyz is ordered in the same way an RDKit
    object of the same smiles would be ordered, and that there
    is no change in connectivity between the RDKit mol and the
    xyz.

    Args:
        xyz (torch.Tensor): atom type and xyz of geometry.
        smiles (str): SMILES string
        with_conformer (bool): also add conformer to the RDKit mol
    Returns:
        mol (rdkit.Chem.rdchem.Mol): RDKit mol object (with explicit
            hydrogens; carries a conformer only if `with_conformer`).
    """

    mol = Chem.MolFromSmiles(smiles)
    mol = Chem.AddHs(mol)
    if with_conformer:
        # Build and attach the conformer only on request; attaching it
        # unconditionally raised NameError for with_conformer=False.
        mol.AddConformer(get_conformer(xyz))

    return mol
Example #9
0
def opt_mol_from_smi(smi, save_name):
    """Embed and MMFF-optimise a molecule from SMILES and write it as SDF.

    Args:
        smi: SMILES string.
        save_name: path handed to ``Chem.SDWriter``.

    Returns:
        str: ``save_name + ".mol"``.
        NOTE(review): the file is written to ``save_name`` itself, not to the
        returned path - confirm what callers expect.
    """
    mol = Chem.MolFromSmiles(smi)
    mol = Chem.AddHs(mol)

    # Fixed seed keeps the embedding reproducible.
    AllChem.EmbedMolecule(mol, randomSeed=3)
    AllChem.MMFFOptimizeMolecule(mol)
    mol = Chem.rdmolops.RemoveHs(mol)

    writer = Chem.SDWriter(save_name)
    try:
        mol.SetProp('_Name', 'Chemistry ' + save_name)
        # Placeholder SD data fields, all set to the same dummy value.
        for field in ('STEREOCHEM', 'EF', 'MOL_WEIGHT', 'COMPOUND_ID',
                      'SUPPLIER', 'COMMEN'):
            mol.SetProp(field, str(1.00))
        writer.write(mol)
    finally:
        # Close explicitly so the record is flushed to disk.
        writer.close()
    return save_name + ".mol"
Example #10
0
def create_3d_sdf_from_smiles(smiles_zincid_dict, decoy_sdf):
    '''
    Create a combined 3D SDF file using RDKit.

    Each SMILES is embedded with random starting coordinates, UFF-optimised
    and written with the name "ZIN" + <id>. Molecules that fail at any step
    are reported on stdout and skipped.

    :param smiles_zincid_dict: dictionary mapping SMILES -> molecule name/id
    :param decoy_sdf: path of the SDF file to write
    :return: None (the combined SDF is written to ``decoy_sdf``)
    '''
    writer = Chem.SDWriter(decoy_sdf)
    try:
        for k, v in smiles_zincid_dict.items():
            try:
                mol = Chem.MolFromSmiles(k)
                molH = Chem.AddHs(mol)

                AllChem.EmbedMolecule(molH, useRandomCoords=True)
                AllChem.UFFOptimizeMolecule(molH)
                molH.SetProp("_Name", "ZIN" + v)
                writer.write(molH)
            except Exception:
                # Best effort per molecule; narrowed from a bare except so
                # Ctrl-C still interrupts a long run.
                print("no conformer for Zin{}".format(v))
    finally:
        # Always close so already-written records are flushed.
        writer.close()
Example #11
0
def build_from_smiles(smiles_pattern, protonate=False, openff_compatible=True):
    """Build an RDKit molecule from SMILES using MDL aromaticity.

    The SMILES is parsed without sanitization, then sanitized with the
    aromaticity step disabled (and the hydrogen-adjustment step disabled
    too unless ``protonate`` is set). Aromaticity is then assigned with the
    MDL model.

    Args:
        smiles_pattern: SMILES string.
        protonate: when true, explicit hydrogens are added.
        openff_compatible: when true, the molecule is kekulized before
            being returned.

    Returns:
        The processed RDKit molecule.
    """
    molecule = Chem.MolFromSmiles(smiles_pattern, sanitize=False)

    # Sanitization steps to switch off for the first pass.
    disabled_steps = [Chem.SanitizeFlags.SANITIZE_SETAROMATICITY]
    if not protonate:
        disabled_steps.append(Chem.SanitizeFlags.SANITIZE_ADJUSTHS)

    sanitize_ops = Chem.SanitizeFlags.SANITIZE_ALL
    for step in disabled_steps:
        sanitize_ops ^= step

    Chem.SanitizeMol(molecule, sanitize_ops)

    if protonate:
        molecule = Chem.AddHs(molecule)

    Chem.SetAromaticity(molecule, Chem.AromaticityModel.AROMATICITY_MDL)
    Chem.SanitizeMol(molecule, Chem.SanitizeFlags.SANITIZE_SETAROMATICITY)

    if openff_compatible:
        # The toolkit does not accept fractional bond orders, so aromatic
        # bonds are rewritten as alternating single/double bonds.
        Chem.Kekulize(molecule)

    return molecule
Example #12
0
def add_prot_Hs(rdmol):
    """
    Add hydrogens to molecules read from PDB.

    Each newly added hydrogen (one without PDB residue info) receives the
    residue info of the atom it is bonded to.

    Args:
        rdmol: RDKit molecule, typically read from a PDB file.

    Returns:
        A new molecule with explicit hydrogens placed in 3D.
    """
    retmol = Chem.AddHs(rdmol, addCoords=True)
    for atom in retmol.GetAtoms():
        if atom.GetPDBResidueInfo() is not None or atom.GetSymbol() != "H":
            continue
        bonds = atom.GetBonds()
        if not bonds:
            # An unbonded hydrogen has no neighbour to inherit from.
            continue
        # GetOtherAtom picks the partner whichever end the hydrogen is on.
        # The original compared against the unbound method `atom.GetIdx`,
        # which is never equal, so it could select the hydrogen itself.
        connected_atom = bonds[0].GetOtherAtom(atom)
        try:
            ResInfo = connected_atom.GetPDBResidueInfo()
            atom.SetMonomerInfo(ResInfo)
        except Exception:
            print(
                "Hydrogen annotation failed:",
                connected_atom.GetIdx(),
                atom.GetIdx(),
            )
    return retmol
Example #13
0
def multi_prods(mol_list, rxn, debug=False):
    """Run a four-product reaction over many molecules and collect products.

    Args:
        mol_list: iterable of RDKit molecules.
        rxn: reaction object exposing ``RunReactants``; every product set is
            expected to contain at least four products.
        debug: when true, print each molecule's SMILES before processing.

    Returns:
        Four lists ``(prod1_list, prod2_list, prod3_list, prod4_list)``
        holding the first to fourth product of every product set.
    """
    prod1_list = []
    prod2_list = []
    prod3_list = []
    prod4_list = []
    for mol in mol_list:
        if debug:
            print(MolToSmiles(mol))
        try:
            mol.UpdatePropertyCache()
            FastFindRings(mol)
        except Exception:
            # Skip molecules that cannot be prepared; narrowed from a bare
            # except so interrupts still propagate.
            print('This mol fails! ' + MolToSmiles(mol))
            continue
        # An empty result simply contributes nothing.
        for prod in rxn.RunReactants((Chem.AddHs(mol), )):
            prod1_list.append(prod[0])
            prod2_list.append(prod[1])
            prod3_list.append(prod[2])
            prod4_list.append(prod[3])
    return prod1_list, prod2_list, prod3_list, prod4_list
Example #14
0
def CalculateHydrogenNumber(mol):
    """
    Count the hydrogen atoms of a molecule (descriptor ``nhyd``).

    Hydrogens are made explicit first, then every atom with atomic
    number 1 is counted.

    Usage:

        result = CalculateHydrogenNumber(mol)

        Input: mol is a molecule object.

        Output: result is a numeric value.
    """
    with_hydrogens = Chem.AddHs(mol)
    return sum(1 for atom in with_hydrogens.GetAtoms()
               if atom.GetAtomicNum() == 1)
Example #15
0
def convert_sugar_forms(molecule):
    """Enumerate alternative forms of ``molecule`` via four ring reactions.

    Each reaction SMARTS below is applied to ``molecule``. Every product
    whose isomeric SMILES has not been seen yet is re-parsed from that
    SMILES, given explicit hydrogens and collected.
    NOTE(review): the SMARTS look like ring-opening of cyclic hemiacetals /
    hemiketals to their carbonyl forms - confirm with the author.

    Returns:
        list: the input molecule followed by the new, unique product
        molecules.
    """
    reaction_smarts = (
        '[O:1]1[C:2]([C:8])[C:3][C:4][C:5][C:6]1[O:7][H]>>[H][O:1][C:2]([C:8])[C:3][C:4][C:5][C:6]=[O:7]',
        '[O:6]1[C:2]([O:7][H])([C:1])[C:3][C:4][C:5]1>>[C:1][C:2](=[O:7])[C:3][C:4][C:5][O:6][H]',
        '[O:1]1[C:2]([H:8])[C:3][C:4][C:5][C:6]1[O:7][H]>>[H][O:1][C:2]([H:8])[C:3][C:4][C:5][C:6]=[O:7]',
        '[O:6]1[C:2]([O:7][H])([H:1])[C:3][C:4][C:5]1>>[H:1][C:2](=[O:7])[C:3][C:4][C:5][O:6][H]',
    )
    reactions = [AllChem.ReactionFromSmarts(pattern) for pattern in reaction_smarts]

    # Seed the de-duplication set with the hydrogen-stripped input.
    known_smiles = {Chem.MolToSmiles(Chem.RemoveHs(molecule), isomericSmiles=True)}
    collected = [molecule]

    for reaction in reactions:
        for product in traverse(reaction.RunReactants((molecule,))):
            product_smiles = Chem.MolToSmiles(product, isomericSmiles=True)
            if product_smiles in known_smiles:
                continue
            # Round-trip through SMILES to get a freshly parsed molecule.
            rebuilt = Chem.AddHs(Chem.MolFromSmiles(product_smiles))
            known_smiles.add(product_smiles)
            collected.append(rebuilt)
    return collected
    def prepare_smiles_and_mol(self, mol):
        """Return the canonical SMILES of `mol` and a molecule rebuilt from it.

        The parser class calls this before `get_input_features`. Subclasses
        may override it to customise how `smiles`/`mol` are extracted.

        Args:
            mol (mol): mol instance

        Returns (tuple): (`smiles`, `mol`), where `mol` is re-parsed from the
            canonical, non-isomeric SMILES and then optionally given explicit
            hydrogens (`self.add_Hs`) and kekulized (`self.kekulize`).
        """
        # A molecule has many valid SMILES; the canonical form is unique.
        smiles = Chem.MolToSmiles(mol, isomericSmiles=False, canonical=True)
        prepared = Chem.MolFromSmiles(smiles)
        if self.add_Hs:
            prepared = Chem.AddHs(prepared)
        if self.kekulize:
            Chem.Kekulize(prepared)
        return smiles, prepared
Example #17
0
def calculate_and_write_fp(title, mol, outf, _type, n, nbits, compress):
    """Compute a fingerprint for ``mol`` and write it as one tab-separated row.

    Args:
        title: text written at the start of the row.
        mol: RDKit molecule.
        outf: writable text file-like object.
        _type: 1 = ECFP (Morgan), 2 = FCFP (feature Morgan), 3 = MACCS keys.
        n: Morgan radius (types 1 and 2).
        nbits: Morgan bit-vector length (types 1 and 2).
        compress: when true the bit string is written as a single column,
            otherwise every bit gets its own column.

    Raises:
        ValueError: if ``_type`` is not 1, 2 or 3. Nothing is written.
    """
    if _type not in (1, 2, 3):
        # Reject up front; the original wrote the title and then crashed on
        # an unbound ``bit_string``.
        raise ValueError("Unknown fingerprint type: %r" % (_type,))

    if _type == 1:  # ecfp
        fp = AllChem.GetMorganFingerprintAsBitVect(mol, n, nBits=nbits)
        bit_string = fp.ToBitString()
    elif _type == 2:  # fcfp
        fp = AllChem.GetMorganFingerprintAsBitVect(mol,
                                                   n,
                                                   nBits=nbits,
                                                   useFeatures=True)
        bit_string = fp.ToBitString()
    else:  # MACCS
        mol = Chem.AddHs(mol)
        fp = MACCSkeys.GenMACCSKeys(mol)
        # The first bit of the MACCS bit string is dropped.
        bit_string = fp.ToBitString()[1:]

    outf.write(title)
    if compress:
        outf.write("\t" + bit_string)
    else:
        outf.write("".join("\t" + bit for bit in bit_string))
    outf.write("\n")
Example #18
0
 def _featurize(self, mol, smiles):
     """
     Build the GNN representation of a compound.

     Hydrogens are made explicit. Each disconnected fragment is then
     fingerprinted on its own and the fragment fingerprints are
     concatenated, so SMILES containing '.' are kept rather than dropped.

     :param mol: RDKit molecule
     :param smiles: SMILES string stored on the result
     :return: GnnMol built from the molecule, fingerprints, adjacency and
         smiles
     """
     mol = Chem.AddHs(mol)  # Consider hydrogens.
     per_fragment = [
         extract_fingerprints(self,
                              create_atoms(self, fragment),
                              create_ijbonddict(self, fragment),
                              self.radius)
         for fragment in Chem.GetMolFrags(mol, asMols=True)
     ]
     joined = np.concatenate(per_fragment)
     # The adjacency covers the whole molecule, not single fragments.
     adjacency = create_adjacency(mol)
     return GnnMol(mol, joined, adjacency, smiles)
Example #19
0
def parse_smiles(smiles):
    """Return element symbols and embedded 3D coordinates for a SMILES.

    The molecule is given explicit hydrogens and embedded with RDKit. The
    atom rows of its mol block are then parsed.

    Returns:
        ``(E, G)``: list of element symbols and an array of the matching
        x, y, z coordinate rows.
    """
    # load in rdkit
    from rdkit import Chem
    from rdkit.Chem import AllChem

    # construct rdkit object
    embedded = Chem.AddHs(Chem.MolFromSmiles(smiles))
    AllChem.EmbedMolecule(embedded)

    # parse mol block and obtain elements & geometry
    elements = []
    coordinates = []
    for row in Chem.MolToMolBlock(embedded).split('\n'):
        tokens = row.split()
        # Keep only rows with more than five fields that are neither
        # 'M ...' property rows nor the counts row ending in 'V2000'.
        if len(tokens) <= 5 or tokens[0] == 'M' or tokens[-1] == 'V2000':
            continue
        elements.append(tokens[3])
        coordinates.append([float(value) for value in tokens[:3]])

    return elements, np.array(coordinates)
def make_conformers(
    smiles_path,
    output_dir,
    num_confs=300,
    rms_thresh=1.5,
):
    """Embed conformers for the SMILES stored at ``smiles_path``.

    Conformers are embedded on the hydrogen-added molecule with RMS pruning
    and then passed one by one, with hydrogens removed, to
    ``output_conformer`` under ``output_dir / "<index>.pdb"``.

    Returns:
        ``(number_of_conformers, output_dir)``.
    """
    with_hydrogens = Chem.AddHs(
        Chem.MolFromSmiles(smiles_from_path(smiles_path)))
    conformer_ids = AllChem.EmbedMultipleConfs(
        with_hydrogens,
        numConfs=num_confs,
        pruneRmsThresh=rms_thresh,
    )
    without_hydrogens = Chem.RemoveHs(with_hydrogens)
    for file_index, conformer_id in enumerate(conformer_ids):
        target = output_dir / "{}.pdb".format(file_index)
        output_conformer(without_hydrogens, conformer_id, target)

    return len(conformer_ids), output_dir
Example #21
0
def from_smarts(smarts, nconfs=1, name=None, forcefield=None, rms=0.1):
    """
    Generate PLAMS molecule(s) from a SMARTS string.

    Unlike SMILES input, this lets hydrogens be written explicitly. It is
    less suitable for aromatic molecules (use from_smiles for those).

    :parameter str smarts: A SMARTS string; only the first
        whitespace-separated token is used
    :parameter int nconfs: Number of conformers to be generated
    :parameter str name: A name for the molecule
    :parameter str forcefield: Choose 'uff' or 'mmff' forcefield for geometry
        optimization and ranking of conformations. The default value None
        results in skipping of the geometry optimization step.
    :parameter float rms: Root Mean Square deviation threshold for removing
        similar/equivalent conformations.
    :return: A molecule with hydrogens and 3D coordinates or a list of molecules if nconfs > 1
    :rtype: |Molecule| or list of PLAMS Molecules
    """
    pattern = str(smarts.split()[0])
    parsed = Chem.MolFromSmarts(pattern)
    Chem.SanitizeMol(parsed)
    with_hydrogens = Chem.AddHs(parsed)
    # Keep the source pattern on the molecule under the 'smiles' property.
    with_hydrogens.SetProp('smiles', pattern)
    return get_conformations(with_hydrogens, nconfs, name, forcefield, rms)
Example #22
0
    def test0AddHds(self):
        """AddHs with coordinates places the six ethane hydrogens as expected."""
        mol = Chem.MolFromSmiles("CC")
        heavy_conf = Chem.Conformer(1)
        heavy_conf.SetAtomPosition(0, Point3D(-0.5, 0.0, 0.0))
        heavy_conf.SetAtomPosition(1, Point3D(1.0, 0.0, 0.0))
        mol.AddConformer(heavy_conf)

        stored_conf = mol.GetConformer()
        self.assertEqual(stored_conf.GetNumAtoms(), 2)

        # Second and third positional arguments: explicitOnly=0, addCoords=1.
        hydrogenated = Chem.AddHs(mol, 0, 1)
        full_conf = hydrogenated.GetConformer()
        self.assertEqual(full_conf.GetNumAtoms(), 8)
        # The original molecule's conformer must be left untouched.
        self.assertEqual(stored_conf.GetNumAtoms(), 2)

        expected_positions = [[-0.5, 0.0, 0.0], [1.0, 0.0, 0.0],
                              [-0.8667, 0.0, 1.03709], [-0.8667, 0.8981, -0.5185],
                              [-0.8667, -0.8981, -0.5185], [1.3667, 0.0, -1.0371],
                              [1.36667, 0.8981, 0.5185], [1.36667, -0.8981, 0.5185]]

        for atom_index, expected in enumerate(expected_positions):
            actual = full_conf.GetAtomPosition(atom_index)
            self.assertTrue(ptEq(actual, Point3D(*expected)))
def canon(df, idx):
    """Canonicalise the SMILES in row ``idx`` of ``df`` and record the result.

    On success the row gets ``c_smiles`` (SMILES of the hydrogen-added
    molecule), ``status`` 0 and ``Formula``. On any failure ``status`` is
    set to -2 and the problem is printed.

    Args:
        df: pandas DataFrame with a ``smiles`` column (and ideally ``Name``).
        idx: index label of the row to process.

    Returns:
        The same DataFrame, modified in place.
    """
    print('trying to canonize smile for idx: {}'.format(idx))
    # Bound up front so the error report below cannot fail when the lookup
    # itself raises.
    smile = None
    try:
        smile = df.loc[idx]['smiles']
        m = Chem.MolFromSmiles(smile)
        m = Chem.AddHs(m)
        c_smile = Chem.MolToSmiles(m)
        df.loc[idx, 'c_smiles'] = c_smile
        df.loc[idx, 'status'] = 0

        formula = CalcMolFormula(m)
        if 'Cl' in formula:
            # Moves the chlorine symbol to the end of the formula.
            # NOTE(review): a count following 'Cl' (e.g. 'Cl2') stays behind
            # at the old position - confirm this is intended.
            formula = formula.replace('Cl', '')
            formula = formula + 'Cl'

        df.loc[idx, 'Formula'] = formula

    except Exception as e:
        df.loc[idx, 'status'] = -2
        # .get() keeps the report from raising when there is no 'Name' column.
        name = df.loc[idx].get('Name')
        print("could not convert smile {} of molecule {} : {}".format(
            smile, idx, name))
        print('Exception: {}'.format(e))
    return df
Example #24
0
    def generate_conformers(self, savefolder, savename="molecule_conformers", filetype="pdb",
                            savefolder_exist_ok=False, num_confs=400):
        """
        Generates ligand conformers and saves each one to its own file.

        Conformers are embedded on a hydrogen-added copy of the molecule,
        pruned at an RMS threshold of 1.0 and UFF-optimised.

        Parameters
        ----------
        savefolder: str
            Path to directory where the results will be saved
        savename: str
           Name of the generated files. example filename: <savename>_1.pdb
        filetype: str
           must be 'pdb' or 'sdf'
        savefolder_exist_ok: bool
           if false raises an error if savefolder already exists
        num_confs: int
           Number of conformers to request from the embedding.

        Raises
        ------
        ValueError
            If `filetype` is not supported. Raised before any work is done.
        """
        # Validate first so a typo does not cost a full embedding run or
        # leave an empty folder behind.
        if filetype not in ("pdb", "sdf"):
            raise ValueError("Unknown file format. Cannot save to format '{}'".format(filetype))

        from rdkit.Chem import AllChem
        os.makedirs(savefolder, exist_ok=savefolder_exist_ok)

        mol = deepcopy(self._mol)
        mol = Chem.AddHs(mol)
        ids = AllChem.EmbedMultipleConfs(mol, numConfs=num_confs, pruneRmsThresh=1., maxAttempts=10000)
        for conf_id in ids:
            AllChem.UFFOptimizeMolecule(mol, confId=conf_id)

        chemwrite = Chem.PDBWriter if filetype == "pdb" else Chem.SDWriter
        for index, conf_id in enumerate(ids):
            writer = chemwrite(os.path.join(savefolder, '{}_{}.{}'.format(savename, index + 1, filetype)))
            try:
                writer.write(mol, confId=conf_id)
            finally:
                # Close explicitly so every file is flushed to disk.
                writer.close()
Example #25
0
def to_conformers(rdm, nconfs):
    """ Generate molecular geometries for a set of conformers
        from an RDKit molecule object, sorted by MMFF energy.

        Currently not removing redundant conformers.

        :param rdm: molecule object
        :type rdm: RDKit molecule object
        :param nconfs: number of conformers to generate
        :type nconfs: int
        :return: geometries ordered from lowest to highest energy; empty
            if no conformer could be embedded
        :rtype: list of automol geometry data structures
    """

    rdm = _rd_chem.AddHs(rdm)
    atms = rdm.GetAtoms()
    natms = len(rdm.GetAtoms())
    geos = []
    if natms == 1:
        # A single atom needs no embedding: place it at the origin.
        syms = [str(atms[0].GetSymbol()).title()]
        xyzs = [(0., 0., 0.)]
        geos.append(
            automol.create.geom.from_data(syms, xyzs, angstrom=True))
        return geos

    cids = list(_rd_all_chem.EmbedMultipleConfs(rdm, numConfs=nconfs))
    if not cids:
        # Without conformers the energy unpacking below would raise
        # IndexError.
        return geos

    # One (status, energy) pair per conformer.
    res = _rd_all_chem.MMFFOptimizeMoleculeConfs(rdm)
    energies = [energy for _, energy in res]

    # Symbols are the same for every conformer.
    syms = tuple(str(rda.GetSymbol()).title() for rda in atms)
    for cid in cids:
        xyzs = tuple(map(tuple, rdm.GetConformer(cid).GetPositions()))
        geos.append(
            automol.create.geom.from_data(syms, xyzs, angstrom=True))

    # Sort geometries using the energies
    geos = [
        x for _, x in sorted(zip(energies, geos), key=lambda pair: pair[0])
    ]

    return geos
Example #26
0
def mol_to_hg(mol, kekulize, add_Hs):
    """
    Convert a molecule into a hypergraph via its bipartite representation.

    Bond nodes of the bipartite graph become hypergraph nodes. Every atom
    node becomes a hyperedge joining the bond nodes adjacent to it.

    Parameters
    ----------
    mol : rdkit.Chem.rdchem.Mol
        molecule object
    kekulize : bool
        kekulize or not
    add_Hs : bool
        add implicit hydrogens to the molecule or not.

    Returns
    -------
    Hypergraph
    """
    if add_Hs:
        mol = Chem.AddHs(mol)

    if kekulize:
        Chem.Kekulize(mol)

    bipartite_g = mol_to_bipartite(mol, kekulize)
    hg = Hypergraph()

    atom_nodes = [
        node for node in bipartite_g.nodes() if node.startswith('atom_')
    ]
    for atom_node in atom_nodes:
        incident_bonds = set()
        for bond_node in bipartite_g.adj[atom_node]:
            bond_attr = bipartite_g.node[bond_node]['bond_attr']
            hg.add_node(bond_node, attr_dict=bond_attr)
            incident_bonds.add(bond_node)
        atom_attr = bipartite_g.node[atom_node]['atom_attr']
        hg.add_edge(incident_bonds, attr_dict=atom_attr)
    return hg
def generate_structures(vae, smi, char_to_index, limit=1e4, write=False):
    """Sample unique, embeddable structures from a VAE until it stalls.

    Each iteration decodes a candidate SMILES at a random temperature in
    [0, 2), checks that it can be parsed, embedded and UFF-optimised, and
    keeps its canonical SMILES if it is new. Sampling stops once more than
    ``limit`` consecutive iterations produced nothing new.

    Args:
        vae: model handed to ``decode_smiles``.
        smi: seed SMILES handed to ``decode_smiles``.
        char_to_index: character vocabulary handed to ``decode_smiles``.
        limit: number of consecutive unproductive iterations tolerated.
        write: falsy to skip file output, otherwise the stem of the CSV file
            (``"<write>.csv"``) holding the results.

    Returns:
        pandas.DataFrame with columns ``smiles``, ``temperature`` and
        ``iteration`` (empty if nothing was accepted).
    """
    rdkit_mols = []
    seen = set()  # O(1) duplicate check alongside the ordered list
    temps = []
    iterations = []

    def _as_frame():
        frame = pd.DataFrame([rdkit_mols, temps, iterations]).T
        frame.columns = ['smiles', 'temperature', 'iteration']
        return frame

    def _save(frame):
        pd.DataFrame.to_csv(frame,
                            path_or_buf='{}.csv'.format(write),
                            index=False)

    iteration = limit_counter = 0
    while True:
        iteration += 1
        limit_counter += 1
        t = random.random() * 2
        candidate = decode_smiles(vae, smi, char_to_index,
                                  temp=t).split(" ")[0]
        try:
            sampled = Chem.MolFromSmiles(candidate)
            cation = Chem.AddHs(sampled)
            Chem.EmbedMolecule(cation, Chem.ETKDG())
            Chem.UFFOptimizeMolecule(cation)
            cation = Chem.RemoveHs(cation)
            candidate = Chem.MolToSmiles(cation)
        except Exception:
            # Invalid candidates are expected; drop them and keep sampling.
            candidate = None

        if candidate is not None and candidate not in seen:
            seen.add(candidate)
            temps.append(t)
            iterations.append(iteration)
            rdkit_mols.append(candidate)
            limit_counter = 0
            df = _as_frame()
            print(df)
            if write:
                # Checkpoint only when the results changed.
                _save(df)

        if limit_counter > limit:
            break

    # Always build the frame here so the return value exists even when no
    # structure was accepted.
    df = _as_frame()
    if write:
        _save(df)
    return df
Example #28
0
    def from_smiles(cls, smiles_input, neutralize_molecule=True):
        """Create a molecule from a SMILES string.

        Stores the InChI and canonical SMILES, embeds 3D coordinates with
        RDKit and records coordinate metadata on the new instance.

        Args:
            smiles_input: SMILES string.
            neutralize_molecule: when true, pass the parsed molecule through
                ``neutralize_atoms`` first.

        Returns:
            A new instance of ``cls`` populated from the SMILES.
        """
        started = time.perf_counter()
        instance = cls()

        rd_mol = Chem.MolFromSmiles(smiles_input)
        if neutralize_molecule:
            rd_mol = neutralize_atoms(rd_mol)

        # Identifiers are taken before hydrogens are made explicit.
        # TODO: producing warning, generate inchi's separately?
        instance.inchi = AllChem.MolToInchi(rd_mol)
        instance.smiles = Chem.MolToSmiles(rd_mol)

        # 3D embedding with a fixed seed for reproducible coordinates.
        rd_mol = Chem.AddHs(rd_mol)
        AllChem.EmbedMolecule(rd_mol, randomSeed=0xF00D)
        mol_block = Chem.MolToMolBlock(rd_mol)

        # Metadata slots: [software, version, method, calc.-time, datetime]
        metadata = instance.coordinate_metadata
        metadata[0] = rdkit.__name__
        metadata[1] = rdkit.__version__
        metadata[2] = "EKTG"  # TODO: change to ETKDG
        metadata[4] = datetime.datetime.utcnow()

        # Extract cartesian coordinates from the mol block.
        instance.molblock_data_extraction(mol_block, rdkit_molblock=True)

        # Elapsed time is taken last so it covers the extraction as well.
        instance.coordinate_metadata[3] = round(
            time.perf_counter() - started, 3)

        return instance
def MinimizeMolecule(Mol, MolCount, Writer):
    """Minimize a molecule with the configured force field and write it out.

    Behaviour is driven by the module-level ``OptionsInfo`` mapping
    (``AddHydrogens``, ``UseUFF``, ``UseMMFF``, ``MaxIters``,
    ``RemoveHydrogens``).

    Returns:
        bool: False when the optimizer raised RuntimeError (nothing is
        written), True otherwise. A non-converged minimization only
        triggers a warning.
    """
    if OptionsInfo["AddHydrogens"]:
        Mol = Chem.AddHs(Mol)

    Status = 0
    try:
        if OptionsInfo["UseUFF"]:
            Optimizer = AllChem.UFFOptimizeMolecule
        elif OptionsInfo["UseMMFF"]:
            Optimizer = AllChem.MMFFOptimizeMolecule
        else:
            Optimizer = None

        if Optimizer is None:
            MiscUtil.PrintError(
                "Minimization couldn't be performed: Specified forcefield, %s, is not supported"
                % OptionsInfo["ForceField"])
        else:
            Status = Optimizer(Mol, maxIters=OptionsInfo["MaxIters"])
    except RuntimeError as ErrMsg:
        MiscUtil.PrintWarning(
            "Minimization couldn't be performed for molecule %s:\n%s\n" %
            (RDKitUtil.GetMolName(Mol, MolCount), ErrMsg))
        return False

    # A non-zero status means the optimizer did not converge.
    if Status != 0:
        MiscUtil.PrintWarning(
            "Minimization failed to converge for molecule %s in %d steps. Try using higher value for \"--maxIters\" option...\n"
            % (RDKitUtil.GetMolName(Mol, MolCount), OptionsInfo["MaxIters"]))

    if OptionsInfo["RemoveHydrogens"]:
        Mol = Chem.RemoveHs(Mol)

    Writer.write(Mol)

    return True
Example #30
0
def fragment_mol(smi, smi_id=''):
    """Fragment a molecule for matched-molecular-pair analysis.

    Heavy-atom single acyclic bonds are cut (up to 4 and up to 3 cuts). If
    the molecule has fewer than 60 hydrogens, single hydrogen cuts are
    added as well.

    Args:
        smi: SMILES string.
        smi_id: identifier written into every output line.

    Returns:
        set of ``"smi,smi_id,core,chains\\n"`` lines; empty if the SMILES
        cannot be parsed (a message is written to stderr).
    """
    outlines = set()

    mol = Chem.MolFromSmiles(smi)
    if mol is None:
        sys.stderr.write("Can't generate mol for: %s\n" % smi)
        return outlines

    def _record(fragments):
        # Format every (core, chains) pair as one CSV line.
        for core, chains in fragments:
            outlines.add('%s,%s,%s,%s\n' % (smi, smi_id, core, chains))

    # heavy atoms
    heavy_pattern = "[!#1]!@!=!#[!#1]"
    heavy_frags = set(rdMMPA.FragmentMol(mol,
                                         pattern=heavy_pattern,
                                         maxCuts=4,
                                         resultsAsMols=False,
                                         maxCutBonds=30))
    heavy_frags.update(rdMMPA.FragmentMol(mol,
                                          pattern=heavy_pattern,
                                          maxCuts=3,
                                          resultsAsMols=False,
                                          maxCutBonds=30))
    _record(heavy_frags)

    # hydrogen splitting
    mol = Chem.AddHs(mol)
    hydrogen_count = mol.GetNumAtoms() - mol.GetNumHeavyAtoms()
    if hydrogen_count < 60:
        _record(rdMMPA.FragmentMol(mol,
                                   pattern="[#1]!@!=!#[!#1]",
                                   maxCuts=1,
                                   resultsAsMols=False,
                                   maxCutBonds=100))
    return outlines