def get_filter_values(mol):
    """
    Collect the descriptor values used to filter a single molecule.

    Parameters
    ----------
    mol : Chem.Mol
        The molecule to describe (checked with an ``isinstance`` assertion).

    Returns
    -------
    dict
        Descriptor name -> value. Besides the computed descriptors the dict
        carries two bookkeeping entries: ``"is_good"`` (initialised to True)
        and ``"rejections"`` (an empty list for rejection reasons).
    """
    assert isinstance(mol, Chem.Mol)

    rotatable = lip.NumRotatableBonds(mol)
    hetero_count = lip.NumHeteroatoms(mol)

    values = {
        "MW": desc.CalcExactMolWt(mol),
        "logP": crip.MolLogP(mol),
        "HBA": lip.NumHAcceptors(mol),
        "HBD": lip.NumHDonors(mol),
        "tPSA": desc.CalcTPSA(mol),
        "rot_bonds": rotatable,
        # every bond that is not rotatable is counted as rigid
        "rigid_bonds": mol.GetNumBonds() - rotatable,
        "num_rings": lip.RingCount(mol),
        "num_hetero_atoms": hetero_count,
        # net formal charge as reported by RDKit
        "charge": rdmolops.GetFormalCharge(mol),
    }

    carbon_count, charged_count, largest_ring = get_atom_props(mol)
    values["num_carbons"] = carbon_count
    values["num_charges"] = charged_count
    values["max_ring_size"] = largest_ring

    # hetero-atom / carbon ratio; a large sentinel stands in when there
    # are no carbons at all
    if carbon_count == 0:
        values["hc_ratio"] = 100000000
    else:
        values["hc_ratio"] = float(hetero_count) / float(carbon_count)

    # number of BRICS bonds, used as a proxy for complexity
    values["fc"] = len(list(Brics.FindBRICSBonds(mol)))

    # starts out accepted; nothing has been checked yet
    values["is_good"] = True

    # the distinct element symbols present in the molecule
    values["atoms"] = list({atom.GetSymbol() for atom in mol.GetAtoms()})

    values["num_chiral_centers"] = len(
        Chem.FindMolChiralCenters(mol, includeUnassigned=True))

    # filled in later with the reasons for rejection
    values["rejections"] = []
    return values
def getAllFeatures(SMILE, targets):
    """
    Build the feature dictionary for one compound and its targets.

    Parameters
    ----------
    SMILE : str
        SMILES string, parsed with ``Chem.MolFromSmiles``.
    targets : iterable
        Target identifiers; rows of the ``lof``, ``btwn`` and ``degree``
        tables whose index is in this collection are aggregated.

    Returns
    -------
    dict
        Molecular descriptors, the target-network aggregates
        (``lossFreq``, ``maxBtwn``, ``maxDegree``), ``wQED`` and the first
        three values returned by ``get_PC_value`` (``PC1``..``PC3``).

    Raises
    ------
    ValueError
        From ``max`` when none of ``targets`` occurs in a target table.

    Notes
    -----
    Reads the module-level ``data`` mapping (key ``"target_data"``).
    The ``'NumRings'`` entry holds the *aromatic* ring count.
    Rule-of-five / Veber / Ghose flags and structural-alert counts are not
    part of the returned feature set and are therefore not computed.
    """
    m = Chem.MolFromSmiles(SMILE)

    MW = props.MolWt(m)  # molecular weight
    XlogP = props.MolLogP(m)  # octanol-water partition coefficient log P
    HBD = props.NumHDonors(m)  # hydrogen bond donor count
    HBA = props.NumHAcceptors(m)  # hydrogen bond acceptor count
    PSA = props.TPSA(m)  # polar surface area
    FC = rdmolops.GetFormalCharge(m)  # formal charge
    RBC = props.NumRotatableBonds(m)  # rotatable bonds count
    refr = props.MolMR(m)  # refractivity
    AROMs = props.NumAromaticRings(m)  # aromatic rings only
    QED = Chem.QED.qed(m)

    target_data = data['target_data']
    lof = target_data['lof']
    btwn = target_data['btwn']
    degree = target_data['degree']
    t_set = set(targets)

    # ratio of the 2nd to the 3rd value of each matching `lof` row;
    # the row is looked up once instead of once per operand
    loss_ratios = []
    for key in lof.index:
        if key in t_set:
            row = lof.xs(key)
            loss_ratios.append(row[1] / row[2])
    lossFreq = max(loss_ratios)

    maxBtwn = max([btwn.xs(i)[0] for i in btwn.index if i in t_set])
    maxDegree = max([degree.xs(i)[0] for i in degree.index if i in t_set])

    pc = get_PC_value(target_data["expr"], targets)

    return {
        'MolecularWeight': MW,
        'XLogP': XlogP,
        'HydrogenBondDonorCount': HBD,
        'HydrogenBondAcceptorCount': HBA,
        'PolarSurfaceArea': PSA,
        'FormalCharge': FC,
        'NumRings': AROMs,
        'RotatableBondCount': RBC,
        'Refractivity': refr,
        'lossFreq': lossFreq,
        'maxBtwn': maxBtwn,
        'maxDegree': maxDegree,
        'wQED': QED,
        'PC1': pc[0],
        'PC2': pc[1],
        'PC3': pc[2],
    }
def main(fname):
    """
    Return the net formal charge of the molecule stored in a MOL file.

    Parameters
    ----------
    fname : str
        Path handed to ``Chem.MolFromMolFile``.

    Returns
    -------
    The value of ``rdmolops.GetFormalCharge`` for the parsed molecule, or
    ``None`` (after writing a message to stderr) when parsing fails.
    """
    mol = Chem.MolFromMolFile(fname)
    if not mol:
        # parsing failed: report and bail out without a charge
        sys.stderr.write(
            'Molecule from file {} cannot be parsed'.format(fname))
        return None
    return rdmolops.GetFormalCharge(mol)
def molobj_to_axyzc(molobj, atom_type=int, idx=-1):
    """
    Convert an RDKit molecule into (atoms, coordinates, charge).

    Parameters
    ----------
    molobj
        RDKit molecule object.
    atom_type
        Forwarded to ``molobj_to_atoms`` as ``atom_type``.
    idx
        Forwarded to ``molobj_to_coordinates`` as ``idx``.

    Returns
    -------
    tuple
        ``(atoms, coordinates, charge)`` where ``charge`` is the result of
        ``rdmolops.GetFormalCharge(molobj)``.
    """
    return (
        molobj_to_atoms(molobj, atom_type=atom_type),
        molobj_to_coordinates(molobj, idx=idx),
        rdmolops.GetFormalCharge(molobj),
    )
def parametrise(self, params=None, molecule_type="ligand", id=None,
                reparametrise=False):
    """
    Parametrises the ligand using ProtoCaller.Parametrise.

    The ligand is protonated first (with default settings) if it has not
    been protonated yet. The resulting files are stored in
    ``self.parametrised_files``. All work happens inside ``self.workdir``.

    Parameters
    ----------
    params : ProtoCaller.Parametrise.Params
        Force field parameters. A default ``Params()`` is used when None.
    molecule_type : str
        The type of the molecule. One of: "ligand" and "cofactor".
    id : str
        The name of the molecule. Default: equal to the ligand name.
    reparametrise : bool
        Whether to reparametrise an already parametrised ligand.
    """
    with self.workdir:
        must_run = reparametrise or not self._parametrised
        if not must_run:
            _logging.debug("Ligand %s is already parametrised." % self.name)
            return

        _logging.info("Parametrising ligand %s..." % self.name)

        if not self.protonated:
            _logging.warning(
                "Cannot parametrise unprotonated ligand. Protonating first with default parameters..."
            )
            self.protonate()

        force_field = _parametrise.Params() if params is None else params

        # antechamber needs a pdb, so the protonated file is converted first
        pdb_file = _babel.babelTransform(self.protonated_filename, "pdb")
        ligand_id = self.name if id is None else id
        net_charge = _rdmolops.GetFormalCharge(self.molecule)

        self.parametrised_files = _parametrise.parametriseFile(
            params=force_field,
            filename=pdb_file,
            molecule_type=molecule_type,
            id=ligand_id,
            charge=net_charge,
        )
def run_antechamber(mol, sdf_file, ff):
    """
    Run antechamber / parmchk / tleap for a ligand in the current directory.

    Produces ``UNL.mol2``, ``missing_gaff.frcmod``, ``UNL.sdf``,
    ``convert.leap``, ``UNL.off`` and ``out`` (tleap's stdout) in the
    working directory. Exit codes of the external tools are not inspected.

    Parameters
    ----------
    mol
        RDKit molecule; its net formal charge is passed to antechamber
        as ``-nc``.
    sdf_file : str
        Path of the input SDF file.
    ff : str
        Atom-type / force-field name passed as ``-at`` and used to pick
        the ``leaprc.<ff>`` file.
    """
    import shlex

    net_charge = int(rdmolops.GetFormalCharge(mol))

    # The commands go through the shell, so caller-supplied values are
    # quoted; plain names such as "lig.sdf" or "gaff" come out unchanged.
    sdf_arg = shlex.quote(str(sdf_file))
    ff_arg = shlex.quote(str(ff))

    os.system(
        'antechamber -i %s -fi sdf -o UNL.mol2 -fo mol2 -rn UNL -nc %d -at %s -c bcc -s 0 -pf y'
        % (sdf_arg, net_charge, ff_arg))
    os.system('parmchk -i UNL.mol2 -f mol2 -o missing_gaff.frcmod -at %s' %
              (ff_arg))

    # clean SDF file for rdkit
    os.system('antechamber -i UNL.mol2 -fi mol2 -o UNL.sdf -fo sdf')

    # the leap script is plain text, not a shell command: no quoting here
    with open('convert.leap', 'w') as f:
        f.write('source leaprc.%s\n' % (ff))
        f.write('UNL=loadmol2 UNL.mol2\n')
        f.write('saveoff UNL UNL.off\n')
        f.write('quit')
    os.system('tleap -f convert.leap>out')
def get_monoisotopic_mz_and_z(structure):
    """
    Determines the monoisotopic m/z value and charge of an ion provided as
    a SMILES string or .sdf file.

    The input is first tried as SMILES; if that yields no molecule it is
    treated as a path to an .sdf file and the first record is used.

    :param structure: str
        a valid SMILES string OR a path to an .sdf file containing a single
        ion structure.
    :return out_dict: dict
        entries "charge" (int), "monoiso_mz" (float in Daltons) and "mol"
        (the rdkit mol obj).
    :raises TypeError: the input is neither valid SMILES nor a readable .sdf
    :raises NotImplementedError: no usable molecule could be obtained
    :raises ValueError: the structure carries no net formal charge
    """
    # NOTE(review): "monoiso_mz" is the value of CalcExactMolWt as-is; it is
    # not divided by the charge in this function -- confirm this is intended
    # for multiply charged ions.

    # 1) try SMILES; a TypeError from the parser counts as "not SMILES"
    try:
        mol = Chem.MolFromSmiles(structure)
    except TypeError:
        mol = None

    # 2) not a valid SMILES: assume it is a path to an .sdf file
    if mol is None:
        try:
            records = list(Chem.SDMolSupplier(structure))
            mol = records[0]
        except OSError:
            raise TypeError(
                'The provide structure was neither a valid SMILES string nor a path to an .sdf file.'
            )

    # 3) the first .sdf record may itself be unusable
    if not mol:
        raise NotImplementedError(
            'For unknown reasons, the provided structure could not be analyzed.'
        )

    monoiso_mz = rdMolDescriptors.CalcExactMolWt(mol)
    charge = rdmolops.GetFormalCharge(mol)

    # only ions are accepted
    if not charge:
        raise ValueError(
            'Provided structures must be of ions, not neutral molecules.')

    return {'charge': int(charge), 'monoiso_mz': monoiso_mz, 'mol': mol}
def _format_id_column(ids):
    """Render ids as one comma-separated column ("-1" if empty), ending in a space."""
    rendered = [str(item) for item in ids]
    if not rendered:
        return "-1 "
    return ",".join(rendered) + " "


def _descriptor_columns(mol):
    """Return the Natoms..Nrings columns for mol, each followed by a space."""
    counts = (
        mol.GetNumAtoms(),
        mol.GetNumBonds(),
        mol.GetNumHeavyAtoms(),
        rdmolops.GetFormalCharge(mol),
        rdMolDescriptors.CalcNumHBD(mol),
        rdMolDescriptors.CalcNumHBA(mol),
        rdMolDescriptors.CalcNumRotatableBonds(mol),
        rdMolDescriptors.CalcNumRings(mol),
    )
    return "".join(str(count) + " " for count in counts)


def _open_dump_log(path, database, banner, columns):
    """Open a log at path and write the common header, banner and column line."""
    log = logger(path)
    log.log("### datadump of database %s" % database)
    log.log("### timestamp %s" % time.asctime(time.localtime(time.time())))
    log.log("### written by run_fragresp.py datadump routine.")
    log.log("###")
    for line in banner:
        log.log(line)
    log.log("###")
    log.log(columns)
    return log


def datadump(database, dumpdir):
    """
    Dump a pickled fragment database to text tables and PNG drawings.

    Creates ``dumpdir`` with three tables -- ``frag.dat``, ``mol.dat`` and
    ``surr.dat`` -- and a ``png`` sub-directory holding one 500x500 image
    per fragment, molecule and non-terminal surrogate cap.

    Parameters
    ----------
    database : str
        Path to the pickled database object.
    dumpdir : str
        Output directory; it must not exist yet.

    Raises
    ------
    Warning
        If ``dumpdir`` already exists (nothing is written in that case).
    """
    # NOTE: pickle.load can execute arbitrary code; only use on trusted files.
    with open(database, "rb") as handle:
        db = pickle.load(handle)

    if os.path.exists(dumpdir):
        raise Warning(
            "Caution, %s already exists. Already existing data may be overwritten."
            % dumpdir)
    os.mkdir(dumpdir)
    os.mkdir(dumpdir + "/png")

    png_dir = dumpdir + "/png/"

    frag2mol = db.get_frag2mol()
    frag2lcapconn = db.get_frag2lcapconn()
    frag2rcapconn = db.get_frag2rcapconn()
    mol2frag = db.get_mol2frag()

    # ---- fragments ----
    frag_log = _open_dump_log(
        dumpdir + "/frag.dat",
        database,
        ("### ----------------- ###",
         "### FRAGMENT DATA LOG ###",
         "### ----------------- ###"),
        "# id smiles mol_id lcap_id rcap_id Natoms Nbonds Nnonhatoms Chg Nhbd Nhba Nrotbonds Nrings",
    )
    try:
        for frag_i in range(db.get_frag_count()):
            frag = db.get_frag(frag_i)
            Chem.SanitizeMol(frag)

            row = [
                str(frag_i) + " ",
                str(Chem.MolToSmiles(frag, isomericSmiles=True)) + " ",
                _format_id_column(frag2mol[frag_i]),
                _format_id_column(frag2lcapconn[frag_i]),
                _format_id_column(frag2rcapconn[frag_i]),
                _descriptor_columns(frag),
            ]
            frag_log.log("".join(row))

            png_path = png_dir + "frag_%d.png" % frag_i
            # drawing is best effort: a fragment that cannot be rendered
            # is reported and skipped
            try:
                Chem.SanitizeMol(frag)
                AllChem.Compute2DCoords(frag)
                Draw.MolToFile(frag, png_path, size=(500, 500))
            except Exception:
                print("Could not save frag %d to disk." % frag_i)
    finally:
        frag_log.close()

    # ---- molecules ----
    mol_log = _open_dump_log(
        dumpdir + "/mol.dat",
        database,
        ("### ----------------- ###",
         "### MOLECULE DATA LOG ###",
         "### ----------------- ###"),
        "# id name smiles frag_id Natoms Nbonds Nnonhatoms Chg Nhbd Nhba Nrotbonds Nrings",
    )
    try:
        for mol_i in range(db.get_mol_count()):
            mol = db.get_mol(mol_i)
            Chem.SanitizeMol(mol)
            name = db.get_name(mol_i)
            decomp = db.get_decompose(mol_i)

            # the number of fragment ids listed comes from the decomposition
            frag_ids = [
                mol2frag[mol_i][i] for i in range(decomp.get_frag_count())
            ]
            row = [
                str(mol_i) + " ",
                name + " ",
                str(Chem.MolToSmiles(mol, isomericSmiles=True)) + " ",
                _format_id_column(frag_ids),
                _descriptor_columns(mol),
            ]
            mol_log.log("".join(row))

            png_path = png_dir + "mol_%d.png" % mol_i
            AllChem.Compute2DCoords(mol)
            Chem.Kekulize(mol)
            Draw.MolToFile(mol, png_path, size=(500, 500))
    finally:
        mol_log.close()

    # ---- surrogate caps ----
    surr_log = _open_dump_log(
        dumpdir + "/surr.dat",
        database,
        ("### ----------------- ###",
         "### SURROGATE DATA LOG ###",
         "### ------------------ ###"),
        "# id name smiles mol_id Natoms Nbonds Nnonhatoms Chg Nhbd Nhba Nrotbonds Nrings",
    )
    try:
        for conn_i, conn in enumerate(db.get_conn_list()):
            # terminal connections are not dumped
            if conn.get_terminal():
                continue
            name = conn.get_name()
            conn_cap = conn.get_surrogate_cap()
            Chem.SanitizeMol(conn_cap)

            row = [
                str(conn_i) + " ",
                name + " ",
                str(Chem.MolToSmiles(conn_cap, isomericSmiles=True)) + " ",
                _format_id_column(db.get_conn2mol()[conn_i]),
                _descriptor_columns(conn_cap),
            ]
            surr_log.log("".join(row))

            png_path = png_dir + "surr_%s.png" % (conn_i)
            AllChem.Compute2DCoords(conn_cap)
            Chem.Kekulize(conn_cap)
            Draw.MolToFile(conn_cap, png_path, size=(500, 500))
    finally:
        surr_log.close()
# (sigma, epsilon) for a hydrogen, keyed by the atomic number of the atom
# it is bonded to; any other neighbour uses the default below.
_QLJ_HYDROGEN_BY_NEIGHBOR = {
    6: (0.25, 0.25),
    7: (0.10, 0.25),
    8: (0.05, 0.02),
    16: (0.10, 0.25),
}
_QLJ_HYDROGEN_DEFAULT = (0.10, 0.25)

# (sigma, epsilon) for non-hydrogen atoms, keyed by atomic number.
_QLJ_HEAVY_ATOM = {
    6: (0.34, 0.6),
    7: (0.32, 0.8),
    8: (0.30, 0.9),
    9: (0.3, 0.5),
    15: (0.37, 0.9),
    16: (0.35, 1.0),
    17: (0.35, 1.0),
    35: (0.39, 1.1),
    53: (0.41, 1.2),
}


def standard_qlj_typer(mol):
    """
    This function parameterizes the nonbonded terms of a molecule
    in a relatively simple and forcefield independent way. The
    parameters here roughly follow the Smirnoff 1.1.0 Lennard Jones types.

    These values are taken from timemachine/ff/params/smirnoff_1_1_0_cc.py,
    rounding down to two decimal places for sigma and one decimal place
    for epsilon.

    Note that charges are set to net_formal_charge(mol)/num_atoms, and that
    the tabulated sigma is halved before being returned.

    Parameters
    ----------
    mol: RDKit.ROMol
        RDKit molecule

    Returns
    -------
    [N,3] array containing (charge, sigma, epsilon)

    Raises
    ------
    AssertionError
        If a hydrogen does not have exactly one neighbor, or an element has
        no tabulated parameters. Raised explicitly so the checks remain
        active when Python runs with ``-O``.
    ZeroDivisionError
        If the molecule has no atoms.
    """
    # for charged ligands, we don't want to remove the charge fully as it
    # will introduce large variance in the resulting estimator
    standard_charge = float(rdmolops.GetFormalCharge(mol)) / mol.GetNumAtoms()

    standard_qlj = []
    for atom in mol.GetAtoms():
        a_num = atom.GetAtomicNum()
        if a_num == 1:
            neighbors = atom.GetNeighbors()
            if len(neighbors) != 1:
                raise AssertionError(
                    "Hydrogen must have exactly one neighbor")
            sigma, epsilon = _QLJ_HYDROGEN_BY_NEIGHBOR.get(
                neighbors[0].GetAtomicNum(), _QLJ_HYDROGEN_DEFAULT)
        elif a_num in _QLJ_HEAVY_ATOM:
            sigma, epsilon = _QLJ_HEAVY_ATOM[a_num]
        else:
            raise AssertionError("Unknown a_num " + str(a_num))

        # sigmas need to be halved
        standard_qlj.append((standard_charge, sigma / 2, epsilon))

    return np.array(standard_qlj)
os.mkdir(dir_2_name) ## Write start ligand file, parameters ## os.chdir(dir_1_name) writer = SDWriter('for_parm.sdf') if ligands_name.count(pair[0]) > 0: writer.write(ligands[ligands_name.index(pair[0])]) writer.flush() else: print('Error: cannot map ligand %s.\n' % (pair[0])) sys.exit() run_antechamber('for_parm.sdf', 'UNL', ff, int( rdmolops.GetFormalCharge( ligands[ligands_name.index(pair[0])])), clean_sdf=True) # setup Molecule_ff LIG = Molecule_ff(name='LIG') n_atoms = len(ligands[ligands_name.index(pair[0])].GetAtoms()) for at in ligands[ligands_name.index(pair[0])].GetAtoms(): x = ligands[ligands_name.index( pair[0])].GetConformer().GetAtomPosition(at.GetIdx()).x y = ligands[ligands_name.index( pair[0])].GetConformer().GetAtomPosition(at.GetIdx()).y z = ligands[ligands_name.index( pair[0])].GetConformer().GetAtomPosition(at.GetIdx()).z LIG.add_atom( Atom_ff(idx=at.GetIdx(), atomic_num=at.GetAtomicNum(),