def atom_count(str_data, in_format):
    """Return the number of atoms in a molecule parsed from a string.

    :param str_data: molecule data passed to ``OBConversion.ReadString``
    :param in_format: Open Babel input format identifier for ``str_data``
    :return: the result of ``OBMol.NumAtoms()`` after the read
    """
    parser = OBConversion()
    parser.SetInFormat(in_format)
    molecule = OBMol()
    # The result of ReadString is not inspected; the count of whatever
    # was parsed into the molecule is returned as-is.
    parser.ReadString(molecule, str_data)
    return molecule.NumAtoms()
def sucess_analyses(output, leader_list, n, ref):
    """Write a table with the RMSD of the first ``n`` leaders to a reference.

    The reference molecule is read from ``ref`` with Open Babel, using the
    file extension as the input format. One row per leader (file, model id,
    RMSD) is written to ``<output>_rmsd_.info``.

    :param output: prefix of the report file name
    :param leader_list: indexable collection of leaders; each must provide
        ``getFileBelow()`` and ``getID()`` and be accepted by ``getRMSD``
    :param n: number of leaders (from the start of ``leader_list``) to report
    :param ref: path of the reference structure file
    """
    import os

    print("Calculating RMSD with the reference")

    # splitext looks only at the last suffix, so paths such as "./ref.pdb"
    # or "run.1/ref.pdb" still yield "pdb" (splitting on the first dot did not).
    ext = os.path.splitext(ref)[1][1:]
    conv = OBConversion()
    conv.SetInFormat(ext)
    mol = OBMol()
    conv.ReadFile(mol, ref)

    rows = ["\t%s\t %20s\t %20s\n" % ("File", "Model", "RMSD")]
    for i in range(n):
        leader = leader_list[i]
        rmsd = getRMSD(leader, mol)
        rows.append("%s\t%20s \t%20.3f\n" % (leader.getFileBelow(),
                                             leader.getID(), rmsd))

    # The context manager closes the report even if the write fails.
    with open(output + "_rmsd_" + ".info", "w") as out_log:
        out_log.write("".join(rows))
def __init__(self, can=False):
    """Prepare an Open Babel converter that writes the 'smi' format.

    :param can: when true, ``'c'`` is appended to the base ``'in'`` output
        option string (presumably requesting canonical output -- confirm
        against the Open Babel SMILES format options).
    """
    writer = OBConversion()
    writer.SetOutFormat('smi')
    flags = 'inc' if can else 'in'
    writer.SetOptions(flags, OBConversion.OUTOPTIONS)
    self._OBconverter = writer
def __init__(self, miner=None, excludeAtomList=None):
    """Initialise the instance on top of ``Molecule``.

    :param miner: object stored on ``self.miner`` (not used here)
    :param excludeAtomList: list stored on ``self.excludeAtomList``; a list
        passed by the caller is kept by reference, and when omitted every
        instance gets its own new empty list
    """
    Molecule.__init__(self)
    self.embedings = []
    self.miner = miner
    # A ``[]`` default would be one list object shared by every instance
    # created without the argument, so a fresh list is built per instance.
    self.excludeAtomList = [] if excludeAtomList is None else excludeAtomList
    self.lastExtendedAtomicNum = 0
    self.lastExtendedAtomIndex = 0
    self.writer = OBConversion()
    self.writer.SetInAndOutFormats("smi", "smi")
def __init__(self, smile):
    """Store a SMILES string and the InChI Open Babel derives from it.

    :param smile: SMILES string; surrounding whitespace is stripped
    :raises RuntimeError: if Open Babel rejects the 'smi' -> 'inchi'
        format pair
    :raises TypeError: if Open Babel cannot read the SMILES string
    """
    self._smile = smile.strip()
    conv = OBConversion()
    if not conv.SetInAndOutFormats('smi', 'inchi'):
        # Raising a bare string is not a valid exception; use a real type.
        raise RuntimeError('Problem with openbabel')
    mol = OBMol()
    if not conv.ReadString(mol, self._smile):
        raise TypeError("No such smile: %s" % self._smile)
    self._inchi = conv.WriteString(mol).strip()
def cjson_to_ob_molecule(cjson):
    """Build an ``OBMol`` from a CJSON object.

    The object is serialised with ``json.dumps``, converted to SDF text by
    ``avo_convert_str`` and that text is parsed by Open Babel.

    :param cjson: JSON-serialisable CJSON structure
    :return: the ``OBMol`` filled from the intermediate SDF text
    """
    sdf_text = avo_convert_str(json.dumps(cjson), 'cjson', 'sdf')

    reader = OBConversion()
    reader.SetInFormat('sdf')
    reader.SetOutFormat('sdf')

    ob_mol = OBMol()
    reader.ReadString(ob_mol, sdf_text)
    return ob_mol
def autodetect_bonds(cjson):
    """Return a CJSON object with bonds perceived by Open Babel.

    The input is turned into an ``OBMol``, ``ConnectTheDots`` and
    ``PerceiveBondOrders`` are applied, and the result is written as SDF
    and converted back to CJSON through ``avo_convert_str``.

    :param cjson: CJSON structure accepted by ``cjson_to_ob_molecule``
    :return: the object parsed from the resulting CJSON text
    """
    ob_mol = cjson_to_ob_molecule(cjson)
    ob_mol.ConnectTheDots()
    ob_mol.PerceiveBondOrders()

    writer = OBConversion()
    writer.SetInFormat('sdf')
    writer.SetOutFormat('sdf')
    sdf_text = writer.WriteString(ob_mol)

    return json.loads(avo_convert_str(sdf_text, 'sdf', 'cjson'))
def LoadMolFromSmiles(smiles):
    """Returns an OBMol constructed from a SMILES code.

    Only the longest dot-separated component of ``smiles`` is parsed; the
    parsed string is attached to the molecule as ``smilesCode``. ``None``
    is returned when Open Babel cannot read it.
    """
    # Strip salts: a stable sort by length leaves the longest component
    # (the last one among equally long components) at the end.
    components = smiles.split(".")
    components.sort(key=len)
    largest = components[-1]

    reader = OBConversion()
    reader.SetInAndOutFormats("smi", "smi")
    molecule = OBMol()
    if not reader.ReadString(molecule, largest):
        return None
    molecule.smilesCode = largest
    return molecule
def loadReferenceMolecule(file_name):
    """Read a reference structure file into a new ``Molecula``.

    The Open Babel input format is taken from the file extension.

    :param file_name: path of the structure file
    :return: the ``Molecula`` instance passed to ``OBConversion.ReadFile``
    """
    import os

    # splitext looks only at the last suffix, so paths with additional dots
    # ("./ref.pdb", "run.1/ref.pdb") still give the real extension.
    ext = os.path.splitext(file_name)[1][1:]
    mol = Molecula()
    conv = OBConversion()
    conv.SetInFormat(ext)
    conv.ReadFile(mol, file_name)
    return mol
def get_formula(str_data, in_format):
    """Return ``OBMol.GetFormula()`` for a molecule parsed from a string.

    :param str_data: molecule data; for the 'inchi' format a missing
        ``InChI=`` prefix is added before parsing
    :param in_format: Open Babel input format identifier
    :return: the formula string reported by Open Babel
    """
    missing_prefix = in_format == 'inchi' and not str_data.startswith('InChI=')
    if missing_prefix:
        # Inchi must start with 'InChI='
        str_data = 'InChI=' + str_data
        validate_start_of_inchi(str_data)

    # Per the original note, the formula uses the "Hill Order" - i. e.,
    # C first, then H, and then alphabetical.
    conv = OBConversion()
    conv.SetInFormat(in_format)
    mol = OBMol()
    conv.ReadString(mol, str_data)
    return mol.GetFormula()
def __init__(self, active, inactive=(), minSupport=0.2, maxSupport=0.2,
             addWholeRings=True, canonicalPruning=True, findClosed=True):
    """Store the mining settings and load the input molecules.

    :param active: iterable of inputs passed one by one to
        ``self.LoadMolecules``; falsy results are dropped
    :param inactive: same as ``active``, for ``self.inactive``
    :param minSupport: stored on ``self.minSupport``
    :param maxSupport: stored on ``self.maxSupport``
    :param addWholeRings: stored on ``self.addWholeRings``
    :param canonicalPruning: stored on ``self.canonicalPruning``
    :param findClosed: stored on ``self.findClosed``
    """
    self.minSupport = minSupport
    self.maxSupport = maxSupport
    self.rings = {}
    self.atomCount = {}
    self.findClosed = findClosed
    self.addWholeRings = addWholeRings
    self.canonicalPruning = canonicalPruning
    self.canonicalPruningSet = {}
    self.loader = OBConversion()
    self.loader.SetInAndOutFormats("smi", "smi")

    # Loaded last so LoadMolecules can rely on the attributes above, and
    # materialised as real lists: on Python 3 ``filter`` would hand back a
    # one-shot iterator that cannot be re-iterated or measured with len().
    self.active = [m for m in map(self.LoadMolecules, active) if m]
    self.inactive = [m for m in map(self.LoadMolecules, inactive) if m]
class Converter:
    """Writes SMILES strings for molecules through an Open Babel converter."""

    def __init__(self, can=False):
        """Configure the 'smi' writer.

        :param can: when true, ``'c'`` is appended to the base ``'in'``
            output option string.
        """
        writer = OBConversion()
        writer.SetOutFormat('smi')
        flags = 'inc' if can else 'in'
        writer.SetOptions(flags, OBConversion.OUTOPTIONS)
        self._OBconverter = writer

    def getSmiles(self, mol):
        """Return the SMILES text for ``mol.OBMol`` without its last character.

        When the first result contains ``'@'`` the molecule is written a
        second time and that second result is returned instead.
        """
        write = self._OBconverter.WriteString
        text = write(mol.OBMol)[:-1]
        if '@' not in text:
            return text
        # NOTE(review): the repeated write looks like a deliberate
        # workaround for stereo output -- confirm before simplifying.
        return write(mol.OBMol)[:-1]
def ToCannonicalSmiles(self):
    """Return the 'can' format string Open Babel writes for this structure.

    A fresh ``OBMol`` is assembled from ``self.atoms`` and ``self.bonds``
    (atomic numbers, aromatic flags and bond orders are copied) and written
    with the 'can' output format; the result is stripped of whitespace.
    """
    rebuilt = OBMol()
    twin_of = {}
    for original in self.atoms:
        twin = rebuilt.NewAtom()
        twin.SetAtomicNum(original.GetAtomicNum())
        if original.IsAromatic():
            twin.SetAromatic()
            # NOTE(review): spin multiplicity is assumed to be set for
            # aromatic atoms only -- confirm against the original layout.
            twin.SetSpinMultiplicity(2)
        twin_of[original] = twin

    for bond in self.bonds:
        begin_idx = twin_of[bond.atom1].GetIdx()
        end_idx = twin_of[bond.atom2].GetIdx()
        rebuilt.AddBond(begin_idx, end_idx, bond.GetBondOrder())

    converter = OBConversion()
    converter.SetInAndOutFormats("smi", "can")
    return converter.WriteString(rebuilt).strip()
def read_prot(prot_file, res_d):
    """Function to read in a protein to an OBMol

    Reads ``prot_file`` as PDB, renames every residue to its name followed
    by its number, checks that every entry of ``res_d`` is among the renamed
    residues, adds hydrogens and returns the molecule.

    :param prot_file: path of the PDB file
    :param res_d: iterable of residue labels expected in the protein
    :return: the ``OBMol`` after ``AddHydrogens()``
    """
    conv = OBConversion()
    protref = OBMol()
    conv.SetInFormat("pdb")
    conv.ReadFile(protref, prot_file)
    # Now assign the residue names
    i = 0
    my_res = []
    for residue in OBResidueIter(protref):
        i += 1
        residue.SetName(residue.GetName() + str(residue.GetNum()))
        my_res.append(residue.GetName())
    # Now check that all the residues exist and print out if not
    fail_counter = 0
    fail_list = []
    # Loop through the res and check they are in the list
    for res_me in res_d:
        if res_me not in my_res:
            fail_counter += 1
            fail_list.append(res_me)
    # If it's out of register by one do again
    # NOTE(review): the retry appends the residue number to names that were
    # already renamed above -- confirm this is intended.
    if fail_counter > 0:
        i = 0
        my_res = []
        for residue in OBResidueIter(protref):
            i += 1
            residue.SetName(residue.GetName() + str(residue.GetNum()))
            my_res.append(residue.GetName())
        # Now check that all the residues exist and print out if not
        fail_counter = 0
        fail_list = []
        # Loop through the res and check they are in the list
        for res_me in res_d:
            if res_me not in my_res:
                fail_counter += 1
                fail_list.append(res_me)
        # NOTE(review): the report is assumed to belong to the retry branch;
        # ``out_err`` is not defined in this function and must exist in the
        # enclosing scope.
        out_err.write(prot_file + ",")
        out_err.write(str(fail_counter) + "\n")
        out_err.write(str(fail_list))
        out_err.write(str(my_res))
        out_err.write(str(res_d))
    protref.AddHydrogens()
    return protref
def load_conformation(mol_list):
    """Read the PDB file that belongs to each molecule in ``mol_list``.

    For every molecule the file name is the part of ``getFileBelow()``
    before the first dot plus ``.pdb``. The first record of that file is
    read into the molecule, then ``int(getID()) - 1`` further records are
    read into throw-away ``Molecula`` objects.

    :param mol_list: iterable of molecules providing ``getFileBelow()`` and
        ``getID()``
    """
    for entry in mol_list:
        pdb_path = entry.getFileBelow().split('.')[0] + ".pdb"
        #########################################################
        # Replace these steps in order to remove openbabel ######
        reader = OBConversion()
        reader.SetInFormat("pdb")
        reader.ReadFile(entry, pdb_path)
        # NOTE(review): the records read below land in temporary objects
        # that are discarded; the list entry keeps only the first record.
        for _ in range(1, int(entry.getID())):
            scratch = Molecula()
            reader.Read(scratch)
def convert_str(str_data, in_format, out_format):
    """Convert molecule text from one Open Babel format to another.

    :param str_data: molecule data in ``in_format``
    :param in_format: Open Babel input format identifier
    :param out_format: Open Babel output format identifier
    :return: tuple ``(converted text, MIME type of the output format)``
    """
    conv = OBConversion()
    conv.SetInFormat(in_format)
    conv.SetOutFormat(out_format)

    parsed = OBMol()
    conv.ReadString(parsed, str_data)

    converted = conv.WriteString(parsed)
    mime_type = conv.GetOutFormat().GetMIMEType()
    return (converted, mime_type)
def to_inchi(str_data, in_format):
    """Return the InChI and InChIKey of a molecule parsed from a string.

    The molecule is written twice with the 'inchi' output format: once
    with no extra options and once after setting the ``K`` output option.

    :param str_data: molecule data in ``in_format``
    :param in_format: Open Babel input format identifier
    :return: tuple ``(inchi, inchikey)``, both right-stripped
    """
    conv = OBConversion()
    conv.SetInFormat(in_format)
    parsed = OBMol()
    conv.ReadString(parsed, str_data)

    conv.SetOutFormat('inchi')
    plain = conv.WriteString(parsed).rstrip()

    conv.SetOptions('K', conv.OUTOPTIONS)
    keyed = conv.WriteString(parsed).rstrip()
    return (plain, keyed)
def write_ob_molecule(self, mol, format, f):
    """
    Write an Open Babel molecule to file

    :param mol: The molecule
    :param format: The output format
    :param f: The file to write output to
    :raises ValueError: if the output format cannot be set or Open Babel
        fails while producing the text
    """
    conv = OBConversion()
    if not conv.SetOutFormat(format):
        raise ValueError("Error setting output format to " + format)

    # write to file
    try:
        s = conv.WriteString(mol)
    except (TypeError, ValueError, IOError):
        raise ValueError("Error writing data file using OpenBabel")
    if format.lower() == 'pdb':
        # Rewrite only stand-alone END records. A blanket text replacement
        # would also hit existing ENDMDL records (giving "ENDMDLMDL") and
        # any other line that merely contains the letters "END".
        s = "".join(
            "ENDMDL" + line[3:] if line.rstrip() == "END" else line
            for line in s.splitlines(True)
        )
    f.write(s)
def get_inchikey_gen():
    """Create a function that returns an InChIKey for a record.

    One ``OBConversion`` (inchi in, inchi out, ``K`` output option) is
    built here and reused by every call of the returned function.

    :return: ``get_inchikey(ser)``; it returns ``ser.inchikey`` when the
        record has an ``'inchikey'`` entry, otherwise the text Open Babel
        writes for ``ser.inchi``. On any exception the error is logged and
        ``'<formula>-<name>-<id>'`` is returned instead.
    """
    converter = OBConversion()
    converter.SetInAndOutFormats("inchi", "inchi")
    converter.SetOptions("K", converter.OUTOPTIONS)

    def get_inchikey(ser):
        try:
            if 'inchikey' in ser:
                return ser.inchikey

            if ser.inchi is None or ser.inchi == '':
                # Handled by the except clause below, like any other failure.
                raise Exception('Empty inchi')

            parsed = OBMol()
            converter.ReadString(parsed, ser.inchi)
            written = converter.WriteString(parsed)
            return written.strip('\n')
        except Exception as err:
            LOG.warning(err)
            return '{}-{}-{}'.format(ser.formula, ser['name'], ser['id'])

    return get_inchikey
def log_new_clusters(output_file, n, leader_list):
    """Write a text summary and a mol2 file for the first ``n`` leaders.

    ``<output_file>.log`` receives one row per leader (file name without
    directories, model id, total energy, interaction energy).
    ``<output_file>.mol2`` receives ``leader_list[0]`` followed by leaders
    ``1 .. n-1``.

    :param output_file: prefix of both output file names
    :param n: number of leaders to log
    :param leader_list: indexable collection of leaders providing
        ``getFileBelow()``, ``getID()``, ``getTotalEnergy()`` and
        ``getInteractionEnergy()``
    """
    print("log %d ligands. For change this number set the -n parameter" % n)

    rows = ["\t%s\t \t%20s\t %20s\t %20s\n" % ("File", "Model", "T.Energy",
                                               "I.Energy")]
    for i in range(n):
        leader = leader_list[i]
        rows.append("%s\t%20s\t%20s\t%20s\n" % (
            leader.getFileBelow().split('/').pop(),
            leader.getID(),
            leader.getTotalEnergy(),
            leader.getInteractionEnergy()))

    # The context manager closes the log even if the write fails.
    with open(output_file + ".log", "w") as out_log:
        out_log.write("".join(rows))

    conv = OBConversion()
    conv.SetOutFormat("mol2")
    try:
        conv.WriteFile(leader_list[0], output_file + ".mol2")
        for i in range(1, n):
            conv.Write(leader_list[i])
    finally:
        # Always release the output file, even when a write raises.
        conv.CloseOutFile()
def properties(str_data, in_format, add_hydrogens=False):
    """Return basic properties of a molecule parsed from a string.

    :param str_data: molecule data; for the 'inchi' format a missing
        ``InChI=`` prefix is added before parsing
    :param in_format: Open Babel input format identifier
    :param add_hydrogens: call ``AddHydrogens()`` before reading the values
    :return: dict with the keys ``atomCount``, ``formula``,
        ``heavyAtomCount``, ``mass`` and ``spacedFormula``
    """
    missing_prefix = in_format == 'inchi' and not str_data.startswith('InChI=')
    if missing_prefix:
        # Inchi must start with 'InChI='
        str_data = 'InChI=' + str_data
        validate_start_of_inchi(str_data)

    conv = OBConversion()
    conv.SetInFormat(in_format)
    mol = OBMol()
    conv.ReadString(mol, str_data)

    if add_hydrogens:
        mol.AddHydrogens()

    return {
        'atomCount': mol.NumAtoms(),
        'formula': mol.GetFormula(),
        'heavyAtomCount': mol.NumHvyAtoms(),
        'mass': mol.GetMolWt(),
        'spacedFormula': mol.GetSpacedFormula(),
    }
def one_to_many():
    """Generate PLIFs for every record of one ligand file against one protein.

    Residue definitions come from ``get_dict("myFirstFile.txt")``, ligands
    from ``../out.sdf`` and the protein from a hard-coded PDB path. One
    entry per ligand record is collected and passed to ``write_res``.
    """
    # Set up the Open Babel conversion objects
    sdconv = OBConversion()
    ligand = OBMol()
    # Define the residues and the proteins to analyse
    res_d, prot_list = get_dict("myFirstFile.txt")
    # Read the first ligand record
    sdconv.SetInFormat("sdf")
    has_record = sdconv.ReadFile(ligand, "../out.sdf")
    out_d = {}
    counter = 0
    my_prot = "1qmz"
    protref = read_prot(
        r"C:\www\Protoype\media_coninchi\pdb" + "\\" + my_prot + "al.pdb",
        res_d)
    # Walk through the remaining records of the ligand file
    while has_record:
        lig_name = ligand.GetTitle().strip(",")
        prot_name = lig_name.split("_")[0]
        ligand.AddHydrogens()
        counter += 1
        print(counter)
        # Reference dictionary (result is not used further here)
        refresdict = pp.getresiduedict(protref, res_d)
        # Fingerprint restricted to the residues in ``res_d``
        fingerprint = get_fp(protref, ligand, res_d)
        # One entry per record: residue -> scores for this molecule
        out_d[lig_name + str(counter)] = {
            res: fingerprint[res] for res in fingerprint
        }
        # Fresh molecule for the next record
        ligand = OBMol()
        has_record = sdconv.Read(ligand)
    # Now write the results out
    write_res(out_d, res_d)
def type_mof(filename, output_dir, ff="uff", output_files=True):
    """Assign force-field atom types to the atoms of a CIF structure.

    The structure is read with Open Babel, replicated 3x3x3, re-read and
    typed with the requested force field. Only the atoms with indices in
    ``[13 * n_atoms, 14 * n_atoms)`` of the replicated structure are
    reported (presumably the central replica -- this relies on the atom
    ordering produced by ``replicate``).

    Writes ``mof_tmp.xyz`` and ``mof333.cif`` into the current directory.

    :param filename: path of the input CIF file
    :param output_dir: directory for the ``<name>-obabel.log`` file
    :param ff: Open Babel force field identifier (default ``"uff"``)
    :param output_files: write the log file when true
    :return: sorted list of the unique atom types, as strings
    :raises ValueError: if Open Babel does not know the force field ``ff``
    """
    obconversion = OBConversion()
    obconversion.SetInAndOutFormats("cif", "xyz")
    obmol = OBMol()

    # Read MOF file and unit cell and write xyz file
    obconversion.ReadFile(obmol, filename)
    unitcell = openbabel.toUnitCell(obmol.GetData(openbabel.UnitCell))
    uc = [
        unitcell.GetA(), unitcell.GetB(), unitcell.GetC(),
        unitcell.GetAlpha(), unitcell.GetBeta(), unitcell.GetGamma()
    ]
    obconversion.WriteFile(obmol, 'mof_tmp.xyz')

    # Replicate unit cell using angstrom
    mol = Molecule(read='mof_tmp.xyz')
    mol.set_cell(uc)
    n_atoms = len(mol.atoms)
    mol333 = mol.replicate([3, 3, 3], center=True)
    print(mol333.cell)
    mol333.write('mof333.cif', cell=mol333.cell.to_list())

    # Type FF. The ``ff`` argument is honoured instead of being overwritten
    # by a hard-coded "UFF"; the default still resolves to "UFF".
    obconversion.ReadFile(obmol, 'mof333.cif')
    force_field = OBForceField.FindForceField(ff.upper())
    if force_field is None:
        raise ValueError("Unknown force field: %s" % ff)
    if not force_field.Setup(obmol):
        print("Error: could not setup force field")
    force_field.GetAtomTypes(obmol)

    # Get atom types for the middle cell
    first, last = n_atoms * 13, n_atoms * 14
    types = []
    for atom_idx, obatom in enumerate(OBMolAtomIter(obmol)):
        if first <= atom_idx < last:
            types.append(obatom.GetData("FFAtomType").GetValue())

    if output_files:
        mof_name = os.path.splitext(os.path.basename(filename))[0]
        log_path = os.path.join(output_dir, mof_name + "-obabel.log")
        with open(log_path, 'w') as f:
            f.write("NOTE: types order is the same as the CIF input file.\n")
            f.write("types= %s" % str(types))

    uniq_types = sorted(set(types))
    return [str(i) for i in uniq_types]
def convert_str(str_data, in_format, out_format, gen3d=False,
                add_hydrogens=False, perceive_bonds=False, out_options=None):
    """Convert molecule text between Open Babel formats with optional steps.

    :param str_data: molecule data in ``in_format``
    :param in_format: Open Babel input format identifier
    :param out_format: Open Babel output format identifier
    :param gen3d: call ``make3D()`` through a ``pybel.Molecule`` wrapper
    :param add_hydrogens: call ``AddHydrogens()`` on the parsed molecule
    :param perceive_bonds: call ``ConnectTheDots()`` and
        ``PerceiveBondOrders()``
    :param out_options: mapping of output option name to value, each passed
        to ``OBConversion.AddOption``; ``None`` means no options
    :return: tuple ``(converted text, MIME type of the output format)``
    """
    # Make sure that the start of InChI is valid before passing it to
    # Open Babel, or Open Babel will crash the server.
    if in_format.lower() == 'inchi':
        validate_start_of_inchi(str_data)

    if out_options is None:
        out_options = {}

    ob_mol = OBMol()
    conv = OBConversion()
    conv.SetInFormat(in_format)
    conv.SetOutFormat(out_format)
    conv.ReadString(ob_mol, str_data)

    if add_hydrogens:
        ob_mol.AddHydrogens()

    if gen3d:
        # Generate 3D coordinates for the input
        pybel.Molecule(ob_mol).make3D()

    if perceive_bonds:
        ob_mol.ConnectTheDots()
        ob_mol.PerceiveBondOrders()

    for name, value in out_options.items():
        conv.AddOption(name, conv.OUTOPTIONS, value)

    converted = conv.WriteString(ob_mol)
    return (converted, conv.GetOutFormat().GetMIMEType())
def all_to_all(): """Function to compare all to all""" # Set up the OpenBaebel conversion modules sdconv = OBConversion() ligref = OBMol() # Define the residues and the proteisn to analyse res_d, prot_list = get_dict("myFirstFile.txt") # Now read in the ligand sdconv.SetInFormat("sdf") notatend = sdconv.ReadFile(ligref, "../mols.sdf") out_d = {} counter = 0 # Now read the ligand file while notatend: lig_name = ligref.GetTitle().strip(",") prot_name = lig_name.split("_")[0] if prot_name not in prot_list: ligref = OBMol() notatend = sdconv.Read(ligref) continue ligref.AddHydrogens() counter += 1 print counter for j, my_prot in enumerate(prot_list): protref = read_prot( r"C:\www\Protoype\media_coninchi\pdb" + "\\" + my_prot + "al.pdb", res_d) # Get the reference dictionary refresdict = pp.getresiduedict(protref, res_d) # Update this dict, to only residues in the binding site new_d = get_fp(protref, ligref, res_d) # Make sure it is a unique name for the output while lig_name in out_d: lig_name = lig_name + "Z" # Add it to the dict out_d[lig_name + my_prot] = {} for res in new_d: # Assign each residue the scores for each molecule out_d[lig_name + my_prot][res] = new_d[res] # Make the ligand ligref = OBMol() notatend = sdconv.Read(ligref) # Now write the results out write_res(out_d, res_d)
def log_new_clusters(output_file, n, leader_list, ref=None):
    """Write a text summary (with RMSD) and a mol2 file for ``n`` leaders.

    ``<output_file>.log`` receives one row per leader: file, model id,
    total energy, interaction energy and RMSD. The RMSD is measured against
    the molecule loaded from ``ref`` or, when ``ref`` is ``None``, against
    ``leader_list[0]``. ``<output_file>.mol2`` receives ``leader_list[0]``
    followed by leaders ``1 .. n-1``.

    :param output_file: prefix of both output file names
    :param n: number of leaders to log
    :param leader_list: indexable collection of leaders providing
        ``getFileBelow()``, ``getID()``, ``getTotalEnergy()`` and
        ``getInteractionEnergy()``
    :param ref: optional path of a reference structure file
    """
    print("log %d ligands. For change this number set the -n parameter" % n)

    mol_ref = loadReferenceMolecule(ref) if ref is not None else None

    rows = ["\t%s\t %20s\t %20s\t %20s \t%15s\n" % (
        "File", "Model", "T.Energy", "I.Energy", "RMSD")]
    for i in range(n):
        leader = leader_list[i]
        # Both cases share one row format; only the RMSD target differs.
        target = mol_ref if ref is not None else leader_list[0]
        rows.append("%s\t%20s\t%20s\t%20s\t%15.3f\n" % (
            leader.getFileBelow(),
            leader.getID(),
            leader.getTotalEnergy(),
            leader.getInteractionEnergy(),
            getRMSD(leader, target)))

    # The context manager closes the log even if the write fails.
    with open(output_file + ".log", "w") as out_log:
        out_log.write("".join(rows))

    conv = OBConversion()
    conv.SetOutFormat("mol2")
    try:
        conv.WriteFile(leader_list[0], output_file + ".mol2")
        for i in range(1, n):
            conv.Write(leader_list[i])
    finally:
        # Always release the output file, even when a write raises.
        conv.CloseOutFile()
except: print 'The protein ligand folder can not be found' sys.exit(1) firstline = conflist.readline() mollisttemp = [line for line in conflist] mollist = [] scorelist = [] for mol in mollisttemp: mollist.append(mol.split(',')[0]) scorelist.append(mol.split(',')[1]) os.chdir('..') # opening the molecule files pbf = protein_ligand_folder + '/protein_bindingsite_fixed.mol2' conv = OBConversion() conv.SetInFormat("mol2") protfix = OBMol() protref = OBMol() ligref = OBMol() docklig = OBMol() dockprot = OBMol() conv.ReadFile(protfix, pbf) conv.ReadFile(protref, protein_reference) conv.ReadFile(ligref, ligand_reference) refresdict = getresiduedict(protref, residue_of_choice) refringdict = getringdict(protref) fixringdict = getringdict(protfix)
def to_inchi(str_data, in_format):
    """Return the InChI and InChIKey of a molecule, going through XYZ.

    The input is first converted to 'xyz' text, which is then re-read and
    written with the 'inchi' output format, once with no extra options and
    once after setting the ``K`` output option.

    :param str_data: molecule data in ``in_format``
    :param in_format: Open Babel input format identifier
    :return: tuple ``(inchi, inchikey)``, both right-stripped
    """
    conv = OBConversion()
    source = OBMol()
    conv.SetInFormat(in_format)
    # Hackish for now, convert to xyz first...
    conv.SetOutFormat('xyz')
    conv.ReadString(source, str_data)
    xyz_text = conv.WriteString(source)

    # Now convert to inchi and inchikey.
    from_xyz = OBMol()
    conv.SetInFormat('xyz')
    conv.ReadString(from_xyz, xyz_text)

    conv.SetOutFormat('inchi')
    plain = conv.WriteString(from_xyz).rstrip()

    conv.SetOptions("K", conv.OUTOPTIONS)
    keyed = conv.WriteString(from_xyz).rstrip()
    return (plain, keyed)
snprintf(_logbuf, BUFF_SIZE, "%-5s %-5s %-5s %-5s%6.3f %d %6.3f %8.3f %8.3f\n", (*i).a->GetType(), (*i).b->GetType(), (*i).c->GetType(), (*i).d->GetType(), (*i).V, (*i).n, (*i).cosNPhi0, (*i).tor * RAD_TO_DEG, (*i).energy); ``` """ import sys sys.path.insert(0, "/usr/local/lib/python3.6/site-packages") import os from openbabel import OBMol, OBConversion, OBMolAtomIter, OBForceField mof = "csdac-linkers-cml/uio66-HNC3-alkane.cml" obconversion = OBConversion() obconversion.SetInAndOutFormats("cml", "cml") obmol = OBMol() obconversion.ReadFile(obmol, mof) ff = OBForceField.FindForceField("UFF") ff.SetLogToStdOut() ff.SetLogLevel(3) if not ff.Setup(obmol): print("Error: could not setup force field") ff.GetAtomTypes(obmol) for atom_idx, obatom in enumerate(OBMolAtomIter(obmol)): ff_atom_type = obatom.GetData("FFAtomType").GetValue() print(ff_atom_type)
def django_run(target, opt="XTAL"): """Function to take multiple confs of ONE ligand and generate their PLIFS against one template protein""" # Set up the OpenBaebel conversion modules sdconv = OBConversion() ligref = OBMol() # Define the residues and the proteisn to analyse if os.path.isfile( os.path.join(os.path.split(sys.argv[0])[0], 'data/res_def.py')): res_d = [ trans_res(x) for x in ast.literal_eval( open( os.path.join( os.path.split(sys.argv[0])[0], 'data/res_def.py')).read())[target.title].split() ] print res_d # Molecules # Now read in the ligand plif_method = PlifMethod() plif_method.text = "PYPLIF" feature_list = [ "POLAR", "FACE", "EDGE", "ACCEPTOR", "DONOR", "NEGATIVE", "POSITIVE" ] try: plif_method.validate_unique() plif_method.save() except ValidationError: plif_method = PlifMethod.objects.get(text="PYPLIF") out_d = {} counter = 0 # Create a file for the protein t = tempfile.NamedTemporaryFile(suffix=".pdb", delete=False) my_prot = Protein.objects.get(code=target.title + "TEMP") t.write(my_prot.pdb_info.name) t.close() protref = read_prot(t.name, res_d) t = tempfile.NamedTemporaryFile(suffix=".sdf", delete=False) t.close() sdconv.SetInFormat("sdf") if opt == "XTAL": mols = Molecule.objects.exclude( prot_id__code__contains=target.title).filter( prot_id__target_id=target) elif opt == "LLOOMMPPAA": mols = [] sps = SynthPoint.objects.filter(target_id=target) for s in sps: mols.extend([m for m in s.mol_id.all()]) else: print "UNKNOWN OPTION" return for dj_mol in mols: out_sd = Chem.SDWriter(t.name) out_sd.write(Chem.MolFromMolBlock(str(dj_mol.sdf_info))) out_sd.close() sdconv.ReadFile(ligref, t.name) # Now make the new plif new_plif = Plif() new_plif.mol_id = dj_mol new_plif.prot_id = my_prot new_plif.method_id = plif_method try: new_plif.validate_unique() new_plif.save() except ValidationError: new_plif = Plif.objects.get(mol_id=dj_mol, prot_id=my_prot, method_id=plif_method) lig_name = ligref.GetTitle().strip(",") prot_name = lig_name.split("_")[0] 
ligref.AddHydrogens() counter += 1 refresdict = pp.getresiduedict(protref, res_d) new_d = get_fp(protref, ligref, res_d) for res in new_d: new_res = PlifRes() new_res.res_name = res[:3] new_res.res_num = int(res[3:]) new_res.prot_id = my_prot try: new_res.validate_unique() new_res.save() except ValidationError: new_res = PlifRes.objects.get(res_name=res[:3], res_num=int(res[3:]), prot_id=my_prot) new_plif.res_id.add(new_res) for bit_num, bit in enumerate(new_d[res]): new_bit = PlifBit() new_bit.feature = feature_list[bit_num] new_bit.method_id = plif_method new_bit.res_id = new_res try: new_bit.validate_unique() new_bit.save() my_fun(dj_mol, new_bit, new_plif, bit) except ValidationError: new_bit = PlifBit.objects.get( feature=feature_list[bit_num], method_id=plif_method, res_id=new_res) new_bit.save() new_plif.bit_id.add(new_bit) my_fun(dj_mol, new_bit, new_plif, bit) ligref = OBMol() notatend = sdconv.Read(ligref)