def atom_count(str_data, in_format):
    """Return the number of atoms Open Babel reports for ``str_data``.

    ``str_data`` is parsed as a string using the Open Babel input format
    named by ``in_format``.
    """
    reader = OBConversion()
    reader.SetInFormat(in_format)

    parsed = OBMol()
    reader.ReadString(parsed, str_data)

    atom_total = parsed.NumAtoms()
    return atom_total
def sucess_analyses(output, leader_list, n, ref):
    """Write an RMSD report for the first ``n`` leaders against a reference.

    The reference structure is read from the file ``ref`` with Open Babel,
    using the file's extension as the input format.  For each of the first
    ``n`` entries of ``leader_list`` the RMSD returned by ``getRMSD`` is
    written, together with the leader's ``getFileBelow()`` and ``getID()``
    values, to ``<output>_rmsd_.info``.

    Raises:
        ValueError: if ``ref`` has no file extension to infer the format from.
        IndexError: if ``n`` exceeds ``len(leader_list)``.
    """
    import os

    print("Calculating RMSD with the reference")

    # Take the extension from the final path component only;
    # ``ref.split(".")[1]`` picked the wrong piece for paths such as
    # "./ref.pdb" or "run.1/ref.pdb".
    ext = os.path.splitext(ref)[1].lstrip(".")
    if not ext:
        raise ValueError("Reference file has no extension: %r" % (ref,))

    conv = OBConversion()
    conv.SetInFormat(ext)
    mol = OBMol()
    conv.ReadFile(mol, ref)

    rows = ["\t%s\t %20s\t %20s\n" % ("File", "Model", "RMSD")]
    for i in range(n):
        leader = leader_list[i]
        rmsd = getRMSD(leader, mol)
        rows.append("%s\t%20s \t%20.3f\n" %
                    (leader.getFileBelow(), leader.getID(), rmsd))

    # The context manager closes the log even if the write fails.
    with open(output + "_rmsd_" + ".info", "w") as out_log:
        out_log.write("".join(rows))
def convert_str(str_data, in_format, out_format):
    """Convert ``str_data`` from ``in_format`` to ``out_format``.

    Returns a ``(converted_text, mime_type)`` tuple, where ``mime_type`` is
    what the Open Babel output format reports via ``GetMIMEType()``.
    """
    converter = OBConversion()
    converter.SetInFormat(in_format)
    converter.SetOutFormat(out_format)

    molecule = OBMol()
    converter.ReadString(molecule, str_data)

    converted_text = converter.WriteString(molecule)
    mime_type = converter.GetOutFormat().GetMIMEType()
    return (converted_text, mime_type)
def cjson_to_ob_molecule(cjson):
    """Build an Open Babel ``OBMol`` from a CJSON object.

    The object is serialised to a JSON string, converted to SDF text with
    ``avo_convert_str``, and the SDF text is then read by Open Babel.
    """
    sdf_text = avo_convert_str(json.dumps(cjson), 'cjson', 'sdf')

    sdf_io = OBConversion()
    sdf_io.SetInFormat('sdf')
    sdf_io.SetOutFormat('sdf')

    molecule = OBMol()
    sdf_io.ReadString(molecule, sdf_text)
    return molecule
def autodetect_bonds(cjson):
    """Return a CJSON object with bonds perceived by Open Babel.

    The input is converted to an ``OBMol`` with ``cjson_to_ob_molecule``,
    ``ConnectTheDots()`` and ``PerceiveBondOrders()`` are applied, and the
    result is written as SDF and converted back to CJSON with
    ``avo_convert_str``.
    """
    molecule = cjson_to_ob_molecule(cjson)
    molecule.ConnectTheDots()
    molecule.PerceiveBondOrders()

    sdf_io = OBConversion()
    sdf_io.SetInFormat('sdf')
    sdf_io.SetOutFormat('sdf')
    sdf_text = sdf_io.WriteString(molecule)

    return json.loads(avo_convert_str(sdf_text, 'sdf', 'cjson'))
def to_inchi(str_data, in_format):
    """Return ``(inchi, inchikey)`` for ``str_data`` via an XYZ round trip.

    The input is first written out as XYZ, re-read from that XYZ text, and
    only then written as InChI.  The InChIKey is produced by a second write
    after setting the ``K`` output option.  Both strings are right-stripped.
    """
    converter = OBConversion()

    # Stage 1 (hackish for now): input format -> xyz text.
    converter.SetInFormat(in_format)
    converter.SetOutFormat('xyz')
    source_mol = OBMol()
    converter.ReadString(source_mol, str_data)
    xyz_text = converter.WriteString(source_mol)

    # Stage 2: xyz text -> inchi, then inchikey.
    xyz_mol = OBMol()
    converter.SetInFormat('xyz')
    converter.ReadString(xyz_mol, xyz_text)
    converter.SetOutFormat('inchi')
    identifiers = [converter.WriteString(xyz_mol).rstrip()]

    converter.SetOptions("K", converter.OUTOPTIONS)
    identifiers.append(converter.WriteString(xyz_mol).rstrip())

    return tuple(identifiers)
def to_inchi(str_data, in_format):
    """Return ``(inchi, inchikey)`` for ``str_data`` read as ``in_format``.

    The molecule is written once as InChI, then written again after the
    ``K`` output option is set to obtain the InChIKey.  Both strings are
    right-stripped.
    """
    converter = OBConversion()
    converter.SetInFormat(in_format)

    molecule = OBMol()
    converter.ReadString(molecule, str_data)

    converter.SetOutFormat('inchi')
    inchi_text = converter.WriteString(molecule).rstrip()

    # With the "K" output option the same writer emits the InChIKey.
    converter.SetOptions('K', converter.OUTOPTIONS)
    key_text = converter.WriteString(molecule).rstrip()

    return (inchi_text, key_text)
def loadReferenceMolecule(file_name):
    """Read ``file_name`` into a new ``Molecula`` and return it.

    The Open Babel input format is taken from the file's extension.

    Raises:
        ValueError: if ``file_name`` has no extension to infer the format
            from.
    """
    import os

    # Use the extension of the final path component only;
    # ``file_name.split(".")[1]`` returned the wrong piece for paths such
    # as "./ref.pdb" or "data.v2/ref.pdb".
    ext = os.path.splitext(file_name)[1].lstrip(".")
    if not ext:
        raise ValueError(
            "Cannot infer molecule format, no file extension: %r"
            % (file_name,))

    mol = Molecula()
    conv = OBConversion()
    conv.SetInFormat(ext)
    conv.ReadFile(mol, file_name)
    return mol
def get_formula(str_data, in_format):
    """Return the molecular formula of ``str_data`` read as ``in_format``.

    For InChI input the ``InChI=`` prefix is added when missing, and the
    resulting string is always passed to ``validate_start_of_inchi`` before
    Open Babel sees it.

    Raises:
        Whatever ``validate_start_of_inchi`` raises for an invalid InChI.
    """
    # Match the format name case-insensitively, like the full convert_str
    # variant in this file does.
    if in_format.lower() == 'inchi':
        # Inchi must start with 'InChI='
        if not str_data.startswith('InChI='):
            str_data = 'InChI=' + str_data
        # Validate every InChI, not only those that lacked the prefix: an
        # already-prefixed but malformed string would otherwise reach
        # Open Babel unchecked.
        validate_start_of_inchi(str_data)

    # Get the molecule using the "Hill Order" - i. e., C first, then H,
    # and then alphabetical.
    mol = OBMol()
    conv = OBConversion()
    conv.SetInFormat(in_format)
    conv.ReadString(mol, str_data)
    return mol.GetFormula()
def read_prot(prot_file, res_d):
    """Read a PDB file into an OBMol, tag its residues, and add hydrogens.

    Every residue is renamed to ``<name><number>``.  The entries of
    ``res_d`` are then looked up among the renamed residues; if any are
    missing, the rename-and-check pass is repeated and the outcome is
    written to the module-level ``out_err`` stream.

    NOTE(review): the second pass appends the residue number to names that
    already carry it - confirm this is the intended "out of register"
    handling.
    """
    reader = OBConversion()
    protein = OBMol()
    reader.SetInFormat("pdb")
    reader.ReadFile(protein, prot_file)

    def _tag_residues():
        # Rename each residue to name+number and collect the new names.
        tagged = []
        for residue in OBResidueIter(protein):
            residue.SetName(residue.GetName() + str(residue.GetNum()))
            tagged.append(residue.GetName())
        return tagged

    def _absent_from(names):
        # Requested residues that are not among the given names.
        return [wanted for wanted in res_d if wanted not in names]

    residue_names = _tag_residues()
    missing = _absent_from(residue_names)

    # If it's out of register by one do again, then report what is missing.
    if len(missing) > 0:
        residue_names = _tag_residues()
        missing = _absent_from(residue_names)
        out_err.write(prot_file + ",")
        out_err.write(str(len(missing)) + "\n")
        out_err.write(str(missing))
        out_err.write(str(residue_names))
        out_err.write(str(res_d))

    protein.AddHydrogens()
    return protein
def load_conformation(mol_list):
    """Load, for each molecule, the PDB model selected by its ID.

    The PDB file name is derived from ``mol.getFileBelow()`` by replacing
    its extension with ``.pdb``.  Model number ``int(mol.getID())``
    (1-based) of that file is read into ``mol`` itself.

    Previously the models after the first were read into throwaway
    ``Molecula`` objects, so every entry of ``mol_list`` kept model 1
    regardless of its ID.

    If the file holds fewer models than the ID asks for, ``mol`` is left
    untouched.
    """
    import os

    for mol in mol_list:
        # Strip only the final extension; ``split('.')[0]`` truncated paths
        # such as "./run/leader.log" at the first dot.
        file_name = os.path.splitext(mol.getFileBelow())[0] + ".pdb"

        # TODO: replace these steps to remove the Open Babel dependency.
        conv = OBConversion()
        conv.SetInFormat("pdb")

        target = int(mol.getID())
        if target <= 1:
            conv.ReadFile(mol, file_name)
            continue

        # Skip models 1 .. target-1 using scratch objects, so ``mol`` is
        # only written with the model that was asked for.
        scratch = Molecula()
        more = conv.ReadFile(scratch, file_name)
        skipped = 1
        while more and skipped < target - 1:
            scratch = Molecula()
            more = conv.Read(scratch)
            skipped += 1

        if more:
            conv.Read(mol)
def all_to_all():
    """Compare every accepted ligand against every protein.

    Ligands are read one by one from ``../mols.sdf``.  A ligand is skipped
    when the part of its title before the first ``_`` is not in the protein
    list returned by ``get_dict``.  For each remaining ligand and each
    protein, the result of ``get_fp`` is stored per residue under the key
    ``<ligand name><protein>``; the collected results are finally passed to
    ``write_res``.
    """
    # Set up the Open Babel conversion modules
    sdconv = OBConversion()
    ligref = OBMol()
    # Define the residues and the proteins to analyse
    res_d, prot_list = get_dict("myFirstFile.txt")
    # Now read in the first ligand
    sdconv.SetInFormat("sdf")
    notatend = sdconv.ReadFile(ligref, "../mols.sdf")
    out_d = {}
    counter = 0
    # Now walk through the ligand file
    while notatend:
        lig_name = ligref.GetTitle().strip(",")
        prot_name = lig_name.split("_")[0]
        if prot_name not in prot_list:
            # Not one of ours: move on to the next ligand.
            ligref = OBMol()
            notatend = sdconv.Read(ligref)
            continue
        ligref.AddHydrogens()
        counter += 1
        print(counter)
        for my_prot in prot_list:
            protref = read_prot(
                r"C:\www\Protoype\media_coninchi\pdb" + "\\" + my_prot +
                "al.pdb", res_d)
            # The result was never used; the call is kept in case
            # getresiduedict has side effects - TODO confirm and drop.
            pp.getresiduedict(protref, res_d)
            # Fingerprint restricted to residues in the binding site
            new_d = get_fp(protref, ligref, res_d)
            # Make sure the output key is unique.  The key actually stored
            # is lig_name + my_prot, so that is what has to be tested;
            # testing lig_name alone let duplicate titles overwrite
            # earlier results.
            out_key = lig_name + my_prot
            while out_key in out_d:
                lig_name = lig_name + "Z"
                out_key = lig_name + my_prot
            # Assign each residue the scores for this molecule
            out_d[out_key] = {}
            for res in new_d:
                out_d[out_key][res] = new_d[res]
        # Fetch the next ligand
        ligref = OBMol()
        notatend = sdconv.Read(ligref)
    # Now write the results out
    write_res(out_d, res_d)
def convert_str(str_data, in_format, out_format, gen3d=False,
                add_hydrogens=False, perceive_bonds=False, out_options=None):
    """Convert ``str_data`` between Open Babel formats with optional steps.

    Optional processing, applied in this order: add hydrogens, generate 3D
    coordinates through ``pybel``, perceive connectivity and bond orders.
    Each entry of ``out_options`` is registered as an output option before
    writing.

    Returns a ``(converted_text, mime_type)`` tuple.
    """
    # Make sure that the start of InChI is valid before passing it to
    # Open Babel, or Open Babel will crash the server.
    reading_inchi = in_format.lower() == 'inchi'
    if reading_inchi:
        validate_start_of_inchi(str_data)

    writer_options = {} if out_options is None else out_options

    molecule = OBMol()
    converter = OBConversion()
    converter.SetInFormat(in_format)
    converter.SetOutFormat(out_format)
    converter.ReadString(molecule, str_data)

    if add_hydrogens:
        molecule.AddHydrogens()

    if gen3d:
        # Generate 3D coordinates for the input
        pybel.Molecule(molecule).make3D()

    if perceive_bonds:
        molecule.ConnectTheDots()
        molecule.PerceiveBondOrders()

    for name, setting in writer_options.items():
        converter.AddOption(name, converter.OUTOPTIONS, setting)

    converted_text = converter.WriteString(molecule)
    mime_type = converter.GetOutFormat().GetMIMEType()
    return (converted_text, mime_type)
def properties(str_data, in_format, add_hydrogens=False):
    """Return basic properties of the molecule described by ``str_data``.

    The returned dict has the keys ``atomCount``, ``formula``,
    ``heavyAtomCount``, ``mass`` and ``spacedFormula``.  When
    ``add_hydrogens`` is true, hydrogens are added before the properties
    are computed.

    For InChI input the ``InChI=`` prefix is added when missing, and the
    resulting string is always passed to ``validate_start_of_inchi`` before
    Open Babel sees it.

    Raises:
        Whatever ``validate_start_of_inchi`` raises for an invalid InChI.
    """
    # Match the format name case-insensitively, like the full convert_str
    # variant in this file does.
    if in_format.lower() == 'inchi':
        # Inchi must start with 'InChI='
        if not str_data.startswith('InChI='):
            str_data = 'InChI=' + str_data
        # Validate every InChI, not only those that lacked the prefix: an
        # already-prefixed but malformed string would otherwise reach
        # Open Babel unchecked.
        validate_start_of_inchi(str_data)

    mol = OBMol()
    conv = OBConversion()
    conv.SetInFormat(in_format)
    conv.ReadString(mol, str_data)

    if add_hydrogens:
        mol.AddHydrogens()

    return {
        'atomCount': mol.NumAtoms(),
        'formula': mol.GetFormula(),
        'heavyAtomCount': mol.NumHvyAtoms(),
        'mass': mol.GetMolWt(),
        'spacedFormula': mol.GetSpacedFormula(),
    }
def one_to_many():
    """Fingerprint every conformer in ``../out.sdf`` against one protein.

    The template protein ("1qmz") is read once.  Each ligand record is
    hydrogenated, fingerprinted with ``get_fp`` and stored per residue
    under the key ``<title><running count>``.  The collected results are
    passed to ``write_res``.
    """
    # Open Babel reader and the first ligand container
    reader = OBConversion()
    ligand = OBMol()

    # Residue definitions (the protein list is not used in this routine)
    res_d, prot_list = get_dict("myFirstFile.txt")

    reader.SetInFormat("sdf")
    has_ligand = reader.ReadFile(ligand, "../out.sdf")

    results = {}
    seen = 0
    my_prot = "1qmz"
    protein = read_prot(
        r"C:\www\Protoype\media_coninchi\pdb" + "\\" + my_prot + "al.pdb",
        res_d)

    while has_ligand:
        title = ligand.GetTitle().strip(",")
        ligand.AddHydrogens()
        seen += 1
        print(seen)

        # Result unused here; the call is kept exactly as in the original.
        pp.getresiduedict(protein, res_d)

        # Fingerprint restricted to residues in the binding site
        fingerprint = get_fp(protein, ligand, res_d)

        # One entry per conformer, keyed by title plus running count
        entry = {}
        results[title + str(seen)] = entry
        for residue in fingerprint:
            entry[residue] = fingerprint[residue]

        # Advance to the next record
        ligand = OBMol()
        has_ligand = reader.Read(ligand)

    # Now write the results out
    write_res(results, res_d)
print 'The protein ligand folder can not be found' sys.exit(1) firstline = conflist.readline() mollisttemp = [line for line in conflist] mollist = [] scorelist = [] for mol in mollisttemp: mollist.append(mol.split(',')[0]) scorelist.append(mol.split(',')[1]) os.chdir('..') # opening the molecule files pbf = protein_ligand_folder + '/protein_bindingsite_fixed.mol2' conv = OBConversion() conv.SetInFormat("mol2") protfix = OBMol() protref = OBMol() ligref = OBMol() docklig = OBMol() dockprot = OBMol() conv.ReadFile(protfix, pbf) conv.ReadFile(protref, protein_reference) conv.ReadFile(ligref, ligand_reference) refresdict = getresiduedict(protref, residue_of_choice) refringdict = getringdict(protref) fixringdict = getringdict(protfix)
def django_run(target, opt="XTAL"):
    """Compute PLIF bits for a target's molecules and store them via Django.

    A template protein (code ``target.title + "TEMP"``) is loaded with
    ``read_prot``.  For every molecule selected by ``opt`` a ``Plif`` row is
    created or fetched, the fingerprint from ``get_fp`` is computed, and
    matching ``PlifRes`` / ``PlifBit`` rows are created or fetched; each bit
    is handed to ``my_fun``.

    Parameters:
        target: object whose ``title`` is used to look up residue
            definitions and the template protein, and which is used as a
            filter value in the molecule queries.
        opt: ``"XTAL"`` or ``"LLOOMMPPAA"``; any other value prints
            "UNKNOWN OPTION" and returns early.

    NOTE(review): the statement nesting below was reconstructed from a
    flattened source - confirm against the original layout.
    """
    # Set up the Open Babel conversion modules
    sdconv = OBConversion()
    ligref = OBMol()
    # Define the residues and the proteins to analyse.
    # NOTE(review): res_d is only bound when data/res_def.py exists next to
    # the running script; otherwise read_prot(..., res_d) below would raise
    # NameError - confirm.
    if os.path.isfile(
            os.path.join(os.path.split(sys.argv[0])[0], 'data/res_def.py')):
        # NOTE(review): the file opened here is never explicitly closed.
        res_d = [
            trans_res(x) for x in ast.literal_eval(
                open(
                    os.path.join(
                        os.path.split(sys.argv[0])[0],
                        'data/res_def.py')).read())[target.title].split()
        ]
        print res_d
    # Molecules
    # Create the PLIF method row, or fetch it if it already exists
    plif_method = PlifMethod()
    plif_method.text = "PYPLIF"
    # Feature label for each bit position; indexed by bit_num further down
    feature_list = [
        "POLAR", "FACE", "EDGE", "ACCEPTOR", "DONOR", "NEGATIVE", "POSITIVE"
    ]
    try:
        plif_method.validate_unique()
        plif_method.save()
    except ValidationError:
        plif_method = PlifMethod.objects.get(text="PYPLIF")
    out_d = {}
    counter = 0
    # Create a file for the protein.
    # NOTE(review): delete=False temp files are not removed anywhere in this
    # function.
    t = tempfile.NamedTemporaryFile(suffix=".pdb", delete=False)
    my_prot = Protein.objects.get(code=target.title + "TEMP")
    # NOTE(review): this writes pdb_info.name (presumably a file name, not
    # the PDB contents) into the temp file that read_prot then parses -
    # confirm this is intended.
    t.write(my_prot.pdb_info.name)
    t.close()
    protref = read_prot(t.name, res_d)
    # Scratch SDF file, rewritten for every molecule in the loop below
    t = tempfile.NamedTemporaryFile(suffix=".sdf", delete=False)
    t.close()
    sdconv.SetInFormat("sdf")
    # Select the molecules to process
    if opt == "XTAL":
        mols = Molecule.objects.exclude(
            prot_id__code__contains=target.title).filter(
                prot_id__target_id=target)
    elif opt == "LLOOMMPPAA":
        mols = []
        sps = SynthPoint.objects.filter(target_id=target)
        for s in sps:
            mols.extend([m for m in s.mol_id.all()])
    else:
        print "UNKNOWN OPTION"
        return
    for dj_mol in mols:
        # Round-trip the stored mol block through RDKit into the scratch SDF
        # so Open Babel can read it.
        out_sd = Chem.SDWriter(t.name)
        out_sd.write(Chem.MolFromMolBlock(str(dj_mol.sdf_info)))
        out_sd.close()
        sdconv.ReadFile(ligref, t.name)
        # Now make the new plif, or fetch the existing one
        new_plif = Plif()
        new_plif.mol_id = dj_mol
        new_plif.prot_id = my_prot
        new_plif.method_id = plif_method
        try:
            new_plif.validate_unique()
            new_plif.save()
        except ValidationError:
            new_plif = Plif.objects.get(mol_id=dj_mol,
                                        prot_id=my_prot,
                                        method_id=plif_method)
        lig_name = ligref.GetTitle().strip(",")
        prot_name = lig_name.split("_")[0]
        ligref.AddHydrogens()
        counter += 1
        # refresdict is not used afterwards in this function
        refresdict = pp.getresiduedict(protref, res_d)
        new_d = get_fp(protref, ligref, res_d)
        for res in new_d:
            # Keys are split as res[:3] (name) and int(res[3:]) (number)
            new_res = PlifRes()
            new_res.res_name = res[:3]
            new_res.res_num = int(res[3:])
            new_res.prot_id = my_prot
            try:
                new_res.validate_unique()
                new_res.save()
            except ValidationError:
                new_res = PlifRes.objects.get(res_name=res[:3],
                                              res_num=int(res[3:]),
                                              prot_id=my_prot)
            new_plif.res_id.add(new_res)
            for bit_num, bit in enumerate(new_d[res]):
                new_bit = PlifBit()
                new_bit.feature = feature_list[bit_num]
                new_bit.method_id = plif_method
                new_bit.res_id = new_res
                try:
                    new_bit.validate_unique()
                    new_bit.save()
                    my_fun(dj_mol, new_bit, new_plif, bit)
                except ValidationError:
                    # The bit already exists: fetch it and link it.
                    # NOTE(review): bit_id.add() appears only in this
                    # branch - confirm the new-bit path should not link too.
                    new_bit = PlifBit.objects.get(
                        feature=feature_list[bit_num],
                        method_id=plif_method,
                        res_id=new_res)
                    new_bit.save()
                    new_plif.bit_id.add(new_bit)
                    my_fun(dj_mol, new_bit, new_plif, bit)
        # Fresh container for the next molecule; notatend is not used
        # afterwards in this function.
        ligref = OBMol()
        notatend = sdconv.Read(ligref)
def getLigandListLeaders(list_files):
    """Load one leader molecule per entry of ``list_files``.

    For each base name ``f`` the first molecule of ``f + ".pdb"`` is read
    into a new ``Molecula``.  The matching ``f + ".log"`` is then scanned
    line by line to set the log ID, total energy, vdW and Coulomb terms;
    scanning stops at the first line starting with ``}``, where the log
    file name and the interaction energy are set on the molecule.

    Returns the list of molecules, in the order of ``list_files``.
    """
    def _first_number(text):
        # Same pattern the original used for every energy field.
        return float(re.search(r".\d+.\d+", text).group(0))

    obmol_list = []
    for f in list_files:
        mol = Molecula()
        # TODO: replace these steps to remove the Open Babel dependency.
        conv = OBConversion()
        conv.SetInFormat("pdb")
        conv.ReadFile(mol, f + ".pdb")
        obmol_list.append(mol)

    # Energy information lookup step
    for f, mol in zip(list_files, obmol_list):
        log_file_name = f + ".log"
        # The context manager closes the log even if parsing raises.
        with open(log_file_name) as file_log:
            for line in file_log:
                if re.search(r"^\$Leader_Info", line) is not None:
                    mol.setIDLog(int(re.search(r"\d+", line).group(0)))
                elif re.search("Total_Energy", line) is not None:
                    mol.setTotalEnergy(_first_number(line))
                elif re.search("vdW", line) is not None:
                    mol.setVdw(_first_number(line))
                elif re.search("Coulomb", line) is not None:
                    mol.setCoulomb(_first_number(line))
                elif re.search("^}", line) is not None:
                    mol.setFileBelow(log_file_name)
                    mol.setInteractionEnergy()
                    # Only the first block of the log describes the leader.
                    break
    return obmol_list
def getLigandListRMSD(list_files, ref):
    """Pick, per file, the model that ``qsort_RMSD`` sorts first.

    For each base name ``f`` every model of ``f + ".pdb"`` is read into its
    own ``Molecula``.  ``f + ".log"`` is then scanned: each block ending
    with a line that starts with ``}`` fills in the log ID and energy terms
    of the next model, in order.  The models are sorted with ``qsort_RMSD``
    against ``ref`` and the first one is kept.  The kept models are sorted
    once more before being returned.

    Raises:
        IndexError: if a PDB file yields no models, or the log describes
            more blocks than there are models (unchanged from the original
            behaviour).
    """
    def _first_number(text):
        # Same pattern the original used for every energy field.
        return float(re.search(r".\d+.\d+", text).group(0))

    rmsd_list = []
    for f in list_files:
        # TODO: replace these steps to remove the Open Babel dependency.
        conv = OBConversion()
        conv.SetInFormat("pdb")

        models = []
        mol = Molecula()
        more = conv.ReadFile(mol, f + ".pdb")
        models.append(mol)
        while more:
            mol = Molecula()
            more = conv.Read(mol)
            models.append(mol)
        # The final read attempt failed, so drop the last molecule.
        models.pop()

        # Energy information lookup step
        log_file_name = f + ".log"
        current = 0
        # The context manager closes the log even if parsing raises.
        with open(log_file_name) as file_log:
            for line in file_log:
                if re.search(r"^\$Leader_Info", line) is not None:
                    models[current].setIDLog(
                        int(re.search(r"\d+", line).group(0)))
                elif re.search("Total_Energy", line) is not None:
                    models[current].setTotalEnergy(_first_number(line))
                elif re.search("vdW", line) is not None:
                    models[current].setVdw(_first_number(line))
                elif re.search("Coulomb", line) is not None:
                    models[current].setCoulomb(_first_number(line))
                elif re.search("^}", line) is not None:
                    models[current].setFileBelow(log_file_name)
                    models[current].setInteractionEnergy()
                    # End of block: the next block describes the next model.
                    current += 1

        qsort_RMSD(models, 0, len(models) - 1, ref)
        rmsd_list.append(models[0])

    qsort_RMSD(rmsd_list, 0, len(rmsd_list) - 1, ref)
    return rmsd_list