def ligands(name_database, pr_init):
    """Search the ligands present in every PDB file of a database.

    The search is cached on disk: when a non-empty ``resultLigandInPDB`` file
    already exists under ``pr_init``, its path is returned straight away.

    arg: -> name_database: database name, forwarded to
            managePDB.retriveListPDB to get the list of PDB IDs
         -> pr_init: prefix of the result file; it is concatenated as-is
            with the file name (presumably a directory path ending with a
            separator -- confirm against callers)
    return: path of the result file (value returned by
            writeFile.resultLigandInPDB, or the cached file path)
    """
    # single-argument print(...) behaves exactly like the print statement
    print("Start Search Ligand In PDB file")

    # reuse the result of a previous run when it exists and is not empty
    p_cached = pr_init + "resultLigandInPDB"
    if path.exists(p_cached) and path.getsize(p_cached) != 0:
        return p_cached

    # one ligandPDB structure per PDB ID of the database
    # (list of representatives, see
    #  http://www.rcsb.org/pdb/rest/representatives?cluster=50)
    l_entries = []
    for pdb_id in managePDB.retriveListPDB(name_database):
        d_entry = structure.ligandPDB()
        d_entry["name"] = pdb_id
        d_entry["ligands"] = parsing.retrieveListLigand(
            pathManage.pathDitrectoryPDB() + pdb_id.lower() + ".pdb")
        l_entries.append(d_entry)

    # write result file
    p_out = writeFile.resultLigandInPDB(l_entries, pr_init)
    print("END Search Ligand In PDB file")
    return p_out
def retriveListPDB (name_database):
    """Return the list of PDB IDs of a database, passed through
    formatCharacter.lowerList.

    Two sources are supported:
      * name_database == "PDB": every file ending with ".pdb" found in the
        directory given by pathManage.pathDitrectoryPDB(); the ID is the
        four characters that precede the extension.
      * any other name: the file pathManage.repInit + name_database + ".xml"
        is read and every run of four alphanumeric characters is taken as
        an ID. Duplicates are removed through a set, so the order of the
        returned IDs is not defined.

    arg: -> name_database: "PDB" or the base name of an .xml file
    return: list of PDB IDs (output of formatCharacter.lowerList)
    """
    if name_database == "PDB" :
        l_filesPDB = listdir(pathManage.pathDitrectoryPDB())
        list_pdb = [file_PDB[-8:-4] for file_PDB in l_filesPDB
                    if file_PDB[-4:] == ".pdb"]
        return formatCharacter.lowerList(list_pdb)

    p_file_database = pathManage.repInit + name_database + ".xml"
    # context manager: the handle is released even when read() fails
    with open (p_file_database, "r") as file_database :
        file_read = file_database.read()

    # NOTE(review): the pattern is not tied to any XML attribute, so tag or
    # attribute names of 4+ alphanumeric characters are matched as well --
    # confirm this is acceptable for the expected file layout.
    regex = compile ("[0-9A-Za-z]{4}")
    # NOTE(review): duplicates are removed before lower-casing, so IDs that
    # differ only by case survive unless lowerList removes them -- confirm.
    list_pdb = list(set(regex.findall (file_read)))
    return formatCharacter.lowerList(list_pdb)
def waterGlobal (name_database, limit_acc = 0.0):
    """
    Water statistics for every PDB of a database, followed by the R plot

    arg: -> name_database: name of the database; also used to build the
            result folder (name_database + "/water")
         -> limit_acc: limit forwarded to waterAnalysis.resolutionWater;
            when it differs from 0.0, NACCESS is first run on every PDB file
    return: NONE
    """
    pr_out = pathManage.result (name_database + "/water")

    # retrieve list PDB file
    l_ids = managePDB.retriveListPDB(name_database)

    # accessibility files are only needed when a limit is requested
    need_acc = limit_acc != 0.0
    if need_acc :
        for pdb_id in l_ids :
            runOtherSoft.runNACESS(pathManage.pathDitrectoryPDB () + pdb_id + ".pdb",
                                   pathManage.pathDitrectoryPDB (),
                                   multi_run = 0)

    # statistics file, then plot
    runScriptR.waterPlotResolution (
        waterAnalysis.resolutionWater(l_ids, pr_out, limit_acc))
def resolutionWater (l_PDB, pr_result, limit_acc = 0.0):
    """Write, for each PDB, its resolution and its water / residue counts.

    The output is a tab-separated file named
    pr_result + "statwater_" + str(limit_acc) + ".dat"; it is always
    rewritten. A PDB is skipped when:
      * loadFile.ExtractInfoPDBID returns an empty dictionary,
      * limit_acc != 0.0 and its .asa or .rsa file is missing,
      * its resolution is 100.0 (value used when no resolution was read),
      * it contains no "HOH" entry.

    arg: -> l_PDB: list of PDB IDs
         -> pr_result: prefix of the output file (concatenated as-is)
         -> limit_acc: limit given to parseNACCESS.numberResExposed; with
            0.0 the number of exposed residues is reported as 0
    return: path of the file written
    """
    pr_PDB = pathManage.pathDitrectoryPDB()
    p_filout = pr_result + "statwater_" + str (limit_acc) + ".dat"

    # context manager: the file is flushed and closed even if a PDB fails
    with open (p_filout, "w") as filout :
        filout.write("PDB ID\tResolution\tNumber of exposed residues\tNumber of residue\tNumber of water\n")

        for PDB_ID in l_PDB :
            print(PDB_ID)
            d_PDB = loadFile.ExtractInfoPDBID(PDB_ID)
            if d_PDB == {} :
                continue

            if limit_acc == 0.0 :
                number_residue_exposed = 0
            else :
                p_fileasa = pr_PDB + PDB_ID + ".asa"
                p_filersa = pr_PDB + PDB_ID + ".rsa"
                if not (path.isfile(p_fileasa) and path.isfile(p_filersa)) :
                    print("Error NACCESS " + p_fileasa)
                    continue
                # the second value returned by NACCESS parsing was always
                # overwritten below, so it is explicitly ignored here
                number_residue_exposed, _ = parseNACCESS.numberResExposed(p_filersa, limit_acc)

            RX = d_PDB["RX"]
            # case where resolution is not presented in the PDB file
            if RX == 100.0 :
                continue
            if "HOH" not in d_PDB :
                continue

            # NOTE(review): d_PDB["protein"] appears to hold one entry per
            # ATOM record, so this is presumably an atom count although the
            # column is labelled "Number of residue" -- confirm.
            number_residue = len(d_PDB["protein"])
            number_water = len(d_PDB["HOH"])

            filout.write ("\t".join([str (PDB_ID), str (RX),
                                     str (number_residue_exposed),
                                     str (number_residue),
                                     str (number_water)]) + "\n")

    return p_filout
def ExtractInfoPDBID(PDB_ID) :
    """Load a PDB file and collect its header, resolution, R value and atoms.

    The file read is pathManage.pathDitrectoryPDB() + PDB_ID.lower() + ".pdb".
    Only the lines up to the first ENDMDL record are parsed.

    arg: -> PDB_ID: PDB identifier (string)
    return: {} when the file does not exist, otherwise a dictionary with
        "Header"  : first line without its six first characters,
                    lower-cased and stripped ("" for an empty file)
        "RX"      : resolution (float); 100.0 when absent or unreadable
        "RFree"   : value of the last readable "REMARK 3 R VALUE" line
                    (0.0 for NULL); 100.0 when none is found
        "protein" : list of parsing.lineCoords results for ATOM lines
        <resName> : for each residue name met on HETATM lines, the output
                    of parsing.separateByLigand on its atoms
    """
    # control PDB exist in the folder where the PDB is included
    p_PDBfile = pathManage.pathDitrectoryPDB() + PDB_ID.lower() + ".pdb"
    if not path.exists(p_PDBfile) :
        print("%s %s" % ("ERROR load PDB ID -> ", PDB_ID))
        return {}

    # initialisation of the output
    d_out = {"protein": [], "RX": 100.0, "RFree": 100.0}

    with open (p_PDBfile, "r") as filin :
        l_linesPDB = filin.readlines ()

    # an empty file has no header line; do not fail on it
    d_out["Header"] = l_linesPDB[0][6:].lower().strip () if l_linesPDB else ""

    for linePDB in l_linesPDB :
        # Resolution -- \s+ accepts the fixed-column PDB layout
        # ("REMARK   2 RESOLUTION.") as well as single-spaced lines
        if search(r"^REMARK\s+2\s+RESOLUTION", linePDB) :
            try :
                d_out["RX"] = float (linePDB.split ()[3])
            except (IndexError, ValueError) :
                # e.g. "NOT APPLICABLE": keep the default value
                pass

        # R value
        # NOTE(review): the key is named "RFree" but the pattern selects
        # "R VALUE" lines, not "FREE R VALUE" ones -- confirm the intent.
        elif search (r"REMARK\s+3\s+R VALUE", linePDB) :
            rfactor = linePDB.strip ().split (":")[-1].replace (" ", "")
            if rfactor == "NULL" :
                d_out["RFree"] = 0.0
            else :
                try :
                    d_out["RFree"] = float (rfactor)
                except ValueError :
                    # unreadable value: same best-effort policy as resolution
                    pass

        # protein
        elif linePDB.startswith ("ATOM") :
            atom_prot = parsing.lineCoords (linePDB, remove_H = 1)
            if atom_prot is not None :
                d_out["protein"].append (atom_prot)

        # hetero atoms, grouped by residue name
        elif linePDB.startswith ("HETATM") :
            atom_HET = parsing.lineCoords (linePDB, remove_H = 1)
            if atom_HET is not None :
                d_out.setdefault (atom_HET["resName"], []).append (atom_HET)

        # kept only first model in the protein in case of NMR structure
        elif linePDB.startswith ("ENDMDL") :
            break

    # split each hetero group into its separate ligand copies
    for k in list (d_out.keys ()) :
        if k in ("protein", "RX", "RFree", "Header") :
            continue
        d_out[k] = parsing.separateByLigand (d_out[k], debug = 0)
        for l_atom_lig in d_out[k] :
            # NOTE(review): the return value was never stored back, so this
            # only has an effect if buildConnectMatrix works in place --
            # confirm.
            calcul.buildConnectMatrix (l_atom_lig)

    return d_out